# Module 02_02: SVC: targeting CPU and Patching 

![Assets/SVCacceleration.jpg](Assets/svcPlot.png)


# Learning Objectives:

1) Describe how to surgically unpatch specific optimized functions if needed
2) Describe differences in patching more globally versus more surgically
3) Apply patching to the SVC algorithm
4) Describe acceleration for the covtype dataset using SVC



# Scikit-learn Breast Cancer Dataset
Fetch the Data

- [Back to Sections](#Back_to_Sections)


In [None]:
from sklearnex import unpatch_sklearn, patch_sklearn
# Undo any earlier patching so the first timing runs use stock scikit-learn;
# the exercise further down turns patching back on for comparison
unpatch_sklearn()

In [None]:
# Import scikit-learn's bundled datasets module
from sklearn import datasets

# Load the breast cancer dataset; the cells below read its
# feature_names, target_names, data and target attributes
cancer = datasets.load_breast_cancer()

# Explore the Data

In [None]:
# print the names of the 30 features
print("Features: ", cancer.feature_names)

# print the label names of cancer ('malignant' 'benign')
print("Labels: ", cancer.target_names)

# display the shape of the feature matrix
# (last expression in the cell, so the notebook echoes its value)
cancer.data.shape

In [None]:
# print the cancer labels (0:malignant, 1:benign)
print(cancer.target)

# Split the data

In [None]:
# Bring in the helper that shuffles and partitions the samples
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing and keep 70% for training;
# the fixed random_state makes the partition reproducible across runs
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data,
    cancer.target,
    test_size=0.3,
    random_state=109,
)


In [None]:
# Make sure stock scikit-learn is active for the baseline timing
unpatch_sklearn()
from sklearn import svm
import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() follows the wall clock and can jump if the
# system time is adjusted
start = time.perf_counter()
# Create an SVM classifier with a linear kernel
clf = svm.SVC(kernel='linear')

# Train the model using the training set (first "Elapsed" line: fit time)
clf.fit(X_train, y_train)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")

# Predict the response for the test dataset (second "Elapsed" line: predict time)
start = time.perf_counter()
y_pred = clf.predict(X_test)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")

In [None]:
# Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics

# Model Accuracy: how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

# Model Precision: of the samples predicted positive, what fraction are truly positive?
print("Precision:",metrics.precision_score(y_test, y_pred))

# Model Recall: of the truly positive samples, what fraction were predicted positive?
print("Recall:",metrics.recall_score(y_test, y_pred))

# Plot the data

In [None]:
# Plotting settings
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(4, 3))

# Scatter the first two raw feature columns of the training set, coloured by
# class. No `label=` is passed: an artist label is a single string, and the
# legend entries are generated from the colours by legend_elements() instead.
scatter = ax.scatter(X_train[:, 0], X_train[:, 1], s=150, c=y_train, edgecolors="k")
ax.legend(*scatter.legend_elements(), loc="upper right", title="Classes")
ax.set_title("Samples in two-dimensional feature space")
plt.show()

# Too many Dimensions

Reduce the dimensionality with PCA; for now, fit on X_train only.

In [None]:
import numpy as np
from sklearn.decomposition import PCA

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations, unlike the wall-clock time.time()
# First "Elapsed" line: time to fit a 2-component PCA on the training set only
start = time.perf_counter()
pca = PCA(n_components=2, svd_solver='arpack')
pca.fit(X_train)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

# Second "Elapsed" line: time to project both splits with the fitted PCA
start = time.perf_counter()
PCA_X_train = pca.transform(X_train)
PCA_X_test = pca.transform(X_test)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")


In [None]:
from sklearn import svm
from sklearn.inspection import DecisionBoundaryDisplay


def plot_training_data_with_decision_boundary(kernel):
    """Fit an SVC on the PCA-reduced training set and plot its decision boundary.

    Reads the notebook-level names ``PCA_X_train``, ``y_train``,
    ``PCA_X_test`` and ``y_test``, which must already exist when this is
    called. The classifier is trained on the training projection; the
    decision-boundary displays and the coloured sample points are drawn
    from the test projection, and the support vectors of the fitted
    classifier are circled.

    Parameters
    ----------
    kernel : str
        Kernel name passed straight through to ``svm.SVC`` (the notebook
        calls this with ``"linear"``).

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    # Train the SVC
    clf = svm.SVC(kernel=kernel, gamma=2).fit(PCA_X_train, y_train)

    # Settings for plotting
    _, ax = plt.subplots(figsize=(4, 3))

    # Plot decision boundary and margins
    common_params = {"estimator": clf, "X": PCA_X_test, "ax": ax}
    DecisionBoundaryDisplay.from_estimator(
        **common_params,
        response_method="predict",
        plot_method="pcolormesh",
        alpha=0.3,
    )
    DecisionBoundaryDisplay.from_estimator(
        **common_params,
        response_method="decision_function",
        plot_method="contour",
        levels=[-1, 0, 1],
        colors=["k", "k", "k"],
        linestyles=["--", "-", "--"],
    )

    # Plot bigger circles around samples that serve as support vectors
    ax.scatter(
        clf.support_vectors_[:, 0],
        clf.support_vectors_[:, 1],
        s=250,
        facecolors="none",
        edgecolors="k",
    )
    # Plot samples by color and add legend. The legend is built from THIS
    # scatter so it matches the points drawn here and does not depend on a
    # `scatter` variable left behind by a different figure.
    test_scatter = ax.scatter(
        PCA_X_test[:, 0], PCA_X_test[:, 1], c=y_test, s=150, edgecolors="k"
    )
    ax.legend(*test_scatter.legend_elements(), loc="upper right", title="Classes")
    ax.set_title(f" Decision boundaries of {kernel} kernel in SVC")

    plt.show()

In [None]:
plot_training_data_with_decision_boundary("linear")

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Time fit and predict together; perf_counter() is a monotonic,
# high-resolution clock intended for measuring short durations
start = time.perf_counter()
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
y_pred = clf.predict(X_test) #Predict on CPU
end = time.perf_counter()

print(f"Elapsed: {end-start:.2f}")

# Exercise:

Apply patch below

In [None]:
# classification_report is imported here but not used in this cell
from sklearn.metrics import classification_report

# Apply the patch_sklearn() function to this cell then run the cell and note the time:

###############################
## add patch here ##
patch_sklearn()
###############################

# Import again after patching (presumably so the names below pick up the
# patched implementations; confirm against the sklearnex documentation)
import numpy as np
from sklearn.decomposition import PCA

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations, unlike the wall-clock time.time()
# First "Elapsed" line: time to fit a 2-component PCA on the training set only
start = time.perf_counter()
pca = PCA(n_components=2, svd_solver='arpack')
pca.fit(X_train)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")
print(pca.explained_variance_ratio_)
print(pca.singular_values_)

# Second "Elapsed" line: time to project both splits with the fitted PCA
start = time.perf_counter()
PCA_X_train = pca.transform(X_train)
PCA_X_test = pca.transform(X_test)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")

In [None]:


from sklearn import svm
import time

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# short durations; time.time() follows the wall clock and can jump if the
# system time is adjusted
start = time.perf_counter()
# Create an SVM classifier with a linear kernel
clf = svm.SVC(kernel='linear')

# Train the model using the training set (first "Elapsed" line: fit time)
clf.fit(X_train, y_train)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")

# Predict the response for the test dataset (second "Elapsed" line: predict time)
start = time.perf_counter()
y_pred = clf.predict(X_test)
end = time.perf_counter()
print(f"Elapsed: {end-start:.2f}")

In [None]:
plot_training_data_with_decision_boundary("linear")

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Time fit and predict together; perf_counter() is a monotonic,
# high-resolution clock intended for measuring short durations
start = time.perf_counter()
clf = RandomForestClassifier(random_state=0).fit(X_train, y_train)
y_pred = clf.predict(X_test) #Predict on CPU
end = time.perf_counter()

print(f"Elapsed: {end-start:.2f}")

# Observations

Observe any differences in acceleration with patching on versus off for the two algorithms.
- Did SVC speed up under patching for this dataset?
- Did Random Forest speed up under patching for this dataset?

As we saw in chapter one, in the notebook 02_sklearnex_Motivation_Acceleration.ipynb, SVC can be accelerated to a high degree for some datasets. This Breast Cancer dataset is one of the rare cases where we have not seen a performance boost from patching with this library.

# Summary:

You have:

1) Applied patching to the SVC algorithm
2) Described acceleration for the covtype dataset
    

# Notices & Disclaimers 

Intel technologies may require enabled hardware, software or service activation.
No product or component can be absolutely secure.

Your costs and results may vary.

© Intel Corporation. Intel, the Intel logo, and other Intel marks are trademarks of Intel Corporation or its subsidiaries. 
*Other names and brands may be claimed as the property of others.

In [None]:
print("All Done")