In [1]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [2]:
iris = datasets.load_iris()
X = iris["data"][:, (2,3)]
y = (iris["target"] == 2).astype(np.float64)

In [3]:
svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge"))
])

svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('linear_svc', LinearSVC(C=1, loss='hinge'))])

In [4]:
svm_clf.predict([[5.5, 1.7]])

array([1.])

## Nonlinear SVM Classification

In [5]:
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

X,y = make_moons(n_samples=100, noise=0.15)
polynomial_svm_clf = Pipeline([
    ("poly_features", PolynomialFeatures(degree=3)),
    ("scaler", StandardScaler()),
    ("svm_clf", LinearSVC(C=10, loss="hinge"))
])

polynomial_svm_clf.fit(X,y)



Pipeline(steps=[('poly_features', PolynomialFeatures(degree=3)),
                ('scaler', StandardScaler()),
                ('svm_clf', LinearSVC(C=10, loss='hinge'))])

###  Polynomial Kernel

In [6]:
from sklearn.svm import SVC
poly_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5))
])

poly_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=5, coef0=1, kernel='poly'))])

### Gaussian RBF Kernel

In [7]:
rbf_kernel_svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="rbf", gamma=5, C=0.001))
])

rbf_kernel_svm_clf.fit(X, y)

Pipeline(steps=[('scaler', StandardScaler()),
                ('svm_clf', SVC(C=0.001, gamma=5))])

## SVM Regression

Linear Regression

In [8]:
from sklearn.svm import LinearSVR

svm_reg = LinearSVR(epsilon=1.5)
svm_reg.fit(X,y)

LinearSVR(epsilon=1.5)

Non Linear Regression

In [9]:
from sklearn.svm import SVR

svm_poly_reg = SVR(kernel="poly", degree=2, C=100, epsilon=0.1)
svm_poly_reg.fit(X,y)

SVR(C=100, degree=2, kernel='poly')

## Exercises

### 8.
Train  a  LinearSVC  on  a  linearly  separable  dataset.  Then  train  an  SVC  and  aSGDClassifier on the same dataset. See if you can get them to produce roughlythe same model.

In [10]:
sensor_readings = pd.read_csv("sensor_readings_2.csv")
sensor_readings

Unnamed: 0,SD_front,SD_left,Classes
0,1.687,0.445,Slight-Right-Turn
1,1.687,0.449,Slight-Right-Turn
2,1.687,0.449,Slight-Right-Turn
3,1.687,0.449,Slight-Right-Turn
4,1.687,0.449,Slight-Right-Turn
...,...,...,...
5451,1.024,0.657,Move-Forward
5452,0.894,0.649,Sharp-Right-Turn
5453,0.873,0.642,Sharp-Right-Turn
5454,0.967,0.635,Move-Forward


In [11]:
from sklearn.model_selection import train_test_split

train_set, test_set = train_test_split(sensor_readings, test_size = 0.2, random_state= 42)

X_train = pd.DataFrame()
X_test = pd.DataFrame()



In [12]:
X_train["SD_front"] = train_set["SD_front"]
X_train

Unnamed: 0,SD_front
1115,1.540
1041,1.325
1647,1.793
3316,0.914
4716,0.729
...,...
3772,1.415
5191,0.839
5226,3.256
5390,0.822


In [13]:
X_train

Unnamed: 0,SD_front
1115,1.540
1041,1.325
1647,1.793
3316,0.914
4716,0.729
...,...
3772,1.415
5191,0.839
5226,3.256
5390,0.822


In [14]:
y_train = train_set.loc[:, train_set.columns == "Classes"]
X_test["SD_front"] = test_set.loc[:, test_set.columns == "SD_front"]
X_test["SD_left"] = test_set.loc[:, test_set.columns == "SD_left"]
y_test = train_set.loc[:, test_set.columns == "Classes"]

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

### 9.
Train  an  SVM  classifier  on  the  MNIST  dataset.  Since  SVM  classifiers  are  binaryclassifiers,  you  will  need  to  use  one-versus-the-rest  to  classify  all  10  digits.  You may want to tune the hyperparameters using small validation sets to speed up theprocess. What accuracy can you reach?

In [15]:
from sklearn.datasets import fetch_openml
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV

mnist = fetch_openml('mnist_784', version=1, as_frame = False)

mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [16]:
X,y = mnist["data"], mnist["target"].astype(np.uint8)

X_train = X[:60000]
y_train = y[:60000]
X_test = X[60000:]
y_test = y[60000:]

In [17]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train.astype(np.float32)) 
X_test_scalec = scaler.transform(X_test.astype(np.float32))

In [18]:
linear_svc_clf = LinearSVC(random_state=42)
linear_svc_clf.fit(X_train_scaled, y_train)

pred_linear = linear_svc_clf.predict(X_train_scaled)
accuracy_score(y_train, pred_linear)



0.9217333333333333

In [19]:
svc_clf = SVC(gamma="scale")
svc_clf.fit(X_train_scaled, y_train)

pred_svc_clf = svc_clf.predict(X_train_scaled)
accuracy_score(y_train, pred_svc_clf)

0.9866333333333334

In [None]:
param_grid =  {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001]}

rnd_search_cv = RandomizedSearchCV(svc_clf, param_grid, n_iter=10, verbose=2, cv=3)
rnd_search_cv.fit(X_train_scaled, y_train)
rnd_search_cv.best_estimator_



Fitting 3 folds for each of 8 candidates, totalling 24 fits
[CV] END ...................................C=1, gamma=0.001; total time= 5.4min


In [None]:
rnd_search_cv.best_score

In [None]:
y_pred = rnd_search_cv.best_estimator_.predict(X_train_scaled)
accuracy_score(y_train,y_pred)