In [7]:
# train a Linear Support Vector Classifier on iris data
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Load iris and keep only the two petal measurements as features.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length, petal width
# Binary target: 1.0 for Iris-Virginica (class index 2), 0.0 otherwise.
y = (iris.target == 2).astype(np.float64)

# Standardize the features, then fit a linear SVM with hinge loss.
svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42)),
])
svm_clf.fit(X, y)

# Classify one flower given as [petal length, petal width].
svm_clf.predict([[5.1, 1.7]])

array([ 1.])

In [10]:
# polynomial SVM classification
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# Expand the input features to all polynomial terms up to degree 3,
# standardize them, then fit a linear SVM on the expanded feature set.
polynomial_svm_clf = Pipeline([
        ("poly_features", PolynomialFeatures(degree=3)),
        ("scaler", StandardScaler()),
        # random_state pins the solver's data shuffling so re-running the cell
        # reproduces the same model, as the first LinearSVC pipeline does.
        ("linear_svc", LinearSVC(C=10, loss="hinge", random_state=42)),
])

# NOTE(review): X and y are whatever the most recently executed iris cell left
# in the kernel; make_moons is imported above but never called. Confirm that
# training on the iris petal features is what was intended here.
polynomial_svm_clf.fit(X, y)

# Classify one sample given as [petal length, petal width].
polynomial_svm_clf.predict([[5.1, 1.7]])

array([ 1.])

In [12]:
# apply kernel trick to SVM
from sklearn.svm import SVC

# Degree-3 polynomial kernel SVC on standardized features; the kernel replaces
# the explicit PolynomialFeatures step used with LinearSVC.
poly_kernel_svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(C=5, kernel="poly", degree=3, coef0=1)),
])
poly_kernel_svm_clf.fit(X, y)

# Classify one sample given as [petal length, petal width].
poly_kernel_svm_clf.predict([[5.1, 1.7]])

array([ 1.])

In [15]:
# apply a Gaussian Radial Basis Function (RBF) kernel to the SVM model

# Standardize the features, then fit an SVC with kernel="rbf"
# (gamma=5, C=0.001).
rbf_kernel_svm_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(C=0.001, kernel="rbf", gamma=5)),
])
rbf_kernel_svm_clf.fit(X, y)

# Classify one sample; the first feature value (8.5) differs from the 5.1 used
# in the other prediction cells.
rbf_kernel_svm_clf.predict([[8.5, 1.7]])

array([ 0.])

In [11]:
# linear SVM regression
from sklearn.svm import LinearSVR

# Linear support-vector regressor; epsilon=1.5 is the epsilon parameter of the
# epsilon-insensitive loss this estimator reports in its repr.
# NOTE(review): no random_state is passed, so repeated fits are not guaranteed
# to be identical - consider seeding it like the first LinearSVC pipeline.
svm_reg = LinearSVR(epsilon=1.5)

# NOTE(review): X and y come from whichever iris cell ran last; there y is a
# 0.0/1.0 class indicator, so its whole range is narrower than epsilon=1.5.
# Confirm that a regression on this target is what was intended.
svm_reg.fit(X, y)

LinearSVR(C=1.0, dual=True, epsilon=1.5, fit_intercept=True,
     intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
     random_state=None, tol=0.0001, verbose=0)

In [13]:
# Predict the regression output for a single two-feature sample
# (presumably [petal length, petal width], matching the columns of X - confirm).
svm_reg.predict([[8.5, 1.7]])

array([ 0.])

In [17]:
# Exercise 8
# train an SGDClassifier on the iris data
from sklearn.linear_model import SGDClassifier

# Rebuild the feature matrix and target from a fresh copy of the iris data.
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]  # petal length, petal width
# Binary target: 1.0 for Iris-Virginica (class index 2), 0.0 otherwise.
y = (iris.target == 2).astype(np.float64)

# Standardize the features, then fit a linear classifier trained with SGD.
linear_sgd_clf = Pipeline(steps=[
    ("scaler", StandardScaler()),
    ("sgd_clf", SGDClassifier(random_state=42)),
])
linear_sgd_clf.fit(X, y)

# Classify one flower given as [petal length, petal width].
linear_sgd_clf.predict([[5.1, 1.7]])



array([ 0.])

In [18]:
# train a SVC on the iris data
from sklearn.svm import SVC

# RBF-kernel SVC on standardized features (gamma=5, C=0.001).
# The pipeline is named after its kernel: binding it to `poly_kernel_svm_clf`
# would mislabel the model and silently overwrite the polynomial-kernel
# pipeline fitted earlier in this notebook.
rbf_svc_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("svm_clf", SVC(kernel="rbf", gamma=5, C=0.001))
])

rbf_svc_clf.fit(X, y)

# Classify one flower given as [petal length, petal width].
rbf_svc_clf.predict([[5.1, 1.7]])

array([ 0.])

In [None]:
# Exercise 9
import numpy as np
from sklearn.datasets import fetch_mldata
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import confusion_matrix, f1_score


# NOTE(review): fetch_mldata was deprecated in scikit-learn 0.20 and removed in
# 0.22, so this call only works on old installs with a cached copy of the data.
# Migrating means importing fetch_openml instead and casting its string labels
# to integers; not done here because the import lives in the lines above.
mnist = fetch_mldata('MNIST original')
X, y = mnist["data"], mnist["target"]
some_digit = X[20000]  # one sample row, reused below for a spot-check prediction

# The first 60000 rows form the training set, the remaining rows the test set.
X_train, X_test, y_train, y_test = X[:60000], X[60000:], y[:60000], y[60000:]
# Shuffle the training set to help cross-validation. A dedicated seeded
# generator makes the order reproducible without touching NumPy's global state.
shuffle_index = np.random.RandomState(42).permutation(len(X_train))
X_train, y_train = X_train[shuffle_index], y_train[shuffle_index]
# Boolean target vectors for the binary task "is this digit a 3?".
y_train_3 = (y_train == 3)
y_test_3 = (y_test == 3)
# Train a LinearSVC classifier on standardized pixels; random_state pins the
# solver's shuffling, as in the first LinearSVC pipeline of this notebook.
svm_clf = Pipeline([
    ("scaler", StandardScaler()),
    ("linear_svc", LinearSVC(C=1, loss="hinge", random_state=42)),
])
svm_clf.fit(X_train, y_train_3)
# predict() expects a 2-D (n_samples, n_features) input, so wrap the single
# 1-D sample in a list instead of passing it bare.
svm_clf.predict([some_digit])



In [None]:
# evaluate the "is it a 3?" classifier with 10-fold cross-validation
# cross_val_predict returns, for every training sample, the prediction made by
# a model fitted on the folds that did not contain that sample.
y_train_pred = cross_val_predict(svm_clf, X_train, y_train_3, cv=10)
# Only the last expression of a cell is displayed, so the metrics are printed
# explicitly rather than computed and silently discarded.
print(confusion_matrix(y_train_3, y_train_pred))
# The original passed the misspelled name `y_trian_pred`, which raised NameError.
print("F1 score:", f1_score(y_train_3, y_train_pred))
y_train_pred