In [1]:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

In [2]:
# loading iris dataset
iris = datasets.load_iris()
iris['target_names']

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [3]:
X = iris['data'][:, (2,3)] # petal length, petal width
y = (iris['target'] == 2).astype(np.float64)

In [4]:
svm_clf = Pipeline(( ("scaler", StandardScaler()), ("linear_svc", LinearSVC(C=1, loss="hinge")) ))
svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linear_svc',
                 LinearSVC(C=1, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [5]:
svm_clf.predict( [[5.5, 1.7]] )

array([1.])

In [7]:
# grid search
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn.svm import SVC
svc = SVC(kernel = 'linear', C=1)
Cs = np.logspace(-6, -1, 10)
clf = GridSearchCV(estimator=svc, param_grid=dict(C=Cs), n_jobs=-1)
clf.fit(X[:-10], y[:-10])



GridSearchCV(cv='warn', error_score='raise-deprecating',
             estimator=SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='auto_deprecated', kernel='linear',
                           max_iter=-1, probability=False, random_state=None,
                           shrinking=True, tol=0.001, verbose=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': array([1.00000000e-06, 3.59381366e-06, 1.29154967e-05, 4.64158883e-05,
       1.66810054e-04, 5.99484250e-04, 2.15443469e-03, 7.74263683e-03,
       2.78255940e-02, 1.00000000e-01])},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=0)

In [8]:
clf.best_score_

0.9357142857142857

In [9]:
clf.score(X[-10:], y[-10:])

1.0

In [8]:
# loading moons dataset
from sklearn.datasets import make_moons
from sklearn.preprocessing import PolynomialFeatures

In [9]:
moons = make_moons()

In [14]:
X, y = moons
X[:10, :]

array([[ 0.94905575,  0.31510822],
       [ 0.03205158,  0.99948622],
       [ 0.09903113,  0.06611626],
       [ 0.57211666,  0.82017225],
       [ 1.96729486,  0.24634542],
       [-0.8713187 ,  0.49071755],
       [-0.1595999 ,  0.98718178],
       [ 0.65463495, -0.43846842],
       [ 0.07308324,  0.124733  ],
       [ 1.40478334, -0.41441262]])

In [16]:
polynomial_svm_clf = Pipeline(( ("poly_features", PolynomialFeatures(degree=3)), ("scaler", StandardScaler()), 
                               ("svm_clf", LinearSVC(C=10, loss="hinge")) ))
polynomial_svm_clf.fit(X, y)



Pipeline(memory=None,
         steps=[('poly_features',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=False, order='C')),
                ('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 LinearSVC(C=10, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [15]:
polynomial_svm_clf.predict([[0.67, 0.35]])

array([0])

In [17]:
#using polynomial kernel

In [18]:
poly_kernel_svm_clf = Pipeline(( ("scaler", StandardScaler()), 
                                ("svm_clf", SVC(kernel="poly", degree=3, coef0=1, C=5))
))

In [19]:
poly_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 SVC(C=5, cache_size=200, class_weight=None, coef0=1,
                     decision_function_shape='ovr', degree=3,
                     gamma='auto_deprecated', kernel='poly', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)