# SVM CLASSIFICATION

# 1. Linear SVM Classification

In [1]:
# This is where classes can clearly be separted with a straight line(they are linearly separable).
# Training a linear SVM model using LinearSVC class with C = 1 and the hinge loss function:
import numpy as np
from sklearn import datasets
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

iris = datasets.load_iris()
X = iris['data'][:, (2, 3)] # petal length, petal width
y = (iris['target'] == 2).astype(np.float64) # Iris-Virginica

svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('linear_svc', LinearSVC(C = 1, loss = 'hinge'))
])
svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('linear_svc',
                 LinearSVC(C=1, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [2]:
# Using the model to predict:
svm_clf.predict([[5.5, 1.7]])#unlike Logistic Regression classifiers SVM classifiers  do not output probabilities for each class

array([1.])

In [3]:
#Alternatively, you could use the SVC class, using SVC(kernel="linear", C=1), but it is much slower especially with large
#training sets, so it is not recommended:
from sklearn.svm import SVC

svc_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('SVC', SVC(kernel = 'linear', C = 1))
])
svc_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('SVC',
                 SVC(C=1, break_ties=False, cache_size=200, class_weight=None,
                     coef0=0.0, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='linear', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [4]:
svc_clf.predict([[5.5, 1.7]])

array([1.])

In [5]:
# Another option is to use the SGDClassifier class, with SGDClassifier(loss="hinge", alpha=1/(m*C)).
# This applies regular Stochastic Gradient Descent to train a linear SVM classifier.
# It does not converge as fast as the LinearSVC class, but it can be useful to handle huge datasets that do not fit in memory
#(out-of-core training), or to handle online classification tasks.
from sklearn.linear_model import SGDClassifier

sgd_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('sgd', SGDClassifier(loss = 'hinge', alpha = 1 / (2 * 1)))
])
sgd_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('sgd',
                 SGDClassifier(alpha=0.5, average=False, class_weight=None,
                               early_stopping=False, epsilon=0.1, eta0=0.0,
                               fit_intercept=True, l1_ratio=0.15,
                               learning_rate='optimal', loss='hinge',
                               max_iter=1000, n_iter_no_change=5, n_jobs=None,
                               penalty='l2', power_t=0.5, random_state=None,
                               shuffle=True, tol=0.001, validation_fraction=0.1,
                               verbose=0, warm_start=False))],
         verbose=False)

In [6]:
sgd_clf.predict([[5.5, 1.7]])

array([1.])

# 2. Nonlinear SVM Classification

In [7]:
#One approach to handling nonlinear datasets is to add more features, such as polynomial features; in some cases this can result
#in a linearly separable dataset.
#Let's test this on the moons dataset: this is a toy dataset for binary classification in which the data points are shaped as 
#two interleaving half circles.
from sklearn.datasets import make_moons
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

polynomial_svm_clf = Pipeline([
    ('poly_features', PolynomialFeatures(degree = 3)),
    ('scaler', StandardScaler()),
    ('svm_clf', LinearSVC(C = 10, loss = 'hinge'))
])
polynomial_svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('poly_features',
                 PolynomialFeatures(degree=3, include_bias=True,
                                    interaction_only=False, order='C')),
                ('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 LinearSVC(C=10, class_weight=None, dual=True,
                           fit_intercept=True, intercept_scaling=1,
                           loss='hinge', max_iter=1000, multi_class='ovr',
                           penalty='l2', random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [8]:
polynomial_svm_clf.predict([[5.5, 1.7]])

array([1.])

## Polynomial Kernel

In [9]:
# This is another approach to handling nonlinear datasets
# This trick is implemented by the SVC class.
from sklearn.svm import SVC

poly_kernel_svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_clf', SVC(kernel = 'poly', degree = 3, coef0 = 1, C = 5))
])
poly_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 SVC(C=5, break_ties=False, cache_size=200, class_weight=None,
                     coef0=1, decision_function_shape='ovr', degree=3,
                     gamma='scale', kernel='poly', max_iter=-1,
                     probability=False, random_state=None, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [10]:
# This code trains an SVM classifier using a 3rd degree polynomial kernel.
# The hyperparameter coef0 controls how much the model is influenced by high-degree polynomials vs low-degree polynomials.

In [11]:
poly_kernel_svm_clf.predict([[5.5, 1.7]])

array([1.])

## Adding Similarity Features.

In [12]:
# Another technique to handle nonlinear problems is to add features computed using a similarity function that measures how much
#each instance resembles a particular landmark.

### Gaussian RBF Kernel

In [13]:
rbf_kernel_svm_clf = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_clf', SVC(kernel = 'rbf', gamma = 5, C = 0.001))
])
rbf_kernel_svm_clf.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_clf',
                 SVC(C=0.001, break_ties=False, cache_size=200,
                     class_weight=None, coef0=0.0,
                     decision_function_shape='ovr', degree=3, gamma=5,
                     kernel='rbf', max_iter=-1, probability=False,
                     random_state=None, shrinking=True, tol=0.001,
                     verbose=False))],
         verbose=False)

In [14]:
rbf_kernel_svm_clf.predict([[5.5, 1.7]])

array([0.])

# SVM REGRESSION

In [15]:
from sklearn.svm import LinearSVR

lin_svm_reg = Pipeline([
    ('scaler', StandardScaler()),
    ('svm_reg', LinearSVR(epsilon = 1.5))
])
lin_svm_reg.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svm_reg',
                 LinearSVR(C=1.0, dual=True, epsilon=1.5, fit_intercept=True,
                           intercept_scaling=1.0, loss='epsilon_insensitive',
                           max_iter=1000, random_state=None, tol=0.0001,
                           verbose=0))],
         verbose=False)

In [16]:
lin_svm_reg.predict([[5.5, 1.7]])

array([0.])

In [17]:
# To tackle nonlinear regression tasks, you can use a kernelized regression model:
from sklearn.svm import SVR

svr_poly_reg = Pipeline([
    ('scaler', StandardScaler()),
    ('svr_reg', SVR(kernel = 'poly', degree = 2, C = 100, epsilon = 0.1))
])
svr_poly_reg.fit(X, y)

Pipeline(memory=None,
         steps=[('scaler',
                 StandardScaler(copy=True, with_mean=True, with_std=True)),
                ('svr_reg',
                 SVR(C=100, cache_size=200, coef0=0.0, degree=2, epsilon=0.1,
                     gamma='scale', kernel='poly', max_iter=-1, shrinking=True,
                     tol=0.001, verbose=False))],
         verbose=False)

In [18]:
svr_poly_reg.predict([[5.5, 1.7]])

array([0.17935854])