### Support Vector Machines 

In [None]:
# standard imports
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import scipy

In [None]:
# make_blobs is part of the public sklearn.datasets API; importing it from the
# private _samples_generator module ties the notebook to scikit-learn internals.
from sklearn.datasets import make_blobs

# 50 points drawn around 2 centers, with a fixed seed so the plot is repeatable.
X, y = make_blobs(n_samples=50, centers=2, random_state=0, cluster_std=0.60)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')

In [None]:
# Each candidate separating line is given as (slope, intercept).
candidate_lines = [(1, 0.65), (0.5, 1.6), (-0.2, 2.9)]
xfit = np.linspace(-1, 3.5)

plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')
# Mark the single point (0.6, 2.1) with a red 'x'.
plt.plot([0.6], [2.1], 'x', color='red', markeredgewidth=2, markersize=10)
for slope, intercept in candidate_lines:
    plt.plot(xfit, slope * xfit + intercept, '-k')
plt.xlim(-1, 3.5)

The main intuition behind SVMs is as follows: rather than simply drawing a zero-width line between the classes, we can draw\
around each line a margin of some width, up to the nearest point.

In [None]:
# Each entry is (slope, intercept, half-width of the shaded band along y).
margin_lines = [(1, 0.65, 0.33), (0.5, 1.6, 0.55), (-0.2, 2.9, 0.2)]
xfit = np.linspace(-1, 3.5)

plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')
for slope, intercept, half_width in margin_lines:
    yfit = slope * xfit + intercept
    lower, upper = yfit - half_width, yfit + half_width
    plt.plot(xfit, yfit, '-k')
    # Grey band of +/- half_width around the line.
    plt.fill_between(xfit, lower, upper,
                     edgecolor='none', color='#AAAAAA', alpha=0.4)
plt.xlim(-1, 3.5)

In SVM, the line whose margin is the widest is chosen as the optimal model

##### 1. Fitting an SVM

In [None]:
from sklearn.svm import SVC  # Support Vector Classifier

In [None]:
# Linear-kernel support vector classifier with a very large C (1E10),
# fitted on the current X and y.
model = SVC(kernel='linear', C=1E10)
model.fit(X, y)

Let's visualize what is going on

In [None]:
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Plot the decision function for a two-dimensional SVC.

    The decision function is evaluated on a 30 x 30 grid spanning the current
    x/y limits of the axes, and its -1, 0 and 1 level curves are drawn (dashed,
    solid, dashed). The axes limits are restored before returning.

    Parameters
    ----------
    model : object
        Fitted classifier exposing ``decision_function`` and, when
        ``plot_support`` is true, ``support_vectors_``.
    ax : matplotlib axes, optional
        Axes to draw on. Defaults to the current axes (``plt.gca()``).
    plot_support : bool, default True
        When true, ring each support vector with a hollow circle.
    """
    if ax is None:
        ax = plt.gca()

    # Remember the current view so drawing does not rescale the plot.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # create grid to evaluate model
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)

    # plot decision boundary and margins
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    # plot support vectors
    if plot_support:
        # With facecolors='none' the edge colour must be given explicitly:
        # the default edge colour follows the (absent) face colour, which
        # leaves the hollow markers invisible.
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=300, linewidth=1, facecolors='none', edgecolors='k')

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)

In [None]:
# Scatter the training points first: plot_svc_decision_function reads the
# current axes limits to decide where to evaluate the model.
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')
plot_svc_decision_function(model)

One strength of SVM is its insensitivity to the exact behaviour of distant points.
SVM is only focused on the support vectors, as seen below

In [None]:
def plot_svm(N=10, ax=None):
    """Fit a linear SVC on the first ``N`` generated points and plot it.

    200 blob samples are generated with a fixed seed; only the first ``N``
    are used for fitting and plotting. The points, decision boundary, margins
    and support vectors are drawn on ``ax`` (the current axes when ``ax`` is
    not given), with the view fixed to x in [-1, 4] and y in [-1, 6].
    """
    all_points, all_labels = make_blobs(n_samples=200, centers=2,
                                        random_state=0, cluster_std=0.60)
    points, labels = all_points[:N], all_labels[:N]

    classifier = SVC(kernel='linear', C=1E10)
    classifier.fit(points, labels)

    if not ax:
        ax = plt.gca()
    ax.scatter(points[:, 0], points[:, 1], c=labels, s=50, cmap='BrBG')
    # Fix the view before overlaying: the overlay is evaluated on these limits.
    ax.set_xlim(-1, 4)
    ax.set_ylim(-1, 6)
    plot_svc_decision_function(classifier, ax)


# Side-by-side fits using the first 60 and the first 120 generated points.
fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)
for panel, n_points in zip(ax, (60, 120)):
    plot_svm(n_points, panel)
    panel.set_title(f'N = {n_points}')

We can see that the model above doesn't change when the number of training points increases. This can be confirmed interactively:

In [None]:
from ipywidgets import interact, fixed
# Offer training-set sizes 20, 40, ..., 180; ax stays fixed at None so each
# redraw uses the current axes.
interact(plot_svm, N=list(range(20, 200, 20)), ax=fixed(None))

#### Beyond linear boundaries : Kernel SVMs

To see the need for kernels, let's see some data that is not linearly separable

In [None]:
# make_circles is part of the public sklearn.datasets API; importing it from
# the private _samples_generator module ties the notebook to scikit-learn internals.
from sklearn.datasets import make_circles

X, y = make_circles(100, factor=.1, noise=.1)

# Fit a linear-kernel classifier on the circle data.
clf = SVC(kernel='linear').fit(X, y)

# Visualizing
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')
plot_svc_decision_function(clf, plot_support=False)

It is clear that no linear discrimination will ever be able to separate the data.\
This behaviour is generally summarised as : 'A linear classifier performs poorly for nonlinear boundaries'\
<br>
One solution is to project the data into higher dimensions such that a linear separator would be sufficient. This is mostly seen in Regression models

In [None]:
# Radial basis function centered on the middle clump (the origin):
# exp(-||x||^2), one value per row of X.
r = np.exp(-np.sum(X ** 2, axis=1))

Let's visualize this extra dimension

In [None]:
from mpl_toolkits import mplot3d


def plot_3D(elev=30, azim=30, X=X, y=y):
    """Scatter the points of ``X`` in 3-D with a radial-basis height.

    Parameters
    ----------
    elev, azim : number
        Viewing angles passed to ``view_init``.
    X : array
        Points to plot; columns 0 and 1 give the x and y coordinates.
    y : array
        Values used to colour the points.
    """
    # Derive the height from the X that was passed in rather than from the
    # module-level ``r``, so all three coordinates describe the same points.
    height = np.exp(-(X ** 2).sum(1))

    ax = plt.subplot(projection='3d')
    ax.scatter3D(X[:, 0], X[:, 1], height, c=y, s=50, cmap='autumn')
    ax.view_init(elev=elev, azim=azim)
    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.set_zlabel('r')


# Keyword names must match plot_3D's parameters (elev, azim) for the controls
# to drive the view; X and y are passed through unchanged via fixed().
interact(plot_3D, elev=[-90, 90], azim=(-180, 180),
         X=fixed(X), y=fixed(y))

The good thing is that all these kernel tricks are implemented in SVM; you just have to specify the one you want in the kernel hyperparameter.\
If you want to use a radial basis function, you change the kernel from linear to rbf

In [None]:
# Refit on the same X, y, this time with the radial basis function kernel.
clf = SVC(kernel='rbf', C=1E6).fit(X, y)

In [None]:
# Visualizing
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')
plot_svc_decision_function(clf)
# Ring the support vectors. With facecolors='none' the edge colour must be
# given explicitly, otherwise the hollow markers have no visible outline.
plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
            s=300, lw=1, facecolors='none', edgecolors='k')

#### Tuning the SVM : softening margins
This is usually applied when the data has some sort of overlap

In [None]:
# Regenerate the two-center blob data with a larger spread (cluster_std=1.2);
# this rebinds the notebook-level X and y.
X, y = make_blobs(n_samples=100, centers=2,
                  random_state=0, cluster_std=1.2)
plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')

For this case, you have to alter the tuning parameter C so that it produces the optimal model. You can use cross-validation\
or a similar procedure to find the optimal value of the C hyperparameter.\
For now, let's see how changing C affects the final fit

In [None]:
X, y = make_blobs(n_samples=100, centers=2,
                  random_state=0, cluster_std=0.8)

fig, ax = plt.subplots(1, 2, figsize=(16, 6))
fig.subplots_adjust(left=0.0625, right=0.95, wspace=0.1)

# One panel per value of C: fit a linear SVC and overlay its decision function.
for axi, C in zip(ax, [10.0, 0.1]):
    model = SVC(kernel='linear', C=C).fit(X, y)
    axi.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='BrBG')
    plot_svc_decision_function(model, axi)
    # Ring the support vectors. With facecolors='none' the edge colour must
    # be given explicitly, otherwise the hollow markers have no visible outline.
    axi.scatter(model.support_vectors_[:, 0],
                model.support_vectors_[:, 1],
                s=300, lw=1, facecolors='none', edgecolors='k')

    axi.set_title('C = {0:.1f}'.format(C), size=14)