In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

import sklearn
from sklearn import linear_model
from sklearn import cross_validation
from sklearn.learning_curve import learning_curve
from sklearn import svm

%matplotlib inline

In [None]:
# Read the first exercise dataset and keep it in a DataFrame with the two
# feature columns plus the label column.
raw_data = loadmat('data/ex6data1.mat')
df = pd.DataFrame(raw_data['X'], columns=['X1', 'X2'])
df['y'] = raw_data['y']

# Plain NumPy views of the same features / labels, used by the plotting helper.
X, Y = df[['X1', 'X2']].values, df['y'].values

In [None]:
# Scatter the two classes with different markers: green '+' where y == 1,
# red 'x' where y == 0.
plt.grid()
plt.scatter(df.loc[df.y == 1, 'X1'], df.loc[df.y == 1, 'X2'],
            marker='+', c='g', s=30)
plt.scatter(df.loc[df.y == 0, 'X1'], df.loc[df.y == 0, 'X2'],
            marker='x', c='r', s=30)

In [None]:
def graph_svm_decision_boundary(clf, with_support_vectors=True,
                                features=None, labels=None):
    """Plot a fitted linear SVM's decision boundary, its margins and the data.

    Parameters
    ----------
    clf : fitted estimator
        Must expose ``coef_``, ``intercept_`` and ``support_vectors_`` for a
        two-feature problem (e.g. ``SVC(kernel='linear')``). The second
        weight ``coef_[0][1]`` must be non-zero, because the boundary is
        drawn as a function of the first feature.
    with_support_vectors : bool, default True
        When true, circle the support vectors.
    features : array-like of shape (n_samples, 2), optional
        Points to scatter. Defaults to the notebook-level ``X``.
    labels : array-like of shape (n_samples,), optional
        Values used to colour the points. Defaults to the notebook-level ``Y``.
    """
    # Fall back to the notebook globals so existing calls keep working.
    if features is None:
        features = X
    if labels is None:
        labels = Y
    features = np.asarray(features)

    # Separating hyperplane w0*x1 + w1*x2 + b = 0, rewritten as x2 = a*x1 + c.
    w = clf.coef_[0]
    a = -w[0] / w[1]
    xx = np.linspace(-5, 5)
    yy = a * xx - (clf.intercept_[0]) / w[1]

    # Margin boundaries are the lines w.x + b = +/-1. Solving for x2 shows
    # they are the decision line shifted vertically by 1/|w1|. Drawing lines
    # through arbitrary support vectors is wrong for soft-margin fits, whose
    # support vectors may lie inside the margin.
    offset = 1.0 / abs(w[1])
    yy_down = yy - offset
    yy_up = yy + offset

    # Decision boundary (solid) and the two margin boundaries (dashed).
    plt.plot(xx, yy, 'k-')
    plt.plot(xx, yy_down, 'k--')
    plt.plot(xx, yy_up, 'k--')

    if with_support_vectors:
        # An explicit edge colour is needed: with facecolors='none' alone the
        # hollow markers can end up fully transparent.
        plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
                    s=80, facecolors='none', edgecolors='k')
    plt.scatter(features[:, 0], features[:, 1], c=labels, cmap=plt.cm.Paired)

    plt.axis('tight')
    plt.show()

Large C (Very Little Regularization)

In [None]:
# Linear SVM with C=1000 (regularization strength is inversely proportional
# to C), fitted on the first dataset and then plotted.
clf = sklearn.svm.SVC(kernel='linear', C=1000).fit(df[['X1', 'X2']], df['y'])
graph_svm_decision_boundary(clf)

Small C (Lots of Regularization)

In [None]:
# Same linear SVM with C=1, i.e. much stronger regularization than C=1000.
clf = sklearn.svm.SVC(kernel='linear', C=1).fit(df[['X1', 'X2']], df['y'])
graph_svm_decision_boundary(clf)

**With Gaussian Kernels**

In [None]:
def norm(x):
    """Return the Euclidean (L2) norm of ``x``.

    Every element of ``x`` is squared, the squares are summed over all
    elements, and the square root of that total is returned.
    """
    squared = np.power(x, 2)
    return np.sqrt(squared.sum())

def gaussian_kernel(x, y, sigma):
    """Gaussian (RBF) similarity between two points.

    Computes ``exp(-||x - y||^2 / (2 * sigma^2))``.

    Parameters
    ----------
    x, y : array-like
        Points of the same shape. Plain lists and tuples are accepted as
        well as NumPy arrays.
    sigma : float
        Kernel bandwidth; larger values make the similarity decay more
        slowly with distance.

    Returns
    -------
    float
        A value in (0, 1]; exactly 1 when ``x`` and ``y`` coincide.
    """
    # Coerce to arrays so that `x - y` also works for plain Python sequences.
    diff = np.asarray(x) - np.asarray(y)
    # Use the squared distance directly rather than taking a square root and
    # squaring it again.
    sq_dist = np.sum(diff * diff)
    return np.exp(-sq_dist / (2 * np.power(sigma, 2)))

In [None]:
# Sanity check on two hand-picked points: ||x1 - x2||^2 = 9 and sigma = 2,
# so the kernel value should be exp(-9/8), roughly 0.3247.
sigma = 2
x1 = np.array([1.0, 2.0, 1.0])
x2 = np.array([0.0, 4.0, -1.0])

gaussian_kernel(x1, x2, sigma=sigma)

In [None]:
# Read the second exercise dataset into a DataFrame (two features + label).
raw_data2 = loadmat('data/ex6data2.mat')

data = pd.DataFrame(raw_data2['X'], columns=['X1', 'X2'])
data['y'] = raw_data2['y']

# Split the rows by label so each class gets its own marker.
positive = data[data['y'] == 1]
negative = data[data['y'] == 0]

fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(positive.X1, positive.X2, marker='x', s=30, label='Positive')
ax.scatter(negative.X1, negative.X2, marker='o', s=30, label='Negative')
ax.legend()

In [None]:
# RBF-kernel SVM on the second dataset. probability=True fits the probability
# model with an internal, shuffled cross-validation, so random_state is
# pinned to make the plotted probabilities reproducible across runs.
clf = sklearn.svm.SVC(C=100, gamma=1, kernel='rbf', probability=True,
                      random_state=0)
clf.fit(data[['X1','X2']], data['y'])

# predict_proba columns follow clf.classes_ (sorted labels), so column 0 is
# the estimated probability of the smaller label. Presumably that is class 0,
# the "negative" points; verify against clf.classes_.
fig, ax = plt.subplots(figsize=(12,8))
ax.scatter(data['X1'], data['X2'], s=30, c=clf.predict_proba(data[['X1', 'X2']])[:,0], cmap='Reds')