In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm

## Prepare the data 

In [None]:
# Generate a small two-class, two-feature toy dataset.
# NOTE: `make_blobs` is imported from `sklearn.datasets` directly; the old
# `sklearn.datasets.samples_generator` module was deprecated and later removed
# from scikit-learn, so importing from it fails on current versions.
from sklearn.datasets import make_blobs

X, y = make_blobs(
    n_samples=50,
    n_features=2,
    centers=2,
    random_state=42,  # fixed seed so the generated blobs are reproducible
    cluster_std=0.9)

# Quick sanity checks: array shapes, the first few rows, and the label set.
print(X.shape, y.shape)
print(X[:5])
print(y[:5])
print(np.unique(y))

## Visualize the data

In [None]:
def plot_data_set(X, y):
    """Scatter-plot a 2-D dataset on the current pyplot axes, one series per class.

    Parameters
    ----------
    X : array-like
        Sample matrix; columns 0 and 1 are used as the plot coordinates.
    y : array-like
        Class label for each row of ``X``. Labels may be arbitrary values
        (they do not need to be 0..k-1).
    """
    X = np.asarray(X)
    y = np.asarray(y)
    classes = np.unique(y)

    for c in classes:
        X_c = X[y == c]
        # Label each series with the class value itself. Indexing `classes`
        # by the value (classes[c]) only works when labels are exactly 0..k-1.
        # No `cmap` here: without `c=` matplotlib ignores it and warns.
        plt.scatter(X_c[:, 0], X_c[:, 1],
                    edgecolors='gray',
                    label=str(c),
                    alpha=0.7)

    # Axis decorations are loop-invariant, so apply them once.
    plt.xlabel('X[0]')
    plt.ylabel('X[1]')
    if classes.size:
        # Only build a legend when at least one labelled series was drawn.
        plt.legend(loc='best')


In [None]:
def plt_random_set_lines():
    """Overlay three hand-picked candidate separator lines on the current axes.

    Every line has the form ``y = slope * x + intercept`` and is evaluated at
    three x positions evenly spaced over [-5, 2].
    """
    xs = np.linspace(-5, 2, num=3)

    # One (slope, intercept, colour) triple per candidate line.
    candidates = (
        (-1, 2, 'red'),
        (-0.5, 2, 'green'),
        (-0.1, 2, 'blue'),
    )

    for slope, intercept, colour in candidates:
        ys = slope * xs + intercept
        plt.plot(xs, ys, color=colour)


In [None]:
# Open a new 100-dpi figure, then draw the class scatter and the three
# candidate separator lines onto it. Order matters: both helpers draw through
# the pyplot state machine, i.e. onto whichever figure is current.
plt.figure(dpi=100)
plot_data_set(X,y)
plt_random_set_lines()
# plt.show() is left disabled here — presumably the notebook backend renders
# the figure at the end of the cell; TODO confirm or remove this line.

## Choose the model

In [None]:
# Support-vector classifier with a linear kernel. In scikit-learn, C is the
# inverse of the regularisation strength, so a small C such as 0.05 means
# strong regularisation.
clf = svm.SVC(kernel="linear", C=0.05)

## Train

In [None]:
# Fit on the complete generated dataset; no train/test split is made before
# this point, so every sample is training data.
clf.fit(X, y)

### Inspect the model

In [None]:
# Get the support vectors of the fitted model. `sv` is kept at notebook level
# because the next plotting cell reads it.
sv = clf.support_vectors_
print(sv)

In [None]:
# Draw the class scatter, then overlay the support vectors in red so they
# stand out from the ordinary samples.
plot_data_set(X,y)
plt.scatter(sv[:,0], sv[:,1],c="red")

In [None]:
def plot_margins(clf, points=None, labels=None):
    """Plot the boundary, margins and decision regions of a fitted linear SVM.

    Draws on pyplot figure number 1 (cleared first) and finishes by calling
    ``plt.show()``.

    Parameters
    ----------
    clf : fitted linear classifier
        Must expose ``coef_``, ``intercept_``, ``support_vectors_`` and
        ``predict``, e.g. ``svm.SVC(kernel='linear')`` fitted on 2-D data.
    points : array-like, optional
        Samples to scatter; columns 0 and 1 are the plot coordinates. When
        omitted, the notebook-level ``X`` is used, so ``plot_margins(clf)``
        keeps working.
    labels : array-like, optional
        Class label per sample, used to colour the scattered samples. When
        both ``points`` and ``labels`` are omitted, the notebook-level ``y``
        is used. If ``points`` is given without ``labels``, the samples are
        drawn in a single colour.
    """
    if points is None:
        # Backward-compatible default: fall back to the notebook-level dataset.
        points = X
        if labels is None:
            labels = y
    points = np.asarray(points)

    # Separating hyperplane w[0]*x + w[1]*y + b = 0, rewritten as y = a*x + c.
    # NOTE: assumes w[1] != 0, i.e. the boundary is not a vertical line.
    w = clf.coef_[0]
    a = -w[0] / w[1]
    xx = np.linspace(-5, 5)
    yy = a * xx - (clf.intercept_[0]) / w[1]

    # Parallels to the hyperplane that pass through the support vectors.
    # The margin is measured perpendicular to the hyperplane, which is
    # sqrt(1 + a^2) * margin when measured vertically in 2-D.
    margin = 1 / np.sqrt(np.sum(clf.coef_ ** 2))
    vertical_offset = np.sqrt(1 + a ** 2) * margin
    yy_down = yy - vertical_offset
    yy_up = yy + vertical_offset

    # Reuse figure number 1 and wipe whatever was drawn on it before.
    plt.figure(1, figsize=(8, 5))
    plt.clf()

    # The legend below labels the first two artists, so the boundary and one
    # margin line must be plotted first.
    plt.plot(xx, yy, 'k-')
    plt.plot(xx, yy_down, 'k--')
    plt.plot(xx, yy_up, 'k--')

    # Ring the support vectors, then draw every sample on top.
    plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80,
                facecolors='none', zorder=10, edgecolors='k')
    sample_style = dict(zorder=10, edgecolors='k')
    if labels is not None:
        # `cmap` only has an effect together with `c`; colour by class label.
        sample_style.update(c=labels, cmap=plt.cm.Paired)
    plt.scatter(points[:, 0], points[:, 1], **sample_style)

    plt.axis('tight')
    x_min = -4.8
    x_max = 4.2
    y_min = -6
    y_max = 6

    # Classify a 200 x 200 grid over the view window to shade each region.
    XX, YY = np.mgrid[x_min:x_max:200j, y_min:y_max:200j]
    Z = clf.predict(np.c_[XX.ravel(), YY.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(XX.shape)
    plt.pcolormesh(XX, YY, Z, cmap=plt.cm.Pastel1)

    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)

    # Hide tick marks; the axis labels and title are enough for this figure.
    ax = plt.gca()
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_title('Figure 1')

    plt.legend(['boundary', 'margin = {:.2f}'.format(margin)],
               bbox_to_anchor=(1, 1),
               loc="upper left")

    # Show the finished figure. Previously this call sat outside the function
    # and ran when the cell was defined, before the figure existed.
    plt.show()

In [None]:
# Draw the decision boundary, margins and decision regions for whatever
# model `clf` currently refers to.
plot_margins(clf)

In [None]:
# Plot margins for different penalty values. In scikit-learn, C is the inverse
# of the regularisation strength: the larger C ('unreg') penalises margin
# violations more heavily than the smaller C ('reg').
for name, penalty in (('unreg', 1), ('reg', 0.01)):
    # Fit a fresh linear SVM for this penalty value.
    # NOTE: this rebinds the notebook-level `clf`, so later cells see the
    # model from the last iteration.
    clf = svm.SVC(kernel='linear', C=penalty)
    clf.fit(X, y)

    # Identify which setting the following figure belongs to.
    print('{}: C = {}'.format(name, penalty))

    # The plotting helper defined in this notebook is `plot_margins`;
    # `plot_boundary` is not defined anywhere and raised NameError.
    plot_margins(clf)

    plt.show()

In [None]:
# Third-party helper (mlxtend) that shades the decision regions of a fitted
# classifier over a 2-D dataset.
from mlxtend.plotting import plot_decision_regions

# Uses whatever model `clf` currently refers to; the penalty loop in the
# previous cell rebinds `clf`, so this may not be the C=0.05 model.
# legend=2 presumably selects the legend location (matplotlib location
# code 2 is "upper left") — TODO confirm against the mlxtend documentation.
plot_decision_regions(X, 
                      y,
                      clf=clf, 
                      legend=2)

## Predict

In [None]:
# Predict the class of one new 2-D point (a list containing a single sample).
# NOTE(review): `y_pred` therefore covers one sample only and cannot be scored
# against a full label vector.
y_pred = clf.predict([[-3, 3.52126257]])

## Evaluating the Model

In [None]:
from sklearn import metrics

In [None]:
# Model Accuracy: how often is the classifier correct?
# `y_test` is never defined in this notebook and `y_pred` holds a single
# prediction, so the model is scored on the data it was fitted on instead.
# NOTE: this is training-set accuracy and therefore optimistic; hold out a
# test split before fitting to get an unbiased estimate.
print("Accuracy:", metrics.accuracy_score(y, clf.predict(X)))

In [None]:
# Model Precision: what percentage of tuples labelled positive are actually
# positive?
# `y_test` is never defined in this notebook and `y_pred` holds a single
# prediction, so the model is scored on the data it was fitted on instead.
# NOTE: this is training-set precision and therefore optimistic.
print("Precision:", metrics.precision_score(y, clf.predict(X)))

In [None]:
# Model Recall: what percentage of the actual positive tuples are labelled
# as positive?
# `y_test` is never defined in this notebook and `y_pred` holds a single
# prediction, so the model is scored on the data it was fitted on instead.
# NOTE: this is training-set recall and therefore optimistic.
print("Recall:", metrics.recall_score(y, clf.predict(X)))