# Neural Networks and Support Vector Classifier

In [None]:
from sklearn.neural_network import BernoulliRBM
from sklearn.linear_model import LogisticRegression
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, linear_model, datasets
from sklearn.cross_validation import cross_val_score
from sklearn.pipeline import Pipeline
%matplotlib inline

In [None]:
# New dataset: handwritten digits, loaded from scikit-learn's bundled data sets.
digits = datasets.load_digits()
# Display the feature matrix (one row per image) as the cell output.
digits.data

In [None]:
# Number of rows in the feature matrix: 1,797 observations
len(digits.data)

In [None]:
# Number of features in the first observation. Each observation is an
# 8 x 8 pixel image, presumably flattened into one row of 64 values.
len(digits.data[0])

In [None]:
# Draw the fifth-from-last image with the reversed gray colormap and no
# pixel smoothing ('nearest'), so every pixel shows as a crisp square.
plt.imshow(digits.images[-5], cmap=plt.cm.gray_r, interpolation='nearest')
# the number 9
plt.show()

In [None]:
# The label array that goes with digits.data (one entry per image).
digits.target

In [None]:
# Number of labels; should match the number of rows in digits.data.
len(digits.target)

In [None]:
# Label of the fifth-from-last image (the same index plotted with imshow).
digits.target[-5]
# 9

In [None]:
# Conventional names for the modelling cells: feature matrix and label vector.
digits_X = digits.data
digits_y = digits.target

Manual page for scikit-learn neural network models (unsupervised): http://scikit-learn.org/stable/modules/neural_networks.html#neural-network

In [None]:
# Fit a restricted Boltzmann machine to the digit features. The model is
# unsupervised, so only the feature matrix is passed to fit().
# NOTE(review): the BernoulliRBM docs say inputs should be binary or lie in
# [0, 1]; the raw digit pixels presumably exceed 1 -- consider rescaling
# (e.g. dividing by the maximum pixel value) before fitting.
rbm = BernoulliRBM(random_state=0)
rbm.fit(digits_X)

# One bias per visible unit (i.e. per input feature).
biases_for_visible = rbm.intercept_visible_
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(biases_for_visible.shape)
print(biases_for_visible)

In [None]:
# One bias per hidden unit of the fitted RBM.
biases_for_hidden = rbm.intercept_hidden_
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(biases_for_hidden.shape)
print(biases_for_hidden)

In [None]:
# Weight matrix of the fitted RBM connecting hidden and visible units.
weights = rbm.components_
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(weights.shape)
print(weights)

In [None]:
# A pipeline lets us do two things at once: an unsupervised artificial
# neural network (the RBM) ascertains features, and a logistic regression
# classifies using those features.
rbm = BernoulliRBM(random_state=0)
logistic = linear_model.LogisticRegression()
classifier = Pipeline(steps=[
    ('rbm', rbm),
    ('logistic', logistic),
])


In [None]:
# Mean accuracy over 5 cross-validation folds for the RBM + logistic pipeline.
# OOF! Not so great...
np.mean(cross_val_score(classifier, digits_X, digits_y, cv=5, scoring='accuracy'))

In [None]:
# Baseline: plain logistic regression on the raw pixels, without first
# getting new features from the RBM.
logistic = linear_model.LogisticRegression()
# OK not bad!!!
np.mean(cross_val_score(logistic, digits_X, digits_y, cv=5, scoring='accuracy'))

## SVMs - Support Vector Classifier

Manual page for scikit-learn SVC: http://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html

In [None]:
# Let's try an SVM classifier: fit a default SVC on all the digits
# (fit() returns the estimator itself, so it can be chained).
clf = svm.SVC().fit(digits_X, digits_y)
# Show the image we are about to classify.
plt.imshow(digits.images[-5], interpolation='nearest', cmap=plt.cm.gray_r)

In [None]:
# predict() expects a 2-D array of shape (n_samples, n_features); a bare 1-D
# sample is rejected by newer scikit-learn releases. Reshape the single image
# into a one-row matrix so the call works on old and new versions alike.
clf.predict(digits.data[-5].reshape(1, -1))
# WOOHOO

In [None]:
# OOF again, we lost to the logistic regression...
# This estimator defaults to the Gaussian (aka radial basis function) kernel;
# let's try something else.
np.mean(cross_val_score(clf, digits_X, digits_y, cv=5, scoring='accuracy'))

### The Gaussian kernel has two parameters, gamma and C

Intuitively, the gamma parameter defines how far the influence of a 
single training example reaches, with low values meaning ‘far’ and 
high values meaning ‘close’. 

* small gamma: The model is constrained, can under-fit!
* big gamma: Tries to capture the shape too well: can over-fit!


* small C: Makes the decision surface smooth and simple, can under-fit!
* big C: Selects more support vectors: can over-fit!


In [None]:
# Note the scale of gamma and C.
clf = svm.SVC(C=1, gamma=0.001)
# Skadoosh!
np.mean(cross_val_score(clf, digits_X, digits_y, cv=5, scoring='accuracy'))


In [None]:
# Load the iris data to play with.
iris = datasets.load_iris()
iris_y = iris.target
# Keep only the first two features. A natively two-dimensional data set
# would avoid this slicing.
iris_X = iris.data[:, :2]

In [None]:
# Start with logistic regression: mean accuracy over 5 folds.
logistic = linear_model.LogisticRegression()
np.mean(cross_val_score(logistic, iris_X, iris_y, cv=5, scoring='accuracy'))

In [None]:
# Now an SVM with default settings, scored the same way.
clf = svm.SVC()
np.mean(cross_val_score(clf, iris_X, iris_y, cv=5, scoring='accuracy'))

### Let's compare three SVMs with different kernels

* Radial Basis Function (RBF)
* Linear
* Poly of degree 3

In [None]:
# SVM regularization parameter shared by all three models.
C = 1.0

# Build one classifier per kernel ('rbf' is SVC's default kernel) ...
svc = svm.SVC(kernel='linear', C=C)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C)
poly_svc = svm.SVC(kernel='poly', degree=3, C=C)

# ... then fit each one on the two-feature iris data.
for estimator in (svc, rbf_svc, poly_svc):
    estimator.fit(iris_X, iris_y)

In [None]:
# Bounds of the plotting window: the data range padded by 1 on every side.
x_min = iris_X[:, 0].min() - 1
x_max = iris_X[:, 0].max() + 1
y_min = iris_X[:, 1].min() - 1
y_max = iris_X[:, 1].max() + 1

# Dense grid (step 0.02) over that window, used to colour decision regions.
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, .02),
    np.arange(y_min, y_max, .02)
)

In [None]:
# Subplot titles, in the same order the fitted models are plotted.
titles = [
    'SVC with linear kernel',
    'SVC with RBF kernel',
    'SVC with polynomial (degree 3) kernel',
]

In [None]:
# Set the default figure size through matplotlib's own rcParams (the same
# object pylab exposes), avoiding an extra mid-notebook import.
plt.rcParams['figure.figsize'] = 11, 11  # set plot size

# One subplot per fitted model, in the same order as `titles`.
for i, clf in enumerate((svc, rbf_svc, poly_svc)):
    plt.subplot(2, 2, i + 1)
    # Classify every grid point, then fold the predictions back into the
    # grid's shape so they can be drawn as filled regions.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
    # Plot also the training points
    plt.scatter(iris_X[:, 0], iris_X[:, 1], c=iris_y, cmap=plt.cm.Paired)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')
    plt.xticks(())
    plt.yticks(())
    plt.title(titles[i])

# Spacing applies to the whole figure, so set it once after the loop.
plt.subplots_adjust(wspace=0.4, hspace=0.4)
plt.show()

In [None]:
# Mean 10-fold accuracy for each kernel. cross_val_score clones the
# estimator, so the already-fitted models are only used as templates.
for label, model in (
        ('SVC with linear kernel score: ', svc),
        ('SVC with RBF kernel score: ', rbf_svc),
        ('SVC with polynomial (degree 3) kernel score: ', poly_svc)):
    score = cross_val_score(model, iris_X, iris_y, cv=10, scoring='accuracy').mean()
    # '%s %s' reproduces the output of the old `print label, score` statement
    # while remaining valid on both Python 2 and Python 3.
    print('%s %s' % (label, score))

### Let's try SVC on some different data

In [None]:
# Generate 1,000 noisy points on two concentric circles (inner radius is
# 0.2 of the outer one); random_state makes the sample reproducible.
# `datasets` is already imported at the top of the notebook.
circles_X, circles_y = datasets.make_circles(n_samples=1000, random_state=123,
                                             noise=0.1, factor=0.2)
# Colormap used to tell the two classes apart.
cm = plt.cm.Paired
plt.scatter(circles_X[:, 0], circles_X[:, 1], c=circles_y, cmap=cm)

In [None]:
# Peek at the class label generated for each point.
circles_y

In [None]:
# Let's predict the circles without graphs first: logistic regression,
# mean accuracy over 5 folds.
logreg = LogisticRegression()
np.mean(cross_val_score(logreg, circles_X, circles_y, cv=5, scoring='accuracy'))


In [None]:
# I like lines
clf = svm.SVC(kernel='linear')
np.mean(cross_val_score(clf, circles_X, circles_y, cv=5, scoring='accuracy'))


In [None]:
# I like 3rd degree polys
clf = svm.SVC(kernel='poly', degree=3)
np.mean(cross_val_score(clf, circles_X, circles_y, cv=5, scoring='accuracy'))

In [None]:
# I like circles
clf = svm.SVC(kernel='rbf')
np.mean(cross_val_score(clf, circles_X, circles_y, cv=5, scoring='accuracy'))


#### The radial basis function kernel projects the data into higher dimensions that accommodate circles well

OK now with graphs


In [None]:
# SVM regularization parameter shared by the three SVC models.
C = 1.0

# One classifier per kernel ('rbf' is SVC's default kernel).
svc = svm.SVC(kernel='linear', C=C)
poly_svc = svm.SVC(kernel='poly', degree=3, C=C)
rbf_svc = svm.SVC(kernel='rbf', gamma=0.7, C=C)

# Fit the logistic regression and every SVC on the circles data.
for estimator in (logreg, svc, poly_svc, rbf_svc):
    estimator.fit(circles_X, circles_y)

In [None]:
# Bounds of the plotting window: the data range padded by 1 on every side.
x_min = circles_X[:, 0].min() - 1
x_max = circles_X[:, 0].max() + 1
y_min = circles_X[:, 1].min() - 1
y_max = circles_X[:, 1].max() + 1

# Dense grid (step 0.02) over that window, used to colour decision regions.
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, .02),
    np.arange(y_min, y_max, .02)
)

In [None]:
# Subplot titles, in the same order the fitted models are plotted.
titles = [
    'Logistic Regression ',
    'SVC with linear kernel',
    'SVC with polynomial (degree 3) kernel',
    'SVC with RBF kernel',
]

In [None]:
# Mean 5-fold accuracy for each model. cross_val_score clones the estimator,
# so the already-fitted models are only used as templates.
for label, model in (
        ('Logistic Regression score: ', logreg),
        ('SVC with linear kernel score: ', svc),
        ('SVC with polynomial (degree 3) kernel score: ', poly_svc),
        ('SVC with RBF kernel score: ', rbf_svc)):
    score = cross_val_score(model, circles_X, circles_y, cv=5, scoring='accuracy').mean()
    # '%s %s' reproduces the output of the old `print label, score` statement
    # while remaining valid on both Python 2 and Python 3.
    print('%s %s' % (label, score))


# One subplot per fitted model, in the same order as `titles`.
for i, clf in enumerate((logreg, svc, poly_svc, rbf_svc)):
    plt.subplot(2, 2, i + 1)
    # Classify every grid point, then fold the predictions back into the
    # grid's shape so they can be drawn as filled regions.
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
    # Plot also the training points
    plt.scatter(circles_X[:, 0], circles_X[:, 1], c=circles_y, cmap=plt.cm.Paired)
    # The synthetic circle features have no names; the iris labels that
    # were copied here ('Sepal length'/'Sepal width') did not apply.
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.xticks(())
    plt.yticks(())
    plt.title(titles[i])

# Spacing applies to the whole figure, so set it once after the loop.
plt.subplots_adjust(wspace=0.4, hspace=0.4)
plt.show()

#### a real thing of beauty

---
### Bonus: Visualize C

Intuitively, the gamma parameter defines how far the influence of a 
single training example reaches, with low values meaning ‘far’ and 
high values meaning ‘close’. 

* small gamma: The model is constrained, can under-fit!
* big gamma: Tries to capture the shape too well: can over-fit!


* small C: Makes the decision surface smooth and simple, can under-fit!
* big C: Selects more support vectors: can over-fit!


In [None]:
# Visualizing different C

# Seed the generator so the two point clouds (and hence the figures) are
# the same on every run.
np.random.seed(0)
# Two 20-point Gaussian blobs centred at (-2, -2) and (2, 2).
X = np.r_[np.random.randn(20, 2) - [2, 2], np.random.randn(20, 2) + [2, 2]]
Y = [0] * 20 + [1] * 20

# Plot window for this demo. Defined here so the cell does not depend on
# the mesh bounds left over from an earlier cell.
demo_x_min, demo_x_max = -4.8, 4.2
demo_y_min, demo_y_max = -6, 6

# figure number
fignum = 1

# fit the model once per penalty value
for name, penalty in (('C of 1', 1), ('C of 0.05', 0.05)):

    clf = svm.SVC(kernel='linear', C=penalty)
    clf.fit(X, Y)

    # Separating hyperplane w[0]*x + w[1]*y + b = 0, rewritten as
    # y = a*x - b/w[1].
    w = clf.coef_[0]
    a = -w[0] / w[1]
    line_x = np.linspace(-5, 5)
    line_y = a * line_x - (clf.intercept_[0]) / w[1]

    # The margin boundaries are parallel lines at perpendicular distance
    # margin = 1/||w|| from the hyperplane. Measured along the y axis that
    # distance becomes sqrt(1 + a^2) * margin.
    margin = 1 / np.sqrt(np.sum(clf.coef_ ** 2))
    vertical_offset = np.sqrt(1 + a ** 2) * margin
    line_y_down = line_y - vertical_offset
    line_y_up = line_y + vertical_offset

    # plot the line, the points, and the nearest vectors to the plane
    plt.figure(fignum, figsize=(4, 3))
    plt.clf()
    plt.plot(line_x, line_y, 'k-')
    plt.plot(line_x, line_y_down, 'k--')
    plt.plot(line_x, line_y_up, 'k--')

    # Hollow rings around the support vectors; an explicit edge colour is
    # needed for them to be visible when the face colour is 'none'.
    plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1], s=80,
                facecolors='none', edgecolors='k', zorder=10)
    plt.scatter(X[:, 0], X[:, 1], c=Y, zorder=10, cmap=plt.cm.Paired)

    plt.axis('tight')
    plt.title(name)

    # Colour the background by predicted class over a 200 x 200 grid.
    XX, YY = np.mgrid[demo_x_min:demo_x_max:200j, demo_y_min:demo_y_max:200j]
    Z = clf.predict(np.c_[XX.ravel(), YY.ravel()])
    Z = Z.reshape(XX.shape)
    plt.pcolormesh(XX, YY, Z, cmap=plt.cm.Paired)

    plt.xlim(demo_x_min, demo_x_max)
    plt.ylim(demo_y_min, demo_y_max)

    fignum = fignum + 1

plt.show()


* small C: can under-fit!
* big C: can over-fit!
