# A short example of using Support Vector Machines (and machine-learning classifiers in general)

In [None]:
%matplotlib inline
import matplotlib
# Set the default figure size for every plot created in this notebook.
matplotlib.rcParams.update({'figure.figsize': (12.0, 8.0)})

In [None]:
from pylab import *
from sklearn import svm


In [None]:
def draw_boxes(x,y,category=None):
    """Draw one rectangle per sample, side by side along the x axis.

    Parameters
    ----------
    x : sequence of float
        Box widths.
    y : sequence of float
        Box heights, one per entry of ``x``.
    category : sequence of int, optional
        Class label per box. A box labelled 0 is drawn red, any other label
        green. When omitted (or empty) every box is drawn blue.
    """
    box_pitch = 6  # horizontal distance between the left edges of neighbouring boxes
    plt_width = (len(x)+2)*max(x)+1
    plt_height = max(y)+1
    plt.figure(figsize=(plt_width, plt_height))
    plt.xlim((0, plt_width))
    plt.ylim((0, plt_height))
    ax = plt.gca()
    # Size each rectangle from the arguments (not from notebook-level lists),
    # so the function draws whatever data it is actually given.
    for i, (width, height) in enumerate(zip(x, y)):
        if category:
            facecolor = 'r' if category[i] == 0 else 'g'
        else:
            facecolor = 'b'
        rect = matplotlib.patches.Rectangle((i*box_pitch + 1, 0), width, height,
                                            linewidth=1, facecolor=facecolor)
        ax.add_patch(rect)

# Our Datasets

In [None]:
# Example dataset: the dimensions of a handful of boxes.
# Entry i of x1 and entry i of y1 describe the same box.
x1 = [1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 5.0] # width of the box
y1 = [3.0, 2.0, 4.0, 1.0, 5.0, 2.0, 4.0, 3.0] # height of the box

draw_boxes(x1,y1)

In [None]:
# Two alternative labelings of the boxes; entry i is the class (0 or 1) of box i.
c1 = [ 0 ,  1 ,  0 ,  1 ,  0 ,  1 ,  0 ,  1 ] # possible categorization #1 (tall vs. short box)
c2 = [ 0 ,  1 ,  1 ,  0 ,  0 ,  1 ,  1 ,  0 ] # possible categorization #2 (user-selected 'nice' boxes)

In [None]:
# Categorization #1 again, this time drawn: draw_boxes colours label 0 red and label 1 green.
c1 = [ 0 ,  1 ,  0 ,  1 ,  0 ,  1 ,  0 ,  1 ] # possible categorization #1 (tall vs. short box)

draw_boxes(x1,y1,c1)


In [None]:
# A slightly less obvious classification, drawn with the same colour coding
# (label 0 red, label 1 green).
c2 = [ 0 ,  1 ,  1 ,  0 ,  0 ,  1 ,  1 ,  0 ] # possible categorization #2 (user-selected 'nice' boxes)

draw_boxes(x1,y1,c2)

In [None]:
# A sample we want to classify (xu = width, yu = height, matching x1/y1).
xu = 3.5
yu = 3.5

# draw_boxes expects sequences, so the single sample is wrapped in one-element lists.
draw_boxes([xu],[yu])

# Let's plot the data

In [None]:
# Known samples in yellow, the sample to classify as a blue cross.
plt.scatter(x1, y1, c='y')
plt.scatter([xu], [yu], marker='x', c='b')

In [None]:
# Categorization 1 (aspect ratio)

# pyplot.scatter takes a single marker per call, so the samples are first
# split into one coordinate list per class.
xa = [width for width, label in zip(x1, c1) if label == 0]
ya = [height for height, label in zip(y1, c1) if label == 0]

xb = [width for width, label in zip(x1, c1) if label == 1]
yb = [height for height, label in zip(y1, c1) if label == 1]

plt.scatter(xa, ya, marker='+', c='r')    # class 0
plt.scatter(xb, yb, marker='o', c='g')    # class 1
plt.scatter([xu], [yu], marker='x', c='b')  # sample to classify

In [None]:
# Categorization 2: split the samples into one coordinate list per class,
# because each scatter call can only use one marker.
xc = [width for width, label in zip(x1, c2) if label == 0]
yc = [height for height, label in zip(y1, c2) if label == 0]

xd = [width for width, label in zip(x1, c2) if label == 1]
yd = [height for height, label in zip(y1, c2) if label == 1]

plt.scatter(xc, yc, marker='+', c='r')    # class 0
plt.scatter(xd, yd, marker='o', c='g')    # class 1
plt.scatter([xu], [yu], marker='x', c='b')  # sample to classify

## Feature Extraction

**Goal:** transform data in such a way that the characteristic components are available as numerical or categorical values.

**Example:** apply FFT to a waveform to extract frequency components

## Standardization

[usually consists of two steps](http://scikit-learn.org/stable/modules/preprocessing.html#standardization-or-mean-removal-and-variance-scaling):

* **mean removal** ('center' all values around mean)
* **scaling / normalization** (divide by the standard deviation, or map all values to a fixed range such as [0, 1])

([When to standardize data](http://www.faqs.org/faqs/ai-faq/neural-nets/part2/section-16.html))

In [None]:
from sklearn.preprocessing import scale, StandardScaler, MinMaxScaler

In [None]:
# One (width, height) feature vector per box.
samples = [(width, height) for width, height in zip(x1, y1)]

In [None]:
# Display the list of (width, height) pairs built from x1 and y1.
samples

In [None]:
# Standardize the samples with sklearn.preprocessing.scale
# (the mean-removal / variance-scaling step described above).
scaled_samples = scale(samples)

In [None]:
# Transpose the scaled rows into one sequence per feature, then plot
# the first feature against the second.
xy = [feature_values for feature_values in zip(*scaled_samples)]
plt.scatter(*xy)

In [None]:
# Alternative scaler: MinMaxScaler with its default settings.
s = MinMaxScaler()

In [None]:
# Fit the scaler on the unscaled samples.
s.fit(samples)

In [None]:
# Apply the fitted scaler; this replaces the scaled_samples produced by scale() earlier.
scaled_samples = s.transform(samples)

In [None]:
# Transpose the min-max-scaled rows into one sequence per feature, then plot
# the first feature against the second.
xy = [feature_values for feature_values in zip(*scaled_samples)]
plt.scatter(*xy)

## Classification (using Support Vector Machines (SVMs))

In [None]:
from sklearn import svm

In [None]:
# Support vector classifier with scikit-learn's default parameters.
classifier = svm.SVC()

## Feature Vectors for SVMs must have the same length!

In [None]:
# Labels of categorization #1 (one entry per sample).
c1

In [None]:
# Labels of categorization #2 (one entry per sample).
c2

In [None]:
# Width of the sample to classify.
xu

In [None]:
# Height of the sample to classify.
yu

In [None]:
# Train the SVM on the (unscaled) samples using the categorization #1 labels.
classifier.fit(samples, c1) 

In [None]:
# predict() takes a list of feature vectors, hence the nested list for the
# single unknown sample; u_class holds one predicted label per input row.
u_class = classifier.predict([[xu,yu]])
print(u_class)

In [None]:
# Rebuild the per-class coordinate lists for categorization 1.
xa = [width for width, label in zip(x1, c1) if label == 0]
ya = [height for height, label in zip(y1, c1) if label == 0]

xb = [width for width, label in zip(x1, c1) if label == 1]
yb = [height for height, label in zip(y1, c1) if label == 1]

plt.scatter(xa, ya, marker='+', c='r')
plt.scatter(xb, yb, marker='o', c='g')
# Draw the unknown sample in blue, using the marker of its predicted class.
if u_class[0] != 0:
    plt.scatter([xu], [yu], marker='o', c='b')
else:
    plt.scatter([xu], [yu], marker='+', c='b')

In [None]:
# Re-train the same classifier object, now on the categorization #2 labels.
classifier.fit(samples, c2)

In [None]:
# Classify the unknown sample with the classifier fitted on categorization #2;
# this overwrites the u_class from the categorization #1 run.
u_class = classifier.predict([[xu,yu]])
print(u_class)

In [None]:
# Rebuild the per-class coordinate lists for categorization 2.
xc = [width for width, label in zip(x1, c2) if label == 0]
yc = [height for height, label in zip(y1, c2) if label == 0]

xd = [width for width, label in zip(x1, c2) if label == 1]
yd = [height for height, label in zip(y1, c2) if label == 1]

plt.scatter(xc, yc, marker='+', c='r')
plt.scatter(xd, yd, marker='o', c='g')
# Draw the unknown sample in blue, using the marker of its predicted class.
if u_class[0] != 0:
    plt.scatter([xu], [yu], marker='o', c='b')
else:
    plt.scatter([xu], [yu], marker='+', c='b')

## Showing the decision surface of a classifier

In [None]:
# Decision surface of the SVM `classifier`, overlaid with the categorization-2
# samples (xc/yc = class 0, xd/yd = class 1) and the unknown sample.
plot_step = 0.02
# np.arange excludes its stop value, so a grid from min to max would leave the
# outermost samples on or beyond the edge of the shaded area. Pad both ends.
margin = 0.5
xx, yy = np.meshgrid(np.arange(min(x1) - margin, max(x1) + margin, plot_step),
                     np.arange(min(y1) - margin, max(y1) + margin, plot_step))
# Predict a class for every grid point, then fold the flat result back into
# the shape of the grid so contourf can draw it.
zz = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
zz = zz.reshape(xx.shape)
cs = plt.contourf(xx, yy, zz, cmap=plt.cm.RdYlBu)

scatter(xc,yc,marker='+', c='k')
scatter(xd,yd,marker='o', c='k')
scatter([xu],[yu], marker='x', c='k')

# tight_layout only adjusts axes that already exist, so it has to run after
# the plot has been drawn.
plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)


# Same approach with a multi-layer perceptron

In [None]:
from sklearn.neural_network import MLPClassifier
# Multi-layer perceptron using the 'lbfgs' solver and hidden_layer_sizes=(5, 2);
# random_state=1 fixes the random seed.
clf = MLPClassifier(solver='lbfgs', alpha=1e-5,
                    hidden_layer_sizes=(5, 2), random_state=1)

# Initial fit on categorization #2; the following cells call fit() again
# for each categorization.
clf.fit(samples, c2)

In [None]:
# Fit the perceptron on categorization #1 and classify the unknown sample.
clf.fit(samples, c1)
u_class = clf.predict([[xu,yu]])
print(u_class)

In [None]:
# Fit the perceptron on categorization #2 and classify the unknown sample.
# clf stays fitted on categorization #2 for the decision-surface plot below.
clf.fit(samples, c2)
u_class = clf.predict([[xu,yu]])
print(u_class)

In [None]:
# Decision surface of the multi-layer perceptron `clf`, overlaid with the
# categorization-2 samples (xc/yc = class 0, xd/yd = class 1) and the unknown sample.
plot_step = 0.02
# np.arange excludes its stop value, so a grid from min to max would leave the
# outermost samples on or beyond the edge of the shaded area. Pad both ends.
margin = 0.5
xx, yy = np.meshgrid(np.arange(min(x1) - margin, max(x1) + margin, plot_step),
                     np.arange(min(y1) - margin, max(y1) + margin, plot_step))
# Predict a class for every grid point, then fold the flat result back into
# the shape of the grid so contourf can draw it.
zz = clf.predict(np.c_[xx.ravel(), yy.ravel()])
zz = zz.reshape(xx.shape)
cs = plt.contourf(xx, yy, zz, cmap=plt.cm.RdYlBu)

scatter(xc,yc,marker='+', c='k')
scatter(xd,yd,marker='o', c='k')
scatter([xu],[yu], marker='x', c='k')

# tight_layout only adjusts axes that already exist, so it has to run after
# the plot has been drawn.
plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)


## Same approach with a decision tree

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
# Decision tree trained on categorization #1.
# random_state is fixed (same seed as the MLP above) because scikit-learn
# permutes the features at every split, so equally good splits could otherwise
# be chosen differently from run to run.
clf = DecisionTreeClassifier(random_state=1)
clf.fit(samples, c1)

In [None]:
# Classify the unknown sample with the decision tree.
u_class = clf.predict([[xu,yu]])
print(u_class)

In [None]:
# Show the fitted tree twice: as indented text rules and as a diagram.
print(tree.export_text(clf))
tree.plot_tree(clf)
plt.show()

In [None]:
# Train a fresh decision tree on categorization #2, classify the unknown
# sample, and show the learned rules as text and as a diagram.
# random_state is fixed because scikit-learn permutes the features at every
# split, so equally good splits could otherwise be chosen differently from
# run to run and change the printed result.
clf = DecisionTreeClassifier(random_state=1)
clf.fit(samples, c2)
u_class = clf.predict([[xu,yu]])
print(u_class)
print(tree.export_text(clf))
tree.plot_tree(clf)
plt.show()

("wrong" result, i.e. different from our intuition)

In [None]:
# Decision surface of the decision tree `clf`, overlaid with the
# categorization-2 samples (xc/yc = class 0, xd/yd = class 1) and the unknown sample.
plot_step = 0.02
# np.arange excludes its stop value, so a grid from min to max would leave the
# outermost samples on or beyond the edge of the shaded area. Pad both ends.
margin = 0.5
xx, yy = np.meshgrid(np.arange(min(x1) - margin, max(x1) + margin, plot_step),
                     np.arange(min(y1) - margin, max(y1) + margin, plot_step))
# Predict a class for every grid point, then fold the flat result back into
# the shape of the grid so contourf can draw it.
zz = clf.predict(np.c_[xx.ravel(), yy.ravel()])
zz = zz.reshape(xx.shape)
cs = plt.contourf(xx, yy, zz, cmap=plt.cm.RdYlBu)

scatter(xc,yc,marker='+', c='k')
scatter(xd,yd,marker='o', c='k')
scatter([xu],[yu], marker='x', c='k')

# tight_layout only adjusts axes that already exist, so it has to run after
# the plot has been drawn.
plt.tight_layout(h_pad=0.5, w_pad=0.5, pad=2.5)


## Conclusion: for analog/continuous data (e.g., most sensor data), SVM is a reliable, well-understood choice.

Outlook: scikit-learn offers many more features, tools, and classifiers, e.g., [Pipelines](http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline) 