## See SVM's separating hyperplane for 1D, 2D, and 3D data.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm, datasets
from io import StringIO
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D

### Start with 1D toy data:

In [None]:
data_string = """
x1,  y
 0, -1
 1, -1
 3,  1
 4,  1
"""
# Fields are separated by a comma plus padding whitespace, so split on a regex
# (regex separators are handled by pandas' 'python' parsing engine). The
# pattern is a raw string so that '\s' reaches the regex engine instead of
# being treated as an (invalid) string escape sequence.
df = pd.read_csv(StringIO(data_string), sep=r'\s*,\s+', engine='python')
X = np.array(df.iloc[:, 0:1]) # 2D array with one column: the feature x1
y = np.array(df.iloc[:, 1])   # 1D array: the class labels from column y
print(f'X={X}, y={y}')

In [None]:
# 'SVC' = 'support vector classification'
# fit() returns the fitted estimator, so construction and training can be chained.
clf = svm.SVC(kernel="linear", C=1000).fit(X, y)
# Show the learned weight vector and bias, one per line.
print(f'clf.coef_={clf.coef_}', f'clf.intercept_={clf.intercept_}', sep='\n')

In [None]:
# Draw the 1D points along a horizontal line, colored by class label.
plt.scatter(x=X[:, 0], y=np.zeros_like(X[:, 0]), c=y, cmap=plt.cm.Paired)
plt.xlim(-1, 5)
plt.axhline(y=0, c='k')
ax = plt.gca()
ax.axes.get_yaxis().set_visible(False) # every point is drawn at height 0, so hide the y axis
# Pull out the scalar weight and bias so axvline() gets plain numbers rather
# than one-element arrays.
w = clf.coef_[0][0]
b = clf.intercept_[0]
# The boundary is given by wx + b = 0 => x = -b / w.
boundary = -b / w
# The margin edges are given by wx + b = +1 and wx + b = -1, that is
# x = (+1 - b) / w and x = (-1 - b) / w, which is boundary +/- 1/w
# (an offset of exactly 1 only when w == 1).
plt.axvline(x=boundary, c='k', label=r'decision boundary $\mathbf{wx} + b = 0$')
plt.axvline(x=boundary + 1 / w, c='g', linestyle=':', label=r'+1 support $\mathbf{wx} + b =  1$')
plt.axvline(x=boundary - 1 / w, c='r', linestyle=':', label=r'-1 support $\mathbf{wx} + b = -1$')
plt.legend()
plt.show(block=False)

In [None]:
# make a few predictions
# predict() expects a 2D array with one row per example, so turn the 1D list
# of x values into a single column: (-1, 1) = (calculate #rows, 1 column).
# The ndarray.reshape() method is used instead of np.reshape(newshape=...)
# because the 'newshape' keyword is deprecated in recent NumPy releases.
X_new = np.array([0, 1.5, 2, 2.5, 4]).reshape(-1, 1)
clf.predict(X_new)

### Now try 2D toy data:

In [None]:
data_string = """
x1, x2,  y
 0,  0, -1
-1,  1, -1
 1, -1, -1
 0,  1,  1
 1,  1,  1
 1,  0,  1
"""
# Fields are separated by a comma plus padding whitespace, so split on a regex
# (regex separators are handled by pandas' 'python' parsing engine). The
# pattern is a raw string so that '\s' reaches the regex engine instead of
# being treated as an (invalid) string escape sequence.
df = pd.read_csv(StringIO(data_string), sep=r'\s*,\s+', engine='python')
df # a data frame is like a spreadsheet

In [None]:
# Split the frame by position: the first two columns become the feature
# matrix and the third column becomes the label vector.
X, y = np.array(df.iloc[:, :2]), np.array(df.iloc[:, 2])
print(f'X={X}, y={y}')

In [None]:
# Train a linear-kernel support vector classifier on the 2D toy data.
# fit() returns the fitted estimator, so construction and training can be chained.
clf = svm.SVC(kernel="linear", C=1000).fit(X, y)
# Show the learned weight vector and bias, one per line.
print(f'clf.coef_={clf.coef_}', f'clf.intercept_={clf.intercept_}', sep='\n')

The decision boundary is defined by $\mathbf{wx} + b = 0$, where $\mathbf{w}$ is given by `clf.coef_[0]` and $b$ is given by `clf.intercept_`. In the 2D case, the boundary is

`clf.coef_[0] * [x1, x2] + clf.intercept_ = 0` (I'm mixing math and code notation here); that is

`clf.coef_[0][0] * x1 + clf.coef_[0][1] * x2 + clf.intercept_ = 0`

$\implies$

`x2 = -(clf.coef_[0][0] * x1 + clf.intercept_) / clf.coef_[0][1]`.

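The constraints are $\mathbf{wx} + b \ge 1$ if $y = +1$ and $\mathbf{wx} + b \le -1$ if $y = -1$; use $=$ instead of $\ge$ or $\le$ to get the margin edges. Solving $\mathbf{wx} + b = \pm 1$ for `x2` in the same way as above gives the boundary's `x2` plus or minus `1 / clf.coef_[0][1]`.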

With these formulas in mind, let's plot the data, decision boundary, and margin edges.

In [None]:
# plot the data
plt.scatter(x=X[:, 0], y=X[:, 1], c=y, cmap=plt.cm.Paired)
plt.axis('square')
plt.grid()
plt.xlim(-4, 4)
plt.ylim(-4, 4)

# add the decision boundary and margin boundaries
w1, w2 = clf.coef_[0]  # weights on the features x1 and x2
b = clf.intercept_[0]  # scalar bias
x1 = X[:, 0]           # draw the lines over the x1 values present in the data
# Boundary: w1*x1 + w2*x2 + b = 0  =>  x2 = -(w1*x1 + b) / w2.
x2 = -(w1 * x1 + b) / w2
plt.plot(x1, x2, label=r'decision boundary $\mathbf{wx} + b = 0$')
# Margin edges: w1*x1 + w2*x2 + b = +/-1  =>  x2 shifts by +/- 1/w2.
plt.plot(x1, x2 + 1 / w2, linestyle=':', label=r'+1 support $\mathbf{wx} + b =  1$')
plt.plot(x1, x2 - 1 / w2, linestyle=':', label=r'-1 support $\mathbf{wx} + b = -1$')
# Normal vector w drawn from the origin: the first list holds the horizontal
# coordinates [0, w1], the second the vertical coordinates [0, w2].
plt.plot([0, w1], [0, w2], label=r'normal')
plt.axvline(x=0, c='k')
plt.axhline(y=0, c='k')
plt.legend()
plt.show(block=False)

### HW01 tip


In [None]:
# Or here's an alternative to the plt.scatter() call that makes using
# specific colors easier.
# First plot the y == -1 values red with x coordinate from the 0 column of X
# and y coordinate from the 1 column of X.
plt.plot(X[y == -1, 0], X[y == -1, 1], '.', color='red', label='y=-1 data')
# Second plot the y == 1 values blue, again with x coordinate from the 0 column
# of X and y coordinate from the 1 column of X.
plt.plot(X[y ==  1, 0], X[y ==  1, 1], '.', color='blue', label='y=1 data')
plt.axis('square')
plt.grid()
plt.xlim(-4, 4)
plt.ylim(-4, 4)

# add the decision boundary and margin boundaries
w1, w2 = clf.coef_[0]  # weights on the features x1 and x2
b = clf.intercept_[0]  # scalar bias
x1 = X[:, 0]           # draw the lines over the x1 values present in the data
# Boundary: w1*x1 + w2*x2 + b = 0  =>  x2 = -(w1*x1 + b) / w2.
x2 = -(w1 * x1 + b) / w2
plt.plot(x1, x2, label=r'decision boundary $\mathbf{wx} + b = 0$')
# Margin edges: w1*x1 + w2*x2 + b = +/-1  =>  x2 shifts by +/- 1/w2.
# Each edge is colored like the class it bounds.
plt.plot(x1, x2 + 1 / w2, linestyle=':', color='blue', label=r'+1 support $\mathbf{wx} + b =  1$')
plt.plot(x1, x2 - 1 / w2, linestyle=':', color='red', label=r'-1 support $\mathbf{wx} + b = -1$')
# Normal vector w drawn from the origin: the first list holds the horizontal
# coordinates [0, w1], the second the vertical coordinates [0, w2].
plt.plot([0, w1], [0, w2], label=r'normal')
plt.axvline(x=0, c='k')
plt.axhline(y=0, c='k')
plt.legend()
plt.show(block=False)

In [None]:
# make a couple of predictions
# Name the query points once so the printed call text and the actual call
# cannot drift apart (the original label was missing its closing parenthesis).
points = [[2, 2], [-2, -2]]
print(f'clf.predict({points})={clf.predict(points)}')

### Now let's try 3D data:
(modified from [Support Vector Machines (SVM) clearly explained](https://towardsdatascience.com/support-vector-machines-svm-clearly-explained-a-python-tutorial-for-classification-problems-29c539f3ad8); data described at [Iris plants dataset](https://scikit-learn.org/stable/datasets/toy_dataset.html#iris-dataset), which says "One class is linearly separable from the other 2; the latter are NOT linearly separable from each other," and [sklearn.datasets.load_iris](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_iris.html))

In [None]:
# Load the iris dataset that ships with scikit-learn.
iris = datasets.load_iris()
# inspect the data: feature names, the first three rows, and their targets
print(
    f'iris.feature_names={iris.feature_names}',
    f'first few rows:\n{iris.data[:3]}',
    f'first few target values: {iris.target[:3]} (there are 0, 1, and 2 values)',
    sep='\n',
)

In [None]:
# make it a binary classification problem by excluding the target==2 examples;
# the row mask is computed once and applied to both features and labels
keep = (iris.target == 0) | (iris.target == 1)
# we only take the first three features (columns) for 3D visualization
X = iris.data[:, :3][keep]
Y = iris.target[keep]
print(f'X=\n{X[0:3]}..., Y={Y[:3]}...')

In [None]:
# Train a linear-kernel support vector classifier on the 3-feature iris subset.
model = svm.SVC(kernel='linear')
model.fit(X, Y)
clf = model  # fit() returns the estimator itself, so clf and model are the same object
# Show the learned weight vector and bias, one per line.
print(f'clf.coef_={clf.coef_}', f'clf.intercept_={clf.intercept_}', sep='\n')

In [None]:
# plot the data
fig = plt.figure()
ax  = fig.add_subplot(111, projection='3d')
ax.plot3D(X[Y==0,0], X[Y==0,1], X[Y==0,2],'ob') # class 0: blue circles
ax.plot3D(X[Y==1,0], X[Y==1,1], X[Y==1,2],'sr') # class 1: red squares

# The equation of the separating plane is given by all x so that
# np.dot(clf.coef_[0], x) + clf.intercept_[0] = 0. Solve for x_3, the third
# coordinate in (x_1, x_2, x_3) or, in plotting notation, (x, y, z).
def z(x, y):
    """Return the height z of the separating plane above the point (x, y).

    Solves w1*x + w2*y + w3*z + b = 0 for z, where (w1, w2, w3) is
    clf.coef_[0] and b is clf.intercept_[0]. x and y may be scalars or
    NumPy arrays of matching shape (such as np.meshgrid output); the
    result has the same shape.
    """
    w1, w2, w3 = clf.coef_[0]
    return (-clf.intercept_[0] - w1*x - w2*y) / w3

linspace = np.linspace(start=-5, stop=5, num=30) # 30 values between -5 and 5
# Make 2D coordinate arrays from two 1D vectors. They are named grid_x/grid_y
# rather than x/y so this cell does not overwrite the label vector `y` that
# the earlier 2D cells rely on.
grid_x, grid_y = np.meshgrid(linspace, linspace)

# https://matplotlib.org/2.0.2/mpl_toolkits/mplot3d/tutorial.html#surface-plots
ax.plot_surface(X=grid_x, Y=grid_y, Z=z(grid_x, grid_y))
ax.view_init(30, 60) # camera elevation and azimuth, in degrees
plt.show(block=False)