## Mathematics of Machine Learning

### Chapter 3: Linear classification methods
### Section 3.5: Kernel SVM Rule

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

from sklearn import svm
from sklearn.metrics import accuracy_score


#### (0) Data Preparation

In [None]:
# Read the training sample from the two CSV files next to the notebook.
# NOTE(review): later cells index x[0] / x[1] and pass x.T to fit(), so x is
# presumably stored as (2, m) with one column per sample -- confirm.
x, y = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ("data_KSVM_X.csv", "data_KSVM_Y.csv")
)

# For a quick sanity check, inspect x.shape and y.shape here.

In [None]:
# Scatter plot of the training sample, one marker style per label

fig, ax = plt.subplots()

# Positions (sample indices) of the points labelled +1 and -1
indp = [i for i in range(len(y)) if y[i] == 1]
indm = [i for i in range(len(y)) if y[i] == -1]

# Blue circles: label +1, red crosses: label -1
ax.scatter(
    x[0][indp], x[1][indp],
    c="b", marker="o", linewidths=2,
)
ax.scatter(
    x[0][indm], x[1][indm],
    c="r", marker="+", linewidths=2,
)

plt.xlabel("x_1")
plt.ylabel("x_2")

fig.tight_layout()

#### (1) Soft Kernel SVM rule (Gaussian kernel)

In [None]:
# Sample size m and the regularisation parameter lambda of the soft rule
m = len(y)
lam = 0.5 / m

# Show both values, one per line
print(m, lam, sep="\n")

In [None]:
# Scaling parameter kappa of the Gaussian kernel
# (handed to scikit-learn's SVC as `gamma` in the next code cell)
kappa = 4

#### Note: scikit-learn's `gamma` plays the role of the scaling parameter $\kappa$ (`gamma == kappa`) ####

https://scikit-learn.org/stable/modules/svm.html 
1.4.6 Kernel functions

When training an SVM with the Radial Basis Function (RBF) kernel, two parameters must be considered: C and gamma. The parameter C, common to all SVM kernels, trades off misclassification of training examples against simplicity of the decision surface. A low C makes the decision surface smooth, while a high C aims at classifying all training examples correctly. gamma defines how much influence a single training example has. The larger gamma is, the closer other examples must be to be affected.

https://scikit-learn.org/stable/modules/svm.html 
1.4.7 Mathematical formulation

In [None]:
# Gaussian (RBF) kernel support vector classifier.
# The penalty weight is C = 0.5/(m*lam); gamma is the kernel scaling kappa.
KSVM = svm.SVC(
    kernel='rbf',
    gamma=kappa,
    C=0.5/m/lam,
)

# Train on the sample; x.T is passed so that each row is one training point
KSVM.fit(x.T, y)

In [None]:
# Training points kept by the fitted classifier as support vectors
# (one row per support vector; columns are used as x_1 / x_2 when plotting)
support_vectors = KSVM.support_vectors_

#### (2) Plot the learned hypothesis

In [None]:
# Generate discretization grid: plotting window [x_min, x_max] x [y_min, y_max]
x_min = -1.75
x_max = 2.25
y_min = -1.75
y_max = 2.25

# Number of grid nodes per axis (spacing 4/500 on the 4-unit-wide window).
n_grid = 501

# A complex "step" tells np.mgrid to place exactly n_grid equally spaced nodes
# with both end points included. The former float step (4/500) made the node
# count depend on floating-point rounding of (stop - start)/step.
XX1, XX2 = np.mgrid[x_min:x_max:n_grid*1j, y_min:y_max:n_grid*1j]

In [None]:
# Evaluate the learned decision function at every grid node.
# Each row of grid_nodes is one (x_1, x_2) pair of the flattened grid.
grid_nodes = np.c_[XX1.ravel(), XX2.ravel()]
Z = KSVM.decision_function(grid_nodes)
print('Z', Z)

# Bring the flat vector of values back into the 2-D layout of the grid
Z = Z.reshape(XX1.shape)

#### (2.1) Plot the dividing lines

In [None]:
# Plot the training data, the support vectors, the learned decision boundary
# and the true dividing lines.

fig, ax = plt.subplots(figsize=(10, 7))

# Sample indices of the two classes
indp = [i for (i, val) in enumerate(y) if val == 1]
indm = [i for (i, val) in enumerate(y) if val == -1]

ax.scatter(x[0][indp], x[1][indp], c="b", marker="o", linewidths = 2, label = "1")
ax.scatter(x[0][indm], x[1][indm], c="r", marker="+", linewidths = 2, label = "-1")

# Plot support vectors (hollow circles around the corresponding points)
plt.plot(support_vectors[:, 0], support_vectors[:, 1], marker="o", markersize=12, 
         markerfacecolor="None", linestyle='None', label = "Support Vectors")

# Plot learned dividing lines = zero level set of the decision function.
# The level must be given as a list: a bare integer n is interpreted by
# matplotlib as "choose up to n+1 levels automatically", not as "level 0".
plt.contour(XX1, XX2, Z, levels=[0])  # label = "$h_S$"

# Plot true dividing lines
# x1 is also reused by later cells, so it stays a notebook-level variable.
x1 = XX1[:, 0]
plt.plot(x1, (x1**2 - 3*x1 - 2*0)/3, '-k', label = "truth")
plt.plot(x1, (x1**2 - 3*x1 - 2*np.pi)/3, '-k')
plt.plot(x1, (x1**2 - 3*x1 + 2*np.pi)/3, '-k')

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

plt.legend()
# Raw f-string: "\k" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions; the text is unchanged.
plt.title(rf'$ \kappa $ = {kappa:.3f}', fontsize=20)
plt.show()

#### (2.2) Plot RKHS function

In [None]:
# 3-D surface of the decision function values Z over the grid
fig, ax = plt.subplots(figsize=(7, 7), subplot_kw=dict(projection="3d"))

ax.plot_surface(
    XX1, XX2, Z,
    cmap=cm.coolwarm,
    linewidth=0,
    antialiased=False,
)

plt.show()

In [None]:
# Grid point matrix: one row per grid node, columns hold (x_1, x_2)
M = np.c_[XX1.ravel(), XX2.ravel()]

# Shape, number of nodes, and one sample node written as a column vector
print(M.shape)
print(len(M))
print(M[1].reshape(-1, 1))

In [None]:
# Support vectors stored column-wise (one column per support vector)
SV_supp = support_vectors.T

# Number of support vectors = number of rows of support_vectors
m_supp = len(support_vectors)
print(m_supp)

In [None]:
# Quantities that define the learned function:
#   alpha_S - dual coefficients reported by scikit-learn, transposed to a column
#   b_S     - intercept reported by scikit-learn
#   kappa_S - kernel scaling parameter used for training
alpha_S = KSVM.dual_coef_.T
b_S = KSVM.intercept_
kappa_S = kappa

print(alpha_S.shape)
print(b_S)
print(kappa_S)

In [None]:
# Squared distances to the support vectors
def SV_fun(x):
    """Return the squared Euclidean distances from x to all support vectors.

    x is expected as a column vector (it is repeated m_supp times side by
    side to match SV_supp). The result is a row vector of shape (1, m_supp).
    """
    repeated = np.tile(x, (1, m_supp))
    squared_diff = (SV_supp - repeated) ** 2
    return squared_diff.sum(axis=0)[None, :]

In [None]:
# Learned RKHS function (Gaussian kernel expansion plus intercept)
def kfun_S(x):
    """Evaluate sum_j alpha_j * exp(-kappa_S * |x - s_j|^2) + b_S at x.

    s_j are the support vectors; x is a column vector as expected by SV_fun.
    """
    kernel_row = np.exp(-kappa_S * SV_fun(x))
    return kernel_row @ alpha_S + b_S

In [None]:
# Evaluate the RKHS function node by node; Z is a column with one entry per
# grid node, in the row order of M
Z = np.empty((len(M), 1))
for i, node in enumerate(M):
    Z[i] = kfun_S(node[:, None])  # node as a column vector
print(Z)
print(Z.shape)

In [None]:
# 3-D surface of the hand-evaluated RKHS function, for comparison with the
# surface obtained from decision_function.
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(7, 7))

# Z is a column with one value per grid node; reshape it to the grid layout.
# XX1.shape is used instead of len(x1) so the cell neither depends on a helper
# variable from an earlier plotting cell nor assumes a square grid.
ax.plot_surface(XX1, XX2, np.reshape(Z, XX1.shape), cmap=cm.coolwarm, linewidth=0, antialiased=False)

plt.show()

In [None]:
# 2D-Plot: the 3-D surface viewed straight from above, together with the
# training data, the support vectors and the learned decision boundary.

# Maximum of the RKHS function; subtracting it puts the surface at or below 0.
# NOTE(review): presumably done so that the markers and the contour, which are
# drawn without an explicit height, are not hidden by the surface -- confirm.
C = np.max(Z)

fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(10, 10))

# Reshape via XX1.shape instead of len(x1): no dependence on a variable from
# an earlier plotting cell and no square-grid assumption.
ax.plot_surface(XX1, XX2, np.reshape(Z-C, XX1.shape), cmap=cm.coolwarm, linewidth=0, antialiased=False)

# Plot learned dividing lines = zero level set of the RKHS function.
# The level must be passed as a list; a bare integer is read as a level count.
plt.contour(XX1, XX2, np.reshape(Z, XX1.shape), levels=[0])  # label = "$h_S$"

ax.scatter(x[0][indp], x[1][indp], c="b", marker="o", linewidths = 2, label = "1")
ax.scatter(x[0][indm], x[1][indm], c="r", marker="+", linewidths = 2, label = "-1")

# Plot support vectors
plt.plot(support_vectors[:, 0], support_vectors[:, 1], marker="o", markersize=12, 
         markerfacecolor="None", linestyle='None', label = "Support Vectors")

# Elevation of 90 degrees: look at the scene from directly above
ax.view_init(90)

plt.show()

#### (3) Kappa study (Gaussian kernel)

In [None]:
# Kernel scaling parameter for the kappa study.
# Alternative value to try: kappa = 1/9
kappa = 1 / 0.075**2

In [None]:
# Gaussian-kernel classifier for the kappa study: same penalty weight
# C = 0.5/(m*lam) as before, only the kernel scaling differs
KSVM_kappa = svm.SVC(
    kernel='rbf',
    gamma=kappa,
    C=0.5/m/lam,
)

# Train on the sample (rows of x.T are the training points)
KSVM_kappa.fit(x.T, y)

In [None]:
# Support vectors of the classifier trained in the kappa study
# (one row per support vector)
support_vectors_kappa = KSVM_kappa.support_vectors_

In [None]:
# Evaluate the decision function of the kappa-study classifier on the grid.
# Each row of grid_nodes is one (x_1, x_2) pair of the flattened grid.
grid_nodes = np.c_[XX1.ravel(), XX2.ravel()]
Z = KSVM_kappa.decision_function(grid_nodes)
print('Z', Z)

# Back to the 2-D layout of the grid
Z = Z.reshape(XX1.shape)

In [None]:
# Plot the training data, the support vectors and the decision boundary
# learned in the kappa study.

fig, ax = plt.subplots(figsize=(10, 7))

# Sample indices of the two classes
indp = [i for (i, val) in enumerate(y) if val == 1]
indm = [i for (i, val) in enumerate(y) if val == -1]

ax.scatter(x[0][indp], x[1][indp], c="b", marker="o", linewidths = 2, label = "1")
ax.scatter(x[0][indm], x[1][indm], c="r", marker="+", linewidths = 2, label = "-1")

# Plot support vectors
plt.plot(support_vectors_kappa[:, 0], support_vectors_kappa[:, 1], marker="o", markersize=12, 
         markerfacecolor="None", linestyle='None', label = "Support Vectors")

# Plot learned dividing lines = zero level set of the decision function.
# The level must be given as a list: a bare integer n is interpreted by
# matplotlib as "choose up to n+1 levels automatically", not as "level 0".
plt.contour(XX1, XX2, Z, levels=[0])  # label = "$h_S$"

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

# The legend is intentionally not drawn here; call plt.legend() to enable it.
# Raw f-string: "\k" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions; the text is unchanged.
plt.title(rf'$ \kappa $ = {kappa:.3f}', fontsize=20)
plt.show()

#### (4) Lambda study (Gaussian kernel)

In [None]:
# Regularisation parameter for the lambda study.
# Other values noted for experimentation: 0.5/m and 1/(2*m)**2
lam = 0.035

In [None]:
# Kernel scaling parameter, set back to 4 for the lambda study
kappa = 4

In [None]:
# Gaussian-kernel classifier for the lambda study; the new lambda enters
# through the penalty weight C = 0.5/(m*lam)
KSVM_lam = svm.SVC(
    kernel='rbf',
    gamma=kappa,
    C=0.5/m/lam,
)

# Train on the sample (rows of x.T are the training points)
KSVM_lam.fit(x.T, y)

In [None]:
# Support vectors of the classifier trained in the lambda study
# (one row per support vector)
support_vectors_lam = KSVM_lam.support_vectors_

In [None]:
# Evaluate the decision function of the lambda-study classifier on the grid.
# Each row of grid_nodes is one (x_1, x_2) pair of the flattened grid.
grid_nodes = np.c_[XX1.ravel(), XX2.ravel()]
Z = KSVM_lam.decision_function(grid_nodes)
print('Z', Z)

# Back to the 2-D layout of the grid
Z = Z.reshape(XX1.shape)

In [None]:
# Plot the training data, the support vectors and the decision boundary
# learned in the lambda study.

fig, ax = plt.subplots(figsize=(10, 7))

# Sample indices of the two classes
indp = [i for (i, val) in enumerate(y) if val == 1]
indm = [i for (i, val) in enumerate(y) if val == -1]

ax.scatter(x[0][indp], x[1][indp], c="b", marker="o", linewidths = 2, label = "1")
ax.scatter(x[0][indm], x[1][indm], c="r", marker="+", linewidths = 2, label = "-1")

# Plot support vectors
plt.plot(support_vectors_lam[:, 0], support_vectors_lam[:, 1], marker="o", markersize=12, 
         markerfacecolor="None", linestyle='None', label = "Support Vectors")

# Plot learned dividing lines = zero level set of the decision function.
# The level must be given as a list: a bare integer n is interpreted by
# matplotlib as "choose up to n+1 levels automatically", not as "level 0".
plt.contour(XX1, XX2, Z, levels=[0])  # label = "$h_S$"

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

# The legend is intentionally not drawn here; call plt.legend() to enable it.
# Raw f-string: "\l" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions; the text is unchanged.
plt.title(rf'$ \lambda $ = {lam:.3f}', fontsize=20)
plt.show()

#### (5) Polynomial kernel

In [None]:
# Parameters of the polynomial-kernel experiment:
#   lam   - regularisation parameter of the soft rule
#   kappa - kernel scaling (passed to SVC as gamma)
#   q     - degree of the polynomial kernel
lam = 0.5 / m
kappa, q = 1, 3

In [None]:
# Create a support vector classifier with the polynomial kernel
# (gamma * <x, x'> + coef0) ** degree.
# The degree is taken from q (instead of a hard-coded 3) so that the trained
# model, the hand-written kernel expansion and the plot title always agree.
KSVM_poly = svm.SVC(C=0.5/m/lam, kernel='poly', gamma=kappa, degree=q, coef0=1)

# Learn on the train subset (rows of x.T are the training points)
KSVM_poly.fit(x.T, y)

###### IMPORTANT

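The polynomial kernel is parametrised differently in MATLAB and scikit-learn: scikit-learn uses $(\gamma \langle x, x' \rangle + r)^d$ with `gamma` $= \gamma$, `coef0` $= r$ and `degree` $= d$, whereas MATLAB uses $(1 + \langle x, x' \rangle)^q$. With `gamma=1` and `coef0=1`, as chosen above, both formulas coincide.

See
- https://scikit-learn.org/stable/modules/svm.html (1.4.6 Kernel functions)
- https://de.mathworks.com/help/stats/fitcsvm.html#bt9w6j6_sep_shared-PolynomialOrder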


In [None]:
# Support vectors of the polynomial-kernel classifier
# (one row per support vector)
support_vectors_poly = KSVM_poly.support_vectors_

In [None]:
# Evaluate the decision function of the polynomial-kernel classifier on the
# grid. Each row of grid_nodes is one (x_1, x_2) pair of the flattened grid.
grid_nodes = np.c_[XX1.ravel(), XX2.ravel()]
Z = KSVM_poly.decision_function(grid_nodes)
print('Z', Z)

# Back to the 2-D layout of the grid
Z = Z.reshape(XX1.shape)

#### (5.1) Plot the dividing lines

In [None]:
# Plot the training data, the support vectors, the decision boundary learned
# with the polynomial kernel and the true dividing lines.

fig, ax = plt.subplots(figsize=(10, 7))

# Sample indices of the two classes
indp = [i for (i, val) in enumerate(y) if val == 1]
indm = [i for (i, val) in enumerate(y) if val == -1]

ax.scatter(x[0][indp], x[1][indp], c="b", marker="o", linewidths = 2, label = "1")
ax.scatter(x[0][indm], x[1][indm], c="r", marker="+", linewidths = 2, label = "-1")

# Plot support vectors
plt.plot(support_vectors_poly[:, 0], support_vectors_poly[:, 1], marker="o", markersize=12, 
         markerfacecolor="None", linestyle='None', label = "Support Vectors")

# Plot learned dividing lines = zero level set of the decision function.
# The level must be given as a list: a bare integer n is interpreted by
# matplotlib as "choose up to n+1 levels automatically", not as "level 0".
plt.contour(XX1, XX2, Z, levels=[0])  # label = "$h_S$"

# Plot true dividing lines
# x1 is also reused by later cells, so it stays a notebook-level variable.
x1 = XX1[:, 0]
plt.plot(x1, (x1**2 - 3*x1 - 2*0)/3, '-k', label = "truth")
plt.plot(x1, (x1**2 - 3*x1 - 2*np.pi)/3, '-k')
plt.plot(x1, (x1**2 - 3*x1 + 2*np.pi)/3, '-k')

plt.xlim(x_min, x_max)
plt.ylim(y_min, y_max)

plt.legend()
plt.title(f'Polynomial Kernel (q={q})', fontsize=20)
plt.show()

#### (5.2) Plot RKHS function

In [None]:
# 3-D surface of the polynomial-kernel decision function values Z on the grid
fig, ax = plt.subplots(figsize=(7, 7), subplot_kw=dict(projection="3d"))

ax.plot_surface(
    XX1, XX2, Z,
    cmap=cm.coolwarm,
    linewidth=0,
    antialiased=False,
)

plt.show()

In [None]:
# Grid point matrix: one row per grid node, columns hold (x_1, x_2)
M = np.c_[
    XX1.ravel(),
    XX2.ravel(),
]

In [None]:
# Support vectors of the polynomial model stored column-wise
# (one column per support vector)
SV_supp = support_vectors_poly.T

# Number of support vectors = number of rows of support_vectors_poly
m_supp = len(support_vectors_poly)
print(m_supp)

In [None]:
# Quantities that define the learned polynomial-kernel function:
#   alpha_S - dual coefficients reported by scikit-learn, transposed to a column
#   b_S     - intercept reported by scikit-learn
#   kappa_S - kernel scaling parameter used for training
alpha_S = KSVM_poly.dual_coef_.T
b_S = KSVM_poly.intercept_
kappa_S = kappa

print(alpha_S.shape)
print(b_S)
print(kappa_S)

In [None]:
# Inner products with the support vectors (used by the polynomial kernel)
def SV_fun(x):
    """Return the inner products <x, s_j> of x with all support vectors s_j.

    x is expected as a column vector (it is repeated m_supp times side by
    side to match SV_supp). The result is a row vector of shape (1, m_supp).
    Note that this is an inner product, not a distance.
    """
    repeated = np.tile(x, (1, m_supp))
    products = SV_supp * repeated
    return products.sum(axis=0)[None, :]

In [None]:
# Learned RKHS function (polynomial kernel expansion plus intercept)
def kfun_S(x):
    """Evaluate sum_j alpha_j * (1 + kappa_S * <x, s_j>)**q + b_S at x.

    s_j are the support vectors; x is a column vector as expected by SV_fun.
    """
    kernel_row = (1 + kappa_S * SV_fun(x)) ** q
    return kernel_row @ alpha_S + b_S

In [None]:
# Evaluate the RKHS function node by node; Z is a column with one entry per
# grid node, in the row order of M
Z = np.empty((len(M), 1))
for i, node in enumerate(M):
    Z[i] = kfun_S(node[:, None])  # node as a column vector
print(Z)
print(Z.shape)

In [None]:
# 3-D surface of the hand-evaluated polynomial-kernel RKHS function, for
# comparison with the surface obtained from decision_function.
fig, ax = plt.subplots(subplot_kw={"projection": "3d"}, figsize=(7, 7))

# Z is a column with one value per grid node; reshape it to the grid layout.
# XX1.shape is used instead of len(x1) so the cell neither depends on a helper
# variable from an earlier plotting cell nor assumes a square grid.
ax.plot_surface(XX1, XX2, np.reshape(Z, XX1.shape), cmap=cm.coolwarm, linewidth=0, antialiased=False)

plt.show()