## Mathematics of Machine Learning

### Chapter 3: Linear classification methods
### Section 3.4: Soft SVM Rule

In [None]:
# Install scikit-learn if it is not available yet:
# pip install -U scikit-learn

# For more information, see https://scikit-learn.org/stable/install.html

In [None]:
import datetime

import matplotlib.pyplot as plt
import numpy as np
from sklearn import svm
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score

try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    # plot_confusion_matrix was removed in scikit-learn 1.2
    plot_confusion_matrix = None

#### (0) Data Preparation

In [None]:
# Read the feature matrix X and the digit labels y from their CSV files
X, y = (
    np.genfromtxt(csv_name, delimiter=',')
    for csv_name in ("data_MNIST_78_X.csv", "data_MNIST_78_Y.csv")
)

In [None]:
# Transform the labels into +1 (digit 7) and -1 (everything else, i.e. digit 8).
# Vectorised instead of an element-wise Python loop. np.ravel accepts both the
# freshly loaded 1-D labels and the (1, m) row vector produced below, and a
# value of +1 is also treated as "positive", so running this cell again on
# already converted labels leaves them unchanged.
# The result is a row vector of shape (1, m), the layout the cells below index
# with y[0].
y = np.where(np.isin(np.ravel(y), (7, 1)), 1.0, -1.0)[None, :]

In [None]:
# Quick look at the data: shape of X, the labels, and the shape of the labels
print(X.shape, y, y.shape, sep='\n')

In [None]:
# Number of samples m = length of the label row vector
m = y.shape[1]
print(m)

In [None]:
# Dimension d of the feature space = number of rows of X
d = len(X)
print(d)

#### (1) Soft SVM Rule

In [None]:
# Regularisation parameter; with lambda = 1/m both terms are equally weighted
lam = 1 / m
print(lam)

In [None]:
# Support vector classifier with a linear kernel; the penalty parameter C is
# derived from lambda as 1 / (2 * m * lambda)
SVM = svm.SVC(kernel='linear', C=1/(2*m*lam))

In [None]:
# Fit the classifier on the data set; X is transposed because the samples are
# stored as its columns
SVM.fit(np.transpose(X), y[0, :])

In [None]:
# Show the parameters the classifier was configured with
SVM.get_params(deep=True)

In [None]:
# Misclassification rate on the training data: predict a label for every
# sample and compare with the true labels
label = SVM.predict(np.transpose(X))
print(label)
print(1 - accuracy_score(y[0, :], label))

In [None]:
# Generate and plot the confusion matrix of the classifier on the data set.
# ConfusionMatrixDisplay.from_estimator is the replacement for
# plot_confusion_matrix, which was removed in scikit-learn 1.2.
# Pass normalize='true' as well to show per-class rates instead of counts.
matrix = ConfusionMatrixDisplay.from_estimator(SVM, X.T, y[0],
                                               cmap=plt.cm.Blues)
# Set the title on the Axes the matrix was drawn into
matrix.ax_.set_title('Confusion matrix for our classifier')
# plt.show() does not take the display object as an argument
plt.show()

In [None]:
# Support vectors of the fitted classifier
support_vectors = SVM.support_vectors_
print(np.shape(support_vectors))

In [None]:
# Weight vector computed by hand from the dual coefficients and the support
# vectors
alphas = SVM.dual_coef_
w_S_cal = np.dot(alphas, support_vectors)

# Weight vector as provided directly by the fitted classifier
w_S = SVM.coef_

# Cross-check both results. The comparison uses a tolerance because an exact
# `==` on floating-point arrays can report spurious mismatches.
assert np.allclose(w_S_cal, w_S), "manually computed weight vector differs from SVM.coef_"

In [None]:
# Plot the weight vector, one marker per component
fig, ax = plt.subplots()

ax.scatter(np.arange(len(w_S[0])), w_S[0])

# Labels are set on the Axes created above rather than through pyplot's
# implicit "current axes"
ax.set_xlabel("Component k")
ax.set_ylabel("Weight w_k")
# Raw string: "\l" is not a valid escape sequence in a normal string literal
# (and there is nothing to interpolate, so no f-string is needed)
ax.set_title(r'Weight vector w_S for $ \lambda $ = 1/m')
plt.show()

In [None]:
# Intercept (bias term) of the fitted classifier
bias = SVM.intercept_
print(bias)

In [None]:
# A sample counts as misclassified when the product of its label and its
# decision value w_S . x + bias is negative
misclassifications = (y * (np.dot(w_S, X) + bias)) < 0
# Column indices of the misclassified samples, as a plain list of ints
ind_wrong = np.flatnonzero(misclassifications[0]).tolist()

In [None]:
# Plot the misclassified images, five per row.
# The grid grows with the number of misclassified samples (a fixed 5 x 5 grid
# cannot hold more than 25 images) but never has fewer than five rows, so the
# figure stays 10 x 10 inches for up to 25 images.
n_cols = 5
n_rows = max(5, -(-len(ind_wrong) // n_cols))  # ceiling division
fig_wrong, axes_wrong = plt.subplots(n_rows, n_cols,
                                     figsize=(10, 2 * n_rows), squeeze=False)

# Hide ticks and frames on every panel so that unused panels stay blank
for panel in axes_wrong.ravel():
    panel.axis('off')

# zip stops after the last misclassified sample; remaining panels stay empty
for panel, col in zip(axes_wrong.ravel(), ind_wrong):
    # Each column of X is reshaped into a 28 x 28 image
    panel.imshow(np.reshape(X[:, col], (28, 28)), cmap='gray')
plt.show()

In [None]:
# Investigate the influence of lambda on the training error

# lambda = 2^10, 2^9, ..., 2^-9
lambdas = [2**(10-j+1) for j in range(1, 21)]
print(lambdas)

# Misclassification rate on the training data, one entry per lambda
rate = []

start = datetime.datetime.now().time()
print(start)
# The loop uses its own names so that it does not overwrite `lam`, `SVM` and
# `label`, which belong to the lambda = 1/m model of the cells above.
for lam_sweep in lambdas:
    # Train for this lambda
    svm_sweep = svm.SVC(C=1/(2*m*lam_sweep), kernel='linear')
    svm_sweep.fit(X.T, y[0])

    # Predicted labels on the training data
    label_sweep = svm_sweep.predict(X.T)

    # Misclassification rate
    rate.append(1 - accuracy_score(y[0], label_sweep))

end = datetime.datetime.now().time()
print(end)
print(rate)

In [None]:
# Plot the training misclassification rate against lambda (logarithmic x-axis)

fig_rate, ax_rate = plt.subplots(figsize=(10, 5))
ax_rate.semilogx(lambdas, rate, '-D', color='b')
ax_rate.grid(True)
# Raw string: "\l" is not a valid escape sequence in a normal string literal
ax_rate.set_xlabel(r"$ \lambda $")
ax_rate.set_ylabel("Misclassification rate")
ax_rate.set_xlim(1e-3, 2e3)
ax_rate.set_ylim(0, 0.5)
ax_rate.set_title('Misclassification in the training data')
plt.show()

Useful links:


https://de.mathworks.com/help/stats/support-vector-machines-for-binary-classification.html

https://github.com/christianversloot/machine-learning-articles/blob/main/creating-a-simple-binary-svm-classifier-with-python-and-scikit-learn.md

https://dmkothari.github.io/Machine-Learning-Projects/SVM_with_MNIST.html
