In [1]:
import sys
sys.path.append(r'..')
from data import read_svm_data
from cvxopt import matrix, solvers, spmatrix
import numpy as np


In [2]:
# Read both MNIST splits, keeping only the digits 2, 3, 8 and 9.
# read_svm_data returns a (labels, images) pair for the requested split.
(training_labels, training_images), (testing_labels, testing_images) = (
    read_svm_data(split, r"../../MNIST_ORG", [2, 3, 8, 9])
    for split in ("training", "testing")
)

# Displayed as the cell's value: shapes in the order
# (training images, training labels, testing images, testing labels).
tuple(
    arr.shape
    for arr in (training_images, training_labels, testing_images, testing_labels)
)

((20000, 784), (20000,), (3974, 784), (3974,))

Normalize the images: convert them to float32 and divide every pixel value by 255.

In [3]:
# Convert both image arrays to float32 and divide every pixel by 255.
# NOTE: the results are assigned back to the same names, so running this cell
# a second time divides the already-scaled values by 255 again.
training_images, testing_images = (
    images.astype(np.float32) / 255.0
    for images in (training_images, testing_images)
)

In [4]:
# Settings shared by every one-vs-rest classifier trained below.
C = 1  # weight of the slack variables in the QP's linear term
N = len(training_labels)  # number of training samples
d = training_images.shape[1]  # dimension of each sample
labels_to_classify = [2, 3, 8, 9]  # one binary classifier is trained per digit
classifiers = dict()  # filled by the training loop: label -> (w, b)

In [5]:
# Objective of the QP: minimise 1/2 x^T Q x + p^T x.
# Decision vector layout: x = [w_0 .. w_{d-1}, b, e_0 .. e_{N-1}]  (d + 1 + N entries).
N, d = training_labels.shape[0], training_images.shape[1]
n_vars = d + N + 1

# Positions of the weight entries and of the slack entries inside x.
# Index d (the bias b) is skipped on purpose: it gets no quadratic term.
w_idx = list(range(d))
slack_idx = list(range(d + 1, d + 1 + N))

# Q is diagonal: 1.0 on the w entries (the 1/2 ||w||^2 term) and a tiny 1e-6
# on the slack entries, which the original author added because cvxopt
# raised an error without it.
Q_rows = w_idx + slack_idx
Q_cols = list(Q_rows)
Q_vals = [1.0] * d + [1e-6] * N

# sparse Q matrix
Q = spmatrix(Q_vals, Q_rows, Q_cols, (n_vars, n_vars), 'd')

# p vector: zero for w and b, C for every slack variable
p = matrix([0.0] * (d + 1) + [C] * N)

In [6]:

# Train one soft-margin SVM per digit (one-vs-rest) by solving
#     minimise 1/2 x^T Q x + p^T x   subject to   A x <= c
# with cvxopt.  Decision vector layout: x = [w (d entries), b, e (N slacks)].
#   rows 0 .. N-1   of A:  -y_i (w . x_i + b) - e_i <= -1   (margin constraints)
#   rows N .. 2N-1  of A:  -e_i <= 0                        (slack non-negativity)

# Only the sign y_i changes from digit to digit, so everything else is built
# once here.  np.nonzero finds the non-zero pixels in a single vectorised pass
# instead of an N*d Python loop repeated for every digit.
nz_rows, nz_cols = np.nonzero(training_images)
nz_pixels = training_images[nz_rows, nz_cols].astype(np.float64)
nz_row_list = nz_rows.tolist()
nz_col_list = nz_cols.tolist()

sample_rows = list(range(N))                # margin-constraint row of sample i
nonneg_rows = list(range(N, 2 * N))         # e_i >= 0 row of sample i
slack_cols = list(range(d + 1, d + 1 + N))  # column of slack variable e_i

# c vector: right-hand side, -1 for the margin rows and 0 for the e_i >= 0 rows
c = matrix([-1.0] * N + [0.0] * N)

for label in labels_to_classify:
    print(f"Training classifier for digit {label}...")
    # +1 for the current digit, -1 for every other digit
    yn = np.where(training_labels == label, 1, -1)

    # Triplets (value, row, column) of the sparse constraint matrix, in four groups:
    #   1. -y_i * x_ij  for every non-zero pixel   (w columns)
    #   2. -y_i                                    (b column, index d)
    #   3. -1 on the slack of each margin row
    #   4. -1 on the slack of each e_i >= 0 row
    A_rows = nz_row_list + sample_rows + sample_rows + nonneg_rows
    A_cols = nz_col_list + [d] * N + slack_cols + slack_cols
    A_vals = (
        (-yn[nz_rows] * nz_pixels).tolist()
        + (-yn).astype(np.float64).tolist()
        + [-1.0] * N
        + [-1.0] * N
    )

    A = spmatrix(A_vals, A_rows, A_cols, (N + N, d + N + 1))

    # solve
    sol = solvers.qp(Q, p, A, c)
    if sol['status'] != 'optimal':
        # Keep going (best effort), but make a non-converged solve visible.
        print(f"Warning: solver finished with status {sol['status']!r} for digit {label}")

    # First d entries of the solution are w, entry d is the bias b.
    w = np.array(sol['x'][:d]).flatten()
    b = sol['x'][d]

    classifiers[label] = (w, b)

Training classifier for digit 2...
     pcost       dcost       gap    pres   dres
 0: -1.6215e+04  5.4711e+04  4e+05  5e+00  1e+03
 1:  2.5185e+04 -2.8912e+04  7e+04  8e-01  2e+02
 2:  1.1248e+04 -7.1832e+03  2e+04  2e-01  5e+01
 3:  5.9229e+03 -2.5777e+03  1e+04  9e-02  2e+01
 4:  3.5283e+03 -5.1084e+02  5e+03  4e-02  9e+00
 5:  2.5430e+03  3.2893e+02  2e+03  2e-02  4e+00
 6:  1.8199e+03  8.3451e+02  1e+03  6e-03  2e+00
 7:  1.5973e+03  1.0083e+03  6e+02  2e-03  5e-01
 8:  1.4723e+03  1.0768e+03  4e+02  1e-03  3e-01
 9:  1.3881e+03  1.1217e+03  3e+02  5e-04  1e-01
10:  1.3351e+03  1.1499e+03  2e+02  3e-04  7e-02
11:  1.2837e+03  1.1773e+03  1e+02  9e-05  2e-02
12:  1.2585e+03  1.1916e+03  7e+01  3e-05  9e-03
13:  1.2398e+03  1.2043e+03  4e+01  1e-05  4e-03
14:  1.2318e+03  1.2088e+03  2e+01  3e-06  8e-04
15:  1.2247e+03  1.2147e+03  1e+01  1e-06  3e-04
16:  1.2207e+03  1.2180e+03  3e+00  5e-08  1e-05
17:  1.2197e+03  1.2190e+03  7e-01  1e-08  3e-06
18:  1.2193e+03  1.2193e+03  4e-02 

In [7]:
def predict(X, classifiers):
    """Predict a label for every row of X with one-vs-rest linear classifiers.

    Each classifier scores a sample as ``np.dot(x, w) + b``; the label whose
    classifier gives the highest score wins.  On ties the label that comes
    first in ``classifiers`` is chosen.

    Parameters
    ----------
    X : 2-D array-like, one sample per row.
    classifiers : dict mapping an integer label to a ``(w, b)`` pair.

    Returns
    -------
    numpy.ndarray of int, one predicted label per row of X.

    Raises
    ------
    ValueError
        If ``classifiers`` is empty while X contains samples.
    """
    if not classifiers:
        if len(X) == 0:
            return np.empty(0, dtype=int)
        raise ValueError("predict() needs at least one classifier")

    labels = np.fromiter(classifiers.keys(), dtype=int, count=len(classifiers))
    # scores[k, i] is the decision value of the k-th classifier on sample i.
    scores = np.stack([np.dot(X, w) + b for w, b in classifiers.values()], axis=0)
    # argmax returns the first maximal index, which keeps dict-order tie-breaking.
    return labels[np.argmax(scores, axis=0)]

In [8]:
def _accuracy(predicted, true_labels, allowed_labels):
    """Return the fraction of samples that were classified correctly.

    A sample counts as correct only when its prediction equals its true label
    and that true label is one of ``allowed_labels``.
    """
    true_labels = np.asarray(true_labels)
    hits = (np.asarray(predicted) == true_labels) & np.isin(true_labels, allowed_labels)
    return np.sum(hits) / len(true_labels)


# Accuracy on the data the classifiers were trained on.
predictions = predict(training_images, classifiers)
accuracy = _accuracy(predictions, training_labels, labels_to_classify)
print(f"Training Accuracy: {accuracy * 100:.2f}%")

# Accuracy on the held-out test split.
predictions = predict(testing_images, classifiers)
accuracy = _accuracy(predictions, testing_labels, labels_to_classify)
print(f"Testing Accuracy: {accuracy * 100:.2f}%")


Training Accuracy: 95.93%
Testing Accuracy: 94.64%
