In [1]:
import sys
sys.path.append(r'..')

import numpy as np
from cvxopt import matrix, solvers, spmatrix
from data import read_svm_data


In [2]:
training_labels, training_images = read_svm_data("training", r"../../MNIST_ORG", [2, 3, 8, 9])
testing_labels, testing_images = read_svm_data("testing", r"../../MNIST_ORG", [2, 3, 8, 9])

training_images.shape, training_labels.shape, testing_images.shape, testing_labels.shape

((20000, 784), (20000,), (3974, 784), (3974,))

In [3]:

training_images = training_images.astype(np.float32) / 255.0
testing_images = testing_images.astype(np.float32) / 255.0

training_images_sub = training_images[::10]
training_labels_sub = training_labels[::10]

In [4]:
def polynomial_kernel(X1, X2, degree=2):
    K = np.dot(X1, X2.T)
    return (1 + K) ** degree

In [5]:
C = 10
N = training_labels_sub.shape[0]  # number of training samples
d = training_images_sub.shape[1]  # dimension of each sample
labels_to_classify = [2, 3, 8, 9]
classifiers = {}

In [6]:
solvers.options['show_progress'] = False

for label in labels_to_classify:
    print(f"\nTraining classifier for digit {label}...")
    yn = np.where(training_labels_sub == label, 1, -1)

    print("Building Kernel matrix...")
    
    # Q is a NxN matrix, where N is the number of training samples
    # Q[i, j] = y[i] * y[j] * K(x[i], x[j])
    K = polynomial_kernel(training_images_sub, training_images_sub)
    Y = yn.reshape(-1, 1) * yn.reshape(1, -1)
    Q = matrix(K * Y, tc='d')

    # p is a Nx1 matrix of -1s
    p = matrix(-np.ones(N))
    
    # Ax = c equality constraint sum(alpha * y) = 0
    # A is a row vector of labels y
    # c is a scalar 0
    A = matrix(yn, (1, N), 'd')
    c = matrix(0.0)
    
    # Gx <= h inequality constraint 0 <= alpha <= C
    # lower bound 0 <= alpha
    G = spmatrix([], [], [], (N, N), 'd')
    G[::N+1] = -1
    h = matrix(np.zeros(N), tc='d')
    
    # upper bound alpha <= C
    G_up = spmatrix([], [], [], (N, N), 'd')
    G_up[::N+1] = 1
    h_up = matrix(C * np.ones(N), tc='d')
    
    # concatenate lower and upper bound
    G = matrix([G, G_up])
    h = matrix([h, h_up])

    print("Solving QP...")
    solution = solvers.qp(Q, p, G, h, A, c)
    alphas = np.array(solution['x']).flatten()

    # support vectors have non zero lagrange multipliers
    # alphas > 0 caused numerical problems, so we use 1e-6
    sv = alphas > 1e-6
    ind = np.arange(len(alphas))[sv]    # indices of support vectors
    alpha_sv = alphas[sv]               # alphas of support vectors
    sv_y = yn[sv]                       # labels of support vectors
    sv_X = training_images_sub[sv]      # support vectors

    # b = 1/N * sum(y - sum(alpha * y * K))
    b = np.mean(sv_y - np.sum(alpha_sv * sv_y * K[np.ix_(ind, sv)], axis=0))

    classifiers[label] = (alpha_sv, sv_X, sv_y, b)


Training classifier for digit 2...
Building Kernel matrix...
Solving QP...

Training classifier for digit 3...
Building Kernel matrix...
Solving QP...

Training classifier for digit 8...
Building Kernel matrix...
Solving QP...

Training classifier for digit 9...
Building Kernel matrix...
Solving QP...


In [7]:
def predict(X, classifiers):
    results = {}
    for label, (alphas, support_vectors, sv_labels, b) in classifiers.items():
        K_eval = polynomial_kernel(X, support_vectors)
        prediction = np.dot(K_eval, alphas * sv_labels) + b
        results[label] = prediction
    predictions = np.argmax(np.column_stack([results[label] for label in labels_to_classify]), axis=1)
    mapped_labels = [labels_to_classify[i] for i in predictions]
    return np.array(mapped_labels)

# Prediction and accuracy calculation
predictions = predict(training_images, classifiers)
accuracy = np.mean(predictions == training_labels)
print(f"Training Accuracy: {accuracy * 100:.2f}%")

predictions = predict(testing_images, classifiers)
accuracy = np.mean(predictions == testing_labels)
print(f"Testing Accuracy: {accuracy * 100:.2f}%")

Training Accuracy: 97.04%
Testing Accuracy: 96.90%
