In [40]:
import numpy as np
import pandas as pd
from quadprog_wrapper import solve_quadprog
from sklearn.preprocessing import scale

In [41]:
def polynomial_kernel(row_data, col_data, order):
    # Obtain the gram matrix
    K = (np.dot(row_data.T, col_data) + 1) ** order
    return K

In [42]:
def rbf_kernel(row, col, order):
    K = np.exp(-(np.sum(row_data ** 2, axis=0, keepdims=True).T + np.sum(col_data ** 2, axis=0) - 2 * row_data.T.dot(col_data)) / (2 * sigma ** 2))
    return K

In [43]:
def linear_kernel(row, col):
    return row_data.T.dot(col_data)

In [52]:
def kernal_svm_train(data, labels, params):
    if params['kernel'] == 'rbf':
        gram_matrix = rbf_kernel(data, data, params['sigma'])
    elif params['kernel'] == 'polynomial':
        gram_matrix = polynomial_kernel(data, data, params['order'])
    else:
        gram_matrix = linear_kernel(data, data)
    
    print(gram_matrix.shape)
    n = gram_matrix.shape[0]
    hessian = np.outer(labels, labels) * gram_matrix
    weights = np.ones(n)
    eq_coeffs = np.zeros((1, n))
    eq_coeffs[0, :] = labels
    eq_constants = np.zeros(1)

    lower_bounds = np.zeros(n)
    upper_bounds = params['C']
    alphas = solve_quadprog(hessian, weights, eq_coeffs, eq_constants, None,
                            None, lower_bounds, upper_bounds)
    model = dict()
    tolerance = 1e-6
    sv_indices = alphas > tolerance
    model['support_vectors'] = data[:, sv_indices]
    model['alphas'] = alphas[sv_indices]
    model['params'] = params  # store the kernel type and parameters
    model['sv_labels'] = labels[sv_indices]

    # find all alphas that represent points on the decision margin
    margin_alphas = np.logical_and(
        alphas > tolerance, alphas < params['C'] - tolerance)

    # compute the bias value
    if np.any(margin_alphas):
        model['bias'] = np.mean(
            labels[margin_alphas].T - (alphas * labels).T.dot(gram_matrix[:, margin_alphas]))
    else:
        # there were no support vectors on the margin (this should only happen due to numerical errors)
        model['bias'] = 0

    return model

In [45]:
def kernel_svm_predict(data, model):
    if model['params']['kernel'] == 'rbf':
        gram_matrix = rbf_kernel(data, model['support_vectors'], model['params']['sigma'])
    elif model['params']['kernel'] == 'polynomial':
        gram_matrix = polynomial_kernel(data, model['support_vectors'], model['params']['order'])
    else:
        # use a linear kernel by default
        gram_matrix = linear_kernel(data, model['support_vectors'])

    scores = gram_matrix.dot(
        model['alphas'] * model['sv_labels']) + model['bias']
    scores = scores.ravel()

    labels = 2 * (scores > 0) - 1  # threshold and map to {-1, 1}

    return labels, scores

In [48]:
trainData = pd.read_csv('train.csv')
testData = pd.read_csv('test.csv')
trainLabel = trainData.label
testLabel = testData.label
trainPixel = trainData.drop(columns = 'label')
testPixel = testData.drop(columns = 'label')

In [51]:
params = {'kernel': 'polynomial', 'C': 1.0, 'order': 2}
svm_model = kernal_svm_train(trainPixel, trainLabel, params)
predictions, _ = kernel_svm_predict(testPixel, svm_model)
test_accuracy = np.mean(predictions == testLabel)
test_accuracy

(784, 784)


ValueError: operands could not be broadcast together with shapes (38000,38000) (784,784) 

In [None]:
params = {'kernel': 'rbf', 'C': 1.0, 'sigma': 0.5}
svm_model = kernal_svm_train(trainPixel, trainLabel, params)
predictions, _ = kernel_svm_predict(testPixel, svm_model)
test_accuracy = np.mean(predictions == testLabel)
test_accuracy