In [1]:
import numpy as np
from sklearn import svm
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [2]:
training_features = np.loadtxt("features.train")
testing_features = np.loadtxt("features.test")

In [3]:
def split_data(one_versus_all, dataset, choice1, choice2):
    to_return = dataset
    if not one_versus_all:
        to_return = []
        
        for digit, intensity, symmetry in dataset:
            if digit == choice1 or digit == choice2:
                to_return.append([digit, intensity, symmetry])

    to_return = np.array(to_return)
    digits = to_return[:, 0]
    intensity = to_return[:, 1]
    symmetry = to_return[:, 2]
    
    return digits, intensity, symmetry

In [4]:
def build_my_kernel(Q):
    def my_kernel(xn, xm):
        return (1 + np.dot(xn, xm.T)) ** Q
    
    return my_kernel

In [5]:
def generate_one_versus_all_yn(digits, one_choice):
    yn = np.ones(len(digits))
    
    for i, digit in enumerate(digits):
        if digit != one_choice:
            yn[i] = -1
            
    return np.array(yn)

In [6]:
def parse_digits(digits, choice1, choice2):
    result = []
    
    for digit in digits:
        if digit == choice1 or digit == choice2:
            result.append(digit)
            
    return digits

In [7]:
def generate_one_versus_one_yn(digits, choice1, choice2):
    parsed_digits = parse_digits(digits, choice1, choice2)
    yn = np.ones(len(parsed_digits))
    
    for i, digit in enumerate(digits):
        if digit == choice2:
            yn[i] = -1
    
    return yn

In [8]:
def calc_error(yn, y_pred):
    # misclassified points are opposite sign so will be -1 when multiplied
    error = yn * y_pred
    return np.count_nonzero(error == -1) / len(yn)

In [33]:
def run_svm(one_versus_all, choice, choice1=-1, C=0.01, Q=2):
    digits_train, intensities_train, symmetries_train = split_data(one_versus_all, training_features, choice, choice1)
    digits_test, intensities_test, symmetries_test = split_data(one_versus_all, testing_features, choice, choice1)
    
    if (one_versus_all):
        yn_train = generate_one_versus_all_yn(digits_train, choice)
        yn_test = generate_one_versus_all_yn(digits_test, choice)
    
    else:
        yn_train = generate_one_versus_one_yn(digits_train, choice, choice1)
        yn_test = generate_one_versus_one_yn(digits_test, choice, choice1)

    xn_train = np.column_stack((intensities_train, symmetries_train))
    xn_test= np.column_stack((intensities_test, symmetries_test))
    model = svm.SVC(C=C, kernel='poly', degree=Q, gamma=1)
    model.fit(xn_train, yn_train)
    
    y_train_pred = model.predict(xn_train)
    y_test_pred = model.predict(xn_test)
    
    error = cross_val_score(model, xn_train, yn_train)
    
    return error

In [34]:
C = [0.0001, 0.001, 0.01, 0.1, 1]
result = [0, 0, 0, 0, 0]

for i in range(100):
    Ecv_arr = []
    
    for c in C:
        Ecv = run_svm(False, 1, 5, c)
        Ecv_arr.append(Ecv)
    
    result[np.argmin(Ecv_arr)] += 1

result

[0, 0, 0, 0, 100]

In [35]:
Ecv_arr = []

for i in range(100):
    for c in C:
        Ecv = run_svm(False, 1, 5, 1)
        Ecv_arr.append(Ecv)

np.mean(Ecv_arr)

0.9833893557422969

In [36]:
1 - 0.9833893557422969

0.01661064425770309