In [2]:
from sklearn.svm import SVC
from cvxopt import matrix
from cvxopt import solvers
import cvxpy as cp
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [4]:
# import data
train = pd.read_csv('data/train.csv', header=None)
test = pd.read_csv('data/test.csv', header=None)


train_data = train.iloc[:4000, 1:].to_numpy()
val_data = train.iloc[4000:, 1:].to_numpy()

train_label = train.iloc[:4000, 0].to_numpy()
val_label = train.iloc[4000:, 0].to_numpy()


test_data = test.iloc[:, 1:].to_numpy()
test_label = test.iloc[:, 0].to_numpy()

train_label = np.where(train_label == 0, -1, train_label)
val_label = np.where(val_label == 0, -1, val_label)
test_label = np.where(test_label == 0, -1, test_label)

In [6]:
# Question 2
def svm_train_primal(data_train, label_train, regularisation_para_C):
    m, n = data_train.shape
    
    w = cp.Variable(n)
    b = cp.Variable()
    xi = cp.Variable(m, pos=True)

    objective = cp.Minimize(0.5 * cp.square(cp.norm(w)) + (regularisation_para_C / m) * cp.sum(xi))
    constraints = [label_train[i] * (data_train[i,:] @ w + b) >= 1 - xi[i] for i in range(m)]

    prob = cp.Problem(objective, constraints)
    prob.solve()
    
    svm_model = {'w': w.value, 'b': b.value, 'xi': xi.value}
    return svm_model

def svm_predict_primal(data_test, label_test, svm_model):
    
    w = svm_model['w']
    b = svm_model['b']
    predictions = np.sign(data_test @ w + b)
    correct_predictions = np.sum(predictions == label_test)
    accuracy = correct_predictions / len(label_test)
    return accuracy

In [7]:
# Question 2 output
C = 100
svm_model_primal = svm_train_primal(train_data, train_label, C)

# Test the SVM
val_accuracy = svm_predict_primal(val_data, val_label, svm_model_primal)
test_accuracy = svm_predict_primal(test_data, test_label, svm_model_primal)
print (f"Validation accuracy: {val_accuracy}")
print(f"Test accuracy: {test_accuracy}")

# Report the solution of b and sum of all dimensions of w
print(f"Solution for b: {svm_model_primal['b']}")
print(f"Sum of w dimensions: {np.sum(svm_model_primal['w'])}")

Validation accuracy: 0.9695555555555555
Test accuracy: 0.968
Solution for b: 1.7798137170833168
Sum of w dimensions: -0.1452156803642558


In [30]:
# Question 3 

def svm_train_dual(data, label, C):
    m, n = data.shape
    label = label.reshape(-1, 1)* 1.
    y_X = label * data
    H = np.dot(y_X, y_X.T) * 1.
    P = matrix(H)
    q = matrix(-np.ones((m,1)))
    G = matrix(np.vstack((np.eye(m)*-1,np.eye(m))))
    h = matrix(np.hstack((np.zeros(m),np.ones(m)* C/m)))
    A = matrix(label.reshape(1,-1))
    b = matrix(np.zeros(1))

    sol = solvers.qp(P, q, G, h, A, b)

    alphas = np.array(sol['x'])
    threshold = 1e-5
    w = ((label*alphas).T @ data).reshape(-1,1)
    S = (alphas > threshold).flatten()
    b_values = label[S] - np.dot(data[S], w)
    b = np.mean(b_values)

    svm = {
        "alphas" : alphas,
        "w" : w,
        "b" : b
    }
    return svm

In [31]:
# Question 3 output
C = 100
svm_model_dual = svm_train_dual(train_data, train_label, C)

     pcost       dcost       gap    pres   dres
 0: -3.8416e+02 -2.2800e+02  4e+04  1e+02  4e-13
 1: -2.2204e+01 -2.2367e+02  1e+03  3e+00  4e-13
 2: -1.2704e+01 -1.3798e+02  2e+02  4e-01  6e-14
 3: -8.2452e+00 -4.8671e+01  6e+01  1e-01  2e-14
 4: -6.5928e+00 -2.4543e+01  3e+01  4e-02  1e-14
 5: -5.8533e+00 -1.1273e+01  7e+00  1e-02  1e-14
 6: -5.9382e+00 -7.9755e+00  2e+00  2e-03  1e-14
 7: -6.1146e+00 -6.9517e+00  1e+00  7e-04  1e-14
 8: -6.2067e+00 -6.5700e+00  4e-01  1e-04  1e-14
 9: -6.3032e+00 -6.4094e+00  1e-01  2e-05  1e-14
10: -6.3347e+00 -6.3622e+00  3e-02  4e-06  1e-14
11: -6.3454e+00 -6.3477e+00  2e-03  1e-07  1e-14
12: -6.3464e+00 -6.3465e+00  2e-04  5e-09  1e-14
13: -6.3464e+00 -6.3464e+00  4e-06  1e-10  1e-14
Optimal solution found.


In [32]:
# Question 4
# since I already compute the w and b in the svm_train_dual function, I just return their values here.
def transfer_to_primal(svm):
    w = svm["w"]
    b = svm["b"]
    return w, b

In [33]:
# Question 4 output
w, b = transfer_to_primal(svm_model_dual)
print(f"Solution for b: {b}")
print(f"Sum of w dimensions: {np.sum(w)}")

Solution for b: 1.7389327654668258
Sum of w dimensions: -0.14513522739521


In [34]:
#Question 5

def support_vectors_primal(data, labels, svm_model):
    w = svm_model['w']
    b = svm_model['b']
    xi = svm_model['xi']

    decision_values = data @ w + b
    support_vector_index = np.where((xi > 1e-5) | (np.abs(labels * decision_values - 1) < 1e-5))[0] 
    support_vector = data[support_vector_index]

    return support_vector, support_vector_index

In [35]:
#Question 5 output

vector, index = support_vectors_primal(train_data, train_label, svm_model_primal)
print("Support vectors:", vector)
print("Indices of support vectors:", index)

Support vectors: [[-0.36 -0.91 -0.99 ...  0.3   2.44 -1.26]
 [-0.33 -1.78 -1.3  ...  0.49  0.3   0.2 ]
 [-0.51 -1.29 -0.96 ...  0.72 -1.2   0.31]
 ...
 [-0.73 -1.19 -0.24 ...  1.46 -1.36  1.21]
 [ 1.77 -1.64  0.66 ...  0.7   0.16 -0.97]
 [ 0.4   0.16 -0.49 ...  0.89  0.21  1.09]]
Indices of support vectors: [   0    6   11 ... 3991 3993 3998]


In [36]:
# Question 6

def support_vectors_dual(data, svm_dual):
    threshold = 1e-5
    alphas = svm_dual['alphas']

    support_vector_index = np.where(alphas > threshold)[0]
    support_vector = data[support_vector_index]
    
    return support_vector, support_vector_index

In [37]:
# Question 6 output

vector, index = support_vectors_dual(train_data, svm_model_dual)
print("Support vectors:", vector)
print("Indices of support vectors:", index)

Support vectors: [[-0.36 -0.91 -0.99 ...  0.3   2.44 -1.26]
 [ 1.05 -1.79  0.9  ...  0.39  0.6  -1.66]
 [ 1.01 -1.13  1.49 ...  0.23 -0.3  -0.01]
 ...
 [ 2.16 -0.78 -0.78 ... -0.38  1.1   0.39]
 [ 0.36 -0.19 -1.06 ... -0.83 -0.2   0.12]
 [-0.73 -1.19 -0.24 ...  1.46 -1.36  1.21]]
Indices of support vectors: [   0   16   28   29   41   54   58   68   80   88  127  130  140  145
  147  150  199  208  239  253  263  266  275  281  284  296  305  307
  332  344  356  377  395  401  403  405  414  420  424  432  433  441
  446  450  473  479  495  510  521  525  532  547  561  563  564  567
  576  581  587  595  604  610  637  642  656  660  678  703  708  709
  725  736  737  752  768  774  796  799  809  816  821  826  834  843
  844  856  861  873  905  909  932  935  948  965  967  988  989  993
 1003 1017 1022 1050 1082 1098 1116 1123 1139 1142 1166 1167 1178 1179
 1188 1219 1242 1266 1274 1295 1304 1320 1364 1371 1374 1377 1403 1406
 1407 1414 1416 1437 1447 1474 1475 1478 1489 1492 1

In [38]:
# Question 7

C_lst = [2**i for i in range(-10, 11, 2)]
best_res = 0
best_C = None
lst_res = []
for i in C_lst:
    svm_model = svm_train_primal(train_data, train_label, i)
    val_accuracy = svm_predict_primal(val_data, val_label, svm_model)
    lst_res.append(val_accuracy)
    if val_accuracy > best_res:
        best_res = val_accuracy
        best_C = i
svm_model = svm_train_primal(train_data, train_label, best_C)
test_accuracy = svm_predict_primal(test_data, test_label, svm_model)
print(f"Test accuracy: {test_accuracy}")



Test accuracy: 0.9746666666666667


In [17]:
for i in range(len(lst_res)):
    print (f"{C_lst[i]} : {lst_res[i]}")

0.0009765625 : 0.4908888888888889
0.00390625 : 0.4908888888888889
0.015625 : 0.4908888888888889
0.0625 : 0.9244444444444444
0.25 : 0.9622222222222222
1 : 0.9717777777777777
4 : 0.9748888888888889
16 : 0.974
64 : 0.9713333333333334
256 : 0.966
1024 : 0.9626666666666667


In [40]:
# Question 8

clf = SVC(kernel='linear', C=4)
clf.fit(train_data, train_label)

val_pred = clf.predict(val_data)
print("Validation Set Accuracy:", accuracy_score(val_label, val_pred))


test_pred = clf.predict(test_data)
print("Test Set Accuracy:", accuracy_score(test_label, test_pred))

Validation Set Accuracy: 0.9615555555555556
Test Set Accuracy: 0.9613333333333334
