In [1]:
import pandas as pd
import numpy as np



Reading the files in

In [2]:
train_df = pd.read_csv("train.csv", low_memory=False)
test_df = pd.read_csv("test.csv", low_memory=False)
train_df['0.00'] = train_df['0.00'].replace([0.0], [-1.0])
test_df['1.00'] = test_df['1.00'].replace([0.0], [-1.0])

val_df = train_df[4000::1]
train_df = train_df[0:4000:1]

train_x = train_df.drop(columns=['0.00'])
train_y = train_df['0.00']

val_x = val_df.drop(columns=['0.00'])
val_y = val_df['0.00']

test_x = test_df.drop(columns=['1.00'])
test_y = test_df['1.00']

In [3]:
val_df.reset_index(inplace=True)

Implementing Primal Form soft margin SVM

In [4]:
import cvxpy as cv
def svm_train_primal(data_train, label_train, regularisation_constant):
    """Training the w and b parameters for the soft margin SVM in primal Form"""
   

    # defining variables
    n_samples, n_features = data_train.shape 
    w = cv.Variable(n_features)
    b = cv.Variable()
    x_i = cv.Variable(n_samples)

    # minimising the objective function for the soft margin SVM 
    objective_function = cv.Minimize(0.5 * cv.norm(w, 2) ** 2 + regularisation_constant * cv.sum(x_i))

    # setting the constraints formulas
    constraints = [label_train[i] * (data_train[i] @ w + b) >= 1 - x_i[i] for i in range(n_samples)]
    constraints += [x_i >= 0] # making sure all alpha's are greater than zero

    # optimising the problem according to the constraints
    prob = cv.Problem(objective_function, constraints)
    prob.solve()

    return w.value, b.value, x_i

def svm_predict_primal ( data_test , label_test , svm_model ):
    return np.sign(data_test @ svm_model[0] + svm_model[1])

Training SVM model with primal

In [5]:
svm_model = svm_train_primal(train_x.to_numpy(), train_y.to_numpy(), 100)

Getting accuracies for primal svm

In [6]:
svm_predictions_train = svm_predict_primal(train_x.to_numpy(), train_y.to_numpy(), svm_model)
svm_predictions_test = svm_predict_primal(test_x.to_numpy(), test_y.to_numpy(), svm_model)

In [7]:
train_accuracy = np.mean(svm_predictions_train == train_y.to_numpy()) * 100
test_accuracy = np.mean(svm_predictions_test == test_y.to_numpy()) * 100

print(f"Train Accuracy: {train_accuracy:.2f}%")
print(f"Test Accuracy: {test_accuracy:.2f}%")

Train Accuracy: 98.45%
Test Accuracy: 96.86%


Printing W and B

In [8]:
print("Sum of W across dimensions: {:.2f}\nb: {}".format(svm_model[0].sum(), svm_model[1]))

Sum of W across dimensions: -0.16
b: 2.980545994257868


Getting the support vectors for the Prime form soft margin SVM

In [9]:
# you take all predictions within some tolerance limit (1e-5) of the hyperplane
predictions_for_train = train_y.to_numpy() * (train_x.to_numpy().dot(svm_model[0]) + svm_model[1]) 

support_vector_indexs = np.where(predictions_for_train <= 1 + 1e-5)
support_vectors = train_x.to_numpy()[support_vector_indexs]


In [10]:
support_vectors.shape

(315, 200)

Implementing soft margin SVM in dual form

In [11]:
import cvxpy as cvx

def svm_train_dual ( data_train , label_train , regularisation_para_C ):
    # initialising variables
    n, m = data_train.shape
    a = cvx.Variable(n)
    
    # setting the linear kernel
    sum_function = cvx.sum_squares(cvx.matmul (cvx.multiply(a, label_train), data_train ) ) 
    # setting the objective function as required
    # objective_function = cvx.Minimize(-cvx.sum(a) + 0.5 * cvx.sum ( cvx.multiply ( cvx.multiply( a, label_train ) , k @ cvx.multiply( a, label_train ) ) ) )
    # objective_function = cvx.Minimize(0.5 * cvx.sum(cvx.multiply(a, k @ a)) - cvx.sum(a))
    objective_function = cvx.Maximize(cvx.sum(a) - 0.5 * sum_function)
    # objective_function = cvx.Minimize( 0.5 * cvx.quad_form( cvx.multiply(a, label_train), k) - cvx.sum(a) )

    # setting the constraints
    constraints = [ a <= regularisation_para_C / n, a >= 0, cvx.sum(cvx.multiply(a, label_train)) == 0]

    # maximising the problem in regards to the constraints
    problem = cvx.Problem(objective_function, constraints)
    problem.solve()

   
    return a

Getting w and b

In [12]:
a = svm_train_dual(train_x.to_numpy(), train_y.to_numpy(), regularisation_para_C=100)
# now calculating w and b
w = np.sum((a.value * train_y.to_numpy())[:,None] * train_x.to_numpy(), axis = 0)

def compute_b(X, y, w, alpha, tol=1e-5):
    # Find support vectors (where alpha > 0)
    support_vector_indices = np.where((a.value > tol) & (a.value < 1/tol))[0]
    
    # Use the support vectors to compute b (averaging over all support vectors)
    b_vals = []
    for idx in support_vector_indices:
        b_val = y[idx] - np.dot(w, X[idx])
        b_vals.append(b_val)
    
    # Return the average value of b over all support vectors
    return np.mean(b_vals)

b = compute_b(train_x.to_numpy(), train_y.to_numpy(), w, a)


In [13]:
print("Sum of W across dimensions: {:.2f}\nb: {}".format(w.sum(), b))

Sum of W across dimensions: -0.26
b: 1.75642789079601


In [14]:

svm_predictions_train = svm_predict_primal(train_x.to_numpy(), train_y.to_numpy(), [w, b])
svm_predictions_test = svm_predict_primal(test_x.to_numpy(), test_y.to_numpy(), [w, b])


Finding support vectors from the dual problem

In [15]:
def find_support_vectors(data_train, label_train, a, regularisation_parameter, tol=1e-5):
 
    n, m = data_train.shape  # Number of training samples
    support_indices = np.where((a.value >= tol) & (a.value < (regularisation_parameter / n) + tol))[0]
    
    support_vectors = data_train[support_indices]  # Extract support vector points
    support_labels = label_train[support_indices]   # Extract the corresponding labels
    
    return support_indices, support_vectors, support_labels

support_ind, support_vec, support_labl =  find_support_vectors(train_x.to_numpy(), train_y.to_numpy(), a, 100)

In [16]:
support_vec.shape

(395, 200)

Choosing C from the validation set

In [17]:
optimal_c = 2**(-10)
optimal_accuracy = 0;
c = 2**(-10)
c_s = []
for i in range (-10, 10, 2):
    c_s.append(2**(i))

for c in c_s:
    print(c)
    svm_model = svm_train_dual(train_x.to_numpy(), train_y.to_numpy(), c)

    w = np.sum((a.value * train_y.to_numpy())[:,None] * train_x.to_numpy(), axis = 0)
    b = compute_b(train_x.to_numpy(), train_y.to_numpy(), w, a)

    acc = svm_predict_primal(val_x.to_numpy(), val_y.to_numpy(), [w, b])
    acc = np.mean(acc == val_y.to_numpy()) * 100
    if(acc > optimal_accuracy):
        optimal_accuracy = acc
        optimal_c = c
    
print("The optimal C is: {} with accuracy: {}".format(optimal_c, optimal_accuracy))
    


0.0009765625


0.00390625
0.015625
0.0625
0.25
1
4
16
64
256
The optimal C is: 0.0009765625 with accuracy: 97.02156034674371




Getting test accuracy with optimal C

In [18]:
svm_model = svm_train_dual(train_x.to_numpy(), train_y.to_numpy(), optimal_c)
w = np.sum((a.value * train_y.to_numpy())[:,None] * train_x.to_numpy(), axis = 0)
b = compute_b(train_x.to_numpy(), train_y.to_numpy(), w, a)

acc = svm_predict_primal(test_x.to_numpy(), test_y.to_numpy(), [w, b])
acc = np.mean(acc == test_y.to_numpy()) * 100
print("Accuracy of optimal C of {} for Test set: {:.4f}%".format(optimal_c, acc))

Accuracy of optimal C of 0.0009765625 for Test set: 96.8646%


Using C with Scikit's Linear svm

In [19]:
from sklearn.svm import LinearSVC

svm = LinearSVC(tol=1e-5, C=optimal_c)
svm.fit(train_x.to_numpy(), train_y.to_numpy())
print("SVM accuracy with optimal C of {}:\t{:.4f}%".format (optimal_c, svm.score(test_x.to_numpy(), test_y.to_numpy()) * 100 ) )

SVM accuracy with optimal C of 0.0009765625:	96.6644%


