In [8]:
import pandas as pd
import numpy as np



Reading the files in

In [28]:
train_df = pd.read_csv("train.csv", low_memory=False)
test_df = pd.read_csv("test.csv", low_memory=False)
train_df['0.00'] = train_df['0.00'].replace([0.0], [-1.0])
test_df['1.00'] = test_df['1.00'].replace([0.0], [-1.0])

val_df = train_df[4000::1]
train_df = train_df[0:4000:1]

train_x = train_df.drop(columns=['0.00'])
train_y = train_df['0.00']

val_x = val_df.drop(columns=['0.00'])
val_y = val_df['0.00']

test_x = test_df.drop(columns=['1.00'])
test_y = test_df['1.00']

In [19]:
val_df.reset_index(inplace=True)

Implementing Primal Form soft margin SVM

In [31]:
import cvxpy as cv
def svm_train_primal(data_train, label_train, regularisation_constant):
    """Training the w and b parameters for the soft margin SVM in primal Form"""
   

    # defining variables
    n_samples, n_features = data_train.shape 
    w = cv.Variable(n_features)
    b = cv.Variable()
    x_i = cv.Variable(n_samples)

    # minimising the objective function
    objective_function = cv.Minimize(0.5 * cv.norm(w, 2) ** 2 + regularisation_constant * cv.sum(x_i))

    # setting the constraints formulas
    constraints = [label_train[i] * (data_train[i] @ w + b) >= 1 - x_i[i] for i in range(n_samples)]
    constraints += [x_i >= 0]

    prob = cv.Problem(objective_function, constraints)
    prob.solve()
    
    return w.value, b.value

def svm_predict_primal ( data_test , label_test , svm_model ):
    return np.sign(data_test @ svm_model[0] + svm_model[1])

Training SVM model with primal

In [34]:
svm_model = svm_train_primal(train_x.to_numpy(), train_y.to_numpy(), 100)

Getting accuracies for primal svm

In [35]:
svm_predictions_train = svm_predict_primal(train_x.to_numpy(), train_y.to_numpy(), svm_model)
svm_predictions_test = svm_predict_primal(test_x.to_numpy(), test_y.to_numpy(), svm_model)

In [38]:
train_accuracy = np.mean(svm_predictions_train == train_y.to_numpy()) * 100
test_accuracy = np.mean(svm_predictions_test == test_y.to_numpy()) * 100

print(f"Train Accuracy: {train_accuracy:.2f}%")
print(f"Test Accuracy: {test_accuracy:.2f}%")

Train Accuracy: 98.45%
Test Accuracy: 96.86%


Printing W and B

In [39]:
print("Sum of W across dimensions: {:.2f}\nb: {}".format(svm_model[0].sum(), svm_model[1]))

Sum of W across dimensions: -0.16
b: 2.980545994257868


Implementing soft margin SVM in dual form

In [48]:
import cvxpy as cvx

def svm_train_dual ( data_train , label_train , regularisation_para_C ):
    # initialising variables
    n, m = data_train.shape
    a = cvx.Variable(n)
    
    # setting the linear kernel
    k = data_train @ data_train.T
    # setting the objective function as required
    objective_function = cvx.Maximize(cvx.sum(a) - 0.5 * cvx.quad_form(cvx.multiply(a, label_train), k))

    # setting the constraints
    constraints = [ a <= regularisation_para_C / n, cvx.sum(cvx.multiply(a, label_train)) == 0, a >= 0]

    # maximising the problem in regards to the constraints
    problem = cvx.Problem(objective_function, constraints)
    problem.solve()

    # now calculating w and b
    w = np.sum((a.value * label_train)[:,None] * data_train, axis = 0)

    # selecting vectors with a > 0
    support_index = np.where(a > 1e-6)[0]
    b = np.mean(label_train[support_index] - data_train[support_index] @ w)

    return w, b, a

In [49]:
svm_model = svm_train_dual ( train_x.to_numpy() , train_y.to_numpy() , 100 )

svm_predictions_train = svm_predict_primal(train_x.to_numpy(), train_y.to_numpy(), svm_model)
svm_predictions_test = svm_predict_primal(test_x.to_numpy(), test_y.to_numpy(), svm_model)


DCPError: Problem does not follow DCP rules. Specifically:
The objective is not DCP. Its following subexpressions are not:
QuadForm(var88051 @ [ 1.  1.  1. ...  1.  1. -1.], [[280.27 -45.10 ... 92.47 3.15]
 [-45.10 259.69 ... -57.98 28.78]
 ...
 [92.47 -57.98 ... 262.64 -14.62]
 [3.15 28.78 ... -14.62 260.14]])