# SVM Assignment Code

In [1]:
# Question 2
import numpy as np
import pandas as pd
import cvxopt

def svm_train_primal(data_train, label_train, regularisation_para_C):
    # Data dimensions
    N, d = data_train.shape

    # Convert labels from {0, 1} to {-1, 1}
    y = np.where(label_train == 0, -1, 1).reshape(-1, 1)

    # Prepare matrices for cvxopt
    P = cvxopt.matrix(np.block([[np.eye(d), np.zeros((d, N + 1))],
                                [np.zeros((N + 1, d + N + 1))]]))  # Quadratic term
    q = cvxopt.matrix(np.hstack([np.zeros(d + 1), regularisation_para_C / N * np.ones(N)]))  # Linear term
    G = cvxopt.matrix(np.block([[-np.diag(y.flatten()) @ data_train, -y, -np.eye(N)],
                                [np.zeros((N, d + 1)), -np.eye(N)]]))  # Constraints matrix
    h = cvxopt.matrix(np.hstack([-np.ones(N), np.zeros(N)]))  # Constraints vector

    # Solve the QP problem using cvxopt
    solution = cvxopt.solvers.qp(P, q, G, h)
    w_b_xi = np.array(solution['x']).flatten()

    # Extract weight vector (w), bias term (b), and slack variables (xi)
    w = w_b_xi[:d]
    b = w_b_xi[d]
    xi = w_b_xi[d+1:]  # Slack variables

    return (w, b, xi)

def svm_predict_primal(data_test, label_test, svm_model):
    w, b = svm_model[:2]  # Ignore slack variables for prediction
    # Predict: sign(w.T * X + b)
    predictions = np.sign(np.dot(data_test, w) + b)
    # Convert {-1, 1} back to {0, 1}
    predictions = np.where(predictions == -1, 0, 1)

    # Calculate accuracy
    accuracy = np.mean(predictions == label_test)
    return accuracy

if __name__ == '__main__':
    # Load data without any assumptions about headers
    train_data = pd.read_csv('train.csv', header=None)
    test_data = pd.read_csv('test.csv', header=None)

    # Separate features and labels
    X_train = train_data.iloc[:4000, 1:].values  # Features from 2nd column onwards
    y_train = train_data.iloc[:4000, 0].values   # Labels from 1st column
    X_val = train_data.iloc[4000:, 1:].values
    y_val = train_data.iloc[4000:, 0].values
    X_test = test_data.iloc[:, 1:].values
    y_test = test_data.iloc[:, 0].values

    # Train the model
    C = 100
    svm_model = svm_train_primal(X_train, y_train, C)

    # Validate and test the model
    val_accuracy = svm_predict_primal(X_val, y_val, svm_model)
    test_accuracy = svm_predict_primal(X_test, y_test, svm_model)

    print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
    print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

    # Sum of weights for a quick check
    w_sum = np.sum(svm_model[0])
    b = svm_model[1]
    xi = svm_model[2]

    # Save the debugging information to a text file
    with open('question2.txt', 'w') as f:
        f.write(f"Validation Accuracy: {val_accuracy * 100:.2f}%\n")
        f.write(f"Test Accuracy: {test_accuracy * 100:.2f}%\n")
        f.write(f"Sum of w: {w_sum}\n")
        f.write(f"Bias (b): {b}\n")
        f.write(f"Regularization parameter (C): {C}\n")
        f.write(f"Slack variables (xi): {xi[:5]} (first 5 shown)\n")  # To get an idea of the slack
        f.write(f"Total number of slack variables: {len(xi)}\n")
        f.write(f"Full Model Weights: {svm_model[0][:5]} (first 5 shown)\n")  # First 5 weights as a check

    print(f"Debugging information saved to question2.txt")


     pcost       dcost       gap    pres   dres
 0:  7.1592e+00  6.0250e+02  4e+04  3e+00  2e+04
 1:  2.1840e+02 -8.6176e+02  1e+03  7e-02  4e+02
 2:  1.3725e+02 -9.0183e+01  2e+02  1e-02  5e+01
 3:  4.8478e+01 -1.5479e+01  6e+01  3e-03  1e+01
 4:  2.4467e+01 -2.7209e+00  3e+01  1e-03  6e+00
 5:  1.1255e+01  3.8462e+00  7e+00  3e-04  1e+00
 6:  7.9712e+00  5.4852e+00  2e+00  6e-05  3e-01
 7:  6.9506e+00  5.9990e+00  1e+00  2e-05  9e-02
 8:  6.5698e+00  6.1903e+00  4e-01  2e-06  1e-02
 9:  6.4094e+00  6.2999e+00  1e-01  5e-07  3e-03
10:  6.3622e+00  6.3341e+00  3e-02  9e-08  5e-04
11:  6.3477e+00  6.3454e+00  2e-03  4e-09  2e-05
12:  6.3465e+00  6.3464e+00  2e-04  1e-10  7e-07
13:  6.3464e+00  6.3464e+00  4e-06  3e-12  1e-08
Optimal solution found.
Validation Accuracy: 96.96%
Test Accuracy: 96.80%
Debugging information saved to question2.txt


In [2]:
# Question 3
import cvxpy as cvx
import numpy as np
import pandas as pd
from cvxopt import matrix, solvers

def svm_train_dual(data_train, label_train, regularisation_para_C):
    samples, features = data_train.shape
    label_train = label_train * 2 - 1  # Convert 0/1 to -1/1

    # Compute the Gram matrix (dot products between training samples)
    X = np.dot(data_train, data_train.T)
    
    # Set up the parameters for the quadratic program
    P = matrix(np.outer(label_train, label_train) * X)
    Q = matrix(-np.ones(samples))
    G = matrix(np.vstack((-np.eye(samples), np.eye(samples))))
    H = matrix(np.hstack((np.zeros(samples), np.ones(samples) * regularisation_para_C / samples)))
    A = matrix(label_train, (1, samples), 'd')
    B = matrix(0.0)

    # Solve the quadratic program using cvxopt
    solution = solvers.qp(P, Q, G, H, A, B)

    # Extract the Lagrange multipliers (alpha) from the solution
    alpha = np.ravel(solution['x'])

    # Return alpha (dual solution)
    return alpha, X

# Load the training data and test data
train_data = pd.read_csv('train.csv', header=None)
test_data = pd.read_csv('test.csv', header=None)
validation_data = train_data[4000:]
train_data = train_data[:4000]
regularisation_para_C = 100

# Separate features and labels for training data
label_train = train_data.iloc[:, 0].values  # Labels are in the first column
data_train = train_data.iloc[:, 1:].values  # Features are in the remaining columns

# Compute the dual solution
alpha, gram_matrix = svm_train_dual(data_train, label_train, regularisation_para_C)

# Calculate additional debugging information
alpha_sum = np.sum(alpha)
non_zero_alphas = np.sum(alpha > 1e-5)  # Approximate number of support vectors
alpha_first_five = alpha[:5]  # First 5 alpha values
support_vector_indices = np.where(alpha > 1e-5)[0]  # Indices of support vectors
violating_alphas = np.where((alpha > 1e-5) & (alpha < regularisation_para_C))[0]  # Margin violators

# Sum of slack variables (indirectly derived from alpha)
slack_sum = np.sum(alpha[violating_alphas])

# Debugging Gram Matrix
gram_matrix_sample = gram_matrix[:5, :5]  # Small sample of the Gram matrix for validation

# Save the debugging information to a text file
with open('question3.txt', 'w') as f:
    f.write(f"Sum of alpha: {alpha_sum}\n")
    f.write(f"Number of non-zero alphas (Support Vectors): {non_zero_alphas}\n")
    f.write(f"First 5 alpha values: {alpha_first_five}\n")
    f.write(f"Regularization parameter (C): {regularisation_para_C}\n")
    f.write(f"Support Vector Indices (first 5): {support_vector_indices[:5]}\n")
    f.write(f"Number of Violating Alphas (Margin Violators): {len(violating_alphas)}\n")
    f.write(f"Sum of Slack Variables (derived from alpha): {slack_sum}\n")
    f.write(f"Gram Matrix Sample (5x5):\n{gram_matrix_sample}\n")

print(f"Sum of alpha saved to question3.txt: {alpha_sum}")
print(f"Number of support vectors: {non_zero_alphas}")
print(f"First 5 support vector indices: {support_vector_indices[:5]}")
print(f"Number of margin violators: {len(violating_alphas)}")

     pcost       dcost       gap    pres   dres
 0: -3.8416e+02 -2.2800e+02  4e+04  1e+02  5e-13
 1: -2.2204e+01 -2.2367e+02  1e+03  3e+00  5e-13
 2: -1.2704e+01 -1.3798e+02  2e+02  4e-01  7e-14
 3: -8.2452e+00 -4.8671e+01  6e+01  1e-01  2e-14
 4: -6.5928e+00 -2.4543e+01  3e+01  4e-02  1e-14
 5: -5.8533e+00 -1.1273e+01  7e+00  1e-02  9e-15
 6: -5.9382e+00 -7.9755e+00  2e+00  2e-03  9e-15
 7: -6.1146e+00 -6.9517e+00  1e+00  7e-04  9e-15
 8: -6.2067e+00 -6.5700e+00  4e-01  1e-04  9e-15
 9: -6.3032e+00 -6.4094e+00  1e-01  2e-05  9e-15
10: -6.3347e+00 -6.3622e+00  3e-02  4e-06  1e-14
11: -6.3454e+00 -6.3477e+00  2e-03  1e-07  1e-14
12: -6.3464e+00 -6.3465e+00  2e-04  5e-09  1e-14
13: -6.3464e+00 -6.3464e+00  4e-06  1e-10  1e-14
Optimal solution found.
Sum of alpha saved to question3.txt: 7.281637057117684
Number of support vectors: 392
First 5 support vector indices: [ 0 16 28 29 41]
Number of margin violators: 392


In [3]:
# Question 4
import cvxpy as cvx
import numpy as np
import pandas as pd
from cvxopt import matrix, solvers

def svm_train_dual(data_train, label_train, regularisation_para_C):
    samples, features = data_train.shape
    label_train = label_train * 2 - 1  # Convert 0/1 to -1/1
    X = np.dot(data_train, data_train.T)
    
    P = matrix(np.outer(label_train, label_train) * X)
    Q = matrix(-np.ones(samples))
    G = matrix(np.vstack((-np.eye(samples), np.eye(samples))))
    H = matrix(np.hstack((np.zeros(samples), np.ones(samples) * regularisation_para_C/samples)))
    A = matrix(label_train, (1,samples), 'd')
    B = matrix(0.0)
    solution = solvers.qp(P,Q,G,H,A,B)
    alpha = np.ravel(solution['x'])
    return {'alpha': alpha}

def compute_primal_solution(alpha, data_train, label_train, regularisation_para_C, tolerance=1e-5):
    support_vector_indices = np.where((alpha > tolerance) & (alpha < regularisation_para_C))[0]
    label_train = label_train * 2 - 1 
    
    w_star = np.sum((alpha * label_train).reshape(-1, 1) * data_train, axis=0)
    
    b_star_values = []
    for idx in support_vector_indices:
        x_s = data_train[idx]
        y_s = label_train[idx]
        b_star_value = y_s - np.dot(w_star, x_s)
        b_star_values.append(b_star_value)
    
    # Average the bias terms from all support vectors
    b_star = np.median(b_star_values) if b_star_values else 0
    
    return w_star, b_star, support_vector_indices

# Load the training and test data
train_data = pd.read_csv('train.csv', header=None)
test_data = pd.read_csv('test.csv', header=None)
validation_data = train_data[4000:]
train_data = train_data[:4000]
regularisation_para_C = 100
label_train = train_data.iloc[:, 0].values  
data_train = train_data.iloc[:, 1:].values 
label_test = test_data.iloc[:, 0].values  
data_test = test_data.iloc[:, 1:].values 

# Train the SVM model using the dual solution
optimal = svm_train_dual(data_train, label_train, regularisation_para_C)
np.save('alpha.npy', optimal['alpha'])
alpha = np.load('alpha.npy')

# Compute the primal solution (w*, b*)
w_star, b_star, support_vector_indices = compute_primal_solution(alpha, data_train, label_train, regularisation_para_C)

# Calculate the sum of w_star (the weight vector)
w_star_sum = np.sum(w_star)

# Additional debugging information
num_support_vectors = len(support_vector_indices)
first_5_weights = w_star[:5]  # First 5 weights
first_5_alphas = alpha[:5]     # First 5 alphas

# Print the sum of w*, b*, and other relevant info to the terminal
print("Sum of w*:", w_star_sum)
print("b*:", b_star)
print(f"Number of support vectors: {num_support_vectors}")
print(f"First 5 weights: {first_5_weights}")
print(f"First 5 alpha values: {first_5_alphas}")

# Save the detailed debugging information to a text file
with open('question4.txt', 'w') as f:
    f.write(f"Sum of w*: {w_star_sum}\n")
    f.write(f"b*: {b_star}\n")
    f.write(f"Number of support vectors: {num_support_vectors}\n")
    f.write(f"Support Vector Indices (first 5): {support_vector_indices[:5]}\n")
    f.write(f"First 5 weights of w*: {first_5_weights}\n")
    f.write(f"First 5 alpha values: {first_5_alphas}\n")
    f.write(f"Regularization parameter (C): {regularisation_para_C}\n")


     pcost       dcost       gap    pres   dres
 0: -3.8416e+02 -2.2800e+02  4e+04  1e+02  5e-13
 1: -2.2204e+01 -2.2367e+02  1e+03  3e+00  5e-13
 2: -1.2704e+01 -1.3798e+02  2e+02  4e-01  7e-14
 3: -8.2452e+00 -4.8671e+01  6e+01  1e-01  2e-14
 4: -6.5928e+00 -2.4543e+01  3e+01  4e-02  1e-14
 5: -5.8533e+00 -1.1273e+01  7e+00  1e-02  9e-15
 6: -5.9382e+00 -7.9755e+00  2e+00  2e-03  9e-15
 7: -6.1146e+00 -6.9517e+00  1e+00  7e-04  9e-15
 8: -6.2067e+00 -6.5700e+00  4e-01  1e-04  9e-15
 9: -6.3032e+00 -6.4094e+00  1e-01  2e-05  9e-15
10: -6.3347e+00 -6.3622e+00  3e-02  4e-06  1e-14
11: -6.3454e+00 -6.3477e+00  2e-03  1e-07  1e-14
12: -6.3464e+00 -6.3465e+00  2e-04  5e-09  1e-14
13: -6.3464e+00 -6.3464e+00  4e-06  1e-10  1e-14
Optimal solution found.
Sum of w*: -0.1451352273951927
b*: 1.7798092093688218
Number of support vectors: 392
First 5 weights: [-0.02622559 -0.11417209  0.04186253 -0.05229907  0.07552307]
First 5 alpha values: [2.49999998e-02 1.76599916e-10 1.27338214e-10 1.76758740

In [4]:
# Question 5
import cvxpy as cvx
import numpy as np
import pandas as pd
from cvxopt import matrix, solvers

def svm_train_dual(data_train, label_train, regularisation_para_C):
    samples, features = data_train.shape
    label_train = label_train * 2 - 1  # Convert 0/1 to -1/1
    X = np.dot(data_train, data_train.T)
    
    P = matrix(np.outer(label_train, label_train) * X)
    Q = matrix(-np.ones(samples))
    G = matrix(np.vstack((-np.eye(samples), np.eye(samples))))
    H = matrix(np.hstack((np.zeros(samples), np.ones(samples) * regularisation_para_C/samples)))
    A = matrix(label_train, (1,samples), 'd')
    B = matrix(0.0)
    solution = solvers.qp(P,Q,G,H,A,B)
    alpha = np.ravel(solution['x'])
    return {'alpha': alpha}

def compute_primal_solution(alpha, data_train, label_train, regularisation_para_C, tolerance=1e-5):
    support_vector_indices = np.where((alpha > tolerance) & (alpha < regularisation_para_C))[0]
    label_train = label_train * 2 - 1 
    
    w_star = np.sum((alpha * label_train).reshape(-1, 1) * data_train, axis=0)
    
    print(f"Number of support vectors: {len(support_vector_indices)}")
    print("Support vector indices:", support_vector_indices)
    
    b_star_values = []
    for idx in support_vector_indices:
        x_s = data_train[idx]
        y_s = label_train[idx]
        b_star_value = y_s - np.dot(w_star, x_s)
        b_star_values.append(b_star_value)
    
    # Write support vectors to file
    with open('question5.txt', 'w') as f:
        f.write(f"Number of support vectors: {len(support_vector_indices)}\n")
        f.write(f"Support vector indices: {support_vector_indices.tolist()}\n")
    
    return w_star, None  # b_star removed

train_data = pd.read_csv('train.csv', header=None)
test_data = pd.read_csv('test.csv', header=None)
validation_data = train_data[4000:]
train_data = train_data[:4000]
regularisation_para_C = 100
label_train = train_data.iloc[:, 0].values  
data_train = train_data.iloc[:, 1:].values 
label_test = test_data.iloc[:, 0].values  
data_test = test_data.iloc[:, 1:].values 
optimal = svm_train_dual(data_train, label_train, regularisation_para_C)
np.save('alpha.npy', optimal['alpha'])
alpha = np.load('alpha.npy')
w_star, _ = compute_primal_solution(alpha, data_train, label_train, regularisation_para_C)


     pcost       dcost       gap    pres   dres
 0: -3.8416e+02 -2.2800e+02  4e+04  1e+02  5e-13
 1: -2.2204e+01 -2.2367e+02  1e+03  3e+00  5e-13
 2: -1.2704e+01 -1.3798e+02  2e+02  4e-01  7e-14
 3: -8.2452e+00 -4.8671e+01  6e+01  1e-01  2e-14
 4: -6.5928e+00 -2.4543e+01  3e+01  4e-02  1e-14
 5: -5.8533e+00 -1.1273e+01  7e+00  1e-02  9e-15
 6: -5.9382e+00 -7.9755e+00  2e+00  2e-03  9e-15
 7: -6.1146e+00 -6.9517e+00  1e+00  7e-04  9e-15
 8: -6.2067e+00 -6.5700e+00  4e-01  1e-04  9e-15
 9: -6.3032e+00 -6.4094e+00  1e-01  2e-05  9e-15
10: -6.3347e+00 -6.3622e+00  3e-02  4e-06  1e-14
11: -6.3454e+00 -6.3477e+00  2e-03  1e-07  1e-14
12: -6.3464e+00 -6.3465e+00  2e-04  5e-09  1e-14
13: -6.3464e+00 -6.3464e+00  4e-06  1e-10  1e-14
Optimal solution found.
Number of support vectors: 392
Support vector indices: [   0   16   28   29   41   54   58   68   80   88  127  130  140  145
  147  150  199  208  239  253  263  266  275  281  284  296  305  307
  332  344  356  377  395  401  403  405  414

In [5]:
# Question 6
import cvxpy as cvx
import numpy as np
import pandas as pd
from cvxopt import matrix, solvers

def svm_train_dual(data_train, label_train, regularisation_para_C):
    samples, features = data_train.shape
    label_train = label_train * 2 - 1  # Convert 0/1 to -1/1

    # Compute the Gram matrix (dot products between training samples)
    X = np.dot(data_train, data_train.T)
    
    # Set up the parameters for the quadratic program
    P = matrix(np.outer(label_train, label_train) * X)
    Q = matrix(-np.ones(samples))
    G = matrix(np.vstack((-np.eye(samples), np.eye(samples))))
    H = matrix(np.hstack((np.zeros(samples), np.ones(samples) * regularisation_para_C / samples)))
    A = matrix(label_train, (1, samples), 'd')
    B = matrix(0.0)

    # Solve the quadratic program using cvxopt
    solution = solvers.qp(P, Q, G, H, A, B)

    # Extract the Lagrange multipliers (alpha) from the solution
    alpha = np.ravel(solution['x'])

    # Return alpha and Gram matrix (for debugging)
    return alpha, X

# Load the training and test data
train_data = pd.read_csv('train.csv', header=None)
test_data = pd.read_csv('test.csv', header=None)
validation_data = train_data[4000:]
train_data = train_data[:4000]
regularisation_para_C = 100

# Separate features and labels for training data
label_train = train_data.iloc[:, 0].values  # Labels are in the first column
data_train = train_data.iloc[:, 1:].values  # Features are in the remaining columns

# Compute the dual solution and Gram matrix
alpha, gram_matrix = svm_train_dual(data_train, label_train, regularisation_para_C)

# Save the computed alpha to a file for future use
np.save('alpha.npy', alpha)

# Sum of alpha values
alpha_sum = np.sum(alpha)

# Define a small threshold to identify support vectors (non-zero alphas)
threshold = 1e-5
support_vectors_indices = np.where(alpha > threshold)[0]
support_vectors_count = len(support_vectors_indices)

# First 5 alpha values
first_5_alphas = alpha[:5]

# Number of slack variables (those between 0 and C)
violating_alphas = np.where((alpha > threshold) & (alpha < regularisation_para_C))[0]
slack_variable_count = len(violating_alphas)

# Debugging Gram Matrix
gram_matrix_sample = gram_matrix[:5, :5]  # Small portion for verification

# Save the detailed debugging information to a text file
with open('question6.txt', 'w') as f:
    f.write(f"Sum of alpha: {alpha_sum}\n")
    f.write(f"Number of support vectors: {support_vectors_count}\n")
    f.write(f"First 5 support vector indices: {support_vectors_indices[:5]}\n")
    f.write(f"First 5 alpha values: {first_5_alphas}\n")
    f.write(f"Number of slack variables (margin violators): {slack_variable_count}\n")
    f.write(f"Regularization parameter (C): {regularisation_para_C}\n")
    f.write(f"Gram Matrix Sample (5x5):\n{gram_matrix_sample}\n")

# Print relevant info for quick reference
print(f"Sum of alpha: {alpha_sum}")
print(f"Number of support vectors: {support_vectors_count}")
print(f"First 5 alpha values: {first_5_alphas}")
print(f"Number of margin violators (slack variables): {slack_variable_count}")


     pcost       dcost       gap    pres   dres
 0: -3.8416e+02 -2.2800e+02  4e+04  1e+02  5e-13
 1: -2.2204e+01 -2.2367e+02  1e+03  3e+00  5e-13
 2: -1.2704e+01 -1.3798e+02  2e+02  4e-01  7e-14
 3: -8.2452e+00 -4.8671e+01  6e+01  1e-01  2e-14
 4: -6.5928e+00 -2.4543e+01  3e+01  4e-02  1e-14
 5: -5.8533e+00 -1.1273e+01  7e+00  1e-02  9e-15
 6: -5.9382e+00 -7.9755e+00  2e+00  2e-03  9e-15
 7: -6.1146e+00 -6.9517e+00  1e+00  7e-04  9e-15
 8: -6.2067e+00 -6.5700e+00  4e-01  1e-04  9e-15
 9: -6.3032e+00 -6.4094e+00  1e-01  2e-05  9e-15
10: -6.3347e+00 -6.3622e+00  3e-02  4e-06  1e-14
11: -6.3454e+00 -6.3477e+00  2e-03  1e-07  1e-14
12: -6.3464e+00 -6.3465e+00  2e-04  5e-09  1e-14
13: -6.3464e+00 -6.3464e+00  4e-06  1e-10  1e-14
Optimal solution found.
Sum of alpha: 7.281637057117684
Number of support vectors: 392
First 5 alpha values: [2.49999998e-02 1.76599916e-10 1.27338214e-10 1.76758740e-10
 2.34799685e-10]
Number of margin violators (slack variables): 392


In [6]:
# Question 7
import numpy as np
import cvxpy as cp
from sklearn.metrics import accuracy_score

# Load the train data (replace 'train.csv' with the actual path)
train_data = np.loadtxt('train.csv', delimiter=',')
test_data = np.loadtxt('test.csv', delimiter=',')

# Split train_data into features and labels
X_train_full = train_data[:, 1:]  # All features
y_train_full = train_data[:, 0]   # Class labels

# Define the first 4000 samples as the training set
X_train = X_train_full[:4000]
y_train = y_train_full[:4000]

# Use the remaining samples as the validation set
X_val = X_train_full[4000:]
y_val = y_train_full[4000:]

# Function to train soft-margin SVM using CVXPY for a given C
def svm_train_primal(X, y, C):
    # Ensure that y is in {-1, 1}
    y = np.where(y == 0, -1, 1)
    
    n_samples, n_features = X.shape
    
    # Variables for primal SVM
    w = cp.Variable(n_features)
    b = cp.Variable()
    slack = cp.Variable(n_samples)
    
    # Objective: 1/2 ||w||^2 + C * sum(slack)
    objective = cp.Minimize(0.5 * cp.norm(w, 2) ** 2 + C * cp.sum(slack))
    
    # Constraints: y_i (w^T x_i + b) >= 1 - slack_i, slack_i >= 0
    constraints = [y[i] * (X[i] @ w + b) >= 1 - slack[i] for i in range(n_samples)]
    constraints += [slack >= 0]
    
    # Formulate the problem and solve it
    problem = cp.Problem(objective, constraints)
    problem.solve()
    
    return w.value, b.value

# Function to predict using the primal SVM
def svm_predict_primal(X, w, b):
    return np.sign(X @ w + b)

# Grid search over C values
C_values = [2 ** i for i in range(-10, 11, 2)]

best_C = None
best_accuracy = 0
debug_info = []

# Iterate over all values of C
for C in C_values:
    # Train the model using CVXPY with the current C
    w, b = svm_train_primal(X_train, y_train, C)
    
    # Predict on the validation set
    y_val_pred = svm_predict_primal(X_val, w, b)
    
    # Convert the predictions from {-1, 1} back to {0, 1}
    y_val_pred = np.where(y_val_pred == -1, 0, 1)
    
    # Calculate accuracy on the validation set
    accuracy = accuracy_score(y_val, y_val_pred)
    
    # Save debug info
    debug_info.append({
        "C": C,
        "Validation Accuracy": accuracy,
        "Sum of w": np.sum(w),
        "Bias (b)": b,
        "First 5 Weights": w[:5]
    })
    
    print(f"C = {C}, Validation Accuracy = {accuracy}")
    
    # Update best C and accuracy if current one is better
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_C = C

# Save all debug information to a text file
with open('question7.txt', 'w') as f:
    for info in debug_info:
        f.write(f"C = {info['C']}, Validation Accuracy = {info['Validation Accuracy']}\n")
        f.write(f"Sum of w: {info['Sum of w']}\n")
        f.write(f"Bias (b): {info['Bias (b)']}\n")
        f.write(f"First 5 weights of w: {info['First 5 Weights']}\n")
        f.write("\n")
    
    # Save the best C and accuracy
    f.write(f"Optimal C = {best_C}, Best Validation Accuracy = {best_accuracy}\n")

print(f"Optimal C = {best_C}, Best Validation Accuracy = {best_accuracy}")


C = 0.0009765625, Validation Accuracy = 0.9746666666666667
C = 0.00390625, Validation Accuracy = 0.9742222222222222
C = 0.015625, Validation Accuracy = 0.9717777777777777
C = 0.0625, Validation Accuracy = 0.9655555555555555
C = 0.25, Validation Accuracy = 0.9624444444444444
C = 1, Validation Accuracy = 0.962
C = 4, Validation Accuracy = 0.9615555555555556
C = 16, Validation Accuracy = 0.9602222222222222
C = 64, Validation Accuracy = 0.96
C = 256, Validation Accuracy = 0.9606666666666667
C = 1024, Validation Accuracy = 0.9606666666666667
Optimal C = 0.0009765625, Best Validation Accuracy = 0.9746666666666667


In [7]:
# Question 8
import pandas as pd
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the datasets and ensure the first column is the label and the rest are features
train_data = pd.read_csv('train.csv', header=None)
test_data = pd.read_csv('test.csv', header=None)

# Split the datasets into features (X) and labels (y)
X_train = train_data.iloc[:, 1:]  # All columns except the first one
y_train = train_data.iloc[:, 0]   # First column is the label

X_test = test_data.iloc[:, 1:]    # All columns except the first one
y_test = test_data.iloc[:, 0]     # First column is the label

# Split the training data into training and validation sets (80% training, 20% validation)
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Train the LinearSVC model using the optimal C value (adjust if different)
optimal_C = 4
svm_model = LinearSVC(C=optimal_C, max_iter=10000)
svm_model.fit(X_train_split, y_train_split)

# Make predictions on the validation set
y_val_pred = svm_model.predict(X_val_split)

# Calculate validation accuracy
val_accuracy = accuracy_score(y_val_split, y_val_pred)

# Make predictions on the test set
y_test_pred = svm_model.predict(X_test)

# Calculate test accuracy
test_accuracy = accuracy_score(y_test, y_test_pred)

# Debugging Information: Model coefficients and intercept
coef_sum = svm_model.coef_.sum()
intercept = svm_model.intercept_[0]
first_5_coefficients = svm_model.coef_[0][:5]

# Save detailed debug information to a text file
with open('question8.txt', 'w') as f:
    f.write(f"Optimal C: {optimal_C}\n")
    f.write(f"Validation Accuracy: {val_accuracy * 100:.2f}%\n")
    f.write(f"Test Accuracy: {test_accuracy * 100:.2f}%\n")
    f.write(f"Sum of Model Coefficients: {coef_sum}\n")
    f.write(f"Bias (Intercept): {intercept}\n")
    f.write(f"First 5 Coefficients: {first_5_coefficients}\n")

# Print the results for quick reference
print(f"Optimal C: {optimal_C}")
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Sum of Model Coefficients: {coef_sum}")
print(f"Bias (Intercept): {intercept}")
print(f"First 5 Coefficients: {first_5_coefficients}")




Optimal C: 4
Validation Accuracy: 96.35%
Test Accuracy: 96.93%
Sum of Model Coefficients: 0.01841335137515998
Bias (Intercept): 1.0283074193860124
First 5 Coefficients: [ 0.00062454 -0.01072562  0.03267166 -0.0184045   0.03379624]


