# SVM on MNIST dataset 
@author BAIM Mohamed Jalal

In [13]:
# libraries
import numpy as np
import csv
from libsvm.svmutil import *

In [14]:
# paths
path_x_train = '../data/X_train.csv'
path_x_test = '../data/X_test.csv'
path_y_train = '../data/y_train.csv'
path_y_test = '../data/y_test.csv'

In [15]:
KERNEL = {
    'linear': 0, 
    'polynomial': 1, 
    'RBF': 2, 
}

## Task 1: kernel func 

In [16]:
def load_data(X_train_path, y_train_path, X_test_path, y_test_path):
    X_train = np.zeros((0, 784))
    y_train = []
    X_test = np.zeros((0, 784))
    y_test = []

    with open(X_train_path, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            X_train = np.vstack([X_train, np.array(row).astype(float)])
    with open(y_train_path, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            y_train.append(int(row[0]))
    with open(X_test_path, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            X_test = np.vstack([X_test, np.array(row).astype(float)])
    with open(y_test_path, 'r') as f:
        reader = csv.reader(f)
        for row in reader:
            y_test.append(int(row[0]))

    return X_train, y_train, X_test, y_test

X_train, y_train, X_test, y_test = load_data(path_x_train, path_y_train, path_x_test, path_y_test)

print('Data loaded')
print('X_train shape:', X_train.shape)
print('y_train shape:', len(y_train))
print('X_test shape:', X_test.shape)
print('y_test shape:', len(y_test))

Data loaded
X_train shape: (5000, 784)
y_train shape: 5000
X_test shape: (2500, 784)
y_test shape: 2500


In [17]:
# Train using different kernels
for kernel_name, kernel in KERNEL.items():
    print('Training with', kernel_name, 'kernel')
    model = svm_train(y_train, X_train, '-t {} -q'.format(kernel))
    p_label, p_acc, p_val = svm_predict(y_test, X_test, model)
    print('Accuracy:', p_acc[0], '%')
    print('')

Training with linear kernel
Accuracy = 95.08% (2377/2500) (classification)
Accuracy: 95.08 %

Training with polynomial kernel
Accuracy = 34.68% (867/2500) (classification)
Accuracy: 34.68 %

Training with RBF kernel
Accuracy = 95.32% (2383/2500) (classification)
Accuracy: 95.32000000000001 %



## Task 2 

In [18]:
from libsvm.svmutil import svm_train

def evaluate_hyperparameters(Y, X, hyperparameters, optimal_accuracy, optimal_parameters):
    print("Evaluating hyperparameters:", hyperparameters)
    cv_accuracy = svm_train(Y, X, hyperparameters)
    
    if cv_accuracy > optimal_accuracy:
        optimal_accuracy = cv_accuracy
        optimal_parameters = hyperparameters
    
    return optimal_accuracy, optimal_parameters


def grid_search(X, Y, kernel):
    cost = [0.01, 0.1, 1, 10]

    optimal_accuracy = 0
    optimal_parameters = ''

    if kernel == 'linear':
        print('-'*50)
        print('Linear Kernel')
        print('-'*50)
        for c in cost:
            config = f'-s 0 -t 0 -c {c} -v 3 -q'
            optimal_accuracy, optimal_parameters = evaluate_hyperparameters(Y, X, config, optimal_accuracy, optimal_parameters)
            print('-'*50)
    elif kernel == 'polynomial':
        print('-'*50)
        print('Polynomial Kernel')
        print('-'*50)
        degree = [2, 3, 4]
        gammas = [0.01, 0.1, 1]
        for c in cost:
            for d in degree:
                for g in gammas:
                    config = f'-s 0 -t 1 -c {c} -d {d} -g {g} -v 3 -q'
                    optimal_accuracy, optimal_parameters = evaluate_hyperparameters(Y, X, config, optimal_accuracy, optimal_parameters)
                    print('-'*50)

    elif kernel == 'RBF':
        print('-'*50)
        print('RBF Kernel')
        print('-'*50)
        gammas = [0.01, 0.1, 1]
        for c in cost:
            for g in gammas:
                config = f'-s 0 -t 2 -c {c} -g {g} -v 3 -q'
                optimal_accuracy, optimal_parameters = evaluate_hyperparameters(Y, X, config, optimal_accuracy, optimal_parameters)
                print('-'*50)

    return optimal_accuracy, optimal_parameters

In [19]:
optimal_accuracy, optimal_parameters = grid_search(X_train, y_train, 'linear')
print('Optimal parameters:', optimal_parameters)
print('Optimal accuracy:', optimal_accuracy)

--------------------------------------------------
Linear Kernel
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 0 -c 0.01 -v 3 -q
Cross Validation Accuracy = 96.7%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 0 -c 0.1 -v 3 -q
Cross Validation Accuracy = 96.88%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 0 -c 1 -v 3 -q
Cross Validation Accuracy = 96.1%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 0 -c 10 -v 3 -q
Cross Validation Accuracy = 96.28%
--------------------------------------------------
Optimal parameters: -s 0 -t 0 -c 0.1 -v 3 -q
Optimal accuracy: 96.88


In [20]:
optimal_accuracy2, optimal_parameters2 = grid_search(X_train, y_train, 'polynomial')
print('Optimal parameters:', optimal_parameters2)
print('Optimal accuracy:', optimal_accuracy2)

--------------------------------------------------
Polynomial Kernel
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 1 -c 0.01 -d 2 -g 0.01 -v 3 -q
Cross Validation Accuracy = 77.92%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 1 -c 0.01 -d 2 -g 0.1 -v 3 -q
Cross Validation Accuracy = 97.4%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 1 -c 0.01 -d 2 -g 1 -v 3 -q
Cross Validation Accuracy = 97.74%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 1 -c 0.01 -d 3 -g 0.01 -v 3 -q
Cross Validation Accuracy = 58.7%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 1 -c 0.01 -d 3 -g 0.1 -v 3 -q
Cross Validation Accuracy = 97.44%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 1 -c 0.01 -d 3 -g 1 -v 3 -q
Cross Validation Accuracy = 97.7%
------------------------

In [21]:
optimal_accuracy3, optimal_parameters3 = grid_search(X_train, y_train, 'RBF')
print('Optimal parameters:', optimal_parameters3)
print('Optimal accuracy:', optimal_accuracy3)

--------------------------------------------------
RBF Kernel
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 2 -c 0.01 -g 0.01 -v 3 -q
Cross Validation Accuracy = 91.7%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 2 -c 0.01 -g 0.1 -v 3 -q
Cross Validation Accuracy = 48.9%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 2 -c 0.01 -g 1 -v 3 -q
Cross Validation Accuracy = 20.64%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 2 -c 0.1 -g 0.01 -v 3 -q
Cross Validation Accuracy = 96.12%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 2 -c 0.1 -g 0.1 -v 3 -q
Cross Validation Accuracy = 53.5%
--------------------------------------------------
Evaluating hyperparameters: -s 0 -t 2 -c 0.1 -g 1 -v 3 -q
Cross Validation Accuracy = 20.72%
--------------------------------------------------
Evaluating hy

## Task 3 
linear + RBF kernels 

In [22]:
def linear_kernel(X1, X2):
    """Compute the linear kernel."""
    return np.dot(X1, X2.T)

def rbf_kernel(X1, X2, gamma):
    """Compute the RBF kernel."""
    sq_dist = np.sum(X1**2, axis=1).reshape(-1, 1) + np.sum(X2**2, axis=1) - 2 * np.dot(X1, X2.T)
    return np.exp(-gamma * sq_dist)

def combined_kernel(X1, X2, gamma, alpha=0.5, beta=0.5):
    """
    Combine linear and RBF kernels:
    K_combined = alpha * K_linear + beta * K_RBF
    """
    K_linear = linear_kernel(X1, X2)
    K_rbf = rbf_kernel(X1, X2, gamma)
    return alpha * K_linear + beta * K_rbf

def create_precomputed_kernel_matrix(X, X_ref, kernel_func, *kernel_params):
    """
    Create a precomputed kernel matrix for use with libsvm.
    """
    kernel_matrix = kernel_func(X, X_ref, *kernel_params)
    m = kernel_matrix.shape[0]
    precomputed = np.zeros((m, m + 1))
    precomputed[:, 1:] = kernel_matrix
    precomputed[:, 0] = np.arange(1, m + 1)
    return precomputed

# Hyperparameter tuning with combined kernel
print('Combined Kernel Hyperparameter Tuning')
print('-' * 50)

gammas = [0.01, 0.1, 1]
cost = [0.01, 0.1, 1, 10]
alpha, beta = 0.5, 0.5
optimal_accuracy = 0
optimal_parameters = None

for c in cost:
    for g in gammas:
        print(f"Testing combined kernel with C={c}, gamma={g}")
        train_kernel = create_precomputed_kernel_matrix(X_train, X_train, combined_kernel, g, alpha, beta)
        config = f'-s 0 -t 4 -c {c} -v 3 -q'
        optimal_accuracy, optimal_parameters = evaluate_hyperparameters(
            y_train, train_kernel, config, optimal_accuracy, optimal_parameters
        )
        print(f"Current best accuracy: {optimal_accuracy}% with parameters: {optimal_parameters}")
        print('-' * 50)
        
print('Best configuration found')
print('-' * 50)
print(f'Optimal parameters: {optimal_parameters}')
print(f'Optimal accuracy: {optimal_accuracy}%')

Combined Kernel Hyperparameter Tuning
--------------------------------------------------
Testing combined kernel with C=0.01, gamma=0.01
Evaluating hyperparameters: -s 0 -t 4 -c 0.01 -v 3 -q
Cross Validation Accuracy = 96.48%
Current best accuracy: 96.48% with parameters: -s 0 -t 4 -c 0.01 -v 3 -q
--------------------------------------------------
Testing combined kernel with C=0.01, gamma=0.1
Evaluating hyperparameters: -s 0 -t 4 -c 0.01 -v 3 -q
Cross Validation Accuracy = 96.58%
Current best accuracy: 96.58% with parameters: -s 0 -t 4 -c 0.01 -v 3 -q
--------------------------------------------------
Testing combined kernel with C=0.01, gamma=1
Evaluating hyperparameters: -s 0 -t 4 -c 0.01 -v 3 -q
Cross Validation Accuracy = 96.42%
Current best accuracy: 96.58% with parameters: -s 0 -t 4 -c 0.01 -v 3 -q
--------------------------------------------------
Testing combined kernel with C=0.1, gamma=0.01
Evaluating hyperparameters: -s 0 -t 4 -c 0.1 -v 3 -q
Cross Validation Accuracy = 97.0