In [69]:
import cvxpy as cp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [70]:
# Read the dataset from the file
data = pd.read_csv('car+evaluation/car.data', header=None)
data = pd.get_dummies(data)
data.iloc[:,:] = data.iloc[:,:].replace({False: -1, True: 1})
# Display the first few rows of the dataset
data.info()
data.iloc[:5,:]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 25 columns):
 #   Column   Non-Null Count  Dtype
---  ------   --------------  -----
 0   0_high   1728 non-null   int64
 1   0_low    1728 non-null   int64
 2   0_med    1728 non-null   int64
 3   0_vhigh  1728 non-null   int64
 4   1_high   1728 non-null   int64
 5   1_low    1728 non-null   int64
 6   1_med    1728 non-null   int64
 7   1_vhigh  1728 non-null   int64
 8   2_2      1728 non-null   int64
 9   2_3      1728 non-null   int64
 10  2_4      1728 non-null   int64
 11  2_5more  1728 non-null   int64
 12  3_2      1728 non-null   int64
 13  3_4      1728 non-null   int64
 14  3_more   1728 non-null   int64
 15  4_big    1728 non-null   int64
 16  4_med    1728 non-null   int64
 17  4_small  1728 non-null   int64
 18  5_high   1728 non-null   int64
 19  5_low    1728 non-null   int64
 20  5_med    1728 non-null   int64
 21  6_acc    1728 non-null   int64
 22  6_good   1728 non-null  

Unnamed: 0,0_high,0_low,0_med,0_vhigh,1_high,1_low,1_med,1_vhigh,2_2,2_3,...,4_big,4_med,4_small,5_high,5_low,5_med,6_acc,6_good,6_unacc,6_vgood
0,-1,-1,-1,1,-1,-1,-1,1,1,-1,...,-1,-1,1,-1,1,-1,-1,-1,1,-1
1,-1,-1,-1,1,-1,-1,-1,1,1,-1,...,-1,-1,1,-1,-1,1,-1,-1,1,-1
2,-1,-1,-1,1,-1,-1,-1,1,1,-1,...,-1,-1,1,1,-1,-1,-1,-1,1,-1
3,-1,-1,-1,1,-1,-1,-1,1,1,-1,...,-1,1,-1,-1,1,-1,-1,-1,1,-1
4,-1,-1,-1,1,-1,-1,-1,1,1,-1,...,-1,1,-1,-1,-1,1,-1,-1,1,-1


In [71]:
feat_num = data.shape[1] - 4
columns = [i for i in range(feat_num)]

In [72]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

train_X, test_X, train_y, test_y = train_test_split(data.iloc[:, columns], data.iloc[:, 21], test_size=0.3)

# Create an instance of the StandardScaler
scaler = MinMaxScaler()

# Normalize the data
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)

train_X

array([[0., 0., 0., ..., 0., 1., 0.],
       [1., 0., 0., ..., 1., 0., 0.],
       [0., 0., 1., ..., 1., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 1., 0.],
       [0., 1., 0., ..., 1., 0., 0.],
       [1., 0., 0., ..., 1., 0., 0.]])

In [73]:
def gaussian_kernel(x, y, sigma=1.0):
    return np.exp(-np.linalg.norm(x - y) ** 2 / (2 * (sigma ** 2)))
def linear_kernel(x, y):
    return np.dot(x, y)
def polynomial_kernel(x, y, c=1, d=7):
    return (np.dot(x, y) + c) ** d
def laplacian_kernel(x, y, sigma=1):
    return np.exp(-np.linalg.norm(x - y) / sigma)

In [74]:
def train_kernel_svm(X_train, y_train, X_test, y_test, C = 1, kernel='gaussian'):
    if kernel == 'gaussian':
        kernel_matrix = np.array([[gaussian_kernel(x, y) for y in train_X] for x in train_X])
    elif kernel == 'linear':
        kernel_matrix = np.array([[linear_kernel(x, y) for y in train_X] for x in train_X])
    elif kernel == 'polynomial':
        kernel_matrix = np.array([[polynomial_kernel(x, y) for y in train_X] for x in train_X])
    elif kernel == 'laplacian':
        kernel_matrix = np.array([[laplacian_kernel(x, y) for y in train_X] for x in train_X])
    alpha = cp.Variable((train_X.shape[0],1))
    outer_y = train_y.values.reshape((-1,1)) * train_y.values.reshape((-1,1)).T
    gram = outer_y * kernel_matrix
    objective = cp.Minimize(-cp.sum(alpha) + 0.5 * cp.quad_form(alpha, cp.psd_wrap(gram)))
    constraints = [0 <= alpha, alpha <= C, train_y.values @ alpha == 0]

    problem = cp.Problem(objective, constraints)
    problem.solve(solver = cp.SCS)
    print("problem status: ",problem.status)
    test_kernel_matrix = np.array([[gaussian_kernel(x, y) for y in test_X] for x in train_X])
    predictions = np.sign(np.sum(train_y.values.reshape((-1,1)) * alpha.value * test_kernel_matrix, axis=0))
    accuracy = np.mean(predictions == test_y.values)

    print(f"Accuracy: {accuracy}")

In [75]:
train_kernel_svm(train_X, train_y, test_X, test_y)

problem status:  optimal
Accuracy: 0.8342967244701349


In [76]:
train_kernel_svm(train_X, train_y, test_X, test_y, kernel= 'linear')

problem status:  optimal
Accuracy: 0.630057803468208


In [77]:
train_kernel_svm(train_X, train_y, test_X, test_y, kernel= 'polynomial')

problem status:  optimal
Accuracy: 0.8208092485549133


In [78]:
train_kernel_svm(train_X, train_y, test_X, test_y, kernel= 'laplacian')

problem status:  optimal
Accuracy: 0.7976878612716763


In [80]:
# Train the SVM classifier with a soft margin
def train_linear_svm(X_train, y_train, X_test, y_test, gamma = 0.001, norm = 1):
    # m is the number of training examples
    m = X_train.shape[0]
    # n is the number of features
    n = X_train.shape[1]
    # Need to train k different classifiers
    k = y_train.shape[1]
    predict_train = np.zeros((k,m,1))
    predict_test = np.zeros((k,X_test.shape[0],1))
    for i in range(k):
        y = y_train[:,i].reshape((-1,1))
        w = cp.Variable((n,1))
        b = cp.Variable()
        eta = cp.Variable((m,1))
        norm_eta = cp.norm1(eta) if norm == 1 else cp.norm2(eta)
        const = np.ones((m,1))
        objective = cp.Minimize(cp.norm2(w) + gamma * norm_eta)
        constraints = [cp.multiply(y, X_train @ w - b) >= const - eta, eta >= 0]
        problem = cp.Problem(objective, constraints)
        problem.solve(verbose=False)
        predict_train[i] = np.sign((X_train @ w.value - b.value * const))
        predict_test[i] = np.sign((X_test @ w.value - b.value * np.ones((X_test.shape[0],1))))
    predict_train = np.squeeze(predict_train.T)
    accuracy_train = np.mean(np.all(predict_train == y_train, axis=1))
    accuracy_train = round(accuracy_train, 3)
    predict_test = np.squeeze(predict_test.T)
    accuracy_test = np.mean(np.all(predict_test == y_test, axis=1))
    accuracy_test = round(accuracy_test, 3)

    return accuracy_train, accuracy_test

In [81]:
def train_best_linear_svm(data_path = 'car+evaluation/car.data'):
    data = pd.read_csv(data_path, header=None)
    data = pd.get_dummies(data)
    # Change the false labels to -1 in last 4 columns
    data.iloc[:,:] = data.iloc[:,:].replace({False: -1, True: 1})
    train_ratio = 0.8
    X_train = data.iloc[:int(train_ratio * data.shape[0]),:-4].values
    y_train = data.iloc[:int(train_ratio * data.shape[0]),-4:].values
    X_test = data.iloc[int(train_ratio * data.shape[0]):,:-4].values
    y_test = data.iloc[int(train_ratio * data.shape[0]):,-4:].values
    gamma_list = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    norms = [1, 2]
    history = []
    for norm in norms:
        for gamma in gamma_list:
            accuracy_train, accuracy_test = train_linear_svm(X_train = X_train, y_train = y_train, X_test = X_test, y_test = y_test, gamma = gamma, norm = norm)
            history.append([norm, gamma, accuracy_train, accuracy_test])
    for row in history:
        print('norm: {}, gamma: {}, train accuracy: {}, test accuracy: {}'.format(row[0], row[1], row[2], row[3]))
    # Find the best test accuracy row
    best_row = history[np.argmax(np.array(history)[:,-1])]
    print('best test accuracy: {:<10} training accuracy: {:<10} norm: {:<10} gamma: {:<10}'.format(best_row[3], best_row[2], best_row[0], best_row[1]))

In [84]:
def train_vote_svm(X_train, X_test, y_train, y_test, gamma, eta_norm):
    # Number of training examples
    m = X_train.shape[0]
    # Number of features
    n = X_train.shape[1]
    # Number of classes
    k = 4
    
    # Initialize the predictions matrix
    train_predictions = np.zeros((m, 6))
    test_predictions = np.zeros((X_test.shape[0], 6))
    
    # Train k*(k-1)/2 SVM models
    model_index = 0
    for i in range(k):
        for j in range(i+1, k):
            # Select the samples for the current pair of classes
            X_train_pair = X_train[np.logical_or(y_train == i, y_train == j)]
            y_train_pair = y_train[np.logical_or(y_train == i, y_train == j)]
            y_train_pair = np.where(y_train_pair == i, -1, 1).reshape((-1,1))
            
            X_test_pair = X_test[np.logical_or(y_test == i, y_test == j)]
            y_test_pair = y_test[np.logical_or(y_test == i, y_test == j)]
            y_test_pair = np.where(y_test_pair == i, -1, 1).reshape((-1,1))
            if(X_train_pair.shape[0] == 0):
                continue
            # Variables
            w = cp.Variable((n,1))
            b = cp.Variable()
            eta = cp.Variable((X_train_pair.shape[0],1))
            
            # Constraints
            constraints = [cp.multiply(y_train_pair, X_train_pair @ w - b) >= 1 - eta, eta >= 0]
            
            # Objective function
            objective = cp.Minimize(cp.norm2(w) + gamma * cp.norm(eta, eta_norm))
            
            # Problem definition
            problem = cp.Problem(objective, constraints)
            
            # Solve the problem
            problem.solve()
            
            # Get the optimal values
            w_opt = w.value.reshape((-1,1))
            b_opt = b.value
            
            # Calculate the predictions for training and test data
            train_predictions[:, model_index] = np.sign(X_train @ w_opt - b_opt).reshape((-1,))
            test_predictions[:, model_index] = np.sign(X_test @ w_opt - b_opt).reshape((-1,))
            # If the prediction is negative, the class is i, otherwise it is j
            # Change the prediction value into i or j
            train_predictions[:, model_index] = np.where(train_predictions[:, model_index] == -1, i, j)
            test_predictions[:, model_index] = np.where(test_predictions[:, model_index] == -1, i, j)
            
            
            model_index += 1
    
    # Calculate the final predictions by majority voting
    # If there is a tie, choose the class with the smallest index
    train_final_predictions = np.zeros((m,))
    test_final_predictions = np.zeros((X_test.shape[0],))
    for i in range(m):
        train_final_predictions[i] = np.argmax(np.bincount(train_predictions[i,:].astype('int')))
    for i in range(X_test.shape[0]):
        test_final_predictions[i] = np.argmax(np.bincount(test_predictions[i,:].astype('int')))
    
    # Calculate the accuracies
    train_accuracy = np.mean(train_final_predictions == y_train)
    test_accuracy = np.mean(test_final_predictions == y_test)
    train_accuracy = round(train_accuracy, 3)
    test_accuracy = round(test_accuracy, 3)
    
    return train_accuracy, test_accuracy

def train_svm_with_parameters(data_path = 'car+evaluation/car.data'):
    data = pd.read_csv(data_path, header=None)
    # Change each string value to a unique integer
    for col in data.columns:
        data[col] = data[col].astype('category').cat.codes
    X = data.iloc[:, :-1].values
    y = data.iloc[:, -1].values

    
    train_ratio = 0.8
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=1-train_ratio)
    
    gamma_list = [0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
    eta_norms = [1, 2]
    
    history = []
    
    for gamma in gamma_list:
        for eta_norm in eta_norms:
            train_accuracy, test_accuracy = train_vote_svm(X_train, X_test, y_train, y_test, gamma, eta_norm)
            history.append([eta_norm, gamma, train_accuracy, test_accuracy])
    for row in history:
        print('norm: {}, gamma: {}, train accuracy: {}, test accuracy: {}'.format(row[0], row[1], row[2], row[3]))
    # Find the best test accuracy row
    best_row = history[np.argmax(np.array(history)[:,-1])]
    print('best test accuracy: {:<10} training accuracy: {:<10} norm: {:<10} gamma: {:<10}'.format(best_row[3], best_row[2], best_row[0], best_row[1]))


In [85]:
train_best_linear_svm()
train_svm_with_parameters()

    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


norm: 1, gamma: 0.001, train accuracy: 0.732, test accuracy: 0.572
norm: 1, gamma: 0.003, train accuracy: 0.722, test accuracy: 0.566
norm: 1, gamma: 0.01, train accuracy: 0.874, test accuracy: 0.708
norm: 1, gamma: 0.03, train accuracy: 0.891, test accuracy: 0.647
norm: 1, gamma: 0.1, train accuracy: 0.886, test accuracy: 0.699
norm: 1, gamma: 0.3, train accuracy: 0.886, test accuracy: 0.717
norm: 1, gamma: 1, train accuracy: 0.885, test accuracy: 0.728
norm: 1, gamma: 3, train accuracy: 0.885, test accuracy: 0.734
norm: 1, gamma: 10, train accuracy: 0.885, test accuracy: 0.734
norm: 1, gamma: 30, train accuracy: 0.885, test accuracy: 0.734
norm: 2, gamma: 0.001, train accuracy: 0.732, test accuracy: 0.572
norm: 2, gamma: 0.003, train accuracy: 0.732, test accuracy: 0.572
norm: 2, gamma: 0.01, train accuracy: 0.732, test accuracy: 0.572
norm: 2, gamma: 0.03, train accuracy: 0.732, test accuracy: 0.572
norm: 2, gamma: 0.1, train accuracy: 0.789, test accuracy: 0.584
norm: 2, gamma: 0.3

    Your problem is being solved with the ECOS solver by default. Starting in 
    CVXPY 1.5.0, Clarabel will be used as the default solver instead. To continue 
    using ECOS, specify the ECOS solver explicitly using the ``solver=cp.ECOS`` 
    argument to the ``problem.solve`` method.
    


norm: 1, gamma: 0.001, train accuracy: 0.699, test accuracy: 0.705
norm: 2, gamma: 0.001, train accuracy: 0.699, test accuracy: 0.705
norm: 1, gamma: 0.003, train accuracy: 0.699, test accuracy: 0.705
norm: 2, gamma: 0.003, train accuracy: 0.699, test accuracy: 0.705
norm: 1, gamma: 0.01, train accuracy: 0.699, test accuracy: 0.705
norm: 2, gamma: 0.01, train accuracy: 0.699, test accuracy: 0.705
norm: 1, gamma: 0.03, train accuracy: 0.699, test accuracy: 0.705
norm: 2, gamma: 0.03, train accuracy: 0.699, test accuracy: 0.705
norm: 1, gamma: 0.1, train accuracy: 0.699, test accuracy: 0.705
norm: 2, gamma: 0.1, train accuracy: 0.699, test accuracy: 0.705
norm: 1, gamma: 0.3, train accuracy: 0.726, test accuracy: 0.72
norm: 2, gamma: 0.3, train accuracy: 0.706, test accuracy: 0.699
norm: 1, gamma: 1, train accuracy: 0.729, test accuracy: 0.72
norm: 2, gamma: 1, train accuracy: 0.708, test accuracy: 0.685
norm: 1, gamma: 3, train accuracy: 0.729, test accuracy: 0.72
norm: 2, gamma: 3, tra