In [46]:
import cvxpy as cp
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import math

In [47]:
# Read the dataset from the file
data = pd.read_csv('car+evaluation/car.data', header=None)

# Display the first few rows of the dataset
print(data.head())
data.info()
data[6].unique()

       0      1  2  3      4     5      6
0  vhigh  vhigh  2  2  small   low  unacc
1  vhigh  vhigh  2  2  small   med  unacc
2  vhigh  vhigh  2  2  small  high  unacc
3  vhigh  vhigh  2  2    med   low  unacc
4  vhigh  vhigh  2  2    med   med  unacc
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1728 entries, 0 to 1727
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       1728 non-null   object
 1   1       1728 non-null   object
 2   2       1728 non-null   object
 3   3       1728 non-null   object
 4   4       1728 non-null   object
 5   5       1728 non-null   object
 6   6       1728 non-null   object
dtypes: object(7)
memory usage: 94.6+ KB


array(['unacc', 'acc', 'vgood', 'good'], dtype=object)

In [48]:
#convert all columns to numeric
for i in range(0, 6):
    data[i] = pd.Categorical(data[i])
    data[i] = data[i].cat.codes

In [49]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

columns = [0,1,2,3,4,5]

train_X, test_X, train_y, test_y = train_test_split(data[columns], data[6].replace({'unacc': -1, 'acc': 1, 'good': 1, 'vgood': 1}), test_size=0.3)

# Create an instance of the StandardScaler
scaler = MinMaxScaler()

# Normalize the data
train_X = scaler.fit_transform(train_X)
test_X = scaler.transform(test_X)

train_X

array([[1.        , 0.33333333, 0.66666667, 0.5       , 0.        ,
        0.        ],
       [0.33333333, 1.        , 0.        , 0.        , 0.        ,
        0.5       ],
       [0.33333333, 1.        , 1.        , 0.5       , 0.        ,
        0.5       ],
       ...,
       [1.        , 0.66666667, 0.33333333, 1.        , 0.5       ,
        1.        ],
       [0.66666667, 1.        , 0.33333333, 0.        , 0.5       ,
        0.5       ],
       [0.66666667, 0.66666667, 1.        , 0.5       , 0.5       ,
        1.        ]])

In [50]:
def gaussian_kernel(x, y, sigma=1.0):
    return np.exp(-np.linalg.norm(x - y) ** 2 / (2 * (sigma ** 2)))
def linear_kernel(x, y):
    return np.dot(x, y)
kernel_matrix = np.array([[gaussian_kernel(x, y) for y in train_X] for x in train_X])
kernel_matrix


array([[1.        , 0.39984965, 0.53526143, ..., 0.42269216, 0.49246429,
        0.45308902],
       [0.39984965, 1.        , 0.53526143, ..., 0.33846543, 0.78969293,
        0.37302452],
       [0.53526143, 0.53526143, 1.        , ..., 0.41686202, 0.58991444,
        0.69690156],
       ...,
       [0.42269216, 0.33846543, 0.41686202, ..., 1.        , 0.47897297,
        0.66846063],
       [0.49246429, 0.78969293, 0.58991444, ..., 0.47897297, 1.        ,
        0.58991444],
       [0.45308902, 0.37302452, 0.69690156, ..., 0.66846063, 0.58991444,
        1.        ]])

In [51]:
outer_y=np.outer(train_y, train_y)

In [52]:
outer_y * kernel_matrix

array([[ 1.        , -0.39984965, -0.53526143, ...,  0.42269216,
        -0.49246429,  0.45308902],
       [-0.39984965,  1.        ,  0.53526143, ..., -0.33846543,
         0.78969293, -0.37302452],
       [-0.53526143,  0.53526143,  1.        , ..., -0.41686202,
         0.58991444, -0.69690156],
       ...,
       [ 0.42269216, -0.33846543, -0.41686202, ...,  1.        ,
        -0.47897297,  0.66846063],
       [-0.49246429,  0.78969293,  0.58991444, ..., -0.47897297,
         1.        , -0.58991444],
       [ 0.45308902, -0.37302452, -0.69690156, ...,  0.66846063,
        -0.58991444,  1.        ]])

In [53]:
alpha = cp.Variable((train_X.shape[0],1))
outer_y = train_y.values.reshape((-1,1)) * train_y.values.reshape((-1,1)).T
gram = outer_y * kernel_matrix
objective = cp.Minimize(-cp.sum(alpha) + 0.5 * cp.quad_form(alpha, cp.psd_wrap(gram)))
constraints = [0 <= alpha, alpha <= 1, train_y.values @ alpha == 0]

problem = cp.Problem(objective, constraints)
problem.solve(solver = cp.SCS)
print("problem status: ",problem.status)

problem status:  optimal


In [54]:
# support_vector_indices = np.where(alpha.value > 1e-5)[0]
# support_vectors = train_X[support_vector_indices]
# support_vector_labels = train_y.values[support_vector_indices]

test_kernel_matrix = np.array([[gaussian_kernel(x, y) for y in test_X] for x in train_X])
predictions = np.sign(np.sum(train_y.values.reshape((-1,1)) * alpha.value * test_kernel_matrix, axis=0))

accuracy = np.mean(predictions == test_y.values)
print(f"Accuracy: {accuracy}")

Accuracy: 0.7745664739884393


In [55]:
# Train the SVM classifier
def svm(X, y, gamma, regularization_type):
    y = y.reshape((-1,1))
    # Define the SVM variables
    w = cp.Variable((X.shape[1],1))
    b = cp.Variable()
    n = cp.Variable((X.shape[0],1))
    const = np.ones((X.shape[0],1))

    # Choose the regularization type
    w_norm = cp.norm2(w)
    if regularization_type == 1:
        w_norm = cp.norm1(w)

    # Define the SVM constraints
    constraints = [cp.multiply(y, (X @ w + b)) >= const - n, n >= 0]

    # Define the objective function to minimize the sum of the regularization term
    objective = cp.Minimize(w_norm + gamma * cp.norm1(n))

    # Define the SVM problem
    problem = cp.Problem(objective, constraints)

    # Solve the SVM problem
    problem.solve()
    return w.value, b.value

def get_accuracy(X, y, w, b):
    y_pred = np.sign(X @ w + b)
    return np.mean(y_pred == y)



In [56]:
# Train the SVM classifier
def svm(X, y, gamma, regularization_type):
    y = np.array(y).reshape((-1,1))
    # Define the SVM variables
    w = cp.Variable((X.shape[1],1))
    b = cp.Variable()
    n = cp.Variable((X.shape[0],1))
    const = np.ones((X.shape[0],1))

    # Choose the regularization type
    w_norm = cp.norm1(w)
    if regularization_type == 2:
        w_norm = cp.norm2(w)

    # Define the SVM constraints
    constraints = [cp.multiply(y, (X @ w + b)) >= const - n, n >= 0]

    # Define the objective function to minimize the sum of the regularization term
    objective = cp.Minimize(w_norm + gamma * cp.norm1(n))

    # Define the SVM problem
    problem = cp.Problem(objective, constraints)

    # Solve the SVM problem
    problem.solve(verbose=False)
    return w.value, b.value

def get_accuracy(X, y, w, b):
    y_pred = np.sign(X @ w + b)
    return np.mean(y_pred == y)

def get_best_model(X, y, X_test, y_test, gamma_list, regularization_type):
    best_accuracy = 0
    best_w = None
    best_b = None

    for gamma in gamma_list:
        w, b = svm(X, y, gamma, regularization_type)
        accuracy = get_accuracy(X_test, y_test, w, b)
        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_gamma = gamma
            best_w = w
            best_b = b
    return best_w, best_b

In [72]:
train_X, test_X, train_y, test_y = train_test_split(data.iloc[:,:6], data.iloc[:,6], test_size=0.2, random_state=6)

In [73]:
# Try different values of gamma
def train_general(gamma_list, regularization_type):
    # Train a SVM classifier, suppose acceptable is -1 and unacceptable, good, very good are 1
    train_y1 = train_y.replace({'unacc': -1, 'acc': 1, 'good': 1, 'vgood': 1})
    test_y1 = test_y.replace({'unacc': -1, 'acc': 1, 'good': 1, 'vgood': 1})
    best_w1, best_b1 = get_best_model(train_X.values, train_y1.values, test_X.values, test_y1.values, gamma_list, regularization_type)
    accuracy1 = get_accuracy(test_X.values, test_y1.values, best_w1, best_b1)
    print(f"General Accuracy: {accuracy1}")
    return best_w1, best_b1

def train_vote(gamma, regularization_type):
    # Train a SVM classifier, suppose unacceptable is -1 and acceptable is 1
    train_y2 = train_y[~train_y.isin(['good', 'vgood'])].replace({'unacc': -1, 'acc': 1})
    test_y2 = test_y[~test_y.isin(['good', 'vgood'])].replace({'unacc': -1, 'acc': 1})
    w2, b2 = get_best_model(train_X[~train_y.isin(['good', 'vgood'])].values, train_y2.values, test_X[~test_y.isin(['good', 'vgood'])].values, test_y2.values, gamma, regularization_type)
    # Train a SVM classifier, suppose unacceptable is -1 and good is 1
    train_y3 = train_y[~train_y.isin(['acc', 'vgood'])].replace({'unacc': -1, 'good': 1})
    test_y3 = test_y[~test_y.isin(['acc', 'vgood'])].replace({'unacc': -1, 'good': 1})
    w3, b3 = get_best_model(train_X[~train_y.isin(['acc', 'vgood'])].values, train_y3.values, test_X[~test_y.isin(['acc', 'vgood'])].values, test_y3.values, gamma, regularization_type)
    # Train a SVM classifier, suppose acceptable is -1 and unacceptable, very good is 1
    train_y4 = train_y[~train_y.isin(['acc', 'good'])].replace({'unacc': -1, 'vgood': 1})
    test_y4 = test_y[~test_y.isin(['acc', 'good'])].replace({'unacc': -1, 'vgood': 1})
    w4, b4 = get_best_model(train_X[~train_y.isin(['acc', 'good'])].values, train_y4.values, test_X[~test_y.isin(['acc', 'good'])].values, test_y4.values, gamma, regularization_type)
    
    # The final prediction is the majority vote of the four SVM classifiers
    def predict(X):
        return np.sign(np.sum(np.array([X @ w2 + b2, X @ w3 + b3, X @ w4 + b4]), axis=0))
    accuracy = np.mean(predict(test_X.values) == test_y.replace({'unacc': -1, 'acc': 1, 'good': 1, 'vgood': 1}).values)
    print(f"Vote Accuracy: {accuracy}")

In [74]:
gamma_list = [0.01, 0.1, 1, 5, 10, 50]
train_general(gamma_list, 1)
train_general(gamma_list, 2)
train_vote(gamma_list, 1)
train_vote(gamma_list, 2)

General Accuracy: 0.708092485549133
General Accuracy: 0.708092485549133
Vote Accuracy: 0.708092485549133
Vote Accuracy: 0.708092485549133


Hi! We plan to train four separate Support Vector Machine (SVM) models with a unique approach.

There's four possible values in the target value: A B C D. 

First Model (Binary Classification for A): In the first model, category A will be labeled as '1', while B, C, and D will be labeled as '-1'. This model's primary objective is to differentiate category A from the others.

Next Three Models (One-vs-One Approach): For the subsequent three models, we will employ a one-vs-one strategy where each model will differentiate A from one of the other categories (B, C, or D). Specifically, the models will be: A=1, B=-1; A=1, C=-1; and A=1, D=-1.

Accuracy Evaluation in Three Phases:

First Phase: we will evaluate the accuracy of the first model (A vs. BCD) independently.

Second Phase: The second evaluation will involve the combined accuracy of the next three models. The prediction for each instance will be the majority vote from these models. If the majority vote is inconclusive, we'll randomly select between the two equally probable classes.

Third Phase: The final accuracy evaluation will combine all four models. The prediction will again be based on the majority vote. In cases where there is a tie (two '1's and two '-1's), we will randomly select between '-1' and '1' for the final prediction.

Do you find this approach feasible and valid for evaluating the performance of SVM models in a multi-class classification problem?
