In [1]:
##Importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time
import math
import gzip
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle

def compute_cost(W, X, Y):
    # calculate hinge loss
    N = X.shape[0]
    distances = 1 - Y * (np.dot(X, W))
    distances[distances < 0] = 0  # equivalent to max(0, distance)
    hinge_loss = regularization_strength * (np.sum(distances) / N)

    # calculate cost
    cost = 1 / 2 * np.dot(W, W) + hinge_loss
    return cost

def calculate_cost_gradient(W, X_batch, Y_batch):
    # if only one example is passed (eg. in case of SGD)
    #if type(Y_batch) == np.float64:
    Y_batch = np.array([Y_batch])
    X_batch = np.array([X_batch])  # gives multidimensional array

    distance = 1 - (Y_batch * np.dot(X_batch, W))
    dw = np.zeros(len(W))

    for ind, d in enumerate(distance):
        if max(0, d) == 0:
            di = W
        else:
            di = W - (regularization_strength * Y_batch[ind] * X_batch[ind])
        dw += di

    dw = dw/len(Y_batch)  # average
    return dw

def sgd(X_train, y_train):
    max_epochs = 5000
    weights = np.zeros(X_train.shape[1])
    nth = 0
    prev_cost = float("inf")
    cost_threshold = 0.01  # in percent
    # stochastic gradient descent
    for epoch in range(1, max_epochs):
        # shuffle to prevent repeating update cycles
        X, Y = shuffle(X_train, y_train)
        for ind, x in enumerate(X):
            ascent = calculate_cost_gradient(weights, x, Y[ind])
            weights = weights - (learning_rate * ascent)

        # convergence check on 2^nth epoch
        if epoch == 2 ** nth or epoch == max_epochs - 1:
            cost = compute_cost(weights, X_train, y_train)
            #print("Epoch is: {} and Cost is: {}".format(epoch, cost))
            # stoppage criterion
            if abs(prev_cost - cost) < cost_threshold * prev_cost:
                return weights
            prev_cost = cost
            nth += 1
    return weights

def predict(best_para, X_test):
    #y_predict = np.array([])
    #for i in range(X_test.shape[0]):
        #temp = np.sign(np.dot(w, X_test.to_numpy()[i]))
        #y_predict = np.append(y_predict, temp)
        
    w = np.array(best_para)
    #print(w.shape)
    b = best_para[-1]
    y_pred = np.dot(X_test, w) + b
    y_predict = [9 if y_pred[i] > 0 else 0 for i in range(X_test.shape[0])]
    
    return y_predict


def accuracy_estimate(y_test, y_pred):
    ##Testing error of each method
    accuracy_val = np.sum(y_test == y_pred) / len(y_test)
    #accuracy_val = np.mean(np.absolute(y_test - y_pred))
    return accuracy_val
    

def svm_estimate(X_train, X_test, y_train, y_test):
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)
    
    classifier = SVC(kernel = 'linear', random_state = 0)
    classifier.fit(X_train, y_train)
    
    y_pred = classifier.predict(X_test)
    
    #accuracy = accuracy_estimate(y_test, y_pred)
    accuracy = accuracy_score(y_test,y_pred)
    print('Accuracy of SVM with sklearn = ', accuracy)

In [2]:
##Resubmission

##Importing MNIST Data
train_raw = pd.read_csv(r'C:\Users\sbhatta\Box\Spring 2022\EE 425\Assignments\HW-4\mnist\mnist_train.csv')
test_raw = pd.read_csv(r'C:\Users\sbhatta\Box\Spring 2022\EE 425\Assignments\HW-4\mnist\mnist_test.csv')
train_raw.shape
test_raw.shape

train_data = train_raw.drop('label',axis=1)
train_data = train_raw.iloc[:, 1:]
train_label = train_raw['label']
#train_label = train_raw.loc[:, 'label']
print('Training Data Shape : ', train_data.shape)
print('Training label Shape : ', train_label.shape)

test_data = test_raw.drop('label',axis=1)
test_data = test_raw.iloc[:, 1:]
test_label = test_raw['label']
#test_label = test_raw.loc[:, 'label']
print('Testing Data Shape : ', test_data.shape)
print('Testing label Shape : ', test_label.shape)

X_train = train_data[np.logical_or(train_label==9, train_label==0)]
y_train = train_label[np.logical_or(train_label==9, train_label==0)]
X_test = test_data[np.logical_or(test_label==9, test_label==0)]
y_test = test_label[np.logical_or(test_label==9, test_label==0)]
print('X_train shape :', X_train.shape)
print('y_train shape :', y_train.shape)
print('X_test shape :', X_test.shape)
print('y_test shape :', y_test.shape)


Training Data Shape :  (60000, 784)
Training label Shape :  (60000,)
Testing Data Shape :  (10000, 784)
Testing label Shape :  (10000,)
X_train shape : (11872, 784)
y_train shape : (11872,)
X_test shape : (1989, 784)
y_test shape : (1989,)


In [3]:
##set hyper-parameters
regularization_strength = 1000
learning_rate = 0.000001
sc = StandardScaler()
#X_normalized = MinMaxScaler().fit_transform(X_train.values)
X_normalized = sc.fit_transform(X_train)
X_train = pd.DataFrame(X_normalized)

#y_normalized = sc.fit_transform(y_train)
#y_train = pd.DataFrame(y_normalized)

w = sgd(X_train.to_numpy(), y_train.to_numpy())
#print("weights : ".format(w))
#print(w.shape)

y_predict = predict(w, X_test)

for i in range(len(y_predict)):
    if y_predict[i] == -1:
        y_predict[i] = 0
        
    if y_predict[i] == 1:
        y_predict[i] = 9


#print(y_test.shape)
#print(y_predict)

accuracy = accuracy_estimate(y_test, y_predict)
print('Accuracy of SVM = ', accuracy)

Accuracy of SVM =  0.9748617395676219


In [166]:
##Comparison with sklearn SVM functions
svm_estimate(X_train, X_test, y_train, y_test)

Accuracy of SVM with sklearn =  0.9914529914529915
