In [7]:
#numpy
import numpy as np
from numpy import column_stack

# pandas
import pandas as pd

# matplotlib
import matplotlib.pyplot as plt

import pickle 

# sklearn
from sklearn.svm import SVC
from sklearn.metrics.pairwise import polynomial_kernel
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, RobustScaler

# time
import time

# Ignore warnings: unless the interpreter was started with explicit -W options
# (which populate sys.warnoptions), install an "ignore" filter for every
# warning category for the rest of the session.
import sys
import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")

In [47]:
# Load the Fashion-MNIST test and training tables from CSV files located in
# the current working directory.
test_fas_data = pd.read_csv('fashion-mnist_test.csv')
train_fash_data = pd.read_csv('fashion-mnist_train.csv')
# Optional (disabled): replace each label by its parity.
# train_fash_data['label'] = train_fash_data['label'] % 2

In [48]:
# Hold out 33% of the training rows as a validation set; the split is seeded
# and stratified on the label column.
X_train, X_val, y_train, y_val = train_test_split(
    train_fash_data.drop(columns='label'),
    train_fash_data['label'],
    test_size=0.33,
    random_state=42,
    stratify=train_fash_data['label'],
)

In [49]:
# Separate the test table into its label column and the remaining feature columns.
y_test = test_fas_data['label']
X_test = test_fas_data.drop(columns='label')

In [50]:
# Scale the input features: the scaler is fitted on the training features only
# and the same fitted transformation is then applied to every split.
scaler = RobustScaler()
scaler.fit(X_train)
X_train, X_test, X_val = (
    scaler.transform(split) for split in (X_train, X_test, X_val)
)

In [51]:
# # Onehot encode labels
# lab_scaler = LabelEncoder().fit(np.array(y_train).reshape(-1,1))
# y_train = lab_scaler.transform(np.array(y_train).reshape(-1,1))
# y_test = lab_scaler.transform(np.array(y_test).reshape(-1,1))
# y_val = lab_scaler.transform(np.array(y_val).reshape(-1,1))

# cat_scaler = OneHotEncoder().fit(np.array(y_train).reshape(-1,1))
# y_train = cat_scaler.transform(np.array(y_train).reshape(-1,1))
# y_test = cat_scaler.transform(np.array(y_test).reshape(-1,1))
# y_val = cat_scaler.transform(np.array(y_val).reshape(-1,1))

In [53]:
def svm_a(c_values=None):
    """Fit one linear-kernel SVC per C value and score it on every data split.

    Uses the module-level ``X_train``/``y_train``, ``X_test``/``y_test`` and
    ``X_val``/``y_val``.

    Parameters
    ----------
    c_values : iterable of numbers, optional
        Regularisation strengths to evaluate. Defaults to the nine powers of
        ten from 0.0001 to 10000.

    Returns
    -------
    train_acc : dict
        C value -> accuracy on the training split.
    test_acc : dict
        C value -> accuracy on the test split.
    dev_acc : dict
        C value -> accuracy on the validation split.
    train_acc_n_vec : dict
        C value -> the fitted ``SVC`` model itself (not a count).
    """
    if c_values is None:
        c_values = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000]

    train_acc = {}
    test_acc = {}
    dev_acc = {}
    train_acc_n_vec = {}

    for c in c_values:
        print('Evaluating Model for : ', c)
        model = SVC(C=c, kernel='linear', verbose=True)
        model.fit(X_train, y_train)

        train_acc[c] = model.score(X_train, y_train)
        # The fitted model is stored as-is so callers can inspect it later.
        train_acc_n_vec[c] = model
        test_acc[c] = model.score(X_test, y_test)
        # Previously written to an undefined `val_acc`, which raised NameError
        # on the first iteration; the validation scores belong in `dev_acc`.
        dev_acc[c] = model.score(X_val, y_val)

    return train_acc, test_acc, dev_acc, train_acc_n_vec

In [None]:
# Run the full sweep over C, keep the per-C results for the plotting cell and
# report the wall-clock duration in seconds.
start = time.time()
(train_acc_svma,
 test_acc_svma,
 dev_acc_svma,
 train_num_vec) = svm_a()
print('Time taken : ', time.time() - start)

Evaluating Model for :  0.0001
[LibSVM]

In [None]:
def plot_svma(titl):
    """Plot train, test and dev accuracy (in percent) against the C values.

    Reads the module-level dicts ``train_acc_svma``, ``test_acc_svma`` and
    ``dev_acc_svma`` (C value -> accuracy fraction), draws one line per split
    at evenly spaced x positions labelled with the sorted C values, saves the
    figure as ``titl + '.jpg'`` and shows it.

    Parameters
    ----------
    titl : str
        Figure title; also used as the base name of the saved JPEG.

    Raises
    ------
    KeyError
        If the test or dev dict lacks a C value present in the train dict.
    """
    # Derive tick positions and labels from the recorded C values so the plot
    # stays correct when the sweep does not contain exactly nine entries.
    c_values = sorted(train_acc_svma)
    x = list(range(1, len(c_values) + 1))
    labels = ['{:g}'.format(c) for c in c_values]

    # Accuracies are stored as fractions; plot them as percentages.
    y_tr = [train_acc_svma[c] * 100 for c in c_values]
    y_tst = [test_acc_svma[c] * 100 for c in c_values]
    y_dev = [dev_acc_svma[c] * 100 for c in c_values]

    # Explicit marker/linestyle/color keywords instead of an 'ro' format
    # string whose colour was then overridden by `color=`.
    plt.plot(x, y_tr, marker='o', linestyle='-', color='b', label='train')
    plt.plot(x, y_tst, marker='o', linestyle='-', color='g', label='test')
    plt.plot(x, y_dev, marker='o', linestyle='-', color='y', label='dev')

    plt.xticks(x, labels, rotation='vertical')
    plt.margins(0.2)
    plt.subplots_adjust(bottom=0.15)
    plt.legend(loc='lower right')
    plt.xlabel('Values of C')
    plt.title(titl)
    plt.ylabel('Accuracy (%)')
    plt.savefig(titl + '.jpg', dpi=300)
    plt.show()

# Draw the accuracy-vs-C figure; the title string is also used as the base
# name of the JPEG file that plot_svma saves.
plot_svma('Training, Testing and Dev Set accuracy as function of C')

In [None]:
# Fit a RobustScaler on one column reshaped to a single feature, then
# transform that column and reshape the result to (n_rows, 1).
# NOTE(review): `train_X` and `col` are not defined in any visible cell, so
# this cell raises NameError on a fresh kernel — confirm where they come from.
# NOTE(review): the scaler is fitted on `train_X[col]` but applied to
# `train_fash_data[col]`; verify that mixing the two frames is intended.
m_scaler = RobustScaler().fit(np.array(train_X[col]).reshape(-1,1))
train_feature = m_scaler.transform(np.array(train_fash_data[col]).reshape(-1,1))
train_feature = train_feature.reshape(train_X.shape[0], 1)

In [None]:
# Rebinds `train_fash_data` from a DataFrame to a plain ndarray, so any later
# column access by name on it no longer works and the cell is not re-runnable
# in the same way.
train_fash_data = np.array(train_fash_data) 
# reshape(-1, 1) flattens every value of the table into ONE column, so the
# scaler is fitted on (and applied to) a single feature.
# NOTE(review): if the cells ran top to bottom the `label` column is still in
# the table here and is scaled together with the pixels — confirm intent.
m_scaler = RobustScaler().fit(np.array(train_fash_data[:, :]).reshape(-1,1))
train_feature = m_scaler.transform(np.array(train_fash_data[:, :]).reshape(-1,1))
# Splits the flattened column 80/20; no random_state is given, so the split
# differs between runs. Note `x_test` here is a held-out part of the training
# values, not the Fashion-MNIST test file.
x_train ,x_test = train_test_split(train_feature,test_size=0.2)

In [None]:
# Display the held-out array produced by the split in the previous cell.
x_test

In [None]:
# Load the test table and derive features plus binary labels from it.
test_fash_data = pd.read_csv('fashion-mnist_test.csv')
test_inputs = test_fash_data.drop('label', axis=1)
input_test = test_inputs.iloc[:, :].values

# Binary relabelling: class 1 becomes -1, every other class becomes +1.
# Done as one vectorised mapping that builds a new Series, instead of
# assigning element by element into a Series taken from the frame (which is
# slow, triggers SettingWithCopy warnings and may silently alter the frame).
test_label = test_fash_data['label'].eq(1).map({True: -1, False: 1})

In [None]:
def train_svm(X, Y, c, log_file=None):
    """Fit a linear-kernel SVC for each value in ``c`` and pick the best one.

    Parameters
    ----------
    X : sequence
        ``X[0]`` holds the training features, ``X[1]`` the test features.
    Y : sequence
        ``Y[0]`` holds the training labels, ``Y[1]`` the test labels.
    c : iterable of numbers
        Regularisation strengths to try, in order.
    log_file : file-like, optional
        Destination of the per-model timing line; ``None`` prints to stdout.

    Returns
    -------
    best_acc
        The value from ``c`` with the highest test accuracy (the first one on
        ties), or ``None`` when ``c`` is empty. Despite its name this is a C
        value, not an accuracy.
    train_accs : list
        Training accuracy per value of ``c``, in input order.
    test_accs : list
        Test accuracy per value of ``c``, in input order.
    """
    train_accs = []
    test_accs = []
    accuracy = {}

    for i in c:
        # Time each fit locally; the original relied on `start`, `end` and `f`
        # that were never defined inside the function.
        start = time.time()
        linear_kernel = SVC(C=i, kernel='linear')
        linear_kernel.fit(X[0], Y[0])
        end = time.time()

        score_train = linear_kernel.score(X[0], Y[0])
        score_test = linear_kernel.score(X[1], Y[1])
        train_accs.append(score_train)
        test_accs.append(score_test)
        # Record the held-out score so a best C can actually be selected; the
        # original never filled this dict, so max() failed on an empty sequence.
        accuracy[i] = score_test

        print("i = {}, time takes = {} min".format(i, (end - start) / 60), file=log_file)

    # max() over the dict keys returns the first key with the highest score.
    best_acc = max(accuracy, key=accuracy.get) if accuracy else None

    return best_acc, train_accs, test_accs

In [None]:
# Candidate C values: the powers of ten from 10**-4 up to 10**4.
c = [10**exponent for exponent in range(-4, 5)]

In [None]:
# NOTE(review): train_svm indexes its first argument as X[0]/X[1] (train/test
# features) and its second as Y[0]/Y[1] (train/test labels). Here the two
# halves of a train_test_split are passed instead, so no labels reach the
# model — confirm the intended arguments before running.
train_svm(x_train, x_test, c)