In [113]:
import numpy as np
import pandas as pd
import statsmodels.api as sm

from sklearn.preprocessing import MinMaxScaler  # for normalization

from sklearn.model_selection import train_test_split as tts # to split our data into train and test samples.
#If you want to see how to implement  this split from scratch you can check out my other project Glass Classification using KNN from Scratch in my profile.

from sklearn.metrics import accuracy_score # for calculating our accuracy in the end 
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
import time

In [116]:
X_train = pd.read_csv("./data/X_train.csv")
# X_train.head(5)
normalizedTrainX = MinMaxScaler().fit_transform(X_train.values)
trainX = pd.DataFrame(normalizedTrainX)
# trainX.head(5)

In [117]:
X_test = pd.read_csv("./data/X_test.csv")
# X_test.head(5)
normalizedTestX = MinMaxScaler().fit_transform(X_test.values)
testX = pd.DataFrame(normalizedTestX)
# testX.head(5)

In [107]:
Y_train = pd.read_csv("./data/Y_train.csv")
Y = Y_train.to_numpy()
trainY = np.zeros(len(Y))
# change 0 in trainY to -1 
for idx in range(len(Y)):
    if Y[idx] == 0 :
        trainY[idx] = -1
    else :
        trainY[idx] = 1
# print(trainY)

In [108]:
Y_test = pd.read_csv("./data/Y_test.csv")
Y = Y_test.to_numpy()
testY = np.zeros(len(Y))
# change 0 in trainY to -1 
for idx in range(len(Y)):
    if Y[idx] == 0 :
        testY[idx] = -1
    else :
        testY[idx] = 1
# print(testY)

In [109]:
class SVM:
    
    def init(self, learning_rate=0.001,lambda_param=0.02,n_iters=800):
        self.lr = learning_rate #α
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None
    
    def fit(self,dataX,dataY):
        n_samples, n_features = dataX.shape
        
        self.w = np.zeros(n_features)
        self.b = 0
        
        for _ in range(self.n_iters):
            for idx, x_i in enumerate(dataX):
                # print(x_i,dataY[idx])
                self.update(x_i,dataY[idx])
    
    def update(self,x,y):
        distance = 1-(y*(np.dot(x,self.w)+self.b))
        hinge_loss = max(0,distance)
        if(hinge_loss == 0):
            self.w = self.w-self.lr*(2*self.lambda_param*self.w)
        else:
            # print(x,"\n",y)
            self.w = self.w-self.lr*(2*self.lambda_param*self.w-np.dot(x,y))
            self.b = self.b+self.lr*y
        
    def predict(self,dataX):
        eq = np.dot(dataX,self.w)+self.b
        return np.sign(eq)

In [114]:
clf = SVM()
clf.init()
start_time = time.time()
clf.fit(trainX.to_numpy(),trainY)
end_time = time.time()
predictedY = clf.predict(testX.to_numpy())

In [115]:
print("fit time: {}".format(end_time-start_time))
print("accuracy on test dataset: {}".format(accuracy_score(testY, predictedY)))
print("recall on test dataset: {}".format(recall_score(testY, predictedY)))
print("precision on test dataset: {}".format(precision_score(testY, predictedY)))

fit time: 9.501799583435059
accuracy on test dataset: 0.85
recall on test dataset: 0.6842105263157895
precision on test dataset: 1.0
