In [137]:
import numpy as np


In [138]:
class SVM_Classifier():
    def __init__(self,lr,epochs,lamb):
        self.lr = lr
        self.epochs = epochs
        self.lamb = lamb
        
    def fit(self,x,y):
        self.m,self.n = x.shape
        
        self.w = np.zeros(self.n)
        self.b = 0
        self.x = x
        self.y = y
        
        for i in range(int(self.epochs)):
            self.update_weights()
            
        
    def update_weights(self):
        y_label = np.where(self.y<=0,-1,1)
        for index,x_i in enumerate(self.x):
            condition = y_label[index]*(np.dot(x_i,self.w)-self.b)
            if condition >=1:
                derv_w = 2*self.w*self.lamb
                derv_b = 0
            else:
                derv_w = 2*self.w*self.lamb - np.dot(x_i,y_label[index])
                derv_b = y_label[index]
            self.w = self.w - self.lr * derv_w
            self.b = self.b - self.lr * derv_b
                
        
    def predict(self,x):
        output = np.dot(x,self.w) - self.b
        pred_labels = np.sign(output)
        y_hat = np.where(pred_labels<=-1,0,1)
        return y_hat
        
        

In [139]:
model = SVM_Classifier(lr = 1e-3,epochs=1e4,lamb=1e-2)



In [140]:
import pandas as pd

df = pd.read_csv('data/diabetes.csv')

In [141]:
X = df.drop(columns=['Outcome','Pregnancies','DiabetesPedigreeFunction','SkinThickness'])

In [142]:
Y = df['Outcome']

In [143]:
X.head()

Unnamed: 0,Glucose,BloodPressure,Insulin,BMI,Age
0,148,72,0,33.6,50
1,85,66,0,26.6,31
2,183,64,0,23.3,32
3,89,66,94,28.1,21
4,137,40,168,43.1,33


In [144]:
Y.head()

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [145]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(X)

In [146]:
standardized_data = scaler.transform(X)

In [147]:
print(standardized_data)

[[ 0.84832379  0.14964075 -0.69289057  0.20401277  1.4259954 ]
 [-1.12339636 -0.16054575 -0.69289057 -0.68442195 -0.19067191]
 [ 1.94372388 -0.26394125 -0.69289057 -1.10325546 -0.10558415]
 ...
 [ 0.00330087  0.14964075  0.27959377 -0.73518964 -0.27575966]
 [ 0.1597866  -0.47073225 -0.69289057 -0.24020459  1.17073215]
 [-0.8730192   0.04624525 -0.69289057 -0.20212881 -0.87137393]]


In [148]:
features = standardized_data
targets = Y

In [149]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(features,targets,test_size = 0.2,random_state=2)

In [150]:
print(X_train.shape,X_test.shape,Y_train.shape,Y_test.shape)

(614, 5) (154, 5) (614,) (154,)


In [151]:
model.fit(X_train,Y_train)


In [152]:
X_train_pred = model.predict(X_train)


In [153]:
from sklearn.metrics import accuracy_score

training_data_accuracy = accuracy_score(Y_train,X_train_pred)
training_data_accuracy

0.7654723127035831

In [154]:
Y_pred = model.predict(X_test)
accuracy_score(Y_test,Y_pred)

0.7987012987012987