In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn import svm

In [2]:
data_df = pd.read_csv("diabetes.csv")
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768 non-null    int64  
 2   BloodPressure             768 non-null    int64  
 3   SkinThickness             768 non-null    int64  
 4   Insulin                   768 non-null    int64  
 5   BMI                       768 non-null    float64
 6   DiabetesPedigreeFunction  768 non-null    float64
 7   Age                       768 non-null    int64  
 8   Outcome                   768 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [3]:
data_df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [4]:
ind_features = ["Pregnancies","Glucose","BloodPressure","SkinThickness","Insulin","BMI","DiabetesPedigreeFunction","Age"]
target = "Outcome"

In [5]:
sc = StandardScaler()
x_scaled = sc.fit_transform(data_df[ind_features])

In [6]:
X = pd.DataFrame(data=x_scaled,columns=ind_features)
y = data_df[target]

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,stratify=y)

In [8]:
lr = LogisticRegression(random_state=42)
lr.fit(X_train,y_train)

In [9]:
y_train_pred = lr.predict(X_train)
y_test_pred = lr.predict(X_test)

print("Training Accuracy",np.round(accuracy_score(y_train,y_train_pred)))
print("Testing Accuracy",np.round(accuracy_score(y_test,y_test_pred)))

Training Accuracy 1.0
Testing Accuracy 1.0


In [10]:
svm_classifier = svm.LinearSVC(random_state=42)
svm_classifier.fit(X_train,y_train)



In [11]:
y_train_pred=svm_classifier.predict(X_train)
y_test_pred=svm_classifier.predict(X_test)

print("Training Accuracy",np.round(accuracy_score(y_train,y_train_pred)))
print("Testing Accuracy",np.round(accuracy_score(y_test,y_test_pred)))

Training Accuracy 1.0
Testing Accuracy 1.0


In [12]:
class custom_SVM_Classifier():
    #initialise the regularisation parameter, learning rate and no of iterations
    def __init__(self,learning_rate, no_iters, regularisation_parameter):
        self.learning_rate = learning_rate
        self.no_iters = no_iters
        self.regularisation_parameter = regularisation_parameter
    
    def fit(self,X,y):
        #m = no of rows
        #n = no of cols
        self.m,self.n = X.shape
        
        
        self.X = X
        self.y = y
        
        #initialise w and bias
        self.w = np.zeros(self.n)
        self.b = 0
        
        for i in range(self.no_iters):
            self.update_weights()
    
    def update_weights(self):
        y_actual = np.where(self.y==0,-1,1)
                
        for index in range(len(self.X)):
            
            condition = y_actual[index]*(np.dot(X.iloc[index,:].values,self.w)+self.b)
            
            if(condition>=1):
                dw = 2*self.regularisation_parameter*self.w
                db = 0
            else:
                dw = 2*self.regularisation_parameter*self.w - (np.dot(X.iloc[index,:].values, y_actual[index]))
                db = y_actual[index]
            
            self.w = self.w - self.learning_rate * dw
            self.b = self.b - self.learning_rate * db
        
    
    def predict(self,X):
        
        output = np.dot(X, self.w) - self.b
        predicted_labels = np.sign(output)
        y_hat = np.where(predicted_labels <= -1, 0, 1)
        return y_hat

In [13]:
svm_custom = custom_SVM_Classifier(0.01,500,0.01)

In [14]:
svm_custom.fit(X_train,y_train)
y_train_pred = svm_custom.predict(X_train)
y_test_pred = svm_custom.predict(X_test)
print("Training Accuracy",np.round(accuracy_score(y_train,y_train_pred)))
print("Testing Accuracy",np.round(accuracy_score(y_test,y_test_pred)))

Training Accuracy 1.0
Testing Accuracy 1.0
