In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
class SVM_classifier():
    """Linear soft-margin SVM trained with per-sample sub-gradient descent.

    The decision function is ``np.dot(x, w) - b``.  Training minimises the
    L2-regularised hinge loss.  Labels are supplied as 0/1 (any value <= 0 is
    treated as the negative class) and are mapped to -1/+1 internally;
    ``predict`` maps the result back to 0/1.
    """

    # initiating the hyperparameters
    def __init__(self, lr, n_iters, lambda_parameter):
        """Store the hyperparameters.

        Args:
            lr: Step size applied to every per-sample update.
            n_iters: Number of full passes over the training data.
            lambda_parameter: Weight of the L2 penalty on ``w``.
        """
        self.lr               = lr
        self.n_iters          = n_iters
        self.lambda_parameter = lambda_parameter

    # fitting the dataset to SVM Classifier
    def fit(self, X, Y):
        """Learn ``w`` and ``b`` from the training data.

        Args:
            X: 2-D array-like of shape (m, n) holding numeric features.
               ndarrays, DataFrames and nested lists are accepted.
            Y: Array-like with one label per row of ``X``; values <= 0 are
               the negative class, everything else the positive class.

        Raises:
            ValueError: If ``X`` is not 2-D or ``Y`` does not hold exactly
                one label per row of ``X``.
        """
        # Convert up front: iterating a DataFrame yields column labels, not
        # rows, which would break the per-sample loop in update_weights().
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y).ravel()

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (samples x features), got %d dimension(s)" % X.ndim
            )
        if Y.shape[0] != X.shape[0]:
            raise ValueError(
                "X and Y disagree on the number of samples: %d vs %d"
                % (X.shape[0], Y.shape[0])
            )

        # m : number of Data points    --> number of rows
        # n : number of input features --> number of columns
        self.m, self.n = X.shape

        # initiating the weight value and bias value
        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y

        # implementing Gradient Descent algorithm for Optimization
        for _ in range(self.n_iters):
            self.update_weights()

    def update_weights(self):
        """Run one pass over the stored training data, updating after each sample.

        For a sample (x, y) with y in {-1, +1}:
          * margin satisfied, y * (w.x - b) >= 1: only the L2 penalty
            contributes, so dw = 2*lambda*w and db = 0;
          * margin violated: dw = 2*lambda*w - y*x and db = y.
        """
        X = np.asarray(self.X)
        y_label = np.where(np.asarray(self.Y) <= 0, -1, 1)

        for index, x_i in enumerate(X):
            y_i = y_label[index]
            margin_satisfied = y_i * (np.dot(x_i, self.w) - self.b) >= 1

            # The regularisation term is present in both branches.
            dw = 2 * self.lambda_parameter * self.w
            db = 0
            if not margin_satisfied:
                dw = dw - x_i * y_i
                db = y_i

            self.w = self.w - self.lr * dw
            self.b = self.b - self.lr * db

    def predict(self, X):
        """Return 0/1 class labels for the rows of ``X``.

        A sample is labelled 0 when its decision value ``w.x - b`` is
        strictly negative and 1 otherwise (a value of exactly 0 maps to 1).
        """
        output = np.dot(np.asarray(X, dtype=float), self.w) - self.b

        y_hat = np.where(output < 0, 0, 1)

        return y_hat
        

In [3]:
 # Build a classifier with learning rate 0.001, 1000 training passes and an
 # L2 penalty weight of 0.01.
 # NOTE(review): `model` is not referenced by any later cell visible here; an
 # identically configured instance named `classifier` is created and trained
 # further down. Confirm whether this cell is still needed.
 model = SVM_classifier(lr=0.001, n_iters=1000, lambda_parameter=0.01)

In [4]:
# Load the diabetes dataset into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
diabetes_data = pd.read_csv('Fichiers_csv/diabetes.csv')

In [5]:
# Preview the first rows to check the column names and value ranges.
diabetes_data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
# (rows, columns) of the loaded frame.
diabetes_data.shape

(768, 9)

In [7]:
# Class balance of the label column: how many rows fall in each Outcome value.
diabetes_data['Outcome'].value_counts()

Outcome
0    500
1    268
Name: count, dtype: int64

In [8]:
# Separate the label column from the predictor columns.
target   = diabetes_data['Outcome']
features = diabetes_data.drop(columns=['Outcome'])

In [9]:
# Scaler used to standardise the feature columns before training.
scaler = StandardScaler()

In [10]:
# Learn the scaling statistics from the feature columns.
# NOTE(review): the scaler is fitted on the full dataset, before the
# train/test split performed in a later cell, so rows that end up in the test
# set influence the scaling. Consider splitting first and fitting on the
# training rows only.
scaler.fit(features)

In [11]:
# Apply the fitted scaling to every feature column.
standardized_data = scaler.transform(features)

In [12]:
# From here on `features` refers to the standardised values rather than the
# original DataFrame columns (the name is rebound, so re-running earlier cells
# after this one operates on a different object).
features = standardized_data
# Same label column as before; re-assigned so this cell defines both inputs
# used by the split.
target   = diabetes_data['Outcome']

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=2,
)

In [14]:
# Classifier that is actually trained and evaluated below: learning rate
# 0.001, 1000 passes over the training data, L2 penalty weight 0.01.
classifier = SVM_classifier(lr=0.001, n_iters=1000, lambda_parameter=0.01)

In [15]:
# Train on the training split. Each of the n_iters passes loops over the
# samples one by one in Python, so this cell's runtime grows with
# n_iters * number of training rows.
classifier.fit(X_train, Y_train)

In [16]:
# Accuracy on the same rows the model was trained on; compare it with the
# held-out accuracy rather than reading it as a generalisation estimate.
X_train_prediction  = classifier.predict(X_train)
accuracy_train_data = accuracy_score(Y_train, X_train_prediction)
print("Accuracy of training data : ", accuracy_train_data)

Accuracy of training data :  0.7768729641693811


In [18]:
# Accuracy on the held-out 20% split.
X_test_prediction  = classifier.predict(X_test)
accuracy_test_data = accuracy_score(Y_test, X_test_prediction)
print("Accuracy of test data : ", accuracy_test_data)

Accuracy of test data :  0.7532467532467533
