<a href="https://colab.research.google.com/github/Sachithananthan-1/Building_SVM_classifier_from_Scratch/blob/main/Building_Support_Vector_Model_from_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [66]:
class SVM_classifier():
    """Linear soft-margin SVM trained with per-sample sub-gradient descent.

    The decision function is ``f(x) = w . x - b`` and each sample update
    follows the sub-gradient of ``lambda * ||w||^2 + max(0, 1 - y * f(x))``.

    Labels given to ``fit`` are re-encoded internally: values ``<= 0`` become
    the negative class (-1) and every other value the positive class (+1).
    ``predict`` reports the classes as 0 and 1.
    """

    def __init__(self, learning_rate, no_of_iterations, lambda_parameter):
        """Store the hyperparameters.

        Args:
            learning_rate: step size applied to every gradient update.
            no_of_iterations: number of full passes over the training data.
            lambda_parameter: weight of the L2 regularisation term.
        """
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations
        self.lambda_parameter = lambda_parameter

    def fit(self, X, Y):
        """Learn ``w`` and ``b`` from the training data.

        Args:
            X: 2-D array-like of shape (samples, features). NumPy arrays,
                pandas DataFrames and nested lists are all accepted.
            Y: 1-D array-like with one label per row of ``X``.

        Raises:
            ValueError: if ``X`` is not 2-D or ``Y`` does not have exactly one
                label per row of ``X``.
        """
        # Convert up front: iterating a DataFrame yields column labels, not
        # rows, and plain lists have no ``.shape``.
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y).ravel()

        if X.ndim != 2:
            raise ValueError("X must be 2-dimensional (samples, features)")
        if Y.shape[0] != X.shape[0]:
            raise ValueError("X and Y must contain the same number of samples")

        self.m, self.n = X.shape

        self.w = np.zeros(self.n)
        self.b = 0
        self.X = X
        self.Y = Y

        # Gradient descent: one call performs a full pass over the samples.
        for _ in range(self.no_of_iterations):
            self.update_weights()

    def update_weights(self):
        """Run one pass over the training set, updating after every sample."""
        # Encode labels as -1 / +1, as the hinge-loss margin test requires.
        y_label = np.where(self.Y <= 0, -1, 1)

        for x_i, y_i in zip(self.X, y_label):
            # Gradient of the regularisation term, evaluated at the current w.
            dw = 2 * self.lambda_parameter * self.w

            if y_i * (np.dot(x_i, self.w) - self.b) >= 1:
                # Sample is outside the margin: only regularisation acts.
                db = 0
            else:
                # Margin violated: add the hinge-loss sub-gradient.
                dw = dw - y_i * x_i
                db = y_i

            self.w = self.w - self.learning_rate * dw
            self.b = self.b - self.learning_rate * db

    def predict(self, X):
        """Classify samples with the learned hyperplane.

        Args:
            X: array-like of samples with the same number of features as the
                data passed to ``fit``.

        Returns:
            NumPy integer array of 0 / 1 labels: 0 where the decision value is
            negative, 1 otherwise (a value of exactly zero maps to 1).
        """
        output = np.dot(np.asarray(X, dtype=float), self.w) - self.b
        predicted_labels = np.sign(output)
        y_cap = np.where(predicted_labels <= -1, 0, 1)
        return y_cap

Using the above model for prediction

In [55]:
# Load the dataset from the Colab working directory and preview the first rows.
blood_sugar = pd.read_csv(filepath_or_buffer='/content/Sugar_patient.csv')
blood_sugar.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [56]:
# Dimensions of the loaded frame as a (rows, columns) tuple.
blood_sugar.shape

(499, 9)

In [57]:
# Count missing values per column.
blood_sugar.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [58]:
# Split the frame into the feature matrix X and the target vector Y.

TARGET_COLUMN = 'Outcome'
X = blood_sugar.drop(columns=TARGET_COLUMN)
Y = blood_sugar[TARGET_COLUMN]
print(X)

     Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0              6      148             72             35        0  33.6   
1              1       85             66             29        0  26.6   
2              8      183             64              0        0  23.3   
3              1       89             66             23       94  28.1   
4              0      137             40             35      168  43.1   
..           ...      ...            ...            ...      ...   ...   
494            3       80              0              0        0   0.0   
495            6      166             74              0        0  26.6   
496            5      110             68              0        0  26.0   
497            2       81             72             15       76  30.1   
498            7      195             70             33      145  25.1   

     DiabetesPedigreeFunction  Age  
0                       0.627   50  
1                       0.351   31  


In [59]:
# Show the target column extracted from the frame.
print(Y)

0      1
1      0
2      1
3      0
4      1
      ..
494    0
495    0
496    0
497    0
498    1
Name: Outcome, Length: 499, dtype: int64


In [60]:
# Standardize the features with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on every row, before the train/test
# split below, so the held-out rows influence the scaling statistics.
scaler = StandardScaler()
standard_data = scaler.fit(X).transform(X)
print(standard_data)

[[ 0.65712418  0.82215798  0.16757553 ...  0.19695267  0.4102653
   1.45493002]
 [-0.83879263 -1.12174849 -0.14117594 ... -0.65648446 -0.38927309
  -0.17945286]
 [ 1.2554909   1.90210602 -0.24409309 ... -1.05881911  0.54062482
  -0.09343271]
 ...
 [ 0.35794082 -0.35035703 -0.03825878 ... -0.72963622 -0.56018891
  -0.26547301]
 [-0.53960927 -1.24517113  0.16757553 ... -0.2297659   0.17851504
  -0.69557377]
 [ 0.95630754  2.27237392  0.06465837 ... -0.83936385 -0.9338862
   1.88503078]]


In [61]:
# Rebind X to the standardized array so the following cells use scaled features.
X = standard_data

In [62]:
# Hold out part of the data for evaluation (fixed seed for repeatability).
# NOTE(review): test_size=0.03 keeps only about 3% of the rows for testing,
# so the reported test accuracy rests on very few samples.

X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.03,
    random_state=2,
)

In [67]:
# Build the from-scratch SVM and fit it on the training split.

classifier = SVM_classifier(
    learning_rate=0.001,
    no_of_iterations=1000,
    lambda_parameter=0.01,
)
classifier.fit(X_train, Y_train)

In [69]:
# Accuracy on the training split

# Despite its name, X_train_predict holds the predicted labels for X_train.
X_train_predict = classifier.predict(X_train)
# accuracy_score expects (y_true, y_pred): ground truth first, predictions second.
train_data_accuracy = accuracy_score(Y_train, X_train_predict)
print('Accuracy on training data : ',train_data_accuracy)

Accuracy on training data :  0.7582644628099173


In [70]:
# Accuracy on the held-out test split
# Despite its name, X_test_predict holds the predicted labels for X_test.
X_test_predict = classifier.predict(X_test)
# accuracy_score expects (y_true, y_pred): ground truth first, predictions second.
test_data_accuracy = accuracy_score(Y_test, X_test_predict)
print('Accuracy on testing data : ',test_data_accuracy)

Accuracy on testing data :  0.7333333333333333


In [73]:
# One raw (unscaled) sample, with values in the same order as the feature columns.
input_data = (7, 147, 76, 0, 0, 39.4, 0.257, 43)

# Turn the tuple into a single-row 2-D array, the shape the scaler expects.
input_array = np.array(input_data)
data_reshaped = input_array[np.newaxis, :]

# Apply the already-fitted scaler before asking the classifier.
std_data = scaler.transform(data_reshaped)
prediction = classifier.predict(std_data)
print(prediction)

# Class 0 is reported as a normal sugar level, anything else as high.
message = (
    'The patient has normal sugar level'
    if prediction[0] == 0
    else 'The patient has high sugar level'
)
print(message)

[1]
The patient has high sugar level


