In [44]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [84]:
class svm_classifier():
  """Linear soft-margin SVM trained with per-sample sub-gradient descent.

  For each training sample the model takes one gradient step on
      lambda * ||w||^2 + max(0, 1 - y_i * (w . x_i - b))
  where y_i is -1 for labels <= 0 and +1 otherwise. predict() reports the two
  classes as 0 and 1.
  """

  def __init__(self,learning_rate,no_of_iterations,lambda_parameter):
    """Store the hyper-parameters; no training happens here.

    learning_rate    -- step size applied to every weight/bias update
    no_of_iterations -- number of full passes over the training rows
    lambda_parameter -- L2 regularisation strength (the lambda above)
    """
    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations
    self.lambda_parameter = lambda_parameter


  def fit(self,x,y):
    """Learn w and b from features x (m rows, n columns) and labels y (m values).

    x and y may be NumPy arrays, pandas objects or nested lists; they are
    converted to arrays once so rows are always iterated positionally.
    Raises ValueError if x is not 2-D or if x and y disagree on row count.
    """
    self.x = np.asarray(x, dtype=float)
    self.y = np.asarray(y).ravel()

    if self.x.ndim != 2:
      raise ValueError(f"x must be 2-D (rows, features); got shape {self.x.shape}")
    if self.x.shape[0] != self.y.shape[0]:
      raise ValueError(
        f"x has {self.x.shape[0]} rows but y has {self.y.shape[0]} labels")

    # m is the number of rows, n the number of feature columns
    self.m,self.n = self.x.shape

    # Encode labels as -1 / +1 once; the encoding does not change between passes.
    self.y_label = np.where(self.y <= 0,-1,1)

    # Start from a zero weight vector (one weight per feature) and a zero bias.
    self.w = np.zeros(self.n)
    self.b = 0

    # Gradient descent: one call = one pass over all training rows.
    for _ in range(self.no_of_iterations):
      self.update_weights()


  def update_weights(self):
    """Run one pass over the training rows, updating w and b after every row.

    Called by fit(); relies on self.x, self.y_label, self.w and self.b set there.
    """
    for x_i,y_i in zip(self.x,self.y_label):
      if y_i * (np.dot(x_i,self.w) - self.b) >= 1:
        # Sample is outside the margin: only the regulariser contributes.
        dw = 2 * self.lambda_parameter * self.w   # dJ/dw = 2*lambda*w
        db = 0
      else:
        # Margin violated: hinge term adds -y_i*x_i to dJ/dw and y_i to dJ/db.
        dw = 2 * self.lambda_parameter * self.w - x_i * y_i
        db = y_i

      self.w = self.w - self.learning_rate * dw
      self.b = self.b - self.learning_rate * db


  def predict(self,x):
    """Return an array of 0/1 labels for the rows of x.

    A row is labelled 0 when w . x - b is negative and 1 otherwise.
    Raises AttributeError if called before fit().
    """
    if not hasattr(self,"w"):
      raise AttributeError("svm_classifier is not fitted yet; call fit() before predict().")

    output = np.dot(np.asarray(x, dtype=float),self.w) - self.b   # y = w.x - b
    return np.where(output < 0,0,1)



In [8]:
# Read the diabetes table from Google Drive and preview its first rows.
# NOTE(review): this is an absolute Colab path; it only resolves with Drive mounted.
csv_path = '/content/drive/MyDrive/Colab Notebooks/diabetes.csv'
dataset = pd.read_csv(csv_path)
dataset.head(5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [9]:
# Count missing (NaN) values in each column.
dataset.isnull().sum()

Unnamed: 0,0
Pregnancies,0
Glucose,0
BloodPressure,0
SkinThickness,0
Insulin,0
BMI,0
DiabetesPedigreeFunction,0
Age,0
Outcome,0


In [10]:
# (rows, columns) of the loaded table.
dataset.shape

(768, 9)

In [11]:
# Summary statistics (count, mean, std, quartiles, min/max) per column.
dataset.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [12]:
# Number of rows per class label in the target column.
dataset['Outcome'].value_counts()

Unnamed: 0_level_0,count
Outcome,Unnamed: 1_level_1
0,500
1,268


# Separate features and target

In [75]:
# The label is the 'Outcome' column; every other column is an input feature.
y = dataset['Outcome']
x = dataset.drop(columns='Outcome')

print('Features shape : ', x.shape)
print('Target shape :', y.shape)

Features shape :  (768, 8)
Target shape : (768,)


# Data Standardization

In [76]:
# Fit the scaler on the raw feature columns taken straight from `dataset`, so
# re-running this cell never re-fits on already-standardized values (which would
# make later scaler.transform(raw_input) calls silently wrong).
# NOTE(review): the scaler is fitted on all rows, including those that the
# train/test split below places in the test set.
scaler = StandardScaler()

x = scaler.fit_transform(dataset.drop(columns='Outcome'))

# Train/test split

In [77]:
# Hold out 20% of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(
  x, y, test_size=0.2, random_state=2
)

# Report the shape of each of the four pieces.
for label, part in (
  ('x_train shape is :', x_train),
  ('x_test shape is :', x_test),
  ('y_train shape is :', y_train),
  ('y_test shape is :', y_test),
):
  print(label, part.shape)


x_train shape is : (614, 8)
x_test shape is : (154, 8)
y_train shape is : (614,)
y_test shape is : (154,)


In [97]:
# Hyper-parameters: step size, number of passes over the training rows,
# and the regularisation strength.
model = svm_classifier(
  learning_rate=0.001,
  no_of_iterations=2000,
  lambda_parameter=0.01,
)

In [98]:
# Train on the training split; the learned weights and bias are stored on `model`.
model.fit(x_train,y_train)

In [101]:
# Accuracy on the held-out test split, reported as a percentage.
y_pred = model.predict(x_test)
test_accuracy = accuracy_score(y_test, y_pred) * 100

print(f"Accuracy score is : {test_accuracy}%")

Accuracy score is : 74.67532467532467%


In [102]:
# Accuracy on the training split itself, reported as a percentage.
y_pred = model.predict(x_train)
train_accuracy = accuracy_score(y_train, y_pred) * 100

print(f"Accuracy score is : {train_accuracy}%")

Accuracy score is : 77.68729641693811%


In [111]:
# One hand-entered record (1 row, 8 feature values), labelled with the training
# feature names so each value lines up with the column the scaler was fitted on.
# Named `sample_input` rather than `input` so the Python builtin is not shadowed.
sample_input = pd.DataFrame(
  [[1, 85, 66, 29, 0, 26.6, 0.351, 31]],
  columns=dataset.columns.drop('Outcome'),
)

# Apply same scaling used for training
input_scaled = scaler.transform(sample_input)

# Predict
my_pred = model.predict(input_scaled)
print("Custom prediction:", my_pred)


Custom prediction: [0]




# Building a prediction system

In [117]:
# Hand-entered patient record, labelled with the training feature names so the
# values line up with the columns the scaler was fitted on.
# Alternative sample to try: [[7, 107, 74, 0, 0, 29.6, 0.254, 31]]
input_data = pd.DataFrame(
  [[10, 180, 88, 15, 0, 40.0, 0.5, 55]],
  columns=dataset.columns.drop('Outcome'),
)

# Standardize with the already-fitted scaler, then classify.
std_data = scaler.transform(input_data)
prediction = model.predict(std_data)

# predict() returns one 0/1 label per row; 0 is reported as non-diabetic.
if prediction[0] == 0:
  print('The person is not diabetic')
else:
  print('The person is diabetic')


The person is diabetic


