In [1]:
import numpy as np

In [30]:
class logistic_regression:
  def __init__(self,learning_rate,no_of_iterations):
    self.learning_rate = learning_rate
    self.no_of_iterations = no_of_iterations
    self.w = None
    self.b = None

  def fit(self,x_train,y_train):
    self.x_train = x_train
    self.y_train = y_train
    self.m,self.n = x_train.shape

    #Initiating the weight and bias Value
    self.w = np.zeros(self.n)
    self.b = 0

    #Implementing gradient descent for optimisation
    for i in range(self.no_of_iterations):
      self.update_weights()

  def update_weights(self):
    z = self.b + np.dot(self.x_train, self.w)
    y_hat = 1/(1+np.exp(-z))

    #derivatives
    dw = (1/self.m)*np.dot(self.x_train.T,(y_hat-self.y_train))
    db = (1/self.m)*np.sum(y_hat-self.y_train)

    #update
    self.w = self.w - (self.learning_rate*dw)
    self.b = self.b - (self.learning_rate*db)

  def predict(self,x_test):
    z_test = np.dot(x_test, self.w) + self.b
    y_pred = 1/(1+np.exp(-z_test))
    y_pred = np.where(y_pred>0.5 , 1 , 0)
    return y_pred

In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
data = pd.read_csv(r"/content/diabetes.csv")
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
data.isnull().sum()

Unnamed: 0,0
Pregnancies,0
Glucose,0
BloodPressure,0
SkinThickness,0
Insulin,0
BMI,0
DiabetesPedigreeFunction,0
Age,0
Outcome,0


In [7]:
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [8]:
data['Outcome'].value_counts()

Unnamed: 0_level_0,count
Outcome,Unnamed: 1_level_1
0,500
1,268


**0->Non Diabetics**

**1->Diabetics**

In [9]:
ip = data.drop('Outcome',axis=1)
op = data['Outcome']

In [10]:
ip.shape

(768, 8)

In [11]:
op.shape

(768,)

**Train-Test Split**

In [12]:
x_train,x_test,y_train,y_test = train_test_split(ip,op,test_size=0.2,random_state=2)

In [13]:
x_train.shape

(614, 8)

In [33]:
x_train

array([[-0.85811767,  0.06488386,  0.25332145, ..., -0.51313743,
        -1.10316947, -0.27704152],
       [-0.85811767, -0.84697246,  0.66358026, ...,  0.4081093 ,
        -0.71238555,  0.84376203],
       [-1.15412006, -0.87841578,  0.04819205, ...,  1.49569224,
        -0.37742791, -1.05298243],
       ...,
       [ 0.02988949,  0.09632718,  0.04819205, ..., -0.3723914 ,
         1.96433735,  1.01619334],
       [-0.2661129 , -0.18666271,  0.25332145, ..., -0.70506383,
        -1.08260189, -0.79433546],
       [ 0.02988949, -0.37532264, -0.15693736, ...,  0.0114614 ,
        -0.01308802, -0.36325717]])

**Data Standardization**

In [14]:
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.fit_transform(x_test)

In [15]:
x_train

array([[-0.85811767,  0.06488386,  0.25332145, ..., -0.51313743,
        -1.10316947, -0.27704152],
       [-0.85811767, -0.84697246,  0.66358026, ...,  0.4081093 ,
        -0.71238555,  0.84376203],
       [-1.15412006, -0.87841578,  0.04819205, ...,  1.49569224,
        -0.37742791, -1.05298243],
       ...,
       [ 0.02988949,  0.09632718,  0.04819205, ..., -0.3723914 ,
         1.96433735,  1.01619334],
       [-0.2661129 , -0.18666271,  0.25332145, ..., -0.70506383,
        -1.08260189, -0.79433546],
       [ 0.02988949, -0.37532264, -0.15693736, ...,  0.0114614 ,
        -0.01308802, -0.36325717]])

**Model Training**

In [20]:
lr = logistic_regression(learning_rate=0.01,no_of_iterations=1000)

In [21]:
lr.fit(x_train,y_train)

In [22]:
pred = lr.predict(x_test)
pred

array([0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0])

In [23]:
ac = accuracy_score(y_test,pred)
print("Accuracy:",ac)

Accuracy: 0.7597402597402597


In [42]:
input_data = [5,166,72,19,175,25.8,0.587,51]

input_data_as_numpy_array = np.asarray(input_data)
input_data_reshaped = input_data_as_numpy_array.reshape(1,-1)
print(input_data_reshaped)

std_data = sc.transform(input_data_reshaped)
print(std_data)

prediction = lr.predict(std_data)
print(prediction)

if (prediction[0] == 0):
  print('The person is not diabetic')
else:
  print('The person is diabetic')

[[  5.    166.     72.     19.    175.     25.8     0.587  51.   ]]
[[0. 0. 0. 0. 0. 0. 0. 0.]]
[0]
The person is not diabetic
