In [1]:
# Importing Necessary modules

import sklearn
from sklearn.metrics import classification_report
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from math import e

In [2]:
# Loading the data

df=pd.read_csv('https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.csv')
df.columns=list('ABCDEFGHI')
df

Unnamed: 0,A,B,C,D,E,F,G,H,I
0,1,85,66,29,0,26.6,0.351,31,0
1,8,183,64,0,0,23.3,0.672,32,1
2,1,89,66,23,94,28.1,0.167,21,0
3,0,137,40,35,168,43.1,2.288,33,1
4,5,116,74,0,0,25.6,0.201,30,0
...,...,...,...,...,...,...,...,...,...
762,10,101,76,48,180,32.9,0.171,63,0
763,2,122,70,27,0,36.8,0.340,27,0
764,5,121,72,23,112,26.2,0.245,30,0
765,1,126,60,0,0,30.1,0.349,47,1


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 767 entries, 0 to 766
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   A       767 non-null    int64  
 1   B       767 non-null    int64  
 2   C       767 non-null    int64  
 3   D       767 non-null    int64  
 4   E       767 non-null    int64  
 5   F       767 non-null    float64
 6   G       767 non-null    float64
 7   H       767 non-null    int64  
 8   I       767 non-null    int64  
dtypes: float64(2), int64(7)
memory usage: 54.1 KB


In [4]:
df.isnull().sum()

A    0
B    0
C    0
D    0
E    0
F    0
G    0
H    0
I    0
dtype: int64

Data is already cleaned, so we can directly give it to the model. We can also do preprocessing, to churn out accuracy

In [5]:
x_train,x_test,y_train,y_test=train_test_split(df.drop('I',axis=1),df["I"],test_size=0.2)

In [6]:
x_train

Unnamed: 0,A,B,C,D,E,F,G,H
725,1,116,78,29,180,36.1,0.496,25
202,2,99,70,16,44,20.4,0.235,27
640,4,128,70,0,0,34.3,0.303,24
216,6,125,68,30,120,30.0,0.464,32
525,1,97,64,19,82,18.2,0.299,21
...,...,...,...,...,...,...,...,...
363,4,147,74,25,293,34.9,0.385,30
121,2,107,74,30,100,33.6,0.404,23
344,8,126,88,36,108,38.5,0.349,49
126,1,118,58,36,94,33.3,0.261,23


In [7]:
x_test

Unnamed: 0,A,B,C,D,E,F,G,H
77,0,131,0,0,0,43.2,0.270,26
478,4,132,86,31,0,28.0,0.419,63
74,1,0,48,20,0,24.7,0.140,22
94,6,144,72,27,228,33.9,0.255,40
235,7,181,84,21,192,35.9,0.586,51
...,...,...,...,...,...,...,...,...
144,0,102,75,23,0,0.0,0.572,21
708,2,93,64,32,160,38.0,0.674,23
138,5,105,72,29,325,36.9,0.159,28
596,1,89,24,19,25,27.8,0.559,21


In [8]:
# Scaling the data so that every feature has equal importance

scaler=StandardScaler()
x_train=scaler.fit_transform(x_train)
x_test=scaler.transform(x_test)

In [9]:
class LogisticRegression:
    
    # sigmoid activation function
    def sig(self,x):
        return 1/(1+(e**(-x)))
    
    # training the model
    def fit(self,a,b):
        o=np.ones(a.shape[0]).reshape(-1,1)
        a=np.concatenate((o,a),axis=1)
        self.c=[0]*(a.shape[1])
        m=len(b)
        al=0.001 #learning_rate
        for _ in range(1500): #1500 epochs
            pred=self.sig(a.dot(self.c))
            loss=pred-b
            self.c-=(al/m)*a.T.dot(loss)
          
    # predicting on new data
    def predict(self,x):
        o=np.ones(x.shape[0]).reshape(-1,1)
        x=np.concatenate((o,x),axis=1)
        y=self.sig(x.dot(self.c))
        l=[0 if i<=0.5 else 1 for i in y]
        return np.array(l)

In [10]:
model=LogisticRegression()
model.fit(x_train,y_train)

In [11]:
#Coefficients of the best fit line

model.c

array([-0.19788498,  0.11520879,  0.25884362,  0.03370167,  0.03368498,
        0.05972761,  0.15267923,  0.07716284,  0.12241715])

In [12]:
y_pred=model.predict(x_test)

In [13]:
# Classification report on the test data

print(classification_report(y_test,y_pred,digits=3))

              precision    recall  f1-score   support

           0      0.764     0.866     0.812        97
           1      0.705     0.544     0.614        57

    accuracy                          0.747       154
   macro avg      0.734     0.705     0.713       154
weighted avg      0.742     0.747     0.738       154

