In [1]:
# Step 1: Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV
import joblib  # For saving/loading model
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Location of the diabetes dataset. The file carries an .xls extension but is
# parsed as delimited text by read_csv below.
# NOTE(review): this absolute Windows path is machine-specific, so the cell
# fails on any other machine — consider a path relative to the notebook.
DATA_PATH = "F:\\0. The Data Psychology\\4. New_Machine Learning\\2. Logistic Regression\\diabetes.xls"
df = pd.read_csv(DATA_PATH)

In [5]:
# Work on an independent deep copy so the frame loaded from disk stays untouched.
data = df.copy(deep=True)

In [7]:
# Preview the first five rows of the working frame.
data.head(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [8]:
# Preview the last five rows of the working frame.
data.tail(n=5)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.34,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1
767,1,93,70,31,0,30.4,0.315,23,0


In [9]:
# Count missing (NaN) entries per column.
# NOTE(review): this only detects NaN; readings recorded as 0 in place of a
# missing measurement would not be counted here — TODO confirm.
data.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [10]:
# List the column labels of the working frame.
data.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [12]:
# Summary statistics, transposed so each column of the data becomes one row.
# NOTE(review): the output shows a minimum of 0 for Glucose, BloodPressure,
# SkinThickness, Insulin and BMI — presumably missing readings encoded as 0;
# verify before modelling.
data.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Pregnancies,768.0,3.845052,3.369578,0.0,1.0,3.0,6.0,17.0
Glucose,768.0,120.894531,31.972618,0.0,99.0,117.0,140.25,199.0
BloodPressure,768.0,69.105469,19.355807,0.0,62.0,72.0,80.0,122.0
SkinThickness,768.0,20.536458,15.952218,0.0,0.0,23.0,32.0,99.0
Insulin,768.0,79.799479,115.244002,0.0,0.0,30.5,127.25,846.0
BMI,768.0,31.992578,7.88416,0.0,27.3,32.0,36.6,67.1
DiabetesPedigreeFunction,768.0,0.471876,0.331329,0.078,0.24375,0.3725,0.62625,2.42
Age,768.0,33.240885,11.760232,21.0,24.0,29.0,41.0,81.0
Outcome,768.0,0.348958,0.476951,0.0,0.0,0.0,1.0,1.0


In [13]:
# Report the (rows, columns) dimensions of the working frame.
data.shape

(768, 9)

In [16]:
# Feature matrix: every column except the target. Selecting by label rather
# than by a positional slice of the first 8 columns keeps this correct if
# columns are added or reordered.
X = data.drop(columns="Outcome")

In [18]:
# Inspect the feature matrix (pandas truncates the display of long frames).
X

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


In [19]:
# Target vector: the Outcome column, selected by label.
y = data["Outcome"]

In [20]:
# Inspect the target vector.
y

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64

In [21]:
# Hold out 25% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified on y, so class proportions may
# differ between the train and test partitions.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=45,
)

In [22]:
# Inspect the training features produced by the split.
x_train

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
133,8,84,74,31,0,38.3,0.457,39
672,10,68,106,23,49,35.5,0.285,47
126,3,120,70,30,135,42.9,0.452,30
589,0,73,0,0,0,21.1,0.342,25
468,8,120,0,0,0,30.0,0.183,38
...,...,...,...,...,...,...,...,...
725,4,112,78,40,0,39.4,0.236,38
607,1,92,62,25,41,19.5,0.482,25
544,1,88,78,29,76,32.0,0.365,29
643,4,90,0,0,0,28.0,0.610,31


In [23]:
# Inspect the held-out test features produced by the split.
x_test

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
195,5,158,84,41,210,39.4,0.395,29
51,1,101,50,15,36,24.2,0.526,26
66,0,109,88,30,0,32.5,0.855,38
437,5,147,75,0,0,29.9,0.434,28
665,1,112,80,45,132,34.8,0.217,24
...,...,...,...,...,...,...,...,...
671,1,99,58,10,0,25.4,0.551,21
422,0,102,64,46,78,40.6,0.496,21
742,1,109,58,18,116,28.5,0.219,22
331,2,87,58,16,52,32.7,0.166,25


In [24]:
# Inspect the training labels produced by the split.
y_train

133    0
672    0
126    0
589    0
468    1
      ..
725    0
607    0
544    0
643    0
414    1
Name: Outcome, Length: 576, dtype: int64

In [25]:
# Inspect the held-out test labels produced by the split.
y_test

195    1
51     0
66     1
437    0
665    0
      ..
671    0
422    0
742    0
331    0
699    0
Name: Outcome, Length: 192, dtype: int64

In [26]:
# Create the feature scaler with default settings; it is not fitted yet.
scaler = StandardScaler()

In [27]:
# Learn the scaling parameters from the training features only, then apply
# them to those same features.
x_train_scale = scaler.fit(x_train).transform(x_train)

In [28]:
# Apply the already-fitted scaler to the test features. The test set is only
# transformed, never fitted, so its statistics do not influence the scaling.
x_test_scale = scaler.transform(x_test)

In [29]:
# Logistic regression classifier constructed with the library's default settings.
LR = LogisticRegression()

In [31]:
# Train the classifier on the scaled training features. fit() returns the
# estimator itself, so rebinding LR is unnecessary; the trailing semicolon
# keeps the notebook from echoing the estimator repr.
LR.fit(x_train_scale, y_train);

In [32]:
# Score the fitted classifier on the scaled hold-out set.
LR.score(X=x_test_scale, y=y_test)

0.7395833333333334

In [35]:
# Predicted class labels for the scaled test features.
y_pred = LR.predict(x_test_scale)

In [37]:
# Fraction of test labels predicted correctly. The value is stored but not
# displayed by this cell.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

In [38]:
# Confusion matrix on the hold-out set: rows are the true classes, columns the
# predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[109,  21],
       [ 29,  33]], dtype=int64)

In [39]:
# Number of test labels, used as the denominator of the manual accuracy check.
y_test.shape

(192,)

In [41]:
# Cross-check accuracy from the confusion matrix: correct predictions sit on
# the diagonal, so accuracy = trace / total. Deriving the counts from the
# matrix instead of hand-copying them keeps this check valid if the split,
# seed or model changes.
cm = confusion_matrix(y_test, y_pred)
float(cm.trace() / cm.sum())

0.7395833333333334

In [43]:
# Per-class precision, recall and F1 on the hold-out set, printed as plain text.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.79      0.84      0.81       130
           1       0.61      0.53      0.57        62

    accuracy                           0.74       192
   macro avg       0.70      0.69      0.69       192
weighted avg       0.73      0.74      0.73       192

