# IMPORTING REQUIRED LIBRARIES

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler

# READING THE DATASET

In [None]:
# Load the diabetes CSV (expected in the working directory) into a DataFrame.
# The bare name on the last line renders the frame as the cell's output.
df = pd.read_csv(filepath_or_buffer="diabetes.csv")
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


# CHECKING THE SHAPE OF DATASET

In [None]:
# (n_rows, n_columns) of the loaded frame.
df.shape

(768, 9)

# DESCRIBING THE DATASET

In [None]:
# Per-column summary statistics: count, mean, std, min, quartiles and max.
# NOTE(review): the recorded output shows a minimum of 0 for Glucose, BloodPressure,
# SkinThickness, Insulin and BMI — these zeros presumably encode missing
# measurements rather than real readings; confirm before modelling.
df.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


# CHECKING THE NULL VALUES IN DATA

In [None]:
# Count missing (NaN/None) entries in each column.
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

# PRINTING COLUMN NAMES

In [None]:
# Column labels of the frame; 'Outcome' is the one later used as the target.
df.columns

Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

# COMPARING THE MEAN OF EACH INDEPENDENT VARIABLE FOR EACH OUTCOME CLASS

In [None]:
# Legend for the Outcome codes, followed by the mean of every feature
# computed separately for each Outcome value.
print('0------>Non-diabetic', '1------>Diabetic', sep='\n')
print(df.groupby(by='Outcome').mean())

0------>Non-diabetic
1------>Diabetic
         Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \
Outcome                                                                      
0           3.298000  109.980000      68.184000      19.664000   68.792000   
1           4.865672  141.257463      70.824627      22.164179  100.335821   

               BMI  DiabetesPedigreeFunction        Age  
Outcome                                                  
0        30.304200                  0.429734  31.190000  
1        35.142537                  0.550500  37.067164  


# IMPORTING THE REQUIRED MODELS AND METRICS

In [None]:
# Helper that splits features/target into train and test subsets.
from sklearn.model_selection import train_test_split

# Classification metric.
# NOTE(review): accuracy_score is not referenced in the cells visible here — confirm it is still needed.
from sklearn.metrics import accuracy_score

# Support-vector classifier.
# NOTE(review): SVC is not referenced in the cells visible here — confirm it is still needed.
from sklearn.svm import SVC

# TensorFlow and its bundled Keras API, used to build the neural network.
import tensorflow as tf
# Set TensorFlow's global random seed before any model is built — presumably
# for repeatable training runs. Python's and NumPy's RNGs are not seeded here.
tf.random.set_seed(3)
from tensorflow import keras


# SPLITTING DEPENDENT AND INDEPENDENT VARIABLES AND STORING IN DIFFERENT VARIABLES

In [None]:
# Features: every column except the label. Target: the 'Outcome' column.
x = df.drop(columns=['Outcome'])
y = df['Outcome']

In [None]:
# Display the target Series (pandas elides the middle rows in the rendered output).
y

0      1
1      0
2      1
3      0
4      1
      ..
763    0
764    0
765    0
766    1
767    0
Name: Outcome, Length: 768, dtype: int64

In [None]:
# Display the feature frame, i.e. the data without the 'Outcome' column.
x

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33
...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63
764,2,122,70,27,0,36.8,0.340,27
765,5,121,72,23,112,26.2,0.245,30
766,1,126,60,0,0,30.1,0.349,47


# DIVIDING THE DATA INTO TRAIN AND TEST DATASETS

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Shapes of the full feature frame and of its train / test partitions.
tuple(part.shape for part in (x, x_train, x_test))

((768, 8), (614, 8), (154, 8))

# STANDARDIZING THE DATA

In [None]:
# Learn per-feature mean and standard deviation from the training split only,
# then apply that same scaling to both splits so no test-set statistics leak
# into the fit.
standard = StandardScaler()
standard.fit(x_train)
x_train_standard = standard.transform(x_train)
x_test_standard = standard.transform(x_test)
print(x_train_standard)

[[-0.85811767  0.06488386  0.25332145 ... -0.51313743 -1.10316947
  -0.27704152]
 [-0.85811767 -0.84697246  0.66358026 ...  0.4081093  -0.71238555
   0.84376203]
 [-1.15412006 -0.87841578  0.04819205 ...  1.49569224 -0.37742791
  -1.05298243]
 ...
 [ 0.02988949  0.09632718  0.04819205 ... -0.3723914   1.96433735
   1.01619334]
 [-0.2661129  -0.18666271  0.25332145 ... -0.70506383 -1.08260189
  -0.79433546]
 [ 0.02988949 -0.37532264 -0.15693736 ...  0.0114614  -0.01308802
  -0.36325717]]


# DEFINING THE LAYERS OF THE NEURAL NETWORK

In [None]:
# Small feed-forward classifier: 8 input features -> 24 ReLU units -> 2 class
# probabilities.
model = keras.Sequential([
    # Input is already a flat vector of 8 features, so Flatten only declares
    # the input shape here.
    keras.layers.Flatten(input_shape=(8,)),
    keras.layers.Dense(24, activation='relu'),
    # softmax (not sigmoid): the two units represent mutually exclusive classes
    # scored with sparse categorical cross-entropy, so each output row must be a
    # probability distribution that sums to 1. Independent sigmoids produced
    # rows such as [0.63, 0.66] that cannot be read as class probabilities.
    keras.layers.Dense(2, activation='softmax')
])

# COMPILING THE NEURAL NETWORK

In [None]:
# Configure training: integer class labels scored with sparse categorical
# cross-entropy, optimized with Adam, reporting accuracy each epoch.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# TRAINING THE MODEL

In [None]:
# Train for 150 epochs on the standardized training data, holding back 10% of
# it for per-epoch validation. The returned History object is the cell output.
model.fit(
    x=x_train_standard,
    y=y_train,
    epochs=150,
    validation_split=0.1,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

<keras.callbacks.History at 0x7fd4e2798190>

# CHECKING THE ACCURACY

In [None]:
# Score the fitted network on the training split. evaluate() is unpacked into
# the loss and the 'accuracy' metric registered at compile time.
loss, accuracy = model.evaluate(x=x_train_standard, y=y_train)
accuracy



0.8110749125480652

In [None]:
# Score the fitted network on the held-out test split. This rebinds `loss` and
# `accuracy`, so they now hold the test-set figures.
loss, accuracy = model.evaluate(x=x_test_standard, y=y_test)
accuracy



0.8246753215789795

In [None]:
# Run the network on the standardized test features and keep its raw outputs
# (one row per test sample, one column per output unit).
y_predict = model.predict(x=x_test_standard)



In [None]:
# Inspect the raw network outputs: one row per test sample, one column per output unit.
# NOTE(review): this renders the entire array; a slice such as y_predict[:5]
# would keep the notebook output short.
y_predict

array([[0.9396495 , 0.05699902],
       [0.7640776 , 0.27956933],
       [0.89978266, 0.17407067],
       [0.29636207, 0.48681882],
       [0.63549536, 0.6621204 ],
       [0.9012107 , 0.14433429],
       [0.91191304, 0.10752293],
       [0.677828  , 0.550845  ],
       [0.8635192 , 0.10822365],
       [0.5296168 , 0.66695696],
       [0.46392912, 0.44264278],
       [0.5321866 , 0.43198103],
       [0.46364856, 0.9276138 ],
       [0.8347799 , 0.53997344],
       [0.87398875, 0.05595716],
       [0.50116134, 0.8619334 ],
       [0.3570565 , 0.88840127],
       [0.9390021 , 0.0536364 ],
       [0.85648406, 0.51834387],
       [0.37251163, 0.6742141 ],
       [0.4073012 , 0.9507231 ],
       [0.33438993, 0.897104  ],
       [0.8822939 , 0.1072677 ],
       [0.8409537 , 0.6917976 ],
       [0.84615505, 0.1172843 ],
       [0.76795965, 0.26941735],
       [0.7106528 , 0.6094378 ],
       [0.80977666, 0.6615816 ],
       [0.6821078 , 0.35984033],
       [0.73335826, 0.43213063],
       [0.

# SAVING THE MODEL TO A PICKLE FILE

In [None]:
import pickle

In [None]:
filename = 'diabetes_model.pkl'
# Use a context manager so the file handle is flushed and closed even if
# pickling raises; the previous bare open() call was never closed.
# NOTE(review): pickling a Keras model depends on the installed TensorFlow/Keras
# version; Keras' own model.save() is the supported persistence route. The
# fitted StandardScaler (`standard`) is not saved here, although the model
# expects standardized inputs at inference time — consider persisting it too.
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)
 