In [4]:
import numpy as np  
import pandas as pd  
import matplotlib.pyplot as plt

In [5]:
df = pd.read_csv("diabetes.csv")
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
df.corr()['Outcome'].sort_values()

BloodPressure               0.065068
SkinThickness               0.074752
Insulin                     0.130548
DiabetesPedigreeFunction    0.173844
Pregnancies                 0.221898
Age                         0.238356
BMI                         0.292695
Glucose                     0.466581
Outcome                     1.000000
Name: Outcome, dtype: float64

In [7]:
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values

In [8]:
X

array([[  6.   , 148.   ,  72.   , ...,  33.6  ,   0.627,  50.   ],
       [  1.   ,  85.   ,  66.   , ...,  26.6  ,   0.351,  31.   ],
       [  8.   , 183.   ,  64.   , ...,  23.3  ,   0.672,  32.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,  26.2  ,   0.245,  30.   ],
       [  1.   , 126.   ,  60.   , ...,  30.1  ,   0.349,  47.   ],
       [  1.   ,  93.   ,  70.   , ...,  30.4  ,   0.315,  23.   ]])

In [9]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

In [10]:
X

array([[ 0.63994726,  0.84832379,  0.14964075, ...,  0.20401277,
         0.46849198,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.68442195,
        -0.36506078, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -1.10325546,
         0.60439732, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ..., -0.73518964,
        -0.68519336, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.24020459,
        -0.37110101,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.20212881,
        -0.47378505, -0.87137393]])

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

In [12]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import BinaryAccuracy
from tensorflow.keras.callbacks import EarlyStopping

In [13]:
model = Sequential()
# Here we have to stuff by intuition, so we can automate it
# we can do this by using hyperparameter tuning
model.add(Dense(32, activation='relu', input_dim=8))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_test, y_test))

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.5467 - loss: 0.7090 - val_accuracy: 0.6299 - val_loss: 0.6686
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5884 - loss: 0.6736 - val_accuracy: 0.7013 - val_loss: 0.6204
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6277 - loss: 0.6284 - val_accuracy: 0.7273 - val_loss: 0.5831
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6457 - loss: 0.6084 - val_accuracy: 0.7468 - val_loss: 0.5565
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6875 - loss: 0.5789 - val_accuracy: 0.7468 - val_loss: 0.5299
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7382 - loss: 0.5503 - val_accuracy: 0.7597 - val_loss: 0.5125
Epoch 7/100
[1m20/20[0m [32m━━

<keras.src.callbacks.history.History at 0x18a1cfc5a10>

In [15]:
!pip install keras-tuner



In [16]:
# How to select appropriate optimizer
# No. of nodes
# No. of hidden layers
# then we'll create a all in one model

In [20]:
import keras_tuner as kt

In [24]:
def build_model(hp):
    model = Sequential()
    model.add(Dense(32, activation='relu', input_dim=8))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adadelta'])

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model


In [25]:
tuner = kt.RandomSearch(build_model,
                        objective='val_accuracy',
                        max_trials=5,)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [26]:
tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Trial 4 Complete [00h 00m 02s]
val_accuracy: 0.5974025726318359

Best val_accuracy So Far: 0.8116883039474487
Total elapsed time: 00h 00m 09s


In [27]:
tuner.results_summary()

Results summary
Results in .\untitled_project
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0 summary
Hyperparameters:
optimizer: adam
Score: 0.8116883039474487

Trial 1 summary
Hyperparameters:
optimizer: sgd
Score: 0.7727272510528564

Trial 2 summary
Hyperparameters:
optimizer: rmsprop
Score: 0.7532467246055603

Trial 3 summary
Hyperparameters:
optimizer: adadelta
Score: 0.5974025726318359


In [28]:
tuner.get_best_hyperparameters()[0].values

{'optimizer': 'adam'}

In [30]:
model = tuner.get_best_models(num_models=1)[0]

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [31]:
model.summary()

In [32]:
model.fit(X_train, y_train, batch_size=32, epochs=200, initial_epoch=100, validation_data=(X_test, y_test))

Epoch 101/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.7810 - loss: 0.5073 - val_accuracy: 0.8117 - val_loss: 0.4525
Epoch 102/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7645 - loss: 0.5070 - val_accuracy: 0.8052 - val_loss: 0.4469
Epoch 103/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7700 - loss: 0.4762 - val_accuracy: 0.7987 - val_loss: 0.4410
Epoch 104/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8087 - loss: 0.4454 - val_accuracy: 0.7987 - val_loss: 0.4367
Epoch 105/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7745 - loss: 0.4825 - val_accuracy: 0.7987 - val_loss: 0.4369
Epoch 106/200
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7567 - loss: 0.5019 - val_accuracy: 0.8052 - val_loss: 0.4350
Epoch 107/200
[1m20/

<keras.src.callbacks.history.History at 0x18a24565c10>

In [35]:
def build_model(hp):
    model = Sequential()
    units = hp.Int('units', 8, 128, step=8)

    model.add(Dense(units, activation='relu', input_dim=8))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [39]:
tuner = kt.RandomSearch(build_model,
                        objective='val_accuracy',
                        max_trials=5,
                        directory='my_dir',)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [40]:
tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 03s]
val_accuracy: 0.798701286315918

Best val_accuracy So Far: 0.8311688303947449
Total elapsed time: 00h 00m 11s


In [41]:
tuner.results_summary()

Results summary
Results in my_dir\untitled_project
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 0 summary
Hyperparameters:
units: 48
Score: 0.8311688303947449

Trial 2 summary
Hyperparameters:
units: 24
Score: 0.8311688303947449

Trial 1 summary
Hyperparameters:
units: 88
Score: 0.8246753215789795

Trial 3 summary
Hyperparameters:
units: 32
Score: 0.8116883039474487

Trial 4 summary
Hyperparameters:
units: 40
Score: 0.798701286315918


In [42]:
tuner.get_best_hyperparameters()[0].values

{'units': 48}

In [43]:
tuner.get_best_models(num_models=1)[0]

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


<Sequential name=sequential, built=True>

In [44]:
model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_test, y_test))

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8294 - loss: 0.3971 - val_accuracy: 0.7922 - val_loss: 0.4482
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8366 - loss: 0.4127 - val_accuracy: 0.7987 - val_loss: 0.4456
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8290 - loss: 0.3859 - val_accuracy: 0.7987 - val_loss: 0.4464
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8311 - loss: 0.3867 - val_accuracy: 0.7987 - val_loss: 0.4481
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8293 - loss: 0.3878 - val_accuracy: 0.7987 - val_loss: 0.4473
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8017 - loss: 0.4065 - val_accuracy: 0.7987 - val_loss: 0.4477
Epoch 7/100
[1m20/20[0m [32m━━━

<keras.src.callbacks.history.History at 0x18a259dfa90>

In [47]:
# Now let's check for hidde layers
def buid_model(hp):
    model = Sequential()
    model.add(Dense(32, activation='relu', input_dim=8))

    for i in range(hp.Int('num_layers', min_value=1, max_value=5)):
        model.add(Dense(32, activation='relu'))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [48]:
tuner = kt.RandomSearch(build_model,
                        objective='val_accuracy',
                        max_trials=5,
                        directory='layer_dir')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [49]:
tuner.search(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 02s]
val_accuracy: 0.8181818127632141

Best val_accuracy So Far: 0.8376623392105103
Total elapsed time: 00h 00m 11s


In [50]:
tuner.results_summary()

Results summary
Results in layer_dir\untitled_project
Showing 10 best trials
Objective(name="val_accuracy", direction="max")

Trial 2 summary
Hyperparameters:
units: 72
Score: 0.8376623392105103

Trial 0 summary
Hyperparameters:
units: 80
Score: 0.8311688303947449

Trial 4 summary
Hyperparameters:
units: 88
Score: 0.8181818127632141

Trial 3 summary
Hyperparameters:
units: 40
Score: 0.8051947951316833

Trial 1 summary
Hyperparameters:
units: 48
Score: 0.798701286315918


In [51]:
tuner.get_best_hyperparameters()[0].values

{'units': 72}

In [None]:
model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_test, y_test))