In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [62]:
df.corr()['Outcome']

Pregnancies                 0.221898
Glucose                     0.466581
BloodPressure               0.065068
SkinThickness               0.074752
Insulin                     0.130548
BMI                         0.292695
DiabetesPedigreeFunction    0.173844
Age                         0.238356
Outcome                     1.000000
Name: Outcome, dtype: float64

In [65]:
df.drop(['SkinThickness','BloodPressure'], axis=1, inplace=True)
df.head()

Unnamed: 0,Pregnancies,Glucose,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,0,33.6,0.627,50,1
1,1,85,0,26.6,0.351,31,0
2,8,183,0,23.3,0.672,32,1
3,1,89,94,28.1,0.167,21,0
4,0,137,168,43.1,2.288,33,1


In [66]:
X = df.iloc[:,0:-1]
y = df.iloc[:, -1]
X.shape

(768, 6)

In [67]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)
X

array([[ 0.63994726,  0.84832379, -0.69289057,  0.20401277,  0.46849198,
         1.4259954 ],
       [-0.84488505, -1.12339636, -0.69289057, -0.68442195, -0.36506078,
        -0.19067191],
       [ 1.23388019,  1.94372388, -0.69289057, -1.10325546,  0.60439732,
        -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.27959377, -0.73518964, -0.68519336,
        -0.27575966],
       [-0.84488505,  0.1597866 , -0.69289057, -0.24020459, -0.37110101,
         1.17073215],
       [-0.84488505, -0.8730192 , -0.69289057, -0.20212881, -0.47378505,
        -0.87137393]])

In [68]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

In [9]:
import tensorflow as tf
from tensorflow import keras

In [18]:
model = keras.Sequential([
    keras.layers.Dense(32, input_dim=8, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])
model.compile(loss='binary_crossentropy', metrics=['accuracy'], optimizer='adam')
model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test))

Epoch 1/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.5049 - loss: 0.7002 - val_accuracy: 0.6104 - val_loss: 0.6582
Epoch 2/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6564 - loss: 0.6386 - val_accuracy: 0.7143 - val_loss: 0.6061
Epoch 3/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7085 - loss: 0.5937 - val_accuracy: 0.7532 - val_loss: 0.5720
Epoch 4/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7280 - loss: 0.5639 - val_accuracy: 0.7857 - val_loss: 0.5480
Epoch 5/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7492 - loss: 0.5414 - val_accuracy: 0.7857 - val_loss: 0.5296
Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7590 - loss: 0.5241 - val_accuracy: 0.7922 - val_loss: 0.5191
Epoch 7/100
[1m20/20[0m [32m━━

<keras.src.callbacks.history.History at 0x17906336850>

# Using Keras Tuner to automate for finding the best Hyperparameter for the model

In [17]:
import keras_tuner as kt
# 1. Hoe to select appropriate optimizer
# 2. No. of nodes in hidden layer
# 3. How to select no. of layers
# 4. All in all one model

In [23]:
# selecting appropriate optimizer
def build_model(hp):
    model = keras.Sequential([
        keras.layers.Dense(32, activation='relu', input_dim=8),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    optimizer = hp.Choice('optimizer', values=['adam','sgd','rmsprop','adadelta'])
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [25]:
tuner = kt.RandomSearch(build_model, 
                       objective='val_accuracy',
                       max_trials=5)
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 4 Complete [00h 00m 03s]
val_accuracy: 0.7142857313156128

Best val_accuracy So Far: 0.7857142686843872
Total elapsed time: 00h 00m 13s


In [30]:
tuner.get_best_hyperparameters()[0].values

{'optimizer': 'rmsprop'}

In [31]:
model = tuner.get_best_models(num_models=1)[0]

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [32]:
model.summary()

In [33]:
model.fit(X_train, y_train, batch_size=32, epochs=100, initial_epoch=6, validation_data=(X_test, y_test))

Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 19ms/step - accuracy: 0.7394 - loss: 0.5341 - val_accuracy: 0.8117 - val_loss: 0.5134
Epoch 8/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7524 - loss: 0.5150 - val_accuracy: 0.8052 - val_loss: 0.5016
Epoch 9/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7573 - loss: 0.5027 - val_accuracy: 0.8052 - val_loss: 0.4923
Epoch 10/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7671 - loss: 0.4932 - val_accuracy: 0.7987 - val_loss: 0.4842
Epoch 11/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7638 - loss: 0.4856 - val_accuracy: 0.8052 - val_loss: 0.4782
Epoch 12/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7687 - loss: 0.4793 - val_accuracy: 0.8052 - val_loss: 0.4749
Epoch 13/100
[1m20/20[0m [3

<keras.src.callbacks.history.History at 0x17908b03350>

In [37]:
# Selecting the best no. of nodes for layer
def build_model(hp):
    model = keras.Sequential()

    units = hp.Int('units', min_value=8, max_value=128, step=8)

    model.add(keras.layers.Dense(units=units, activation='relu', input_dim=8))
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [42]:
tuner = kt.RandomSearch(build_model,max_trials=10, objective='val_accuracy', directory='mydir', project_name='nodes_in_layer')
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 10 Complete [00h 00m 03s]
val_accuracy: 0.3896103799343109

Best val_accuracy So Far: 0.8116883039474487
Total elapsed time: 00h 00m 56s


In [47]:
tuner.get_best_hyperparameters()[0].values
model = tuner.get_best_models(num_models=1)[0]

{'units': 104}

In [46]:
model.fit(X_train, y_train, epochs=100, validation_data=(X_test, y_test), initial_epoch=5)

Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.7818 - loss: 0.4673 - val_accuracy: 0.7857 - val_loss: 0.4649
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7866 - loss: 0.4589 - val_accuracy: 0.7857 - val_loss: 0.4633
Epoch 8/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7704 - loss: 0.4558 - val_accuracy: 0.7727 - val_loss: 0.4637
Epoch 9/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7818 - loss: 0.4535 - val_accuracy: 0.7857 - val_loss: 0.4615
Epoch 10/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7752 - loss: 0.4516 - val_accuracy: 0.7922 - val_loss: 0.4607
Epoch 11/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7769 - loss: 0.4496 - val_accuracy: 0.7922 - val_loss: 0.4616
Epoch 12/100
[1m20/20[0m [32

<keras.src.callbacks.history.History at 0x1790f05c650>

In [50]:
# Selcting the best no. of layers
def build_model(hp):
    model = keras.Sequential([
        keras.layers.Dense(104, activation='relu', input_dim=8)
    ])

    for i in range(hp.Int('num_layers', min_value=1, max_value=10)):
        model.add(keras.layers.Dense(104, activation='relu'))

    model.add(keras.layers.Dense(1, activation='sigmoid'))
    model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [51]:
tuner = kt.RandomSearch(build_model, max_trials=10, objective='val_accuracy', directory='mydir', project_name='number_of_layers')
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 9 Complete [00h 00m 04s]
val_accuracy: 0.8116883039474487

Best val_accuracy So Far: 0.8116883039474487
Total elapsed time: 00h 00m 30s


In [52]:
tuner.get_best_hyperparameters()[0].values

{'num_layers': 3}

In [53]:
model = tuner.get_best_models(num_models=1)[0]
model.fit(X_train, y_train, epochs=100, initial_epoch=5, validation_data=(X_test, y_test))

Epoch 6/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.7752 - loss: 0.4483 - val_accuracy: 0.7857 - val_loss: 0.4990
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7964 - loss: 0.4229 - val_accuracy: 0.7922 - val_loss: 0.4822
Epoch 8/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7948 - loss: 0.4152 - val_accuracy: 0.8052 - val_loss: 0.4737
Epoch 9/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8013 - loss: 0.4042 - val_accuracy: 0.7857 - val_loss: 0.4729
Epoch 10/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8127 - loss: 0.3997 - val_accuracy: 0.7403 - val_loss: 0.5029
Epoch 11/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8257 - loss: 0.3884 - val_accuracy: 0.8312 - val_loss: 0.4848
Epoch 12/100
[1m20/20[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x17915c80de0>

In [84]:
# find the best model out of all the hyperparameter tuning
def build_model(hp):
    model = keras.Sequential()
    counter = 0
    # units = hp.Int('num_nodes', min_value=8, max_value=120, step=8)
    # activation_func = hp.Choice('acti_func', ['relu', 'tanh','sigmoid'])
    optimizer = hp.Choice('optimizer', ['adam','rmsprop', 'sgd', 'adadelta'])
    # drop = hp.Choice('drops', [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9])

    for i in range(hp.Int('num_layers', min_value=1, max_value=10)):
        if counter==0:
            model.add(keras.layers.Dense(hp.Int('num_nodes_'+str(i), min_value=8, max_value=120, step=8), 
                                         hp.Choice('acti_func_'+str(i), ['relu', 'tanh','sigmoid']), 
                                         input_dim=6))
            model.add(keras.layers.Dropout(hp.Choice('drops_'+str(i), [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9])))
        else: 
            model.add(keras.layers.Dense(hp.Int('num_nodes_'+str(i), min_value=8, max_value=120, step=8), 
                                         hp.Choice('acti_func_'+str(i), ['relu', 'tanh','sigmoid'])))
            model.add(keras.layers.Dropout(hp.Choice('drops_'+str(i), [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9])))
            
        counter += 1
    model.add(keras.layers.Dense(1, activation='sigmoid'))

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

In [85]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=20, directory='mydir2', project_name='best_model_selection')
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 20 Complete [00h 00m 06s]
val_accuracy: 0.7207792401313782

Best val_accuracy So Far: 0.7792207598686218
Total elapsed time: 00h 02m 38s


In [86]:
tuner.get_best_hyperparameters()[0].values

{'optimizer': 'sgd',
 'num_layers': 1,
 'num_nodes_0': 80,
 'acti_func_0': 'tanh',
 'drops_0': 0.6,
 'num_nodes_1': 24,
 'acti_func_1': 'relu',
 'drops_1': 0.7,
 'num_nodes_2': 48,
 'acti_func_2': 'relu',
 'drops_2': 0.1,
 'num_nodes_3': 112,
 'acti_func_3': 'sigmoid',
 'drops_3': 0.7,
 'num_nodes_4': 8,
 'acti_func_4': 'sigmoid',
 'drops_4': 0.9,
 'num_nodes_5': 104,
 'acti_func_5': 'sigmoid',
 'drops_5': 0.1,
 'num_nodes_6': 112,
 'acti_func_6': 'relu',
 'drops_6': 0.8,
 'num_nodes_7': 8,
 'acti_func_7': 'sigmoid',
 'drops_7': 0.3,
 'num_nodes_8': 80,
 'acti_func_8': 'sigmoid',
 'drops_8': 0.7,
 'num_nodes_9': 80,
 'acti_func_9': 'relu',
 'drops_9': 0.5}

In [87]:
model = tuner.get_best_models(num_models=1)[0]
model.fit(X_train, y_train, epochs=100, initial_epoch=5, validation_data=(X_test, y_test))

Epoch 6/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.6889 - loss: 0.5918 - val_accuracy: 0.7792 - val_loss: 0.5353
Epoch 7/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6840 - loss: 0.5720 - val_accuracy: 0.7792 - val_loss: 0.5250
Epoch 8/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7134 - loss: 0.5515 - val_accuracy: 0.7922 - val_loss: 0.5167
Epoch 9/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7280 - loss: 0.5522 - val_accuracy: 0.7987 - val_loss: 0.5092
Epoch 10/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7345 - loss: 0.5297 - val_accuracy: 0.8117 - val_loss: 0.5022
Epoch 11/100
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7264 - loss: 0.5344 - val_accuracy: 0.8117 - val_loss: 0.4977
Epoch 12/100
[1m20/20[0m [32

<keras.src.callbacks.history.History at 0x17969a044b0>