In [142]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

In [143]:
df = pd.read_csv('diabetes_pima.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [144]:
df.corr()['Outcome']

Pregnancies                 0.221898
Glucose                     0.466581
BloodPressure               0.065068
SkinThickness               0.074752
Insulin                     0.130548
BMI                         0.292695
DiabetesPedigreeFunction    0.173844
Age                         0.238356
Outcome                     1.000000
Name: Outcome, dtype: float64

In [145]:
X = df.iloc[:,:-1].values
y = df.iloc[:,-1].values

In [146]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X = scaler.fit_transform(X)
X.shape

(768, 8)

In [147]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

In [148]:
import tensorflow
from tensorflow import keras
from keras import Sequential, Input
from keras.layers import Dense, BatchNormalization

model = Sequential(
    [
        Input(shape=(X_train.shape[1],)),
        Dense(32, activation='relu'),
        BatchNormalization(),
        Dense(16, activation='relu'),
        BatchNormalization(),
        Dense(1, activation='sigmoid')
    ]
)
model.summary()

In [149]:
model.compile(
    optimizer='SGD',
    loss='binary_crossentropy',
    metrics=['accuracy']
)
history = model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32
)

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5570 - loss: 0.8384 - val_accuracy: 0.6911 - val_loss: 0.6176
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6296 - loss: 0.7037 - val_accuracy: 0.6992 - val_loss: 0.6118
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6771 - loss: 0.6194 - val_accuracy: 0.7236 - val_loss: 0.6032
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6867 - loss: 0.6020 - val_accuracy: 0.7154 - val_loss: 0.5908
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.6873 - loss: 0.6036 - val_accuracy: 0.7236 - val_loss: 0.5784
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7129 - loss: 0.5608 - val_accuracy: 0.7317 - val_loss: 0.5650
Epoch 7/100
[1m16/16[0m [32m━━━

In [150]:
import keras_tuner as kt
from keras.layers import BatchNormalization, Dropout

In [151]:
def build_model(hp):
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1],))) 

    model.add(Dense(
        units=hp.Int('units_layer_1', min_value=16, max_value=128, step=16),
        activation='relu'
    ))

    model.add(Dropout(rate=hp.Float('dropout_layer_1', min_value=0, max_value=0.5, step=0.1)))
    model.add(BatchNormalization())

    model.add(Dense(
        units=hp.Int('units_layer_2', min_value=16, max_value=96, step=16),
        activation='relu'
    ))

    model.add(Dropout(rate=hp.Float('dropout_layer_2', min_value=0, max_value=0.5, step=0.1)))
    model.add(BatchNormalization())

    model.add(Dense(1, activation='sigmoid'))

    optimizer = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop'])

    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

In [152]:
tuner = kt.RandomSearch(
    build_model,
    objective='val_loss',
    max_trials=5,
    directory='trials_diabetes_tuning',
    project_name='diabetes_tuning'
)

In [153]:
tuner.search(
    X_train, y_train,
    validation_split=0.2,
    epochs=50
)

Trial 5 Complete [00h 00m 03s]
val_loss: 0.4040256142616272

Best val_loss So Far: 0.4040256142616272
Total elapsed time: 00h 00m 14s


In [154]:
model = tuner.get_best_models(num_models=1)[0]
model.summary()

  saveable.load_own_variables(weights_store.get(inner_path))


In [155]:
model.fit(
    X_train, y_train,
    validation_split=0.2,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32
)

Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8315 - loss: 0.3835 - val_accuracy: 0.7468 - val_loss: 0.5599
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7968 - loss: 0.3933 - val_accuracy: 0.7273 - val_loss: 0.5743
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8119 - loss: 0.4000 - val_accuracy: 0.7468 - val_loss: 0.5697
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8044 - loss: 0.4001 - val_accuracy: 0.7338 - val_loss: 0.5801
Epoch 5/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8490 - loss: 0.3385 - val_accuracy: 0.7403 - val_loss: 0.5832
Epoch 6/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7915 - loss: 0.4249 - val_accuracy: 0.7273 - val_loss: 0.5742
Epoch 7/50
[1m20/20[0m [32m━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1430c3550>