In [1]:
import numpy as np
import pandas as pd
# Location of the raw CSV (Colab upload path); kept in one place so it is easy to change.
DATA_PATH = '/content/diabetes.csv'

# Load the dataset and preview the first 20 rows.
df = pd.read_csv(DATA_PATH)
df.head(20)

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
5,5,116,74,0,0,25.6,0.201,30,0
6,3,78,50,32,88,31.0,0.248,26,1
7,10,115,0,0,0,35.3,0.134,29,0
8,2,197,70,45,543,30.5,0.158,53,1
9,8,125,96,0,0,0.0,0.232,54,1


In [2]:
# Count NaN entries per column.
# NOTE(review): the preview above shows 0 in BloodPressure, SkinThickness, Insulin and
# BMI; these look like placeholder missing values that isnull() cannot detect —
# confirm against the data source before trusting the all-zero result.
df.isnull().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [4]:
# Correlation of every column with the target column 'Outcome'.
df.corr()['Outcome']

Pregnancies                 0.221898
Glucose                     0.466581
BloodPressure               0.065068
SkinThickness               0.074752
Insulin                     0.130548
BMI                         0.292695
DiabetesPedigreeFunction    0.173844
Age                         0.238356
Outcome                     1.000000
Name: Outcome, dtype: float64

In [3]:
# Features are every column except the last one; the last column is the label.
x, y = df.iloc[:, :-1], df.iloc[:, -1]

In [5]:
x.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [11]:
x.shape

(768, 8)

In [12]:
y.head()

0    1
1    0
2    1
3    0
4    1
Name: Outcome, dtype: int64

In [6]:
# Pre-Processing - 

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x = scaler.fit_transform(x)

In [7]:
# Split the data into training and test

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=101)

# Building a Multilayer Perceptron (MLP) Model

In [8]:
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout

In [9]:
# Baseline MLP: 8 input features -> 32 ReLU units -> 1 sigmoid output.
model = Sequential([
    Dense(32, activation='relu', input_dim=8),
    Dense(1, activation='sigmoid'),
])


In [10]:
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [11]:
model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=32, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f29c10ee2c0>

In [12]:
# Wider MLP: 8 input features -> 224 ReLU units -> 1 sigmoid output.
model = Sequential([
    Dense(224, activation='relu', input_dim=8),
    Dense(1, activation='sigmoid'),
])


In [13]:
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])


In [14]:
model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=32, epochs=10)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f29c0305510>

The values of the validation metrics are close to the corresponding training metrics, suggesting that the model has not overfit the training data.

Based on these results, you can conclude that the model has learned to make reasonably accurate predictions on both the training and validation data.

# Hyperparameter Tuning - Keras Tuner


### 1. How to select an appropriate optimizer
### 2. Number of nodes in a layer
### 3. How to select the number of hidden layers
### 4. All of the above in one model

In [15]:
!pip install -U keras-tuner

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [16]:
# `kerastuner` is the deprecated alias of the package; `keras_tuner` is the
# supported module name and matches the import used later in this notebook.
import keras_tuner as kt

  import kerastuner as kt


In [17]:
# https://keras.io/keras_tuner/

In [18]:
# Hyperparameter for optimization
def build_model(hp):
  """Build a one-hidden-layer binary classifier with a tunable optimizer.

  The architecture is fixed (8 inputs -> Dense(32, relu) -> Dense(1, sigmoid));
  the only hyperparameter sampled from `hp` is the optimizer name.

  Args:
    hp: KerasTuner hyperparameter container supplied by the tuner.

  Returns:
    The compiled Keras model.
  """
  network = Sequential([
      Dense(32, activation='relu', input_dim = 8),
      Dense(1, activation='sigmoid'),
  ])
  optimizer_name = hp.Choice('optimizer', values=['adam','sgd','rmsprop','adadelta'])
  network.compile(loss='binary_crossentropy', optimizer=optimizer_name, metrics=['accuracy'])
  return network


In [19]:
# Best Optimization

tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5)

In [20]:
tuner.search(x_train, y_train, validation_data=(x_test, y_test), epochs=10)

In [33]:
tuner.get_best_hyperparameters()[0].values

{'optimizer': 'rmsprop'}

In [34]:
# RMSprop performed best among the optimizers tried

# Right number of neurons in the given layer

In [36]:
# Define the build_model function

def build_model(hp):
  """Build a one-hidden-layer binary classifier with a tunable layer width.

  The hidden layer size is sampled from 8 to 256 in steps of 8; the optimizer
  is fixed to RMSprop.

  Args:
    hp: KerasTuner hyperparameter container supplied by the tuner.

  Returns:
    The compiled Keras model.
  """
  hidden_units = hp.Int('units', min_value=8, max_value=256, step=8)
  network = Sequential([
      Dense(units=hidden_units, activation='relu', input_dim = 8),
      Dense(1, activation='sigmoid'),
  ])
  network.compile(loss='binary_crossentropy', optimizer= 'rmsprop', metrics=['accuracy'])
  return network


In [38]:
# Create the tuner object

# Random search (5 trials) over the layer width, ranked by validation accuracy.
# NOTE(review): trials are persisted under mydir/my_own_dir; with the default
# overwrite=False a re-run presumably resumes from the saved trials instead of
# searching again — confirm, or pass overwrite=True for a clean search.
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5,
                        directory='mydir', project_name='my_own_dir')

In [39]:
tuner.search(x_train, y_train, validation_data=(x_test, y_test), epochs=10)

Trial 5 Complete [00h 00m 03s]
val_accuracy: 0.798701286315918

Best val_accuracy So Far: 0.8116883039474487
Total elapsed time: 00h 00m 14s


In [48]:
tuner.get_best_hyperparameters()[0].values

{'units': 224}

In [41]:
# The search above selected 224 neurons as the best layer width

In [47]:
# Rebuild the network from the hyperparameters the search selected, rather than
# continuing to train whatever `model` object was left over from an earlier cell,
# then train the fresh model.
best_hp = tuner.get_best_hyperparameters()[0]
model = tuner.hypermodel.build(best_hp)
model.fit(x_train, y_train, batch_size=32, epochs=11, validation_data=(x_test, y_test))

Epoch 1/11
Epoch 2/11
Epoch 3/11
Epoch 4/11
Epoch 5/11
Epoch 6/11
Epoch 7/11
Epoch 8/11
Epoch 9/11
Epoch 10/11
Epoch 11/11


<keras.callbacks.History at 0x7fbe88ab7130>

# How many hidden layers are required

In [53]:
def build_model(hp):
    """Build a binary classifier with a tunable number of hidden layers.

    The model always starts with one Dense(224, relu) input layer; `num_layer`
    (1 to 10) further Dense(224, relu) layers are stacked on top of it, so the
    total number of hidden layers is `num_layer + 1`.

    Args:
        hp: KerasTuner hyperparameter container supplied by the tuner.

    Returns:
        The compiled Keras model (RMSprop, binary cross-entropy).
    """
    network = Sequential()
    network.add(Dense(224, activation='relu', input_dim = 8))

    extra_layers = hp.Int('num_layer', min_value=1, max_value=10)
    for _ in range(extra_layers):
        network.add(Dense(224, activation='relu'))

    network.add(Dense(1, activation='sigmoid'))
    network.compile(optimizer = 'rmsprop', loss='binary_crossentropy', metrics = ['accuracy'])
    return network


In [54]:
# Random search (5 trials) over the number of hidden layers, ranked by validation accuracy.
# NOTE(review): trials are persisted under mydir1/num_layers; with the default
# overwrite=False a re-run presumably resumes from the saved trials — confirm,
# or pass overwrite=True for a clean search.
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5,
                        directory='mydir1', project_name='num_layers')


In [55]:
tuner.search(x_train, y_train, validation_data=(x_test, y_test), epochs=10)

Trial 5 Complete [00h 00m 05s]
val_accuracy: 0.8116883039474487

Best val_accuracy So Far: 0.8116883039474487
Total elapsed time: 00h 00m 22s


In [60]:
tuner.get_best_hyperparameters()[0].values

{'num_layer': 6}

# All hyperparameters in one go

In [25]:
# Import necessary libraries
import keras_tuner as kt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout


In [26]:
# Define the build_model function
def build_model(hp):
    model = Sequential()
    counter = 0

    for i in range(hp.Int('num_layers', min_value=1, max_value=10)):
        if counter == 0:
            model.add(Dense(
                units=hp.Int('units' + str(i), min_value=8, max_value=128, step=8),
                activation=hp.Choice('activation' + str(i), values=['relu', 'tanh', 'sigmoid', 'elu', 'leaky_relu', 'softmax', 'linear']),
                input_dim=8
            ))
            model.add(Dropout(hp.Choice('dropout' + str(i), values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7])))
        else:
            model.add(Dense(
                units=hp.Int('units' + str(i), min_value=8, max_value=128, step=8),
                activation=hp.Choice('activation' + str(i), values=['relu', 'tanh', 'sigmoid', 'elu', 'leaky_relu', 'softmax', 'linear'])
            ))
            model.add(Dropout(hp.Choice('dropout' + str(i), values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7])))
        counter += 1

    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=hp.Choice('optimizer', values=['rmsprop', 'adam', 'sgd', 'adadelta', 'adagrad', 'adam']),
        loss='binary_cross_entropy',
        metrics=['accuracy']
    )
    return model


In [30]:
# Perform hyperparameter tuning
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='Best_Model',
    project_name='Final_HyprPar'
)

In [34]:
# Retrieve the best hyperparameters
# Keep the top-ranked hyperparameter set from the search and show its values.
top_candidates = tuner.get_best_hyperparameters(num_trials=1)
best_hyperparameters = top_candidates[0]
print(best_hyperparameters.values)


{'num_layers': 2, 'units0': 24, 'activation0': 'softmax', 'dropout0': 0.3, 'optimizer': 'adam', 'units1': 8, 'activation1': 'relu', 'dropout1': 0.1}


In [35]:
# Instantiate the architecture described by the best hyperparameters, then
# compile it explicitly with the chosen optimizer and binary cross-entropy.
chosen_optimizer = best_hyperparameters.get('optimizer')
final_model = tuner.hypermodel.build(best_hyperparameters)
final_model.compile(optimizer=chosen_optimizer, loss='binary_crossentropy', metrics=['accuracy'])


In [36]:
final_model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_test, y_test))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f2931f67ac0>

In [37]:
# Score the final model on the held-out split and report both metrics.
test_metrics = final_model.evaluate(x_test, y_test)
loss, accuracy = test_metrics
for label, value in (("Test Loss:", loss), ("Test Accuracy:", accuracy)):
    print(label, value)

Test Loss: 0.5865147709846497
Test Accuracy: 0.6688311696052551


In conclusion, after performing hyperparameter tuning using Keras Tuner, we built and trained a final model using the best hyperparameters obtained from the tuning process. The final model was compiled with the specified optimizer, loss function, and metrics.

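The trained model achieved a test loss of 0.5865 and a test accuracy of 0.6688. This is lower than the best validation accuracy of about 0.81 reached in the earlier single-hyperparameter searches, so the combined search did not improve on the simpler models. These metrics provide insights into the model's performance, but because the same test split was also used as the validation data during tuning, they are not a fully independent estimate of performance on unseen data. The test accuracy indicates the proportion of correctly classified samples, while the test loss measures the dissimilarity between the predicted and true labels.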

The hyperparameter tuning process allowed us to systematically explore different combinations of hyperparameters, optimizing the model's architecture and learning parameters. By finding the best hyperparameters, we aimed to enhance the model's ability to generalize and improve its overall performance.

