In [2]:
import numpy as np
import pandas as pd

In [3]:
# Load the diabetes dataset (diabetes.csv, resolved relative to the working directory) and preview the first rows
df = pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [4]:
# Correlation of every column with the Outcome label: a quick check of linear association, not proof that a column causes the outcome
df.corr()['Outcome']

Pregnancies                 0.221898
Glucose                     0.466581
BloodPressure               0.065068
SkinThickness               0.074752
Insulin                     0.130548
BMI                         0.292695
DiabetesPedigreeFunction    0.173844
Age                         0.238356
Outcome                     1.000000
Name: Outcome, dtype: float64

In [5]:
# Features: every column except the last one; target: the last column (Outcome)
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values


In [6]:
# Create the scaler that will be used to standardise the feature values
from sklearn.preprocessing import StandardScaler
scalar = StandardScaler()

In [7]:
X = scalar.fit_transform(X)

In [9]:
# train test 
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)


In [10]:
import tensorflow
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense

In [None]:
model = Sequential()

# Declare the 8 input features with an explicit Input layer: passing
# `input_dim` to Dense is deprecated in Keras 3 and triggers a UserWarning.
model.add(keras.Input(shape=(8,)))

# The hidden-layer width (32) is a hand-picked starting point found by trial
# and error; the tuner sections further down search for it automatically.
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  # single sigmoid unit -> probability of Outcome == 1

model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Train the baseline network for 100 epochs in mini-batches of 20; (X_test, y_test) is passed as validation data and evaluated after every epoch
model.fit(X_train, y_train, epochs=100, batch_size=20 , validation_data=(X_test, y_test))

Epoch 1/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 18ms/step - accuracy: 0.7560 - loss: 0.4720 - val_accuracy: 0.7727 - val_loss: 0.4710
Epoch 2/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7764 - loss: 0.4617 - val_accuracy: 0.7727 - val_loss: 0.4693
Epoch 3/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7678 - loss: 0.4555 - val_accuracy: 0.7792 - val_loss: 0.4699
Epoch 4/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7857 - loss: 0.4486 - val_accuracy: 0.7792 - val_loss: 0.4692
Epoch 5/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7715 - loss: 0.4730 - val_accuracy: 0.7792 - val_loss: 0.4681
Epoch 6/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.7660 - loss: 0.4552 - val_accuracy: 0.7857 - val_loss: 0.4689
Epoch 7/100
[1m31/31[0m [32m

<keras.src.callbacks.history.History at 0x2be0ec94620>

# But we can automate the search for the number of neurons and the number of layers using Keras Tuner

In [17]:


''' 
1. How to select appropiate optimiser
2. How to select no of nodes in a layer
3. How to select no of layers
4. All in one model

'''

' \n1. How to select appropiate optimiser\n2. How to select no of nodes in a layer\n3. How to select no of layers\n4. All in one model\n\n'

In [19]:
pip install -U keras-tuner

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.



In [20]:
# KerasTuner's documented module name is `keras_tuner`; `kerastuner` is the legacy name.
import keras_tuner as kt


### How to select an appropriate optimizer

In [21]:
# Model-building function for the tuner: the only thing searched here is the optimizer.
def build_model(hp):
    """Build and compile the 8 -> 32 -> 1 binary classifier with a tunable optimizer.

    Args:
        hp: the hyperparameter object handed in by the tuner; the
            ``'optimizer'`` choice is sampled from it.

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss and
        reporting accuracy.
    """
    model = Sequential()
    # Explicit Input layer instead of the deprecated `input_dim=` argument on
    # Dense, which triggers a UserWarning in Keras 3.
    model.add(keras.Input(shape=(8,)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    # Candidate optimizers, referenced by their Keras string identifiers.
    optimizer_name = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adadelta'])
    model.compile(optimizer=optimizer_name, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [22]:
# Create the tuner: random search over build_model's search space, ranked by val_accuracy, at most 5 trials
# NOTE(review): unlike the later tuners, no directory/project_name is passed here — confirm where KerasTuner stores (and reloads) these trials
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [23]:
# Run the search: every trial trains for 5 epochs and is scored on (X_test, y_test)
# NOTE(review): the test split is used for model selection here, so the reported val_accuracy is optimistic — consider a separate validation split
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 4 Complete [00h 00m 13s]
val_accuracy: 0.649350643157959

Best val_accuracy So Far: 0.7792207598686218
Total elapsed time: 00h 00m 33s


In [24]:
tuner.get_best_hyperparameters()[0].values  # hyperparameter values of the top-ranked trial, i.e. the optimizer the search selected

{'optimizer': 'adam'}

In [25]:
model = tuner.get_best_models(num_models=1)[0]   # take the single top-ranked model from the search

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [26]:
# Print the architecture of the model selected by the tuner
model.summary()

In [27]:
# Train the tuner-selected model further: 100 epochs, mini-batches of 20, evaluated on (X_test, y_test) after each epoch
model.fit(X_train, y_train, epochs=100, batch_size=20 , validation_data=(X_test, y_test))

Epoch 1/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 23ms/step - accuracy: 0.7293 - loss: 0.5587 - val_accuracy: 0.7792 - val_loss: 0.5212
Epoch 2/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7665 - loss: 0.5207 - val_accuracy: 0.7857 - val_loss: 0.5034
Epoch 3/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7596 - loss: 0.5368 - val_accuracy: 0.7792 - val_loss: 0.4901
Epoch 4/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7495 - loss: 0.5135 - val_accuracy: 0.7727 - val_loss: 0.4814
Epoch 5/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7820 - loss: 0.4788 - val_accuracy: 0.7727 - val_loss: 0.4758
Epoch 6/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7685 - loss: 0.4752 - val_accuracy: 0.7727 - val_loss: 0.4713
Epoch 7/100
[1m31/31[0m [32m━

<keras.src.callbacks.history.History at 0x2be0ed37440>

### Number of nodes in a layer

In [31]:
# Search for the width of the hidden layer.
# The earlier models used 32 neurons as an arbitrary pick; here the tuner decides.
def build_model(hp):
    """Build and compile a one-hidden-layer binary classifier with a tunable width.

    Args:
        hp: the hyperparameter object handed in by the tuner; the integer
            ``'units'`` (32 to 512 in steps of 32) is sampled from it.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, binary
        cross-entropy loss and reporting accuracy.
    """
    model = Sequential()
    # Explicit Input layer instead of the deprecated `input_dim=` argument on
    # Dense, which triggers a UserWarning in Keras 3.
    model.add(keras.Input(shape=(8,)))

    hidden_units = hp.Int('units', min_value=32, max_value=512, step=32)
    model.add(Dense(hidden_units, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [None]:
# Create the tuner for the hidden-layer width: random search ranked by val_accuracy, at most 5 trials
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5 , directory='output', project_name='diabetes')

# The 'output' folder keeps the records of every trial (under the 'diabetes' project name).


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [33]:
# Run the width search: every trial trains for 5 epochs and is scored on (X_test, y_test)
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 05s]
val_accuracy: 0.798701286315918

Best val_accuracy So Far: 0.798701286315918
Total elapsed time: 00h 00m 23s


In [34]:
tuner.get_best_hyperparameters()[0].values  # hyperparameter values of the top-ranked trial, i.e. the selected number of units

{'units': 352}

In [35]:
model = tuner.get_best_models(num_models=1)[0]   # take the single top-ranked model from the width search


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [36]:
# Train the tuner-selected model further: 100 epochs, mini-batches of 20, evaluated on (X_test, y_test) after each epoch
model.fit(X_train, y_train, epochs=100, batch_size=20 , validation_data=(X_test, y_test))
# Optionally pass initial_epoch=<n> so that training starts counting from epoch n

Epoch 1/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.7945 - loss: 0.4739 - val_accuracy: 0.7922 - val_loss: 0.4664
Epoch 2/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7691 - loss: 0.4744 - val_accuracy: 0.7727 - val_loss: 0.4630
Epoch 3/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7817 - loss: 0.4338 - val_accuracy: 0.7857 - val_loss: 0.4606
Epoch 4/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8085 - loss: 0.4198 - val_accuracy: 0.7922 - val_loss: 0.4622
Epoch 5/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7904 - loss: 0.4304 - val_accuracy: 0.7792 - val_loss: 0.4633
Epoch 6/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7905 - loss: 0.4121 - val_accuracy: 0.7922 - val_loss: 0.4594
Epoch 7/100
[1m31/31[0m [32m━

<keras.src.callbacks.history.History at 0x2be11e6d310>

### How to select number of layers

In [41]:
def build_model(hp):
    """Build and compile a binary classifier with a tunable number of hidden layers.

    The network always has a first 72-unit ReLU layer, followed by
    ``num_layers`` further 72-unit ReLU layers and a sigmoid output unit.

    Args:
        hp: the hyperparameter object handed in by the tuner; the integer
            ``'num_layers'`` (1 to 10) is sampled from it.

    Returns:
        A compiled ``Sequential`` model using the Adam optimizer, binary
        cross-entropy loss and reporting accuracy.
    """
    model = Sequential()

    # Explicit Input layer instead of the deprecated `input_dim=` argument on
    # Dense, which triggers a UserWarning in Keras 3.
    model.add(keras.Input(shape=(8,)))
    model.add(Dense(72, activation='relu'))  # fixed first hidden layer

    # Tunable depth: stack the sampled number of additional hidden layers.
    extra_layers = hp.Int('num_layers', min_value=1, max_value=10)
    for _ in range(extra_layers):
        model.add(Dense(72, activation='relu'))

    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [42]:
# Create the tuner for the number of layers: random search ranked by val_accuracy, at most 5 trials, records kept under output/diabetes2
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5 , directory='output', project_name='diabetes2')


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [43]:
# Run the depth search: every trial trains for 5 epochs and is scored on (X_test, y_test)
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 07s]
val_accuracy: 0.8246753215789795

Best val_accuracy So Far: 0.8246753215789795
Total elapsed time: 00h 00m 34s


In [44]:
tuner.get_best_hyperparameters()[0].values  # hyperparameter values of the top-ranked trial, i.e. the selected num_layers

{'num_layers': 10}

In [45]:
model = tuner.get_best_models(num_models=1)[0]   # take the single top-ranked model from the depth search

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [46]:
# Train the tuner-selected model further: 100 epochs, mini-batches of 20, evaluated on (X_test, y_test) after each epoch
model.fit(X_train, y_train, epochs=100, batch_size=20 , validation_data=(X_test, y_test))

Epoch 1/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 32ms/step - accuracy: 0.7442 - loss: 0.4875 - val_accuracy: 0.7922 - val_loss: 0.5157
Epoch 2/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.7993 - loss: 0.4167 - val_accuracy: 0.7727 - val_loss: 0.4828
Epoch 3/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7880 - loss: 0.4299 - val_accuracy: 0.7987 - val_loss: 0.4810
Epoch 4/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.7870 - loss: 0.4294 - val_accuracy: 0.7987 - val_loss: 0.4746
Epoch 5/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8191 - loss: 0.3835 - val_accuracy: 0.7987 - val_loss: 0.4751
Epoch 6/100
[1m31/31[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8281 - loss: 0.3711 - val_accuracy: 0.7532 - val_loss: 0.4980
Epoch 7/100
[1m31/31[0m [3

<keras.src.callbacks.history.History at 0x2be166365a0>

# In the build_model function above, the number of neurons in each layer can also be made tunable (for example with hp.Int, as in the previous section).

In [None]:
'''  

And also we can add dropout layers in the model to avoid overfitting .


'''