#### Keras Tuner -- Help us to decide How many number of neurons and hidden layers to be selected in Neural Network

https://keras-team.github.io/keras-tuner/

In [1]:
import tensorflow
print(tensorflow.__version__)

2.4.0


In [2]:
!pip install keras-tuner

Collecting keras-tuner
[?25l  Downloading https://files.pythonhosted.org/packages/20/ec/1ef246787174b1e2bb591c95f29d3c1310070cad877824f907faba3dade9/keras-tuner-1.0.2.tar.gz (62kB)
[K     |█████▏                          | 10kB 24.5MB/s eta 0:00:01[K     |██████████▍                     | 20kB 14.8MB/s eta 0:00:01[K     |███████████████▋                | 30kB 13.1MB/s eta 0:00:01[K     |████████████████████▉           | 40kB 12.6MB/s eta 0:00:01[K     |██████████████████████████      | 51kB 8.4MB/s eta 0:00:01[K     |███████████████████████████████▎| 61kB 9.2MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 5.9MB/s 
Collecting terminaltables
  Downloading https://files.pythonhosted.org/packages/9b/c4/4a21174f32f8a7e1104798c445dacdc1d4df86f2f26722767034e4de4bff/terminaltables-3.1.0.tar.gz
Collecting colorama
  Downloading https://files.pythonhosted.org/packages/44/98/5b86278fbbf250d239ae0ecb724f8572af1c91f4a11edf4d36a206189440/colorama-0.4.4-py2.py3-none-any.

In [41]:
## Import Libraries

import pandas as pd
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import RandomSearch

Dataset is all about Air Quality Index Prediction -- Regression Problem

Dependent Feature -- PM2.5 , All other features are Independent Features

In [42]:
df = pd.read_csv('Real_Combine.csv')
df.head()

Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM,PM 2.5
0,7.4,9.8,4.8,1017.6,93.0,0.5,4.3,9.4,219.720833
1,7.8,12.7,4.4,1018.5,87.0,0.6,4.4,11.1,182.1875
2,6.7,13.4,2.4,1019.4,82.0,0.6,4.8,11.1,154.0375
3,8.6,15.5,3.3,1018.7,72.0,0.8,8.1,20.6,223.208333
4,12.4,20.9,4.4,1017.3,61.0,1.3,8.7,22.2,200.645833


In [43]:
df.isnull().sum()

T         0
TM        0
Tm        0
SLP       0
H         0
VV        0
V         0
VM        0
PM 2.5    1
dtype: int64

In [44]:
df = df.dropna()
df.isnull().sum()

T         0
TM        0
Tm        0
SLP       0
H         0
VV        0
V         0
VM        0
PM 2.5    0
dtype: int64

In [45]:
### Divide dataset into Independent and Dependent Features

X = df.iloc[:,:-1]
y = df.iloc[:,-1]

X.head()

Unnamed: 0,T,TM,Tm,SLP,H,VV,V,VM
0,7.4,9.8,4.8,1017.6,93.0,0.5,4.3,9.4
1,7.8,12.7,4.4,1018.5,87.0,0.6,4.4,11.1
2,6.7,13.4,2.4,1019.4,82.0,0.6,4.8,11.1
3,8.6,15.5,3.3,1018.7,72.0,0.8,8.1,20.6
4,12.4,20.9,4.4,1017.3,61.0,1.3,8.7,22.2


In [46]:
y.head()

0    219.720833
1    182.187500
2    154.037500
3    223.208333
4    200.645833
Name: PM 2.5, dtype: float64

In [47]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.fit_transform(X_test)

X_train

array([[-0.57274033, -0.35937433, -0.95753592, ..., -1.21178613,
        -0.08174649,  0.11286325],
       [-1.69123558, -1.79437055, -1.59036078, ..., -1.21178613,
         0.97864768,  3.51530141],
       [ 0.96001243,  1.22510067,  0.9005456 , ..., -0.10125437,
         0.16295985, -0.64635849],
       ...,
       [ 1.13952401,  0.71687284,  1.30447636, ...,  0.87046093,
         1.3321124 ,  0.91426397],
       [-1.87074717, -2.00364084, -1.40185976, ..., -2.04468496,
        -0.54396959, -0.88537274],
       [ 0.67003218,  0.35812379,  0.84668816, ...,  0.87046093,
        -0.89743432, -0.64635849]])

In [48]:
X_test

array([[ 0.72398496,  0.80357143,  0.45590992, ...,  1.28011238,
         2.78044217,  2.47658459],
       [-2.01142681, -2.55488973, -1.65348247, ..., -1.58806932,
        -0.24056669, -0.64125358],
       [-1.45601325, -1.54584534, -1.53256189, ...,  0.40718751,
         0.02317218,  0.05734916],
       ...,
       [ 1.07111843,  1.1499598 ,  1.20830466, ..., -0.21633025,
         0.91029383,  0.05734916],
       [ 0.77952631,  0.59272634,  0.96646349, ...,  0.28248396,
         3.25996739,  1.51924009],
       [-1.650408  , -1.75669044, -1.50569065, ..., -0.96455156,
        -0.86394947, -1.09405165]])

### Hyper Parameters in Neural Network

1. How many no of Hidden layers we should have
2. How many no of neurons we should have in hidden layers
3. Learning Rate

In [79]:
def build_model(hp):

    ## hp.Int --- Try with different different values
    ## hp.Choice -- Choose any one
    model = keras.Sequential()
    ## We will do tunning of Layers between 2 to 20 -- Hidden Layers
    for i in range(hp.Int('num_layers', 2, 20)):
      ## No of neurons in each hidden layer will be ranging between 32 to 512
      model.add(layers.Dense(units=hp.Int('units_' + str(i),
                                            min_value=32,
                                            max_value=512,
                                            step=32),
                               activation='relu'))
    ## Output layer
    model.add(layers.Dense(1, activation='linear')) ##Linear activation function is used in Regression problem
    model.compile(
        optimizer=keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])),
        loss='mean_absolute_error', ## For Regrerssion problem statement
        metrics=['mean_absolute_error']) ## For Regression problem
    return model

In [80]:
### Hyper parameter optimization using RandomSearch

tuner = RandomSearch(
    build_model,
    ### When we do training we get val_mean_absolute_error
    ### Incase of Classification problem write val_accuracy
    objective='val_mean_absolute_error',

    ### Above function will be called 5*3=15 times 
    ### single round of training for each model configuration,if we set execution_per_trail=1
    max_trials=5,
    executions_per_trial=1,

    ### Weights will get stored in these directory
    directory='project6',
    project_name='Air Quality Index')

In [81]:
tuner.search_space_summary()

### All information will get stored in Air Quality Index 

Search space summary
Default search space size: 4
num_layers (Int)
{'default': None, 'conditions': [], 'min_value': 2, 'max_value': 20, 'step': 1, 'sampling': None}
units_0 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
units_1 (Int)
{'default': None, 'conditions': [], 'min_value': 32, 'max_value': 512, 'step': 32, 'sampling': None}
learning_rate (Choice)
{'default': 0.01, 'conditions': [], 'values': [0.01, 0.001, 0.0001], 'ordered': True}


In [82]:
### Train Test split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=0)

X_train.shape,X_test.shape,y_train.shape,y_test.shape

((764, 8), (328, 8), (764,), (328,))

In [83]:
### 5 trials will be done as per the epocs 

tuner.search(X_train, y_train,
             epochs=50,
             validation_data=(X_test, y_test))

Trial 5 Complete [00h 00m 06s]
val_mean_absolute_error: 50.91386413574219

Best val_mean_absolute_error So Far: 42.80025863647461
Total elapsed time: 00h 00m 37s
INFO:tensorflow:Oracle triggered exit


In [84]:
tuner.results_summary()

Results summary
Results in project6/Air Quality Index
Showing 10 best trials
Objective(name='val_mean_absolute_error', direction='min')
Trial summary
Hyperparameters:
num_layers: 5
units_0: 192
units_1: 288
learning_rate: 0.01
units_2: 256
units_3: 192
units_4: 224
units_5: 96
units_6: 256
units_7: 320
units_8: 288
units_9: 256
Score: 42.80025863647461
Trial summary
Hyperparameters:
num_layers: 10
units_0: 160
units_1: 64
learning_rate: 0.01
units_2: 32
units_3: 32
units_4: 32
units_5: 32
units_6: 32
units_7: 32
units_8: 32
units_9: 32
Score: 43.0254020690918
Trial summary
Hyperparameters:
num_layers: 7
units_0: 160
units_1: 64
learning_rate: 0.01
units_2: 320
units_3: 192
units_4: 224
units_5: 448
units_6: 320
units_7: 416
units_8: 160
units_9: 160
Score: 43.49293899536133
Trial summary
Hyperparameters:
num_layers: 15
units_0: 320
units_1: 384
learning_rate: 0.0001
units_2: 96
units_3: 256
units_4: 288
units_5: 128
units_6: 192
units_7: 128
units_8: 320
units_9: 192
units_10: 32
units

In [85]:
tuner.get_best_models(num_models=1)[0]

<tensorflow.python.keras.engine.sequential.Sequential at 0x7feda37b24a8>

In [95]:
model = tuner.get_best_models(num_models=1)[0]

In [96]:
!zip -r ./project6.zip ./project6/

  adding: project6/ (stored 0%)
  adding: project6/Air Quality Index/ (stored 0%)
  adding: project6/Air Quality Index/oracle.json (deflated 85%)
  adding: project6/Air Quality Index/.ipynb_checkpoints/ (stored 0%)
  adding: project6/Air Quality Index/trial_bdb3d0b7835382816c8ef1a293832ee2/ (stored 0%)
  adding: project6/Air Quality Index/trial_bdb3d0b7835382816c8ef1a293832ee2/checkpoints/ (stored 0%)
  adding: project6/Air Quality Index/trial_bdb3d0b7835382816c8ef1a293832ee2/checkpoints/epoch_0/ (stored 0%)
  adding: project6/Air Quality Index/trial_bdb3d0b7835382816c8ef1a293832ee2/checkpoints/epoch_0/checkpoint.index (deflated 63%)
  adding: project6/Air Quality Index/trial_bdb3d0b7835382816c8ef1a293832ee2/checkpoints/epoch_0/checkpoint.data-00000-of-00001 (deflated 52%)
  adding: project6/Air Quality Index/trial_bdb3d0b7835382816c8ef1a293832ee2/checkpoints/epoch_0/checkpoint (deflated 48%)
  adding: project6/Air Quality Index/trial_bdb3d0b7835382816c8ef1a293832ee2/trial.json (deflat