In [3]:
!pip install ray

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ray
  Downloading ray-1.13.0-cp37-cp37m-manylinux2014_x86_64.whl (54.5 MB)
[K     |████████████████████████████████| 54.5 MB 236 kB/s 
Collecting grpcio<=1.43.0,>=1.28.1
  Downloading grpcio-1.43.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.1 MB)
[K     |████████████████████████████████| 4.1 MB 38.7 MB/s 
Collecting virtualenv
  Downloading virtualenv-20.14.1-py2.py3-none-any.whl (8.8 MB)
[K     |████████████████████████████████| 8.8 MB 36.5 MB/s 
Collecting aiosignal
  Downloading aiosignal-1.2.0-py3-none-any.whl (8.2 kB)
Collecting frozenlist
  Downloading frozenlist-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (144 kB)
[K     |████████████████████████████████| 144 kB 59.7 MB/s 
Collecting distlib<1,>=0.3.1
  Downloading distlib-0.3.4-py2.py3-none-any.whl (461 kB)
[K     |████████████████████████

In [1]:
# Data Fetch and Processing Libraries
import pandas as pd
import numpy as np
import sys
import os
from sklearn.model_selection import train_test_split

# For the stacked models
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Dense,Flatten,Input,concatenate, BatchNormalization
from tensorflow.keras.optimizers import Adadelta, Adam, RMSprop
tf.random.set_seed(42)

# Distributed Training
from ray import tune
from ray.tune.schedulers import ASHAScheduler
from ray.tune.suggest import ConcurrencyLimiter
from ray.tune.suggest.hyperopt import HyperOptSearch
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.integration.keras import TuneReportCallback
# Parameter Optimisation
import hyperopt
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval
# Metrics
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error ,mean_absolute_percentage_error

In [2]:
# Data: load the California housing dataset through scikit-learn's loader.
# The returned object is indexed with 'data' and 'target' in the next cell.
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

In [3]:
# Feature matrix and regression target taken from the loaded dataset.
X, y = housing['data'], housing['target']

In [4]:
# 70% of the rows go to training; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=42,
)

In [5]:
# Tune two neural networks jointly: both receive the same features, and their outputs are concatenated into a single regression head.

In [6]:
# Joint hyperopt search space for both sub-networks and the shared training settings.
# Every entry is a categorical `hp.choice`. The dict key is what the model-building
# functions read; the first argument of `hp.choice` is hyperopt's own label.
space = {
    # Network 1
    "m1_kernel_initializer1": hp.choice("kernel_initializer_m1_1", ["glorot_uniform", "he_normal"]),
    "m1_bias_initializer1": hp.choice("bias_initializer_m1_1", ["ones", "glorot_uniform"]),
    "m1_kernel_initializer2": hp.choice("kernel_initializer_m1_2", ["glorot_normal", "he_uniform"]),
    "m1_bias_initializer2": hp.choice("bias_initializer_m1_2", ["zeros", "ones"]),
    "m1_dropout": hp.choice("dropout1", [0, 0.25, 0.5]),

    # Network 2
    "m2_kernel_initializer1": hp.choice("kernel_initializer_m2", ["glorot_normal", "he_uniform"]),
    "m2_bias_initializer1": hp.choice("bias_initializer_m2", ["zeros", "ones"]),
    # None -> no BatchNormalization layer, True -> layer is inserted
    "batch_normalisation": hp.choice("batch_normalisation", [None, True]),
    "m2_dropout": hp.choice("dropout2", [0, 0.25, 0.5]),

    # Stacked model (optimizer and fit settings)
    "lr": hp.choice("lr", [1e-2, 1e-3, 1e-4, 1e-5]),
    "epochs": hp.choice("epochs", [10, 15, 20, 25]),
    "batch_size": hp.choice("batch_size", [16, 32, 64, 128]),
}

In [7]:
def hyper_tuner(space):
  """Ray Tune trainable: build, compile and fit the stacked model for one trial.

  Two Sequential sub-networks are built on the same input width
  (``X_train.shape[1]``), their outputs are concatenated and fed to a single
  linear unit. The model is trained on the notebook-level ``X_train`` /
  ``y_train`` with 20% of the rows held out for validation.

  Args:
    space: dict of resolved hyperparameters for this trial. Keys read here:
      ``m1_kernel_initializer1``, ``m1_bias_initializer1``,
      ``m1_kernel_initializer2``, ``m1_bias_initializer2``, ``m1_dropout``,
      ``m2_kernel_initializer1``, ``m2_bias_initializer1``,
      ``batch_normalisation``, ``m2_dropout``, ``lr``, ``epochs``,
      ``batch_size``.

  Returns:
    None. Results are handed to Tune through ``TuneReportCallback`` under the
    key ``'mean_se'``.
  """

  # Network 1: four narrow Dense layers, dropout after the second one
  network_1 = Sequential()
  network_1.add(Dense(units = 4, input_dim=X_train.shape[1] ,
                      activation='relu', bias_initializer = space['m1_bias_initializer1'],
                      kernel_initializer=space['m1_kernel_initializer1']))

  network_1.add(Dense(units = 4, activation='relu',
                      bias_initializer = space['m1_bias_initializer2'],
                      kernel_initializer = space['m1_kernel_initializer2']))
  network_1.add(Dropout(space['m1_dropout']))
  network_1.add(Dense(units = 4, activation='relu'))
  network_1.add(Dense(units = 4, activation='relu'))

  # Network 2: two wider Dense layers with optional batch normalisation in between
  network_2 = Sequential()
  network_2.add(Dense(units = 16, input_dim=X_train.shape[1] , activation='relu',
                      bias_initializer = space['m2_bias_initializer1'],
                      kernel_initializer=space['m2_kernel_initializer1']))
  if space['batch_normalisation']:
        network_2.add(BatchNormalization())
  network_2.add(Dense(units = 16, activation='relu'))
  network_2.add(Dropout(space['m2_dropout']))

  # Stacking the Networks: concatenate both outputs into one linear regression unit
  combinedInput = concatenate([network_1.output, network_2.output])
  x = Dense(1, activation="linear")(combinedInput)
  model = Model(inputs=[network_1.input, network_2.input], outputs=x)
  adam = Adam(learning_rate=space['lr'])
  model.compile(loss='mean_squared_error', optimizer=adam, metrics=['mean_squared_error'])

  # Report the held-out (validation_split) MSE rather than the training MSE, so that
  # trials are ranked on data the model was not fitted on. The reported key stays
  # 'mean_se', which is the metric name the search is configured with.
  model.fit( x=[X_train, X_train],
          y=y_train,
          validation_split = 0.2,
          epochs=space['epochs'], batch_size=space['batch_size'], verbose = 0,
          callbacks=[TuneReportCallback({'mean_se':'val_mean_squared_error'})])
  

In [8]:
# To return the model after hyperparameters have been selected
def tuned_dnn(space):
  """Build and fit the stacked two-branch model for a fixed set of hyperparameters.

  Mirrors the architecture used during tuning, but trains without any Tune
  callback and hands the fitted model back to the caller. Training data comes
  from the notebook-level ``X_train`` / ``y_train``.

  Args:
    space: dict of concrete hyperparameter values (initializers, dropout rates,
      ``batch_normalisation``, ``lr``, ``epochs``, ``batch_size``).

  Returns:
    Tuple ``(history, model)``: the object returned by ``model.fit`` and the
    trained Keras ``Model``.
  """
  n_features = X_train.shape[1]

  # Branch 1: four Dense(4) layers, dropout after the second
  branch_a = Sequential()
  branch_a.add(
      Dense(units=4,
            input_dim=n_features,
            activation='relu',
            bias_initializer=space['m1_bias_initializer1'],
            kernel_initializer=space['m1_kernel_initializer1']))
  branch_a.add(
      Dense(units=4,
            activation='relu',
            bias_initializer=space['m1_bias_initializer2'],
            kernel_initializer=space['m1_kernel_initializer2']))
  branch_a.add(Dropout(space['m1_dropout']))
  branch_a.add(Dense(units=4, activation='relu'))
  branch_a.add(Dense(units=4, activation='relu'))

  # Branch 2: two Dense(16) layers, optional batch normalisation between them
  branch_b = Sequential()
  branch_b.add(
      Dense(units=16,
            input_dim=n_features,
            activation='relu',
            bias_initializer=space['m2_bias_initializer1'],
            kernel_initializer=space['m2_kernel_initializer1']))
  if space['batch_normalisation']:
    branch_b.add(BatchNormalization())
  branch_b.add(Dense(units=16, activation='relu'))
  branch_b.add(Dropout(space['m2_dropout']))

  # Merge both branches into a single linear output unit
  merged = concatenate([branch_a.output, branch_b.output])
  prediction = Dense(1, activation="linear")(merged)
  model = Model(inputs=[branch_a.input, branch_b.input], outputs=prediction)

  optimizer = Adam(learning_rate=space['lr'])
  model.compile(loss='mean_squared_error',
                optimizer=optimizer,
                metrics=['mean_squared_error'])

  # Same features feed both branches; 20% of the rows are held out for validation.
  history = model.fit(
      x=[X_train, X_train],
      y=y_train,
      validation_split=0.2,
      epochs=space['epochs'],
      batch_size=space['batch_size'],
      verbose=0,
  )
  # Can add connections to Tensorboard , Loss Curves and Report Generation

  return (history, model)

In [9]:
def tune_dnn(space, num_samples=2, max_concurrent=15, grace_period=100,
             max_t=10000, cpus_per_trial=None):
    """Search `space` with Ray Tune + HyperOpt, then refit a model on the best config.

    Args:
        space: hyperopt search space (dict of `hp.*` expressions) understood by
            `hyper_tuner` / `tuned_dnn`.
        num_samples: number of trials Tune should run (default 2, as before).
        max_concurrent: cap applied to the searcher through `ConcurrencyLimiter`
            (default 15, as before).
        grace_period: `grace_period` handed to `AsyncHyperBandScheduler`
            (default 100, as before).
        max_t: `max_t` handed to `AsyncHyperBandScheduler` (default 10000, as before).
        cpus_per_trial: CPUs requested per trial. `None` (default) requests every
            CPU reported by `os.cpu_count()`, which matches the previous behaviour.

    Returns:
        Tuple `(model, analysis)`: the Keras model returned by
        `tuned_dnn(analysis.best_config)` and the object returned by `tune.run`.
    """
    if cpus_per_trial is None:
        # os.cpu_count() can return None when the count is undetermined; fall back
        # to a single CPU instead of passing None to Ray as a resource amount.
        cpus_per_trial = os.cpu_count() or 1

    # NOTE(review): with the defaults, grace_period (100) is larger than the biggest
    # `epochs` choice in the notebook's search space (25), so the scheduler presumably
    # never stops a trial early - confirm whether that is intended.
    sched = AsyncHyperBandScheduler(time_attr="training_iteration",
                                    max_t=max_t, grace_period=grace_period)
    search_alg = HyperOptSearch(space, metric="mean_se", mode='min')
    search_alg = ConcurrencyLimiter(search_alg, max_concurrent=max_concurrent)

    analysis = tune.run(hyper_tuner, search_alg=search_alg,
                        config=space, metric='mean_se',
                        mode='min', scheduler=sched, verbose=1,
                        num_samples=num_samples,
                        resources_per_trial={"cpu": cpus_per_trial})

    # Only the refitted model is needed here; the fit history is discarded.
    _, model = tuned_dnn(analysis.best_config)
    return model, analysis

In [10]:
import warnings
# Install a blanket 'ignore' filter: warnings raised after this point are not shown.
warnings.filterwarnings('ignore')

In [11]:
# Run the hyperparameter search; keep the model refit on the best config and the Tune analysis.
model, analysis = tune_dnn(space)

2022-06-17 09:16:49,079	INFO tune.py:748 -- Total run time: 33.53 seconds (33.27 seconds for the tuning loop).


In [13]:
# Hyperparameters of the best trial according to the 'mean_se' metric (mode 'min').
analysis.best_config

{'batch_normalisation': None,
 'batch_size': 32,
 'epochs': 10,
 'lr': 0.001,
 'm1_bias_initializer1': 'ones',
 'm1_bias_initializer2': 'ones',
 'm1_dropout': 0.25,
 'm1_kernel_initializer1': 'he_normal',
 'm1_kernel_initializer2': 'he_uniform',
 'm2_bias_initializer1': 'ones',
 'm2_dropout': 0,
 'm2_kernel_initializer1': 'glorot_normal'}

In [14]:
# The stacked model has two inputs that take the same features, so X_test is passed twice.
y_pred = model.predict([X_test, X_test])

In [15]:
# Display the predicted target values for the test rows.
y_pred

array([[1.7856823],
       [2.0177858],
       [2.7519245],
       ...,
       [1.7425326],
       [1.6950011],
       [2.2255912]], dtype=float32)