<font color=pink>
For the hyperparameter search, it can be necessary to ensure that no other operations are running on the GPU, in particular other Jupyter kernels. Run ```nvidia-smi``` in a terminal to check.
<br>
</font>

<font color=pink>
TensorFlow makes log folder and file names so long that the full path can exceed the 260-character path length limit on Windows, so this notebook should be run from a directory with as short a path as possible. Tested with TF 2.3.
<br>
It is also advisable to pause cloud storage syncing (e.g. OneDrive), as this can block the logging of trial results.
</font>

Estimating the amplitude and period of a sine curve given an input of 30 values
sampled at equally spaced points along the curve over the interval [-15,15].
The model is trained for curves of the form b + A sin(2pi(a+x)/w)
where the period is w and the amplitude is A. The x are sampled for the
30 equally spaced values in the interval [-15,15] as above.

This script performs a random search or Bayesian search to identify optimal hyperparameters, and then trains a model with these parameters.

#######################################################################################
#          Initialisation           #

We first initialize parameters such as the number of epochs in each trial and define helper functions.

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, InputLayer
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from kerastuner.engine.hyperparameters import HyperParameters
from tensorboard.plugins.hparams import api as hp
import tensorflow.keras.backend as kb
import kerastuner as kt
from tensorflow import math as tm
print(tf.config.list_physical_devices()) # Check TensorFlow can see your GPU.

import math, time, os
import numpy as np
import pandas as pd
from scipy.stats import norm
from datetime import datetime
np.set_printoptions(edgeitems=9, linewidth=128,
                    formatter={'float':lambda x: f'{x:.3f}'}, precision=3, suppress=True)

In [None]:
# To run on CPU only, uncomment the next line so that TensorFlow sees no GPU devices:
#tf.config.set_visible_devices(tf.config.experimental.list_physical_devices('CPU'))

# Rounding errors on float32 can inflate the loss, so float64 is made the Keras
# default float type. `floatsize` is also passed explicitly as the dtype of the
# Dense layers created in model_builder.
floatsize = 'float64'
kb.set_floatx(floatsize)

In [None]:
num_sample_pts = 30                         # Number of sample points per curve (the width of the model's input layer).
train_sqrt     = 450                        # Number of periods and number of amplitudes to generate in data.

randSearch = True                           # True if using random hyperparameter search or hard-coding hyperparameters.
max_trials = 60                             # Number of trials run by each hyperparameter search.
hyp_epochs = 20                             # Epochs per trial during a hyperparameter search.
fit_epochs = 50                             # Epochs for the final fit of the chosen model.
batch      = 2**6                           # Mini-batch size used by every fit.
val_split  = 0.2                            # Fraction of the data held out for validation during the searches.

We create training data of $train\_sqrt^2$ curves with random periods, amplitudes and translations.\
data() returns a list $[curves,\ yAw]$ of 2 arrays:
 - $curves$ is an array in which each row is a set of $num\_sample\_pts$ equidistant sample points from one curve;
 - $yAw$ is an array of the period-amplitude pair for each curve sample in $curves$.

In [None]:
def make_curve(period, amp, sample, aug = False):
    """Evaluate one sine curve at the given sample points.

    The curve is ``amp * sin(2*pi * (sample + shift) / period) + offset``.
    Two uniform random numbers in [0, 1) are always drawn (shift first, then
    offset) and multiplied by ``aug``, so with ``aug`` false both are zero.

    Args:
        period: Period of the sine curve.
        amp:    Amplitude of the sine curve.
        sample: Array of x-values at which to evaluate the curve.
        aug:    Whether to apply the random phase shift and vertical offset.

    Returns:
        A 1-D array of ``num_sample_pts`` curve values.
    """
    phase_shift     = aug * np.random.rand()
    angular_freq    = 2 * np.pi / period
    wave            = amp * np.sin((sample + phase_shift) * angular_freq)
    vertical_offset = aug * np.random.rand()
    return (wave + vertical_offset).reshape(num_sample_pts)

def data(sample_interval = (-15, 15), amp_interval = (0, 30), aug = False, gridsize = None):
    """Generate a grid of sine curves together with their (period, amplitude) labels.

    ``gridsize`` random periods and ``gridsize`` random amplitudes are drawn and
    combined into every possible pair, giving ``gridsize**2`` curves.

    Args:
        sample_interval: (start, stop) of the x-range; ``num_sample_pts`` equally
            spaced points are taken over it, end points included.
        amp_interval:    (low, high) bounds of the uniformly drawn amplitudes.
        aug:             Passed to make_curve; when true each curve receives a
                         random phase shift and vertical offset.
        gridsize:        Number of periods and number of amplitudes to draw.
                         When None, the current value of the module-level
                         ``train_sqrt`` is read at call time, so reassigning
                         ``train_sqrt`` takes effect on the next call.

    Returns:
        A list ``[curves, pairs]``: ``curves`` has one row of ``num_sample_pts``
        values per curve and ``pairs`` has the matching (period, amplitude) rows.
    """
    if gridsize is None:
        gridsize = train_sqrt

    sample     = np.linspace(*sample_interval, num_sample_pts)
    # Periods are drawn before amplitudes, keeping the random stream order stable.
    periods    = 0.05 + 10 * np.pi * np.random.rand(gridsize)
    amplitudes = (amp_interval[0]
                  + (amp_interval[1] - amp_interval[0]) * np.random.rand(gridsize))
    mesh       = np.meshgrid(periods, amplitudes)
    pairs      = np.array(mesh).T.reshape(-1, 2)      # One (period, amplitude) row per grid point.
    curves     = np.array([make_curve(w, a, sample, aug) for w, a in pairs])
    return [curves, pairs]

# Module-level dataset generated with the random phase/offset augmentation switched on.
df = data(aug=True)

In [None]:
def pretty_hparams(hparams):
    """Print hyperparameters as aligned ``name : value`` lines, then a blank line.

    Args:
        hparams: Mapping of hyperparameter name to value (anything providing
            ``.items()``). An empty mapping prints only the blank line.
    """
    for key, value in hparams.items():
        # Names are left-justified in a 15-character column so the colons line up.
        print(f'{key:<15} : {value}')
    print('')

# Terminate training after patience^th consecutive epoch with non-decreasing val_loss.
# One callback per workflow: R = random search, B = Bayesian search, T = final training.
# NOTE(review): the patience of 20 for R and B equals hyp_epochs (20), so these two
# look unable to trigger within a search trial -- confirm this is intended.
early_stopR = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 20)
early_stopB = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 20)
early_stopT = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 5)

In [None]:
# Timestamped TensorBoard log directory (Windows-style relative path) and a
# callback list that writes scalar summaries to it once per epoch.
# NOTE(review): `tensorboard_callback` is not referenced again in the visible part
# of this notebook, and `logdir` is reassigned by later cells.
logdir = "logs\\sinAw\\scalars\\" + datetime.now().strftime("%Y%m%d-%H%M-%S")
tensorboard_callback = [keras.callbacks.TensorBoard(log_dir = logdir, update_freq = 'epoch')]

In [None]:
def model_builder(hparams):
    """Build and compile a dense regression network from a set of hyperparameters.

    Args:
        hparams: Source of hyperparameter values, indexed as ``hparams[name]``.
            While the global ``randSearch`` is true, ``hparams`` must be a dict
            whose entries appear in the order number_layers, number_units,
            learning_rate, rate_decay, l1_regularizer, l2_regularizer,
            activation_func, dropout_p: the values are read positionally in key
            order. Otherwise the eight values are looked up by those names.

    Returns:
        A compiled ``keras.Sequential`` model with ``num_sample_pts`` inputs,
        ``number_layers`` Dense + Dropout pairs and a 2-unit linear output,
        optimised with Adam on the mean absolute percentage error.
    """
    if randSearch:
        indices = hparams.keys()
    else:
        indices = ['number_layers', 'number_units', 'learning_rate', 'rate_decay',
                   'l1_regularizer', 'l2_regularizer', 'activation_func', 'dropout_p']
    (number_layers, number_units, learning_rate, rate_decay,
     l1_reg, l2_reg, a_func, dropout_p) = [hparams[ind] for ind in indices]

    # Glorot (Xavier) initialisation for saturating activations, He otherwise.
    if a_func in ('sigmoid', 'tanh'):
        initializer = tf.keras.initializers.glorot_uniform()
    else:
        initializer = tf.keras.initializers.he_uniform()
    regularizer = keras.regularizers.l1_l2(l1_reg, l2_reg)

    # NOTE(review): a Normalization layer used to be created and adapted on the
    # global dataset here on every call, but it was never added to the model, so
    # it only cost time. It has been dropped; add a Normalization layer after the
    # input layer if input scaling is actually wanted.
    model = keras.Sequential()
    model.add( InputLayer(input_shape=(num_sample_pts,), name='Input_Layer') )

    for layer in range(number_layers):
        model.add( Dense( units             = number_units,
                         name               = f'Layer_{layer+1}',
                         kernel_initializer = initializer,           # Initialize weights with He/Xavier initialiser.
                         kernel_regularizer = regularizer,
                         activation         = a_func,
                         dtype              = floatsize) )
        #model.add( BatchNormalization(renorm = True) )              # Batch-norm in TF requires float32, cannot accept float64.
        model.add( Dropout(dropout_p) )

    model.add( Dense(units = 2, activation = 'linear',               # 2 output nodes for a period-amplitude pair.
                     dtype = floatsize, name='Output_Layer') )

    # Step-wise decay: the learning rate is multiplied by rate_decay every 4000 optimiser steps.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        learning_rate, decay_steps = 4000, decay_rate = rate_decay, staircase = True)

    model.compile(
        optimizer = keras.optimizers.Adam(learning_rate = lr_schedule),
        loss      = tf.keras.losses.MeanAbsolutePercentageError()) # Train, test loss & objective. A 'metric' is not necessary.
    return model

###############################################################################
# Random Search
<br>
<font color=green>
(TensorBoard hparams plugin hp module)     
</font>

In [None]:
# Search space for the random search.
# The two integer ranges are [low, high] bounds; the continuous hyperparameters
# are discretised onto linearly spaced grids so they can be sampled as discrete sets.
layer_range  = [2, 3]
node_range   = [5, 16]
l_rate_range = np.linspace(10**-4.5, 10**-2,   num = 600).tolist()
decay_range  = np.linspace(0.82,     0.99,     num = 400).tolist()
reg1_range   = np.linspace(10**-8,   10**-5.5, num = 600).tolist()
reg2_range   = np.linspace(10**-7,   10**-4.5, num = 600).tolist()
act_funcs    = ['sigmoid', 'tanh', 'relu', 'elu']
dropout_ps   = np.linspace(0.01,     0.40,     num = 400).tolist()
# (10**(np.linspace(-9, -3, 600))).tolist() to sample log scale

In [None]:
# TensorBoard HParams definitions: one named domain per hyperparameter, sampled
# uniformly by the random search.
HP_LAYERS = hp.HParam('number_layers',   hp.IntInterval(*layer_range))
HP_UNITS  = hp.HParam('number_units',    hp.IntInterval(*node_range))
HP_LRATE  = hp.HParam('learning_rate',   hp.Discrete(l_rate_range))
HP_LDECAY = hp.HParam('rate_decay',      hp.Discrete(decay_range))
HP_REGL1  = hp.HParam('l1_regularizer',  hp.Discrete(reg1_range))
HP_REGL2  = hp.HParam('l2_regularizer',  hp.Discrete(reg2_range))   # L2 has its own grid (was reg1_range).
HP_ACTS   = hp.HParam('activation_func', hp.Discrete(act_funcs))
HP_DROP   = hp.HParam('dropout_p',       hp.Discrete(dropout_ps))

In [None]:
# Random search: run `max_trials` independent trials, each on freshly generated
# data with uniformly sampled hyperparameters, logging every trial to TensorBoard.
start = time.time()

trial_num = 1
for i in range(max_trials):
    startrun       = time.time()
    run_name       = f'randTrial-{trial_num}'
    # Per-trial log directory (Windows-style path) stamped with the trial's start time.
    logdir         = f".\\sinAw\\random_search\\" + datetime.now().strftime("%d-%H%M-%S") + "\\" + run_name
    # data() returns [curves, (period, amplitude) pairs]; `prices` holds the regression targets.
    inputs, prices = data()
    # Key order matters: model_builder reads a dict positionally while randSearch is True.
    hyperparams = {
        'number_layers'   : HP_LAYERS.domain.sample_uniform(),
        'number_units'    : HP_UNITS.domain.sample_uniform(),
        'learning_rate'   : HP_LRATE.domain.sample_uniform(),
        'rate_decay'      : HP_LDECAY.domain.sample_uniform(),
        'l1_regularizer'  : HP_REGL1.domain.sample_uniform(),
        'l2_regularizer'  : HP_REGL2.domain.sample_uniform(),
        'activation_func' : HP_ACTS.domain.sample_uniform(),
        'dropout_p'       : HP_DROP.domain.sample_uniform()}
    
    print(f'----Starting trial {run_name}')
    pretty_hparams(hyperparams)
    
    model = model_builder(hyperparams)
    #print(model.summary())
    callbacks = [tf.keras.callbacks.TensorBoard(logdir), early_stopR,       # Loss curves + early stopping
                 hp.KerasCallback(logdir + "\\validation", hyperparams)]    # TB Hparams
    
    train_history = model.fit(inputs, prices,
                              validation_split = val_split,
                              batch_size       = batch,
                              callbacks        = callbacks,
                              shuffle          = True,
                              verbose          = 0,
                              epochs           = hyp_epochs)
    
    # Report the mean validation loss over the last (up to) three epochs of the trial.
    print('Average val loss: ', np.average(train_history.history['val_loss'][-3:]))
    print('Trial time taken: ', time.time()-startrun)
    print('Cumulative time taken:', time.time()-start)
    print('')
    trial_num += 1

print('Total time taken: ', time.time()-start)
    
# Audible notification that the search has finished (winsound is Windows-only).
import winsound
for i in range(2):
    winsound.Beep(1000, 250)
    
# An error (in the command prompt) of the form:
# Blas GEMM launch failed : a.shape=(64, 30), b.shape=(30, 4), m=64, n=4, k=30
# [[{{node sequential/Layer_1/MatMul}}]]
# CPU->GPU Memcpy failed
# is likely caused by another kernel also using the GPU.
# Terminate any other active kernels before running.

## # Bayesian search using Keras Tuner
<br>
<font color=green>
(keras-tuner HyperParameters hy_Params)     
</font>

To enable the hyperparameter search, we first create a HyperParameters object from kerastuner, informing the algorithm of our conservative range of possible values for each hyperparameter.

In [None]:
# Switch model_builder to name-based hyperparameter lookup for the tuner.
randSearch = False
hy_Params = HyperParameters()

# Register the search space; each call declares one hyperparameter and its range.
hy_Params.Float('learning_rate',   min_value = 10**(-3),   max_value = 10**(-1))
hy_Params.Float('rate_decay',      min_value = 0.86,       max_value = 0.94)
hy_Params.Float('l1_regularizer',  min_value = 10**(-6.5), max_value = 5*10**(-4))
hy_Params.Float('l2_regularizer',  min_value = 10**(-6.5), max_value = 5*10**(-4))
hy_Params.Choice('activation_func', values = ['elu', 'sigmoid'])
hy_Params.Float('dropout_p',       min_value = 0.1,        max_value = 0.4)
hy_Params.Choice('number_layers',   values = [2])
hy_Params.Int('number_units',      min_value = 4,          max_value = 7, step = 1)

We define 'tuner' by passing the model_builder function to the Keras Tuner Bayesian optimization method. Increase max_trials to increase the number of Bayesian exploration steps - the larger the better. The TensorBoard information will be stored in a dated folder.

In [None]:
# Root folder for the tuner's trial records.
# NOTE(review): the leading backslash makes this path rooted at the current drive
# on Windows rather than relative to the working directory -- confirm intended.
logdir = "\\Bayes_search\\"

# Bayesian optimisation over the declared search space, minimising validation loss.
# Each project gets a day-hour-minute stamped name inside `logdir`.
tuner = kt.BayesianOptimization(
    model_builder,
    objective       = 'val_loss',
    max_trials      = max_trials,
    hyperparameters = hy_Params,
    directory       = logdir,
    project_name    = "proj" + datetime.now().strftime("%d-%H%M"),
)
# The logging here saves each trial but does not itself report to TensorBoard

In [None]:
# Once complete, only retain summary of best trial
import IPython
class ClearTrainingOutput(tf.keras.callbacks.Callback):
    """Keras callback that clears the notebook cell output at the end of each training run.

    After clearing, it prints the time elapsed since the module-level ``start``.
    """

    def on_train_end(*args, **kwargs):
        # No explicit ``self``: the instance and any arguments Keras passes are
        # absorbed by *args / **kwargs and ignored.
        IPython.display.clear_output(wait = True)
        elapsed = time.time() - start
        print('Cumulative time taken:', elapsed)

# For Bayesian optimisation we do not need to visualise the HPARAMS
# in TensorBoard but we can still log the training curves of each run.
# The TensorBoard folder is `logdir` plus a day-hour-minute stamp; this callback
# list is the one handed to tuner.search().
cb_dir     = logdir + datetime.now().strftime("%d-%H%M") + "\\"
callbacks  = [early_stopB, ClearTrainingOutput(), tf.keras.callbacks.TensorBoard(cb_dir)]

In [None]:
# Run the Bayesian search on one freshly generated dataset.
# `start` is also read by ClearTrainingOutput to report the cumulative time.
start = time.time()
# data() returns [curves, (period, amplitude) pairs]; `prices` holds the regression targets.
inputs, prices = data()

tuner.search(inputs, prices,
             validation_split = val_split,
             batch_size       = batch,
             callbacks        = callbacks,
             verbose          = 0,
             initial_epoch    = 0,
             shuffle          = True,
             epochs           = hyp_epochs)
                             
print('Time Taken: ', time.time()-start)
print(' ')
    
# Audible notification that the search has finished (winsound is Windows-only).
import winsound
for i in range(2):
    winsound.Beep(1000, 250)
    
# An error of the form

# NotFoundError: Failed to create a NewWriteableFile:
# logs\sinAw\Bayes_search\proj25-0111\trial_961dd8ef567fe0474969eb2fc5d39d04\checkpoints
# \epoch_0\checkpoint_temp_97e4de392a704274a8e78d72bba426de/part-00000-of-00002.data-00000-of-00001.
# tempstate7872945028056574961 : The system cannot find the path specified.
# ; No such process [Op:SaveV2]
# is very likely to be due to a 'long' directory address, since TensorFlow
# trial identifiers create addresses exceeding the 260 character limit.
# Run TensorFlow scripts from a short directory path such as 'C:\Music\'.

The tuner stores all trial hyperparameters and scores, so we now extract the hyperparameters corresponding to the lowest-loss trial.

In [None]:
# Print up to the ten best hyperparameter configurations found by the tuner.
num_results = min(max_trials,10)
bests = tuner.get_best_hyperparameters(num_results)

# Ordinal suffixes for 1..50, built once: 'st', 'nd', 'rd', then 'th',
# with the 11th-13th exceptions patched in.
sup = (['st', 'nd', 'rd'] + ['th'] * 7) * 5
sup[10:13] = ['th'] * 3

# Iterate over what the tuner actually returned, which may be fewer than
# num_results if not every trial completed.
for i, best in enumerate(bests):
    print(f'{i+1}{sup[i]} best hyperparameter configuration found:')
    pretty_hparams(best.values)

## # Train best model
<br>
<font color=green>  
</font>

Now that we have optimized hyperparameters stored in the best_hp object, we can easily reuse $model\_builder$ to train a full model. The TensorBoard data will be logged in a dated folder in ".\scalars\" and loading TensorBoard from terminal via
'tensorboard --logdir "<dir/to/logs>"'
will open a localhost server. Navigate to the address and observe the loss vs epochs plot updating every 30 seconds. Note if the foldername is constant (ie not recording date and time) then the search will continue with knowledge of the previous runs. I find TensorBoard is much more stable launching from Terminal than a notebook however so this is recommended.

In [None]:
# TensorBoard callback for the final training run, logging to a timestamped
# folder under .\scalars\.
logdir = ".\\scalars\\"
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=logdir + datetime.now().strftime("%d-%H%M-%S"), update_freq='epoch')
# True  -> train with the hard-coded hyperparameters below;
# False -> use the best configuration found by the Keras Tuner search.
randSearch = True

if randSearch:
    best_hp = { # Copy hparams from printout then add string-quotes and commas.
        # Keep the entries in this order: model_builder reads a dict positionally
        # while randSearch is True.
        'number_layers'   : 2,
        'number_units'    : 12,
        'learning_rate'   : 0.004871010080685016,
        'rate_decay'      : 0.9032167916319965,
        'l1_regularizer'  : 0.0003417837964467978,
        'l2_regularizer'  : 0.000390735809379687,
        'activation_func' : 'sigmoid',
        'dropout_p'       : 0.23402950096803992 }   # Key renamed from 'dropout' to match the name used everywhere else.
else:
    best_hp = tuner.get_best_hyperparameters(num_trials = 1)[0]

model = model_builder(best_hp)

In [None]:
# Final training run on a freshly generated dataset of train_sqrt**2 curves.
train_sqrt = 200
# Pass the grid size explicitly so the dataset size follows the train_sqrt set
# above, regardless of how data() binds its default.
df = data(gridsize = train_sqrt)

start = time.time()

training_history = model.fit(*df,
                             batch_size = batch,
                             epochs = fit_epochs,
                             verbose = 0,
                             validation_split = val_split,
                             callbacks = [early_stopT, tensorboard_cb])
                             
# Mean of the last (up to) three epochs; history['loss'] is the training loss.
print("Average train loss: ", np.average(training_history.history['loss'][-3:]))
print("Average val loss: ",  np.average(training_history.history['val_loss'][-3:]))
print('Time taken: ', time.time()-start)

# Audible notification that training has finished (winsound is Windows-only).
import winsound
for i in range(2):
    winsound.Beep(800, 200)

In [None]:
#model.save("trained_sine_model")
#saved_model = keras.models.load_model("trained_sine_model")

## # 3D Error Surface Plot
<br>
<font color=green>
Plotting can take a little while.     
</font>

In [None]:
%matplotlib qt
# Magic command so we can interact with a 3D plot in the notebook.

import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter

# Figure and 3D axes shared with plot_err_3d, which draws onto these globals.
fig = plt.figure(figsize = (12,10))
ax = plt.axes(projection = '3d')

# Un-augmented evaluation grid: 300 periods x 300 amplitudes.
df_test = data(aug = False, gridsize = 300)

def plot_err_3d(save = False, predictor = None):
    """Plot the model's percentage error as a surface over (period, amplitude).

    For every curve in ``df_test`` the absolute percentage errors of the
    predicted period and amplitude are averaged, capped at 100 and drawn as a
    triangulated surface on the module-level ``fig`` / ``ax``.

    Args:
        save:      When true, also write the figure to '3d_err_plot.png'.
        predictor: Object with a ``predict`` method to evaluate. When None, the
                   global ``saved_model`` is used if it has been defined
                   (i.e. a model was reloaded from disk); otherwise the
                   in-session ``model`` is used.
    """
    if predictor is None:
        # `saved_model` only exists if the save/load cell was run; fall back to
        # the freshly trained model instead of failing with a NameError.
        predictor = globals().get('saved_model')
        if predictor is None:
            predictor = model

    targets          = df_test[1]
    errors           = np.abs(targets - predictor.predict(df_test[0])).T
    mapes            = 100 * errors / targets.T
    mapes            = np.sum(mapes, axis=0) / 2      # Average over the two outputs.
    mapes[mapes>100] = 100                            # Cap so outliers do not flatten the surface.
    surf             = ax.plot_trisurf(*targets.T, mapes, cmap = cm.coolwarm,
                                       linewidth = 0, antialiased = False)
    
    #ax.set_title('Plot of MAPE Vs Amplitude & Period', pad=0.5)
    ax.set_xlabel('Period', fontsize = 11, labelpad = 10)
    ax.set_ylabel('Amplitude', fontsize = 11, labelpad = 10)
    ax.set_zlabel('MAPE (%)', fontsize = 11, labelpad = 10)
    ax.zaxis.set_major_locator(LinearLocator(11))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.00f'))
    ax.view_init(20, 60)
    fig.colorbar(surf, shrink = 0.5, aspect = 5)
    plt.tight_layout()
    if save:
        plt.savefig('3d_err_plot.png', transparent = True, dpi = 200, bbox_inches = 'tight')
    fig.show()
    
plot_err_3d(save=True)