<font color=pink>
For HyperParameter search, it can be necessary to ensure no other operations on GPU, in particular other Jupyter kernels. Run ```nvidia-smi``` in terminal to check.
</font>

<font color=pink>
TensorFlow makes log folder and file names so long that it exceeds the 260 character address length limit.
<br>
This file should be run from a directory with as short an address as possible. Tested with TF 2.3.
<br>
Also advisable to pause cloud storage sinking (i.e. OneDrive) as this can block the logging of trial results.
</font>

#######################################################################################
#          Initialisation           #

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, InputLayer
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from kerastuner.engine.hyperparameters import HyperParameters
from tensorboard.plugins.hparams import api as hp
import tensorflow.keras.backend as kb
import kerastuner as kt
from tensorflow import math as tm
print(tf.config.list_physical_devices()) # Check TensorFlow can see your GPU.

import math, time, os
import numpy as np
import pandas as pd
from scipy.stats import norm
from datetime import datetime
np.set_printoptions(edgeitems=9, linewidth=128,
                    formatter={'float':lambda x: f'{x:.3f}'}, precision=3, suppress=True)

# To run on CPU:
tf.config.set_visible_devices(tf.config.experimental.list_physical_devices('CPU'))

# Rounding errors on float32 can inflate the loss
floatsize = 'float64'
kb.set_floatx(floatsize)

market     = True                           # False if training set is Heston prices, True if market data.
dim        = 1                              # Dim only relevant if Market==False, unused otherwise. (=1,4,7,10,13,16)
randSearch = True                           # Enable if using random search or hard-coding hyperparameters.
hyp_epochs = 30
fit_epochs = 100
size       = 6*10**5
batch      = 2**8
val_split  = 0.2
max_trials = 1

if market:
    dim_name = 'market_acc'
    features = 5                            # 5 input features: strike, moneyness, rate, volatility, maturity.
else:
    dim_name = f'heston_dim{dim}'
    features = 7 + 2 * dim                  # 7+2*dim input features: dim spot+corr vectors, plus market stats.


def df(size = 6*10**5, market = market, dim = dim):
    if market:        
        data = pd.read_csv('.\\data\\calls_OMrates587569.csv')
        data = data[['strike', 'moneyness', 'rate', 'volatility', 'days_to_maturity', 'contract_price']]
    else:
        data = pd.read_csv(f'.\\data\\heston_prices\\heston_prices_dim{dim:02}.csv', header=None)
    size0 = int(min(size, len(data)))
    data  = data.sample(n = size0, replace = False)
      
    inputs = data[data.columns[:-1]].to_numpy()
    prices = data[data.columns[-1]].to_numpy()
    return inputs, prices.reshape(size0, 1)
    
def pretty_hparams(hparams):
    keys       = hparams.keys()
    max_length = max([len(key) for key in keys])
    indices    = ['number_layers', 'number_units', 'learning_rate', 'rate_decay',
                  'l1_regularizer', 'l2_regularizer', 'activation_func']
    for key, value in hparams.items():
        print(f'{key:<15} : {value}')
    print('')

# Terminate training after patience^th consecutive epoch with non-decreasing val_loss
early_stopR = keras.callbacks.EarlyStopping(monitor='val_loss', patience= 20) # Random search
early_stopB = keras.callbacks.EarlyStopping(monitor='val_loss', patience= 20) # Bayesian search
early_stopT = keras.callbacks.EarlyStopping(monitor='val_loss', patience= 5)  # Training

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
def model_builder(hparams):
    if randSearch:
        indices = hparams.keys()
    else:
        indices = ['number_layers', 'number_units', 'learning_rate', 'rate_decay',
                   'l1_regularizer', 'l2_regularizer', 'activation_func']
    number_layers, number_units, learning_rate, rate_decay, l1_reg, l2_reg, a_func = [hparams[ind] for ind in indices]

    initializer = tf.keras.initializers.he_uniform()#normal()    # All networks use ELU so we use He weight normalisation
    regularizer = keras.regularizers.l1_l2(l1_reg, l2_reg)

    norm_layer = Normalization()
    norm_layer.adapt(df()[0])
    model = keras.Sequential()
    model.add( InputLayer(input_shape=(features,), name='Input_Layer') )
    
    for layer in range(number_layers):
        model.add( Dense( units             = number_units,
                         name               = f'Layer_{layer+1}',
                         kernel_initializer = initializer,           # Initialize weights with He/Xavier initialiser
                         kernel_regularizer = regularizer,
                         activation         = a_func,
                         dtype              = floatsize) )
        #model.add( BatchNormalization(renorm = True) )              # Batch-norm in TF requires float32, cannot accept float64.
        #model.add( Dropout(0.2) )
        
    model.add( Dense(units = 1, activation = 'linear',               # Output is a single scalar (price)
                     dtype = floatsize, name='Output_Layer') )
    
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        learning_rate, decay_steps = 4000, decay_rate = rate_decay, staircase = True)
    
    model.compile(
        optimizer = keras.optimizers.Adam(learning_rate = lr_schedule),
        loss      = tf.keras.losses.MeanAbsolutePercentageError(), # Train, test loss & objective.
        metrics   = [tf.keras.metrics.MeanSquaredError()])
# Metrics report values but are not used in training. MAPE is our objective loss function.
    return model

###############################################################################
# Random Search
<br>
<font color=green>
(TensorBoard hparams plugin hp module)     
</font>

In [4]:
layer_range  = [2,4]
node_range   = [5, 9]
l_rate_range = (np.linspace(10**(-5), 0.003, 400)).tolist()
decay_range  = (np.linspace(0.85, 0.9995, 400)).tolist()
reg1_range   = (np.linspace(10**(-7.5), 10**(-5.5), 400)).tolist()
reg2_range   = (np.linspace(10**(-7), 10**(-4.5), 400)).tolist()
act_funcs    = ['elu']
# (10**(np.linspace(-9, -3, 600))).tolist() to sample log scale

In [5]:
HP_LAYERS = hp.HParam('number_layers', hp.IntInterval(*layer_range))
HP_UNITS  = hp.HParam('number_units',  hp.IntInterval(*node_range))
HP_LRATE  = hp.HParam('learning_rate',   hp.Discrete(l_rate_range))
HP_LDECAY = hp.HParam('rate_decay',      hp.Discrete(decay_range))
HP_REGL1  = hp.HParam('l1_regularizer',  hp.Discrete(reg1_range))
HP_REGL2  = hp.HParam('l2_regularizer',  hp.Discrete(reg1_range))
HP_ACTS   = hp.HParam('activation_func', hp.Discrete(act_funcs))

In [6]:
start = time.time()

trial_num = 1
for i in range(max_trials):
    startrun       = time.time()
    run_name       = f'randTrial-{trial_num}'
    logdir         = f"logs\\random_search\\{dim_name}\\{dim_name}" + datetime.now().strftime("%d-%H%M-%S") + "\\" + run_name
    inputs, prices = df(size, market, dim)
    hyperparams = {
        'number_layers'   : HP_LAYERS.domain.sample_uniform(),
        'number_units'    : HP_UNITS.domain.sample_uniform(),
        'learning_rate'   : HP_LRATE.domain.sample_uniform(),
        'rate_decay'      : HP_LDECAY.domain.sample_uniform(),
        'l1_regularizer'  : HP_REGL1.domain.sample_uniform(),
        'l2_regularizer'  : HP_REGL2.domain.sample_uniform(),
        'activation_func' : HP_ACTS.domain.sample_uniform() }
    
    print(f'----Starting trial {run_name}')
    pretty_hparams(hyperparams)
    
    model = model_builder(hyperparams)
    print(model.summary())
    callbacks = [tf.keras.callbacks.TensorBoard(logdir), early_stopR,     # MSE+loss
                 hp.KerasCallback(logdir+"\\validation", hyperparams)]    # TB Hparams
    
    train_history = model.fit(inputs, prices,
                              validation_split = val_split,
                              batch_size       = batch,
                #              callbacks        = callbacks,
                              shuffle          = True,
                              verbose          = 0,
                              epochs           = hyp_epochs)
    
    print('Average val loss: ', np.average(train_history.history['val_loss'][-3:]))
    print('Trial time taken: ', time.time()-startrun)
    print('Cumulative time taken:', time.time()-start)
    print('')
    trial_num += 1

print('Total time taken: ', time.time()-start)
    
import winsound
for i in range(2):
    winsound.Beep(1000, 250)

----Starting trial randTrial-1
number_layers   : 3
number_units    : 5
learning_rate   : 0.0003547117794486216
rate_decay      : 0.9223145363408521
l1_regularizer  : 3.15443140732736e-06
l2_regularizer  : 1.6087195976465604e-06
activation_func : elu

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Layer_1 (Dense)              (None, 5)                 30        
_________________________________________________________________
Layer_2 (Dense)              (None, 5)                 30        
_________________________________________________________________
Layer_3 (Dense)              (None, 5)                 30        
_________________________________________________________________
Output_Layer (Dense)         (None, 1)                 6         
Total params: 96
Trainable params: 96
Non-trainable params: 0
_________________________________________________________________
None
Av

## # Bayesian search using Keras Tuner
<br>
<font color=green>
(keras-tuner HyperParameters hy_Params)     
</font>

In [None]:
randSearch = False
hy_Params = HyperParameters()
hy_Params.Float(  'learning_rate',  *[10**(-3), 0.01] )
hy_Params.Float(  'rate_decay',     *[0.85, 0.9995] )
hy_Params.Float(  'l1_regularizer', *[10**(-8), 10**(-6.5)] )        #, sampling='LOG' also an option
hy_Params.Float(  'l2_regularizer', *[10**(-8), 10**(-6.5)] )
hy_Params.Choice( 'activation_func', ['elu'] )
hy_Params.Int( 'number_layers', step=1, *[2, 6] )
hy_Params.Int( 'number_units',  step=1, *[4, 7]  )

In [None]:
logdir = f"logs\\Bayes_search\\{dim_name}\\" + datetime.now().strftime("%d-%H%M")

tuner = kt.BayesianOptimization(model_builder,
                                hyperparameters = hy_Params,
                                max_trials      = max_trials,
                                objective       = 'val_loss',
                                directory       = logdir,
                                project_name    = "proj" + datetime.now().strftime("%d-%H%M"))
# The logging here saves each trial but does not itself report to TensorBoard

In [None]:
# Once complete, only retain summary of best trial
import IPython
class ClearTrainingOutput(tf.keras.callbacks.Callback):
    def on_train_end(*args, **kwargs):
        IPython.display.clear_output(wait = True)
        print('Cumulative time taken:', time.time()-start)

# For Bayesian optimisation we do not need to visualise the HPARAMS
# in TensorBoard but we can still log the training curves of each run.
cb_dir     = logdir + "bayes_trial" + datetime.now().strftime("%d-%H%M") + "\\"
callbacks  = [early_stopB, ClearTrainingOutput(), tf.keras.callbacks.TensorBoard(cb_dir)]             

In [None]:
start = time.time()
inputs, prices = df(size, market, dim)

tuner.search(inputs, prices,
             validation_split = val_split,
             batch_size       = batch,
             callbacks        = callbacks,
             verbose          = 0,
             initial_epoch    = 0,
             shuffle          = True,
             epochs           = hyp_epochs)
                             
print('Time Taken: ', time.time()-start)
print(' ')
    
import winsound
for i in range(2):
    winsound.Beep(1000, 250)

In [None]:
num_results = min(max_trials,10)
bests = tuner.get_best_hyperparameters(num_results)
for i in range(num_results):
    sup = (['st', 'nd', 'rd'] + ['th'] * 7) * 5
    sup[10:13] = ['th'] * 3
    print(f'{i+1}{sup[i]} best hyperparameter configuration found:')
    pretty_hparams(bests[i].values)

#######################################################################################
# Train model with best hyperparameters

In [9]:
start = time.time()

if market:
    logdir = f'.\\trained_models_and_logs\\accuracy\\' + datetime.now().strftime("%d-%H%M")
else:
    logdir = f'.\\trained_models_and_logs\\efficiency\\heston_dim{dim:02}\\'
tensorboard_cb = keras.callbacks.TensorBoard(log_dir=logdir+'logs', update_freq='epoch')

if randSearch:
    best_hp = { # Copy hparams from printout then add string-quotes and commas.
        'number_layers'   : 3,
        'number_units'    : 12,
        'learning_rate'   : 0.004866422595439956,
        'rate_decay'      : 0.8814231084354753,
        'l1_regularizer'  : 8.01608844764161e-09,
        'l2_regularizer'  : 2.4880966570023003e-08,
        'activation_func' : 'elu' }
else:
    best_hp = tuner.get_best_hyperparameters(num_trials = 1)[0]

model          = model_builder(best_hp)
inputs, prices = df(size, market, dim)
train_history  = model.fit(inputs, prices,
                           validation_split = val_split,
                           batch_size       = batch,
                           callbacks        = [tensorboard_cb, early_stopT],
                           shuffle          = True,
                           verbose          = 0,
                           epochs           = fit_epochs)
                             
print("Average training loss: ", np.average(train_history.history['loss'][-3:]))
print("Average val loss: ", np.average(train_history.history['val_loss'][-3:]))
print('Time taken: ', time.time()-start)

import winsound
for i in range(2):
    winsound.Beep(1000, 250)

Average training loss:  32.33586105114325
Average val loss:  33.504411792813386
Time taken:  60.92910432815552


In [10]:
print("Average val mse: ", np.average(train_history.history['val_mean_squared_error'][-3:]))

Average val mse:  0.008916552919599863


In [12]:
#model.save(logdir)
#saved_model = keras.models.load_model(logdir)

The below is only for visual reference and not the reported test loss, since this includes training data.

In [None]:
def black_scholes(arr): # Returns analytical B-S prices for each row of inputs in df
    arr    = pd.DataFrame(arr, columns=['strike', 'moneyness', 'rate', 'volatility', 'days_to_maturity'])
    N      = lambda vec: norm.cdf(vec)
    mats   = arr.days_to_maturity / 365
    d1_num = np.log(arr.moneyness) + (arr.rate + 0.5 * arr.volatility ** 2) * mats
    d1_den = arr.volatility * mats ** 0.5
    d1     = d1_num / d1_den
    d2     = d1 - d1_den
    return arr.strike * ( N(d1) * arr.moneyness - N(d2) * np.exp(-arr.rate * mats ) )

In [None]:
num_predictions = 15
preds = model.predict(inputs[:num_predictions].reshape(num_predictions,features))
truth = prices[:num_predictions]
if market:
    bench = black_scholes(inputs)
    print('Prediction / Truth / Pred Mape / Black-Scholes / BS Mape(%)')
    for i in range(num_predictions):
        print(preds[i], '   ', truth[i], '', 100*np.abs((preds[i]-truth[i]))/truth[i],
              ' ', bench[i], '', 100*np.abs((bench[i]-truth[i]))/truth[i])
else:
    print('Prediction / Truth / Pred Mape(%)')
    for i in range(num_predictions):
        print(preds[i], ' ', truth[i], '  ', 100*np.abs((preds[i]-truth[i]))/truth[i])