In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from tensorflow import keras
import tensorflow.keras.backend as kb
import timeit, time

from tensorflow.keras.layers import BatchNormalization, Dense, Dropout, InputLayer
from tensorflow.keras.layers.experimental.preprocessing import Normalization

print(tf.config.list_physical_devices()) # Check TensorFlow can see your GPU.

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# To run on CPU:
tf.config.set_visible_devices(tf.config.experimental.list_physical_devices('CPU'))

# Rounding errors on float32 can inflate the loss
floatsize = 'float64'
kb.set_floatx(floatsize)

In [3]:
fit_epochs = 1000   # Train until reaching a MAPE of 1%
val_split  = 0.2
repeats    = 20     # Times are averaged over 20 repetitions
batch      = 2**8
size       = 6*10**5

In [4]:
def df(dim, size = 2*10**5):
    data = pd.read_csv(f'.\\data\\heston_prices\\heston_prices_dim{dim:02}.csv', header=None)
    size = int(min(size, len(data)))
    data = data.sample(n=size, replace=False)
      
    inputs = data[data.columns[:-1]].to_numpy()
    prices = data[data.columns[-1]].to_numpy()
    return inputs, prices.reshape(size,1)

In [5]:
def model_builder(dim):
    number_layers, number_units, learning_rate, rate_decay, l1_reg, l2_reg, a_func = hyp_params[f'dim{dim}'].values()

    initializer = tf.keras.initializers.he_uniform()#normal()
    regularizer = keras.regularizers.l1_l2(l1_reg, l2_reg)

    norm_layer = Normalization()
    norm_layer.adapt(df(dim)[0])
    model = keras.Sequential()
    model.add( InputLayer(input_shape=(features,), name='Input_Layer') )
    
    for layer in range(number_layers):
        model.add( Dense( units             = number_units,
                         name               = f'Layer_{layer+1}',
                         kernel_initializer = initializer,                 # Initialize weights with He/Xavier initialiser
                         kernel_regularizer = regularizer,
                         activation         = a_func,
                         dtype              = floatsize) )
        #model.add( BatchNormalization(renorm = True) )                    # Dropout and Batch-norm are best not used together.
        #model.add( Dropout(0.2) )
        
    model.add( Dense(units = 1, activation = 'linear',                     # Output is a single scalar (price)
                     dtype = floatsize, name='Output_Layer') )
    
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        learning_rate, decay_steps = 4000, decay_rate = rate_decay, staircase = True)
    
    model.compile(
        optimizer = keras.optimizers.Adam(learning_rate = lr_schedule),
        loss      = tf.keras.losses.MeanAbsolutePercentageError())         # Train, test loss & objective.
    return model

In [6]:
# Black-Scholes achieved MAPE of 98% on historical quotes. Train network until achieves 1% MAPE. (lower is better)
class trainToThreshold(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if logs.get('val_loss') < 1:
            self.model.stop_training = True
beat_bench = trainToThreshold()

In [15]:
times_list = []
hyp_params = pd.read_csv('all_hyperparameters.csv', index_col=0).to_dict('index')

for dim in [1, 4, 7, 10, 13, 16]:
    start = time.time()
    inputs, prices = df(dim, size)
    #fc, pc    = 0, 0
    print(f'Beginning runs for dimension {dim}')
    
    def fitting():
        global fc
        model   = model_builder(dim)
        model.fit(inputs, prices,
                  validation_split = val_split,
                  batch_size       = batch,
                  callbacks        = [beat_bench],
                  shuffle          = True,
                  verbose          = 0,
                  epochs           = fit_epochs)
        #fc += 1
        #print(f'{fc}/{repeats} training repeats completed. Cumulative time {time.time() - start}')

    logdir      = f'.\\trained_models_and_logs\\efficiency\\heston_dim{dim:02}\\'
    saved_model = keras.models.load_model(logdir)
    features    = 7 + 2 * dim
    
    def predicting():
        global pc
        saved_model.predict(inputs.reshape(-1, features))
        #pc += 1
        #print(f'{pc}/{repeats} prediction repeats completed. Cumulative time {time.time() - start}')
        
    if __name__ == "__main__":
        fit_time  = timeit.timeit(fitting,    number = repeats) / repeats
        pred_time = timeit.timeit(predicting, number = repeats) / repeats
        pred_time = (10**6) * pred_time / len(inputs)     # Spurious Monte Carlo prices are removed so less for network to use.
        comb_time = fit_time+pred_time
    times_list.append([fit_time, pred_time, comb_time])
    
    print(f'Dimension {dim} completed with average fitting time {fit_time:.3f},\
(scaled) prediction time {pred_time:.3f} and combined time {comb_time:.3f}.\n')

Beginning runs for dimension 1
1/20 training repeats completed. Cumulative time 7.011801719665527
2/20 training repeats completed. Cumulative time 28.402799367904663
3/20 training repeats completed. Cumulative time 50.843059062957764
4/20 training repeats completed. Cumulative time 57.77083611488342
5/20 training repeats completed. Cumulative time 69.60637640953064
6/20 training repeats completed. Cumulative time 82.63332605361938
7/20 training repeats completed. Cumulative time 115.8965060710907
8/20 training repeats completed. Cumulative time 123.02126717567444
9/20 training repeats completed. Cumulative time 177.60794234275818
10/20 training repeats completed. Cumulative time 202.086843252182
11/20 training repeats completed. Cumulative time 216.01340699195862
12/20 training repeats completed. Cumulative time 227.92854022979736
13/20 training repeats completed. Cumulative time 267.0744664669037
14/20 training repeats completed. Cumulative time 284.73609614372253
15/20 training repea

In [8]:
times = pd.DataFrame(times_list, index = ['dim1', 'dim4', 'dim7', 'dim10', 'dim13', 'dim16'],
                     columns=['Average Fitting Time', '(Scaled) Average Prediction Time', 'Average Combined Time'])
#times.to_csv(f'network_times.csv', index=False)

In [9]:
times_list

[[23.713148120099998, 10.288466183328667, 34.00161430342867],
 [27.384465487200004, 10.870254055438888, 38.254719542638895],
 [22.425331194299996, 10.772322293036845, 33.197653487336844],
 [19.719614053600004, 9.728448675052796, 29.4480627286528],
 [21.54310136135, 10.625664378964938, 32.16876574031494],
 [36.54722708475001, 11.197764206988602, 47.744991291738614]]

In [17]:
##########################

In [11]:
# Provided to enable updating the optimal hyperparameters if necessary.
arr = np.array([[3, 14, 0.0031622776601683794, 1.0,                3.981071705534969e-09,  3.981071705534969e-08,  'elu'],
                [4, 5,  0.0020532768013140695, 0.9260942201031189, 1.8220209272284306e-07, 2.223087029305306e-06,  'elu'],
                [5, 11, 0.003343011965403283,  1.0,                1.2689610031679234e-08, 1e-07,                  'elu'],
                [3, 8,  0.008410349931109732,  0.9373650283899712, 1.138171227667121e-07,  8.854986578381169e-08,  'elu'],
                [4, 8,  0.005451471686114507,  0.90923157270006,   1.4486689707216352e-07, 1.810821338043891e-07,  'elu'],
                [5, 19, 0.003829733191759989,  0.821837036444325,  4.434242545353473e-08,  2.5599029496976226e-08, 'elu'],
                [6, 19, 0.00476654387664145,   0.8998223163663568, 4.2846295035144304e-07, 9.279111614760352e-07,  'elu']])
hyperparametersDF = pd.DataFrame( arr, index = ['market', 'dim1', 'dim4', 'dim7', 'dim10', 'dim13', 'dim16'],
                                     columns = ['number_layers', 'number_units', 'learning_rate', 'rate_decay',
                                                'l1_regularizer', 'l2_regularizer', 'activation_func' ] )
#hyperparametersDF.to_csv('all_hyperparameters.csv')