# Hyperparameter Tuning for ANN-2 and ANN-3

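This notebook investigates which architecture (number of hidden layers and number of nodes per layer) is optimal for ANN-2, which is trained on call prices, and ANN-3, which is trained on put prices.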

In [None]:
# importing public modules
import numpy as np
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras import Input, Model
import tensorflow as tf

In [None]:
# check for GPU usage: displays the name of the GPU device TensorFlow reports
# (the recorded run of this cell shows '/device:GPU:0')

tf.test.gpu_device_name()

'/device:GPU:0'

In [None]:
# mount Google Drive in the Colab runtime and switch into the project folder
import os
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
PROJECT_DIR = "/content/drive/My Drive/content"
BS_DATA_FILE = 'bs_data.csv'

drive.mount(DRIVE_MOUNT_POINT, force_remount=True)
os.chdir(PROJECT_DIR)

# read the Black-Scholes dataset (comma-separated) into a 2-D float array
data = np.genfromtxt(BS_DATA_FILE, delimiter=',')

# data prep

# inputs: converting spot price (column 0) + strike price (column 1) to
# moneyness (s/k); columns 2-4 are passed through unchanged.
# Built with vectorized column operations instead of a per-row Python loop,
# which yields the same (n_rows, 4) array without iterating over every row.
inputs = data[:,:5]
X = np.column_stack((inputs[:,0] / inputs[:,1],
                     inputs[:,2], inputs[:,3], inputs[:,4]))

# outputs: scaling the prices by the strike price (p/k)
# the last two columns hold the call and put prices respectively
strike = data[:,1]
y_calls = data[:,-2] / strike
y_puts = data[:,-1] / strike

# row indices at which the training and validation partitions end
TRAIN_END = 900000
VAL_END = 950000

# training data
X_train = X[:TRAIN_END]
y_calls_train = y_calls[:TRAIN_END]
y_puts_train = y_puts[:TRAIN_END]

# validation data
X_val = X[TRAIN_END:VAL_END]
y_calls_val = y_calls[TRAIN_END:VAL_END]
y_puts_val = y_puts[TRAIN_END:VAL_END]

# testing data
X_test = X[VAL_END:]
y_calls_test = y_calls[VAL_END:]
y_puts_test = y_puts[VAL_END:]

# sanity check: print the shape of every partition (same order as before)
for partition in (X_train, y_calls_train, y_puts_train,
                  X_val, y_calls_val, y_puts_val,
                  X_test, y_calls_test, y_puts_test):
  print(partition.shape)

Mounted at /content/drive
(900000, 4)
(900000,)
(900000,)
(50000, 4)
(50000,)
(50000,)
(50000, 4)
(50000,)
(50000,)


In [None]:
# ANN-2
# Forward Selection to find the lowest parameter model that satisfies the same accuracy as ANN-1

# function to build models
def build_model(num_of_nodes, num_of_layers, output_name='call_output', learning_rate=1e-05):
  """Build and compile a fully connected network over the 4 input features.

  Args:
    num_of_nodes: number of units in each hidden Dense layer.
    num_of_layers: number of hidden Dense layers stacked after the input.
    output_name: name given to the single-unit output layer
      (defaults to 'call_output', the name used for ANN-2).
    learning_rate: learning rate passed to the Adam optimizer.

  Returns:
    A Keras Model compiled with MSE loss and the Adam optimizer.
  """
  inputs = Input(shape=(4,), name='input')

  # stack identical ReLU hidden layers; the loop index itself is not needed
  x = inputs
  for _ in range(num_of_layers):
    x = Dense(num_of_nodes, activation='relu', kernel_initializer='glorot_uniform')(x)

  # single output unit; no activation is specified on this layer
  output = Dense(1, name=output_name)(x)

  model = Model(inputs=inputs, outputs=output)
  opt = Adam(learning_rate=learning_rate)
  model.compile(loss='mse', optimizer=opt)

  return model

# Forward-selection grid. Depth is the outer loop and width the inner loop, so
# every width is tried at a given depth before another hidden layer is added.
LAYER_COUNTS = range(1, 7)           # 1-6 hidden layers
NODE_COUNTS = range(100, 700, 100)   # 100-600 nodes per hidden layer
N_REPEATS = 2                        # models trained per architecture to average the MSE
TARGET_MSE_CALLS = 2.72e-07          # MSE achieved by ANN-1 for calls

# stays None unless some architecture reaches the target, so the final report
# cannot mistake "last architecture tried" for "architecture that succeeded"
best_architecture = None

for num_layers in LAYER_COUNTS:
  for num_nodes in NODE_COUNTS:
    model_infra = 'Layers' + str(num_layers) + 'Nodes' + str(num_nodes)
    sum_of_losses = 0
    # several models per architecture to get an average MSE
    for iteration in range(N_REPEATS):
      print('\n')
      print('Iteration: ' + str(iteration+1) + ' for ' + str(model_infra))
      # release Keras global state left behind by the previous model so that
      # memory does not keep growing while many models are built in this loop
      tf.keras.backend.clear_session()
      model = build_model(num_nodes, num_layers)
      model.fit(X_train, y_calls_train,
                validation_data = (X_val,y_calls_val),
                epochs=20, batch_size=200)
      # the model is compiled with a loss only, so the evaluation result is
      # accumulated directly as the validation MSE
      mse_loss = model.evaluate(X_val, y_calls_val)
      sum_of_losses += mse_loss

    # mean mse for the model
    mean_mse = sum_of_losses / N_REPEATS

    print('\n')
    print('Model: ' + str(model_infra))
    print('Mean Loss over ' + str(N_REPEATS) + ' models: ' + str(mean_mse))
    print('\n')

    # if average MSE is less than what was achieved by ANN-1 for calls, then stop
    if mean_mse <= TARGET_MSE_CALLS:
      best_architecture = model_infra
      break
  # propagate the stop from the inner loop to the outer loop
  if best_architecture is not None:
    break

if best_architecture is not None:
  print('Best parameters: ' + str(best_architecture))
else:
  print('No architecture in the search grid reached the target MSE of ' + str(TARGET_MSE_CALLS))



Iteration: 1 for Layers1Nodes100
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Iteration: 2 for Layers1Nodes100
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Model: Layers1Nodes100
Mean Loss over 2 models: 1.1713935691659572e-05




Iteration: 1 for Layers1Nodes200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Iteration: 2 for Layers1Nodes200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoc

In [None]:
# ANN-3
# Forward Selection to find the lowest parameter model that satisfies the same accuracy as ANN-1

# function to build models
def build_model(num_of_nodes, num_of_layers, output_name='put_output', learning_rate=1e-05):
  """Build and compile a fully connected network over the 4 input features.

  Args:
    num_of_nodes: number of units in each hidden Dense layer.
    num_of_layers: number of hidden Dense layers stacked after the input.
    output_name: name given to the single-unit output layer
      (defaults to 'put_output', the name used for ANN-3).
    learning_rate: learning rate passed to the Adam optimizer.

  Returns:
    A Keras Model compiled with MSE loss and the Adam optimizer.
  """
  inputs = Input(shape=(4,), name='input')

  # stack identical ReLU hidden layers; the loop index itself is not needed
  x = inputs
  for _ in range(num_of_layers):
    x = Dense(num_of_nodes, activation='relu', kernel_initializer='glorot_uniform')(x)

  # single output unit; no activation is specified on this layer
  output = Dense(1, name=output_name)(x)

  model = Model(inputs=inputs, outputs=output)
  opt = Adam(learning_rate=learning_rate)
  model.compile(loss='mse', optimizer=opt)

  return model

# Forward-selection grid. Depth is the outer loop and width the inner loop, so
# every width is tried at a given depth before another hidden layer is added.
LAYER_COUNTS = range(1, 7)           # 1-6 hidden layers
NODE_COUNTS = range(100, 700, 100)   # 100-600 nodes per hidden layer
N_REPEATS = 2                        # models trained per architecture to average the MSE
TARGET_MSE_PUTS = 2.20e-07           # MSE achieved by ANN-1 for puts

# stays None unless some architecture reaches the target, so the final report
# cannot mistake "last architecture tried" for "architecture that succeeded"
best_architecture = None

for num_layers in LAYER_COUNTS:
  for num_nodes in NODE_COUNTS:
    model_infra = 'Layers' + str(num_layers) + 'Nodes' + str(num_nodes)
    sum_of_losses = 0
    # several models per architecture to get an average MSE
    for iteration in range(N_REPEATS):
      print('\n')
      print('Iteration: ' + str(iteration+1) + ' for ' + str(model_infra))
      # release Keras global state left behind by the previous model so that
      # memory does not keep growing while many models are built in this loop
      tf.keras.backend.clear_session()
      model = build_model(num_nodes, num_layers)
      model.fit(X_train, y_puts_train,
                validation_data = (X_val,y_puts_val),
                epochs=20, batch_size=200)
      # the model is compiled with a loss only, so the evaluation result is
      # accumulated directly as the validation MSE
      mse_loss = model.evaluate(X_val, y_puts_val)
      sum_of_losses += mse_loss

    # mean mse for the model
    mean_mse = sum_of_losses / N_REPEATS

    print('\n')
    print('Model: ' + str(model_infra))
    print('Mean Loss over ' + str(N_REPEATS) + ' models: ' + str(mean_mse))
    print('\n')

    # if average MSE is less than what was achieved by ANN-1 for puts, then stop
    if mean_mse <= TARGET_MSE_PUTS:
      best_architecture = model_infra
      break
  # propagate the stop from the inner loop to the outer loop
  if best_architecture is not None:
    break

if best_architecture is not None:
  print('Best parameters: ' + str(best_architecture))
else:
  print('No architecture in the search grid reached the target MSE of ' + str(TARGET_MSE_PUTS))



Iteration: 1 for Layers1Nodes100
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Iteration: 2 for Layers1Nodes100
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Model: Layers1Nodes100
Mean Loss over 2 models: 6.048647946954588e-06




Iteration: 1 for Layers1Nodes200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


Iteration: 2 for Layers1Nodes200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch

For ANN-2, the optimal architecture consists of 2 hidden layers with 600 neurons per layer.


For ANN-3, the optimal architecture consists of 3 hidden layers with 400 neurons per layer.