# Initializer

In [1]:
# Widen the notebook container so wide tables and plots are easier to read.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [2]:
# One-off setup: install the package behind `import tensorflow_addons` if it is missing.
#pip install tensorflow-addons

In [3]:
import pandas as pd
import datetime, os
import numpy as np
import numpy.random as npr
from pylab import plt, mpl
import time

from scipy.stats import norm
from scipy import optimize
import scipy.integrate as integrate
import scipy.special as special 

import tensorflow as tf
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorboard.plugins.hparams import api as hp
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler

import tensorflow_addons as tfa

import matplotlib.pyplot as plt
import seaborn as sns

from keras import backend as BK

# Load the TensorBoard notebook extension
%load_ext tensorboard

Using TensorFlow backend.


Let's read the data from the CSV files, so we don't need to generate it again but can just load it when needed.

In [4]:
# Load the option inputs and outputs from their CSV files.
# The folder is read from the HESTON_DATA_DIR environment variable when it is set;
# otherwise the original location is used. This lets the notebook run on another
# machine without editing the code.
DATA_DIR = os.environ.get(
    "HESTON_DATA_DIR",
    r"/Users/Marcklein/Desktop/Master Thesis/Option pricing using Neural Networks/Python/Heston",
)
raw_Options_input = pd.read_csv(os.path.join(DATA_DIR, "Options_input.csv"))
raw_Options_output = pd.read_csv(os.path.join(DATA_DIR, "Options_output.csv"))

# The files carry an extra 'Unnamed: 0' column at the beginning; remove it.
# errors="ignore" keeps the cell from failing with a KeyError when a file does
# not contain that column.
raw_Options_input = raw_Options_input.drop(columns=['Unnamed: 0'], errors="ignore")
raw_Options_output = raw_Options_output.drop(columns=['Unnamed: 0'], errors="ignore")


In [5]:
# Work on independent copies so the frames read from disk stay untouched.
Options_input, Options_output = (
    frame.copy() for frame in (raw_Options_input, raw_Options_output)
)

Since the standard deviation is calculated by taking the sum of the squared deviations from the mean, a zero standard deviation can only be possible when all the values of a variable are the same (all equal to the mean). In this case, those variables have no discriminative power so they can be removed from the analysis. They cannot improve any classification, clustering or regression task. Many implementations will do it for you or throw an error about a matrix calculation.

### **Data preparation**

We split our dataset into a training set (90% of the rows) and a test set (the remaining 10%). A validation set can be taken from the training set during `model.fit`.

In [6]:
# 90% of the rows for training and validating, the remaining 10% for testing.
# Inputs and outputs must describe the same rows, otherwise features and labels
# would be paired incorrectly.
assert Options_input.index.equals(Options_output.index), \
    "Options_input and Options_output must share the same row index"

train_dataset = Options_input.sample(frac=0.9, random_state=42)
test_dataset = Options_input.drop(train_dataset.index)

# Pick the labels through the index of the sampled inputs. Sampling the labels
# separately would only line up with the inputs as long as both frames have the
# same length and the same seed is used.
train_labels = Options_output.loc[train_dataset.index]
test_labels = Options_output.drop(train_dataset.index)

Check the overall statistics

In [7]:
# Summary statistics of the training inputs, transposed so each feature is one row.
train_stats = train_dataset.describe().transpose()

In [8]:
#normalize the data
def norm(x, stats=None):
    """Standardise the columns of `x` to zero mean and unit standard deviation.

    NOTE: this definition rebinds the name `norm`, so `scipy.stats.norm`
    imported at the top of the notebook is no longer reachable under that name.

    Parameters
    ----------
    x : pandas.DataFrame
        Data to standardise; its columns are matched against the rows of `stats`.
    stats : pandas.DataFrame, optional
        Table with one row per feature and at least the columns 'mean' and
        'std'. Defaults to the notebook-level `train_stats`, so the test data
        is scaled with the training-set statistics.

    Returns
    -------
    pandas.DataFrame
        `(x - mean) / std`, computed column by column.
    """
    if stats is None:
        stats = train_stats
    # A constant feature has std == 0; dividing by it would give NaN/inf.
    # Use 1 instead so such a column is simply mapped to 0.
    std = stats['std'].where(stats['std'] != 0, 1.0)
    return (x - stats['mean']) / std
# Standardise both splits and keep only the underlying numpy arrays.
normed_train_data, normed_test_data = (
    norm(frame).values for frame in (train_dataset, test_dataset)
)

# Turn the labels into numpy arrays as well, matching the normalised inputs.
train_labels, test_labels = map(np.asarray, (train_labels, test_labels))

# Sanity check: rows must agree between inputs and outputs of each split.
print("Input train data:", normed_train_data.shape, " Output train data:", train_labels.shape)
print("Input test data:", normed_test_data.shape, " Output test data:", test_labels.shape)

Input train data: (90000, 7)  Output train data: (90000, 10)
Input test data: (10000, 7)  Output test data: (10000, 10)


## **Initializers**

As mentioned earlier, we need to find a way to alleviate the unstable gradients problem. This is done by getting the signal to flow properly in both directions: in the forward direction when making predictions, and in the opposite direction when backpropagating gradients. We don't want the signals to explode and saturate, nor do we want them to die out. For the signal to flow properly, Xavier Glorot and Yoshua Bengio argued that the variance of the outputs of each layer needs to be equal to the variance of its inputs, and that the gradients need to have equal variance before and after flowing through a layer in the reverse direction.
_____________________________________________________________________________

In [9]:
# Clear any logs from previous runs, so the log folder only holds the runs made below.
# NOTE: `rm -rf` is a Unix shell command; it deletes ./logs/ recursively without asking.
!rm -rf ./logs/

In [10]:
# Network and training settings used by train_val_model below
input_size = len(train_dataset.keys())   # one input unit per feature column
output_size = train_labels.shape[1]      # one output unit per label column
hidden_layer_size = 100                  # units in each hidden layer
n_epochs = 50

# Glorot (Xavier) uniform initializer
weights_initializer_GH = keras.initializers.GlorotUniform()
# Random normal initializer (mean 0, standard deviation 0.05, no fixed seed)
weights_initializer_RN = tf.random_normal_initializer(mean=0.0, stddev=0.05, seed=None)

#A function that builds the network, trains it on the training data and returns the Keras History
def train_val_model(hparams):
    """Build, compile and fit the two-hidden-layer regression network.

    Parameters
    ----------
    hparams : dict
        Must contain the key 'initializer': the kernel initializer applied to
        both hidden layers. The output layer does not receive it.

    Returns
    -------
    History
        The object returned by `model.fit`.

    Notes
    -----
    Reads the notebook-level names `input_size`, `hidden_layer_size`,
    `output_size`, `n_epochs`, `normed_train_data` and `train_labels`.
    No validation data is used: `validation_split` is commented out in the
    `fit` call, so only training metrics are produced.
    """
    model = keras.models.Sequential([
        #Layer to be used as an entry point into a Network
        keras.layers.InputLayer(input_shape=[input_size]),

        #Dense layer 1
        keras.layers.Dense(hidden_layer_size, activation='relu',
                           kernel_initializer=hparams['initializer'],
                           name='Layer_1'),

        #Batch Layer
        #keras.layers.BatchNormalization(),

        #Dense layer 2
        keras.layers.Dense(hidden_layer_size, activation='relu',
                           kernel_initializer=hparams['initializer'],
                           name='Layer_2'),

        #Batch Layer
        #keras.layers.BatchNormalization(),

        #activation function is linear since we are doing regression
        keras.layers.Dense(output_size, activation='linear', name='Output_layer'),
    ])

    #use the adam optimizer
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                                         epsilon=1e-10, amsgrad=False, name='Adam')

    #Compiling the model: the mean squared error is both the loss and the reported metric
    model.compile(optimizer=optimizer,
                  loss='mean_squared_error',
                  metrics=['mean_squared_error'])

    #Stop training once it has been running for 5 minutes
    time_stopping_callback = tfa.callbacks.TimeStopping(seconds=5*60, verbose=1)

    #Log the training so it can be displayed in TensorBoard; the start time
    #(in whole seconds) is put in the folder name to keep runs apart
    NAME = "Heston_Tensorboard-{}".format(int(time.time()))
    tensorboard_callback = TensorBoard(log_dir="logs/fit/{}".format(NAME),
                                       histogram_freq=1,
                                       write_graph=True,
                                       write_images=False,
                                       update_freq="epoch",
                                       profile_batch=2,
                                       embeddings_freq=0,
                                       embeddings_metadata=None)

    #Training the network
    #NOTE(review): batch_size equals the number of training rows, i.e. one
    #gradient step per epoch. The saved output below shows 352 steps per epoch,
    #which does not match this setting - confirm the intended batch size.
    history = model.fit(normed_train_data, train_labels,
                        epochs=n_epochs,
                        batch_size=len(normed_train_data),
                        verbose=2,
                        #validation_split=0.2,
                        callbacks=[tensorboard_callback, time_stopping_callback])

    return history

# Train the network with the Glorot-uniform initializer on both hidden layers.
glorot_hparams = {'initializer': weights_initializer_GH}
train_val_model(glorot_hparams)

Epoch 1/50
352/352 - 3s - loss: 0.0064 - mean_squared_error: 0.0064
Epoch 2/50
352/352 - 4s - loss: 1.0985e-04 - mean_squared_error: 1.0985e-04
Epoch 3/50
352/352 - 3s - loss: 6.6655e-05 - mean_squared_error: 6.6655e-05
Epoch 4/50
352/352 - 3s - loss: 4.5681e-05 - mean_squared_error: 4.5681e-05
Epoch 5/50
352/352 - 4s - loss: 3.4819e-05 - mean_squared_error: 3.4819e-05
Epoch 6/50
352/352 - 3s - loss: 2.7275e-05 - mean_squared_error: 2.7275e-05
Epoch 7/50
352/352 - 4s - loss: 2.2240e-05 - mean_squared_error: 2.2240e-05
Epoch 8/50
352/352 - 3s - loss: 1.9184e-05 - mean_squared_error: 1.9184e-05
Epoch 9/50
352/352 - 3s - loss: 1.9034e-05 - mean_squared_error: 1.9034e-05
Epoch 10/50
352/352 - 4s - loss: 1.8866e-05 - mean_squared_error: 1.8866e-05
Epoch 11/50
352/352 - 3s - loss: 1.9018e-05 - mean_squared_error: 1.9018e-05
Epoch 12/50
352/352 - 2s - loss: 1.7764e-05 - mean_squared_error: 1.7764e-05
Epoch 13/50
352/352 - 2s - loss: 1.6610e-05 - mean_squared_error: 1.6610e-05
Epoch 14/50
352/

<tensorflow.python.keras.callbacks.History at 0x7fdaf155de90>

In [11]:
# Show the TensorBoard dashboard inline, reading the runs logged under logs/fit.
%tensorboard --logdir logs/fit

Reusing TensorBoard on port 6010 (pid 34409), started 0:03:37 ago. (Use '!kill 34409' to kill it.)