In [1]:
import os
import numpy as np
import pandas as pd
import multiprocessing
from scipy.stats import norm
import plotly.graph_objs as go
from keras.optimizers import Adam
from keras.callbacks import TensorBoard
from keras.layers import Dense, LeakyReLU
from keras.models import Sequential, load_model
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

2023-09-25 16:37:16.832935: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Read in the data
df_train = pd.read_csv('../Data/train.csv')
df_test = pd.read_csv('../Data/test.csv')

# Define the features and target
features = ['Underlying Price', 'Strike', 'Days to Expiry', 'Underlying Volatility', 'Rate']
target = ['Option Price']

# Create the training and testing data
X_train = df_train[features]
y_train = df_train[target]
X_test = df_test[features]
y_test = df_test[target]
# Ensure Days to Expiry is in non-zero days, if zero, set to 0.1
X_train['Days to Expiry'] = X_train['Days to Expiry'].apply(lambda x: 0.5 if x == 0 else x)
X_test['Days to Expiry'] = X_test['Days to Expiry'].apply(lambda x: 0.5 if x == 0 else x)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_train['Days to Expiry'] = X_train['Days to Expiry'].apply(lambda x: 0.5 if x == 0 else x)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X_test['Days to Expiry'] = X_test['Days to Expiry'].apply(lambda x: 0.5 if x == 0 else x)


In [3]:
# Black-Scholes Model (parallelized)
def parallel_black_scholes(params):
    S0, K, T, sigma, r = params
    d1 = (np.log(S0 / K) + (r*365 + 0.5 * sigma**2) * T/365) / (sigma * np.sqrt(T/365))
    d2 = d1 - sigma * np.sqrt(T/365)
    call_price = S0 * norm.cdf(d1) - K * np.exp(-r * T) * norm.cdf(d2)
    return call_price

In [4]:
models = [
    ('Black-Scholes', parallel_black_scholes),
    ('MLP', Sequential([
        Dense(400, input_dim=5),
        LeakyReLU(),
        Dense(400, activation='relu'),
        Dense(400, activation='relu'),
        Dense(400, activation='relu'),
        Dense(1),
    ]))
]

2023-09-25 16:37:20.709279: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-25 16:37:20.780265: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1960] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


In [5]:
n_batch = 4096
n_epochs = 10
learning_rates = [1e-3, 1e-4, 1e-5]
train_metrics = {
    'MSE': [],
    'MAE': [],
    'R2': []
}
test_metrics = {
    'MSE': [],
    'MAE': [],
    'R2': []
}

In [6]:
for i, (name, model) in enumerate(models):
    print(f'Training {name}...')
    if name == 'Black-Scholes':
        pool = multiprocessing.Pool()
        y_pred = pool.map(parallel_black_scholes, X_train.values)
        pool.close()
        pool.join()
    else:
        # If model is already trained, load it
        if os.path.exists(f'../Models/{name}.h5'):
            model = load_model(f'../Models/{name}.h5')
            models[i] = (name,model)
        else:
            full_history = None
            for lr in learning_rates:
                model.compile(loss='mse', optimizer=Adam(learning_rate=lr))
                history = model.fit(X_train, y_train, 
                            batch_size=n_batch, epochs=n_epochs, 
                            validation_split = 0.01,
                            callbacks=[TensorBoard()],
                            verbose=1)
                # Concatenate the history
                if full_history is None:
                    full_history = history.history
                else:
                    for key in history.history:
                        full_history[key] += history.history[key]
            history = full_history
            # Save the model
            model.save(f'../Models/{name}.h5')
            # Plot the training history
            fig = go.Figure(data=[
                go.Scatter(name='Training', x=np.arange(n_epochs*len(learning_rates)), y=history['loss'])
            ])
            fig.update_layout(title=f'{name} Training History', xaxis_title='Epoch', yaxis_title='Loss')
            #Save the plot
            fig.write_image(f'../Images/{name}_training_history.png')
        y_pred = model.predict(X_train)
    print(f'MSE: {mean_squared_error(y_train, y_pred)}')
    print(f'MAE: {mean_absolute_error(y_train, y_pred)}')
    print(f'R2: {r2_score(y_train, y_pred)}')
    print()
    train_metrics['MSE'].append(mean_squared_error(y_train, y_pred))
    train_metrics['MAE'].append(mean_absolute_error(y_train, y_pred))
    train_metrics['R2'].append(r2_score(y_train, y_pred))

print(train_metrics)

Training Black-Scholes...
MSE: 46155.44594536169
MAE: 108.27489609318852
R2: 0.8853696233641085

Training MLP...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  saving_api.save_model(


MSE: 13970.899020640327
MAE: 70.55465908010652
R2: 0.9653022653367095

{'MSE': [46155.44594536169, 13970.899020640327], 'MAE': [108.27489609318852, 70.55465908010652], 'R2': [0.8853696233641085, 0.9653022653367095]}


In [7]:
# Test the models
for name, model in models:
    print(f'Testing {name}...')
    if name == 'Black-Scholes':
        pool = multiprocessing.Pool()
        y_pred = pool.map(parallel_black_scholes, X_test.values)
        pool.close()
        pool.join()
    else:
        y_pred = model.predict(X_test)
    print(f'MSE: {mean_squared_error(y_test, y_pred)}')
    print(f'MAE: {mean_absolute_error(y_test, y_pred)}')
    print(f'R2: {r2_score(y_test, y_pred)}')
    print()
    test_metrics['MSE'].append(mean_squared_error(y_test, y_pred))
    test_metrics['MAE'].append(mean_absolute_error(y_test, y_pred))
    test_metrics['R2'].append(r2_score(y_test, y_pred))

print(test_metrics)

Testing Black-Scholes...
MSE: 32299.474557791524
MAE: 90.3169194664287
R2: 0.9283768050161211

Testing MLP...
MSE: 14534.982809485924
MAE: 88.63554024520005
R2: 0.9677690760576162

{'MSE': [32299.474557791524, 14534.982809485924], 'MAE': [90.3169194664287, 88.63554024520005], 'R2': [0.9283768050161211, 0.9677690760576162]}


In [8]:
# Plot the results 3 plots for MSE, MAE and R-squared comparing Testing and Training
for metric in train_metrics:
    fig = go.Figure(data=[
        go.Bar(name='Training', x=[name for name, _ in models], y=train_metrics[metric], text=[round(m,2) for m in train_metrics[metric]], textposition='auto'),
        go.Bar(name='Testing', x=[name for name, _ in models], y=test_metrics[metric], text=[round(m,2) for m in test_metrics[metric]], textposition='auto')
    ])
    fig.update_layout(title=f'{metric} Comparison', xaxis_title='Model', yaxis_title=metric)
    #Save the plot
    fig.write_image(f'../Images/{metric}_comparison.png')