# Group project - A neural network model to compute implied volatility from option prices

In [1]:
import pandas as pd
from scipy.stats import qmc
from scipy.stats import norm
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
from torchmetrics import R2Score
from IPython.display import display, Markdown

## Create the dataset

We first create a sample dataset of model parameters, using the moneyness S/K instead of
S and K separately. The input features are:
1. Moneyness: S/K with a narrow range: [0.5, 1.5] and a wide range: [0.4, 1.6]
2. Time to maturity: τ with a narrow range: [0.3, 0.95] and a wide range: [0.2, 1.1]
3. Risk free rate: r with a narrow range: [0.03, 0.08] and a wide range: [0.02, 0.1]
4. Volatility: σ with a narrow range: [0.02, 0.9] and a wide range: [0.01, 1.0]

In [87]:
def black_scholes_price(data):
    """Return Black-Scholes European call prices scaled by the strike.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with one option per row. Columns, in order: moneyness
        ``S/K``, time to maturity ``tau``, risk-free rate ``r`` and
        volatility ``sigma``.

    Returns
    -------
    numpy.ndarray
        1-D array with one value per row, computed as
        ``(S/K) * N(d1) - exp(-r * tau) * N(d2)``.
    """
    moneyness = data[:, 0]
    tau = data[:, 1]
    rate = data[:, 2]
    sigma = data[:, 3]

    # Terms shared by d1 and d2.
    log_moneyness = np.log(moneyness)
    half_variance = 0.5 * sigma**2
    vol_sqrt_tau = sigma * np.sqrt(tau)

    d1 = (log_moneyness + (rate + half_variance) * tau) / vol_sqrt_tau
    d2 = (log_moneyness + (rate - half_variance) * tau) / vol_sqrt_tau

    # Working in units of the strike, so the discounted strike is exp(-r*tau).
    discount = np.exp(-1 * tau * rate)
    return moneyness * norm.cdf(d1) - discount * norm.cdf(d2)

In [88]:
# Draw one Latin hypercube sample in the unit cube [0, 1)^4 and reuse it for
# both parameter ranges, so the narrow and wide sets share the same design.
lhs = qmc.LatinHypercube(d=4, seed=1)
sample = lhs.random(n=1000000)

# Bounds are listed in column order: S/K, tau, r, sigma.
n_l_bounds = [0.5, 0.3, 0.03, 0.02]
n_u_bounds = [1.5, 0.95, 0.08, 0.9]
w_l_bounds = [0.4, 0.2, 0.02, 0.01]
w_u_bounds = [1.6, 1.1, 0.1, 1.0]

# Scale the unit sample to each range, then prepend the Black-Scholes price
# as column 0, giving the layout: price, S/K, tau, r, sigma.
narrow_data = qmc.scale(sample, l_bounds=n_l_bounds, u_bounds=n_u_bounds)
narrow_data = np.hstack(
    (black_scholes_price(narrow_data)[:, np.newaxis], narrow_data)
)

wide_data = qmc.scale(sample, l_bounds=w_l_bounds, u_bounds=w_u_bounds)
wide_data = np.hstack(
    (black_scholes_price(wide_data)[:, np.newaxis], wide_data)
)

## Pre-processing step

In [89]:
# Column layout of both arrays: 0 = price, 1 = S/K, 2 = tau, 3 = r, 4 = sigma.
# For each data set:
#   1. subtract max(S/K - exp(-r * tau), 0) from the price in column 0,
#   2. keep only rows whose remaining value is at least the threshold,
#   3. replace column 0 by its natural logarithm.
# NOTE(review): 10e-7 equals 1e-6; confirm that 1e-7 was not intended.
narrow_data[:, 0] -= np.maximum(
    narrow_data[:, 1] - np.exp(-1 * narrow_data[:, 3] * narrow_data[:, 2]),
    0,
)
narrow_data = narrow_data[narrow_data[:, 0] >= 10e-7]
np.log(narrow_data[:, 0], out=narrow_data[:, 0])

wide_data[:, 0] -= np.maximum(
    wide_data[:, 1] - np.exp(-1 * wide_data[:, 3] * wide_data[:, 2]),
    0,
)
wide_data = wide_data[wide_data[:, 0] >= 10e-7]
np.log(wide_data[:, 0], out=wide_data[:, 0])

## Divide the data into training, validation and test sets

In [90]:
def split_data(data, scale):
    X_train, X_temp, y_train, y_temp = train_test_split(
        data[:, [0,1,2,3]], narrow_data[:,4], test_size=0.2,
        random_state=1)
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=1)

    X_train = torch.from_numpy(X_train).float().to('cuda')
    y_train = torch.from_numpy(y_train).float().to('cuda')
    X_valid = torch.from_numpy(X_valid).float().to('cuda')
    y_valid = torch.from_numpy(y_valid).float().to('cuda')
    X_test = torch.from_numpy(X_test).float().to('cuda')
    y_test = torch.from_numpy(y_test).float().to('cuda')
    del X_temp, y_temp