In [1]:
from keras.models import Sequential
from keras.layers import Dense, Activation, LeakyReLU, BatchNormalization
from keras import backend
from keras.callbacks import TensorBoard
from keras.optimizers import Adam
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

Using TensorFlow backend.


In [2]:
# Hyperparams
# Network shape: the model cell stacks `layers` hidden Dense layers, each `n_units` wide.
layers = 4
n_units = 400
# Mini-batch size handed to every model.fit call.
n_batch = 4096
# NOTE(review): the visible fit cells pass literal epoch counts instead of n_epochs;
# confirm whether this constant is still needed.
n_epochs = 50

In [3]:
# Load the option quotes, discard incomplete rows, then remove the columns
# that are not used as model inputs or targets.
df = (
    pd.read_csv('options-df-sigma.csv')
    # Rows are filtered before the column drop, so a NaN in any column
    # (including the ones removed next) excludes the row.
    .dropna(axis=0)
    .drop(columns=['date', 'exdate', 'impl_volatility', 'volume', 'open_interest'])
)

# Rescale the strike by 1/1000.
# NOTE(review): presumably this puts it in the same units as closing_price - confirm.
df['strike_price'] = df['strike_price'] / 1000

# Separate calls from puts; the flag column is constant within each subset,
# so it is dropped once the split is done.
is_call = df['cp_flag'] == 'C'
is_put = df['cp_flag'] == 'P'
call_df = df.loc[is_call].drop(columns=['cp_flag'])
put_df = df.loc[is_put].drop(columns=['cp_flag'])

In [4]:
# Preview the first five put rows to sanity-check the remaining columns.
put_df.head(n=5)

Unnamed: 0,strike_price,best_bid,best_offer,date_ndiff,treasury_rate,closing_price,sigma_20
1,615.0,5.375,5.75,47,5.17,624.22,0.007761
2,550.0,10.125,11.75,509,5.05,624.22,0.007761
3,610.0,9.5,10.0,145,5.12,624.22,0.007761
4,600.0,2.625,2.875,47,5.17,624.22,0.007761
5,560.0,0.375,0.625,47,5.17,624.22,0.007761


In [11]:
# The two quote sides are the regression targets; every other column is a feature.
target_cols = ['best_bid', 'best_offer']

# Hold out 1% of each option type for testing, with a fixed seed so the split is repeatable.
call_X_train, call_X_test, call_y_train, call_y_test = train_test_split(
    call_df.drop(columns=target_cols),
    call_df[target_cols],
    test_size=0.01,
    random_state=42,
)
put_X_train, put_X_test, put_y_train, put_y_test = train_test_split(
    put_df.drop(columns=target_cols),
    put_df[target_cols],
    test_size=0.01,
    random_state=42,
)

In [12]:
# Fully connected regressor for the put quotes: `layers` hidden Dense layers of
# `n_units` units each, ending in a 2-unit head (one output per target column).
model = Sequential()

# Size the input from the put features, because the put split is what this
# model is fitted and evaluated on in the cells below.
model.add(Dense(n_units, input_dim=put_X_train.shape[1]))
model.add(LeakyReLU())

# Remaining hidden layers; unlike the first one, each is batch-normalised
# before its LeakyReLU activation.
for _ in range(layers - 1):
    model.add(Dense(n_units))
    model.add(BatchNormalization())
    model.add(LeakyReLU())

# ReLU on the output layer keeps both predicted quotes non-negative.
model.add(Dense(2, activation='relu'))

# Mean squared error over both outputs, Adam with its default settings.
model.compile(loss='mse', optimizer=Adam())

In [13]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 400)               2400      
_________________________________________________________________
leaky_re_lu_5 (LeakyReLU)    (None, 400)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 400)               160400    
_________________________________________________________________
batch_normalization_4 (Batch (None, 400)               1600      
_________________________________________________________________
leaky_re_lu_6 (LeakyReLU)    (None, 400)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 400)               160400    
_________________________________________________________________
batch_normalization_5 (Batch (None, 400)               1600      
__________

In [14]:
# Stage 1: 30 epochs on the put training split with the optimizer configured
# at compile time. 1% of the training rows are set aside by Keras for
# validation, and metrics are also written out through the TensorBoard callback.
history = model.fit(
    x=put_X_train,
    y=put_y_train,
    epochs=30,
    batch_size=n_batch,
    validation_split=0.01,
    callbacks=[TensorBoard()],
    verbose=1,
)

Train on 5535888 samples, validate on 55919 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [15]:
# Checkpoint the model after the first 30 epochs and score it on the held-out puts.
model.save('mlp2-put30.h5')
put_y_pred30 = model.predict(put_X_test)

# Columns are (best_bid, best_offer): the row mean is the mid price and the
# row-wise difference (offer - bid) is the spread.
actual_quotes = put_y_test.values
mid_error = np.mean(actual_quotes, axis=1) - np.mean(put_y_pred30, axis=1)
spread_error = np.diff(actual_quotes, axis=1) - np.diff(put_y_pred30, axis=1)

print('equilibrium mse', np.mean(np.square(mid_error)))
print('spread mse', np.mean(np.square(spread_error)))

equilibrium mse 177.30706866136677
spread mse 2.5870883658018213


In [17]:
# Stage 2: continue training the same model with a smaller step size.
# Recompiling attaches a new Adam instance at lr=1e-4.
model.compile(loss='mse', optimizer=Adam(lr=1e-4))

# Resume the epoch counter at 30, where the first fit stopped. With
# initial_epoch set, `epochs` is the final epoch index, so this still trains
# 10 epochs (31..40). The History/TensorBoard epoch axis then continues from
# stage 1 instead of restarting at 0, and it lines up with the cumulative
# count in the 'mlp2-put40.h5' checkpoint name.
history = model.fit(put_X_train, put_y_train,
                    batch_size=n_batch,
                    initial_epoch=30, epochs=40,
                    validation_split = 0.01,
                    callbacks=[TensorBoard()],
                    verbose=1)

Train on 5535888 samples, validate on 55919 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# Checkpoint the model after 40 cumulative epochs and score it on the held-out puts.
model.save('mlp2-put40.h5')
put_y_pred40 = model.predict(put_X_test)

# Columns are (best_bid, best_offer): the row mean is the mid price and the
# row-wise difference (offer - bid) is the spread.
actual_quotes = put_y_test.values
mid_error = np.mean(actual_quotes, axis=1) - np.mean(put_y_pred40, axis=1)
spread_error = np.diff(actual_quotes, axis=1) - np.diff(put_y_pred40, axis=1)

print('equilibrium mse', np.mean(np.square(mid_error)))
print('spread mse', np.mean(np.square(spread_error)))

equilibrium mse 38.818840572132245
spread mse 2.1885908400178935


In [19]:
# Stage 3: continue training the same model with a smaller step size again.
# Recompiling attaches a new Adam instance at lr=1e-5.
model.compile(loss='mse', optimizer=Adam(lr=1e-5))

# Resume the epoch counter at 40 (30 + 10 epochs already run). With
# initial_epoch set, `epochs` is the final epoch index, so this still trains
# 10 epochs (41..50). The History/TensorBoard epoch axis then continues from
# the earlier stages instead of restarting at 0, and it lines up with the
# cumulative count in the 'mlp2-put50.h5' checkpoint name.
history = model.fit(put_X_train, put_y_train,
                    batch_size=n_batch,
                    initial_epoch=40, epochs=50,
                    validation_split = 0.01,
                    callbacks=[TensorBoard()],
                    verbose=1)

Train on 5535888 samples, validate on 55919 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [20]:
# Checkpoint the model after 50 cumulative epochs and score it on the held-out puts.
model.save('mlp2-put50.h5')
put_y_pred50 = model.predict(put_X_test)

# Columns are (best_bid, best_offer): the row mean is the mid price and the
# row-wise difference (offer - bid) is the spread.
actual_quotes = put_y_test.values
mid_error = np.mean(actual_quotes, axis=1) - np.mean(put_y_pred50, axis=1)
spread_error = np.diff(actual_quotes, axis=1) - np.diff(put_y_pred50, axis=1)

print('equilibrium mse', np.mean(np.square(mid_error)))
print('spread mse', np.mean(np.square(spread_error)))

equilibrium mse 8.972609482242744
spread mse 2.0515710888773926


In [21]:
# Stage 4: final fine-tuning pass with the smallest step size.
# Recompiling attaches a new Adam instance at lr=1e-6.
model.compile(loss='mse', optimizer=Adam(lr=1e-6))

# Resume the epoch counter at 50 (30 + 10 + 10 epochs already run). With
# initial_epoch set, `epochs` is the final epoch index, so this still trains
# 10 epochs (51..60). The History/TensorBoard epoch axis then continues from
# the earlier stages instead of restarting at 0, and it lines up with the
# cumulative count in the 'mlp2-put60.h5' checkpoint name.
history = model.fit(put_X_train, put_y_train,
                    batch_size=n_batch,
                    initial_epoch=50, epochs=60,
                    validation_split = 0.01,
                    callbacks=[TensorBoard()],
                    verbose=1)

Train on 5535888 samples, validate on 55919 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [22]:
# Checkpoint the model after 60 cumulative epochs and score it on the held-out puts.
model.save('mlp2-put60.h5')
put_y_pred60 = model.predict(put_X_test)

# Columns are (best_bid, best_offer): the row mean is the mid price and the
# row-wise difference (offer - bid) is the spread.
actual_quotes = put_y_test.values
mid_error = np.mean(actual_quotes, axis=1) - np.mean(put_y_pred60, axis=1)
spread_error = np.diff(actual_quotes, axis=1) - np.diff(put_y_pred60, axis=1)

print('equilibrium mse', np.mean(np.square(mid_error)))
print('spread mse', np.mean(np.square(spread_error)))

equilibrium mse 8.838232424748972
spread mse 2.0520345803224336
