In [1]:
!python -m pip install --upgrade pip
!pip install --user keras
!pip install --user numpy
!pip install --user tensorflow

Requirement already up-to-date: pip in c:\users\mayingzh\appdata\roaming\python\python36\site-packages (19.2.2)
Collecting setuptools>=41.0.0 (from tensorboard<1.15.0,>=1.14.0->tensorflow)
  Downloading https://files.pythonhosted.org/packages/75/b3/0a106dfaf7f48aef638da80b32608617cc8de4b24a22c8cd3759c32e5d30/setuptools-41.1.0-py2.py3-none-any.whl (576kB)
Installing collected packages: setuptools
Successfully installed setuptools-41.1.0


In [13]:
import pandas as pd
import numpy as np
from keras.models import load_model, Sequential
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from keras.layers import Dense
from keras import backend as K
import sklearn.metrics as skm, math
#from sklearn.utils.validation import check_array

In [2]:
def rmse(y_true, y_pred):
    """Keras-backend metric: square root of the mean squared error.

    The mean is taken over the last axis only (``axis=-1``), so the result
    keeps any leading axes of the inputs.
    """
    squared_diff = K.square(y_pred - y_true)
    mean_squared = K.mean(squared_diff, axis=-1)
    return K.sqrt(mean_squared)
def r_square(y_true, y_pred):
    """Keras-backend metric: coefficient of determination, 1 - SS_res / SS_tot.

    ``K.epsilon()`` is added to the denominator so a constant ``y_true``
    (SS_tot == 0) does not divide by zero.
    """
    residual_ss = K.sum(K.square(y_true - y_pred))
    centered_truth = y_true - K.mean(y_true)
    total_ss = K.sum(K.square(centered_truth))
    ratio = residual_ss / (total_ss + K.epsilon())
    return 1 - ratio
def mpe(y_true, y_pred):
    """Keras-backend metric: RMSE divided by the mean of ``y_true``.

    Both the mean squared error and the mean of ``y_true`` are taken over all
    elements. No epsilon is added, so a zero mean of ``y_true`` divides by zero.
    """
    residual = y_true - y_pred
    root_mean_sq = K.sqrt(K.mean(residual**2))
    return root_mean_sq / K.mean(y_true)

def exp(x):
    """Element-wise exponential via the Keras backend (thin wrapper around ``K.exp``).

    NOTE(review): not referenced elsewhere in the visible notebook; presumably
    used as a custom activation when the model was trained -- confirm.
    """
    result = K.exp(x)
    return result

### With Implied Volatility

In [14]:
# Load the saved Keras model from 'model4_0.h5'. The custom `r_square` metric is
# handed over through `custom_objects` so `load_model` can resolve it by name.
model = load_model('model4_0.h5', custom_objects={'r_square': r_square})
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_51 (Dense)             (None, 5)                 40        
_________________________________________________________________
dense_52 (Dense)             (None, 20)                120       
_________________________________________________________________
dense_53 (Dense)             (None, 60)                1260      
_________________________________________________________________
dense_54 (Dense)             (None, 120)               7320      
_________________________________________________________________
dense_55 (Dense)             (None, 60)                7260      
_________________________________________________________________
dense_56 (Dense)             (None, 40)                2440      
_________________________________________________________________
dense_57 (Dense)             (None, 30)                1230      
__________

In [4]:
def intrinsic(S, K, C, P):
    """Return the intrinsic value of a single option.

    Parameters
    ----------
    S : number
        Underlying price.
    K : number
        Strike price. Inside this function the name shadows the Keras backend
        alias ``K`` imported at the top of the notebook.
    C, P : number
        Weights applied to the call payoff ``max(S - K, 0)`` and the put payoff
        ``max(K - S, 0)`` respectively; callers pass the call/put indicator
        columns.

    Returns
    -------
    number
        ``C * max(S - K, 0) + P * max(K - S, 0)``.
    """
    call_payoff = max(S - K, 0)
    put_payoff = max(K - S, 0)
    return C * call_payoff + P * put_payoff

In [5]:
def load_data(path='Options_R_test.csv'):
    """Load the option test set and add an ``intrinsic`` value column.

    Parameters
    ----------
    path : str, optional
        CSV file to read. Defaults to 'Options_R_test.csv', the file the
        notebook originally hard-coded.

    Returns
    -------
    pandas.DataFrame
        The CSV rows that contain no missing value, with the model columns
        converted to numeric dtypes and an added 'intrinsic' column equal to
        ``cp_flag_C * max(S - K, 0) + cp_flag_P * max(K - S, 0)``.

    Raises
    ------
    KeyError
        If one of the required columns is absent from the file.
    """
    numeric_cols = ['strike_price', 'underlying_price', 'maturity', 'impl_volatility',
                    'realized_vol', 'cp_flag_C', 'cp_flag_P', 'interest_rate', 'best_offer']

    df = pd.read_csv(path).dropna()

    # The original computed this conversion and threw the result away; assign it
    # back so the numeric dtypes actually take effect. It runs before the
    # intrinsic calculation so that calculation always sees numeric inputs.
    df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric)

    # Intrinsic value of every option, computed column-wise instead of with a
    # row-by-row `DataFrame.apply`; the arithmetic per row is the same.
    spot = df['underlying_price']
    strike = df['strike_price']
    df['intrinsic'] = (df['cp_flag_C'] * np.maximum(spot - strike, 0)
                       + df['cp_flag_P'] * np.maximum(strike - spot, 0))

    return df

In [6]:
# Read and prepare the test set (rows with missing values dropped, 'intrinsic'
# column added), then preview the first rows.
df = load_data()
df.head()

Unnamed: 0,ticker,date,exdate,maturity,strike_price,best_offer,impl_volatility,realized_vol,underlying_price,interest_rate,cp_flag_C,cp_flag_P,intrinsic
0,AXP,2016-01-04,2016-01-08,4,100.0,0.13,1.695791,0.208476,67.59,0.0022,1,0,0.0
1,AXP,2016-01-04,2016-01-08,4,105.0,0.13,1.869624,0.208476,67.59,0.0022,1,0,0.0
2,AXP,2016-01-04,2016-01-08,4,66.0,1.7,0.238987,0.208476,67.59,0.0022,1,0,1.59
3,AXP,2016-01-04,2016-01-08,4,66.5,1.31,0.246213,0.208476,67.59,0.0022,1,0,1.09
4,AXP,2016-01-04,2016-01-08,4,67.0,0.95,0.242758,0.208476,67.59,0.0022,1,0,0.59


In [7]:
def mean_sum_error(y_true, y_pred):
    """Return the total absolute error divided by the total of ``y_true``.

    Computes ``sum(|y_true - y_pred|) / sum(y_true)`` with NumPy; a zero total
    of ``y_true`` is not guarded against.
    """
    total_abs_error = np.sum(np.abs(y_true - y_pred))
    total_truth = np.sum(y_true)
    return total_abs_error / total_truth

In [15]:
def model_pred(df, model, vol, x_scaler=None, y_scaler=None):
    """Predict option prices with ``model`` and print error metrics.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the target column 'best_offer' and the feature columns
        'intrinsic', 'strike_price', 'underlying_price', 'maturity',
        'cp_flag_P', 'interest_rate' plus the column named by ``vol``.
    model : object
        Anything exposing ``predict(X)``; the notebook passes the loaded Keras
        model. Its output is treated as the standardised target.
    vol : str
        Name of the volatility column used as a feature
        (e.g. 'impl_volatility' or 'realized_vol').
    x_scaler, y_scaler : fitted scaler, optional
        Pre-fitted scalers (ideally the ones used when the model was trained).
        When given, they are applied with ``transform`` only. When omitted, a
        fresh ``StandardScaler`` is fitted on ``df`` itself, which is the
        original behaviour. Be aware that this standardises every frame
        (including small per-ticker subsets, and the targets) with its own
        statistics, so the inputs are not scaled the way the model saw them
        during training and the inverse transform borrows statistics from the
        targets being predicted.

    Returns
    -------
    None
        All results are printed.
    """
    feature_cols = ['intrinsic', 'strike_price', 'underlying_price', 'maturity',
                    vol, 'cp_flag_P', 'interest_rate']

    X = df[feature_cols]
    # Scalers work on 2-D input, so the target becomes a single-column matrix.
    y = df['best_offer'].values.reshape(-1, 1)

    if x_scaler is None:
        x_scaler = StandardScaler()
        X_scale = x_scaler.fit_transform(X)
    else:
        X_scale = x_scaler.transform(X)

    if y_scaler is None:
        y_scaler = StandardScaler()
        y_scale = y_scaler.fit_transform(y)
    else:
        y_scale = y_scaler.transform(y)

    pred = model.predict(X_scale)
    print('y_scale data:\n', y_scale[:10])
    print('scaled y_predition:\n', pred[:10])
    print('\n')

    # Map the standardised predictions back to price units.
    y_pred = y_scaler.inverse_transform(pred)
    print('actual value:\n', y[:10])
    print('actual predicted value:\n', y_pred[:10])
    print('\n')

    # Compute the MSE once and reuse it for the RMSE line.
    mse = skm.mean_squared_error(y, y_pred)

    print("Average sum error:              %f" % mean_sum_error(y, y_pred))
    print("Mean absolute error (MAE):      %f" % skm.mean_absolute_error(y, y_pred))
    print("Mean squared error (MSE):       %f" % mse)
    print("Root mean squared error (RMSE): %f" % math.sqrt(mse))
    print("R square (R^2):                 %f" % skm.r2_score(y, y_pred))

In [16]:
# Evaluate on the whole test set with 'impl_volatility' as the volatility feature.
model_pred(df, model, 'impl_volatility')

y_scale data:
 [[-0.50128955]
 [-0.50128955]
 [-0.39568243]
 [-0.42191605]
 [-0.44613169]
 [-0.46563874]
 [-0.48043719]
 [-0.4911997 ]
 [-0.49859892]
 [-0.50330752]]
scaled y_predition:
 [[-0.5594872 ]
 [-0.56381774]
 [-0.34541607]
 [-0.37147492]
 [-0.39224923]
 [-0.41634238]
 [-0.42127568]
 [-0.423308  ]
 [-0.42927128]
 [-0.43567306]]


actual value:
 [[0.13]
 [0.13]
 [1.7 ]
 [1.31]
 [0.95]
 [0.66]
 [0.44]
 [0.28]
 [0.17]
 [0.1 ]]
actual predicted value:
 [[-0.73519135]
 [-0.7995701 ]
 [ 2.447281  ]
 [ 2.0598788 ]
 [ 1.7510395 ]
 [ 1.3928604 ]
 [ 1.31952   ]
 [ 1.2893062 ]
 [ 1.200654  ]
 [ 1.1054826 ]]


Average sum error:              0.106412
Mean absolute error (MAE):      0.806853
Mean squared error (MSE):       1.216458
Root mean squared error (RMSE): 1.102931
R square (R^2):                 0.994496


### With Realized Volatility

In [17]:
# Same evaluation on the whole test set, but feeding 'realized_vol' into the
# volatility feature slot instead of 'impl_volatility'.
model_pred(df, model, 'realized_vol')

y_scale data:
 [[-0.50128955]
 [-0.50128955]
 [-0.39568243]
 [-0.42191605]
 [-0.44613169]
 [-0.46563874]
 [-0.48043719]
 [-0.4911997 ]
 [-0.49859892]
 [-0.50330752]]
scaled y_predition:
 [[-0.5657784 ]
 [-0.5656523 ]
 [-0.34195787]
 [-0.36980546]
 [-0.3901198 ]
 [-0.4140973 ]
 [-0.41925567]
 [-0.42039472]
 [-0.42623723]
 [-0.4321669 ]]


actual value:
 [[0.13]
 [0.13]
 [1.7 ]
 [1.31]
 [0.95]
 [0.66]
 [0.44]
 [0.28]
 [0.17]
 [0.1 ]]
actual predicted value:
 [[-0.8287182]
 [-0.8268442]
 [ 2.498692 ]
 [ 2.0846982]
 [ 1.7826967]
 [ 1.4262366]
 [ 1.3495502]
 [ 1.3326168]
 [ 1.2457595]
 [ 1.1576066]]


Average sum error:              0.207753
Mean absolute error (MAE):      1.575261
Mean squared error (MSE):       9.358039
Root mean squared error (RMSE): 3.059091
R square (R^2):                 0.957658


#### Predictions for individual tickers that were used to train the model

In [18]:
# Restrict the test set to the MCD rows and evaluate with realized volatility.
# Note: when called like this, model_pred standardises the subset with scalers
# fitted on the subset itself.
MCD = df[df.ticker == 'MCD']
model_pred(MCD, model, 'realized_vol')

y_scale data:
 [[-0.37136029]
 [-0.44322478]
 [-0.50435089]
 [-0.55308658]
 [-0.58695375]
 [-0.60677843]
 [-0.61669078]
 [-0.62164695]
 [-0.62329901]
 [-0.61669078]]
scaled y_predition:
 [[-0.5670122 ]
 [-0.56042475]
 [-0.5558928 ]
 [-0.5557641 ]
 [-0.555442  ]
 [-0.55505556]
 [-0.55472845]
 [-0.55448616]
 [-0.55430704]
 [-0.5541575 ]]


actual value:
 [[3.1 ]
 [2.23]
 [1.49]
 [0.9 ]
 [0.49]
 [0.25]
 [0.13]
 [0.07]
 [0.05]
 [0.13]]
actual predicted value:
 [[0.7314149 ]
 [0.81116307]
 [0.8660275 ]
 [0.8675858 ]
 [0.87148535]
 [0.8761631 ]
 [0.88012326]
 [0.88305676]
 [0.88522494]
 [0.8870355 ]]


Average sum error:              0.343189
Mean absolute error (MAE):      2.606775
Mean squared error (MSE):       19.973032
Root mean squared error (RMSE): 4.469120
R square (R^2):                 0.863719


In [19]:
# MCD rows again, this time with implied volatility as the volatility feature.
model_pred(MCD, model, 'impl_volatility')

y_scale data:
 [[-0.37136029]
 [-0.44322478]
 [-0.50435089]
 [-0.55308658]
 [-0.58695375]
 [-0.60677843]
 [-0.61669078]
 [-0.62164695]
 [-0.62329901]
 [-0.61669078]]
scaled y_predition:
 [[-0.5609345 ]
 [-0.5681405 ]
 [-0.5649883 ]
 [-0.56508005]
 [-0.56520873]
 [-0.56534463]
 [-0.5651284 ]
 [-0.56529784]
 [-0.5667168 ]
 [-0.56907105]]


actual value:
 [[3.1 ]
 [2.23]
 [1.49]
 [0.9 ]
 [0.49]
 [0.25]
 [0.13]
 [0.07]
 [0.05]
 [0.13]]
actual predicted value:
 [[0.8049923 ]
 [0.71775544]
 [0.75591624]
 [0.7548057 ]
 [0.75324786]
 [0.75160277]
 [0.7542206 ]
 [0.7521688 ]
 [0.7349912 ]
 [0.70649016]]


Average sum error:              0.300577
Mean absolute error (MAE):      2.283100
Mean squared error (MSE):       13.668277
Root mean squared error (RMSE): 3.697063
R square (R^2):                 0.906738


In [20]:
# (rows, columns) of the MCD subset.
MCD.shape

(39975, 13)

In [21]:
# Restrict the test set to the GS rows and evaluate with realized volatility.
GS = df[df.ticker == 'GS']
model_pred(GS, model, 'realized_vol')

y_scale data:
 [[-0.42571587]
 [-0.50607751]
 [-0.56706993]
 [-0.6074568 ]
 [-0.63094713]
 [-0.64166201]
 [-0.64537101]
 [-0.64743157]
 [-0.6449589 ]
 [-0.64908001]]
scaled y_predition:
 [[-0.16553566]
 [-0.19624251]
 [-0.22406375]
 [-0.18750826]
 [-0.14609736]
 [-0.10035601]
 [-0.06446743]
 [-0.08019927]
 [-0.09547034]
 [-0.11029425]]


actual value:
 [[5.45]
 [3.5 ]
 [2.02]
 [1.04]
 [0.47]
 [0.21]
 [0.12]
 [0.07]
 [0.13]
 [0.03]]
actual predicted value:
 [[11.763353 ]
 [11.018242 ]
 [10.343151 ]
 [11.230182 ]
 [12.235029 ]
 [13.344957 ]
 [14.215805 ]
 [13.834067 ]
 [13.4635105]
 [13.103804 ]]


Average sum error:              0.266825
Mean absolute error (MAE):      4.210539
Mean squared error (MSE):       50.494960
Root mean squared error (RMSE): 7.105981
R square (R^2):                 0.914242


In [22]:
# GS rows again, this time with implied volatility as the volatility feature.
model_pred(GS, model, 'impl_volatility')

y_scale data:
 [[-0.42571587]
 [-0.50607751]
 [-0.56706993]
 [-0.6074568 ]
 [-0.63094713]
 [-0.64166201]
 [-0.64537101]
 [-0.64743157]
 [-0.6449589 ]
 [-0.64908001]]
scaled y_predition:
 [[-0.3156091 ]
 [-0.19615567]
 [-0.22719142]
 [-0.19034544]
 [-0.14889216]
 [-0.10432225]
 [-0.06872827]
 [-0.08635625]
 [-0.12127945]
 [-0.12185228]]


actual value:
 [[5.45]
 [3.5 ]
 [2.02]
 [1.04]
 [0.47]
 [0.21]
 [0.12]
 [0.07]
 [0.13]
 [0.03]]
actual predicted value:
 [[ 8.121775 ]
 [11.0203495]
 [10.267257 ]
 [11.161336 ]
 [12.1672125]
 [13.248715 ]
 [14.112414 ]
 [13.684667 ]
 [12.837244 ]
 [12.823344 ]]


Average sum error:              0.237951
Mean absolute error (MAE):      3.754897
Mean squared error (MSE):       35.688452
Root mean squared error (RMSE): 5.973981
R square (R^2):                 0.939388


In [23]:
# Restrict the test set to the NKE rows and evaluate with realized volatility.
NKE = df[df.ticker == 'NKE']
model_pred(NKE, model, 'realized_vol')

y_scale data:
 [[-0.05944965]
 [-0.18904377]
 [-0.27587182]
 [-0.35492423]
 [-0.4106497 ]
 [-0.45989546]
 [-0.50784529]
 [-0.54672352]
 [-0.57912205]
 [-0.60504087]]
scaled y_predition:
 [[ 0.07758543]
 [-0.11564639]
 [-0.1956338 ]
 [-0.2824161 ]
 [-0.3646232 ]
 [-0.43174466]
 [-0.47977507]
 [-0.50993705]
 [-0.51004016]
 [-0.508849  ]]


actual value:
 [[4.55]
 [3.55]
 [2.88]
 [2.27]
 [1.84]
 [1.46]
 [1.09]
 [0.79]
 [0.54]
 [0.34]]
actual predicted value:
 [[5.6074176]
 [4.1163635]
 [3.4991486]
 [2.8295016]
 [2.195159 ]
 [1.677223 ]
 [1.306601 ]
 [1.0738592]
 [1.0730634]
 [1.0822549]]


Average sum error:              0.299303
Mean absolute error (MAE):      1.499131
Mean squared error (MSE):       6.747812
Root mean squared error (RMSE): 2.597655
R square (R^2):                 0.886673


In [24]:
# Restrict the test set to the KO rows and evaluate with realized volatility.
KO = df[df.ticker == 'KO']
model_pred(KO, model, 'realized_vol')

y_scale data:
 [[-0.31796112]
 [-0.43220254]
 [-0.51924363]
 [-0.5736443 ]
 [-0.59268454]
 [-0.5899645 ]
 [-0.58724447]
 [-0.5899645 ]
 [-0.5899645 ]
 [-0.58724447]]
scaled y_predition:
 [[-0.3639849 ]
 [-0.5643699 ]
 [-0.56899583]
 [-0.5692184 ]
 [-0.5690379 ]
 [-0.5685668 ]
 [-0.56789815]
 [-0.5679258 ]
 [-0.568122  ]
 [-0.56833524]]


actual value:
 [[1.08]
 [0.66]
 [0.34]
 [0.14]
 [0.07]
 [0.08]
 [0.09]
 [0.08]
 [0.08]
 [0.09]]
actual predicted value:
 [[0.91079694]
 [0.17409652]
 [0.15708965]
 [0.1562714 ]
 [0.15693492]
 [0.15866703]
 [0.16112536]
 [0.16102356]
 [0.1603021 ]
 [0.15951842]]


Average sum error:              0.311295
Mean absolute error (MAE):      0.700090
Mean squared error (MSE):       1.718214
Root mean squared error (RMSE): 1.310807
R square (R^2):                 0.872877


In [25]:
# Restrict the test set to the AXP rows and evaluate with realized volatility.
AXP = df[df.ticker == 'AXP']
model_pred(AXP, model, 'realized_vol')

y_scale data:
 [[-0.55472657]
 [-0.55472657]
 [-0.36969014]
 [-0.41565461]
 [-0.45808334]
 [-0.49226204]
 [-0.51819071]
 [-0.53704793]
 [-0.55001227]
 [-0.5582623 ]]
scaled y_predition:
 [[-0.53262615]
 [-0.5670237 ]
 [-0.15329129]
 [-0.22196546]
 [-0.28005597]
 [-0.32914713]
 [-0.33845666]
 [-0.3400593 ]
 [-0.34163272]
 [-0.34162834]]


actual value:
 [[0.13]
 [0.13]
 [1.7 ]
 [1.31]
 [0.95]
 [0.66]
 [0.44]
 [0.28]
 [0.17]
 [0.1 ]]
actual predicted value:
 [[0.31751817]
 [0.02566093]
 [3.5361044 ]
 [2.9534168 ]
 [2.4605296 ]
 [2.0440001 ]
 [1.9650103 ]
 [1.9514121 ]
 [1.9380621 ]
 [1.9380993 ]]


Average sum error:              0.284637
Mean absolute error (MAE):      1.376720
Mean squared error (MSE):       5.371010
Root mean squared error (RMSE): 2.317544
R square (R^2):                 0.925394


#### Predictions for a ticker that was not used to train the model

In [26]:
# Load the AAPL option file, drop incomplete rows and rename its columns to the
# names the rest of the notebook (intrinsic / model_pred) expects.
aapl_column_names = {
    'Strike Price': 'strike_price',
    'Lowest  Closing Ask Across All Exchanges': 'best_offer',
    'Underlying Price': 'underlying_price',
    'C=Call, P=Put_C': 'cp_flag_C',
    'C=Call, P=Put_P': 'cp_flag_P',
    'volatility': 'realized_vol',
}
AAPL = (
    pd.read_csv('Options2016AAPL_withReaVo.csv')
    .dropna()
    .rename(columns=aapl_column_names)
)
AAPL.head()

Unnamed: 0.1,Unnamed: 0,strike_price,best_offer,underlying_price,maturity,cp_flag_C,cp_flag_P,interest_rate,days,realized_vol
0,0,100.0,5.45,105.35,4,1,0,0.0022,30,0.218791
1,1,101.0,4.5,105.35,4,1,0,0.0022,30,0.218791
2,2,102.0,3.65,105.35,4,1,0,0.0022,30,0.218791
3,3,103.0,2.79,105.35,4,1,0,0.0022,30,0.218791
4,4,104.0,2.06,105.35,4,1,0,0.0022,30,0.218791


In [27]:
# Add the intrinsic value of every AAPL option, computed row by row with
# `intrinsic` from the underlying price, strike price and call/put indicators.
AAPL['intrinsic'] = AAPL.apply(lambda row: intrinsic(row['underlying_price'], row['strike_price'], row['cp_flag_C'], 
                                                     row['cp_flag_P']), axis = 1)

In [28]:
# Evaluate on the AAPL data with realized volatility as the volatility feature
# (the renamed AAPL columns shown above include no 'impl_volatility').
model_pred(AAPL, model, 'realized_vol')

y_scale data:
 [[-0.36548695]
 [-0.40373587]
 [-0.43795859]
 [-0.47258393]
 [-0.5019752 ]
 [-0.52734027]
 [-0.54747128]
 [-0.56236823]
 [-0.57203112]
 [-0.5776678 ]]
scaled y_predition:
 [[-0.4857859 ]
 [-0.5622188 ]
 [-0.5676803 ]
 [-0.5654862 ]
 [-0.56590253]
 [-0.56627256]
 [-0.5665827 ]
 [-0.56687576]
 [-0.56715924]
 [-0.56742024]]


actual value:
 [[5.45]
 [4.5 ]
 [3.65]
 [2.79]
 [2.06]
 [1.43]
 [0.93]
 [0.56]
 [0.32]
 [0.18]]
actual predicted value:
 [[2.4620981 ]
 [0.5637121 ]
 [0.42806247]
 [0.48255828]
 [0.4722176 ]
 [0.46302703]
 [0.4553242 ]
 [0.44804576]
 [0.44100478]
 [0.4345217 ]]


Average sum error:              0.248489
Mean absolute error (MAE):      3.609975
Mean squared error (MSE):       45.405190
Root mean squared error (RMSE): 6.738337
R square (R^2):                 0.926397
