In [2]:
# 0) Imports (add tensorflow)
import warnings, numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf                                   # ← new
from keras import Sequential, Input
from keras.layers import Dense, LeakyReLU, BatchNormalization, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from scipy.stats import norm; warnings.filterwarnings('ignore')


In [3]:

# 1) Load
df = pd.read_parquet('option_data_2021-2023_2000secids.parquet')
df['date']   = pd.to_datetime(df['date'])
df['exdate'] = pd.to_datetime(df['exdate'])
df.dropna(inplace=True); df['strike_price'] /= 1_000


In [4]:

# 2) Features (add log-moneyness)
df['mid_price']   = (df['best_bid'] + df['best_offer'])/2
df['days_to_exp'] = (df['exdate'] - df['date']).dt.days
df['is_call']     = (df['cp_flag']=='C').astype(int)
df['log_mny']     = np.log(df['underlying_price']/df['strike_price'])
df['log_mny2']    = df['log_mny']**2

X_COLS = ['underlying_price','strike_price','impl_volatility',
          'risk_free_rate','days_to_exp','is_call','log_mny','log_mny2']
Y_COLS = ['mid_price','delta','gamma','vega','theta']

df = df.dropna(subset=X_COLS + Y_COLS).sort_values('date').reset_index(drop=True)


KeyboardInterrupt: 

In [None]:
df.head()

Unnamed: 0,date,secid,strike_price,best_bid,best_offer,impl_volatility,delta,gamma,vega,theta,exdate,cp_flag,mid_price,risk_free_rate,underlying_price,days_to_exp,is_call,log_mny,log_mny2
0,2021-01-04,100958.0,100.0,63.4,67.8,2.44884,0.981652,0.001063,0.778052,-87.20694,2021-01-08,C,65.6,0.0009,168.6525,4,1,0.52267,0.273184
1,2021-01-04,109820.0,445.0,75.99,76.44,0.813343,-0.999039,0.000152,0.088453,-6.412182,2021-01-06,P,76.215,0.0009,371.0925,2,0,-0.181623,0.032987
2,2021-01-04,109820.0,450.0,80.99,81.44,0.857416,-0.999084,0.000138,0.084587,-6.471672,2021-01-06,P,81.215,0.0009,371.0925,2,0,-0.192796,0.03717
3,2021-01-04,109820.0,460.0,90.99,91.44,0.943541,-0.99916,0.000116,0.078019,-6.581821,2021-01-06,P,91.215,0.0009,371.0925,2,0,-0.214775,0.046128
4,2021-01-04,109820.0,470.0,100.99,101.44,1.027151,-0.999221,0.0001,0.072966,-6.682199,2021-01-06,P,101.215,0.0009,371.0925,2,0,-0.236281,0.055829


In [None]:

# 3) Chronological split (unchanged) ...
df_call, df_put = df[df.is_call==1], df[df.is_call==0]
split = lambda g: (g.iloc[:int(.98*len(g))],
                   g.iloc[int(.98*len(g)):int(.985*len(g))],
                   g.iloc[int(.985*len(g)):])

call_tr, call_val, call_te = split(df_call)
put_tr , put_val , put_te  = split(df_put)


In [None]:

# 4) Scaling (unchanged) ...
x_scal = StandardScaler().fit(pd.concat([call_tr,put_tr])[X_COLS])
ysc_c  = StandardScaler().fit(call_tr[Y_COLS])
ysc_p  = StandardScaler().fit(put_tr[Y_COLS])

prep = lambda g,xs,ys: (xs.transform(g[X_COLS]), ys.transform(g[Y_COLS]))
cXtr,cYtr = prep(call_tr,x_scal,ysc_c); cXva,cYva = prep(call_val,x_scal,ysc_c); cXte,cYte = prep(call_te,x_scal,ysc_c)
pXtr,pYtr = prep(put_tr ,x_scal,ysc_p); pXva,pYva = prep(put_val ,x_scal,ysc_p); pXte,pYte = prep(put_te ,x_scal,ysc_p)


In [None]:

# 5) MLP factory (extra layer + θ-weight in loss)
def build_mlp(indim, hidden=512, layers=6, dropout=.3, theta_w=2.0):
    x = Input(shape=(indim,)); h = Dense(hidden)(x); h=LeakyReLU()(h)
    for _ in range(layers-1):
        h=Dense(hidden)(h); h=BatchNormalization()(h); h=LeakyReLU()(h); h=Dropout(dropout)(h)
    out = Dense(len(Y_COLS))(h)
    w = tf.constant([1.,1.,1.,1.,theta_w], dtype='float32')
    loss = lambda y_t,y_p: tf.reduce_mean(w * tf.square(y_t - y_p), axis=-1)
    m = tf.keras.Model(x,out); m.compile('adam', loss=loss); return m

CB = [EarlyStopping(patience=15,restore_best_weights=True),
      ReduceLROnPlateau(factor=.5,patience=7)]


In [None]:

# 6) Train
call_m = build_mlp(cXtr.shape[1]); call_m.fit(cXtr,cYtr,validation_data=(cXva,cYva),
                                             epochs=60,batch_size=4096,callbacks=CB,verbose=1)
put_m  = build_mlp(pXtr.shape[1]); put_m.fit(pXtr,pYtr,validation_data=(pXva,pYva),
                                             epochs=60,batch_size=4096,callbacks=CB,verbose=1)


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/6

<keras.callbacks.History at 0x1f352a55910>

In [None]:

# 7) Evaluate
c_pred = ysc_c.inverse_transform(call_m.predict(cXte)); c_true = ysc_c.inverse_transform(cYte)
p_pred = ysc_p.inverse_transform( put_m.predict(pXte)); p_true = ysc_p.inverse_transform(pYte)

for tag,t,p in [('CALL',c_true,c_pred),('PUT',p_true,p_pred)]:
    print(f'\n{tag} MODEL'); 
    for i,g in enumerate(Y_COLS):
        print(f'{g:10s}  MSE={mean_squared_error(t[:,i],p[:,i]):.6f}  '
              f'MAE={mean_absolute_error(t[:,i],p[:,i]):.6f}  '
              f'R²={r2_score(t[:,i],p[:,i]):.4f}')



CALL MODEL
mid_price   MSE=23.973160  MAE=1.926977  R²=0.9963
delta       MSE=0.000587  MAE=0.015549  R²=0.9956
gamma       MSE=0.000471  MAE=0.004274  R²=0.9293
vega        MSE=20.023605  MAE=1.616324  R²=0.9954
theta       MSE=239.236980  MAE=3.278028  R²=0.9490

PUT MODEL
mid_price   MSE=6.109004  MAE=1.058331  R²=0.9975
delta       MSE=0.000656  MAE=0.015936  R²=0.9948
gamma       MSE=0.000214  MAE=0.004150  R²=0.9708
vega        MSE=34.721731  MAE=1.945969  R²=0.9902
theta       MSE=64.822635  MAE=2.151741  R²=0.9679


15421/15421 [==============================] - 53s 3ms/step
14383/14383 [==============================] - 49s 3ms/step

CALL MODEL
mid_price   MSE=23.973160  MAE=1.926977  R²=0.9963
delta       MSE=0.000587  MAE=0.015549  R²=0.9956
gamma       MSE=0.000471  MAE=0.004274  R²=0.9293
vega        MSE=20.023605  MAE=1.616324  R²=0.9954
theta       MSE=239.236980  MAE=3.278028  R²=0.9490

PUT MODEL
mid_price   MSE=6.109004  MAE=1.058331  R²=0.9975
delta       MSE=0.000656  MAE=0.015936  R²=0.9948
gamma       MSE=0.000214  MAE=0.004150  R²=0.9708
vega        MSE=34.721731  MAE=1.945969  R²=0.9902
theta       MSE=64.822635  MAE=2.151741  R²=0.9679

# WITH SSR LARGE

In [16]:

import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from keras import Input, Model
from keras.layers import Dense, LeakyReLU, BatchNormalization, Dropout, Concatenate
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
warnings.filterwarnings('ignore')


# ----------------------------------------------------------------------------
# 0) Load and preprocess raw data
# ----------------------------------------------------------------------------
df = pd.read_parquet('option_data_2021-2023_2000secids.parquet')
df['date']   = pd.to_datetime(df['date'])
df['exdate'] = pd.to_datetime(df['exdate'])
df.rename(columns={'exdate':'maturity'}, inplace=True)
df.dropna(inplace=True)
df['strike_price'] /= 1_000

# core features
df['mid_price']   = (df['best_bid'] + df['best_offer']) / 2
df['days_to_exp'] = (df['maturity'] - df['date']).dt.days
df['is_call']     = (df['cp_flag']=='C').astype(int)
df['log_mny']     = np.log(df['underlying_price']/df['strike_price'])
df['log_mny2']    = df['log_mny']**2

# helper for SSR labels
df['F'] = df['underlying_price'] * np.exp(df['risk_free_rate'] * df['days_to_exp'] / 252)
df['k'] = np.log(df['strike_price'] / df['F'])



In [17]:

# ----------------------------------------------------------------------------
# 1) Compute SSR labels and enrich features
# ----------------------------------------------------------------------------
def get_atm(group):
    idx = (group['strike_price'] - group['F']).abs().idxmin()
    return pd.Series({
        'date': group.at[idx,'date'],
        'maturity': group.at[idx,'maturity'],
        'underlying_price': group.at[idx,'underlying_price'],
        'atm_iv': group.at[idx,'impl_volatility']
    })
atm = df.groupby(['date','maturity']).apply(get_atm).reset_index(drop=True)
atm = atm.sort_values(['maturity','date'])
atm['dlnS'] = np.log(atm['underlying_price']).groupby(atm['maturity']).diff()
atm['dIV']  = atm['atm_iv'].groupby(atm['maturity']).diff()
atm['num']  = atm['dIV']/atm['dlnS']

def skew_slope(group):
    sub = group[np.abs(group['k'])<0.05]
    if len(sub)<5: return np.nan
    return np.polyfit(sub['k'], sub['impl_volatility'], 1)[0]
skew = df.groupby(['date','maturity']).apply(skew_slope).reset_index(name='skew')

sr = pd.merge(atm, skew, on=['date','maturity'], how='inner')
sr['SSR_raw'] = sr['num']/sr['skew']
sr['SSR'] = sr['SSR_raw'].clip(-5,5)
# enrich df with SSR label and key vol features
df = pd.merge(df, sr[['date','maturity','SSR','atm_iv','skew']], on=['date','maturity'], how='left')


In [18]:


# ----------------------------------------------------------------------------
# 2) Feature & target setup
# ----------------------------------------------------------------------------
X_COLS = [
    'underlying_price','strike_price','impl_volatility',
    'risk_free_rate','days_to_exp','is_call','log_mny','log_mny2',
    'atm_iv','skew'
]
GREEKS = ['mid_price','delta','gamma','vega','theta']
Y_COLS = GREEKS + ['SSR']

df.dropna(subset=X_COLS+Y_COLS, inplace=True)
df_call, df_put = df[df['is_call']==1], df[df['is_call']==0]
split=lambda g: (g.iloc[:int(.98*len(g))], g.iloc[int(.98*len(g)):int(.985*len(g))], g.iloc[int(.985*len(g)):])
call_tr, call_va, call_te = split(df_call)
put_tr, put_va, put_te    = split(df_put)


In [19]:


# ----------------------------------------------------------------------------
# 3) Separate scaling for greeks & SSR & features
# ----------------------------------------------------------------------------
greek_scaler = StandardScaler().fit(pd.concat([call_tr[GREEKS], put_tr[GREEKS]]))
ssr_scaler   = StandardScaler().fit(pd.concat([call_tr[['SSR']],  put_tr[['SSR']]]))
feature_scaler = StandardScaler().fit(pd.concat([call_tr[X_COLS], put_tr[X_COLS]]))

def prep(df_, xs, gs, ss):
    X  = xs.transform(df_[X_COLS])
    Yg = gs.transform(df_[GREEKS])
    Ys = ss.transform(df_[['SSR']])
    return X, np.hstack([Yg, Ys])

cXtr, cYtr = prep(call_tr,  feature_scaler, greek_scaler, ssr_scaler)
cXva, cYva = prep(call_va,  feature_scaler, greek_scaler, ssr_scaler)
cXte, cYte = prep(call_te,  feature_scaler, greek_scaler, ssr_scaler)
pXtr, pYtr = prep(put_tr,   feature_scaler, greek_scaler, ssr_scaler)
pXva, pYva = prep(put_va,   feature_scaler, greek_scaler, ssr_scaler)
pXte, pYte = prep(put_te,   feature_scaler, greek_scaler, ssr_scaler)



In [20]:

# ----------------------------------------------------------------------------
# 4) Multi-task MLP: separate greeks & SSR heads
# ----------------------------------------------------------------------------
def build_mlp_mt(indim, hidden=2048, layers=10, dropout=0.3, theta_w=2.0, ssr_w=10.0):
    x = Input(shape=(indim,))
    h = Dense(hidden)(x); h = LeakyReLU()(h)
    for _ in range(layers-1):
        h = Dense(hidden)(h); h = BatchNormalization()(h)
        h = LeakyReLU()(h); h = Dropout(dropout)(h)
    out_g = Dense(len(GREEKS), name='greeks')(h)
    out_s = Dense(1, name='ssr')(h)
    model = Model(x, [out_g, out_s])
    model.compile('adam', loss={'greeks':'mse','ssr':'mse'}, loss_weights={'greeks':1.0,'ssr':ssr_w})
    return model

CB = [EarlyStopping(patience=15, restore_best_weights=True), ReduceLROnPlateau(factor=0.5, patience=7)]



In [21]:

# ----------------------------------------------------------------------------
# 5) Train multi-task network
# ----------------------------------------------------------------------------
call_m = build_mlp_mt(cXtr.shape[1])
call_m.fit(cXtr, [cYtr[:,:5], cYtr[:,5:]], validation_data=(cXva, [cYva[:,:5], cYva[:,5:]]),
           epochs=60, batch_size=8192, callbacks=CB, verbose=1)
put_m  = build_mlp_mt(pXtr.shape[1])
put_m.fit(pXtr,  [pYtr[:,:5], pYtr[:,5:]], validation_data=(pXva, [pYva[:,:5], pYva[:,5:]]),
          epochs=60, batch_size=8192, callbacks=CB, verbose=1)



Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/6

<keras.callbacks.History at 0x2aaee064790>

In [22]:

# ----------------------------------------------------------------------------
# 6) Evaluate including SSR with detailed reports
# ----------------------------------------------------------------------------
def evaluate_mt(model, X, Y, gs, ss, tag):
    pred_g, pred_s = model.predict(X, verbose=1)
    true_g = gs.inverse_transform(Y[:,:5])
    true_s = ss.inverse_transform(Y[:,5:])
    pred_g = gs.inverse_transform(pred_g)
    pred_s = ss.inverse_transform(pred_s)

    print(f"\n{tag} MODEL Performance:")
    for i, name in enumerate(GREEKS):
        mse = mean_squared_error(true_g[:,i], pred_g[:,i])
        mae = mean_absolute_error(true_g[:,i], pred_g[:,i])
        r2  = r2_score(true_g[:,i], pred_g[:,i])
        print(f"{name:10s}  MSE={mse:.6f}  MAE={mae:.6f}  R²={r2:.4f}")
    mse_s = mean_squared_error(true_s[:,0], pred_s[:,0])
    mae_s = mean_absolute_error(true_s[:,0], pred_s[:,0])
    r2_s  = r2_score(true_s[:,0], pred_s[:,0])
    print(f"{'SSR':10s}  MSE={mse_s:.6f}  MAE={mae_s:.6f}  R²={r2_s:.4f}")

evaluate_mt(call_m, cXte, cYte, greek_scaler, ssr_scaler, 'CALL')
evaluate_mt(put_m,  pXte, pYte, greek_scaler, ssr_scaler, 'PUT')



CALL MODEL Performance:
mid_price   MSE=22.520345  MAE=3.214529  R²=0.8866
delta       MSE=0.005262  MAE=0.049099  R²=0.9579
gamma       MSE=0.002050  MAE=0.016222  R²=0.8431
vega        MSE=14.032539  MAE=2.718047  R²=0.7032
theta       MSE=52.067368  MAE=3.174945  R²=0.8541
SSR         MSE=0.183972  MAE=0.220200  R²=0.9765

PUT MODEL Performance:
mid_price   MSE=9.349951  MAE=1.995313  R²=0.9445
delta       MSE=0.003025  MAE=0.037927  R²=0.9753
gamma       MSE=0.002292  MAE=0.017304  R²=0.8397
vega        MSE=12.837138  MAE=2.632674  R²=0.8332
theta       MSE=56.644219  MAE=3.139970  R²=0.8669
SSR         MSE=0.213453  MAE=0.255303  R²=0.9726


512 hidden layers, 6 layers, 4096 batch size, ssr_w = 10.0 
15339/15339 [==============================] - 53s 3ms/step

CALL MODEL Performance:
mid_price   MSE=23.012303  MAE=3.484929  R²=0.8841
delta       MSE=0.008162  MAE=0.067397  R²=0.9346
gamma       MSE=0.002408  MAE=0.016505  R²=0.8158
vega        MSE=21.252476  MAE=3.430168  R²=0.5505
theta       MSE=122.277664  MAE=4.450807  R²=0.6574
SSR         MSE=0.878807  MAE=0.586897  R²=0.8876
14303/14303 [==============================] - 49s 3ms/step

PUT MODEL Performance:
mid_price   MSE=20.846676  MAE=3.418753  R²=0.8762
delta       MSE=0.007359  MAE=0.063661  R²=0.9400
gamma       MSE=0.003449  MAE=0.021250  R²=0.7587
vega        MSE=26.763761  MAE=3.828162  R²=0.6522
theta       MSE=74.410447  MAE=4.479405  R²=0.8252
SSR         MSE=0.848236  MAE=0.565792  R²=0.8912

2048 hidden layers, 8 layers, 8192 batch size, ssr_w = 10.0 
15339/15339 [==============================] - 70s 5ms/step

CALL MODEL Performance:
mid_price   MSE=22.520345  MAE=3.214529  R²=0.8866
delta       MSE=0.005262  MAE=0.049099  R²=0.9579
gamma       MSE=0.002050  MAE=0.016222  R²=0.8431
vega        MSE=14.032539  MAE=2.718047  R²=0.7032
theta       MSE=52.067368  MAE=3.174945  R²=0.8541
SSR         MSE=0.183972  MAE=0.220200  R²=0.9765
14303/14303 [==============================] - 67s 5ms/step

PUT MODEL Performance:
mid_price   MSE=9.349951  MAE=1.995313  R²=0.9445
delta       MSE=0.003025  MAE=0.037927  R²=0.9753
gamma       MSE=0.002292  MAE=0.017304  R²=0.8397
vega        MSE=12.837138  MAE=2.632674  R²=0.8332
theta       MSE=56.644219  MAE=3.139970  R²=0.8669
SSR         MSE=0.213453  MAE=0.255303  R²=0.9726