In [16]:

import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from keras import Input, Model
from keras.layers import Dense, LeakyReLU, BatchNormalization, Dropout, Concatenate
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
warnings.filterwarnings('ignore')


In [17]:

# ----------------------------------------------------------------------------
# 0) Load and preprocess raw data
# ----------------------------------------------------------------------------
df = pd.read_csv('option data variable.csv', parse_dates=['date','exdate'])
df.rename(columns={'exdate':'maturity'}, inplace=True)
df.dropna(inplace=True)
df['strike_price'] /= 1_000

# core features
df['mid_price']   = (df['best_bid'] + df['best_offer']) / 2
df['days_to_exp'] = (df['maturity'] - df['date']).dt.days
df['is_call']     = (df['cp_flag']=='C').astype(int)
df['log_mny']     = np.log(df['underlying_price']/df['strike_price'])
df['log_mny2']    = df['log_mny']**2

# helper for SSR labels
df['F'] = df['underlying_price'] * np.exp(df['risk_free_rate'] * df['days_to_exp'] / 252)
df['k'] = np.log(df['strike_price'] / df['F'])


In [18]:

# ----------------------------------------------------------------------------
# 1) Compute SSR labels and enrich features
# ----------------------------------------------------------------------------
def get_atm(group):
    idx = (group['strike_price'] - group['F']).abs().idxmin()
    return pd.Series({
        'date': group.at[idx,'date'],
        'maturity': group.at[idx,'maturity'],
        'underlying_price': group.at[idx,'underlying_price'],
        'atm_iv': group.at[idx,'impl_volatility']
    })
atm = df.groupby(['date','maturity']).apply(get_atm).reset_index(drop=True)
atm = atm.sort_values(['maturity','date'])
atm['dlnS'] = np.log(atm['underlying_price']).groupby(atm['maturity']).diff()
atm['dIV']  = atm['atm_iv'].groupby(atm['maturity']).diff()
atm['num']  = atm['dIV']/atm['dlnS']

def skew_slope(group):
    sub = group[np.abs(group['k'])<0.05]
    if len(sub)<5: return np.nan
    return np.polyfit(sub['k'], sub['impl_volatility'], 1)[0]
skew = df.groupby(['date','maturity']).apply(skew_slope).reset_index(name='skew')

sr = pd.merge(atm, skew, on=['date','maturity'], how='inner')
sr['SSR_raw'] = sr['num']/sr['skew']
sr['SSR'] = sr['SSR_raw'].clip(-5,5)
# enrich df with SSR label and key vol features
df = pd.merge(df, sr[['date','maturity','SSR','atm_iv','skew']], on=['date','maturity'], how='left')


In [19]:

# ----------------------------------------------------------------------------
# 2) Feature & target setup
# ----------------------------------------------------------------------------
X_COLS = [
    'underlying_price','strike_price','impl_volatility',
    'risk_free_rate','days_to_exp','is_call','log_mny','log_mny2',
    'atm_iv','skew'
]
GREEKS = ['mid_price','delta','gamma','vega','theta']
Y_COLS = GREEKS + ['SSR']

df.dropna(subset=X_COLS+Y_COLS, inplace=True)
df_call, df_put = df[df['is_call']==1], df[df['is_call']==0]
split=lambda g: (g.iloc[:int(.98*len(g))], g.iloc[int(.98*len(g)):int(.985*len(g))], g.iloc[int(.985*len(g)):])
call_tr, call_va, call_te = split(df_call)
put_tr, put_va, put_te    = split(df_put)


In [20]:

# ----------------------------------------------------------------------------
# 3) Separate scaling for greeks & SSR & features
# ----------------------------------------------------------------------------
greek_scaler = StandardScaler().fit(pd.concat([call_tr[GREEKS], put_tr[GREEKS]]))
ssr_scaler   = StandardScaler().fit(pd.concat([call_tr[['SSR']],  put_tr[['SSR']]]))
feature_scaler = StandardScaler().fit(pd.concat([call_tr[X_COLS], put_tr[X_COLS]]))

def prep(df_, xs, gs, ss):
    X  = xs.transform(df_[X_COLS])
    Yg = gs.transform(df_[GREEKS])
    Ys = ss.transform(df_[['SSR']])
    return X, np.hstack([Yg, Ys])

cXtr, cYtr = prep(call_tr,  feature_scaler, greek_scaler, ssr_scaler)
cXva, cYva = prep(call_va,  feature_scaler, greek_scaler, ssr_scaler)
cXte, cYte = prep(call_te,  feature_scaler, greek_scaler, ssr_scaler)
pXtr, pYtr = prep(put_tr,   feature_scaler, greek_scaler, ssr_scaler)
pXva, pYva = prep(put_va,   feature_scaler, greek_scaler, ssr_scaler)
pXte, pYte = prep(put_te,   feature_scaler, greek_scaler, ssr_scaler)


In [21]:

# ----------------------------------------------------------------------------
# 4) Multi-task MLP: separate greeks & SSR heads
# ----------------------------------------------------------------------------
def build_mlp_mt(indim, hidden=512, layers=6, dropout=0.3, theta_w=2.0, ssr_w=10.0):
    x = Input(shape=(indim,))
    h = Dense(hidden)(x); h = LeakyReLU()(h)
    for _ in range(layers-1):
        h = Dense(hidden)(h); h = BatchNormalization()(h)
        h = LeakyReLU()(h); h = Dropout(dropout)(h)
    out_g = Dense(len(GREEKS), name='greeks')(h)
    out_s = Dense(1, name='ssr')(h)
    model = Model(x, [out_g, out_s])
    model.compile('adam', loss={'greeks':'mse','ssr':'mse'}, loss_weights={'greeks':1.0,'ssr':ssr_w})
    return model

CB = [EarlyStopping(patience=15, restore_best_weights=True), ReduceLROnPlateau(factor=0.5, patience=7)]


In [22]:

# ----------------------------------------------------------------------------
# 5) Train multi-task network
# ----------------------------------------------------------------------------
call_m = build_mlp_mt(cXtr.shape[1])
call_m.fit(cXtr, [cYtr[:,:5], cYtr[:,5:]], validation_data=(cXva, [cYva[:,:5], cYva[:,5:]]),
           epochs=60, batch_size=4096, callbacks=CB, verbose=1)
put_m  = build_mlp_mt(pXtr.shape[1])
put_m.fit(pXtr,  [pYtr[:,:5], pYtr[:,5:]], validation_data=(pXva, [pYva[:,:5], pYva[:,5:]]),
          epochs=60, batch_size=4096, callbacks=CB, verbose=1)


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/6

<keras.callbacks.History at 0x2ae21719880>

In [24]:

# ----------------------------------------------------------------------------
# 6) Evaluate including SSR with detailed reports
# ----------------------------------------------------------------------------
def evaluate_mt(model, X, Y, gs, ss, tag):
    pred_g, pred_s = model.predict(X, verbose=1)
    true_g = gs.inverse_transform(Y[:,:5])
    true_s = ss.inverse_transform(Y[:,5:])
    pred_g = gs.inverse_transform(pred_g)
    pred_s = ss.inverse_transform(pred_s)

    print(f"\n{tag} MODEL Performance:")
    for i, name in enumerate(GREEKS):
        mse = mean_squared_error(true_g[:,i], pred_g[:,i])
        mae = mean_absolute_error(true_g[:,i], pred_g[:,i])
        r2  = r2_score(true_g[:,i], pred_g[:,i])
        print(f"{name:10s}  MSE={mse:.6f}  MAE={mae:.6f}  R²={r2:.4f}")
    mse_s = mean_squared_error(true_s[:,0], pred_s[:,0])
    mae_s = mean_absolute_error(true_s[:,0], pred_s[:,0])
    r2_s  = r2_score(true_s[:,0], pred_s[:,0])
    print(f"{'SSR':10s}  MSE={mse_s:.6f}  MAE={mae_s:.6f}  R²={r2_s:.4f}")

evaluate_mt(call_m, cXte, cYte, greek_scaler, ssr_scaler, 'CALL')
evaluate_mt(put_m,  pXte, pYte, greek_scaler, ssr_scaler, 'PUT')



CALL MODEL Performance:
mid_price   MSE=5.828734  MAE=1.495236  R²=0.9711
delta       MSE=0.006409  MAE=0.050326  R²=0.9402
gamma       MSE=0.008975  MAE=0.020366  R²=0.5222
vega        MSE=13.010059  MAE=2.538499  R²=0.8167
theta       MSE=134.431546  MAE=3.626314  R²=0.8678
SSR         MSE=3.916497  MAE=1.351317  R²=0.4609

PUT MODEL Performance:
mid_price   MSE=8.147453  MAE=1.742968  R²=0.9582
delta       MSE=0.006050  MAE=0.059347  R²=0.9451
gamma       MSE=0.008939  MAE=0.024719  R²=0.5937
vega        MSE=17.140207  MAE=3.087860  R²=0.8127
theta       MSE=222.905080  MAE=5.047914  R²=0.7944
SSR         MSE=4.220377  MAE=1.398403  R²=0.4161
