In [3]:
# 0) Imports (add tensorflow)
import warnings, numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf                                   # ← new
from keras import Sequential, Input
from keras.layers import Dense, LeakyReLU, BatchNormalization, Dropout
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from scipy.stats import norm; warnings.filterwarnings('ignore')


In [4]:

# 1) Load
df = pd.read_csv('option data variable.csv', parse_dates=['date','exdate'])
df.dropna(inplace=True); df['strike_price'] /= 1_000


In [3]:

# 2) Features (add log-moneyness)
df['mid_price']   = (df['best_bid'] + df['best_offer'])/2
df['days_to_exp'] = (df['exdate'] - df['date']).dt.days
df['is_call']     = (df['cp_flag']=='C').astype(int)
df['log_mny']     = np.log(df['underlying_price']/df['strike_price'])
df['log_mny2']    = df['log_mny']**2

X_COLS = ['underlying_price','strike_price','impl_volatility',
          'risk_free_rate','days_to_exp','is_call','log_mny','log_mny2']
Y_COLS = ['mid_price','delta','gamma','vega','theta']

df = df.dropna(subset=X_COLS + Y_COLS).sort_values('date').reset_index(drop=True)


In [5]:
df.head()

Unnamed: 0,date,secid,strike_price,best_bid,best_offer,impl_volatility,delta,gamma,vega,theta,exdate,cp_flag,mid_price,risk_free_rate,underlying_price
0,2018-01-02,101369.0,10.0,7.0,11.5,4.069927,0.97435,0.008452,0.103827,-25.86654,2018-01-05,C,9.25,0.0144,19.0725
1,2018-01-02,101369.0,10.5,6.4,11.0,3.207709,0.986672,0.006145,0.059599,-11.77059,2018-01-05,C,8.7,0.0144,19.0725
2,2018-01-02,101369.0,11.0,5.9,10.5,2.983993,0.985723,0.007009,0.063206,-11.62743,2018-01-05,C,8.2,0.0144,19.0725
3,2018-01-02,101369.0,11.5,5.4,10.0,2.769642,0.984681,0.008022,0.067096,-11.47521,2018-01-05,C,7.7,0.0144,19.0725
4,2018-01-02,101369.0,12.0,6.8,9.6,6.426838,0.863271,0.019603,0.380452,-148.8986,2018-01-05,C,8.2,0.0144,19.0725


In [4]:

# 3) Chronological split (unchanged) ...
df_call, df_put = df[df.is_call==1], df[df.is_call==0]
split = lambda g: (g.iloc[:int(.98*len(g))],
                   g.iloc[int(.98*len(g)):int(.985*len(g))],
                   g.iloc[int(.985*len(g)):])

call_tr, call_val, call_te = split(df_call)
put_tr , put_val , put_te  = split(df_put)


In [5]:

# 4) Scaling (unchanged) ...
x_scal = StandardScaler().fit(pd.concat([call_tr,put_tr])[X_COLS])
ysc_c  = StandardScaler().fit(call_tr[Y_COLS])
ysc_p  = StandardScaler().fit(put_tr[Y_COLS])

prep = lambda g,xs,ys: (xs.transform(g[X_COLS]), ys.transform(g[Y_COLS]))
cXtr,cYtr = prep(call_tr,x_scal,ysc_c); cXva,cYva = prep(call_val,x_scal,ysc_c); cXte,cYte = prep(call_te,x_scal,ysc_c)
pXtr,pYtr = prep(put_tr ,x_scal,ysc_p); pXva,pYva = prep(put_val ,x_scal,ysc_p); pXte,pYte = prep(put_te ,x_scal,ysc_p)


In [6]:

# 5) MLP factory (extra layer + θ-weight in loss)
def build_mlp(indim, hidden=512, layers=6, dropout=.3, theta_w=2.0):
    x = Input(shape=(indim,)); h = Dense(hidden)(x); h=LeakyReLU()(h)
    for _ in range(layers-1):
        h=Dense(hidden)(h); h=BatchNormalization()(h); h=LeakyReLU()(h); h=Dropout(dropout)(h)
    out = Dense(len(Y_COLS))(h)
    w = tf.constant([1.,1.,1.,1.,theta_w], dtype='float32')
    loss = lambda y_t,y_p: tf.reduce_mean(w * tf.square(y_t - y_p), axis=-1)
    m = tf.keras.Model(x,out); m.compile('adam', loss=loss); return m

CB = [EarlyStopping(patience=15,restore_best_weights=True),
      ReduceLROnPlateau(factor=.5,patience=7)]


In [7]:

# 6) Train
call_m = build_mlp(cXtr.shape[1]); call_m.fit(cXtr,cYtr,validation_data=(cXva,cYva),
                                             epochs=60,batch_size=4096,callbacks=CB,verbose=1)
put_m  = build_mlp(pXtr.shape[1]); put_m.fit(pXtr,pYtr,validation_data=(pXva,pYva),
                                             epochs=60,batch_size=4096,callbacks=CB,verbose=1)


Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60
Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/6

<keras.callbacks.History at 0x14040c23880>

In [8]:

# 7) Evaluate
c_pred = ysc_c.inverse_transform(call_m.predict(cXte)); c_true = ysc_c.inverse_transform(cYte)
p_pred = ysc_p.inverse_transform( put_m.predict(pXte)); p_true = ysc_p.inverse_transform(pYte)

for tag,t,p in [('CALL',c_true,c_pred),('PUT',p_true,p_pred)]:
    print(f'\n{tag} MODEL'); 
    for i,g in enumerate(Y_COLS):
        print(f'{g:10s}  MSE={mean_squared_error(t[:,i],p[:,i]):.6f}  '
              f'MAE={mean_absolute_error(t[:,i],p[:,i]):.6f}  '
              f'R²={r2_score(t[:,i],p[:,i]):.4f}')



CALL MODEL
mid_price   MSE=3.545308  MAE=1.121932  R²=0.9988
delta       MSE=0.000647  MAE=0.015751  R²=0.9955
gamma       MSE=0.000474  MAE=0.004118  R²=0.9317
vega        MSE=5.551839  MAE=1.349863  R²=0.9971
theta       MSE=59.677525  MAE=2.217834  R²=0.9601

PUT MODEL
mid_price   MSE=1.197582  MAE=0.624755  R²=0.9986
delta       MSE=0.000831  MAE=0.017550  R²=0.9935
gamma       MSE=0.000338  MAE=0.004354  R²=0.9596
vega        MSE=11.092182  MAE=1.652020  R²=0.9931
theta       MSE=31.069621  MAE=2.073442  R²=0.9516


# OPTUNA Search

In [None]:
# ───────────────────────────────────────────────────────────────────────────
# Optuna hyper-parameter sweep  (run after you have cXtr, cYtr, … ready)
# ───────────────────────────────────────────────────────────────────────────
import optuna, tensorflow as tf
from keras.layers import (Input, Dense, LeakyReLU, BatchNormalization,
                          Dropout, Concatenate)
from keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.optimizers.schedules import CosineDecayRestarts
from tensorflow.keras.models import Model
import numpy as np 

############################################################################
# 1)  Model-builder factory
############################################################################
def build_net(indim, hp):
    """Return compiled Keras model built from an Optuna trial dict `hp`."""
    units   = hp['units']
    layers  = hp['layers']
    dropout = hp['drop']
    theta_w = hp['theta_w']
    lr0     = hp['lr0']

    x = Input(shape=(indim,))
    h = Dense(units)(x); h = LeakyReLU()(h)

    # residual block every 2 layers
    for i in range(layers-1):
        h_in = h
        h = Dense(units)(h); h = BatchNormalization()(h); h = LeakyReLU()(h)
        h = Dropout(dropout)(h)
        if i % 2:                                    # add residual every 2nd layer
            h = Concatenate()([h, h_in])

    shared  = Dense(3)(h)        # mid_price, Δ, ν
    gamma_h = Dense(1)(h)        # Γ
    theta_h = Dense(1)(h)        # θ
    out = Concatenate()([shared, gamma_h, theta_h])

    w = tf.constant([1., 1., 1., 3., theta_w], dtype='float32')
    loss = lambda y_t, y_p: tf.reduce_mean(w * tf.square(y_t - y_p), axis=-1)

    m = Model(x, out)
    m.compile(tf.keras.optimizers.Adam(lr0), loss=loss)
    return m


############################################################################
# 2)  Objective wrapper
############################################################################
def make_objective(X_tr, Y_tr, X_val, Y_val):
    def objective(trial):
        hp = {
            'units'  : trial.suggest_int('units', 256, 768, step=128),
            'layers' : trial.suggest_int('layers', 4, 8),
            'drop'   : trial.suggest_float('drop', 0.15, 0.45),
            'theta_w': trial.suggest_float('theta_w', 1.5, 3.5),
            'lr0'    : trial.suggest_loguniform('lr0', 5e-4, 3e-3)
        }

        model  = build_net(X_tr.shape[1], hp)

        EPOCHS = 60
        # ---------- edit #1: use hp['lr0'] here ----------
        cos_sched = CosineDecayRestarts(
            initial_learning_rate=hp['lr0'],
            first_decay_steps=EPOCHS // 2,
            alpha=1e-5)

        cbs = [
            EarlyStopping(patience=10, restore_best_weights=True),
            LearningRateScheduler(lambda e: cos_sched(e), verbose=0)
        ]

        # ---------- edit #2: EPOCHS (capital) ----------
        model.fit(X_tr, Y_tr,
                  validation_data=(X_val, Y_val),
                  batch_size=4096, epochs=EPOCHS,
                  callbacks=cbs, verbose=0)

        val_loss = model.evaluate(X_val, Y_val, verbose=0)
        tf.keras.backend.clear_session()
        return val_loss
    return objective



############################################################################
# 3)  Convenience runner
############################################################################
def optuna_search(Xtr, Ytr, Xva, Yva, tag, n_trials=30):
    print(f"\n🟢  Starting Optuna search for **{tag}** model …")
    study = optuna.create_study(direction='minimize')
    study.optimize(make_objective(Xtr, Ytr, Xva, Yva), n_trials=n_trials)
    print(f"✅  {tag} best params →", study.best_params)

    # build + train final model with full train+val data using best params
    best_hp = study.best_params
    model   = build_net(Xtr.shape[1], best_hp)

    X_full  = np.vstack([Xtr, Xva])
    Y_full  = np.vstack([Ytr, Yva])

    epochs = 60
    cbs = [
        EarlyStopping(patience=12, restore_best_weights=True),
        CosineDecayRestarts(initial_learning_rate=best_hp['lr0'],
                            first_decay_steps=epochs//2,
                            alpha=1e-5)
    ]
    model.fit(X_full, Y_full, batch_size=4096, epochs=epochs,
              callbacks=cbs, verbose=1)
    model.save(f"best_{tag.lower()}.keras")
    return model


############################################################################
# 4)  Run for CALLs and PUTs
############################################################################
N_TRIALS = 30          # bump to 50-100 for a deeper search if you wish

call_model = optuna_search(cXtr, cYtr, cXva, cYva, tag='CALL', n_trials=N_TRIALS)
put_model  = optuna_search(pXtr, pYtr, pXva, pYva, tag='PUT',  n_trials=N_TRIALS)

print("\n🎉  Both optimised models saved to 'best_call.keras' and 'best_put.keras'")


[I 2025-04-28 20:25:51,065] A new study created in memory with name: no-name-c51311f6-44f2-4c8b-a652-fb7e8f8a0664



🟢  Starting Optuna search for **CALL** model …


[I 2025-04-28 20:44:21,178] Trial 0 finished with value: 0.03982483968138695 and parameters: {'units': 384, 'layers': 6, 'drop': 0.4369625828503292, 'theta_w': 3.0869315456096977, 'lr0': 0.0007867875770411421}. Best is trial 0 with value: 0.03982483968138695.
[I 2025-04-28 21:05:42,584] Trial 1 finished with value: 0.032729409635066986 and parameters: {'units': 384, 'layers': 7, 'drop': 0.3377937041671903, 'theta_w': 3.014008012461961, 'lr0': 0.002278759197077704}. Best is trial 1 with value: 0.032729409635066986.
[I 2025-04-28 21:28:40,493] Trial 2 finished with value: 0.043362416326999664 and parameters: {'units': 256, 'layers': 8, 'drop': 0.38863759237116585, 'theta_w': 3.0106973509153745, 'lr0': 0.0026463501601638136}. Best is trial 1 with value: 0.032729409635066986.
[W 2025-04-28 21:28:51,167] Trial 3 failed with parameters: {'units': 768, 'layers': 8, 'drop': 0.2591726457929341, 'theta_w': 3.3904189195777947, 'lr0': 0.0013924993275376465} because of the following error: Internal

InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.