In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import (
    LSTM,
    BatchNormalization,
    Bidirectional,
    Dense,
    Dropout,
    Input,
)
from tensorflow.keras.losses import Huber
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Load the option quotes and keep only contracts that were observed on exactly
# six distinct dates, ordered by contract and then by date.
original_data = pd.read_csv('../../Data/rnn_data.csv', low_memory=False)
original_data = (
    original_data.groupby('contractSymbol')
    .filter(lambda x: x['Date'].nunique() == 6)
    .sort_values(by=["contractSymbol", "Date"])
)
original_data.drop("Ticker", axis=1, inplace=True)

call_data = original_data[original_data["Call"] == 1]
put_data = original_data[original_data["Call"] == 0]

# Work on an explicit copy: assigning columns on a filtered slice triggers
# SettingWithCopyWarning and may not write where you expect.
data = call_data.copy()
data['Date'] = pd.to_datetime(data['Date'])
data = data.sort_values(by='Date')
# `fillna(method='ffill')` is deprecated in pandas 2.x; `ffill()` is the
# supported spelling.
# NOTE(review): rows are in Date order here, so a gap is filled from the
# previous row regardless of which contract that row belongs to - confirm.
data = data.ffill()

# Derive the time to maturity from the RAW expiration before any scaling.
# Previously this ran after 'Expiration' had been min-max scaled, so
# `pd.to_datetime` was parsing values in [0, 1] rather than real dates.
# NOTE(review): assumes 'Expiration' holds something `pd.to_datetime` parses
# as the expiry date - confirm against the CSV schema.
data['Time_to_Maturity'] = (pd.to_datetime(data['Expiration']) - data['Date']).dt.days

# NOTE(review): MinMaxScaler needs numeric input, so 'Expiration' must be
# numeric for this to run; if it is a date string, drop it from this list
# (Time_to_Maturity already carries the same information).
# NOTE(review): the scalers are fitted on every row, including the rows that
# are later held out for testing; fit on the training portion only if strict
# out-of-sample evaluation is required.
features_to_scale = ['strike', 'Underlying Price', 'Annualized Volatility', 'lastPrice', 'Expiration']
scaler = MinMaxScaler()
data[features_to_scale] = scaler.fit_transform(data[features_to_scale])

# Time_to_Maturity is a model input as well, so bring it onto the same [0, 1]
# range. A dedicated scaler keeps `scaler` fitted on exactly the five columns
# above, so any later `scaler.inverse_transform` call keeps working.
ttm_scaler = MinMaxScaler()
data['Time_to_Maturity'] = ttm_scaler.fit_transform(data[['Time_to_Maturity']]).ravel()
features_to_scale.append('Time_to_Maturity')

def create_sequences(data, window_size=6, feature_cols=None,
                     target_col='lastPrice', group_col=None):
    """Build sliding-window model inputs and next-step targets.

    Each sequence is ``window_size`` consecutive rows of ``data`` restricted
    to ``feature_cols``; its target is the ``target_col`` value of the row
    immediately after the window.

    Args:
        data: DataFrame whose row order defines the time order of the windows.
        window_size: Number of consecutive rows per sequence (must be >= 1).
        feature_cols: Column labels used as per-step features. Defaults to
            the notebook-level ``features_to_scale`` list.
        target_col: Column the target value is read from.
        group_col: Optional column label. When given, windows are built
            separately inside each group (in order of first appearance), so
            no window or target crosses a group boundary. When ``None``
            windows slide over all rows.

    Returns:
        ``(sequences, targets)`` where ``sequences`` has shape
        ``(n, window_size, len(feature_cols))`` and ``targets`` has shape
        ``(n,)``. When there are too few rows for a single window, ``n`` is 0
        but the arrays keep those ranks so downstream ``.shape`` lookups work.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    if feature_cols is None:
        # Fall back to the module-level feature list.
        feature_cols = features_to_scale
    feature_cols = list(feature_cols)

    if group_col is None:
        frames = [data]
    else:
        frames = [frame for _, frame in data.groupby(group_col, sort=False)]

    window_parts = []
    target_parts = []
    for frame in frames:
        # Convert once per frame; slicing a NumPy array per window is far
        # cheaper than repeated DataFrame.iloc indexing.
        feature_matrix = frame[feature_cols].to_numpy()
        target_values = frame[target_col].to_numpy()
        n_windows = max(len(frame) - window_size, 0)
        window_parts.extend(
            feature_matrix[start:start + window_size]
            for start in range(n_windows)
        )
        # Window starting at row i is paired with the value at row i + window_size.
        target_parts.append(target_values[window_size:])

    if window_parts:
        sequences = np.stack(window_parts)
    else:
        sequences = np.empty((0, window_size, len(feature_cols)))

    if target_parts:
        targets = np.concatenate(target_parts)
    else:
        targets = np.empty((0,))

    return sequences, targets

# Build the model inputs with the default window of six rows: every run of six
# consecutive rows of `data` becomes one sequence, and the `lastPrice` of the
# row right after it becomes that sequence's target.
# NOTE(review): `data` was last sorted by 'Date' alone, so consecutive rows are
# not guaranteed to belong to the same contractSymbol; a window can mix
# contracts and its target can come from a different contract - confirm this
# is intended.
X, y = create_sequences(data)

In [None]:
# Hold out the last 20% of the sequences (in their existing order, without
# shuffling) as the test set; the first 80% is used for fitting.
train_size = int(0.8 * len(X))
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Both callbacks watch the validation loss: stop after 10 stagnant epochs and
# restore the best weights; halve the learning rate after 5 stagnant epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)

# Declare the input shape with an explicit Input layer. An `input_shape`
# passed to the LSTM *inside* Bidirectional is not seen by Sequential, which
# leaves the model unbuilt when `summary()` is called.
RNN_call = Sequential([
    Input(shape=(X_train.shape[1], X_train.shape[2])),
    Bidirectional(LSTM(128, return_sequences=True)),
    Dropout(0.2),
    BatchNormalization(),
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    Dense(64, activation='relu', kernel_regularizer='l2'),
    Dense(1)
])

RNN_call.compile(optimizer=Adam(learning_rate=1e-3), loss=Huber(), metrics=['mae', 'mape'])
RNN_call.summary()

# Validate on the tail of the training data instead of the test set. Early
# stopping and the LR schedule select the model based on the validation loss,
# so validating on (X_test, y_test) would leak the test set into model
# selection and make the final evaluation optimistic. `validation_split`
# takes the last 10% of X_train/y_train before any shuffling.
history = RNN_call.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_split=0.1,
    callbacks=[early_stopping, lr_scheduler],
    verbose=1
)

# Final, untouched hold-out evaluation; returns the loss followed by the
# compiled metrics in order (MAE, MAPE).
loss, mae, mape = RNN_call.evaluate(X_test, y_test, verbose=0)
print(f"Test Loss: {loss}, Test MAE: {mae}, Test MAPE: {mape}")