## Practical Application

### Practical Application: Hybrid Method (Shorter Time Split)

In [None]:
import numpy as np
import pandas as pd
from datetime import date
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping

data_path = "derived/final_merged_for_lstm.csv"
df = pd.read_csv(data_path, parse_dates=["Date"])

# Drop leftover merge columns when the upstream join produced them.
df = df.drop(columns=[col for col in ['key_0', 'Date_sent'] if col in df.columns])

# Coerce every column except Date to numeric; entries that cannot be parsed
# become NaN and the rows containing them are dropped.
for col in df.columns:
    if col not in ['Date']:
        df[col] = pd.to_numeric(df[col], errors='coerce')
df = df.dropna().reset_index(drop=True)

# The shift()-based lags below are purely positional, so put the rows in
# chronological order explicitly instead of trusting the CSV row order.
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date", kind="stable").reset_index(drop=True)

# Feature Engineering
exclude_cols = ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']  # prices and volume; not referenced in this cell
feature_cols = ['Return', 'VIX', 'Sentiment']
N_LAGS = 2  # number of lagged copies of Return and Sentiment to add
for lag in range(1, N_LAGS+1):
    df[f"Return_lag{lag}"] = df["Return"].shift(lag)
    df[f"Sentiment_lag{lag}"] = df["Sentiment"].shift(lag)
feature_cols += [f"Return_lag{lag}" for lag in range(1, N_LAGS+1)]
feature_cols += [f"Sentiment_lag{lag}" for lag in range(1, N_LAGS+1)]
# The first N_LAGS rows have no lag history (NaN after shift); drop them.
df = df.dropna().reset_index(drop=True)

# Chronological train / validation / test split.
TRAIN_END = date(2022, 12, 31)
VAL_END = date(2024, 5, 31)
df["Date"] = pd.to_datetime(df["Date"])
train_cutoff = pd.to_datetime(TRAIN_END)
val_cutoff = pd.to_datetime(VAL_END)
df_train = df[df["Date"] <= train_cutoff]
df_val = df[(df["Date"] > train_cutoff) & (df["Date"] <= val_cutoff)]
df_test = df[df["Date"] > val_cutoff]

# Targets: the Return column of each split.
y_train = df_train["Return"].to_numpy()
y_val = df_val["Return"].to_numpy()
y_test = df_test["Return"].to_numpy()

# Scaling: statistics are fitted on the training split only and then
# applied unchanged to the validation and test splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(df_train[feature_cols])
X_val = scaler.transform(df_val[feature_cols])
X_test = scaler.transform(df_test[feature_cols])

# Creating LSTM Sequences
SEQ_LEN = 5
def create_sequences(x, y, seq_len):
    """Build sliding windows of ``seq_len`` rows and their next-step targets.

    Window ``i`` is ``x[i : i + seq_len]`` and its target is ``y[i + seq_len]``,
    i.e. the value immediately after the window.

    Args:
        x: Array-like of feature rows, indexed along the first axis.
        y: Array-like of targets, indexed in step with ``x``.
        seq_len: Number of consecutive rows per window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with ``len(x) - seq_len`` entries
        each. When ``x`` has no more than ``seq_len`` rows the result is empty
        but keeps the window layout, e.g. ``xs.shape == (0, seq_len, n_cols)``
        for 2-D ``x``, so callers that read ``xs.shape[2]`` still work.
    """
    n_windows = len(x) - seq_len
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,) and lose the window and
        # feature dimensions; build correctly shaped empty arrays instead.
        x_arr = np.asarray(x)
        y_arr = np.asarray(y)
        empty_xs = np.empty((0, seq_len) + x_arr.shape[1:], dtype=x_arr.dtype)
        empty_ys = np.empty((0,) + y_arr.shape[1:], dtype=y_arr.dtype)
        return empty_xs, empty_ys
    xs = [x[i : i + seq_len] for i in range(n_windows)]
    ys = [y[i + seq_len] for i in range(n_windows)]
    return np.array(xs), np.array(ys)

# Window each split on its own, so no window mixes rows from two splits.
(X_train_seq, y_train_seq), (X_val_seq, y_val_seq), (X_test_seq, y_test_seq) = (
    create_sequences(features, targets, SEQ_LEN)
    for features, targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

# LSTM Model: a single 50-unit LSTM layer followed by one linear output unit.
n_features = X_train_seq.shape[2]
model = Sequential()
model.add(LSTM(50, input_shape=(SEQ_LEN, n_features)))
model.add(Dense(1))
model.compile(loss="mse", optimizer="adam")

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# Train
print("Training LSTM …")
fit_options = {
    "epochs": 100,
    "batch_size": 16,
    "validation_data": (X_val_seq, y_val_seq),
    "callbacks": [early_stop],
    "verbose": 1,
}
model.fit(X_train_seq, y_train_seq, **fit_options)

# Evaluation on the held-out test windows.
print("Evaluating …")
pred_test = model.predict(X_test_seq).flatten()
test_mse = mean_squared_error(y_test_seq, pred_test)
rmse = np.sqrt(test_mse)
print(f"Test RMSE: {rmse:.6f}")

# Directional accuracy: share of windows where the predicted sign of the
# return matches the realised sign (values <= 0 count as "not up").
actual_dir = y_test_seq > 0
pred_dir = pred_test > 0
acc = accuracy_score(actual_dir, pred_dir)
print(f"Directional accuracy: {acc:.2%}")

# The first SEQ_LEN test rows only serve as history for the first window,
# so predictions line up with the test frame starting at row SEQ_LEN.
df_out = (
    df_test.iloc[SEQ_LEN:]
    .reset_index(drop=True)
    .assign(Predicted_Return=pred_test)
)
df_out.to_csv("derived/lstm_prac_test_predictions.csv", index=False)

preview_cols = ["Date", "Return", "Predicted_Return"]
print(df_out[preview_cols].head())
print("Final LSTM model summary:")
model.summary()

### Practical Application: FinBERT-only Method (Shorter Time Split)

In [None]:
import numpy as np
import pandas as pd
from datetime import date
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, accuracy_score
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras.callbacks import EarlyStopping

data_path = "derived/final_merged_FinBERT_for_lstm.csv"
df = pd.read_csv(data_path, parse_dates=["Date"])

# Drop leftover merge columns when the upstream join produced them.
df = df.drop(columns=[col for col in ['key_0', 'Date_sent'] if col in df.columns])

# Coerce every column except Date to numeric; entries that cannot be parsed
# become NaN. This single pass also covers the price, volume and VIX columns,
# so no separate per-column conversion is needed.
for col in df.columns:
    if col != 'Date':
        df[col] = pd.to_numeric(df[col], errors='coerce')

df = df.dropna().reset_index(drop=True)

# The shift()-based lags below are purely positional, so put the rows in
# chronological order explicitly instead of trusting the CSV row order.
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date", kind="stable").reset_index(drop=True)

# Feature Engineering
N_LAGS = 2  # number of lagged copies of Return and FinBERT_score to add
for lag in range(1, N_LAGS+1):
    df[f"Return_lag{lag}"] = df["Return"].shift(lag)
    df[f"FinBERT_score_lag{lag}"] = df["FinBERT_score"].shift(lag)
# The first N_LAGS rows have no lag history (NaN after shift); drop them.
df = df.dropna().reset_index(drop=True)

# Chronological train / validation / test split.
TRAIN_END = date(2022, 12, 31)
VAL_END = date(2024, 5, 31)

df["Date"] = pd.to_datetime(df["Date"])
train_cutoff = pd.to_datetime(TRAIN_END)
val_cutoff = pd.to_datetime(VAL_END)
df_train = df[df["Date"] <= train_cutoff]
df_val = df[(df["Date"] > train_cutoff) & (df["Date"] <= val_cutoff)]
df_test = df[df["Date"] > val_cutoff]

# Feature list: raw market columns, VIX, the FinBERT score, then the lags.
FEATURES = ['Close', 'High', 'Low', 'Open', 'Volume', 'VIX', 'FinBERT_score']
FEATURES += [f"Return_lag{k}" for k in range(1, N_LAGS+1)]
FEATURES += [f"FinBERT_score_lag{k}" for k in range(1, N_LAGS+1)]

# Targets: the Return column of each split.
y_train = df_train["Return"].to_numpy()
y_val = df_val["Return"].to_numpy()
y_test = df_test["Return"].to_numpy()

# Scale features as float32; statistics come from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(df_train[FEATURES].astype(np.float32).values)
X_val = scaler.transform(df_val[FEATURES].astype(np.float32).values)
X_test = scaler.transform(df_test[FEATURES].astype(np.float32).values)

# Create LSTM sequences (window=5)
SEQ_LEN = 5
def create_sequences(x, y, seq_len):
    """Build sliding windows of ``seq_len`` rows and their next-step targets.

    Window ``i`` is ``x[i : i + seq_len]`` and its target is ``y[i + seq_len]``,
    i.e. the value immediately after the window.

    Args:
        x: Array-like of feature rows, indexed along the first axis.
        y: Array-like of targets, indexed in step with ``x``.
        seq_len: Number of consecutive rows per window.

    Returns:
        Tuple ``(xs, ys)`` of NumPy arrays with ``len(x) - seq_len`` entries
        each. When ``x`` has no more than ``seq_len`` rows the result is empty
        but keeps the window layout, e.g. ``xs.shape == (0, seq_len, n_cols)``
        for 2-D ``x``, so callers that read ``xs.shape[2]`` still work.
    """
    n_windows = len(x) - seq_len
    if n_windows <= 0:
        # np.array([]) would collapse to shape (0,) and lose the window and
        # feature dimensions; build correctly shaped empty arrays instead.
        x_arr = np.asarray(x)
        y_arr = np.asarray(y)
        empty_xs = np.empty((0, seq_len) + x_arr.shape[1:], dtype=x_arr.dtype)
        empty_ys = np.empty((0,) + y_arr.shape[1:], dtype=y_arr.dtype)
        return empty_xs, empty_ys
    xs = [x[i : i + seq_len] for i in range(n_windows)]
    ys = [y[i + seq_len] for i in range(n_windows)]
    return np.array(xs), np.array(ys)
# Window each split on its own, so no window mixes rows from two splits.
(X_train_seq, y_train_seq), (X_val_seq, y_val_seq), (X_test_seq, y_test_seq) = (
    create_sequences(features, targets, SEQ_LEN)
    for features, targets in ((X_train, y_train), (X_val, y_val), (X_test, y_test))
)

print(f"Train samples: {X_train_seq.shape[0]}, Val: {X_val_seq.shape[0]}, Test: {X_test_seq.shape[0]}")

# LSTM Model: a single 50-unit LSTM layer followed by one linear output unit.
n_features = X_train_seq.shape[2]
model = Sequential()
model.add(LSTM(50, input_shape=(SEQ_LEN, n_features)))
model.add(Dense(1))
model.compile(loss="mse", optimizer="adam")

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)

# Train
print("Training LSTM …")
fit_options = {
    "epochs": 100,
    "batch_size": 16,
    "validation_data": (X_val_seq, y_val_seq),
    "callbacks": [early_stop],
    "verbose": 1,
}
model.fit(X_train_seq, y_train_seq, **fit_options)

# Evaluation on the held-out test windows.
print("Evaluating …")
pred_test = model.predict(X_test_seq).flatten()
test_mse = mean_squared_error(y_test_seq, pred_test)
rmse = np.sqrt(test_mse)
print(f"Test RMSE: {rmse:.6f}")

# Directional accuracy: share of windows where the predicted sign of the
# return matches the realised sign (values <= 0 count as "not up").
actual_dir = y_test_seq > 0
pred_dir = pred_test > 0
acc = accuracy_score(actual_dir, pred_dir)
print(f"Directional accuracy: {acc:.2%}")

# The first SEQ_LEN test rows only serve as history for the first window, so
# drop them to line the frame up with y_test_seq. Note that df_test is rebound.
df_test = df_test.iloc[SEQ_LEN:].assign(Predicted_Return=pred_test)
df_test.to_csv("derived/lstm_prac_FinBERT_only_test_predictions.csv", index=False)

preview_cols = ["Date", "Return", "Predicted_Return"]
print(df_test[preview_cols].head())
print("Final LSTM model summary:")
model.summary()

### Evaluation Metrics

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, accuracy_score,
    precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
)
from scipy.stats import norm

df_gpt = pd.read_csv("derived/lstm_prac_test_predictions.csv", parse_dates=["Date"])
df_fbert = pd.read_csv("derived/lstm_prac_FinBERT_only_test_predictions.csv", parse_dates=["Date"])
market_df = pd.read_csv("sp500_vix_data.csv", parse_dates=["Date"])

# Align data by date. The two prediction files come from differently cleaned
# inputs, so they are joined on the Date column rather than by row position;
# a positional join would pair predictions from different days whenever the
# files do not contain exactly the same dates.
hybrid_preds = df_gpt[["Date", "Return", "Predicted_Return"]].rename(
    columns={"Return": "Market_Return", "Predicted_Return": "LSTM_GPT4"}
)
finbert_preds = df_fbert[["Date", "Predicted_Return"]].rename(
    columns={"Predicted_Return": "LSTM_FinBERT"}
)
# Outer join keeps dates present in either file; the missing side is NaN and
# is filtered out later by the per-model masks.
df = (
    hybrid_preds.merge(finbert_preds, on="Date", how="outer")
    .sort_values("Date")
    .reset_index(drop=True)
)
df = df.merge(market_df[["Date", "Return", "VIX"]].rename(columns={"Return": "Market_True_Return"}), on="Date", how="left")
df["VIX"] = pd.to_numeric(df["VIX"], errors="coerce")
# NOTE(review): VIX needed coercion, so the market file may contain non-numeric
# entries; coerce the return column too so such rows become NaN and are masked.
df["Market_True_Return"] = pd.to_numeric(df["Market_True_Return"], errors="coerce")

def trading_signal_returns(true_returns, predicted_returns):
    """Return per-period returns of a long/short strategy driven by predictions.

    The position is +1 (long) where the prediction is strictly positive and
    -1 (short) otherwise; the strategy return is position times realised return.
    """
    position = np.where(predicted_returns > 0, 1, -1)
    strategy_returns = position * true_returns
    return strategy_returns

def compute_all_metrics(true_returns, predicted_returns, rolling_window=60):
    """Compute regression, direction and trading metrics for one model.

    Args:
        true_returns: Array-like of realised returns.
        predicted_returns: Array-like of predicted returns, same length.
        rolling_window: Window length (in rows) for the rolling Sharpe ratio
            and rolling directional accuracy.

    Returns:
        Dict with scalar metrics (MSE, RMSE, MAE, Direction_Acc, Precision,
        Recall, F1, ROC_AUC, Sharpe, Cumulative_Return and the mean/std of the
        rolling Sharpe and rolling accuracy), the confusion matrix, and the
        rolling series plus the raw strategy returns.

    Raises:
        ValueError: If either input is empty.
    """
    # Accept lists/Series as well as arrays; the comparisons and positional
    # indexing below need ndarray semantics.
    true_returns = np.asarray(true_returns)
    predicted_returns = np.asarray(predicted_returns)
    if len(true_returns) == 0 or len(predicted_returns) == 0:
        raise ValueError("Empty input arrays! Check your mask and input data.")

    # Regression error metrics.
    mse = mean_squared_error(true_returns, predicted_returns)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(true_returns, predicted_returns)

    # Direction labels: 1 when the return is strictly positive, else 0.
    true_up = (true_returns > 0).astype(int)
    pred_up = (predicted_returns > 0).astype(int)

    acc = accuracy_score(true_up, pred_up)
    prec = precision_score(true_up, pred_up, zero_division=0)
    rec = recall_score(true_up, pred_up, zero_division=0)
    f1 = f1_score(true_up, pred_up, zero_division=0)
    try:
        roc = roc_auc_score(true_up, predicted_returns)
    except ValueError:
        # Raised e.g. when only one direction class is present; AUC is then
        # undefined. Other exception types are left to propagate.
        roc = np.nan

    cm = confusion_matrix(true_up, pred_up)

    # Long/short strategy driven by the sign of the prediction.
    strat_returns = trading_signal_returns(true_returns, predicted_returns)
    # Inputs are non-empty here (checked above), so [-1] is always valid.
    cum_return = np.cumprod(1 + strat_returns)[-1] - 1
    # Mean/std scaled by sqrt(252); the 1e-9 term avoids division by zero.
    sharpe = np.mean(strat_returns) / (np.std(strat_returns) + 1e-9) * np.sqrt(252)
    roll_sharpe = pd.Series(strat_returns).rolling(rolling_window).apply(
        lambda x: np.mean(x) / (np.std(x) + 1e-9) * np.sqrt(252), raw=True)
    roll_acc = pd.Series(pred_up == true_up).rolling(rolling_window).mean()
    roll_cum_return = (1 + pd.Series(strat_returns)).cumprod() - 1
    rolling_sharpe_mean, rolling_sharpe_std = roll_sharpe.mean(), roll_sharpe.std()
    rolling_acc_mean, rolling_acc_std = roll_acc.mean(), roll_acc.std()
    return {
        "MSE": mse, "RMSE": rmse, "MAE": mae,
        "Direction_Acc": acc, "Precision": prec, "Recall": rec, "F1": f1, "ROC_AUC": roc,
        "Sharpe": sharpe, "Cumulative_Return": cum_return,
        "Rolling_Sharpe_Mean": rolling_sharpe_mean, "Rolling_Sharpe_Std": rolling_sharpe_std,
        "Rolling_Acc_Mean": rolling_acc_mean, "Rolling_Acc_Std": rolling_acc_std,
        "Confusion_Matrix": cm,
        "Rolling_Sharpe": roll_sharpe,
        "Rolling_Acc": roll_acc,
        "Rolling_CumReturn": roll_cum_return,
        "Signal_Returns": strat_returns
    }

# Per-model row masks: a row is usable when both the realised market return
# and that model's prediction are present.
truth_present = df['Market_True_Return'].notnull()
mask_gpt = truth_present & df['LSTM_GPT4'].notnull()
mask_fbert = truth_present & df['LSTM_FinBERT'].notnull()

print("\nValid rows for GPT:", mask_gpt.sum())
print("Valid rows for FinBERT:", mask_fbert.sum())

if not (mask_gpt.any() and mask_fbert.any()):
    raise ValueError("No valid data rows for at least one model. Check input data and merges!")

# Metrics for each model on its own valid rows.
gpt_truth = df.loc[mask_gpt, 'Market_True_Return'].values
gpt_pred = df.loc[mask_gpt, 'LSTM_GPT4'].values
metrics_gpt = compute_all_metrics(gpt_truth, gpt_pred)

fbert_truth = df.loc[mask_fbert, 'Market_True_Return'].values
fbert_pred = df.loc[mask_fbert, 'LSTM_FinBERT'].values
metrics_finbert = compute_all_metrics(fbert_truth, fbert_pred)

# Comparison table: only scalar entries are tabulated; arrays, Series and
# lists (confusion matrix, rolling series, raw returns) are left out.
non_scalar_types = (np.ndarray, pd.Series, list)
metrics_by_model = {
    "Hybrid (FinBERT+GPT-4)": metrics_gpt,
    "FinBERT-only": metrics_finbert,
}
comparison_table = pd.DataFrame({
    label: {
        metric: value
        for metric, value in model_metrics.items()
        if not isinstance(value, non_scalar_types)
    }
    for label, model_metrics in metrics_by_model.items()
})
print("Model Comparison Table")
print(comparison_table)
comparison_table.to_csv("model_performance_comparison.csv")

# Confusion matrices for the appendix: printed, then written to CSV.
print("Confusion Matrices")
print("Hybrid (FinBERT+GPT-4):\n", metrics_gpt["Confusion_Matrix"])
print("FinBERT-only:\n", metrics_finbert["Confusion_Matrix"])
for out_path, model_metrics in (
    ("hybrid_confusion_matrix.csv", metrics_gpt),
    ("finbert_confusion_matrix.csv", metrics_finbert),
):
    np.savetxt(out_path, model_metrics["Confusion_Matrix"], delimiter=",")

# Regime split (by VIX): compare the models in high- and low-volatility periods.
vix_median = df["VIX"].median()
# A missing VIX compares False against the median, so a bare np.where would
# silently file those rows under "Low_VIX". Leave them unassigned (NaN) so
# they belong to neither regime.
regime_labels = np.where(df["VIX"] > vix_median, "High_VIX", "Low_VIX")
df["VIX_regime"] = pd.Series(regime_labels, index=df.index).where(df["VIX"].notna())
def regime_metrics(regime):
    """Compute metrics for both models restricted to one VIX regime.

    Args:
        regime: Regime label to select, "High_VIX" or "Low_VIX".

    Returns:
        Tuple ``(gpt_metrics, finbert_metrics)`` of dicts as returned by
        ``compute_all_metrics``.
    """
    idx = df["VIX_regime"] == regime
    gpt_metrics = compute_all_metrics(
        df.loc[idx & mask_gpt, "Market_True_Return"].values, df.loc[idx & mask_gpt, "LSTM_GPT4"].values
    )
    finbert_metrics = compute_all_metrics(
        df.loc[idx & mask_fbert, "Market_True_Return"].values, df.loc[idx & mask_fbert, "LSTM_FinBERT"].values
    )
    return gpt_metrics, finbert_metrics
for regime in ["High_VIX", "Low_VIX"]:
    gpt_metrics, finbert_metrics = regime_metrics(regime)
    print(f"\n=== {regime} Regime ===")
    print("Hybrid Sharpe:", gpt_metrics["Sharpe"], "Cumulative:", gpt_metrics["Cumulative_Return"])
    print("FinBERT-only Sharpe:", finbert_metrics["Sharpe"], "Cumulative:", finbert_metrics["Cumulative_Return"])

### Shortened Time-frame Plot

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import (
    mean_squared_error, mean_absolute_error, accuracy_score,
    precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
)

df_gpt = pd.read_csv("derived/lstm_prac_test_predictions.csv", parse_dates=["Date"])
df_fbert = pd.read_csv("derived/lstm_prac_FinBERT_only_test_predictions.csv", parse_dates=["Date"])
market_df = pd.read_csv("sp500_vix_data.csv", parse_dates=["Date"])

# Align data by date. The two prediction files come from differently cleaned
# inputs, so they are joined on the Date column rather than by row position;
# a positional join would pair predictions from different days whenever the
# files do not contain exactly the same dates.
hybrid_preds = df_gpt[["Date", "Return", "Predicted_Return"]].rename(
    columns={"Return": "Market_Return", "Predicted_Return": "LSTM_GPT4"}
)
finbert_preds = df_fbert[["Date", "Predicted_Return"]].rename(
    columns={"Predicted_Return": "LSTM_FinBERT"}
)
# Outer join keeps dates present in either file; the missing side is NaN and
# is filtered out later by the per-model masks.
df = (
    hybrid_preds.merge(finbert_preds, on="Date", how="outer")
    .sort_values("Date")
    .reset_index(drop=True)
)
df = df.merge(market_df[["Date", "Return", "VIX"]].rename(columns={"Return": "Market_True_Return"}), on="Date", how="left")
df["VIX"] = pd.to_numeric(df["VIX"], errors="coerce")
# NOTE(review): VIX needed coercion, so the market file may contain non-numeric
# entries; coerce the return column too so such rows become NaN and are masked.
df["Market_True_Return"] = pd.to_numeric(df["Market_True_Return"], errors="coerce")

def trading_signal_returns(true_returns, predicted_returns):
    """Return per-period returns of a long/short strategy driven by predictions.

    The position is +1 (long) where the prediction is strictly positive and
    -1 (short) otherwise; the strategy return is position times realised return.
    """
    position = np.where(predicted_returns > 0, 1, -1)
    strategy_returns = position * true_returns
    return strategy_returns

# Per-model row masks: a row is usable when both the realised market return
# and that model's prediction are present.
truth_present = df['Market_True_Return'].notnull()
mask_gpt = truth_present & df['LSTM_GPT4'].notnull()
mask_fbert = truth_present & df['LSTM_FinBERT'].notnull()

print("\nValid rows for GPT:", mask_gpt.sum())
print("Valid rows for FinBERT:", mask_fbert.sum())

if not (mask_gpt.any() and mask_fbert.any()):
    raise ValueError("No valid data rows for at least one model. Check input data and merges!")

# NOTE(review): compute_all_metrics is not defined in this cell; it has to be
# in scope from the evaluation-metrics cell having been run first.
gpt_truth = df.loc[mask_gpt, 'Market_True_Return'].values
gpt_pred = df.loc[mask_gpt, 'LSTM_GPT4'].values
metrics_gpt = compute_all_metrics(gpt_truth, gpt_pred)

fbert_truth = df.loc[mask_fbert, 'Market_True_Return'].values
fbert_pred = df.loc[mask_fbert, 'LSTM_FinBERT'].values
metrics_finbert = compute_all_metrics(fbert_truth, fbert_pred)

# Long/short signal per model (+1 when the prediction is positive, else -1),
# each computed on that model's own valid rows.
gpt_signal = np.where(df.loc[mask_gpt, 'LSTM_GPT4'] > 0, 1, -1)
finbert_signal = np.where(df.loc[mask_fbert, 'LSTM_FinBERT'] > 0, 1, -1)

gpt_strat_returns = gpt_signal * df.loc[mask_gpt, 'Market_True_Return'].values
finbert_strat_returns = finbert_signal * df.loc[mask_fbert, 'Market_True_Return'].values

# Cumulative returns as running sums of the per-period returns
# (additive, not compounded).
gpt_cum_return = np.cumsum(gpt_strat_returns)
finbert_cum_return = np.cumsum(finbert_strat_returns)
market_cum_return = np.cumsum(df.loc[mask_gpt, 'Market_True_Return'].values)

# Each curve is plotted against the dates of the rows it was computed from.
# The two masks can select different rows, so the FinBERT curve gets its own
# date axis rather than reusing the hybrid one.
dates = df.loc[mask_gpt, 'Date'].reset_index(drop=True)
finbert_dates = df.loc[mask_fbert, 'Date'].reset_index(drop=True)

# Single figure; creating a second one would leave an empty figure behind.
plt.figure(figsize=(12, 5))
plt.plot(dates, market_cum_return, label="Market Cumulative Return", color='black', linewidth=2)
plt.plot(dates, gpt_cum_return, label="Hybrid Cumulative Return", color='red')
plt.plot(finbert_dates, finbert_cum_return, label="FinBERT-only Cumulative Return", color='darkorange')
plt.title("Cumulative Strategy Returns Over Time")
plt.xlabel("Date")
plt.ylabel("Cumulative Return")
plt.legend()
plt.tight_layout()
plt.show()