In [None]:
import os
import sys

# Absolute path of the parent of the current working directory; it is put on
# sys.path so that the `src.*` imports further down can be resolved.
PROJECT_ROOT = os.path.abspath("..")
if PROJECT_ROOT not in sys.path:
    # Insert once only, so re-running this cell does not add duplicate entries.
    sys.path.insert(0, PROJECT_ROOT)

print("PROJECT_ROOT:", PROJECT_ROOT)

%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


import optuna
from sklearn.metrics import mean_squared_error, log_loss

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, balanced_accuracy_score, confusion_matrix

from src.data_loading import download_daily_prices, load_daily_close
from src.signals import (
    make_basic_signals,
    build_feature_matrix,
    build_sequence_dataset,
    DEFAULT_FEATURES,
)
from src.models_tree import train_tree_regressor, evaluate_regression
from src.models_lstm_class import train_lstm_classifier, predict_lstm_proba
from src.models_lstm import train_lstm_regressor, predict_lstm
from src.backtest import (
    equity_curve_from_returns,
    cagr,
    annualized_vol,
    sharpe_ratio,
    max_drawdown,
)
from src.models_tcn import train_tcn_regressor, predict_tcn



In [None]:
def sharpe_ratio_np(returns: np.ndarray, freq: int = 252) -> float:
    """Annualized Sharpe ratio (zero risk-free rate) of a 1D array of returns.

    Parameters
    ----------
    returns : array-like
        Per-period (e.g. daily) returns; anything ``np.asarray`` accepts.
    freq : int, default 252
        Number of periods per year used for annualization.

    Returns
    -------
    float
        ``sqrt(freq) * mean / std`` with the population standard deviation
        (``ddof=0``). Returns ``0.0`` for empty input and for series that are
        constant up to floating-point noise. NaN values in ``returns``
        propagate to a NaN result.
    """
    values = np.asarray(returns, dtype=float)
    if values.size == 0:
        # Nothing to measure; avoid NumPy's "mean of empty slice" warning/NaN.
        return 0.0

    mean = values.mean()
    std = values.std()

    # A constant series can produce a tiny non-zero std (~1e-17) from rounding
    # in the mean; an exact `== 0` test would then return a huge bogus Sharpe.
    if std <= 1e-12 * max(1.0, abs(mean)):
        return 0.0

    return float(np.sqrt(freq) * mean / std)


In [None]:
ticker = "SPY"

# Try to load previously stored prices first; if the loader raises
# FileNotFoundError, download the history from 2010-01-01 and load again.
try:
    prices = load_daily_close(ticker)
except FileNotFoundError:
    download_daily_prices(ticker, start="2010-01-01")
    prices = load_daily_close(ticker)

# Derive the signal/feature frame from the price data (details live in src.signals).
signals_df = make_basic_signals(prices)
# Displayed as a tuple: (first rows, column index).
signals_df.head(), signals_df.columns


In [None]:
feature_names = DEFAULT_FEATURES

# X: features, y: float target (used downstream as the next-day return),
# dates: labels for the rows. The three are sliced with the same positions below,
# so they are assumed to be row-aligned — TODO confirm in build_feature_matrix.
X, y, dates = build_feature_matrix(signals_df, feature_names)

split_idx = int(len(X) * 0.7)  # 70% train / 30% test

# Positional split (no shuffling): the first 70% of rows train, the rest test.
X_train, X_test = X[:split_idx], X[split_idx:]
y_train_float, y_test_float = y[:split_idx], y[split_idx:]
dates_train, dates_test = dates[:split_idx], dates[split_idx:]

print("Train size:", len(X_train), "Test size:", len(X_test))
print("Train period:", dates_train[0], "->", dates_train[-1])
print("Test period:", dates_test[0], "->", dates_test[-1])

# Standardise features for LSTM regressor.
# The scaler is fitted on the training rows only and then applied to the test
# rows, so no test-set statistics leak into the scaling.
scaler_X = StandardScaler()
X_train_scaled = scaler_X.fit_transform(X_train)
X_test_scaled  = scaler_X.transform(X_test)
X_train_scaled.shape, X_test_scaled.shape


In [None]:
# ---------------------------
# Optuna tuning: Tree model (Sharpe-based)
# ---------------------------
def objective_tree(trial):
    """Optuna objective for the tree regressor: negative Sharpe on a hold-out slice.

    Samples the tree hyperparameters and the trading quantile ``q``, fits on the
    first 80% of ``X_train`` and scores a long/flat rule on the remaining 20%.

    The entry threshold is the ``q``-quantile of the *inner-train* predictions.
    Taking the quantile of the validation predictions instead would use the
    whole validation period's forecasts to decide each single day's position
    (look-ahead), and would not match the final evaluation, where the test
    predictions are thresholded with a quantile of the training predictions.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters.

    Returns
    -------
    float
        ``-Sharpe`` of the validation strategy returns (the study minimizes).
    """
    # Hyperparameters of the tree
    max_depth = trial.suggest_int("max_depth", 2, 8)
    max_iter = trial.suggest_int("max_iter", 100, 500)
    learning_rate = trial.suggest_float("learning_rate", 0.01, 0.2, log=True)
    l2_reg = trial.suggest_float("l2_regularization", 0.0, 1.0)

    # Trading hyperparameter: quantile for the entry threshold (40%-90%)
    q = trial.suggest_float("q", 0.4, 0.9)

    # Positional split of X_train into inner-train / validation (no shuffling)
    split = int(len(X_train) * 0.8)
    X_tr, X_val = X_train[:split], X_train[split:]
    y_tr, y_val = y_train_float[:split], y_train_float[split:]

    model = train_tree_regressor(
        X_tr,
        y_tr,
        max_depth=max_depth,
        max_iter=max_iter,
        learning_rate=learning_rate,
        l2_regularization=l2_reg,
    )

    # Threshold comes from data available before the validation period starts.
    tau = np.quantile(model.predict(X_tr), q)

    # Long when the validation prediction beats the threshold, flat otherwise.
    preds_val = model.predict(X_val)
    positions_val = (preds_val > tau).astype(int)
    strat_ret_val = positions_val * y_val  # realised returns of the strategy

    # Optuna minimizes -> return negative Sharpe
    return -sharpe_ratio_np(strat_ret_val)



# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# across kernel restarts (given identical objective values).
study_tree = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_tree.optimize(objective_tree, n_trials=10)

print("Best tree params:", study_tree.best_params)


In [None]:
# --- Tree with best Optuna params (model + trading rule) ---
best_tree_params = study_tree.best_params
q_tree = best_tree_params["q"]

# Params for the model itself ("q" belongs to the trading rule, not the model)
tree_model_params = {k: v for k, v in best_tree_params.items() if k != "q"}

# Refit on the full training set with the tuned hyperparameters
tree_model = train_tree_regressor(
    X_train,
    y_train_float,
    **tree_model_params,
)

# Predictions on train/test
preds_tree_train = tree_model.predict(X_train)
preds_tree_test  = tree_model.predict(X_test)

# Threshold based on train predictions and tuned q
tau_tree = np.quantile(preds_tree_train, q_tree)
print("Tree q_best:", q_tree, " -> tau_tree:", tau_tree)

# Trading rule on test: long (1) above the threshold, flat (0) otherwise
positions_tree = (preds_tree_test > tau_tree).astype(int)

# Returns on full test period
bh_returns = y_test_float                              # buy & hold
tree_returns = positions_tree * y_test_float           # tree strategy

# Series for the full test period (aligned with the other strategies later).
# They are indexed by dates_test, which was sliced together with y_test_float,
# instead of slicing signals_df positionally: split_idx is a position in X, and
# signals_df is only guaranteed to line up with X if build_feature_matrix
# dropped no rows.
bh_series_full = pd.Series(
    np.asarray(bh_returns, dtype=float),
    index=pd.Index(dates_test),
    name="target_ret_1",
)
tree_series_full = pd.Series(
    np.asarray(tree_returns, dtype=float),
    index=pd.Index(dates_test),
    name="target_ret_1",
)


bh_series_full.head(), tree_series_full.head()


In [None]:
# Direction labels for single-day features (no sequences)
# Binary direction targets for the per-day (non-sequence) features: 1 when the
# float target is strictly positive, 0 otherwise.
y_train_class_simple = (y_train_float > 0).astype(int)
y_test_class_simple  = (y_test_float > 0).astype(int)

# Baseline classifier on the raw feature matrix; fit() returns the estimator.
log_reg = LogisticRegression(max_iter=1000).fit(X_train, y_train_class_simple)
y_pred_simple = log_reg.predict(X_test)

# Share of positive labels in the test set, for context next to the accuracies.
base_rate_simple = y_test_class_simple.mean()
acc_simple, bacc_simple = (
    score(y_test_class_simple, y_pred_simple)
    for score in (accuracy_score, balanced_accuracy_score)
)

print("Base rate (simple):", base_rate_simple)
print("LogReg accuracy:", acc_simple)
print("LogReg balanced acc:", bacc_simple)


In [None]:
# Window length passed to build_sequence_dataset for every sequence model below.
seq_len = 30

# --- Sequences for CLASSIFIER (unscaled features) ---
# Train and test windows are built separately from their own slices, so no
# window is built from a mix of training and test rows.
X_train_seq, y_train_seq_float, dates_train_seq = build_sequence_dataset(
    X_train, y_train_float, dates_train, seq_len=seq_len
)
X_test_seq, y_test_seq_float, dates_test_seq = build_sequence_dataset(
    X_test, y_test_float, dates_test, seq_len=seq_len
)

# Labels for classifier: 1.0 when the float target is strictly positive, else 0.0
y_train_seq_class = (y_train_seq_float > 0).astype(np.float32)
y_test_seq_class  = (y_test_seq_float > 0).astype(np.float32)

print("Classifier sequences:", X_train_seq.shape, X_test_seq.shape)
print("Fraction up days (train/test):", y_train_seq_class.mean(), y_test_seq_class.mean())

# --- Sequences for REGRESSOR (scaled features) ---
# Same targets and dates as above; only the feature matrix differs (standardised).
X_train_seq_reg, y_train_seq_reg_float, dates_train_seq_reg = build_sequence_dataset(
    X_train_scaled, y_train_float, dates_train, seq_len=seq_len
)
X_test_seq_reg, y_test_seq_reg_float, dates_test_seq_reg = build_sequence_dataset(
    X_test_scaled, y_test_float, dates_test, seq_len=seq_len
)

print("Regressor sequences:", X_train_seq_reg.shape, X_test_seq_reg.shape)
# Sanity check: both sequence sets should start on the same test date.
print("First test seq date (cls/reg):", dates_test_seq[0], dates_test_seq_reg[0])


In [None]:
# ---------------------------
# Optuna tuning: LSTM Classifier
# ---------------------------
def objective_lstm_cls(trial):
    """Optuna objective for the LSTM classifier: validation log-loss.

    Samples the network/training hyperparameters, trains on the first 70% of
    the training sequences and returns the log-loss of the predicted
    probabilities on the remaining 30% (lower is better).
    """
    # Keys double as keyword arguments of train_lstm_classifier; the dict keeps
    # the sampling order hidden_dim, num_layers, lr, batch_size, num_epochs.
    hparams = {
        "hidden_dim": trial.suggest_int("hidden_dim", 16, 64),
        "num_layers": trial.suggest_int("num_layers", 1, 10),
        "lr": trial.suggest_float("lr", 1e-4, 5e-3, log=True),
        "batch_size": trial.suggest_categorical("batch_size", [32, 64, 128]),
        "num_epochs": trial.suggest_int("num_epochs", 10, 40),
    }

    # Positional inner split of the training sequences (no shuffling).
    cut = int(len(X_train_seq) * 0.7)
    fit_X, holdout_X = X_train_seq[:cut], X_train_seq[cut:]
    fit_y, holdout_y = y_train_seq_class[:cut], y_train_seq_class[cut:]

    fitted = train_lstm_classifier(fit_X, fit_y, **hparams)

    # Objective: log-loss of the predicted probabilities on the hold-out part.
    holdout_proba = predict_lstm_proba(fitted, holdout_X)
    return log_loss(holdout_y, holdout_proba)


# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# (given identical objective values). NOTE(review): the network training itself
# may still be stochastic unless it is seeded inside src.models_lstm_class.
study_lstm_cls = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_lstm_cls.optimize(objective_lstm_cls, n_trials=10)

print("Best LSTM cls params:", study_lstm_cls.best_params)


In [None]:
# ---------------------------
# Optuna tuning: LSTM Regressor (Sharpe-based)
# ---------------------------
def objective_lstm_reg(trial):
    """Optuna objective for the LSTM regressor: negative Sharpe on a hold-out slice.

    Samples the network/training hyperparameters and the trading quantile ``q``,
    trains on the first 70% of the (scaled) training sequences and scores a
    long/flat rule on the remaining 30%.

    The entry threshold is the ``q``-quantile of the *inner-train* predictions.
    Taking the quantile of the validation predictions instead would use the
    whole validation period's forecasts to decide each single day's position
    (look-ahead), and would not match the final evaluation, where the test
    predictions are thresholded with a quantile of the training predictions.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters.

    Returns
    -------
    float
        ``-Sharpe`` of the validation strategy returns (the study minimizes).
    """
    hidden_dim = trial.suggest_int("hidden_dim", 16, 64)
    num_layers = trial.suggest_int("num_layers", 1, 10)
    lr = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    num_epochs = trial.suggest_int("num_epochs", 15, 40)
    q = trial.suggest_float("q", 0.5, 0.9)  # quantile for trading threshold

    # Positional inner train/val split on *sequences* (no shuffling)
    split = int(len(X_train_seq_reg) * 0.7)
    X_tr_seq = X_train_seq_reg[:split]
    X_val_seq = X_train_seq_reg[split:]
    y_tr_seq = y_train_seq_reg_float[:split]
    y_val_seq = y_train_seq_reg_float[split:]

    # Train model on inner-train
    model = train_lstm_regressor(
        X_tr_seq,
        y_tr_seq,
        num_epochs=num_epochs,
        batch_size=batch_size,
        lr=lr,
        hidden_dim=hidden_dim,
        num_layers=num_layers,
    )

    # Threshold comes from data available before the validation period starts.
    tau_ret = np.quantile(predict_lstm(model, X_tr_seq), q)

    # Long when the validation prediction beats the threshold, flat otherwise.
    preds_val = predict_lstm(model, X_val_seq)
    positions_val = (preds_val > tau_ret).astype(int)
    strat_ret_val = positions_val * y_val_seq  # realised returns of the strategy

    # Optuna minimizes -> return negative Sharpe
    return -sharpe_ratio_np(strat_ret_val)



# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# (given identical objective values). NOTE(review): the network training itself
# may still be stochastic unless it is seeded inside src.models_lstm.
study_lstm_reg = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_lstm_reg.optimize(objective_lstm_reg, n_trials=10)

print("Best LSTM reg params:", study_lstm_reg.best_params)


In [None]:
# ---------------------------
# Optuna tuning: TCN Regressor (Sharpe-based, with q)
# ---------------------------
def objective_tcn_reg(trial):
    """Optuna objective for the TCN regressor: negative Sharpe on a hold-out slice.

    Samples the network/training hyperparameters and the trading quantile ``q``,
    trains on the first 70% of the (scaled) training sequences and scores a
    long/flat rule on the remaining 30%. Dropout is fixed at 0.1.

    The entry threshold is the ``q``-quantile of the *inner-train* predictions.
    Taking the quantile of the validation predictions instead would use the
    whole validation period's forecasts to decide each single day's position
    (look-ahead), and would not match the final evaluation, where the test
    predictions are thresholded with a quantile of the training predictions.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample hyperparameters.

    Returns
    -------
    float
        ``-Sharpe`` of the validation strategy returns (the study minimizes).
    """
    hidden_dim = trial.suggest_int("hidden_dim", 16, 64)
    num_layers = trial.suggest_int("num_layers", 1, 3)
    kernel_size = trial.suggest_int("kernel_size", 2, 5)
    lr = trial.suggest_float("lr", 1e-4, 5e-3, log=True)
    batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])
    num_epochs = trial.suggest_int("num_epochs", 10, 40)
    q = trial.suggest_float("q", 0.5, 0.9)  # quantile for trading threshold

    # Positional inner train/val split on *sequences* (same 70/30 as the LSTM regressor)
    split = int(len(X_train_seq_reg) * 0.7)
    X_tr_seq = X_train_seq_reg[:split]
    X_val_seq = X_train_seq_reg[split:]
    y_tr_seq = y_train_seq_reg_float[:split]
    y_val_seq = y_train_seq_reg_float[split:]

    # Train TCN on inner-train
    model = train_tcn_regressor(
        X_tr_seq,
        y_tr_seq,
        num_epochs=num_epochs,
        batch_size=batch_size,
        lr=lr,
        hidden_dim=hidden_dim,
        num_layers=num_layers,
        kernel_size=kernel_size,
        dropout=0.1,
    )

    # Threshold comes from data available before the validation period starts.
    tau_ret = np.quantile(predict_tcn(model, X_tr_seq), q)

    # Long when the validation prediction beats the threshold, flat otherwise.
    preds_val = predict_tcn(model, X_val_seq)
    positions_val = (preds_val > tau_ret).astype(int)
    strat_ret_val = positions_val * y_val_seq  # realised returns of the strategy

    # Optuna minimizes -> negative Sharpe
    return -sharpe_ratio_np(strat_ret_val)



# Seed the sampler so the sequence of suggested hyperparameters is repeatable
# (given identical objective values). NOTE(review): the network training itself
# may still be stochastic unless it is seeded inside src.models_tcn.
study_tcn_reg = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=42),
)
study_tcn_reg.optimize(objective_tcn_reg, n_trials=10)

print("Best TCN reg params:", study_tcn_reg.best_params)


In [None]:
best_tcn_params = study_tcn_reg.best_params

# Work on a copy so best_tcn_params stays intact: pop the trading quantile and
# keep the rest, which are exactly the model/training keyword arguments
# sampled by objective_tcn_reg.
tcn_model_params = dict(best_tcn_params)
q_tcn_best = tcn_model_params.pop("q")

# Refit on all training sequences; dropout is fixed at the value used during tuning.
tcn_reg = train_tcn_regressor(
    X_train_seq_reg,
    y_train_seq_reg_float,
    dropout=0.1,
    **tcn_model_params,
)

preds_train_tcn = predict_tcn(tcn_reg, X_train_seq_reg)
preds_test_tcn  = predict_tcn(tcn_reg, X_test_seq_reg)

# Quick look at the prediction distributions on both splits.
train_mean, train_std = float(preds_train_tcn.mean()), float(preds_train_tcn.std())
print("TCN Train preds mean/std:", train_mean, train_std)
test_mean, test_std = float(preds_test_tcn.mean()), float(preds_test_tcn.std())
print("TCN Test  preds mean/std:", test_mean, test_std)

In [None]:
# Entry threshold: tuned quantile of the *training* predictions.
tau_ret_tcn = np.quantile(preds_train_tcn, q_tcn_best)
print("TCN q_best:", q_tcn_best, " -> tau_ret_tcn:", tau_ret_tcn)

# Long (1) when the test prediction beats the threshold, flat (0) otherwise.
positions_tcn = (preds_test_tcn > tau_ret_tcn).astype(int)
print("TCN long ratio:", float(np.mean(positions_tcn == 1)))
# A "trade" is any change of position between two consecutive days.
print("TCN trades:", int(np.count_nonzero(positions_tcn[1:] != positions_tcn[:-1])))

# Strategy returns: realised return on long days, zero on flat days.
tcn_returns = positions_tcn * y_test_seq_reg_float

# Borrow index and name from the target column, then overwrite the values
# with the strategy returns.
tcn_series = signals_df.loc[dates_test_seq_reg, "target_ret_1"].copy()
tcn_series[:] = tcn_returns
tcn_series.head()


In [None]:
best_cls_params = study_lstm_cls.best_params

# Refit on all training sequences. best_cls_params holds exactly the keyword
# arguments sampled by objective_lstm_cls (num_epochs, batch_size, lr,
# hidden_dim, num_layers), so it can be unpacked directly.
lstm_clf = train_lstm_classifier(X_train_seq, y_train_seq_class, **best_cls_params)


# Predicted probabilities on train & test
p_up_train = predict_lstm_proba(lstm_clf, X_train_seq)
p_up_test  = predict_lstm_proba(lstm_clf, X_test_seq)

train_mean, train_std = float(p_up_train.mean()), float(p_up_train.std())
print("Train p_up mean/std:", train_mean, train_std)
test_mean, test_std = float(p_up_test.mean()), float(p_up_test.std())
print("Test  p_up mean/std:", test_mean, test_std)

# Trade only when the probability exceeds the 70th percentile of the
# *training* probabilities.
tau = np.quantile(p_up_train, 0.7)
print("tau (prob threshold):", tau)

positions_lstm_cls = (p_up_test > tau).astype(int)
print("LSTM classifier long ratio:", float(np.mean(positions_lstm_cls == 1)))

# Strategy returns: realised float return on long days, zero otherwise.
lstm_cls_returns = positions_lstm_cls * y_test_seq_float

# Borrow index and name from the target column, then overwrite the values
# with the strategy returns.
lstm_cls_series = signals_df.loc[dates_test_seq, "target_ret_1"].copy()
lstm_cls_series[:] = lstm_cls_returns

lstm_cls_series.head()


In [None]:
best_reg_params = study_lstm_reg.best_params

# "q" parameterises the trading rule; everything else is a keyword argument of
# train_lstm_regressor (num_epochs, batch_size, lr, hidden_dim, num_layers).
q_best = best_reg_params["q"]
lstm_reg = train_lstm_regressor(
    X_train_seq_reg,
    y_train_seq_reg_float,
    **{key: value for key, value in best_reg_params.items() if key != "q"},
)

preds_train_reg = predict_lstm(lstm_reg, X_train_seq_reg)
preds_test_reg  = predict_lstm(lstm_reg, X_test_seq_reg)

train_mean, train_std = float(preds_train_reg.mean()), float(preds_train_reg.std())
print("Train preds mean/std:", train_mean, train_std)
test_mean, test_std = float(preds_test_reg.mean()), float(preds_test_reg.std())
print("Test  preds mean/std:", test_mean, test_std)

# Entry threshold: tuned quantile of the *training* predictions.
tau_ret = np.quantile(preds_train_reg, q_best)
print("q_best:", q_best, " -> tau_ret:", tau_ret)

# Long (1) when the test prediction beats the threshold, flat (0) otherwise.
positions_lstm_reg = (preds_test_reg > tau_ret).astype(int)
print("LSTM reg long ratio:", float(np.mean(positions_lstm_reg == 1)))
# A "trade" is any change of position between two consecutive days.
print(
    "LSTM reg trades:",
    int(np.count_nonzero(positions_lstm_reg[1:] != positions_lstm_reg[:-1])),
)

# Borrow index and name from the target column, then overwrite the values
# with the strategy returns.
lstm_reg_returns = positions_lstm_reg * y_test_seq_reg_float
lstm_reg_series = signals_df.loc[dates_test_seq_reg, "target_ret_1"].copy()
lstm_reg_series[:] = lstm_reg_returns


lstm_reg_series.head()


In [None]:
# Dates shared by all five return series (buy & hold plus the four models).
# The sequence models have fewer test dates than the per-day series, so every
# strategy is compared on the intersection only.
common_idx = (
    bh_series_full.index
    .intersection(tree_series_full.index)
    .intersection(lstm_cls_series.index)
    .intersection(lstm_reg_series.index)
    .intersection(tcn_series.index)
)

# Restrict every return series to the shared dates.
bh_common, tree_common, lstm_cls_common, lstm_reg_common, tcn_common = (
    series.loc[common_idx]
    for series in (
        bh_series_full,
        tree_series_full,
        lstm_cls_series,
        lstm_reg_series,
        tcn_series,
    )
)

# One equity curve per strategy, in the same order as above.
equity_bh, equity_tree, equity_lstm_cls, equity_lstm_reg, equity_tcn = map(
    equity_curve_from_returns,
    (bh_common, tree_common, lstm_cls_common, lstm_reg_common, tcn_common),
)


# Flat dict keyed "<strategy>_<stat>": CAGR and max drawdown are computed from
# the equity curve, volatility and Sharpe from the return series.
metrics = {
    f"{prefix}_{stat}": value
    for prefix, rets, equity in (
        ("bh", bh_common, equity_bh),
        ("tree", tree_common, equity_tree),
        ("lstm_cls", lstm_cls_common, equity_lstm_cls),
        ("lstm_reg", lstm_reg_common, equity_lstm_reg),
        ("tcn", tcn_common, equity_tcn),
    )
    for stat, value in (
        ("cagr", cagr(equity)),
        ("vol", annualized_vol(rets)),
        ("sharpe", sharpe_ratio(rets)),
        ("max_dd", max_drawdown(equity)),
    )
}
metrics



In [None]:
import json
from pathlib import Path

# Best hyperparameters of every study, keyed by a short model name.
best_params_all = {
    model_key: study.best_params
    for model_key, study in (
        ("tree", study_tree),
        ("lstm_reg", study_lstm_reg),
        ("lstm_cls", study_lstm_cls),
        ("tcn_reg", study_tcn_reg),
    )
}

# <PROJECT_ROOT>/configs is created on first use.
config_path = Path(PROJECT_ROOT) / "configs"
config_path.mkdir(exist_ok=True)

# Persist as pretty-printed JSON.
(config_path / "best_params_spy.json").write_text(json.dumps(best_params_all, indent=2))

best_params_all


In [None]:
# All equity curves on one explicit axes, restricted to the common test dates.
fig, ax = plt.subplots(figsize=(10, 4))
equity_bh.plot(ax=ax, label="Buy & Hold")
equity_tree.plot(ax=ax, label="Tree Strategy")
equity_lstm_cls.plot(ax=ax, label="LSTM Classifier")
equity_lstm_reg.plot(ax=ax, label="LSTM Regressor (ranking)")
equity_tcn.plot(ax=ax, label="TCN Regressor (ranking)")
ax.legend()
ax.set_title(f"{ticker} – equity curves (test period, seq_len={seq_len})")
plt.show()


In [None]:
# === Final performance report ===

# 1) Align position series with the common index used for returns
# Labels of the full test period. Taken from dates_test (sliced together with
# the test features/targets) rather than from signals_df.index[split_idx:]:
# split_idx is a position in X, and signals_df is only guaranteed to line up
# with X if build_feature_matrix dropped no rows.
idx_test_full = pd.Index(dates_test)

# Buy & Hold: always long
pos_bh = pd.Series(1, index=idx_test_full).loc[common_idx]

# Tree positions (one per test day)
pos_tree_series = pd.Series(positions_tree, index=idx_test_full).loc[common_idx]

# LSTM classifier positions (indexed by dates_test_seq)
pos_lstm_cls_series = pd.Series(positions_lstm_cls, index=dates_test_seq).loc[common_idx]

# LSTM regressor positions (indexed by dates_test_seq_reg)
pos_lstm_reg_series = pd.Series(positions_lstm_reg, index=dates_test_seq_reg).loc[common_idx]

# TCN regressor positions (indexed by dates_test_seq_reg)
pos_tcn_series = pd.Series(positions_tcn, index=dates_test_seq_reg).loc[common_idx]


def long_ratio_and_trades(pos_series: pd.Series) -> tuple[float, int]:
    """Summarise a position series.

    Parameters
    ----------
    pos_series : pd.Series or array-like
        One position per period; a value of 1 counts as "long".

    Returns
    -------
    tuple[float, int]
        ``(long_ratio, trades)``: the fraction of periods with position 1, and
        the number of times the position differs from the previous period.
        Empty input yields ``(0.0, 0)`` instead of a NaN ratio.
    """
    vals = np.asarray(pos_series)
    if vals.size == 0:
        # Mean of an empty array would be NaN (with a RuntimeWarning).
        return 0.0, 0

    long_ratio = float(np.mean(vals == 1))
    # Compare each element with its predecessor; every mismatch is one trade.
    trades = int(np.count_nonzero(vals[1:] != vals[:-1]))
    return long_ratio, trades


# One record per strategy; each tuple pairs a display name with the strategy's
# return series and position series (both restricted to the common dates).
rows = []
for name, returns, pos in [
    ("Buy & Hold",       bh_common,       pos_bh),
    ("Tree",             tree_common,     pos_tree_series),
    ("LSTM Classifier",  lstm_cls_common, pos_lstm_cls_series),
    ("LSTM Regressor",   lstm_reg_common, pos_lstm_reg_series),
    ("TCN Regressor",    tcn_common,      pos_tcn_series),
]:
    # NOTE: `lr` here is the long ratio, not a learning rate.
    lr, n_trades = long_ratio_and_trades(pos)
    eq = equity_curve_from_returns(returns)
    rows.append({
        "Strategy":   name,
        "CAGR":       cagr(eq),
        "Vol":        annualized_vol(returns),
        "Sharpe":     sharpe_ratio(returns),
        "MaxDD":      max_drawdown(eq),
        "LongRatio":  lr,
        "Trades":     n_trades,
    })

# Build the frame once from the list of records; round only for display.
performance_df = pd.DataFrame(rows).set_index("Strategy")
performance_df.round(3)


In [None]:
# === Training / Optuna summary ===

# One row per study. The Sharpe-based studies minimise the *negative* Sharpe,
# so their best value is negated to report the best Sharpe itself; the
# classifier study minimises log-loss directly (lower is better).
train_rows = [
    {
        "Model": model_label,
        "Objective": objective_label,
        "BestValObj": -study.best_value if negate else study.best_value,
        "BestParams": study.best_params,
    }
    for model_label, objective_label, study, negate in (
        ("Tree (reg + ranking)", "Sharpe", study_tree, True),
        ("LSTM Reg (ranking)", "Sharpe", study_lstm_reg, True),
        ("TCN Reg (ranking)", "Sharpe", study_tcn_reg, True),
        ("LSTM Classifier", "log-loss", study_lstm_cls, False),
    )
]

training_df = pd.DataFrame(train_rows)
training_df
