# Model Training and Evaluation Notebook

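This notebook trains and evaluates `QuantileRegressionModelTree` and `QuantileRegressionModelForest` models for multiple quantile pairs over rolling train/test windows, then compares the returns of the trading strategy built from each model's lower- and upper-quantile predictions.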

## 1. Imports and Setup

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm # Import tqdm for progress bars
from utils.data_loader import load_data, rolling_time # Import rolling_time
from models.quantile_regression_model_leaf_tree import QuantileRegressionModelTree
from models.quantile_regression_model_forest import QuantileRegressionModelForest
from utils.trading_strategy import trading_rule
# --- Parameters ---
# Data source and rolling-window layout (both periods are forwarded to
# load_data / rolling_time).
DATA_PATH = "data/esg_tf_data.csv"
TRAIN_PERIOD = 5
TEST_PERIOD = 1

# (lower, upper) quantile pairs; one low and one high model is fit per pair.
QUANTILE_PAIRS = [(0.3, 0.7), (0.2, 0.8), (0.1, 0.9)]

# Hyperparameters shared by the tree and forest models.
SPLIT_CRITERION = 'r2'
MAX_DEPTH = 3
MIN_SAMPLES_LEAF = 5
RANDOM_STATE = 42
N_ESTIMATORS = 10  # only used by the forest model

# Numerator of the annualization exponent (base / number of periods).
# NOTE(review): presumably trading days per year -- confirm.
ANNUALIZATION_BASE = 252.0


ImportError: cannot import name 'LeafQuantileRegressionTree' from 'models.quantile_regression_model_leaf_tree' (/Users/qiuyuhan/Documents/qrt/models/quantile_regression_model_leaf_tree.py)

## 2. Run Experiments

In [None]:
def calculate_annualized_return(df_traded, base):
    """Annualize the final cumulative return of a traded frame.

    Args:
        df_traded: Frame whose "total_return" column holds the cumulative
            return; only the last row is used.
        base: Numerator of the annualization exponent (``base / len(df_traded)``).

    Returns:
        ``(1 + final_return) ** (base / n_periods) - 1``, or 0.0 when the frame
        is empty, the final return is not greater than -1, or ``base`` is not
        positive.
    """
    periods = len(df_traded)
    if periods == 0:
        return 0.0

    final_cumulative = float(df_traded["total_return"].iloc[-1])
    # A growth factor <= 0 (or a non-positive base) cannot be compounded.
    if not (final_cumulative > -1.0 and base > 0):
        return 0.0

    growth_factor = 1.0 + final_cumulative
    exponent = base / periods
    return growth_factor ** exponent - 1.0

def _apply_strategy(test_df, ql, qh, y_pred_l, y_pred_h):
    """Attach the low/high quantile predictions to a copy of test_df and run trading_rule on it."""
    df_pred = test_df.copy()
    df_pred[f"pred_q{ql}"] = y_pred_l
    df_pred[f"pred_q{qh}"] = y_pred_h
    return trading_rule(df_pred, qh, ql)


def _result_record(model_name, rolling_index, ql, qh, df_traded):
    """Build one result row (window, model, quantile pair, returns) from a traded frame."""
    # An empty traded frame has no final row; report 0.0, matching how
    # calculate_annualized_return treats the same case.
    cum_return = float(df_traded["total_return"].iloc[-1]) if len(df_traded) > 0 else 0.0
    return {
        # Identifies the rolling window so rows from different windows stay
        # distinguishable when results are aggregated or pivoted.
        'rolling_index': rolling_index,
        'model': model_name,
        'quantile_pair': f'{ql}:{qh}',
        'cum_return': cum_return,
        'annualized_return': calculate_annualized_return(df_traded, ANNUALIZATION_BASE)
    }


# One record per (rolling window, quantile pair, model).
all_results = []
num_rolling_windows = rolling_time(DATA_PATH, TRAIN_PERIOD, TEST_PERIOD)

for rolling_index in tqdm(range(num_rolling_windows), desc="Rolling Window"):
    train_df, test_df = load_data(DATA_PATH, TRAIN_PERIOD, TEST_PERIOD, rolling_index=rolling_index)
    # Features are the columns prefixed "esg_topic_"; the target is "future_return".
    x_cols = [c for c in train_df.columns if c.startswith("esg_topic_")]
    X_train, y_train = train_df[x_cols], train_df["future_return"]
    X_test, y_test = test_df[x_cols], test_df["future_return"]

    for ql, qh in QUANTILE_PAIRS:
        print(f"\n--- Running for Quantiles: QL={ql}, QH={qh} ---")

        # --- QuantileRegressionModelTree ---
        # The tree takes its quantile at fit time (the forest takes it in the constructor).
        print("Training LeafQuantileRegressionTree...")
        qrt_l = QuantileRegressionModelTree(split_criterion=SPLIT_CRITERION, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF, random_state=RANDOM_STATE)
        qrt_l.fit(X_train, y_train, quantile=ql)
        qrt_h = QuantileRegressionModelTree(split_criterion=SPLIT_CRITERION, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF, random_state=RANDOM_STATE)
        qrt_h.fit(X_train, y_train, quantile=qh)

        y_pred_l_qrt = qrt_l.predict(X_test)
        y_pred_h_qrt = qrt_h.predict(X_test)

        df_traded_qrt = _apply_strategy(test_df, ql, qh, y_pred_l_qrt, y_pred_h_qrt)
        all_results.append(_result_record('QRT_leaf', rolling_index, ql, qh, df_traded_qrt))

        # --- QuantileRegressionModelForest ---
        print("Training QuantileRegressionModelForest...")
        qrf_l = QuantileRegressionModelForest(n_estimators=N_ESTIMATORS, quantile=ql, split_criterion=SPLIT_CRITERION, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF, random_state=RANDOM_STATE)
        qrf_l.fit(X_train, y_train)
        qrf_h = QuantileRegressionModelForest(n_estimators=N_ESTIMATORS, quantile=qh, split_criterion=SPLIT_CRITERION, max_depth=MAX_DEPTH, min_samples_leaf=MIN_SAMPLES_LEAF, random_state=RANDOM_STATE)
        qrf_h.fit(X_train, y_train)

        y_pred_l_qrf = qrf_l.predict(X_test)
        y_pred_h_qrf = qrf_h.predict(X_test)

        df_traded_qrf = _apply_strategy(test_df, ql, qh, y_pred_l_qrf, y_pred_h_qrf)
        all_results.append(_result_record('QRF_model', rolling_index, ql, qh, df_traded_qrf))

results_df = pd.DataFrame(all_results)
print("\n--- All Results ---")
print(results_df)


Unique years in data: [1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2009 2010 2011 2012
 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023]
Total unique years in data: 25


Rolling Window:   0%|          | 0/20 [00:00<?, ?it/s]

Rolling Index: 0
Train Period: 1998 to 2002, Test Period: 2003 to 2003
Train Data Shape: (10566, 62), Test Data Shape: (2687, 62)

--- Running for Quantiles: QL=0.3, QH=0.7 ---
Training LeafQuantileRegressionTree...


## 3. Plot Results

In [None]:
def plot_metric(df, metric, title):
    """Draw a grouped bar chart of a metric per quantile pair, one bar per model.

    Args:
        df: Results frame with 'quantile_pair' and 'model' columns plus the
            column named by ``metric``. It may hold several rows per
            (quantile_pair, model) combination.
        metric: Name of the numeric column to plot (e.g. 'annualized_return').
        title: Figure title.

    Rows sharing the same (quantile_pair, model) are averaged before plotting;
    when every combination is unique the plotted values are the raw values.
    """
    fig, ax = plt.subplots(figsize=(12, 7))
    # pivot_table rather than pivot: the experiment loop appends one row per
    # rolling window, so (quantile_pair, model) pairs repeat and DataFrame.pivot
    # would raise "Index contains duplicate entries, cannot reshape".
    metric_by_pair = df.pivot_table(
        index='quantile_pair', columns='model', values=metric, aggfunc='mean'
    )
    metric_by_pair.plot(kind='bar', ax=ax)
    ax.set_title(title, fontsize=16)
    ax.set_ylabel(metric.replace('_', ' ').title(), fontsize=12)
    ax.set_xlabel('Quantile Pair', fontsize=12)
    ax.tick_params(axis='x', rotation=0)
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    plt.tight_layout()
    plt.show()

# One comparison chart per reported metric, in the order (metric column, figure title).
for metric_name, chart_title in (
    ('annualized_return', 'Annualized Return Comparison'),
    ('cum_return', 'Cumulative Return Comparison'),
):
    plot_metric(results_df, metric_name, chart_title)