In [1]:
import pandas as pd
import sqlite3

from src.models.config import ModelConfig
from src.models.trainer import AssetPricingTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def load_gold_panel(db_path):
    """Read the modelling panel out of the ``gold_panel`` table of a SQLite file.

    Parameters
    ----------
    db_path : str or path-like
        Path of the SQLite database; passed unchanged to ``sqlite3.connect``.

    Returns
    -------
    pandas.DataFrame
        The columns selected by the query (``date_fmt``, ``permno``,
        ``target_ret_excess``, ``mvel1``, ``bm``, ``mom12m``, ``mom1m``,
        ``retvol``), ordered by ``date_fmt``, plus a ``date`` column holding
        ``date_fmt`` parsed with ``pd.to_datetime``.

    Raises
    ------
    Exception
        Any error raised by ``sqlite3.connect``, ``pd.read_sql`` or
        ``pd.to_datetime`` propagates to the caller. The connection is closed
        before the error leaves this function.
    """
    print("[INFO] Loading Gold Panel...")

    query = "SELECT date_fmt, permno, target_ret_excess, " \
            "mvel1, bm, mom12m, mom1m, retvol " \
            "FROM gold_panel ORDER BY date_fmt"

    conn = sqlite3.connect(db_path)
    try:
        df = pd.read_sql(query, conn)
    finally:
        # Always release the database handle, even when the query fails
        # (e.g. the table or a column is missing); otherwise the file stays
        # open for the lifetime of the kernel.
        conn.close()

    # Parsed after the connection is released: this step only touches the
    # in-memory frame and does not need the database.
    df['date'] = pd.to_datetime(df['date_fmt'])
    return df

In [None]:
# Baseline specifications to benchmark. Each entry is produced by a
# ModelConfig factory and carries the display name and the feature set it
# should be fitted on; the elastic-net entry additionally sets a trial count.
experiments = [
    ModelConfig.OLS_Huber(name="OLS+H", feature_set="all"),
    ModelConfig.OLS_Huber(name="OLS-3+H", feature_set="ff3"),
    ModelConfig.ElasticNet_Huber(name="ENET-3+H", feature_set="ff3", trials=30),
]

In [3]:
# Path of the SQLite file to read the panel from (relative, so it resolves
# against the kernel's current working directory).
DB_PATH = "data/processed/lakehouse.sqlite"

# Load the panel into memory once and build the trainer on top of it.
df = load_gold_panel(DB_PATH)
trainer = AssetPricingTrainer(df)

[INFO] Loading Gold Panel...


In [5]:
# Run each configured experiment in order and keep one metrics record per run.
results = []
for exp in experiments:
    metrics = trainer.run_experiment(exp)
    # Tag the record with the experiment's name so it stays identifiable
    # after the records are collected into a single list.
    metrics['model'] = exp.name
    results.append(metrics)

[2026-02-04 12:46:21] [INFO] [Trainer] STARTING EXPERIMENT: OLS+H
[2026-02-04 12:46:21] [INFO] [Trainer]    ... Fitting Final Model
[2026-02-04 12:46:26] [INFO] [Trainer] RESULT OLS+H: R2_OOS=-0.00988, RMSE=0.19118
[2026-02-04 12:46:26] [INFO] [Trainer] STARTING EXPERIMENT: OLS-3+H
[2026-02-04 12:46:26] [INFO] [Trainer]    ... Fitting Final Model
[2026-02-04 12:46:30] [INFO] [Trainer] RESULT OLS-3+H: R2_OOS=-0.00751, RMSE=0.19096
[2026-02-04 12:46:30] [INFO] [Trainer] STARTING EXPERIMENT: ENET-3+H
[2026-02-04 12:46:31] [INFO] [Trainer]    ... Tuning with Optuna (30 trials)
[2026-02-04 12:49:09] [INFO] [Trainer]    ... Best Params: {'alpha': 0.0030476680652465485, 'l1_ratio': 0.5268839321421274}
[2026-02-04 12:49:09] [INFO] [Trainer]    ... Fitting Final Model
[2026-02-04 12:49:15] [INFO] [Trainer] RESULT ENET-3+H: R2_OOS=-0.00221, RMSE=0.19046


In [6]:
# One row per collected metrics record.
res_df = pd.DataFrame(results)

# Print a heading, then only the comparison columns, model name first.
print("BASELINE LEADERBOARD")
print(res_df.loc[:, ['model', 'r2_oos', 'rmse', 'r2_sklearn']])

BASELINE LEADERBOARD
      model    r2_oos      rmse  r2_sklearn
0     OLS+H -0.009882  0.191184   -0.012937
1   OLS-3+H -0.007514  0.190960   -0.010562
2  ENET-3+H -0.002211  0.190456   -0.005243


In [5]:
# Summary statistics of the target column, as a quick look at its scale and extremes.
df['target_ret_excess'].describe()

count    3.273562e+06
mean     5.847409e-03
std      1.807153e-01
min     -1.000000e+00
25%     -7.165175e-02
50%     -4.300000e-03
75%      6.617600e-02
max      2.399660e+01
Name: target_ret_excess, dtype: float64