In [1]:
import pandas as pd

from src.data_utils.loader import DataLoader
from src.models.config import ModelConfig
from src.models.trainer import AssetPricingTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Model line-up for this run. Each entry is a ModelConfig built through one of
# its factory constructors; `name` is the label attached to the resulting
# metrics when the experiments are executed.
experiments = [
    # --- active configurations ---
    ModelConfig.OLS_Huber(name="OLS+H"),
    ModelConfig.OLS_Huber(name="OLS-3+H", feature_set="ff3"),
    ModelConfig.ElasticNet_Huber(name="ENet+H", trials=30),
    ModelConfig.PCR(name="PCR", trials=30),

    # --- disabled for this run (uncomment a line to include it) ---
    # ModelConfig.RandomForest(name="RF", trials=30),
    # ModelConfig.XGBoost(name="XGB+H", trials=30),
    # ModelConfig.MLP(name="NN1", n_hidden_layers=1, trials=30),
    # ModelConfig.MLP(name="NN2", n_hidden_layers=2, trials=30),
    # ModelConfig.MLP(name="NN3", n_hidden_layers=3, trials=30),
    # ModelConfig.MLP(name="NN4", n_hidden_layers=4, trials=30),
    # ModelConfig.MLP(name="NN5", n_hidden_layers=5, trials=30),
]

In [3]:
# Feature columns requested from the loader.
# NOTE(review): despite the `ff5_` prefix, the original author doubted that
# these are the Fama-French five factors -- confirm, and consider renaming.
ff5_features = [
    'mvel1',
    'bm',
    'mom12m',
    'mom1m',
    'retvol',
]

# Location of the SQLite database, relative to the working directory.
DB_PATH = "data/processed/lakehouse.sqlite"

# Load the panel (selected features plus the target column) into `df`.
loader = DataLoader(DB_PATH)
df = loader.load_panel_data(feature_cols=ff5_features, target_col='target_ret_excess')

# A single trainer instance, built on the loaded frame, is reused for every experiment.
trainer = AssetPricingTrainer(df)

[2026-02-05 21:51:37] [INFO] [DB_Loader] STREAMING DATA, NUMBER OF COLUMNS: 8...
[2026-02-05 21:51:50] [INFO] [DB_Loader] DATA LOADED SUCCESSFULLY. USING: 0.13 GB
[2026-02-05 21:51:50] [INFO] [DB_Loader] CONNECTION CLOSED


In [4]:
# Run each configured experiment through the shared trainer and collect one
# metrics record per model. `results` is rebuilt from scratch on every run of
# this cell, so re-running it does not accumulate duplicate rows.
results = []
for config in experiments:
    record = trainer.run_experiment(config)
    # Tag the record with the experiment's label so rows can be identified later.
    record['model'] = config.name
    results.append(record)

[2026-02-05 21:51:50] [INFO] [Trainer] STARTING EXPERIMENT: OLS+H
[2026-02-05 21:51:51] [INFO] [Trainer]    ... Fitting Final Model
[2026-02-05 21:51:56] [INFO] [Trainer] RESULT OLS+H: R2_OOS=-0.00988, RMSE=0.19118
[2026-02-05 21:51:56] [INFO] [Trainer] STARTING EXPERIMENT: OLS-3+H
[2026-02-05 21:51:56] [INFO] [Trainer]    ... Fitting Final Model
[2026-02-05 21:52:01] [INFO] [Trainer] RESULT OLS-3+H: R2_OOS=-0.00751, RMSE=0.19096
[2026-02-05 21:52:01] [INFO] [Trainer] STARTING EXPERIMENT: ENet+H
[2026-02-05 21:52:01] [INFO] [Trainer]    ... Tuning with Optuna (30 trials)
[2026-02-05 21:55:02] [INFO] [Trainer]    ... Best Params: {'alpha': 0.0001, 'l1_ratio': 0.15}
[2026-02-05 21:55:02] [INFO] [Trainer]    ... Fitting Final Model
[2026-02-05 21:55:07] [INFO] [Trainer] RESULT ENet+H: R2_OOS=-0.01040, RMSE=0.19123
[2026-02-05 21:55:07] [INFO] [Trainer] STARTING EXPERIMENT: PCR
[2026-02-05 21:55:07] [INFO] [Trainer]    ... Fitting Final Model
[2026-02-05 21:55:07] [INFO] [Trainer] RESULT P

In [5]:
# Turn the collected per-model metric records into a table and print the
# columns of interest under a heading.
LEADERBOARD_COLUMNS = ['model', 'r2_oos', 'rmse']

res_df = pd.DataFrame(results)
print("BASELINE LEADERBOARD")
print(res_df.loc[:, LEADERBOARD_COLUMNS])

BASELINE LEADERBOARD
     model    r2_oos      rmse
0    OLS+H -0.009882  0.191184
1  OLS-3+H -0.007514  0.190960
2   ENet+H -0.010396  0.191232
3      PCR  0.001762  0.190078
