In [1]:
import pandas as pd

from src.data_utils.loader import DataLoader
from src.models.config import ModelConfig
from src.models.trainer import AssetPricingTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Roster of model configurations consumed by the training loop. Only the
# linear baselines are active; every other entry is commented out.
# NOTE(review): the saved output of the training cell logs NN1/NN2 runs even
# though those configs are disabled here, so that output appears stale
# relative to this list -- re-run before relying on it.
experiments = [
    # --- Linear methods ---
    ModelConfig.OLS_Huber(
        name="OLS+H",
        feature_set="all",
        trials=50,
    ),
    ModelConfig.OLS_Huber(
        name="OLS-3+H",
        feature_set="ff3",
        trials=50,
    ),
    ModelConfig.ElasticNet_Huber(
        name="ENet+H",
        feature_set="all",
        trials=50,
    ),
    # ModelConfig.PCR(name="PCR", feature_set="ff3", trials=30),

    # --- Tree-based methods (disabled) ---
    # ModelConfig.RandomForest(name="RF", feature_set="ff3", trials=30),
    # ModelConfig.XGBoost(name="XGB+H", feature_set="ff3", trials=30),

    # --- Neural networks (disabled) ---
    # ModelConfig.MLP(name="NN1", feature_set="all", n_hidden_layers=1, trials=50),
    # ModelConfig.MLP(name="NN2", feature_set="all", n_hidden_layers=2, trials=50),
    # ModelConfig.MLP(name="NN3", feature_set="all", n_hidden_layers=3, trials=50),
    # ModelConfig.MLP(name="NN4", feature_set="all", n_hidden_layers=4, trials=50),
    # ModelConfig.MLP(name="NN5", feature_set="all", n_hidden_layers=5, trials=50),
]

In [3]:
# Location of the processed "gold" panel. The path is relative, so it is
# resolved against the kernel's current working directory.
gold_panel_path = "data/processed/gold_panel"

# Load the full panel into memory. The saved log for this cell reports all
# columns being read, a (3280700, 100) frame and about 1.33 GB of RAM.
loader = DataLoader(data_path=gold_panel_path)
df = loader.load_panel_data()

# One trainer instance wraps the loaded panel and is reused by every
# experiment run in the notebook.
trainer = AssetPricingTrainer(df)

[2026-02-10 16:03:07] [INFO] [ParquetLoader] REQ: Loading ALL columns...
[2026-02-10 16:03:07] [INFO] [ParquetLoader] SOURCE: data/processed/gold_panel
[2026-02-10 16:03:22] [INFO] [ParquetLoader] LOAD COMPLETE. Shape: (3280700, 100) | RAM: 1.33 GB


In [None]:
# Run every configured experiment through the shared trainer and collect one
# metrics record per experiment. Re-running this cell starts from an empty
# list, so earlier records are discarded.
results = []
for exp in experiments:
    # Per the saved logs, a single call covers the data split, Optuna
    # hyperparameter tuning, the final fit and scoring, and can take hours.
    metrics = trainer.run_experiment(exp)
    # Tag the record with the experiment name so each row stays identifiable
    # once the records are combined into a table.
    # NOTE(review): this writes into the object returned by the trainer --
    # assumes it is a fresh, mutable mapping (e.g. a dict) per call; confirm.
    metrics['model'] = exp.name
    results.append(metrics)

[2026-02-10 00:20:12] [INFO] [Trainer] --- STARTING EXPERIMENT: NN1 ---


[32m[I 2026-02-10 00:20:13,674][0m Using an existing study with name 'NN1_optimization' instead of creating a new one.[0m


[2026-02-10 00:20:13] [INFO] [Trainer] [+] SPLITTING DATA
[2026-02-10 00:20:13] [INFO] [Trainer] Time Split: 2008-12-31 | Train: 2698578 | Test: 582122
[2026-02-10 00:20:14] [INFO] [Trainer] [+] FINDING OPTIMAL HYPERPARAMETERS
[2026-02-10 00:20:14] [INFO] [Trainer]    > Checking Optuna state for NN1...
[2026-02-10 00:20:14] [INFO] [Trainer]    > Resuming: Found 35 trials. Running 15 more...


[32m[I 2026-02-10 00:29:08,390][0m Trial 35 finished with value: 0.017509877681732178 and parameters: {'alpha': 0.02273800255032061, 'learning_rate_init': 0.002228843400719724}. Best is trial 35 with value: 0.017509877681732178.[0m
[32m[I 2026-02-10 00:33:22,380][0m Trial 36 finished with value: -0.04161357879638672 and parameters: {'alpha': 8.101399924475002e-05, 'learning_rate_init': 0.0021609700144148204}. Best is trial 35 with value: 0.017509877681732178.[0m
[32m[I 2026-02-10 00:43:43,917][0m Trial 37 finished with value: 0.015390455722808838 and parameters: {'alpha': 0.006130948713135182, 'learning_rate_init': 0.0015271574545911335}. Best is trial 35 with value: 0.017509877681732178.[0m
[32m[I 2026-02-10 00:56:48,363][0m Trial 38 finished with value: -0.006834745407104492 and parameters: {'alpha': 0.008058134566196883, 'learning_rate_init': 0.0015058794748457197}. Best is trial 35 with value: 0.017509877681732178.[0m
[32m[I 2026-02-10 01:19:10,366][0m Trial 39 finish

[2026-02-10 03:14:54] [INFO] [Trainer]    > Tuning Complete.
[2026-02-10 03:14:54] [INFO] [Trainer] [+] LOADING MODEL
[2026-02-10 03:14:54] [INFO] [Trainer]    > Training Final Model (NN1)...
[2026-02-10 03:35:48] [INFO] [Trainer]    > Saved trained model to src/models/trained\NN1_20260210_033548.joblib
[2026-02-10 03:36:07] [INFO] [Trainer] RESULT NN1: R2_OOS=-0.00494 | RMSE=0.19248
[2026-02-10 03:36:07] [INFO] [Trainer] --- STARTING EXPERIMENT: NN2 ---


[32m[I 2026-02-10 03:36:07,995][0m A new study created in RDB with name: NN2_optimization[0m


[2026-02-10 03:36:07] [INFO] [Trainer] [+] SPLITTING DATA
[2026-02-10 03:36:08] [INFO] [Trainer] Time Split: 2008-12-31 | Train: 2698578 | Test: 582122
[2026-02-10 03:36:10] [INFO] [Trainer] [+] FINDING OPTIMAL HYPERPARAMETERS
[2026-02-10 03:36:10] [INFO] [Trainer]    > Checking Optuna state for NN2...
[2026-02-10 03:36:10] [INFO] [Trainer]    > Resuming: Found 0 trials. Running 50 more...


[32m[I 2026-02-10 03:40:10,114][0m Trial 0 finished with value: -0.7059128284454346 and parameters: {'alpha': 0.0005433670374209586, 'learning_rate_init': 0.0006397920986512874}. Best is trial 0 with value: -0.7059128284454346.[0m
[32m[I 2026-02-10 04:00:30,965][0m Trial 1 finished with value: -0.21499931812286377 and parameters: {'alpha': 0.006463241144390132, 'learning_rate_init': 0.0008886623013625115}. Best is trial 1 with value: -0.21499931812286377.[0m
[32m[I 2026-02-10 04:04:04,195][0m Trial 2 finished with value: -1.4146487712860107 and parameters: {'alpha': 8.48180002382634e-05, 'learning_rate_init': 0.0005212405506784783}. Best is trial 1 with value: -0.21499931812286377.[0m
[32m[I 2026-02-10 04:18:07,689][0m Trial 3 finished with value: -2.648115396499634 and parameters: {'alpha': 0.0007937249050201012, 'learning_rate_init': 0.0002419437390732874}. Best is trial 1 with value: -0.21499931812286377.[0m
[32m[I 2026-02-10 04:40:35,580][0m Trial 4 finished with valu

In [None]:
# Turn the collected per-experiment metric records into one table, one row
# per experiment, in the order the experiments were run.
res_df = pd.DataFrame(results)

# Print the heading followed by the three headline columns only; the rows are
# not sorted, so they appear in run order.
print(
    "BASELINE LEADERBOARD",
    res_df.loc[:, ['model', 'r2_oos', 'rmse']],
    sep="\n",
)