In [1]:
from utils.utils import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from utils.LSTM import *
import yaml

In [None]:
import optuna
import numpy as np
import pandas as pd

# 1) Carga de datos y configuración fija

# Si en algún momento querés usar config.yaml:
# with open("config.yaml", "r") as file:
#     config_data = yaml.safe_load(file)
# dfs = download_tickers(config_data)

stocks = ["AAPL", "MSFT", "CMCSA", "COST", "QCOM",
          "ADBE", "SBUX", "INTU", "AMD", "INTC"]

stocks_for_test = ["AAPL", "CMCSA",  "ADBE",  "AMD"]

dfs = {
    s: pd.read_csv(f"./prices/{s}_prices.csv", index_col=0, parse_dates=True)
    for s in stocks
}

start_day      = "2025-01-01"
horizon_days   = 50          # días hábiles a predecir
training_years = 2           # años previos para entrenar y predecir cada día

feat_cols = ["Close","Volume","High","Low","Open"]
W         = 50

# Parámetros LSTM fijos
hidden_size = 64
num_layers  = 2
patience    = 5
torch_seed  = 0

# Parámetros GNN fijos (solo Pearson en este caso)
model    = "LSTM+GNN"
tau_pear = 0.5
tau_lift = 1.0
alpha    = 1.0  # 1 => solo Pearson

# Parámetro de pérdida direccional
hit_coef = 0.02

# 2) Métrica

def eval_mse(res):
    """
    res: output de walk_forward_predict
    Se asume que res[0] es un DataFrame con columnas:
    'y_pred_norm' y 'y_true_norm'
    """
    res_df = res[0]
    return float(np.mean((res_df["y_pred_norm"] - res_df["y_true_norm"])**2))

# 3) Función objetivo de Optuna

def objective(trial):
    # espacio de búsqueda
    batch_size = trial.suggest_categorical("batch_size", [5, 8, 11, 16])
    lr         = trial.suggest_float("lr", 1e-3, 1e-1, log=True)
    dropout    = trial.suggest_float("dropout", 0.0, 0.5)

    mses = []

    # Evaluamos el mismo conjunto de hiperparámetros en todas las acciones
    for stock in stocks_for_test:
        res = walk_forward_predict(
            LSTMRegressor,
            dfs,                 # ahora se pasa el diccionario completo
            stock,               # ticker que queremos predecir
            start_day,
            horizon_days,
            training_years=training_years,
            feat_cols=feat_cols,
            W=W,

            # LSTM
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout=dropout,
            hit_coef=hit_coef,
            batch_size=batch_size,
            epochs=40,
            lr=lr,
            patience=patience,

            # otros parámetros
            torch_seed=torch_seed,
            model=model,
            tau_pear=tau_pear,
            tau_lift=tau_lift,
            alpha=alpha,
        )

        mses.append(eval_mse(res))

    # objetivo: minimizar el MSE promedio entre acciones
    return float(np.mean(mses))

# 4) Ejecutar la optimización

study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=40, show_progress_bar=True)

print("Mejores hiperparámetros encontrados:")
print(study.best_params)
print("MSE promedio (best):", study.best_value)

# 5) Re-entrenar con los mejores hiperparámetros
#    y guardar resultados por modelo y por acción

best_bs      = study.best_params["batch_size"]
best_lr      = study.best_params["lr"]
best_dropout = study.best_params["dropout"]

results = {model: {}}

for stock in stocks_for_test:
    res = walk_forward_predict(
        LSTMRegressor,
        dfs,
        stock,
        start_day,
        horizon_days,
        training_years=training_years,
        feat_cols=feat_cols,
        W=W,

        hidden_size=hidden_size,
        num_layers=num_layers,
        dropout=best_dropout,
        hit_coef=hit_coef,
        batch_size=best_bs,
        epochs=40,
        lr=best_lr,
        patience=patience,

        torch_seed=torch_seed,
        model=model,
        tau_pear=tau_pear,
        tau_lift=tau_lift,
        alpha=alpha,
    )
    results[model][stock] = res

# 6) MSE por acción 

for stock in stocks_for_test:
    res = results[model][stock]
    print(stock, "MSE =", eval_mse(res))


  from .autonotebook import tqdm as notebook_tqdm
[I 2025-11-25 03:03:05,203] A new study created in memory with name: no-name-fb590794-857b-4aee-89a9-c3995f711c01
Best trial: 0. Best value: 0.00436221:   2%|▎         | 1/40 [06:56<4:30:53, 416.76s/it]

[I 2025-11-25 03:10:01,959] Trial 0 finished with value: 0.004362211057348765 and parameters: {'batch_size': 11, 'lr': 0.02002944727453525, 'dropout': 0.03722887425757371}. Best is trial 0 with value: 0.004362211057348765.


Best trial: 1. Best value: 0.00397529:   5%|▌         | 2/40 [13:57<4:25:17, 418.89s/it]

[I 2025-11-25 03:17:02,337] Trial 1 finished with value: 0.003975290710114741 and parameters: {'batch_size': 11, 'lr': 0.012953904998030574, 'dropout': 0.21091225057010599}. Best is trial 1 with value: 0.003975290710114741.


Best trial: 2. Best value: 0.00382868:   8%|▊         | 3/40 [20:35<4:12:34, 409.59s/it]

[I 2025-11-25 03:23:40,869] Trial 2 finished with value: 0.0038286765753216658 and parameters: {'batch_size': 16, 'lr': 0.002983210705545094, 'dropout': 0.3666327548060726}. Best is trial 2 with value: 0.0038286765753216658.


Best trial: 3. Best value: 0.00230898:  10%|█         | 4/40 [32:18<5:15:08, 525.22s/it]

[I 2025-11-25 03:35:23,354] Trial 3 finished with value: 0.0023089815415054476 and parameters: {'batch_size': 8, 'lr': 0.008836030391844916, 'dropout': 0.02647535530066597}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 3. Best value: 0.00230898:  12%|█▎        | 5/40 [41:57<5:17:49, 544.84s/it]

[I 2025-11-25 03:45:02,985] Trial 4 finished with value: 0.005880394519760246 and parameters: {'batch_size': 8, 'lr': 0.010832588751302713, 'dropout': 0.48433982099880263}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 3. Best value: 0.00230898:  15%|█▌        | 6/40 [45:52<4:08:56, 439.32s/it]

[I 2025-11-25 03:48:57,472] Trial 5 finished with value: 0.0638613632333153 and parameters: {'batch_size': 16, 'lr': 0.058716974957250666, 'dropout': 0.06234307944243472}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 3. Best value: 0.00230898:  18%|█▊        | 7/40 [53:45<4:07:40, 450.33s/it]

[I 2025-11-25 03:56:50,464] Trial 6 finished with value: 0.002594135704054512 and parameters: {'batch_size': 16, 'lr': 0.004440973220960113, 'dropout': 0.048964671504418356}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 3. Best value: 0.00230898:  20%|██        | 8/40 [1:02:42<4:14:57, 478.04s/it]

[I 2025-11-25 04:05:47,842] Trial 7 finished with value: 0.00375660276529954 and parameters: {'batch_size': 8, 'lr': 0.01012492087561198, 'dropout': 0.27692284441856585}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 3. Best value: 0.00230898:  22%|██▎       | 9/40 [1:14:51<4:47:31, 556.51s/it]

[I 2025-11-25 04:17:56,895] Trial 8 finished with value: 0.0037990975795166044 and parameters: {'batch_size': 5, 'lr': 0.010937662770718528, 'dropout': 0.17162881356363496}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 3. Best value: 0.00230898:  25%|██▌       | 10/40 [1:19:43<3:57:26, 474.87s/it]

[I 2025-11-25 04:22:48,967] Trial 9 finished with value: 0.0035435797859259847 and parameters: {'batch_size': 16, 'lr': 0.008328045702650255, 'dropout': 0.3542704186058289}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 3. Best value: 0.00230898:  28%|██▊       | 11/40 [1:35:29<4:59:07, 618.87s/it]

[I 2025-11-25 04:38:34,341] Trial 10 finished with value: 0.002821816811057366 and parameters: {'batch_size': 8, 'lr': 0.0013629169890095238, 'dropout': 0.12148559654825625}. Best is trial 3 with value: 0.0023089815415054476.


Best trial: 11. Best value: 0.00205888:  30%|███       | 12/40 [1:55:07<6:08:16, 789.17s/it]

[I 2025-11-25 04:58:13,023] Trial 11 finished with value: 0.002058875779222743 and parameters: {'batch_size': 5, 'lr': 0.003232108337467318, 'dropout': 0.004535937326692614}. Best is trial 11 with value: 0.002058875779222743.


Best trial: 11. Best value: 0.00205888:  32%|███▎      | 13/40 [2:20:50<7:37:48, 1017.37s/it]

[I 2025-11-25 05:23:55,487] Trial 12 finished with value: 0.0023412813773520053 and parameters: {'batch_size': 5, 'lr': 0.0011207969061616417, 'dropout': 0.01632746003112878}. Best is trial 11 with value: 0.002058875779222743.


Best trial: 11. Best value: 0.00205888:  35%|███▌      | 14/40 [2:38:57<7:29:57, 1038.38s/it]

[I 2025-11-25 05:42:02,426] Trial 13 finished with value: 0.0026204126969239347 and parameters: {'batch_size': 5, 'lr': 0.003186797343665278, 'dropout': 0.13015267808491393}. Best is trial 11 with value: 0.002058875779222743.


Best trial: 11. Best value: 0.00205888:  38%|███▊      | 15/40 [2:46:26<5:58:41, 860.87s/it] 

[I 2025-11-25 05:49:31,919] Trial 14 finished with value: 0.03266187048586895 and parameters: {'batch_size': 8, 'lr': 0.03535738834256823, 'dropout': 0.10307453515752446}. Best is trial 11 with value: 0.002058875779222743.


Best trial: 15. Best value: 0.00205158:  40%|████      | 16/40 [3:02:42<5:58:07, 895.32s/it]

[I 2025-11-25 06:05:47,236] Trial 15 finished with value: 0.0020515805376685845 and parameters: {'batch_size': 5, 'lr': 0.005262310646605044, 'dropout': 0.005187315911358986}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 15. Best value: 0.00205158:  42%|████▎     | 17/40 [3:19:22<5:55:18, 926.91s/it]

[I 2025-11-25 06:22:27,594] Trial 16 finished with value: 0.003533255180265982 and parameters: {'batch_size': 5, 'lr': 0.002231752374487955, 'dropout': 0.25525514554640144}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 15. Best value: 0.00205158:  45%|████▌     | 18/40 [3:35:45<5:46:00, 943.66s/it]

[I 2025-11-25 06:38:50,255] Trial 17 finished with value: 0.0020847503428385195 and parameters: {'batch_size': 5, 'lr': 0.005504343703094004, 'dropout': 0.0016959750059135931}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 15. Best value: 0.00205158:  48%|████▊     | 19/40 [3:57:03<6:05:26, 1044.12s/it]

[I 2025-11-25 07:00:08,401] Trial 18 finished with value: 0.0026334837356579016 and parameters: {'batch_size': 5, 'lr': 0.0018069252946500956, 'dropout': 0.10132274948229722}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 15. Best value: 0.00205158:  50%|█████     | 20/40 [4:11:51<5:32:29, 997.45s/it] 

[I 2025-11-25 07:14:57,093] Trial 19 finished with value: 0.0031412393233808743 and parameters: {'batch_size': 5, 'lr': 0.004881730588395762, 'dropout': 0.1708261541973702}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 15. Best value: 0.00205158:  52%|█████▎    | 21/40 [4:23:23<4:46:46, 905.62s/it]

[I 2025-11-25 07:26:28,596] Trial 20 finished with value: 0.009427804968425591 and parameters: {'batch_size': 5, 'lr': 0.022396948103600812, 'dropout': 0.29964218316654967}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 15. Best value: 0.00205158:  55%|█████▌    | 22/40 [4:40:13<4:41:07, 937.07s/it]

[I 2025-11-25 07:43:19,014] Trial 21 finished with value: 0.0021316870206626337 and parameters: {'batch_size': 5, 'lr': 0.0055846229189765, 'dropout': 0.0089389489912578}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 15. Best value: 0.00205158:  57%|█████▊    | 23/40 [4:55:57<4:26:02, 938.98s/it]

[I 2025-11-25 07:59:02,449] Trial 22 finished with value: 0.0028496872369290615 and parameters: {'batch_size': 5, 'lr': 0.006109874909542444, 'dropout': 0.07845410728899013}. Best is trial 15 with value: 0.0020515805376685845.


Best trial: 23. Best value: 0.00186119:  60%|██████    | 24/40 [5:15:57<4:31:17, 1017.37s/it]

[I 2025-11-25 08:19:02,674] Trial 23 finished with value: 0.0018611930027616756 and parameters: {'batch_size': 5, 'lr': 0.003271956557311898, 'dropout': 0.003721917087529752}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  62%|██████▎   | 25/40 [5:25:44<3:42:04, 888.28s/it] 

[I 2025-11-25 08:28:49,804] Trial 24 finished with value: 0.002793767615440606 and parameters: {'batch_size': 11, 'lr': 0.003047444278676439, 'dropout': 0.15429536384961998}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  65%|██████▌   | 26/40 [5:47:26<3:56:14, 1012.44s/it]

[I 2025-11-25 08:50:31,899] Trial 25 finished with value: 0.0022904820079088254 and parameters: {'batch_size': 5, 'lr': 0.001860546784094539, 'dropout': 0.0732277286411091}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  68%|██████▊   | 27/40 [6:01:39<3:28:58, 964.53s/it] 

[I 2025-11-25 09:04:44,657] Trial 26 finished with value: 0.0038585895555191515 and parameters: {'batch_size': 5, 'lr': 0.0038183943522656144, 'dropout': 0.2129153680162483}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  70%|███████   | 28/40 [6:15:56<3:06:27, 932.33s/it]

[I 2025-11-25 09:19:01,861] Trial 27 finished with value: 0.0043171218010706015 and parameters: {'batch_size': 5, 'lr': 0.002350998528464029, 'dropout': 0.4842945794013122}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  72%|███████▎  | 29/40 [6:30:19<2:47:07, 911.56s/it]

[I 2025-11-25 09:33:24,957] Trial 28 finished with value: 0.002855784299382205 and parameters: {'batch_size': 5, 'lr': 0.0069173290414729, 'dropout': 0.064585710234177}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  75%|███████▌  | 30/40 [6:36:38<2:05:18, 751.81s/it]

[I 2025-11-25 09:39:44,046] Trial 29 finished with value: 0.002869538078968564 and parameters: {'batch_size': 11, 'lr': 0.01701759397166524, 'dropout': 0.036045701640119214}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  78%|███████▊  | 31/40 [6:41:37<1:32:22, 615.82s/it]

[I 2025-11-25 09:44:42,541] Trial 30 finished with value: 0.07180825802798778 and parameters: {'batch_size': 11, 'lr': 0.09061619933378423, 'dropout': 0.04084931894495858}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  80%|████████  | 32/40 [6:59:42<1:40:52, 756.51s/it]

[I 2025-11-25 10:02:47,335] Trial 31 finished with value: 0.0019187138660487005 and parameters: {'batch_size': 5, 'lr': 0.0041412100345457874, 'dropout': 0.005863453206545616}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  82%|████████▎ | 33/40 [7:19:53<1:44:11, 893.04s/it]

[I 2025-11-25 10:22:58,919] Trial 32 finished with value: 0.001968365594903204 and parameters: {'batch_size': 5, 'lr': 0.0025431302930007567, 'dropout': 0.0004808196426234453}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  85%|████████▌ | 34/40 [7:42:36<1:43:24, 1034.08s/it]

[I 2025-11-25 10:45:42,114] Trial 33 finished with value: 0.0024528098428099774 and parameters: {'batch_size': 5, 'lr': 0.0014333958921291558, 'dropout': 0.08714585548877798}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  88%|████████▊ | 35/40 [8:04:07<1:32:34, 1110.99s/it]

[I 2025-11-25 11:07:12,539] Trial 34 finished with value: 0.0020342678453621377 and parameters: {'batch_size': 5, 'lr': 0.00233584194578558, 'dropout': 0.028665134077504123}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  90%|█████████ | 36/40 [8:25:41<1:17:43, 1165.99s/it]

[I 2025-11-25 11:28:46,861] Trial 35 finished with value: 0.0022253586971862443 and parameters: {'batch_size': 5, 'lr': 0.0025367039641825233, 'dropout': 0.043000040874410075}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  92%|█████████▎| 37/40 [8:33:30<47:50, 956.99s/it]   

[I 2025-11-25 11:36:36,198] Trial 36 finished with value: 0.004373922764268921 and parameters: {'batch_size': 16, 'lr': 0.0010030969706983485, 'dropout': 0.4192411084186095}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  95%|█████████▌| 38/40 [8:45:06<29:17, 878.53s/it]

[I 2025-11-25 11:48:11,639] Trial 37 finished with value: 0.0022699733870032465 and parameters: {'batch_size': 11, 'lr': 0.004016488570077431, 'dropout': 0.03388402717614523}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119:  98%|█████████▊| 39/40 [9:06:32<16:40, 1000.72s/it]

[I 2025-11-25 12:09:37,475] Trial 38 finished with value: 0.002672604737899321 and parameters: {'batch_size': 5, 'lr': 0.0017588163712205643, 'dropout': 0.12956376137483178}. Best is trial 23 with value: 0.0018611930027616756.


Best trial: 23. Best value: 0.00186119: 100%|██████████| 40/40 [9:13:37<00:00, 830.43s/it] 


[I 2025-11-25 12:16:42,560] Trial 39 finished with value: 0.0028694625130376884 and parameters: {'batch_size': 16, 'lr': 0.002538278948084091, 'dropout': 0.055905509580410205}. Best is trial 23 with value: 0.0018611930027616756.
Mejores hiperparámetros encontrados:
{'batch_size': 5, 'lr': 0.003271956557311898, 'dropout': 0.003721917087529752}
MSE promedio (best): 0.0018611930027616756
AAPL MSE = 0.001299518312714135
CMCSA MSE = 0.004021129165643405
ADBE MSE = 0.0013970827257740175
AMD MSE = 0.0007270418069151448


In [3]:
best_bs    

5

In [4]:
best_lr      


0.003271956557311898

In [5]:
best_dropout

0.003721917087529752