# Crear  Tensores

In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import optuna
import math
import pickle # Para guardar los scalers
import json   # Para guardar los best_params

# --- 1. Carga y Preprocesamiento de Datos ---
print("Cargando y preprocesando datos...")

# [!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!]
# [! Reemplaza esta sección con TU código para cargar          !]
# [! input_tensor y target_tensor desde tus archivos .csv etc. !]
# [! Ejemplo con datos aleatorios:                             !]
num_scenarios = 500
seq_len = 74 # Número de pares de relés por escenario
input_features = 6
output_features = 4
input_tensor = torch.randn(num_scenarios, seq_len, input_features)
target_tensor = torch.randn(num_scenarios, seq_len, output_features)
print(f"Datos de ejemplo generados: input_shape={input_tensor.shape}, target_shape={target_tensor.shape}")
# [!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!]


# Normalización de los datos
input_np = input_tensor.numpy().reshape(-1, input_tensor.shape[-1])
target_np = target_tensor.numpy().reshape(-1, target_tensor.shape[-1])

scaler_input = StandardScaler()
scaler_target = StandardScaler()

# Ajustar y transformar los scalers
input_normalized = scaler_input.fit_transform(input_np)
target_normalized = scaler_target.fit_transform(target_np)

# Guardar los scalers para usarlos en la evaluación
with open('scaler_input.pkl', 'wb') as f:
    pickle.dump(scaler_input, f)
with open('scaler_target.pkl', 'wb') as f:
    pickle.dump(scaler_target, f)
print("Scalers guardados como 'scaler_input.pkl' y 'scaler_target.pkl'.")

# Volver a convertir a tensores y restaurar la forma original
input_tensor_normalized = torch.tensor(input_normalized, dtype=torch.float32).reshape(input_tensor.shape)
target_tensor_normalized = torch.tensor(target_normalized, dtype=torch.float32).reshape(target_tensor.shape)

# Dividir los datos en 80/20 (entrenamiento/validación) - Usar random_state fijo
train_idx, val_idx = train_test_split(range(input_tensor_normalized.shape[0]), test_size=0.2, random_state=42)

train_input = input_tensor_normalized[train_idx]
train_target = target_tensor_normalized[train_idx]
val_input = input_tensor_normalized[val_idx]
val_target = target_tensor_normalized[val_idx]

# Crear Datasets
train_dataset = TensorDataset(train_input, train_target)
val_dataset = TensorDataset(val_input, val_target)

print(f"Datos divididos: {len(train_dataset)} entrenamiento, {len(val_dataset)} validación.")

# --- 2. Definición del Modelo Transformer ---
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        # ... (código de PositionalEncoding sin cambios)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x shape expected: (seq_len, batch_size, d_model)
        x = x + self.pe[:x.size(0), :]
        return x

class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.d_model = d_model

        self.input_proj = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=False # TransformerEncoderLayer espera (seq_len, batch, feature)
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        self.output_proj = nn.Linear(d_model, output_dim)

    def forward(self, src):
        # src shape: (batch_size, seq_len, input_dim)
        src = self.input_proj(src) * math.sqrt(self.d_model) # Escalar según paper "Attention is All You Need"
        src = src.permute(1, 0, 2)  # Cambiar a (seq_len, batch_size, d_model)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = output.permute(1, 0, 2)  # Volver a (batch_size, seq_len, d_model)
        output = self.output_proj(output)
        return output

# --- 3. Función de Entrenamiento ---
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device, model_save_path='best_transformer_model.pth'):
    best_val_loss = float('inf')
    epochs_without_improvement = 0
    patience = 10 # Número de épocas para esperar antes de parar si no hay mejora

    print(f"\nIniciando entrenamiento por {num_epochs} épocas...")
    for epoch in range(num_epochs):
        model.train()
        train_loss = 0.0
        for batch_input, batch_target in train_loader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            optimizer.zero_grad()
            output = model(batch_input)
            loss = criterion(output, batch_target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validación
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_input, batch_target in val_loader:
                batch_input, batch_target = batch_input.to(device), batch_target.to(device)
                output = model(batch_input)
                loss = criterion(output, batch_target)
                val_loss += loss.item()

        avg_train_loss = train_loss / len(train_loader)
        avg_val_loss = val_loss / len(val_loader)

        if (epoch + 1) % 5 == 0 or epoch == 0: # Imprimir cada 5 épocas y la primera
             print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {avg_train_loss:.6f}, Val Loss: {avg_val_loss:.6f}')

        # Guardar el mejor modelo y Early Stopping
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            torch.save(model.state_dict(), model_save_path)
            print(f'---> Nuevo mejor modelo guardado en {model_save_path} (Val Loss: {best_val_loss:.6f})')
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f'Early stopping en época {epoch+1} después de {patience} épocas sin mejora.')
                break # Detener el entrenamiento

    print(f"Entrenamiento completado. Mejor Val Loss: {best_val_loss:.6f}")
    return best_val_loss

# --- 4. Optimización de Hiperparámetros con Optuna ---
def objective(trial):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hiperparámetros a optimizar
    d_model = trial.suggest_categorical('d_model', [32, 64, 128]) # Reducido para pruebas rápidas
    # Asegurar que nhead divida d_model
    possible_nheads = [h for h in [2, 4, 8] if d_model % h == 0]
    if not possible_nheads: # Si d_model es p.ej. 32, 8 no es posible
        nhead = 2 # O el divisor más pequeño posible > 1
    else:
        nhead = trial.suggest_categorical('nhead', possible_nheads)

    num_encoder_layers = trial.suggest_int('num_encoder_layers', 1, 4) # Reducido
    dim_feedforward = trial.suggest_categorical('dim_feedforward', [128, 256, 512])
    dropout = trial.suggest_float('dropout', 0.1, 0.4) # Rango ligeramente ajustado
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32]) # Reducido
    weight_decay = trial.suggest_float('weight_decay', 1e-6, 1e-3, log=True)

    # Crear DataLoader con el batch_size sugerido
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    model = TransformerModel(
        input_dim=train_input.shape[-1],
        output_dim=train_target.shape[-1],
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder_layers,
        dim_feedforward=dim_feedforward,
        dropout=dropout
    ).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Entrenar el modelo (usar menos épocas para la búsqueda)
    num_optuna_epochs = 25 # Menos épocas para acelerar Optuna
    model_save_path_trial = f'temp_best_model_trial_{trial.number}.pth' # Temporal
    best_val_loss = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=num_optuna_epochs, device=device, model_save_path=model_save_path_trial)

    # Limpiar archivo temporal (opcional)
    import os
    if os.path.exists(model_save_path_trial):
        os.remove(model_save_path_trial)

    return best_val_loss

# Ejecutar la optimización de hiperparámetros
print("\n--- Iniciando Optimización de Hiperparámetros (Optuna) ---")
study = optuna.create_study(direction='minimize')
# Aumenta n_trials para una búsqueda más exhaustiva
study.optimize(objective, n_trials=15) # Número de pruebas de Optuna (ajusta según tu tiempo)

# Obtener los mejores hiperparámetros
best_params = study.best_params
print("\n--- Mejores Hiperparámetros Encontrados ---")
print(best_params)

# Guardar los mejores parámetros en un archivo JSON
with open('best_params.json', 'w') as f:
    json.dump(best_params, f, indent=4)
print("Mejores hiperparámetros guardados en 'best_params.json'.")

# --- 5. Entrenamiento Final con los Mejores Hiperparámetros ---
print("\n--- Iniciando Entrenamiento Final con Mejores Parámetros ---")

# Ajustar nhead por si acaso (Optuna podría devolver uno que no fue elegido explícitamente si no se usó suggest_categorical con la lista filtrada)
d_model = best_params['d_model']
possible_nheads = [h for h in [2, 4, 8] if d_model % h == 0]
if best_params['nhead'] not in possible_nheads:
     nhead = min(possible_nheads) if possible_nheads else 1 # O maneja error
     print(f"Ajustando nhead a {nhead} ya que {best_params['nhead']} no divide {d_model}")
else:
    nhead = best_params['nhead']


# Crear DataLoaders para el entrenamiento final
final_batch_size = best_params['batch_size']
final_train_loader = DataLoader(train_dataset, batch_size=final_batch_size, shuffle=True)
final_val_loader = DataLoader(val_dataset, batch_size=final_batch_size)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
final_model = TransformerModel(
    input_dim=train_input.shape[-1],
    output_dim=train_target.shape[-1],
    d_model=best_params['d_model'],
    nhead=nhead, # Usar el nhead ajustado
    num_encoder_layers=best_params['num_encoder_layers'],
    dim_feedforward=best_params['dim_feedforward'],
    dropout=best_params['dropout']
).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(final_model.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'])

# Entrenar el modelo final (usar más épocas)
final_num_epochs = 100 # O más, según sea necesario
train_model(final_model, final_train_loader, final_val_loader, criterion, optimizer, num_epochs=final_num_epochs, device=device, model_save_path='best_transformer_model.pth')

print("\n--- Entrenamiento Finalizado ---")
print("El mejor modelo basado en la validación se ha guardado como 'best_transformer_model.pth'.")
print("Los scalers están en 'scaler_input.pkl' y 'scaler_target.pkl'.")
print("Los mejores hiperparámetros están en 'best_params.json'.")

[I 2025-04-16 16:55:29,977] A new study created in memory with name: no-name-5c62e5bb-d4e6-4d6e-b7d5-3d40f681cb4a


Cargando y preprocesando datos...
Datos de ejemplo generados: input_shape=torch.Size([500, 74, 6]), target_shape=torch.Size([500, 74, 4])
Scalers guardados como 'scaler_input.pkl' y 'scaler_target.pkl'.
Datos divididos: 400 entrenamiento, 100 validación.

--- Iniciando Optimización de Hiperparámetros (Optuna) ---





Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.278221, Val Loss: 1.275360
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.275360)
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.219781)
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.182286)
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.155614)
Epoch 5/25, Train Loss: 1.167438, Val Loss: 1.135022
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.135022)
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.118492)
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.105050)
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.094062)
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.084808)
Epoch 10/25, Train Loss: 1.121414, Val Loss: 1.076828
---> Nuevo mejor modelo guardado 

[I 2025-04-16 16:55:59,186] Trial 0 finished with value: 1.0334512761660986 and parameters: {'d_model': 64, 'nhead': 4, 'num_encoder_layers': 3, 'dim_feedforward': 128, 'dropout': 0.39236353069604846, 'learning_rate': 1.166863683915956e-05, 'batch_size': 16, 'weight_decay': 0.0006136698219984957}. Best is trial 0 with value: 1.0334512761660986.


Epoch 25/25, Train Loss: 1.071557, Val Loss: 1.033451
---> Nuevo mejor modelo guardado en temp_best_model_trial_0.pth (Val Loss: 1.033451)
Entrenamiento completado. Mejor Val Loss: 1.033451

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.441508, Val Loss: 1.313603
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.313603)
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.208074)
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.149750)
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.104083)
Epoch 5/25, Train Loss: 1.133650, Val Loss: 1.077888
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.077888)
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.063712)
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.054056)
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Va

[I 2025-04-16 16:56:50,705] Trial 1 finished with value: 1.0254742801189423 and parameters: {'d_model': 128, 'nhead': 8, 'num_encoder_layers': 4, 'dim_feedforward': 128, 'dropout': 0.372950366621266, 'learning_rate': 2.4929354146798223e-05, 'batch_size': 32, 'weight_decay': 1.834679339518728e-05}. Best is trial 1 with value: 1.0254742801189423.


Epoch 25/25, Train Loss: 1.043442, Val Loss: 1.025474
---> Nuevo mejor modelo guardado en temp_best_model_trial_1.pth (Val Loss: 1.025474)
Entrenamiento completado. Mejor Val Loss: 1.025474

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.198510, Val Loss: 1.124361
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.124361)
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.073541)
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.049892)
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.040633)
Epoch 5/25, Train Loss: 1.046632, Val Loss: 1.035934
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.035934)
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.033137)
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.031226)
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Va

[I 2025-04-16 16:57:19,833] Trial 2 finished with value: 1.0235818922519684 and parameters: {'d_model': 128, 'nhead': 2, 'num_encoder_layers': 3, 'dim_feedforward': 256, 'dropout': 0.21088795059542947, 'learning_rate': 2.785158719618567e-05, 'batch_size': 32, 'weight_decay': 0.00020342805385118883}. Best is trial 2 with value: 1.0235818922519684.


Epoch 25/25, Train Loss: 1.016206, Val Loss: 1.023582
---> Nuevo mejor modelo guardado en temp_best_model_trial_2.pth (Val Loss: 1.023582)
Entrenamiento completado. Mejor Val Loss: 1.023582

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.111440, Val Loss: 1.041297
---> Nuevo mejor modelo guardado en temp_best_model_trial_3.pth (Val Loss: 1.041297)
---> Nuevo mejor modelo guardado en temp_best_model_trial_3.pth (Val Loss: 1.027573)
---> Nuevo mejor modelo guardado en temp_best_model_trial_3.pth (Val Loss: 1.023773)
---> Nuevo mejor modelo guardado en temp_best_model_trial_3.pth (Val Loss: 1.022662)
Epoch 5/25, Train Loss: 1.014887, Val Loss: 1.022320
---> Nuevo mejor modelo guardado en temp_best_model_trial_3.pth (Val Loss: 1.022320)
---> Nuevo mejor modelo guardado en temp_best_model_trial_3.pth (Val Loss: 1.022046)
---> Nuevo mejor modelo guardado en temp_best_model_trial_3.pth (Val Loss: 1.021937)
Epoch 10/25, Train Loss: 1.007075, Val Loss: 1.022354
---> Nuevo me

[I 2025-04-16 16:57:45,674] Trial 3 finished with value: 1.0213970243930817 and parameters: {'d_model': 32, 'nhead': 4, 'num_encoder_layers': 3, 'dim_feedforward': 256, 'dropout': 0.29205785019056346, 'learning_rate': 0.0003212539417978877, 'batch_size': 32, 'weight_decay': 6.349460371792825e-06}. Best is trial 3 with value: 1.0213970243930817.


Epoch 25/25, Train Loss: 1.001477, Val Loss: 1.021482
Entrenamiento completado. Mejor Val Loss: 1.021397

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.212603, Val Loss: 1.141739
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.141739)
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.076868)
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.049850)
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.038219)
Epoch 5/25, Train Loss: 1.044538, Val Loss: 1.032652
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.032652)
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.029502)
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.027535)
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Val Loss: 1.026323)
---> Nuevo mejor modelo guardado en temp_best_model_trial_4.pth (Va

[I 2025-04-16 16:58:31,603] Trial 4 finished with value: 1.0220377147197723 and parameters: {'d_model': 32, 'nhead': 8, 'num_encoder_layers': 4, 'dim_feedforward': 128, 'dropout': 0.30494325403553635, 'learning_rate': 0.00012629483952232367, 'batch_size': 32, 'weight_decay': 1.7486263433640494e-05}. Best is trial 3 with value: 1.0213970243930817.


Epoch 25/25, Train Loss: 1.009652, Val Loss: 1.022058
Entrenamiento completado. Mejor Val Loss: 1.022038

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.237843, Val Loss: 1.179274
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.179274)
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.116882)
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.082825)
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.062450)
Epoch 5/25, Train Loss: 1.086283, Val Loss: 1.049559
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.049559)
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.041171)
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.035520)
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.032123)
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Va

[I 2025-04-16 16:58:58,709] Trial 5 finished with value: 1.0194911871637617 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 2, 'dim_feedforward': 256, 'dropout': 0.30064888186718963, 'learning_rate': 2.1283464050364642e-05, 'batch_size': 16, 'weight_decay': 0.00010177359051613628}. Best is trial 5 with value: 1.0194911871637617.


Epoch 25/25, Train Loss: 1.028582, Val Loss: 1.019491
---> Nuevo mejor modelo guardado en temp_best_model_trial_5.pth (Val Loss: 1.019491)
Entrenamiento completado. Mejor Val Loss: 1.019491

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.123133, Val Loss: 1.045093
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Val Loss: 1.045093)
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Val Loss: 1.025055)
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Val Loss: 1.023045)
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Val Loss: 1.022795)
Epoch 5/25, Train Loss: 1.012846, Val Loss: 1.022467
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Val Loss: 1.022467)
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Val Loss: 1.022361)
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Val Loss: 1.021659)
---> Nuevo mejor modelo guardado en temp_best_model_trial_6.pth (Va

[I 2025-04-16 16:59:36,136] Trial 6 finished with value: 1.0213211476802826 and parameters: {'d_model': 64, 'nhead': 4, 'num_encoder_layers': 2, 'dim_feedforward': 128, 'dropout': 0.3011309973389743, 'learning_rate': 0.0007890950788164766, 'batch_size': 32, 'weight_decay': 2.2173963930491094e-05}. Best is trial 5 with value: 1.0194911871637617.


Epoch 25/25, Train Loss: 0.999615, Val Loss: 1.021532
Early stopping en época 25 después de 10 épocas sin mejora.
Entrenamiento completado. Mejor Val Loss: 1.021321

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.204821, Val Loss: 1.091574
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.091574)
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.054678)
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.041177)
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.035957)
Epoch 5/25, Train Loss: 1.054001, Val Loss: 1.032778
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.032778)
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.029580)
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.027210)
---> Nuevo mejor modelo guardado en temp_best_model_trial_7.pth (Val Loss: 1.026832)
---> Nu

[I 2025-04-16 16:59:55,778] Trial 7 finished with value: 1.0225909352302551 and parameters: {'d_model': 128, 'nhead': 4, 'num_encoder_layers': 1, 'dim_feedforward': 128, 'dropout': 0.2887055897285994, 'learning_rate': 9.89064649004482e-05, 'batch_size': 32, 'weight_decay': 1.2198076241304423e-06}. Best is trial 5 with value: 1.0194911871637617.


Epoch 25/25, Train Loss: 1.015184, Val Loss: 1.023051
Entrenamiento completado. Mejor Val Loss: 1.022591

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.293167, Val Loss: 1.250568
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.250568)
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.162102)
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.112869)
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.085898)
Epoch 5/25, Train Loss: 1.084246, Val Loss: 1.070967
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.070967)
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.061482)
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.055367)
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.051423)
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Va

[I 2025-04-16 17:00:19,397] Trial 8 finished with value: 1.0302011966705322 and parameters: {'d_model': 128, 'nhead': 8, 'num_encoder_layers': 1, 'dim_feedforward': 128, 'dropout': 0.2009596579510352, 'learning_rate': 3.4761542134006376e-05, 'batch_size': 32, 'weight_decay': 0.0008275399810372122}. Best is trial 5 with value: 1.0194911871637617.


Epoch 25/25, Train Loss: 1.025322, Val Loss: 1.030201
---> Nuevo mejor modelo guardado en temp_best_model_trial_8.pth (Val Loss: 1.030201)
Entrenamiento completado. Mejor Val Loss: 1.030201

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.209409, Val Loss: 1.122780
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Val Loss: 1.122780)
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Val Loss: 1.058957)
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Val Loss: 1.040446)
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Val Loss: 1.033115)
Epoch 5/25, Train Loss: 1.034453, Val Loss: 1.029895
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Val Loss: 1.029895)
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Val Loss: 1.028081)
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Val Loss: 1.027012)
---> Nuevo mejor modelo guardado en temp_best_model_trial_9.pth (Va

[I 2025-04-16 17:00:51,380] Trial 9 finished with value: 1.0224107205867767 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 2, 'dim_feedforward': 128, 'dropout': 0.1576004451001064, 'learning_rate': 0.00011800955360567709, 'batch_size': 32, 'weight_decay': 3.6458914840540794e-06}. Best is trial 5 with value: 1.0194911871637617.


Epoch 25/25, Train Loss: 1.006603, Val Loss: 1.022525
Entrenamiento completado. Mejor Val Loss: 1.022411

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.291059, Val Loss: 1.252848
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.252848)
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.177430)
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.130250)
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.099737)
Epoch 5/25, Train Loss: 1.091738, Val Loss: 1.079586
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.079586)
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.065859)
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.056445)
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.049767)
---> Nuevo mejor modelo guardado en temp_best_model_trial_1

[I 2025-04-16 17:01:19,909] Trial 10 finished with value: 1.0240049021584647 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 2, 'dim_feedforward': 512, 'dropout': 0.1276038311254491, 'learning_rate': 1.10695364164496e-05, 'batch_size': 16, 'weight_decay': 0.00010272445305765346}. Best is trial 5 with value: 1.0194911871637617.


Epoch 25/25, Train Loss: 1.021730, Val Loss: 1.024005
---> Nuevo mejor modelo guardado en temp_best_model_trial_10.pth (Val Loss: 1.024005)
Entrenamiento completado. Mejor Val Loss: 1.024005

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.077375, Val Loss: 1.021269
---> Nuevo mejor modelo guardado en temp_best_model_trial_11.pth (Val Loss: 1.021269)
---> Nuevo mejor modelo guardado en temp_best_model_trial_11.pth (Val Loss: 1.017812)
---> Nuevo mejor modelo guardado en temp_best_model_trial_11.pth (Val Loss: 1.016827)
Epoch 5/25, Train Loss: 1.005968, Val Loss: 1.017658
---> Nuevo mejor modelo guardado en temp_best_model_trial_11.pth (Val Loss: 1.016469)
---> Nuevo mejor modelo guardado en temp_best_model_trial_11.pth (Val Loss: 1.016141)
Epoch 10/25, Train Loss: 1.000962, Val Loss: 1.016891
---> Nuevo mejor modelo guardado en temp_best_model_trial_11.pth (Val Loss: 1.015639)
Epoch 15/25, Train Loss: 0.998791, Val Loss: 1.016638
Epoch 20/25, Train Loss: 0.998326, Va

[I 2025-04-16 17:01:36,637] Trial 11 finished with value: 1.0156389815466744 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 2, 'dim_feedforward': 256, 'dropout': 0.3363087097284188, 'learning_rate': 0.0008921043940918774, 'batch_size': 16, 'weight_decay': 7.547118515409804e-05}. Best is trial 11 with value: 1.0156389815466744.


Early stopping en época 21 después de 10 épocas sin mejora.
Entrenamiento completado. Mejor Val Loss: 1.015639

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.071913, Val Loss: 1.021331
---> Nuevo mejor modelo guardado en temp_best_model_trial_12.pth (Val Loss: 1.021331)
---> Nuevo mejor modelo guardado en temp_best_model_trial_12.pth (Val Loss: 1.019374)
---> Nuevo mejor modelo guardado en temp_best_model_trial_12.pth (Val Loss: 1.018142)
---> Nuevo mejor modelo guardado en temp_best_model_trial_12.pth (Val Loss: 1.017273)
Epoch 5/25, Train Loss: 1.004081, Val Loss: 1.017283
---> Nuevo mejor modelo guardado en temp_best_model_trial_12.pth (Val Loss: 1.016847)
---> Nuevo mejor modelo guardado en temp_best_model_trial_12.pth (Val Loss: 1.016133)
Epoch 10/25, Train Loss: 0.999616, Val Loss: 1.016022
---> Nuevo mejor modelo guardado en temp_best_model_trial_12.pth (Val Loss: 1.016022)
Epoch 15/25, Train Loss: 0.999082, Val Loss: 1.017642


[I 2025-04-16 17:01:55,613] Trial 12 finished with value: 1.0160219073295593 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 2, 'dim_feedforward': 256, 'dropout': 0.36571702071861206, 'learning_rate': 0.0009261956215494404, 'batch_size': 16, 'weight_decay': 0.00010439090426743462}. Best is trial 11 with value: 1.0156389815466744.


Epoch 20/25, Train Loss: 0.998494, Val Loss: 1.016771
Early stopping en época 20 después de 10 épocas sin mejora.
Entrenamiento completado. Mejor Val Loss: 1.016022

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.086991, Val Loss: 1.022012
---> Nuevo mejor modelo guardado en temp_best_model_trial_13.pth (Val Loss: 1.022012)
---> Nuevo mejor modelo guardado en temp_best_model_trial_13.pth (Val Loss: 1.019585)
---> Nuevo mejor modelo guardado en temp_best_model_trial_13.pth (Val Loss: 1.019294)
---> Nuevo mejor modelo guardado en temp_best_model_trial_13.pth (Val Loss: 1.018134)
Epoch 5/25, Train Loss: 1.007328, Val Loss: 1.017402
---> Nuevo mejor modelo guardado en temp_best_model_trial_13.pth (Val Loss: 1.017402)
---> Nuevo mejor modelo guardado en temp_best_model_trial_13.pth (Val Loss: 1.016999)
Epoch 10/25, Train Loss: 1.001192, Val Loss: 1.016822
---> Nuevo mejor modelo guardado en temp_best_model_trial_13.pth (Val Loss: 1.016822)
---> Nuevo mejor modelo guardad

[I 2025-04-16 17:02:06,737] Trial 13 finished with value: 1.0164240172931127 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 1, 'dim_feedforward': 256, 'dropout': 0.3450950267573669, 'learning_rate': 0.0009699925671540091, 'batch_size': 16, 'weight_decay': 0.00010614982403330936}. Best is trial 11 with value: 1.0156389815466744.


Early stopping en época 24 después de 10 épocas sin mejora.
Entrenamiento completado. Mejor Val Loss: 1.016424

Iniciando entrenamiento por 25 épocas...
Epoch 1/25, Train Loss: 1.128899, Val Loss: 1.025213
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.025213)
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.019839)
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.018324)
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.017644)
Epoch 5/25, Train Loss: 1.014119, Val Loss: 1.017421
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.017421)
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.017273)
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.016583)
Epoch 10/25, Train Loss: 1.005280, Val Loss: 1.016550
---> Nuevo mejor modelo guardado en temp_best_model_trial_14.pth (Val Loss: 1.016550)

[I 2025-04-16 17:02:27,390] Trial 14 finished with value: 1.0159698128700256 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 2, 'dim_feedforward': 256, 'dropout': 0.3475639849474677, 'learning_rate': 0.000436333434844535, 'batch_size': 16, 'weight_decay': 0.0002762698242066277}. Best is trial 11 with value: 1.0156389815466744.


Epoch 25/25, Train Loss: 0.998609, Val Loss: 1.016343
Early stopping en época 25 después de 10 épocas sin mejora.
Entrenamiento completado. Mejor Val Loss: 1.015970

--- Mejores Hiperparámetros Encontrados ---
{'d_model': 64, 'nhead': 2, 'num_encoder_layers': 2, 'dim_feedforward': 256, 'dropout': 0.3363087097284188, 'learning_rate': 0.0008921043940918774, 'batch_size': 16, 'weight_decay': 7.547118515409804e-05}
Mejores hiperparámetros guardados en 'best_params.json'.

--- Iniciando Entrenamiento Final con Mejores Parámetros ---

Iniciando entrenamiento por 100 épocas...
Epoch 1/100, Train Loss: 1.080435, Val Loss: 1.019996
---> Nuevo mejor modelo guardado en best_transformer_model.pth (Val Loss: 1.019996)
---> Nuevo mejor modelo guardado en best_transformer_model.pth (Val Loss: 1.016055)
Epoch 5/100, Train Loss: 1.004698, Val Loss: 1.016791
Epoch 10/100, Train Loss: 1.001117, Val Loss: 1.015950
---> Nuevo mejor modelo guardado en best_transformer_model.pth (Val Loss: 1.015950)
---> Nue

In [6]:
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split # Para obtener el mismo split
from sklearn.preprocessing import StandardScaler # Necesario para cargar scalers
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import math
import pickle # Para cargar los scalers
import json   # Para cargar los best_params
import os

# --- 1. Carga de Artefactos y Datos ---
print("Cargando artefactos y datos para evaluación...")

# Nombres de los archivos guardados por el script de entrenamiento
model_path = 'best_transformer_model.pth'
scaler_input_path = 'scaler_input.pkl'
scaler_target_path = 'scaler_target.pkl'
params_path = 'best_params.json'

# Verificar si los archivos existen
if not all(os.path.exists(p) for p in [model_path, scaler_input_path, scaler_target_path, params_path]):
    print("Error: Faltan archivos necesarios ('best_transformer_model.pth', '.pkl', 'best_params.json').")
    print("Asegúrate de haber ejecutado 'train_transformer.py' primero.")
    exit()

# Cargar los scalers
try:
    with open(scaler_input_path, 'rb') as f:
        scaler_input = pickle.load(f)
    with open(scaler_target_path, 'rb') as f:
        scaler_target = pickle.load(f)
    print("Scalers cargados.")
except Exception as e:
    print(f"Error al cargar los scalers: {e}")
    exit()

# Cargar los mejores hiperparámetros
try:
    with open(params_path, 'r') as f:
        best_params = json.load(f)
    print("Mejores hiperparámetros cargados:", best_params)
except Exception as e:
    print(f"Error al cargar 'best_params.json': {e}")
    exit()

# [!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!]
# [! Reemplaza esta sección con TU código para cargar los      !]
# [! MISMOS datos ORIGINALES (input_tensor, target_tensor)    !]
# [! que se usaron para entrenar.                             !]
# [! Ejemplo con datos aleatorios (DEBEN SER LOS MISMOS):     !]
num_scenarios = 500
seq_len = 74
input_features = 6
output_features = 4
# Es crucial generar/cargar los mismos datos que en el entrenamiento
# Si usas aleatorios, fija la semilla: torch.manual_seed(42); np.random.seed(42) antes de generar
input_tensor = torch.randn(num_scenarios, seq_len, input_features)
target_tensor = torch.randn(num_scenarios, seq_len, output_features)
print(f"Datos originales de ejemplo cargados.")
# [!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!]


# Normalizar los datos usando los SCALERS CARGADOS (transform, NO fit_transform)
input_np = input_tensor.numpy().reshape(-1, input_tensor.shape[-1])
target_np = target_tensor.numpy().reshape(-1, target_tensor.shape[-1])

try:
    input_normalized = scaler_input.transform(input_np)
    target_normalized = scaler_target.transform(target_np)
except Exception as e:
    print(f"Error al aplicar la transformación con los scalers cargados: {e}")
    print("Asegúrate de que los datos cargados tengan la forma correcta.")
    exit()


input_tensor_normalized = torch.tensor(input_normalized, dtype=torch.float32).reshape(input_tensor.shape)
target_tensor_normalized = torch.tensor(target_normalized, dtype=torch.float32).reshape(target_tensor.shape)

# Obtener el MISMO split de validación usando el mismo random_state
_, val_idx = train_test_split(range(input_tensor_normalized.shape[0]), test_size=0.2, random_state=42)
val_input = input_tensor_normalized[val_idx]
val_target = target_tensor_normalized[val_idx] # Datos normalizados
val_target_original_shape = target_tensor[val_idx] # Datos originales para comparación final si es necesario

print(f"Datos de validación preparados. Tamaño: {val_input.shape}")

# Crear DataLoader para validación
eval_batch_size = best_params.get('batch_size', 32) # Usar el batch_size de params o default
val_dataset = TensorDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size=eval_batch_size)


# --- 2. Definición del Modelo (Debe ser idéntica a la de entrenamiento) ---
class PositionalEncoding(nn.Module):
    # ... (Copia exacta de la clase PositionalEncoding de train_transformer.py)
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x

class TransformerModel(nn.Module):
    # ... (Copia exacta de la clase TransformerModel de train_transformer.py)
    def __init__(self, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.d_model = d_model
        self.input_proj = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward, dropout=dropout, batch_first=False
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)
        self.output_proj = nn.Linear(d_model, output_dim)

    def forward(self, src):
        src = self.input_proj(src) * math.sqrt(self.d_model)
        src = src.permute(1, 0, 2)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = output.permute(1, 0, 2)
        output = self.output_proj(output)
        return output

# --- 3. Carga del Modelo Entrenado ---
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Extraer dimensiones y parámetros necesarios del entorno o datos cargados
input_dim = val_input.shape[-1]
output_dim = val_target.shape[-1]

# Ajustar nhead si es necesario (basado en d_model de los parámetros cargados)
d_model = best_params['d_model']
nhead = best_params['nhead']
possible_nheads = [h for h in [2, 4, 8] if d_model % h == 0]
if nhead not in possible_nheads:
     original_nhead = nhead
     nhead = min(possible_nheads) if possible_nheads else 1
     print(f"Advertencia: nhead={original_nhead} en params no divide d_model={d_model}. Usando nhead={nhead} para instanciar.")

# Crear una instancia del modelo con los parámetros correctos
model = TransformerModel(
    input_dim=input_dim,
    output_dim=output_dim,
    d_model=best_params['d_model'],
    nhead=nhead, # Usar el nhead ajustado/verificado
    num_encoder_layers=best_params['num_encoder_layers'],
    dim_feedforward=best_params['dim_feedforward'],
    dropout=best_params['dropout'] # El dropout durante eval no importa si se usa model.eval()
).to(device)

# Cargar los pesos guardados
try:
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval() # ¡Muy importante! Poner el modelo en modo evaluación
    print(f"Modelo cargado exitosamente desde '{model_path}' y puesto en modo evaluación.")
except Exception as e:
    print(f"Error al cargar el state_dict del modelo: {e}")
    print("Verifica que los hiperparámetros en 'best_params.json' coincidan con los del modelo guardado.")
    exit()


# --- 4. Funciones de Evaluación ---
def evaluate_model(model, data_loader, criterion, scaler_target, device):
    model.eval()
    all_preds_normalized = []
    all_targets_normalized = []
    total_loss = 0.0

    with torch.no_grad():
        for batch_input, batch_target in data_loader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            output = model(batch_input) # Predicciones normalizadas
            loss = criterion(output, batch_target) # Pérdida sobre datos normalizados
            total_loss += loss.item()

            all_preds_normalized.append(output.cpu().numpy())
            all_targets_normalized.append(batch_target.cpu().numpy())

    # Concatenar todas las predicciones y valores reales (normalizados)
    all_preds_normalized = np.concatenate(all_preds_normalized, axis=0)
    all_targets_normalized = np.concatenate(all_targets_normalized, axis=0)

    # Desnormalizar (cuidado con la forma)
    num_samples = all_preds_normalized.shape[0]
    seq_len = all_preds_normalized.shape[1]
    output_features = all_preds_normalized.shape[2]

    preds_flat = all_preds_normalized.reshape(-1, output_features)
    targets_flat = all_targets_normalized.reshape(-1, output_features)

    preds_denorm_flat = scaler_target.inverse_transform(preds_flat)
    targets_denorm_flat = scaler_target.inverse_transform(targets_flat)

    # Restaurar forma original (si es necesario, aunque para métricas no suele serlo)
    all_preds_denorm = preds_denorm_flat.reshape(num_samples, seq_len, output_features)
    all_targets_denorm = targets_denorm_flat.reshape(num_samples, seq_len, output_features)


    # Calcular métricas sobre los datos desnormalizados
    # Usar las versiones aplanadas para las métricas sklearn estándar
    mse = mean_squared_error(targets_denorm_flat, preds_denorm_flat)
    mae = mean_absolute_error(targets_denorm_flat, preds_denorm_flat)
    r2 = r2_score(targets_denorm_flat, preds_denorm_flat)

    avg_loss = total_loss / len(data_loader) # Pérdida promedio (normalizada)
    return avg_loss, mse, mae, r2, all_preds_denorm, all_targets_denorm

def test_model(model, input_data_normalized, scaler_target, device):
    # input_data_normalized debe ser un solo escenario (seq_len, input_dim) o (1, seq_len, input_dim)
    model.eval()
    with torch.no_grad():
        input_tensor = torch.tensor(input_data_normalized, dtype=torch.float32).to(device)
        # Asegurarse que tenga la dimensión de batch
        if len(input_tensor.shape) == 2:
            input_tensor = input_tensor.unsqueeze(0) # Añadir dim de batch: (1, seq_len, input_dim)

        output_normalized = model(input_tensor) # (1, seq_len, output_dim)

        # Desnormalizar
        output_np_norm = output_normalized.cpu().numpy()
        output_features = output_np_norm.shape[-1]
        output_flat_norm = output_np_norm.reshape(-1, output_features)
        output_flat_denorm = scaler_target.inverse_transform(output_flat_norm)
        output_denorm = output_flat_denorm.reshape(output_np_norm.shape) # (1, seq_len, output_dim)

        return output_denorm.squeeze(0) # Quitar la dimension de batch para devolver (seq_len, output_dim)


# --- 5. Ejecución de la Evaluación ---
criterion = nn.MSELoss() # Se usa para calcular la pérdida normalizada

# Evaluar el modelo en el conjunto de validación completo
print("\n--- Evaluando Modelo en Datos de Validación ---")
avg_loss, mse, mae, r2, predictions_denorm, true_values_denorm = evaluate_model(model, val_loader, criterion, scaler_target, device)

print("\nEstadísticas del modelo en los datos de validación:")
print(f"Pérdida promedio (MSE normalizado): {avg_loss:.6f}")
print(f"Error Cuadrático Medio (MSE desnormalizado): {mse:.6f}")
print(f"Error Absoluto Medio (MAE desnormalizado): {mae:.6f}")
print(f"Coeficiente de Determinación (R² desnormalizado): {r2:.4f}")

# Probar el modelo con un ejemplo específico (el primer escenario del conjunto de validación)
print("\n--- Prueba con un Ejemplo Específico (Primer Escenario de Validación) ---")
test_input_normalized = val_input[0].cpu().numpy() # (seq_len, input_dim)
# El valor real correspondiente ya lo tenemos desnormalizado de evaluate_model
true_output_denorm_sample = true_values_denorm[0] # (seq_len, output_dim)

predicted_output_denorm_sample = test_model(model, test_input_normalized, scaler_target, device) # (seq_len, output_dim)

# Mostrar los resultados de la prueba para los primeros N pares
num_pairs_to_show = 5
print(f"\nPredicciones vs Valores Reales (Desnormalizados) para los primeros {num_pairs_to_show} pares de relés:")
for i in range(min(seq_len, num_pairs_to_show)):
    print(f"Par {i+1}:")
    # Formatear para mejor legibilidad
    pred_str = np.array2string(predicted_output_denorm_sample[i, :], precision=4, floatmode='fixed')
    true_str = np.array2string(true_output_denorm_sample[i, :], precision=4, floatmode='fixed')
    print(f"  Predicción: {pred_str}")
    print(f"  Valor real: {true_str}")

Cargando artefactos y datos para evaluación...
Scalers cargados.
Mejores hiperparámetros cargados: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 2, 'dim_feedforward': 256, 'dropout': 0.3363087097284188, 'learning_rate': 0.0008921043940918774, 'batch_size': 16, 'weight_decay': 7.547118515409804e-05}
Datos originales de ejemplo cargados.
Datos de validación preparados. Tamaño: torch.Size([100, 74, 6])
Modelo cargado exitosamente desde 'best_transformer_model.pth' y puesto en modo evaluación.

--- Evaluando Modelo en Datos de Validación ---

Estadísticas del modelo en los datos de validación:
Pérdida promedio (MSE normalizado): 1.005792
Error Cuadrático Medio (MSE desnormalizado): 1.009080
Error Absoluto Medio (MAE desnormalizado): 0.802382
Coeficiente de Determinación (R² desnormalizado): -0.0002

--- Prueba con un Ejemplo Específico (Primer Escenario de Validación) ---

Predicciones vs Valores Reales (Desnormalizados) para los primeros 5 pares de relés:
Par 1:
  Predicción: [ 0.0152



# Implementación del transformer 

*******

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import optuna
import math

# [Código previo para cargar y procesar los datos]
# ... (tu código para cargar input_tensor y target_tensor)

# Normalización de los datos
input_np = input_tensor.numpy().reshape(-1, input_tensor.shape[-1])  # Aplanar para normalizar
target_np = target_tensor.numpy().reshape(-1, target_tensor.shape[-1])

scaler_input = StandardScaler()
scaler_target = StandardScaler()

input_normalized = scaler_input.fit_transform(input_np)
target_normalized = scaler_target.fit_transform(target_np)

# Volver a convertir a tensores y restaurar la forma original
input_tensor_normalized = torch.tensor(input_normalized, dtype=torch.float32).reshape(input_tensor.shape)
target_tensor_normalized = torch.tensor(target_normalized, dtype=torch.float32).reshape(target_tensor.shape)

# Dividir los datos en 80/20 (entrenamiento/validación)
train_idx, val_idx = train_test_split(range(input_tensor_normalized.shape[0]), test_size=0.2, random_state=42)

train_input = input_tensor_normalized[train_idx]
train_target = target_tensor_normalized[train_idx]
val_input = input_tensor_normalized[val_idx]
val_target = target_tensor_normalized[val_idx]

# Crear DataLoader para manejar los datos en lotes
train_dataset = TensorDataset(train_input, train_target)
val_dataset = TensorDataset(val_input, val_target)

# Definir la arquitectura del Transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x

class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.d_model = d_model

        # Proyección lineal para ajustar la dimensión de entrada a d_model
        self.input_proj = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model)

        # Capas del Transformer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

        # Proyección lineal para la salida
        self.output_proj = nn.Linear(d_model, output_dim)

    def forward(self, src):
        # src shape: (batch_size, seq_len, input_dim)
        src = self.input_proj(src)  # Proyectar a d_model
        src = src.permute(1, 0, 2)  # Cambiar a (seq_len, batch_size, d_model) para Transformer
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = output.permute(1, 0, 2)  # Volver a (batch_size, seq_len, d_model)
        output = self.output_proj(output)  # Proyectar a output_dim
        return output

# Función para entrenar el modelo
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    model.train()
    best_val_loss = float('inf')
    for epoch in range(num_epochs):
        train_loss = 0.0
        for batch_input, batch_target in train_loader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            optimizer.zero_grad()
            output = model(batch_input)
            loss = criterion(output, batch_target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validación
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_input, batch_target in val_loader:
                batch_input, batch_target = batch_input.to(device), batch_target.to(device)
                output = model(batch_input)
                loss = criterion(output, batch_target)
                val_loss += loss.item()

        val_loss /= len(val_loader)
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_transformer_model.pth')

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss/len(train_loader):.4f}, Val Loss: {val_loss:.4f}')
    return best_val_loss

# Ajuste de hiperparámetros con Optuna
def objective(trial):
    # Hiperparámetros a optimizar
    d_model = trial.suggest_categorical('d_model', [64, 128, 256])
    nhead = trial.suggest_categorical('nhead', [2, 4, 8])  # Debe dividir d_model
    num_encoder_layers = trial.suggest_int('num_encoder_layers', 1, 6)
    dim_feedforward = trial.suggest_categorical('dim_feedforward', [128, 256, 512])
    dropout = trial.suggest_uniform('dropout', 0.1, 0.3)
    learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64])
    weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)  # Regularización L2

    # Asegurar que nhead divida d_model
    if d_model % nhead != 0:
        nhead = min([h for h in [2, 4, 8] if d_model % h == 0], default=2)

    # Crear DataLoader con el batch_size sugerido
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = TransformerModel(
        input_dim=train_input.shape[-1],  # 6
        output_dim=train_target.shape[-1],  # 4
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder_layers,
        dim_feedforward=dim_feedforward,
        dropout=dropout
    ).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

    # Entrenar el modelo
    best_val_loss = train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs=50, device=device)
    return best_val_loss

# Ejecutar la optimización de hiperparámetros
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=20)

# Obtener los mejores hiperparámetros
best_params = study.best_params
print("Mejores hiperparámetros:", best_params)

# Ajustar nhead para que divida d_model
d_model = best_params['d_model']
nhead = best_params['nhead']
if d_model % nhead != 0:
    nhead = min([h for h in [2, 4, 8] if d_model % h == 0], default=2)

# Entrenar el modelo final con los mejores hiperparámetros
train_loader = DataLoader(train_dataset, batch_size=best_params['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=best_params['batch_size'])

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
final_model = TransformerModel(
    input_dim=train_input.shape[-1],
    output_dim=train_target.shape[-1],
    d_model=best_params['d_model'],
    nhead=nhead,
    num_encoder_layers=best_params['num_encoder_layers'],
    dim_feedforward=best_params['dim_feedforward'],
    dropout=best_params['dropout']
).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(final_model.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'])

# Entrenar el modelo final
train_model(final_model, train_loader, val_loader, criterion, optimizer, num_epochs=100, device=device)

# Cargar el mejor modelo guardado
final_model.load_state_dict(torch.load('best_transformer_model.pth'))
print("Entrenamiento finalizado. Modelo guardado como 'best_transformer_model.pth'.")

[I 2025-04-16 16:36:15,043] A new study created in memory with name: no-name-c5ecf6d3-a62a-4301-9336-5703d448fb12
  dropout = trial.suggest_uniform('dropout', 0.1, 0.3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)  # Regularización L2


Epoch 10/50, Train Loss: 1.0660, Val Loss: 0.9674
Epoch 20/50, Train Loss: 1.0319, Val Loss: 0.9426
Epoch 30/50, Train Loss: 1.0204, Val Loss: 0.9453
Epoch 40/50, Train Loss: 1.0031, Val Loss: 0.9459


[I 2025-04-16 16:36:40,886] Trial 0 finished with value: 0.9419369697570801 and parameters: {'d_model': 256, 'nhead': 8, 'num_encoder_layers': 5, 'dim_feedforward': 512, 'dropout': 0.21303799415208624, 'learning_rate': 0.0031011514758145188, 'batch_size': 32, 'weight_decay': 0.004120132999818286}. Best is trial 0 with value: 0.9419369697570801.


Epoch 50/50, Train Loss: 1.0121, Val Loss: 0.9444


  dropout = trial.suggest_uniform('dropout', 0.1, 0.3)
  learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-2)
  weight_decay = trial.suggest_loguniform('weight_decay', 1e-6, 1e-2)  # Regularización L2


Epoch 10/50, Train Loss: 0.3682, Val Loss: 0.3358
Epoch 20/50, Train Loss: 0.1786, Val Loss: 0.1626
Epoch 30/50, Train Loss: 0.0938, Val Loss: 0.0912
Epoch 40/50, Train Loss: 0.0522, Val Loss: 0.0563


[I 2025-04-16 16:36:44,469] Trial 1 finished with value: 0.04534054547548294 and parameters: {'d_model': 128, 'nhead': 4, 'num_encoder_layers': 1, 'dim_feedforward': 128, 'dropout': 0.14643364570918843, 'learning_rate': 0.0018446960698927804, 'batch_size': 32, 'weight_decay': 7.526837086957923e-05}. Best is trial 1 with value: 0.04534054547548294.


Epoch 50/50, Train Loss: 0.0377, Val Loss: 0.0458
Epoch 10/50, Train Loss: 0.3164, Val Loss: 0.2980
Epoch 20/50, Train Loss: 0.1257, Val Loss: 0.1174
Epoch 30/50, Train Loss: 0.0586, Val Loss: 0.0635
Epoch 40/50, Train Loss: 0.0358, Val Loss: 0.0461


[I 2025-04-16 16:36:51,194] Trial 2 finished with value: 0.03157302364706993 and parameters: {'d_model': 256, 'nhead': 2, 'num_encoder_layers': 1, 'dim_feedforward': 512, 'dropout': 0.14038590394434391, 'learning_rate': 0.003007830107786523, 'batch_size': 32, 'weight_decay': 2.2647870313852387e-05}. Best is trial 2 with value: 0.03157302364706993.


Epoch 50/50, Train Loss: 0.0247, Val Loss: 0.0316
Epoch 10/50, Train Loss: 0.6916, Val Loss: 0.6292
Epoch 20/50, Train Loss: 0.4862, Val Loss: 0.4268
Epoch 30/50, Train Loss: 0.2967, Val Loss: 0.2698
Epoch 40/50, Train Loss: 0.1901, Val Loss: 0.1773


[I 2025-04-16 16:36:57,998] Trial 3 finished with value: 0.11623222380876541 and parameters: {'d_model': 128, 'nhead': 8, 'num_encoder_layers': 2, 'dim_feedforward': 128, 'dropout': 0.18480547173463935, 'learning_rate': 0.00011621368108340085, 'batch_size': 16, 'weight_decay': 0.00015282243844425946}. Best is trial 2 with value: 0.03157302364706993.


Epoch 50/50, Train Loss: 0.1207, Val Loss: 0.1162
Epoch 10/50, Train Loss: 0.7423, Val Loss: 0.6765
Epoch 20/50, Train Loss: 0.5978, Val Loss: 0.5468
Epoch 30/50, Train Loss: 0.4805, Val Loss: 0.4334
Epoch 40/50, Train Loss: 0.3717, Val Loss: 0.3402


[I 2025-04-16 16:37:04,929] Trial 4 finished with value: 0.26231876015663147 and parameters: {'d_model': 128, 'nhead': 2, 'num_encoder_layers': 3, 'dim_feedforward': 256, 'dropout': 0.2894294608481086, 'learning_rate': 0.0002610178660512947, 'batch_size': 64, 'weight_decay': 3.840700022235123e-05}. Best is trial 2 with value: 0.03157302364706993.


Epoch 50/50, Train Loss: 0.2829, Val Loss: 0.2623
Epoch 10/50, Train Loss: 0.6474, Val Loss: 0.5907
Epoch 20/50, Train Loss: 0.4771, Val Loss: 0.4367
Epoch 30/50, Train Loss: 0.3233, Val Loss: 0.2975
Epoch 40/50, Train Loss: 0.2183, Val Loss: 0.2014


[I 2025-04-16 16:37:10,844] Trial 5 finished with value: 0.13925901055335999 and parameters: {'d_model': 128, 'nhead': 4, 'num_encoder_layers': 2, 'dim_feedforward': 512, 'dropout': 0.106504064896535, 'learning_rate': 0.0001250628079709077, 'batch_size': 32, 'weight_decay': 6.827650275796048e-05}. Best is trial 2 with value: 0.03157302364706993.


Epoch 50/50, Train Loss: 0.1429, Val Loss: 0.1393
Epoch 10/50, Train Loss: 1.0917, Val Loss: 1.0023
Epoch 20/50, Train Loss: 0.9206, Val Loss: 0.8411
Epoch 30/50, Train Loss: 0.8029, Val Loss: 0.7337
Epoch 40/50, Train Loss: 0.7233, Val Loss: 0.6603


[I 2025-04-16 16:37:13,155] Trial 6 finished with value: 0.6078706383705139 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 1, 'dim_feedforward': 512, 'dropout': 0.1541078948560042, 'learning_rate': 0.0001438887831733158, 'batch_size': 64, 'weight_decay': 4.581427110580153e-05}. Best is trial 2 with value: 0.03157302364706993.


Epoch 50/50, Train Loss: 0.6658, Val Loss: 0.6079
Epoch 10/50, Train Loss: 0.7318, Val Loss: 0.6562
Epoch 20/50, Train Loss: 0.5680, Val Loss: 0.5122
Epoch 30/50, Train Loss: 0.4539, Val Loss: 0.4081
Epoch 40/50, Train Loss: 0.3464, Val Loss: 0.3162


[I 2025-04-16 16:37:20,917] Trial 7 finished with value: 0.2362021803855896 and parameters: {'d_model': 128, 'nhead': 4, 'num_encoder_layers': 2, 'dim_feedforward': 512, 'dropout': 0.2523249189030542, 'learning_rate': 0.0002660607897954807, 'batch_size': 64, 'weight_decay': 0.001070223750301344}. Best is trial 2 with value: 0.03157302364706993.


Epoch 50/50, Train Loss: 0.2535, Val Loss: 0.2362
Epoch 10/50, Train Loss: 0.6198, Val Loss: 0.6038
Epoch 20/50, Train Loss: 0.3885, Val Loss: 0.3604
Epoch 30/50, Train Loss: 0.2053, Val Loss: 0.1884
Epoch 40/50, Train Loss: 0.0991, Val Loss: 0.0962


[I 2025-04-16 16:37:51,742] Trial 8 finished with value: 0.06729038804769516 and parameters: {'d_model': 256, 'nhead': 8, 'num_encoder_layers': 6, 'dim_feedforward': 256, 'dropout': 0.28697797202075104, 'learning_rate': 3.866471449118007e-05, 'batch_size': 16, 'weight_decay': 0.00021766928742243395}. Best is trial 2 with value: 0.03157302364706993.


Epoch 50/50, Train Loss: 0.0605, Val Loss: 0.0673
Epoch 10/50, Train Loss: 0.1795, Val Loss: 0.1798
Epoch 20/50, Train Loss: 0.0419, Val Loss: 0.0463
Epoch 30/50, Train Loss: 0.0223, Val Loss: 0.0275
Epoch 40/50, Train Loss: 0.0160, Val Loss: 0.0215


[I 2025-04-16 16:37:59,679] Trial 9 finished with value: 0.02057831548154354 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 3, 'dim_feedforward': 512, 'dropout': 0.12701899235413902, 'learning_rate': 0.001758834453253742, 'batch_size': 16, 'weight_decay': 0.00012014700596852297}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.0184, Val Loss: 0.0239
Epoch 10/50, Train Loss: 0.2520, Val Loss: 0.2241
Epoch 20/50, Train Loss: 0.0669, Val Loss: 0.0693
Epoch 30/50, Train Loss: 0.0338, Val Loss: 0.0411
Epoch 40/50, Train Loss: 0.0239, Val Loss: 0.0341


[I 2025-04-16 16:38:10,529] Trial 10 finished with value: 0.023838568478822708 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 4, 'dim_feedforward': 128, 'dropout': 0.1013170208989892, 'learning_rate': 0.0011212853157921113, 'batch_size': 16, 'weight_decay': 2.842004631133699e-06}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.0205, Val Loss: 0.0238
Epoch 10/50, Train Loss: 0.2441, Val Loss: 0.2230
Epoch 20/50, Train Loss: 0.0781, Val Loss: 0.0831
Epoch 30/50, Train Loss: 0.0426, Val Loss: 0.0541
Epoch 40/50, Train Loss: 0.0255, Val Loss: 0.0313


[I 2025-04-16 16:38:22,447] Trial 11 finished with value: 0.02368009276688099 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 4, 'dim_feedforward': 128, 'dropout': 0.11146843022689354, 'learning_rate': 0.001108831384301685, 'batch_size': 16, 'weight_decay': 1.5825691315433117e-06}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.0197, Val Loss: 0.0237
Epoch 10/50, Train Loss: 0.8546, Val Loss: 0.7757
Epoch 20/50, Train Loss: 0.7483, Val Loss: 0.6889
Epoch 30/50, Train Loss: 0.5619, Val Loss: 0.5570
Epoch 40/50, Train Loss: 0.6406, Val Loss: 0.6326


[I 2025-04-16 16:38:32,330] Trial 12 finished with value: 0.4758501350879669 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 4, 'dim_feedforward': 128, 'dropout': 0.12608542993078775, 'learning_rate': 0.007678035041415385, 'batch_size': 16, 'weight_decay': 1.1070453059280337e-06}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.7542, Val Loss: 0.6907
Epoch 10/50, Train Loss: 0.3441, Val Loss: 0.3138
Epoch 20/50, Train Loss: 0.1084, Val Loss: 0.1053
Epoch 30/50, Train Loss: 0.0647, Val Loss: 0.0681
Epoch 40/50, Train Loss: 0.0338, Val Loss: 0.0411


[I 2025-04-16 16:38:39,280] Trial 13 finished with value: 0.029767794534564018 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 3, 'dim_feedforward': 128, 'dropout': 0.1757789676990953, 'learning_rate': 0.0010912597673119193, 'batch_size': 16, 'weight_decay': 1.0881779567341535e-05}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.0258, Val Loss: 0.0298
Epoch 10/50, Train Loss: 0.3719, Val Loss: 0.3278
Epoch 20/50, Train Loss: 0.1136, Val Loss: 0.1049
Epoch 30/50, Train Loss: 0.0465, Val Loss: 0.0498
Epoch 40/50, Train Loss: 0.0372, Val Loss: 0.0441


[I 2025-04-16 16:38:52,022] Trial 14 finished with value: 0.02961997129023075 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 5, 'dim_feedforward': 256, 'dropout': 0.20677072728869045, 'learning_rate': 0.0006060229119104824, 'batch_size': 16, 'weight_decay': 0.0004747236039878941}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.0246, Val Loss: 0.0296
Epoch 10/50, Train Loss: 0.9347, Val Loss: 0.8650
Epoch 20/50, Train Loss: 0.8920, Val Loss: 0.8628
Epoch 30/50, Train Loss: 0.9316, Val Loss: 0.8628
Epoch 40/50, Train Loss: 0.8871, Val Loss: 0.8614


[I 2025-04-16 16:39:00,318] Trial 15 finished with value: 0.8613825440406799 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 4, 'dim_feedforward': 512, 'dropout': 0.11589192533065852, 'learning_rate': 0.009119157395653556, 'batch_size': 16, 'weight_decay': 5.504664792276691e-06}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.9131, Val Loss: 0.8624
Epoch 10/50, Train Loss: 1.1663, Val Loss: 1.0593
Epoch 20/50, Train Loss: 0.9973, Val Loss: 0.9634
Epoch 30/50, Train Loss: 1.0262, Val Loss: 0.9032
Epoch 40/50, Train Loss: 0.9224, Val Loss: 0.8572


[I 2025-04-16 16:39:11,848] Trial 16 finished with value: 0.8169777393341064 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 5, 'dim_feedforward': 128, 'dropout': 0.1643526510709878, 'learning_rate': 1.3032560333199646e-05, 'batch_size': 16, 'weight_decay': 0.002327535959533399}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.8544, Val Loss: 0.8170
Epoch 10/50, Train Loss: 0.3003, Val Loss: 0.2628
Epoch 20/50, Train Loss: 0.0983, Val Loss: 0.0942
Epoch 30/50, Train Loss: 0.0416, Val Loss: 0.0463
Epoch 40/50, Train Loss: 0.0370, Val Loss: 0.0328


[I 2025-04-16 16:39:20,996] Trial 17 finished with value: 0.027140503749251366 and parameters: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 3, 'dim_feedforward': 512, 'dropout': 0.23348947715797252, 'learning_rate': 0.0006186079699422896, 'batch_size': 16, 'weight_decay': 0.00044995433825662033}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.0233, Val Loss: 0.0291
Epoch 10/50, Train Loss: 0.9577, Val Loss: 0.8419
Epoch 20/50, Train Loss: 0.2530, Val Loss: 0.2182
Epoch 30/50, Train Loss: 0.1963, Val Loss: 0.1575
Epoch 40/50, Train Loss: 0.0611, Val Loss: 0.0705


[I 2025-04-16 16:39:34,205] Trial 18 finished with value: 0.06625315546989441 and parameters: {'d_model': 64, 'nhead': 4, 'num_encoder_layers': 6, 'dim_feedforward': 128, 'dropout': 0.12866413018695816, 'learning_rate': 0.004780505085180394, 'batch_size': 16, 'weight_decay': 0.009828309799503299}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.0600, Val Loss: 0.0664
Epoch 10/50, Train Loss: 0.6757, Val Loss: 0.6061
Epoch 20/50, Train Loss: 0.4657, Val Loss: 0.4257
Epoch 30/50, Train Loss: 0.3195, Val Loss: 0.2934
Epoch 40/50, Train Loss: 0.2252, Val Loss: 0.2126


[I 2025-04-16 16:39:40,021] Trial 19 finished with value: 0.15645889937877655 and parameters: {'d_model': 64, 'nhead': 2, 'num_encoder_layers': 4, 'dim_feedforward': 256, 'dropout': 0.18676617546368462, 'learning_rate': 0.0007542397826772367, 'batch_size': 64, 'weight_decay': 1.084152986790639e-06}. Best is trial 9 with value: 0.02057831548154354.


Epoch 50/50, Train Loss: 0.1621, Val Loss: 0.1565
Mejores hiperparámetros: {'d_model': 64, 'nhead': 8, 'num_encoder_layers': 3, 'dim_feedforward': 512, 'dropout': 0.12701899235413902, 'learning_rate': 0.001758834453253742, 'batch_size': 16, 'weight_decay': 0.00012014700596852297}
Epoch 10/100, Train Loss: 0.2241, Val Loss: 0.1949
Epoch 20/100, Train Loss: 0.0525, Val Loss: 0.0535
Epoch 30/100, Train Loss: 0.0286, Val Loss: 0.0331
Epoch 40/100, Train Loss: 0.0203, Val Loss: 0.0260
Epoch 50/100, Train Loss: 0.0186, Val Loss: 0.0238
Epoch 60/100, Train Loss: 0.0135, Val Loss: 0.0208
Epoch 70/100, Train Loss: 0.0121, Val Loss: 0.0184
Epoch 80/100, Train Loss: 0.0112, Val Loss: 0.0193
Epoch 90/100, Train Loss: 0.0118, Val Loss: 0.0179
Epoch 100/100, Train Loss: 0.0113, Val Loss: 0.0176
Entrenamiento finalizado. Modelo guardado como 'best_transformer_model.pth'.


# Validación del modelo

In [None]:
    import torch
    import torch.nn as nn
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    import numpy as np
    from torch.utils.data import DataLoader, TensorDataset
    import math

    # Definir la clase PositionalEncoding (necesaria para el Transformer)
    class PositionalEncoding(nn.Module):
        def __init__(self, d_model, max_len=5000):
            super(PositionalEncoding, self).__init__()
            pe = torch.zeros(max_len, d_model)
            position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
            div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
            pe[:, 0::2] = torch.sin(position * div_term)
            pe[:, 1::2] = torch.cos(position * div_term)
            pe = pe.unsqueeze(0).transpose(0, 1)
            self.register_buffer('pe', pe)

        def forward(self, x):
            x = x + self.pe[:x.size(0), :]
            return x

    # Definir la clase del modelo Transformer (necesaria para cargar el modelo)
    class TransformerModel(nn.Module):
        def __init__(self, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
            super(TransformerModel, self).__init__()
            self.input_dim = input_dim
            self.output_dim = output_dim
            self.d_model = d_model

            # Proyección lineal para ajustar la dimensión de entrada a d_model
            self.input_proj = nn.Linear(input_dim, d_model)
            self.pos_encoder = PositionalEncoding(d_model)

            # Capas del Transformer
            encoder_layer = nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout
            )
            self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

            # Proyección lineal para la salida
            self.output_proj = nn.Linear(d_model, output_dim)

        def forward(self, src):
            # src shape: (batch_size, seq_len, input_dim)
            src = self.input_proj(src)  # Proyectar a d_model
            src = src.permute(1, 0, 2)  # Cambiar a (seq_len, batch_size, d_model) para Transformer
            src = self.pos_encoder(src)
            output = self.transformer_encoder(src)
            output = output.permute(1, 0, 2)  # Volver a (batch_size, seq_len, d_model)
            output = self.output_proj(output)  # Proyectar a output_dim
            return output

    # Cargar el modelo guardado
    def load_model(model_path, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout, device):
        model = TransformerModel(
            input_dim=input_dim,
            output_dim=output_dim,
            d_model=d_model,
            nhead=nhead,
            num_encoder_layers=num_encoder_layers,
            dim_feedforward=dim_feedforward,
            dropout=dropout
        ).to(device)
        model.load_state_dict(torch.load(model_path))
        model.eval()
        return model

    # Función para evaluar el modelo y calcular métricas
    def evaluate_model(model, data_loader, criterion, scaler_target, device):
        model.eval()
        all_preds = []
        all_targets = []
        total_loss = 0.0

        with torch.no_grad():
            for batch_input, batch_target in data_loader:
                batch_input, batch_target = batch_input.to(device), batch_target.to(device)
                output = model(batch_input)
                loss = criterion(output, batch_target)
                total_loss += loss.item()

                # Guardar predicciones y valores reales
                all_preds.append(output.cpu().numpy())
                all_targets.append(batch_target.cpu().numpy())

        # Concatenar todas las predicciones y valores reales
        all_preds = np.concatenate(all_preds, axis=0)
        all_targets = np.concatenate(all_targets, axis=0)

        # Desnormalizar las predicciones y los valores reales
        all_preds_denorm = scaler_target.inverse_transform(all_preds.reshape(-1, all_preds.shape[-1])).reshape(all_preds.shape)
        all_targets_denorm = scaler_target.inverse_transform(all_targets.reshape(-1, all_targets.shape[-1])).reshape(all_targets.shape)

        # Calcular métricas en la escala original
        mse = mean_squared_error(all_targets_denorm.reshape(-1), all_preds_denorm.reshape(-1))
        mae = mean_absolute_error(all_targets_denorm.reshape(-1), all_preds_denorm.reshape(-1))
        r2 = r2_score(all_targets_denorm.reshape(-1), all_preds_denorm.reshape(-1))

        avg_loss = total_loss / len(data_loader)
        return avg_loss, mse, mae, r2, all_preds_denorm, all_targets_denorm

    # Función para probar el modelo con un ejemplo específico
    def test_model(model, input_data, scaler_input, scaler_target, device):
        model.eval()
        with torch.no_grad():
            # Normalizar el dato de entrada
            input_np = input_data.numpy().reshape(-1, input_data.shape[-1])
            input_normalized = scaler_input.transform(input_np).reshape(input_data.shape)
            input_tensor = torch.tensor(input_normalized, dtype=torch.float32).to(device)

            # Hacer la predicción
            if len(input_tensor.shape) == 2:  # Si es un solo ejemplo, agregar dimensión de batch
                input_tensor = input_tensor.unsqueeze(0)
            output = model(input_tensor)

            # Desnormalizar la predicción
            output_np = output.cpu().numpy()
            output_denorm = scaler_target.inverse_transform(output_np.reshape(-1, output_np.shape[-1])).reshape(output_np.shape)
            return output_denorm

    # Configuración
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # Hiperparámetros del modelo (AJUSTA ESTOS VALORES SEGÚN LOS MEJORES HIPERPARÁMETROS DE OPTUNA)
    # Reemplaza estos valores con los que obtuviste al entrenar el modelo
    d_model = 128              # Ajusta según best_params['d_model']
    nhead = 4                  # Ajusta según best_params['nhead']
    num_encoder_layers = 3     # Ajusta según best_params['num_encoder_layers']
    dim_feedforward = 512      # Ajusta según best_params['dim_feedforward'] (esto causaba el error, era 512 en el modelo guardado)
    dropout = 0.1              # Ajusta según best_params['dropout']
    input_dim = 6              # Número de características de entrada por par de relés
    output_dim = 4             # Número de características de salida por par de relés
    batch_size = 32            # Ajusta según best_params['batch_size']

    # Cargar el modelo
    model_path = 'best_transformer_model.pth'
    model = load_model(model_path, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout, device)

    # Crear DataLoader para los datos de validación (o prueba)
    # Asume que tienes val_input y val_target del código anterior
    val_dataset = TensorDataset(val_input, val_target)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Definir el criterio de pérdida
    criterion = nn.MSELoss()

    # Evaluar el modelo en los datos de validación
    avg_loss, mse, mae, r2, predictions, true_values = evaluate_model(model, val_loader, criterion, scaler_target, device)

    # Mostrar estadísticas
    print("Estadísticas del modelo en los datos de validación:")
    print(f"Pérdida promedio (MSE normalizado): {avg_loss:.4f}")
    print(f"Error Cuadrático Medio (MSE desnormalizado): {mse:.4f}")
    print(f"Error Absoluto Medio (MAE desnormalizado): {mae:.4f}")
    print(f"Coeficiente de Determinación (R² desnormalizado): {r2:.4f}")

    # Probar el modelo con un ejemplo específico (por ejemplo, el primer escenario de validación)
    test_input = val_input[0]  # Primer escenario de validación
    predicted_output = test_model(model, test_input, scaler_input, scaler_target, device)
    true_output = scaler_target.inverse_transform(val_target[0].numpy().reshape(-1, val_target.shape[-1])).reshape(val_target[0].shape)

    # Mostrar los resultados de la prueba
    print("\nPrueba con un ejemplo específico (primer escenario de validación):")
    print("Predicciones (desnormalizadas) para los primeros 5 pares de relés:")
    for i in range(min(74, predicted_output.shape[1])):  # Mostrar solo los primeros 5 pares
        print(f"Par {i+1}:")
        print(f"  Predicción: {predicted_output[0, i, :]}")
        print(f"  Valor real: {true_output[i, :]}")

RuntimeError: Error(s) in loading state_dict for TransformerModel:
	size mismatch for input_proj.weight: copying a param with shape torch.Size([64, 6]) from checkpoint, the shape in current model is torch.Size([128, 6]).
	size mismatch for input_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for pos_encoder.pe: copying a param with shape torch.Size([5000, 1, 64]) from checkpoint, the shape in current model is torch.Size([5000, 1, 128]).
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).
	size mismatch for transformer_encoder.layers.0.self_attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for transformer_encoder.layers.0.self_attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).
	size mismatch for transformer_encoder.layers.0.self_attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.0.linear1.weight: copying a param with shape torch.Size([512, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).
	size mismatch for transformer_encoder.layers.0.linear2.weight: copying a param with shape torch.Size([64, 512]) from checkpoint, the shape in current model is torch.Size([128, 512]).
	size mismatch for transformer_encoder.layers.0.linear2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.0.norm1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.0.norm1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.0.norm2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.0.norm2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.1.self_attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).
	size mismatch for transformer_encoder.layers.1.self_attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for transformer_encoder.layers.1.self_attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).
	size mismatch for transformer_encoder.layers.1.self_attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.1.linear1.weight: copying a param with shape torch.Size([512, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).
	size mismatch for transformer_encoder.layers.1.linear2.weight: copying a param with shape torch.Size([64, 512]) from checkpoint, the shape in current model is torch.Size([128, 512]).
	size mismatch for transformer_encoder.layers.1.linear2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.1.norm1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.1.norm1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.1.norm2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.1.norm2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.2.self_attn.in_proj_weight: copying a param with shape torch.Size([192, 64]) from checkpoint, the shape in current model is torch.Size([384, 128]).
	size mismatch for transformer_encoder.layers.2.self_attn.in_proj_bias: copying a param with shape torch.Size([192]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for transformer_encoder.layers.2.self_attn.out_proj.weight: copying a param with shape torch.Size([64, 64]) from checkpoint, the shape in current model is torch.Size([128, 128]).
	size mismatch for transformer_encoder.layers.2.self_attn.out_proj.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.2.linear1.weight: copying a param with shape torch.Size([512, 64]) from checkpoint, the shape in current model is torch.Size([512, 128]).
	size mismatch for transformer_encoder.layers.2.linear2.weight: copying a param with shape torch.Size([64, 512]) from checkpoint, the shape in current model is torch.Size([128, 512]).
	size mismatch for transformer_encoder.layers.2.linear2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.2.norm1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.2.norm1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.2.norm2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for transformer_encoder.layers.2.norm2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([128]).
	size mismatch for output_proj.weight: copying a param with shape torch.Size([4, 64]) from checkpoint, the shape in current model is torch.Size([4, 128]).

# export data


In [None]:
import pandas as pd # Asegúrate de tener pandas instalado (pip install pandas)
import numpy as np
import time
import traceback

# --- 9. EXPORTACIÓN DETALLADA DE RESULTADOS (SOLO CONJUNTO DE ENTRENAMIENTO) ---
print("\n" + "="*60)
print("--- INICIANDO EXPORTACIÓN DETALLADA (SOLO DATOS DE ENTRENAMIENTO) ---")

# Verificar que las variables necesarias existen
required_vars_export = ['final_model', 'scaler_input', 'scaler_target',
                        'input_tensor', 'target_tensor', 'DEVICE', 'train_idx'] # Asegurar que train_idx exista

if all(var in globals() for var in required_vars_export):
    print("Variables necesarias encontradas. Procediendo con la exportación...")

    # Preparar listas para almacenar los datos aplanados
    training_results_list = []

    num_train_scenarios = len(train_idx)
    if num_train_scenarios == 0:
        print("Error: La lista de índices de entrenamiento (train_idx) está vacía.")
    else:
        seq_len = input_tensor.shape[1]
        num_input_features = input_tensor.shape[2]
        num_output_features = target_tensor.shape[2]

        # Definir nombres de columnas (GENÉRICOS - AJUSTA SEGÚN TUS DATOS)
        input_cols = [f'Input_F{i+1}' for i in range(num_input_features)]
        # ¡AJUSTA ESTOS NOMBRES A TDS, PICKUP, etc.!
        target_cols = [f'Target_F{i+1}' for i in range(num_output_features)]
        pred_cols = [f'Pred_F{i+1}' for i in range(num_output_features)] # Salidas del modelo desnormalizadas

        print(f"Se procesarán {num_train_scenarios} escenarios del conjunto de entrenamiento.")
        start_export_time = time.time()

        # Iterar SOLAMENTE por los índices del conjunto de entrenamiento
        count = 0
        for scenario_index in train_idx:
            count += 1
            print(f"Procesando escenario de entrenamiento {count}/{num_train_scenarios} (Índice Original: {scenario_index})...")
            try:
                # Obtener datos originales del escenario actual
                single_input_original = input_tensor[scenario_index]
                single_target_original = target_tensor[scenario_index]

                # Realizar la predicción desnormalizada para este escenario
                # Esta función devuelve la salida del modelo ya desnormalizada
                prediction_denormalized = predict_single_scenario(
                    final_model,
                    single_input_original,
                    scaler_input,
                    scaler_target,
                    DEVICE
                ) # Shape: (seq_len, output_features)

                # Convertir target a numpy si es necesario
                if isinstance(single_target_original, torch.Tensor):
                    target_np_original = single_target_original.cpu().numpy()
                else:
                    target_np_original = np.array(single_target_original)

                # Convertir input a numpy si es necesario
                if isinstance(single_input_original, torch.Tensor):
                     input_np_original = single_input_original.cpu().numpy()
                else:
                     input_np_original = np.array(single_input_original)

                # Iterar por cada par de relés (paso de tiempo) dentro del escenario
                for j in range(seq_len):
                    row_data = {
                        'Scenario_Index': scenario_index, # Índice Original
                        'Relay_Pair_Index': j
                    }
                    # Añadir entradas originales
                    for k in range(num_input_features):
                        row_data[input_cols[k]] = input_np_original[j, k]
                    # Añadir salidas objetivo originales (reales)
                    for k in range(num_output_features):
                        row_data[target_cols[k]] = target_np_original[j, k]
                    # Añadir salidas predichas por el modelo (desnormalizadas)
                    for k in range(num_output_features):
                        row_data[pred_cols[k]] = prediction_denormalized[j, k]

                    training_results_list.append(row_data)

            except Exception as e:
                print(f"\n*** Error procesando el escenario de entrenamiento con índice original {scenario_index}: ***")
                print(e)
                traceback.print_exc()
                print(f"*** Saltando escenario {scenario_index} ***")
                continue # Continuar con el siguiente escenario

        # Crear DataFrame de Pandas con los resultados de entrenamiento
        print("\nCreando DataFrame con los resultados del conjunto de entrenamiento...")
        if training_results_list: # Solo si se procesó algo
            try:
                results_df_train = pd.DataFrame(training_results_list)
                column_order = ['Scenario_Index', 'Relay_Pair_Index'] + input_cols + target_cols + pred_cols
                results_df_train = results_df_train[column_order]

                # Exportar a CSV
                output_filename = 'training_set_predictions.csv'
                print(f"Exportando resultados a '{output_filename}'...")
                results_df_train.to_csv(output_filename, index=False, float_format='%.6f')

                end_export_time = time.time()
                print("\n--- EXPORTACIÓN DE ENTRENAMIENTO COMPLETADA ---")
                print(f"Resultados guardados en: {output_filename}")
                print(f"Número total de filas exportadas (entrenamiento): {len(results_df_train)}")
                print(f"Tiempo total de exportación: {end_export_time - start_export_time:.2f} segundos.")

            except Exception as e:
                print("\n*** Error al crear o guardar el DataFrame/CSV de entrenamiento: ***")
                print(e)
                traceback.print_exc()
        else:
            print("\nNo se procesaron datos de entrenamiento, no se generó archivo CSV.")

else:
    print("\nNo se puede ejecutar la exportación detallada para el conjunto de entrenamiento.")
    print("Asegúrate de que el entrenamiento se completó y las variables")
    print(f"{required_vars_export} están disponibles en el entorno.")
    print("Verifica también que la función 'predict_single_scenario' esté definida.")

print("=" * 60 + "\n")