# Crear  Tensores

In [None]:

import torch
import json
from collections import defaultdict

# Cargar los archivos JSON de entrada y salida
with open("../data/processed/model/input_data.json", "r") as f:
    input_data_json = json.load(f)

with open("../data/processed/model/reference_data.json", "r") as f:
    output_data_json = json.load(f)

# Agrupar los datos por scenario_id
input_by_scenario = defaultdict(list)
output_by_scenario = defaultdict(list)

# Agrupar datos de entrada por scenario_id
for entry in input_data_json:
    scenario_id = entry["scenario_id"]
    input_by_scenario[scenario_id].append(entry)

# Agrupar datos de salida por scenario_id
for entry in output_data_json:
    scenario_id = entry["scenario_id"]
    output_by_scenario[scenario_id].append(entry)

# Obtener la lista de escenarios (deberían ser 68)
scenarios = sorted(input_by_scenario.keys())
if len(scenarios) != 68:
    print(f"Advertencia: Se esperaban 68 escenarios, pero se encontraron {len(scenarios)}")

# Crear listas para almacenar los datos de entrada y salida
input_data = []
target_data = []

# Función para convertir valores a float de manera segura
def safe_float(value, default=0.0):
    try:
        return float(value)
    except (ValueError, TypeError):
        return default

# Procesar cada escenario
for scenario_id in scenarios:
    scenario_inputs_list = input_by_scenario[scenario_id]
    scenario_outputs_list = output_by_scenario[scenario_id]

    # Verificar que el número de pares coincida entre entrada y salida
    if len(scenario_inputs_list) != len(scenario_outputs_list):
        print(f"Advertencia: El escenario {scenario_id} tiene {len(scenario_inputs_list)} entradas y {len(scenario_outputs_list)} salidas")

    scenario_inputs = []
    scenario_targets = []

    # Procesar cada par de relés en el escenario (hasta 100)
    num_pairs = min(len(scenario_inputs_list), 100)
    for pair_idx in range(num_pairs):
        pair_input = scenario_inputs_list[pair_idx]
        pair_output = scenario_outputs_list[pair_idx] if pair_idx < len(scenario_outputs_list) else None

        # Datos de entrada
        inputs = [
            # safe_float(pair_input["fault"]),
            safe_float(pair_input["main_relay"]["Ishc"]),
            safe_float(pair_input["main_relay"]["Time_out"]),
            safe_float(pair_input["main_relay"]["current_a"]),
            safe_float(pair_input["backup_relay"]["Ishc"]),
            safe_float(pair_input["backup_relay"]["Time_out"]),
            safe_float(pair_input["backup_relay"]["current_a"])
        ]
        scenario_inputs.append(inputs)

        # Datos de salida (si existen)
        if pair_output:
            targets = [
                safe_float(pair_output["main_relay"]["pick_up"]),
                safe_float(pair_output["main_relay"]["TDS"]),
                safe_float(pair_output["backup_relay"]["pick_up"]),
                safe_float(pair_output["backup_relay"]["TDS"])
            ]
        else:
            targets = [0.0] * 4  # Rellenar con ceros si no hay salida correspondiente
        scenario_targets.append(targets)

    # Rellenar con ceros si hay menos de 100 pares
    while len(scenario_inputs) < 100:
        scenario_inputs.append([0.0] * 6)
        scenario_targets.append([0.0] * 4)

    input_data.append(scenario_inputs)
    target_data.append(scenario_targets)

# Convertir las listas a tensores de PyTorch
input_tensor = torch.tensor(input_data, dtype=torch.float32)  # Forma: (68, 100, 6)
target_tensor = torch.tensor(target_data, dtype=torch.float32)  # Forma: (68, 100, 4)

# Verificar las formas de los tensores
print("Forma del tensor de entrada:", input_tensor.shape)
print("Forma del tensor de salida:", target_tensor.shape)



# Implementación del transformer 

In [20]:
# -*- coding: utf-8 -*-
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F # Importado para F.relu
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import optuna
import math
import time # Para medir tiempo
import traceback # Para imprimir errores detallados

# --- 1. Carga y Preprocesamiento de Datos ---
# ######################################################################### #
# ############# ¡¡¡IMPORTANTE!!! ############# #
# REEMPLAZA ESTA SECCIÓN CON TU CÓDIGO REAL PARA CARGAR Y PREPARAR #
# 'input_tensor' y 'target_tensor' #
# Deben tener la forma: (num_escenarios, secuencia_len, num_features) #
# Y ser tensores de PyTorch. #
# ######################################################################### #
print("Cargando y preprocesando datos...")
# Ejemplo Placeholder (DEBES REEMPLAZAR ESTO):
try:
    # Intenta cargar desde archivos si ya los tienes guardados
    input_tensor = torch.load('input_tensor.pt')
    target_tensor = torch.load('target_tensor.pt')
    print("Tensores cargados desde archivos .pt")
except FileNotFoundError:
    print("Archivos .pt no encontrados, generando datos de ejemplo...")
    # Generar datos de ejemplo si no se encuentran archivos
    num_escenarios = 500 # Reducido para ejemplo rápido
    secuencia_len = 74   # Número de pares de relés
    input_features = 6
    output_features = 4
    input_tensor = torch.rand(num_escenarios, secuencia_len, input_features, dtype=torch.float32)
    # Asegurar que los datos objetivo de ejemplo sean no negativos
    target_tensor = torch.abs(torch.randn(num_escenarios, secuencia_len, output_features, dtype=torch.float32)) * 5
    # Guardar para la próxima vez (opcional)
    # torch.save(input_tensor, 'input_tensor.pt')
    # torch.save(target_tensor, 'target_tensor.pt')
    print("Datos de ejemplo generados.")
except Exception as e:
     print(f"Error al cargar/generar datos: {e}. Saliendo.")
     exit()

# Asegurar que los tensores son float32
input_tensor = input_tensor.float()
target_tensor = target_tensor.float()

print(f"Forma Input Tensor Original: {input_tensor.shape}")
print(f"Forma Target Tensor Original: {target_tensor.shape}")
# ######################################################################### #
# ############# FIN DE LA SECCIÓN A REEMPLAZAR ############# #
# ######################################################################### #


# --- 2. Normalización ---
print("Normalizando datos...")
# Guardar scalers para uso posterior
scaler_input = StandardScaler()
scaler_target = StandardScaler()

try:
    # Aplanar para ajustar los scalers
    input_np_original = input_tensor.numpy().reshape(-1, input_tensor.shape[-1])
    target_np_original = target_tensor.numpy().reshape(-1, target_tensor.shape[-1])

    # Validar que no haya NaNs antes de escalar
    if np.isnan(input_np_original).any() or np.isinf(input_np_original).any():
        raise ValueError("Datos de entrada originales contienen NaNs o Infinitos.")
    if np.isnan(target_np_original).any() or np.isinf(target_np_original).any():
         raise ValueError("Datos objetivo originales contienen NaNs o Infinitos.")

    # Ajustar y transformar
    input_normalized = scaler_input.fit_transform(input_np_original)
    target_normalized = scaler_target.fit_transform(target_np_original)

    # Validar que no haya NaNs después de escalar
    if np.isnan(input_normalized).any() or np.isinf(input_normalized).any():
        raise ValueError("Datos de entrada contienen NaNs/Inf DESPUÉS de normalizar.")
    if np.isnan(target_normalized).any() or np.isinf(target_normalized).any():
        raise ValueError("Datos objetivo contienen NaNs/Inf DESPUÉS de normalizar.")

    # Restaurar forma original
    input_tensor_normalized = torch.tensor(input_normalized, dtype=torch.float32).reshape(input_tensor.shape)
    target_tensor_normalized = torch.tensor(target_normalized, dtype=torch.float32).reshape(target_tensor.shape)
    print("Datos normalizados correctamente.")

except Exception as e:
    print(f"Error durante la normalización: {e}")
    traceback.print_exc()
    exit()


# --- 3. División Entrenamiento / Validación e Identificación ---
print("Dividiendo datos...")
num_total_escenarios = input_tensor_normalized.shape[0]
indices = list(range(num_total_escenarios)) # Usar lista para train_test_split
test_split_percentage = 0.2
random_seed = 42

try:
    train_idx, val_idx = train_test_split(indices, test_size=test_split_percentage, random_state=random_seed, shuffle=True)
except ValueError as e:
     print(f"Error en train_test_split: {e}. ¿Hay suficientes datos?")
     exit()

# Convertir a listas y ordenar para facilitar visualización/manejo
train_idx = sorted(list(train_idx))
val_idx = sorted(list(val_idx))

# --- Impresión de detalles de la división ---
print("\n" + "="*60)
print("--- DETALLES DE LA DIVISIÓN DE DATOS ---")
print(f"Número total de escenarios originales: {num_total_escenarios}")
print(f"Porcentaje para validación (test_size): {test_split_percentage * 100:.1f}%")
print(f"Semilla aleatoria (random_state): {random_seed}")
print("-" * 60)
print(f"Número de escenarios para ENTRENAMIENTO: {len(train_idx)}")
print(f"Índices originales usados para ENTRENAMIENTO (primeros 50):")
print(train_idx[:50])
if len(train_idx) > 50: print("...")
print("-" * 60)
print(f"Número de escenarios para VALIDACIÓN: {len(val_idx)}")
print(f"Índices originales usados para VALIDACIÓN (primeros 50):")
print(val_idx[:50])
if len(val_idx) > 50: print("...")
print("=" * 60 + "\n")
# --- Fin Impresión ---

# Crear los tensores de entrenamiento/validación usando los índices
try:
    train_input = input_tensor_normalized[train_idx]
    train_target = target_tensor_normalized[train_idx]
    val_input = input_tensor_normalized[val_idx]
    val_target = target_tensor_normalized[val_idx]
    print(f"Forma datos entrenamiento (Input): {train_input.shape}")
    print(f"Forma datos validación (Input): {val_input.shape}")
except IndexError:
     print("Error al crear subconjuntos de datos. ¿Coinciden los índices con las dimensiones?")
     exit()

# --- 4. Definición del Modelo Transformer (con ReLU al final) ---

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model) # Shape: (1, max_len, d_model) para batch_first
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """ Args: x: Tensor, shape [batch_size, seq_len, d_model] """
        x = x + self.pe[:, :x.size(1), :]
        return self.dropout(x)

class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.d_model = d_model

        self.input_proj = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=nhead, dim_feedforward=dim_feedforward,
            dropout=dropout, batch_first=True, activation=F.gelu
        )
        encoder_norm = nn.LayerNorm(d_model)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers, norm=encoder_norm)

        self.output_proj = nn.Linear(d_model, output_dim)
        self.init_weights()

    def init_weights(self):
        initrange = 0.1
        nn.init.uniform_(self.input_proj.weight, -initrange, initrange)
        nn.init.zeros_(self.input_proj.bias)
        nn.init.uniform_(self.output_proj.weight, -initrange, initrange)
        nn.init.zeros_(self.output_proj.bias)

    def forward(self, src, src_mask=None, src_key_padding_mask=None):
        src = self.input_proj(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, mask=src_mask, src_key_padding_mask=src_key_padding_mask)
        output = self.output_proj(output)
        output = F.relu(output) # Asegurar salida no negativa
        return output

# --- 5. Funciones de Entrenamiento, Evaluación y Predicción ---

def train_epoch(model, data_loader, criterion, optimizer, device, scaler=None):
    model.train()
    total_loss = 0.0
    for batch_input, batch_target in data_loader:
        batch_input, batch_target = batch_input.to(device), batch_target.to(device)
        optimizer.zero_grad(set_to_none=True)

        use_amp_here = scaler is not None
        with torch.cuda.amp.autocast(enabled=use_amp_here):
            output = model(batch_input)
            loss = criterion(output, batch_target)

        if use_amp_here:
            scaler.scale(loss).backward()
            scaler.unscale_(optimizer) # Desescalar antes de clip
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

        total_loss += loss.item()
    return total_loss / len(data_loader)

def evaluate_epoch(model, data_loader, criterion, device):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch_input, batch_target in data_loader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            with torch.cuda.amp.autocast(enabled=(device.type == 'cuda')): # AMP también en eval
                 output = model(batch_input)
                 loss = criterion(output, batch_target)
            total_loss += loss.item()
    return total_loss / len(data_loader)

def train_model(model, train_loader, val_loader, criterion, optimizer, scheduler, num_epochs, device, model_save_path='best_transformer_model.pth', use_amp=False):
    best_val_loss = float('inf')
    epochs_no_improve = 0
    patience = 15
    scaler = torch.cuda.amp.GradScaler() if use_amp and device.type == 'cuda' else None
    if scaler: print("Usando Precisión Mixta Automática (AMP) para el entrenamiento.")

    for epoch in range(num_epochs):
        start_time = time.time()
        train_loss = train_epoch(model, train_loader, criterion, optimizer, device, scaler)
        val_loss = evaluate_epoch(model, val_loader, criterion, device)
        end_time = time.time()

        # Validar pérdida de validación
        if np.isnan(val_loss) or np.isinf(val_loss):
            print(f"Epoch {epoch+1:03d}/{num_epochs}: Pérdida de validación inválida ({val_loss}). Deteniendo entrenamiento.")
            break # Detener si la pérdida diverge

        lr_current = optimizer.param_groups[0]['lr']
        print(f'Epoch {epoch+1:03d}/{num_epochs}, Train Loss: {train_loss:.5f}, Val Loss: {val_loss:.5f}, LR: {lr_current:.6f}, Time: {end_time - start_time:.2f}s')

        if scheduler:
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                 scheduler.step(val_loss)
            # Añadir 'else' si usas otros tipos de scheduler que requieran step()

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            try:
                torch.save(model.state_dict(), model_save_path)
                print(f'    --> Modelo guardado en {model_save_path} (Val Loss: {best_val_loss:.5f})')
            except Exception as e:
                print(f"    --> ERROR al guardar el modelo: {e}")
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1

        if epochs_no_improve >= patience:
            print(f'    --> Early stopping activado tras {patience} épocas sin mejora.')
            break

    print(f'Entrenamiento finalizado. Mejor Val Loss: {best_val_loss:.5f}')
    # Cargar el mejor modelo al final para devolverlo
    if best_val_loss != float('inf'): # Solo cargar si se guardó algo
        try:
            model.load_state_dict(torch.load(model_save_path))
            print(f"Mejor modelo cargado desde {model_save_path}")
        except Exception as e:
            print(f"Error al cargar el mejor modelo guardado: {e}")
    else:
        print("Advertencia: No se guardó ningún modelo (la pérdida de validación nunca mejoró).")

    return best_val_loss, model

# --- Función para Predicción de un Solo Escenario ---
def predict_single_scenario(model, scenario_input_original, scaler_input, scaler_target, device):
    model.eval()
    if isinstance(scenario_input_original, torch.Tensor):
        input_np_original = scenario_input_original.cpu().numpy()
    else:
        input_np_original = np.array(scenario_input_original)

    if input_np_original.ndim != 2:
         raise ValueError(f"La entrada del escenario debe ser 2D (seq_len, features), pero tiene forma {input_np_original.shape}")

    seq_len, expected_features = input_np_original.shape
    if not hasattr(scaler_input, 'n_features_in_') or expected_features != scaler_input.n_features_in_:
         # Añadir chequeo por si el scaler no está ajustado o las features no coinciden
         raise ValueError(f"Inconsistencia en características de entrada: Scaler espera {getattr(scaler_input, 'n_features_in_', 'N/A')} features, datos tienen {expected_features}.")

    input_np_normalized = scaler_input.transform(input_np_original)
    input_tensor_norm = torch.tensor(input_np_normalized, dtype=torch.float32).unsqueeze(0).to(device)

    with torch.no_grad():
        with torch.cuda.amp.autocast(enabled=(device.type == 'cuda')):
             predicted_output_normalized = model(input_tensor_norm)

    pred_np_norm = predicted_output_normalized.squeeze(0).cpu().numpy()
    pred_np_denorm = scaler_target.inverse_transform(pred_np_norm)
    pred_np_denorm = np.maximum(pred_np_denorm, 0) # Clip final
    return pred_np_denorm


# --- 6. Optimización de Hiperparámetros con Optuna ---

# Obtener dimensiones y dispositivo
try:
    INPUT_DIM = train_input.shape[-1]
    OUTPUT_DIM = train_target.shape[-1]
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else ('mps' if torch.backends.mps.is_available() else 'cpu'))
    print(f"Usando dispositivo: {DEVICE}")
    print(f"Input dim: {INPUT_DIM}, Output dim: {OUTPUT_DIM}")
    USE_AMP = (DEVICE.type == 'cuda') # Usar AMP solo si es CUDA
except Exception as e:
    print(f"Error al determinar dimensiones o dispositivo: {e}. Saliendo.")
    exit()

def objective(trial):
    d_model = trial.suggest_categorical('d_model', [64, 128, 256])
    ff_multiplier = trial.suggest_categorical('ff_multiplier', [2, 4])
    dim_feedforward = d_model * ff_multiplier
    trial.set_user_attr('calculated_dim_feedforward', dim_feedforward)

    possible_nheads = [h for h in [2, 4, 8, 16] if d_model % h == 0] # Añadido 16
    if not possible_nheads: nhead = 2
    else: nhead = trial.suggest_categorical('nhead', possible_nheads)

    num_encoder_layers = trial.suggest_int('num_encoder_layers', 1, 6) # Aumentado rango
    dropout = trial.suggest_float('dropout', 0.05, 0.35) # Rango ajustado
    learning_rate = trial.suggest_float('learning_rate', 5e-6, 1e-3, log=True)
    batch_size = trial.suggest_categorical('batch_size', [16, 32, 64, 128]) # Añadido 128
    weight_decay = trial.suggest_float('weight_decay', 1e-7, 1e-3, log=True)

    # DataLoaders para el trial
    try:
        train_loader = DataLoader(TensorDataset(train_input, train_target), batch_size=batch_size, shuffle=True, num_workers=2, pin_memory=True, persistent_workers=True)
        val_loader = DataLoader(TensorDataset(val_input, val_target), batch_size=batch_size, num_workers=2, pin_memory=True, persistent_workers=True)
    except Exception: # Fallback más general
        print("Warning: DataLoader con workers/pin_memory falló, usando configuración básica.")
        train_loader = DataLoader(TensorDataset(train_input, train_target), batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(TensorDataset(val_input, val_target), batch_size=batch_size)

    model = TransformerModel(
        input_dim=INPUT_DIM, output_dim=OUTPUT_DIM, d_model=d_model, nhead=nhead,
        num_encoder_layers=num_encoder_layers, dim_feedforward=dim_feedforward, dropout=dropout
    ).to(DEVICE)

    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5, verbose=False)
    scaler_optuna = torch.cuda.amp.GradScaler() if USE_AMP else None

    num_optuna_epochs = 30
    print(f"\n--- Optuna Trial {trial.number} ---")
    print(f"Params: d_model={d_model}, nhead={nhead}, layers={num_encoder_layers}, ff={dim_feedforward}({ff_multiplier}x), drop={dropout:.3f}, lr={learning_rate:.6f}, bs={batch_size}, wd={weight_decay:.7f}")

    best_trial_val_loss = float('inf')
    epochs_no_improve = 0
    patience_optuna = 7

    for epoch in range(num_optuna_epochs):
        train_loss = train_epoch(model, train_loader, criterion, optimizer, DEVICE, scaler_optuna)
        val_loss = evaluate_epoch(model, val_loader, criterion, DEVICE)

        if np.isnan(val_loss) or np.isinf(val_loss):
             print(f"    --> Trial {trial.number} divergió (Val Loss: {val_loss}). Podando.")
             raise optuna.TrialPruned()

        print(f'    Epoch {epoch+1:02d}/{num_optuna_epochs}, Train Loss: {train_loss:.5f}, Val Loss: {val_loss:.5f}')
        if scheduler: scheduler.step(val_loss)

        if val_loss < best_trial_val_loss:
            best_trial_val_loss = val_loss
            epochs_no_improve = 0
        else:
             epochs_no_improve += 1

        if epochs_no_improve >= patience_optuna:
             print(f'    --> Optuna trial early stopping at epoch {epoch+1}. Val Loss: {best_trial_val_loss:.5f}')
             break

        trial.report(val_loss, epoch)
        if trial.should_prune():
            print(f"    --> Trial {trial.number} podado en la época {epoch+1}.")
            raise optuna.TrialPruned()

    return best_trial_val_loss

# Ejecutar Optuna
print("\n" + "="*60)
print("--- INICIANDO OPTIMIZACIÓN CON OPTUNA ---")
study_name = "transformer_optimization_v1" # Nombre para posible reanudación
storage_name = f"sqlite:///{study_name}.db" # Guardar estudio en archivo sqlite

try:
    # Intentar cargar estudio existente o crear uno nuevo
    study = optuna.create_study(
        study_name=study_name,
        storage=storage_name,
        direction='minimize',
        sampler=optuna.samplers.TPESampler(seed=random_seed),
        pruner=optuna.pruners.MedianPruner(n_warmup_steps=5), # Podar tras 5 épocas
        load_if_exists=True # Cargar si ya existe
    )
    print(f"Estudio Optuna '{study_name}' cargado/creado desde '{storage_name}'.")
    print(f"Número de trials ya completados: {len(study.trials)}")

    # Ejecutar optimize
    study.optimize(
        objective,
        n_trials=25, # Número de NUEVOS trials a ejecutar
        timeout=None, # Sin límite de tiempo
        n_jobs=1, # Usar 1 job para evitar problemas con GPU/MPS
        catch=(ValueError, RuntimeError, ) # Capturar errores comunes sin detener todo el estudio
    )

except KeyboardInterrupt:
    print("\nOptimización interrumpida por el usuario.")
except Exception as e:
    print(f"\nError durante la optimización de Optuna: {e}")
    traceback.print_exc()

# Obtener mejores parámetros
print("\n" + "="*60)
print("--- OPTIMIZACIÓN FINALIZADA ---")
best_params = {}
best_value = float('inf')

try:
    if study.best_trial:
        best_params = study.best_trial.params
        best_value = study.best_trial.value
        print(f"Mejor Valor (Val Loss): {best_value:.5f}")
        print("Mejores Hiperparámetros encontrados:")
        print(best_params)
        # Añadir valor calculado
        if 'ff_multiplier' in best_params and 'd_model' in best_params:
            best_params['calculated_dim_feedforward'] = best_params['d_model'] * best_params['ff_multiplier']
    else:
        print("No se encontró un 'best_trial' en el estudio Optuna.")

except Exception as e:
    print(f"Error al obtener resultados de Optuna: {e}")

# Fallback a parámetros por defecto si Optuna falló o no encontró nada
if not best_params:
    print("Usando parámetros por defecto debido a fallo o falta de resultados en Optuna.")
    best_params = {'d_model': 128, 'ff_multiplier': 4, 'nhead': 4, 'num_encoder_layers': 3,
                   'dropout': 0.15, 'learning_rate': 0.0005, 'batch_size': 64, 'weight_decay': 1e-5}
    # Calcular dim_feedforward para los por defecto
    best_params['calculated_dim_feedforward'] = best_params['d_model'] * best_params['ff_multiplier']
    print(f"Parámetros por defecto: {best_params}")


# Ajuste final de nhead (redundante si la lógica en objective es correcta)
if 'd_model' in best_params:
    d_model = best_params['d_model']
    possible_nheads = [h for h in [2, 4, 8, 16] if d_model % h == 0]
    if not possible_nheads:
        best_params['nhead'] = 2 # Fallback
    elif 'nhead' not in best_params or best_params['nhead'] not in possible_nheads:
        best_params['nhead'] = possible_nheads[0] # Elegir el primero compatible
        print(f"Ajuste post-optuna: nhead establecido a {best_params['nhead']} para ser compatible con d_model={d_model}")
else:
     # Si ni siquiera d_model está, algo falló gravemente
     print("Error crítico: 'd_model' no encontrado en best_params. Usando nhead=4 por defecto.")
     best_params['nhead'] = 4


# --- 7. Entrenamiento Final con los Mejores Hiperparámetros ---
print("\n" + "="*60)
print("--- INICIANDO ENTRENAMIENTO FINAL ---")
print(f"Usando parámetros: {best_params}")

# Crear DataLoaders finales
final_batch_size = best_params.get('batch_size', 64) # Usar get con default
try:
    final_train_loader = DataLoader(TensorDataset(train_input, train_target), batch_size=final_batch_size, shuffle=True, num_workers=2, pin_memory=True, persistent_workers=True)
    final_val_loader = DataLoader(TensorDataset(val_input, val_target), batch_size=final_batch_size, num_workers=2, pin_memory=True, persistent_workers=True)
except Exception:
    print("Warning: DataLoader con workers/pin_memory falló, usando configuración básica.")
    final_train_loader = DataLoader(TensorDataset(train_input, train_target), batch_size=final_batch_size, shuffle=True)
    final_val_loader = DataLoader(TensorDataset(val_input, val_target), batch_size=final_batch_size)

# Crear el modelo final
try:
    final_model = TransformerModel(
        input_dim=INPUT_DIM,
        output_dim=OUTPUT_DIM,
        d_model=best_params['d_model'],
        nhead=best_params['nhead'],
        num_encoder_layers=best_params['num_encoder_layers'],
        dim_feedforward=best_params['calculated_dim_feedforward'], # Usa el valor calculado
        dropout=best_params['dropout']
    ).to(DEVICE)
except KeyError as e:
     print(f"Error: Falta el hiperparámetro '{e}' en best_params. No se puede crear el modelo final.")
     exit()

criterion = nn.MSELoss()
optimizer = optim.AdamW(final_model.parameters(), lr=best_params['learning_rate'], weight_decay=best_params['weight_decay'])
final_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5, verbose=True)

# Entrenar
final_num_epochs = 150 # Ajusta según necesidad
model_save_filename = 'best_transformer_model_relu.pth'
best_final_loss, final_model = train_model(
    final_model, final_train_loader, final_val_loader, criterion, optimizer, final_scheduler,
    num_epochs=final_num_epochs, device=DEVICE, model_save_path=model_save_filename, use_amp=USE_AMP
)

print("\n" + "="*60)
print("--- SCRIPT DE ENTRENAMIENTO FINALIZADO ---")
print(f"Modelo final (con ReLU) entrenado. Mejor Loss de Validación: {best_final_loss:.5f}")
print(f"Modelo guardado como '{model_save_filename}'.")
print("Los scalers (scaler_input, scaler_target) son necesarios para usar el modelo en datos nuevos.")
print("Los índices de entrenamiento/validación están en 'train_idx' y 'val_idx'.")


# --- 8. Prueba con un Escenario Específico (Usando la función definida) ---
print("\n" + "="*60)
print("--- PRUEBA CON UN ESCENARIO ESPECÍFICO ---")

# Asegurarse de que las variables necesarias existen
required_vars_test = ['final_model', 'scaler_input', 'scaler_target', 'input_tensor', 'target_tensor', 'DEVICE']
if all(var in globals() for var in required_vars_test):

    # === PASO 1: Elige el índice del escenario a probar ===
    if 'val_idx' in globals() and len(val_idx) > 0:
        test_scenario_index = val_idx[0] # Primer escenario de validación
        print(f"Se probará con el escenario de índice original: {test_scenario_index} (pertenece al conjunto de validación)")
    else:
        test_scenario_index = 0 # Primer escenario del dataset
        if test_scenario_index < input_tensor.shape[0]:
             print(f"Se probará con el escenario de índice original: {test_scenario_index} (No hay datos de validación, usando índice 0)")
        else:
             print(f"Error: No hay datos disponibles para probar (índice {test_scenario_index} inválido).")
             test_scenario_index = -1 # Marcar como inválido

    if test_scenario_index != -1:
        # === PASO 2: Obtén los datos ORIGINALES (antes de normalizar) ===
        try:
            single_input_original = input_tensor[test_scenario_index]
            single_target_original = target_tensor[test_scenario_index]
            print(f"Forma de la entrada original del escenario: {single_input_original.shape}")
        except IndexError:
            print(f"Error fatal: No se pudo acceder al índice {test_scenario_index} en los tensores originales.")
            test_scenario_index = -1 # Marcar como inválido

    if test_scenario_index != -1:
        # === PASO 3: Realiza la predicción ===
        try:
            start_pred_time = time.time()
            prediction_denormalized = predict_single_scenario(
                final_model, single_input_original, scaler_input, scaler_target, DEVICE
            )
            end_pred_time = time.time()
            print(f"Predicción realizada en {end_pred_time - start_pred_time:.4f} segundos.")
            print(f"\nPredicción para el escenario {test_scenario_index} (Desnormalizada):")
            print("Forma:", prediction_denormalized.shape)
            print("Primeros 5 pasos:\n", prediction_denormalized[:5, :])

            # === PASO 4: (Opcional) Compara con el valor real ===
            if isinstance(single_target_original, torch.Tensor):
                target_np_original = single_target_original.cpu().numpy()
            else:
                 target_np_original = np.array(single_target_original)

            print(f"\nValor Real para el escenario {test_scenario_index} (Original):")
            print("Forma:", target_np_original.shape)
            print("Primeros 5 pasos:\n", target_np_original[:5, :])

            # Calcular error
            mae_escenario = np.mean(np.abs(prediction_denormalized - target_np_original))
            rmse_escenario = np.sqrt(np.mean((prediction_denormalized - target_np_original)**2))
            print(f"\nError Absoluto Medio (MAE) para este escenario: {mae_escenario:.5f}")
            print(f"Raíz del Error Cuadrático Medio (RMSE) para este escenario: {rmse_escenario:.5f}")

            # Verificar no negatividad final
            if np.any(prediction_denormalized < 0):
                 print("\n¡ADVERTENCIA! Se encontraron valores < 0 en la predicción desnormalizada final.")
            else:
                 print("\nConfirmado: No hay valores < 0 en la predicción desnormalizada final.")


        except Exception as e:
            print(f"\n*** Error durante la predicción del escenario específico: ***")
            print(e)
            traceback.print_exc()

else:
    print("\nNo se puede ejecutar la prueba de escenario específico.")
    print("Asegúrate de que el entrenamiento se completó y las variables")
    print(f"{required_vars_test} están disponibles en el entorno.")

print("=" * 60 + "\n")

Cargando y preprocesando datos...
Archivos .pt no encontrados, generando datos de ejemplo...
Datos de ejemplo generados.
Forma Input Tensor Original: torch.Size([500, 74, 6])
Forma Target Tensor Original: torch.Size([500, 74, 4])
Normalizando datos...
Datos normalizados correctamente.
Dividiendo datos...

--- DETALLES DE LA DIVISIÓN DE DATOS ---
Número total de escenarios originales: 500
Porcentaje para validación (test_size): 20.0%
Semilla aleatoria (random_state): 42
------------------------------------------------------------
Número de escenarios para ENTRENAMIENTO: 400
Índices originales usados para ENTRENAMIENTO (primeros 50):
[1, 3, 4, 5, 6, 7, 8, 10, 12, 13, 14, 16, 17, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 31, 32, 34, 35, 36, 37, 38, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 56, 57, 58, 59, 60, 61]
...
------------------------------------------------------------
Número de escenarios para VALIDACIÓN: 100
Índices originales usados para VALIDACIÓN (primeros 50):
[0

[I 2025-04-16 02:37:06,102] A new study created in RDB with name: transformer_optimization_v1


Estudio Optuna 'transformer_optimization_v1' cargado/creado desde 'sqlite:///transformer_optimization_v1.db'.
Número de trials ya completados: 0

--- Optuna Trial 0 ---
Params: d_model=128, nhead=8, layers=5, ff=256(2x), drop=0.056, lr=0.000853, bs=16, wd=0.0000016




    Epoch 01/30, Train Loss: 1.00477, Val Loss: 1.02005
    Epoch 02/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 03/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 04/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 05/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 06/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 07/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 08/30, Train Loss: 0.99794, Val Loss: 1.02005
    --> Optuna trial early stopping at epoch 8. Val Loss: 1.02005


[I 2025-04-16 02:37:20,399] Trial 0 finished with value: 1.0200546383857727 and parameters: {'d_model': 128, 'ff_multiplier': 2, 'nhead': 8, 'num_encoder_layers': 5, 'dropout': 0.05617534828874074, 'learning_rate': 0.000852632076949155, 'batch_size': 16, 'weight_decay': 1.6480446427978977e-06}. Best is trial 0 with value: 1.0200546383857727.



--- Optuna Trial 1 ---
Params: d_model=64, nhead=16, layers=2, ff=128(2x), drop=0.204, lr=0.000115, bs=32, wd=0.0006245
    Epoch 01/30, Train Loss: 1.02351, Val Loss: 1.03662
    Epoch 02/30, Train Loss: 1.00141, Val Loss: 1.03238
    Epoch 03/30, Train Loss: 0.99807, Val Loss: 1.03237
    Epoch 04/30, Train Loss: 0.99716, Val Loss: 1.03238
    Epoch 05/30, Train Loss: 0.99721, Val Loss: 1.03238
    Epoch 06/30, Train Loss: 0.99767, Val Loss: 1.03238
    Epoch 07/30, Train Loss: 0.99944, Val Loss: 1.03238
    Epoch 08/30, Train Loss: 0.99933, Val Loss: 1.03238
    Epoch 09/30, Train Loss: 0.99879, Val Loss: 1.03238
    Epoch 10/30, Train Loss: 0.99850, Val Loss: 1.03238
    --> Optuna trial early stopping at epoch 10. Val Loss: 1.03237


[I 2025-04-16 02:37:29,384] Trial 1 finished with value: 1.0323684513568878 and parameters: {'d_model': 64, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 2, 'dropout': 0.20427033152408347, 'learning_rate': 0.00011538084020058666, 'batch_size': 32, 'weight_decay': 0.000624513957474307}. Best is trial 0 with value: 1.0200546383857727.



--- Optuna Trial 2 ---
Params: d_model=64, nhead=8, layers=6, ff=256(4x), drop=0.128, lr=0.000167, bs=64, wd=0.0007557
    Epoch 01/30, Train Loss: 1.04152, Val Loss: 1.01502
    Epoch 02/30, Train Loss: 1.00104, Val Loss: 1.01505
    Epoch 03/30, Train Loss: 0.99787, Val Loss: 1.01505
    Epoch 04/30, Train Loss: 0.99809, Val Loss: 1.01505
    Epoch 05/30, Train Loss: 0.99442, Val Loss: 1.01505
    Epoch 06/30, Train Loss: 0.99474, Val Loss: 1.01505
    Epoch 07/30, Train Loss: 0.99338, Val Loss: 1.01505
    Epoch 08/30, Train Loss: 0.99355, Val Loss: 1.01505
    --> Optuna trial early stopping at epoch 8. Val Loss: 1.01502


[I 2025-04-16 02:37:39,736] Trial 2 finished with value: 1.0150229632854462 and parameters: {'d_model': 64, 'ff_multiplier': 4, 'nhead': 8, 'num_encoder_layers': 6, 'dropout': 0.12763399448000506, 'learning_rate': 0.00016728371068484117, 'batch_size': 64, 'weight_decay': 0.0007556810141274422}. Best is trial 2 with value: 1.0150229632854462.



--- Optuna Trial 3 ---
Params: d_model=128, nhead=16, layers=3, ff=512(4x), drop=0.131, lr=0.000404, bs=64, wd=0.0001617
    Epoch 01/30, Train Loss: 1.01231, Val Loss: 1.01505
    Epoch 02/30, Train Loss: 1.00112, Val Loss: 1.01505
    Epoch 03/30, Train Loss: 1.00066, Val Loss: 1.01505
    Epoch 04/30, Train Loss: 1.00270, Val Loss: 1.01505
    Epoch 05/30, Train Loss: 0.99768, Val Loss: 1.01505
    Epoch 06/30, Train Loss: 0.99559, Val Loss: 1.01505
    Epoch 07/30, Train Loss: 1.00571, Val Loss: 1.01505
    Epoch 08/30, Train Loss: 0.99689, Val Loss: 1.01505
    --> Optuna trial early stopping at epoch 8. Val Loss: 1.01505


[I 2025-04-16 02:38:34,518] Trial 3 finished with value: 1.0150474607944489 and parameters: {'d_model': 128, 'ff_multiplier': 4, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.13140470953216876, 'learning_rate': 0.00040357092610848494, 'batch_size': 64, 'weight_decay': 0.00016172900811143125}. Best is trial 2 with value: 1.0150229632854462.



--- Optuna Trial 4 ---
Params: d_model=128, nhead=2, layers=1, ff=256(2x), drop=0.158, lr=0.000009, bs=16, wd=0.0000018
    Epoch 01/30, Train Loss: 1.16488, Val Loss: 1.13624
    Epoch 02/30, Train Loss: 1.11600, Val Loss: 1.08969
    Epoch 03/30, Train Loss: 1.07779, Val Loss: 1.05680
    Epoch 04/30, Train Loss: 1.04964, Val Loss: 1.03658
    Epoch 05/30, Train Loss: 1.02939, Val Loss: 1.02714
    Epoch 06/30, Train Loss: 1.01921, Val Loss: 1.02314
    Epoch 07/30, Train Loss: 1.01343, Val Loss: 1.02130
    Epoch 08/30, Train Loss: 1.00885, Val Loss: 1.02050
    Epoch 09/30, Train Loss: 1.00698, Val Loss: 1.02020
    Epoch 10/30, Train Loss: 1.00431, Val Loss: 1.01997
    Epoch 11/30, Train Loss: 1.00374, Val Loss: 1.01992
    Epoch 12/30, Train Loss: 1.00306, Val Loss: 1.01996
    Epoch 13/30, Train Loss: 1.00154, Val Loss: 1.01998
    Epoch 14/30, Train Loss: 1.00149, Val Loss: 1.02001
    Epoch 15/30, Train Loss: 1.00089, Val Loss: 1.02004
    Epoch 16/30, Train Loss: 1.00088, V

[I 2025-04-16 02:38:50,321] Trial 4 finished with value: 1.0199229291507177 and parameters: {'d_model': 128, 'ff_multiplier': 2, 'nhead': 2, 'num_encoder_layers': 1, 'dropout': 0.15753971856328178, 'learning_rate': 9.238217564420603e-06, 'batch_size': 16, 'weight_decay': 1.7535949529764409e-06}. Best is trial 2 with value: 1.0150229632854462.



--- Optuna Trial 5 ---
Params: d_model=128, nhead=8, layers=5, ff=256(2x), drop=0.198, lr=0.000080, bs=16, wd=0.0000351
    Epoch 01/30, Train Loss: 1.04549, Val Loss: 1.02005
    Epoch 02/30, Train Loss: 0.99795, Val Loss: 1.02005
    Epoch 03/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 04/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 05/30, Train Loss: 0.99795, Val Loss: 1.02005
    Epoch 06/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 07/30, Train Loss: 0.99794, Val Loss: 1.02005
    Epoch 08/30, Train Loss: 0.99794, Val Loss: 1.02005
    --> Optuna trial early stopping at epoch 8. Val Loss: 1.02005


[I 2025-04-16 02:39:06,325] Trial 5 finished with value: 1.0200546383857727 and parameters: {'d_model': 128, 'ff_multiplier': 2, 'nhead': 8, 'num_encoder_layers': 5, 'dropout': 0.19813867890931725, 'learning_rate': 7.976161234554842e-05, 'batch_size': 16, 'weight_decay': 3.512704726270847e-05}. Best is trial 2 with value: 1.0150229632854462.



--- Optuna Trial 6 ---
Params: d_model=256, nhead=2, layers=1, ff=1024(4x), drop=0.329, lr=0.000362, bs=32, wd=0.0003717
    Epoch 01/30, Train Loss: 1.05382, Val Loss: 1.03238
    Epoch 02/30, Train Loss: 0.99937, Val Loss: 1.03238
    Epoch 03/30, Train Loss: 1.00008, Val Loss: 1.03238
    Epoch 04/30, Train Loss: 0.99739, Val Loss: 1.03238
    Epoch 05/30, Train Loss: 0.99757, Val Loss: 1.03238


[I 2025-04-16 02:39:15,129] Trial 6 pruned. 


    Epoch 06/30, Train Loss: 0.99839, Val Loss: 1.03238
    --> Trial 6 podado en la época 6.

--- Optuna Trial 7 ---
Params: d_model=256, nhead=16, layers=1, ff=512(2x), drop=0.203, lr=0.000046, bs=128, wd=0.0000020
    Epoch 01/30, Train Loss: 1.49333, Val Loss: 1.31284
    Epoch 02/30, Train Loss: 1.27696, Val Loss: 1.11420
    Epoch 03/30, Train Loss: 1.12144, Val Loss: 1.02855
    Epoch 04/30, Train Loss: 1.05205, Val Loss: 1.01004
    Epoch 05/30, Train Loss: 1.02102, Val Loss: 1.00837
    Epoch 06/30, Train Loss: 1.01307, Val Loss: 1.00823
    Epoch 07/30, Train Loss: 0.99768, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 0.99536, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 1.00483, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99403, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99568, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 1.00364, Val Loss: 1.00823
    Epoch 13/30, Train Loss: 0.99116, Val Loss: 1.00823
    Epoch 14/30, Train Loss: 0.99472, Val Loss: 1.00823

[I 2025-04-16 02:39:33,359] Trial 7 finished with value: 1.0082253217697144 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 1, 'dropout': 0.2032241907732697, 'learning_rate': 4.565046893584959e-05, 'batch_size': 128, 'weight_decay': 1.9625093208439855e-06}. Best is trial 7 with value: 1.0082253217697144.



--- Optuna Trial 8 ---
Params: d_model=128, nhead=4, layers=1, ff=256(2x), drop=0.233, lr=0.000072, bs=64, wd=0.0000004
    Epoch 01/30, Train Loss: 1.13557, Val Loss: 1.07873
    Epoch 02/30, Train Loss: 1.05909, Val Loss: 1.02786
    Epoch 03/30, Train Loss: 1.02391, Val Loss: 1.01570
    Epoch 04/30, Train Loss: 1.00546, Val Loss: 1.01487
    Epoch 05/30, Train Loss: 1.00086, Val Loss: 1.01501
    Epoch 06/30, Train Loss: 1.00318, Val Loss: 1.01504
    Epoch 07/30, Train Loss: 1.00072, Val Loss: 1.01505
    Epoch 08/30, Train Loss: 0.99852, Val Loss: 1.01505
    Epoch 09/30, Train Loss: 0.99994, Val Loss: 1.01505
    Epoch 10/30, Train Loss: 0.99901, Val Loss: 1.01505
    --> Trial 8 podado en la época 10.


[I 2025-04-16 02:39:41,686] Trial 8 pruned. 



--- Optuna Trial 9 ---
Params: d_model=128, nhead=4, layers=4, ff=512(4x), drop=0.211, lr=0.000008, bs=16, wd=0.0000231
    Epoch 01/30, Train Loss: 1.09064, Val Loss: 1.04025
    Epoch 02/30, Train Loss: 1.01879, Val Loss: 1.02117
    Epoch 03/30, Train Loss: 1.00401, Val Loss: 1.02017
    Epoch 04/30, Train Loss: 1.00071, Val Loss: 1.02010
    Epoch 05/30, Train Loss: 0.99939, Val Loss: 1.02010


[I 2025-04-16 02:39:53,659] Trial 9 pruned. 


    Epoch 06/30, Train Loss: 0.99915, Val Loss: 1.02008
    --> Trial 9 podado en la época 6.

--- Optuna Trial 10 ---
Params: d_model=256, nhead=16, layers=3, ff=512(2x), drop=0.287, lr=0.000025, bs=128, wd=0.0000001


Python(73754) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(73755) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.25475, Val Loss: 1.12715
    Epoch 02/30, Train Loss: 1.12791, Val Loss: 1.02880
    Epoch 03/30, Train Loss: 1.04638, Val Loss: 1.00897
    Epoch 04/30, Train Loss: 1.01009, Val Loss: 1.00822
    Epoch 05/30, Train Loss: 1.01559, Val Loss: 1.00823
    Epoch 06/30, Train Loss: 1.00128, Val Loss: 1.00823
    Epoch 07/30, Train Loss: 0.99595, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 0.99875, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 1.00159, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99359, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99618, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 11. Val Loss: 1.00822


[I 2025-04-16 02:40:18,828] Trial 10 finished with value: 1.0082236528396606 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.28678589490492357, 'learning_rate': 2.457981884685407e-05, 'batch_size': 128, 'weight_decay': 1.1941063844170234e-07}. Best is trial 10 with value: 1.0082236528396606.



--- Optuna Trial 11 ---
Params: d_model=256, nhead=16, layers=3, ff=512(2x), drop=0.304, lr=0.000024, bs=128, wd=0.0000001


Python(74019) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74020) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74093) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74094) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.39892, Val Loss: 1.18898
    Epoch 02/30, Train Loss: 1.21212, Val Loss: 1.05288
    Epoch 03/30, Train Loss: 1.09658, Val Loss: 1.01429
    Epoch 04/30, Train Loss: 1.03708, Val Loss: 1.00842
    Epoch 05/30, Train Loss: 1.02193, Val Loss: 1.00822
    Epoch 06/30, Train Loss: 1.00527, Val Loss: 1.00823
    Epoch 07/30, Train Loss: 0.99829, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 1.00080, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 0.98856, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99561, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99862, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 0.99990, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 12. Val Loss: 1.00822


[I 2025-04-16 02:40:40,264] Trial 11 finished with value: 1.008224606513977 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.30381104231125405, 'learning_rate': 2.3519033967501944e-05, 'batch_size': 128, 'weight_decay': 1.1315208010515518e-07}. Best is trial 10 with value: 1.0082236528396606.



--- Optuna Trial 12 ---
Params: d_model=256, nhead=16, layers=3, ff=512(2x), drop=0.321, lr=0.000028, bs=128, wd=0.0000002


Python(74340) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74341) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74413) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74414) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.25305, Val Loss: 1.06629
    Epoch 02/30, Train Loss: 1.11517, Val Loss: 1.01648
    Epoch 03/30, Train Loss: 1.04839, Val Loss: 1.00855
    Epoch 04/30, Train Loss: 1.01645, Val Loss: 1.00824
    Epoch 05/30, Train Loss: 1.00707, Val Loss: 1.00823
    Epoch 06/30, Train Loss: 1.00045, Val Loss: 1.00823
    Epoch 07/30, Train Loss: 0.99867, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 0.99910, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 0.99684, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 1.00843, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99246, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 1.00292, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 12. Val Loss: 1.00823


[I 2025-04-16 02:41:01,151] Trial 12 finished with value: 1.0082253217697144 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.32061418767892724, 'learning_rate': 2.750210473326018e-05, 'batch_size': 128, 'weight_decay': 1.5199561745327618e-07}. Best is trial 10 with value: 1.0082236528396606.



--- Optuna Trial 13 ---
Params: d_model=256, nhead=16, layers=4, ff=512(2x), drop=0.270, lr=0.000017, bs=128, wd=0.0000001


Python(74654) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74655) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74727) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(74728) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.62166, Val Loss: 1.46128
    Epoch 02/30, Train Loss: 1.36894, Val Loss: 1.21220
    Epoch 03/30, Train Loss: 1.20149, Val Loss: 1.08139
    Epoch 04/30, Train Loss: 1.10181, Val Loss: 1.02705
    Epoch 05/30, Train Loss: 1.03096, Val Loss: 1.01147
    Epoch 06/30, Train Loss: 1.01810, Val Loss: 1.00864
    Epoch 07/30, Train Loss: 1.00731, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 1.00212, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 0.99996, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99932, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99966, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 0.99151, Val Loss: 1.00823
    Epoch 13/30, Train Loss: 0.99676, Val Loss: 1.00823
    Epoch 14/30, Train Loss: 0.99359, Val Loss: 1.00823
    Epoch 15/30, Train Loss: 1.00351, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 15. Val Loss: 1.00823


[I 2025-04-16 02:41:34,261] Trial 13 finished with value: 1.0082253217697144 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 4, 'dropout': 0.2699284325342433, 'learning_rate': 1.677257879303403e-05, 'batch_size': 128, 'weight_decay': 1.0230463944244417e-07}. Best is trial 10 with value: 1.0082236528396606.



--- Optuna Trial 14 ---
Params: d_model=256, nhead=16, layers=3, ff=512(2x), drop=0.276, lr=0.000021, bs=128, wd=0.0000006


Python(75131) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75132) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75169) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75170) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.33683, Val Loss: 1.19538
    Epoch 02/30, Train Loss: 1.17289, Val Loss: 1.06475
    Epoch 03/30, Train Loss: 1.09348, Val Loss: 1.01714
    Epoch 04/30, Train Loss: 1.03684, Val Loss: 1.00899
    Epoch 05/30, Train Loss: 1.01223, Val Loss: 1.00824
    Epoch 06/30, Train Loss: 1.01370, Val Loss: 1.00823
    Epoch 07/30, Train Loss: 1.00126, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 0.99505, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 0.99555, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99743, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99583, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 1.00569, Val Loss: 1.00823
    Epoch 13/30, Train Loss: 0.99960, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 13. Val Loss: 1.00823


[I 2025-04-16 02:41:57,047] Trial 14 finished with value: 1.0082250833511353 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.2763422375654154, 'learning_rate': 2.0937038683110453e-05, 'batch_size': 128, 'weight_decay': 5.529598542572281e-07}. Best is trial 10 with value: 1.0082236528396606.



--- Optuna Trial 15 ---
Params: d_model=256, nhead=16, layers=2, ff=512(2x), drop=0.284, lr=0.000036, bs=128, wd=0.0000005


Python(75479) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75480) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75516) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75517) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.26051, Val Loss: 1.10661
    Epoch 02/30, Train Loss: 1.11882, Val Loss: 1.02112
    Epoch 03/30, Train Loss: 1.04279, Val Loss: 1.00872
    Epoch 04/30, Train Loss: 1.00630, Val Loss: 1.00823
    Epoch 05/30, Train Loss: 0.99869, Val Loss: 1.00823
    Epoch 06/30, Train Loss: 1.00412, Val Loss: 1.00823
    Epoch 07/30, Train Loss: 0.99175, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 1.00018, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 1.00665, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 1.00019, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99761, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 1.00075, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 12. Val Loss: 1.00823


[I 2025-04-16 02:42:13,447] Trial 15 finished with value: 1.0082253217697144 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 2, 'dropout': 0.28393247612858, 'learning_rate': 3.550901209356492e-05, 'batch_size': 128, 'weight_decay': 4.6369021127463605e-07}. Best is trial 10 with value: 1.0082236528396606.



--- Optuna Trial 16 ---
Params: d_model=256, nhead=16, layers=2, ff=512(2x), drop=0.350, lr=0.000013, bs=128, wd=0.0000047


Python(75743) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75744) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75781) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75782) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.45674, Val Loss: 1.31247
    Epoch 02/30, Train Loss: 1.36122, Val Loss: 1.22316
    Epoch 03/30, Train Loss: 1.30350, Val Loss: 1.15427
    Epoch 04/30, Train Loss: 1.24981, Val Loss: 1.10248
    Epoch 05/30, Train Loss: 1.20424, Val Loss: 1.06514


[I 2025-04-16 02:42:22,765] Trial 16 pruned. 


    Epoch 06/30, Train Loss: 1.15700, Val Loss: 1.03961
    --> Trial 16 podado en la época 6.

--- Optuna Trial 17 ---
Params: d_model=256, nhead=4, layers=4, ff=512(2x), drop=0.297, lr=0.000006, bs=128, wd=0.0000002


Python(75883) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75884) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75953) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(75954) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.35726, Val Loss: 1.27004
    Epoch 02/30, Train Loss: 1.28066, Val Loss: 1.19309
    Epoch 03/30, Train Loss: 1.23123, Val Loss: 1.13294
    Epoch 04/30, Train Loss: 1.17018, Val Loss: 1.08741
    Epoch 05/30, Train Loss: 1.14015, Val Loss: 1.05472


[I 2025-04-16 02:42:40,052] Trial 17 pruned. 


    Epoch 06/30, Train Loss: 1.10255, Val Loss: 1.03321
    --> Trial 17 podado en la época 6.

--- Optuna Trial 18 ---
Params: d_model=64, nhead=2, layers=3, ff=256(4x), drop=0.245, lr=0.000046, bs=128, wd=0.0000085


Python(76459) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(76460) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(76630) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(76659) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.20158, Val Loss: 1.19575
    Epoch 02/30, Train Loss: 1.15487, Val Loss: 1.14598
    Epoch 03/30, Train Loss: 1.10843, Val Loss: 1.10557
    Epoch 04/30, Train Loss: 1.08672, Val Loss: 1.07390
    Epoch 05/30, Train Loss: 1.06042, Val Loss: 1.04997
    Epoch 06/30, Train Loss: 1.04277, Val Loss: 1.03306


[I 2025-04-16 02:43:52,359] Trial 18 pruned. 


    --> Trial 18 podado en la época 6.

--- Optuna Trial 19 ---
Params: d_model=256, nhead=16, layers=5, ff=512(2x), drop=0.309, lr=0.000012, bs=32, wd=0.0000009


Python(76931) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(76933) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(76999) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(77000) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.30709, Val Loss: 1.03695
    Epoch 02/30, Train Loss: 1.02183, Val Loss: 1.03238
    Epoch 03/30, Train Loss: 0.99861, Val Loss: 1.03238
    Epoch 04/30, Train Loss: 0.99858, Val Loss: 1.03238
    Epoch 05/30, Train Loss: 0.99811, Val Loss: 1.03238
    Epoch 06/30, Train Loss: 0.99782, Val Loss: 1.03238


[I 2025-04-16 02:44:26,524] Trial 19 pruned. 


    --> Trial 19 podado en la época 6.

--- Optuna Trial 20 ---
Params: d_model=256, nhead=16, layers=2, ff=512(2x), drop=0.253, lr=0.000025, bs=128, wd=0.0000001


Python(77563) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(77564) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(77729) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(77730) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.45183, Val Loss: 1.33635
    Epoch 02/30, Train Loss: 1.30054, Val Loss: 1.16825
    Epoch 03/30, Train Loss: 1.18060, Val Loss: 1.06931
    Epoch 04/30, Train Loss: 1.09826, Val Loss: 1.02399
    Epoch 05/30, Train Loss: 1.05581, Val Loss: 1.01090
    Epoch 06/30, Train Loss: 1.01958, Val Loss: 1.00837
    Epoch 07/30, Train Loss: 1.01365, Val Loss: 1.00821
    Epoch 08/30, Train Loss: 1.00501, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 1.00733, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99121, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99997, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 1.01581, Val Loss: 1.00823
    Epoch 13/30, Train Loss: 0.99852, Val Loss: 1.00823
    Epoch 14/30, Train Loss: 1.00190, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 14. Val Loss: 1.00821


[I 2025-04-16 02:45:14,109] Trial 20 finished with value: 1.0082131624221802 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 2, 'dropout': 0.2528329926341195, 'learning_rate': 2.4879974687118915e-05, 'batch_size': 128, 'weight_decay': 1.1333479648867827e-07}. Best is trial 20 with value: 1.0082131624221802.



--- Optuna Trial 21 ---
Params: d_model=256, nhead=16, layers=2, ff=512(2x), drop=0.248, lr=0.000024, bs=128, wd=0.0000002


Python(77933) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(77934) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(77999) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78000) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.44560, Val Loss: 1.32426
    Epoch 02/30, Train Loss: 1.26863, Val Loss: 1.15051
    Epoch 03/30, Train Loss: 1.16080, Val Loss: 1.05521
    Epoch 04/30, Train Loss: 1.08882, Val Loss: 1.01758
    Epoch 05/30, Train Loss: 1.03695, Val Loss: 1.00912
    Epoch 06/30, Train Loss: 1.02116, Val Loss: 1.00827
    Epoch 07/30, Train Loss: 1.00937, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 0.99665, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 1.00262, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99528, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 1.00443, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 0.99938, Val Loss: 1.00823
    Epoch 13/30, Train Loss: 0.99994, Val Loss: 1.00823
    Epoch 14/30, Train Loss: 1.00552, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 14. Val Loss: 1.00823


[I 2025-04-16 02:45:29,336] Trial 21 finished with value: 1.0082250833511353 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 2, 'dropout': 0.24775512750979153, 'learning_rate': 2.4125632692153175e-05, 'batch_size': 128, 'weight_decay': 2.0544243016563943e-07}. Best is trial 20 with value: 1.0082131624221802.



--- Optuna Trial 22 ---
Params: d_model=256, nhead=16, layers=2, ff=512(2x), drop=0.259, lr=0.000042, bs=128, wd=0.0000001


Python(78163) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78164) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78197) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78198) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.29907, Val Loss: 1.09486
    Epoch 02/30, Train Loss: 1.12409, Val Loss: 1.01411
    Epoch 03/30, Train Loss: 1.02696, Val Loss: 1.00834
    Epoch 04/30, Train Loss: 1.00177, Val Loss: 1.00823
    Epoch 05/30, Train Loss: 0.99446, Val Loss: 1.00823
    Epoch 06/30, Train Loss: 1.00408, Val Loss: 1.00823
    Epoch 07/30, Train Loss: 0.99916, Val Loss: 1.00823
    Epoch 08/30, Train Loss: 0.99320, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 1.00181, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99930, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 1.00495, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 1.00627, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 12. Val Loss: 1.00823


[I 2025-04-16 02:45:44,773] Trial 22 finished with value: 1.0082253217697144 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 2, 'dropout': 0.2591055456592846, 'learning_rate': 4.181982807762219e-05, 'batch_size': 128, 'weight_decay': 1.0023022534034042e-07}. Best is trial 20 with value: 1.0082131624221802.



--- Optuna Trial 23 ---
Params: d_model=256, nhead=16, layers=3, ff=512(2x), drop=0.299, lr=0.000018, bs=128, wd=0.0000003


Python(78391) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78392) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78425) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78426) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.34360, Val Loss: 1.28956
    Epoch 02/30, Train Loss: 1.19769, Val Loss: 1.13548
    Epoch 03/30, Train Loss: 1.11791, Val Loss: 1.05276
    Epoch 04/30, Train Loss: 1.05851, Val Loss: 1.01869
    Epoch 05/30, Train Loss: 1.02304, Val Loss: 1.00942
    Epoch 06/30, Train Loss: 1.01696, Val Loss: 1.00829
    Epoch 07/30, Train Loss: 1.01264, Val Loss: 1.00821
    Epoch 08/30, Train Loss: 0.99532, Val Loss: 1.00823
    Epoch 09/30, Train Loss: 0.99980, Val Loss: 1.00823
    Epoch 10/30, Train Loss: 0.99941, Val Loss: 1.00823
    Epoch 11/30, Train Loss: 0.99168, Val Loss: 1.00823
    Epoch 12/30, Train Loss: 1.00276, Val Loss: 1.00823
    Epoch 13/30, Train Loss: 0.99980, Val Loss: 1.00823
    Epoch 14/30, Train Loss: 0.99327, Val Loss: 1.00823
    --> Optuna trial early stopping at epoch 14. Val Loss: 1.00821


[I 2025-04-16 02:46:08,505] Trial 23 finished with value: 1.0082108974456787 and parameters: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.29867226031868743, 'learning_rate': 1.7633861657015756e-05, 'batch_size': 128, 'weight_decay': 2.9841239840264177e-07}. Best is trial 23 with value: 1.0082108974456787.



--- Optuna Trial 24 ---
Params: d_model=256, nhead=16, layers=4, ff=512(2x), drop=0.341, lr=0.000006, bs=128, wd=0.0000003


Python(78715) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78716) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78777) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78778) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


    Epoch 01/30, Train Loss: 1.44805, Val Loss: 1.46518
    Epoch 02/30, Train Loss: 1.38513, Val Loss: 1.36717
    Epoch 03/30, Train Loss: 1.30759, Val Loss: 1.28447
    Epoch 04/30, Train Loss: 1.26414, Val Loss: 1.21560
    Epoch 05/30, Train Loss: 1.22201, Val Loss: 1.15942


[I 2025-04-16 02:46:23,838] Trial 24 pruned. 


    Epoch 06/30, Train Loss: 1.16946, Val Loss: 1.11433
    --> Trial 24 podado en la época 6.

--- OPTIMIZACIÓN FINALIZADA ---
Mejor Valor (Val Loss): 1.00821
Mejores Hiperparámetros encontrados:
{'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.29867226031868743, 'learning_rate': 1.7633861657015756e-05, 'batch_size': 128, 'weight_decay': 2.9841239840264177e-07}

--- INICIANDO ENTRENAMIENTO FINAL ---
Usando parámetros: {'d_model': 256, 'ff_multiplier': 2, 'nhead': 16, 'num_encoder_layers': 3, 'dropout': 0.29867226031868743, 'learning_rate': 1.7633861657015756e-05, 'batch_size': 128, 'weight_decay': 2.9841239840264177e-07, 'calculated_dim_feedforward': 512}


Python(78943) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78944) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78977) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.
Python(78978) MallocStackLogging: can't turn off malloc stack logging because it was not enabled.


Epoch 001/150, Train Loss: 1.43308, Val Loss: 1.27109, LR: 0.000018, Time: 5.20s
    --> Modelo guardado en best_transformer_model_relu.pth (Val Loss: 1.27109)
Epoch 002/150, Train Loss: 1.28258, Val Loss: 1.11978, LR: 0.000018, Time: 1.55s
    --> Modelo guardado en best_transformer_model_relu.pth (Val Loss: 1.11978)
Epoch 003/150, Train Loss: 1.16825, Val Loss: 1.04327, LR: 0.000018, Time: 1.45s
    --> Modelo guardado en best_transformer_model_relu.pth (Val Loss: 1.04327)
Epoch 004/150, Train Loss: 1.08491, Val Loss: 1.01537, LR: 0.000018, Time: 1.32s
    --> Modelo guardado en best_transformer_model_relu.pth (Val Loss: 1.01537)
Epoch 005/150, Train Loss: 1.05102, Val Loss: 1.00914, LR: 0.000018, Time: 1.27s
    --> Modelo guardado en best_transformer_model_relu.pth (Val Loss: 1.00914)
Epoch 006/150, Train Loss: 1.01522, Val Loss: 1.00835, LR: 0.000018, Time: 1.09s
    --> Modelo guardado en best_transformer_model_relu.pth (Val Loss: 1.00835)
Epoch 007/150, Train Loss: 1.01349, Val 

*******

# Validación del modelo

In [21]:
import torch
import torch.nn as nn
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
from torch.utils.data import DataLoader, TensorDataset
import math

# Definir la clase PositionalEncoding (necesaria para el Transformer)
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return x

# Definir la clase del modelo Transformer (necesaria para cargar el modelo)
class TransformerModel(nn.Module):
    def __init__(self, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout=0.1):
        super(TransformerModel, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.d_model = d_model

        # Proyección lineal para ajustar la dimensión de entrada a d_model
        self.input_proj = nn.Linear(input_dim, d_model)
        self.pos_encoder = PositionalEncoding(d_model)

        # Capas del Transformer
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout
        )
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_encoder_layers)

        # Proyección lineal para la salida
        self.output_proj = nn.Linear(d_model, output_dim)

    def forward(self, src):
        # src shape: (batch_size, seq_len, input_dim)
        src = self.input_proj(src)  # Proyectar a d_model
        src = src.permute(1, 0, 2)  # Cambiar a (seq_len, batch_size, d_model) para Transformer
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src)
        output = output.permute(1, 0, 2)  # Volver a (batch_size, seq_len, d_model)
        output = self.output_proj(output)  # Proyectar a output_dim
        return output

# Cargar el modelo guardado
def load_model(model_path, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout, device):
    model = TransformerModel(
        input_dim=input_dim,
        output_dim=output_dim,
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder_layers,
        dim_feedforward=dim_feedforward,
        dropout=dropout
    ).to(device)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    return model

# Función para evaluar el modelo y calcular métricas
def evaluate_model(model, data_loader, criterion, scaler_target, device):
    model.eval()
    all_preds = []
    all_targets = []
    total_loss = 0.0

    with torch.no_grad():
        for batch_input, batch_target in data_loader:
            batch_input, batch_target = batch_input.to(device), batch_target.to(device)
            output = model(batch_input)
            loss = criterion(output, batch_target)
            total_loss += loss.item()

            # Guardar predicciones y valores reales
            all_preds.append(output.cpu().numpy())
            all_targets.append(batch_target.cpu().numpy())

    # Concatenar todas las predicciones y valores reales
    all_preds = np.concatenate(all_preds, axis=0)
    all_targets = np.concatenate(all_targets, axis=0)

    # Desnormalizar las predicciones y los valores reales
    all_preds_denorm = scaler_target.inverse_transform(all_preds.reshape(-1, all_preds.shape[-1])).reshape(all_preds.shape)
    all_targets_denorm = scaler_target.inverse_transform(all_targets.reshape(-1, all_targets.shape[-1])).reshape(all_targets.shape)

    # Calcular métricas en la escala original
    mse = mean_squared_error(all_targets_denorm.reshape(-1), all_preds_denorm.reshape(-1))
    mae = mean_absolute_error(all_targets_denorm.reshape(-1), all_preds_denorm.reshape(-1))
    r2 = r2_score(all_targets_denorm.reshape(-1), all_preds_denorm.reshape(-1))

    avg_loss = total_loss / len(data_loader)
    return avg_loss, mse, mae, r2, all_preds_denorm, all_targets_denorm

# Función para probar el modelo con un ejemplo específico
def test_model(model, input_data, scaler_input, scaler_target, device):
    model.eval()
    with torch.no_grad():
        # Normalizar el dato de entrada
        input_np = input_data.numpy().reshape(-1, input_data.shape[-1])
        input_normalized = scaler_input.transform(input_np).reshape(input_data.shape)
        input_tensor = torch.tensor(input_normalized, dtype=torch.float32).to(device)

        # Hacer la predicción
        if len(input_tensor.shape) == 2:  # Si es un solo ejemplo, agregar dimensión de batch
            input_tensor = input_tensor.unsqueeze(0)
        output = model(input_tensor)

        # Desnormalizar la predicción
        output_np = output.cpu().numpy()
        output_denorm = scaler_target.inverse_transform(output_np.reshape(-1, output_np.shape[-1])).reshape(output_np.shape)
        return output_denorm

# Configuración
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Hiperparámetros del modelo (AJUSTA ESTOS VALORES SEGÚN LOS MEJORES HIPERPARÁMETROS DE OPTUNA)
# Reemplaza estos valores con los que obtuviste al entrenar el modelo
# Hiperparámetros del modelo (AJUSTA ESTOS VALORES SEGÚN LOS DEL MODELO GUARDADO)
# Reemplaza estos valores con los que obtuviste al entrenar el modelo
d_model = 256              # <--- Cambiado de 128
nhead = 4                  # Asumiendo que este no cambió, pero verifica si es necesario
num_encoder_layers = 2     # <--- Cambiado de 3
dim_feedforward = 256      # <--- Cambiado de 512
dropout = 0.1              # Asumiendo que este no cambió, pero verifica si es necesario
input_dim = 6              # Probablemente correcto
output_dim = 4             # Probablemente correcto
batch_size = 32            # Este afecta al DataLoader, no a la estructura del modelo

# Cargar el modelo
model_path = 'best_transformer_model.pth'
model = load_model(model_path, input_dim, output_dim, d_model, nhead, num_encoder_layers, dim_feedforward, dropout, device)

# Crear DataLoader para los datos de validación (o prueba)
# Asume que tienes val_input y val_target del código anterior
val_dataset = TensorDataset(val_input, val_target)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Definir el criterio de pérdida
criterion = nn.MSELoss()

# Evaluar el modelo en los datos de validación
avg_loss, mse, mae, r2, predictions, true_values = evaluate_model(model, val_loader, criterion, scaler_target, device)

# Mostrar estadísticas
print("Estadísticas del modelo en los datos de validación:")
print(f"Pérdida promedio (MSE normalizado): {avg_loss:.4f}")
print(f"Error Cuadrático Medio (MSE desnormalizado): {mse:.4f}")
print(f"Error Absoluto Medio (MAE desnormalizado): {mae:.4f}")
print(f"Coeficiente de Determinación (R² desnormalizado): {r2:.4f}")

# Probar el modelo con un ejemplo específico (por ejemplo, el primer escenario de validación)
test_input = val_input[0]  # Primer escenario de validación
predicted_output = test_model(model, test_input, scaler_input, scaler_target, device)
true_output = scaler_target.inverse_transform(val_target[0].numpy().reshape(-1, val_target.shape[-1])).reshape(val_target[0].shape)

# Mostrar los resultados de la prueba
print("\nPrueba con un ejemplo específico (primer escenario de validación):")
print("Predicciones (desnormalizadas) para los primeros 5 pares de relés:")
for i in range(min(74, predicted_output.shape[1])):  # Mostrar solo los primeros 5 pares
    print(f"Par {i+1}:")
    print(f"  Predicción: {predicted_output[0, i, :]}")
    print(f"  Valor real: {true_output[i, :]}")



Estadísticas del modelo en los datos de validación:
Pérdida promedio (MSE normalizado): 1.8232
Error Cuadrático Medio (MSE desnormalizado): 16.5326
Error Absoluto Medio (MAE desnormalizado): 3.1601
Coeficiente de Determinación (R² desnormalizado): -0.8002

Prueba con un ejemplo específico (primer escenario de validación):
Predicciones (desnormalizadas) para los primeros 5 pares de relés:
Par 1:
  Predicción: [ 0.43002382  7.486592   12.017996    4.080612  ]
  Valor real: [10.739031   1.9509555  5.9624233  1.2299176]
Par 2:
  Predicción: [0.11465684 5.9169545  9.741835   2.696026  ]
  Valor real: [1.0115863 4.9236174 3.1815815 7.994131 ]
Par 3:
  Predicción: [ 1.259947    5.960698    2.5113537  -0.33478466]
  Valor real: [9.688989  9.712443  3.1813238 5.280407 ]
Par 4:
  Predicción: [0.7969634 1.1996793 9.026089  2.500911 ]
  Valor real: [ 6.613281   3.8063807  1.4643133 17.902504 ]
Par 5:
  Predicción: [2.8568864 2.7802656 6.520256  5.565122 ]
  Valor real: [2.7434456 2.0836818 3.99795