# BLOQUE process_data

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from scipy import stats
from pathlib import Path

def pre_process(data_dir: str | Path):
    """
    Lee y prepara los datos desde `data_dir`, que debe contener:
      - train.csv
      - stores.csv
      - features.csv
    Devuelve: data limpia, columnas categóricas, columnas numéricas, y cardinalidades.
    """
    data_dir = Path(data_dir)

    # Lectura de archivos desde Google Drive / carpeta local
    data = pd.read_csv(data_dir / 'train.csv')
    stores = pd.read_csv(data_dir / 'stores.csv')
    features = pd.read_csv(data_dir / 'features.csv')

    # Missing values en features
    features['CPI'] = features['CPI'].fillna(features['CPI'].median())
    features['Unemployment'] = features['Unemployment'].fillna(features['Unemployment'].median())

    # MarkDowns: negativos -> 0 y NaN -> 0
    for i in range(1, 6):
      col = f"MarkDown{i}"
      if col in features.columns:
          features[col] = features[col].apply(lambda x: 0 if pd.notna(x) and x < 0 else x)
          features[col] = features[col].fillna(0)

    # Merge
    data = pd.merge(data, stores, on='Store', how='left')
    data = pd.merge(data, features, on=['Store', 'Date'], how='left')

    # Tiempos y orden
    data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
    data.sort_values(by=['Date'], inplace=True)
    data.set_index(data['Date'], inplace=True)

    # Consolidar IsHoliday (viene duplicado del merge)
    if 'IsHoliday_x' in data.columns:
        data.drop(columns='IsHoliday_x', inplace=True)
    if 'IsHoliday_y' in data.columns:
        data.rename(columns={"IsHoliday_y": "IsHoliday"}, inplace=True)

    # Derivadas de fecha
    data['Year'] = data['Date'].dt.year
    data['Month'] = data['Date'].dt.month

    # Agregados por Store/Dept para outliers
    agg_data = (data.groupby(['Store', 'Dept'])
                    .Weekly_Sales.agg(['max', 'min', 'mean', 'median', 'std'])
                    .reset_index())
    store_data = pd.merge(left=data, right=agg_data, on=['Store', 'Dept'], how='left')
    store_data.dropna(inplace=True)

    data = store_data.copy()
    data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
    data.sort_values(by=['Date'], inplace=True)
    data.set_index(data['Date'], inplace=True)

    # Total_MarkDown y limpieza de columnas
    md_cols = [f"MarkDown{i}" for i in range(1, 6) if f"MarkDown{i}" in data.columns]
    data['Total_MarkDown'] = data[md_cols].sum(axis=1) if md_cols else 0
    for c in md_cols:
        data.drop(columns=c, inplace=True)

    # Filtro de outliers sobre numéricas clave
    numeric_col = ['Weekly_Sales', 'Size', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment', 'Total_MarkDown']
    data_numeric = data[numeric_col].copy()
    data = data[(np.abs(stats.zscore(data_numeric, nan_policy='omit')) < 2.5).all(axis=1)]
    data = data[data['Weekly_Sales'] >= 0]

    # Tipos
    if 'IsHoliday' in data.columns:
        data['IsHoliday'] = data['IsHoliday'].astype('int')

    # Categóricas: factorize
    cat_col = ['Store', 'Dept', 'Type']
    num_of_unique = []
    for col in cat_col:
        num_of_unique.append(data[col].nunique())
        data[col] = pd.factorize(data[col])[0]

    # Numéricas finales (las que usará el modelo)
    num_col = ['Size', 'Temperature', 'Fuel_Price', 'CPI', 'Unemployment',
               'Total_MarkDown', 'max', 'min', 'mean', 'median', 'std']

    return data, cat_col, num_col, num_of_unique

def get_data(data_dir: str | Path,
             test_size: float = 0.20,
             valid_size: float = 0.15,
             seed: int = 0):
    """Return random train/valid/test splits of the pre-processed data.

    The split is RANDOM (shuffled), which is enough to smoke-test the
    pipeline; it is not the temporal 113/15/15 split used to replicate the
    paper.

    Args:
        data_dir: Folder passed straight to ``pre_process``.
        test_size: Fraction of all rows held out for the test set.
        valid_size: Fraction of the remaining (non-test) rows used for
            validation.
        seed: Random state shared by both splits.

    Returns:
        ``(X_train, X_valid, X_test, y_train, y_valid, y_test,
        num_of_unique, cat_col, num_col)``.
    """
    frame, cat_col, num_col, num_of_unique = pre_process(data_dir)

    target = frame['Weekly_Sales'].astype(float)
    predictors = frame.drop(columns=['Weekly_Sales'])

    # First carve out the test set, then split what is left into train/valid.
    X_rest, X_test, y_rest, y_test = train_test_split(
        predictors, target, test_size=test_size, random_state=seed, shuffle=True
    )
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_rest, y_rest, test_size=valid_size, random_state=seed, shuffle=True
    )

    print("Training set size:", len(X_train))
    print("Validation set size:", len(X_valid))
    print("Test set size:", len(X_test))

    return (X_train, X_valid, X_test,
            y_train, y_valid, y_test,
            num_of_unique, cat_col, num_col)


# BLOQUE 0 — Preparación

In [None]:
# =========================
# BLOQUE 0 — Preparación (paper 113/15/15 + preprocesamiento sin fuga)
# =========================
import time, json, numpy as np, pandas as pd
from pathlib import Path
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --- Directorios (ajusta si fuera necesario)
SAVE_DIR = Path("/content/drive/MyDrive/Modelo/outputs")
DATA_DIR = Path("/content/drive/MyDrive/Modelo/data")
SAVE_DIR.mkdir(parents=True, exist_ok=True)
print("Usando SAVE_DIR =", SAVE_DIR)

# --- 0.1) Cargar y preparar datos con tu pre_process del notebook
# Debe devolver: data, cat_col, num_col, num_of_unique
data, cat_col, num_col, num_of_unique = pre_process(DATA_DIR)

# =======================
# Normalize 'Date' (resolve the index/column ambiguity)
# =======================
# If the index is named 'Date', clear that name so it cannot collide with the
# 'Date' column.
if getattr(data.index, "name", None) == "Date":
    data.index.name = None

# If 'Date' is one level of a MultiIndex, drop that level; if that fails,
# fall back to discarding the whole index.
if hasattr(data.index, "names") and "Date" in (data.index.names or []):
    try:
        data = data.reset_index(level="Date", drop=True)
    except Exception:
        data = data.reset_index(drop=True)

# 'Date' must exist as a column from here on.
if 'Date' not in data.columns:
    # No reconstruction from the index is attempted: fail loudly instead
    # (pre_process() normally keeps the column).
    raise ValueError("No se encontró columna 'Date' después de normalizar. Revisa pre_process().")
# Parse to datetime (unparseable values become NaT), sort chronologically and
# replace the index with a fresh positional one.
data['Date'] = pd.to_datetime(data['Date'], errors='coerce')
data = data.sort_values('Date').reset_index(drop=True)

# =======================
# Columnas efectivas (como en el paper)
# =======================
cat_features = [c for c in ['Store', 'Dept', 'Type'] if c in data.columns]
extra_num    = [c for c in ['IsHoliday', 'Year', 'Month'] if c in data.columns]
num_features = [c for c in num_col if c in data.columns] + extra_num

# =======================
# Temporal split 113/15/15 (train / valid / test weeks)
# =======================
_N_TRAIN_WEEKS, _N_VALID_WEEKS, _N_TEST_WEEKS = 113, 15, 15
_VALID_END = _N_TRAIN_WEEKS + _N_VALID_WEEKS
_TEST_END = _VALID_END + _N_TEST_WEEKS

uniq_weeks = np.array(sorted(data['Date'].unique()))
# Explicit raise instead of `assert`: assertions are stripped under
# `python -O`, which would silently produce short or empty splits.
if len(uniq_weeks) < _TEST_END:
    raise ValueError("No hay suficientes semanas para 113/15/15.")

# Consecutive, non-overlapping windows over the sorted unique dates.
weeks_train = uniq_weeks[:_N_TRAIN_WEEKS]
weeks_valid = uniq_weeks[_N_TRAIN_WEEKS:_VALID_END]
weeks_test  = uniq_weeks[_VALID_END:_TEST_END]


def mask_weeks(df, weeks):
    """Return a boolean mask of the rows of ``df`` whose 'Date' is in ``weeks``."""
    return df['Date'].isin(weeks)


df_train = data[mask_weeks(data, weeks_train)].copy()
df_valid = data[mask_weeks(data, weeks_valid)].copy()
df_test  = data[mask_weeks(data, weeks_test)].copy()

# =======================
# Leakage-free preprocessor (fit on train, transform on valid/test)
# =======================
# Numeric columns are scaled without centering (with_mean=False); categorical
# columns are one-hot encoded, and categories unseen during fit are ignored.
# Columns not listed in either group are dropped.
preprocess = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(with_mean=False), num_features),
        ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features),
    ],
    remainder='drop',
    sparse_threshold=0.3
)

# Target and date are removed from the feature frames.
drop_cols = ['Weekly_Sales', 'Date']

X_train = df_train.drop(columns=[c for c in drop_cols if c in df_train.columns])
y_train = df_train['Weekly_Sales'].astype(float)

X_valid = df_valid.drop(columns=[c for c in drop_cols if c in df_valid.columns])
y_valid = df_valid['Weekly_Sales'].astype(float)

X_test  = df_test.drop(columns=[c for c in drop_cols if c in df_test.columns])
y_test  = df_test['Weekly_Sales'].astype(float)

# Fit only on the training window, then reuse the fitted transformer.
Xtr = preprocess.fit_transform(X_train, y_train)
Xva = preprocess.transform(X_valid)
Xte = preprocess.transform(X_test)

print("Shapes → Xtr:", Xtr.shape, "| Xva:", Xva.shape, "| Xte:", Xte.shape)

# =======================
# Métricas y evaluador común (para usar desde cada bloque de modelo)
# =======================
def mape(y_true, y_pred, eps=1e-8):
    """Return the mean absolute percentage error, in percent.

    Each absolute error is divided by ``max(|y_true|, eps)``, so targets at or
    near zero do not divide by zero (they can still yield very large values).
    """
    actual = np.asarray(y_true)
    predicted = np.asarray(y_pred)
    denominator = np.maximum(np.abs(actual), eps)
    return np.mean(np.abs((actual - predicted) / denominator)) * 100

def eval_on_mats(name, model):
    """Fit ``model`` on the training matrix and score it on valid and test.

    Uses the module-level ``Xtr``/``y_train``, ``Xva``/``y_valid`` and
    ``Xte``/``y_test``. Prints the fit time in minutes.

    Returns:
        ``(valid_metrics, test_metrics)``, each a dict with the keys
        MSE, RMSE, MAE, MAPE and R2.
    """
    def _score(features, target):
        predicted = model.predict(features)
        mse = mean_squared_error(target, predicted)
        return {
            "MSE": mse,
            "RMSE": np.sqrt(mse),
            "MAE": mean_absolute_error(target, predicted),
            "MAPE": mape(target, predicted),
            "R2": r2_score(target, predicted),
        }

    started = time.time()
    model.fit(Xtr, y_train)
    elapsed_min = (time.time() - started) / 60
    # Only the fit is timed; prediction happens after this message.
    print(f"[{name}] entrenado en {elapsed_min:.1f} min")

    valid_metrics = _score(Xva, y_valid)
    test_metrics = _score(Xte, y_test)
    return valid_metrics, test_metrics

# =======================
# Utilidades de guardado/recarga para no perder trabajo
# =======================
!pip -q install joblib scipy
import joblib
from scipy import sparse

def save_artifacts():
    """Persist the preprocessor, matrices, targets and metadata to ``SAVE_DIR``.

    Writes ``preprocess.joblib``, ``Xtr/Xva/Xte.npz``,
    ``y_train/y_valid/y_test.npy`` and ``metadata.json`` from the module-level
    variables of the same names.
    """
    def _as_sparse(mat):
        # save_npz only accepts SciPy sparse matrices; coerce dense input so
        # the files stay readable by load_npz.
        return mat if sparse.issparse(mat) else sparse.csr_matrix(mat)

    def _iso_dates(weeks):
        # Depending on the datetime64 resolution, ndarray.tolist() can yield
        # raw integer nanoseconds; store readable ISO dates instead.
        return pd.to_datetime(weeks).strftime("%Y-%m-%d").tolist()

    joblib.dump(preprocess, SAVE_DIR / "preprocess.joblib")

    for stem, matrix in (("Xtr", Xtr), ("Xva", Xva), ("Xte", Xte)):
        sparse.save_npz(SAVE_DIR / f"{stem}.npz", _as_sparse(matrix))

    # np.asarray handles both a pandas Series and an already-plain ndarray
    # (the latter is what load_artifacts() leaves in the y_* globals).
    for stem, target in (("y_train", y_train), ("y_valid", y_valid), ("y_test", y_test)):
        np.save(SAVE_DIR / f"{stem}.npy", np.asarray(target))

    meta = {
        "cat_features": cat_features,
        "num_features": num_features,
        "weeks_train": _iso_dates(weeks_train),
        "weeks_valid": _iso_dates(weeks_valid),
        "weeks_test":  _iso_dates(weeks_test),
    }
    with open(SAVE_DIR / "metadata.json", "w", encoding="utf-8") as f:
        json.dump(meta, f, indent=2, default=str)
    print("✓ Preprocesador, matrices y metadatos guardados en", SAVE_DIR)

def load_artifacts():
    """Reload the artifacts written by ``save_artifacts`` into module globals.

    Rebinds ``preprocess``, ``Xtr``, ``Xva``, ``Xte`` (SciPy sparse matrices)
    and ``y_train``, ``y_valid``, ``y_test`` (NumPy arrays as returned by
    ``np.load``) from the files in ``SAVE_DIR``.
    """
    global preprocess, Xtr, Xva, Xte, y_train, y_valid, y_test

    def _matrix(stem):
        return sparse.load_npz(SAVE_DIR / f"{stem}.npz")

    def _target(stem):
        return np.load(SAVE_DIR / f"{stem}.npy")

    preprocess = joblib.load(SAVE_DIR / "preprocess.joblib")

    Xtr = _matrix("Xtr")
    Xva = _matrix("Xva")
    Xte = _matrix("Xte")

    y_train = _target("y_train")
    y_valid = _target("y_valid")
    y_test = _target("y_test")

    print("✓ Artifacts recargados desde", SAVE_DIR)

# =======================
# Contenedores de resultados en memoria (opcional)
# =======================
import pandas as pd, json

def save_metrics(nombre_modelo, val_dict, test_dict):
    """Append one model's metrics to the results CSV and dump them as JSON.

    Args:
        nombre_modelo: Label stored in the ``Modelo`` column and used in the
            JSON file name.
        val_dict: Validation metrics; keys become ``val_<key>`` columns.
        test_dict: Test metrics; keys become ``test_<key>`` columns.
    """
    row = {"Modelo": nombre_modelo}
    for prefix, metrics in (("val", val_dict), ("test", test_dict)):
        for key, value in metrics.items():
            row[f"{prefix}_{key}"] = value
    new_row = pd.DataFrame([row])

    # Cumulative CSV: append to the existing table, or start a new one.
    csv_path = SAVE_DIR / "resultados_modelos.csv"
    try:
        previous = pd.read_csv(csv_path)
    except FileNotFoundError:
        table = new_row
    else:
        table = pd.concat([previous, new_row], ignore_index=True)
    table.to_csv(csv_path, index=False)

    # Per-model JSON with the raw dictionaries.
    json_path = SAVE_DIR / f"metrics_{nombre_modelo}.json"
    with open(json_path, "w") as f:
        json.dump({"val": val_dict, "test": test_dict}, f, indent=2)
    print(f"✓ Métricas guardadas en {csv_path} y en metrics_{nombre_modelo}.json")


RESULTS_VAL, RESULTS_TEST = {}, {}

Usando SAVE_DIR = /content/drive/MyDrive/Modelo/outputs
Shapes → Xtr: (290842, 143) | Xva: (42285, 143) | Xte: (41120, 143)


In [None]:
import numpy as np, pandas as pd

def summarize_runs(rows):
    """Return per-metric mean and sample std (ddof=1) over repeated runs.

    Args:
        rows: List of dicts, each with the keys MSE, RMSE, MAE, MAPE and R2.

    Returns:
        ``(mean, std)`` dicts keyed by metric name.
    """
    mean, std = {}, {}
    for metric in ("MSE", "RMSE", "MAE", "MAPE", "R2"):
        values = [run[metric] for run in rows]
        mean[metric] = np.mean(values)
        std[metric] = np.std(values, ddof=1)
    return mean, std

def fmt_pm(mean: dict, std: dict):
    """Format each metric as ``"mean ± std"``.

    R2 is printed with three decimals, every other metric with two.
    """
    formatted = {}
    for metric, mean_value in mean.items():
        m = float(mean_value)
        s = float(std[metric])
        digits = 3 if metric == "R2" else 2
        formatted[metric] = f"{m:.{digits}f} ± {s:.{digits}f}"
    return formatted

## BLOQUE 1 — Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Fit/evaluate LinearRegression 15 times and report mean ± std.
# NOTE(review): `seed` is never passed to the estimator, and the recorded
# output of this cell shows a std of 0.00 for every metric, so the
# repetitions appear to be identical — confirm they are intentional.
runs_val, runs_test = [], []
for seed in range(15):
    val, test = eval_on_mats("LinearRegression", LinearRegression())
    runs_val.append(val); runs_test.append(test)

mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

print("LinearRegression — Valid:", fmt_pm(mean_val, std_val))
print("LinearRegression — Test :", fmt_pm(mean_test, std_test))

# Last expression of the cell: the notebook renders this one-row table.
import pandas as pd
pd.DataFrame([fmt_pm(mean_test, std_test)], index=["LinearRegression (Test)"])

[LinearRegression] entrenado en 0.1 min
[LinearRegression] entrenado en 0.1 min
[LinearRegression] entrenado en 0.1 min
[LinearRegression] entrenado en 0.0 min
[LinearRegression] entrenado en 0.0 min
[LinearRegression] entrenado en 0.1 min
[LinearRegression] entrenado en 0.1 min
[LinearRegression] entrenado en 0.0 min
[LinearRegression] entrenado en 0.0 min
[LinearRegression] entrenado en 0.1 min
[LinearRegression] entrenado en 0.1 min
[LinearRegression] entrenado en 0.0 min
[LinearRegression] entrenado en 0.0 min
[LinearRegression] entrenado en 0.0 min
[LinearRegression] entrenado en 0.1 min
LinearRegression — Valid: {'MSE': '19066684.43 ± 0.00', 'RMSE': '4366.54 ± 0.00', 'MAE': '2291.31 ± 0.00', 'MAPE': '2259816842.85 ± 0.00', 'R2': '0.922 ± 0.000'}
LinearRegression — Test : {'MSE': '14202434.57 ± 0.00', 'RMSE': '3768.61 ± 0.00', 'MAE': '2167.10 ± 0.00', 'MAPE': '693817530.55 ± 0.00', 'R2': '0.939 ± 0.000'}


Unnamed: 0,MSE,RMSE,MAE,MAPE,R2
LinearRegression (Test),14202434.57 ± 0.00,3768.61 ± 0.00,2167.10 ± 0.00,693817530.55 ± 0.00,0.939 ± 0.000


In [None]:
# Save the paper-style (mean ± std) table to a cumulative CSV
import pandas as pd
row_valid = fmt_pm(mean_val, std_val)
row_test  = fmt_pm(mean_test, std_test)
df_pm = pd.DataFrame([row_valid, row_test],
                     index=["LinearRegression (Valid)", "LinearRegression (Test)"])

csv_pm = SAVE_DIR / "resultados_resumen_pm.csv"
try:
    old = pd.read_csv(csv_pm, index_col=0)
except FileNotFoundError:
    out = df_pm
else:
    out = pd.concat([old, df_pm])
    # De-duplicate on the row label, keeping the newest entry.
    # drop_duplicates() compares only the metric values: it would keep stale
    # rows when a model is re-run and could drop a different model whose
    # formatted metrics happen to coincide.
    out = out[~out.index.duplicated(keep="last")]
out.to_csv(csv_pm)
print("✓ Resumen ± guardado en", csv_pm)

✓ Resumen ± guardado en /content/drive/MyDrive/Modelo/outputs/resultados_resumen_pm.csv


In [None]:
save_artifacts()

✓ Preprocesador, matrices y metadatos guardados en /content/drive/MyDrive/Modelo/outputs


## BLOQUE 2 - XGBoost

In [None]:
from xgboost import XGBRegressor
import pandas as pd

# Fit/evaluate a default XGBRegressor 15 times and report mean ± std.
# NOTE(review): no seed or varying parameter is passed to the estimator, and
# the recorded output shows a std of 0.00 for every metric — confirm the
# repetitions are intentional.
runs_val, runs_test = [], []
for _ in range(15):
    v, t = eval_on_mats("XGBoost_baseline", XGBRegressor())
    runs_val.append(v); runs_test.append(t)

mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

print("XGBoost — Valid:", fmt_pm(mean_val, std_val))
print("XGBoost — Test :", fmt_pm(mean_test, std_test))

[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
[XGBoost_baseline] entrenado en 0.1 min
XGBoost — Valid: {'MSE': '9744079.75 ± 0.00', 'RMSE': '3121.55 ± 0.00', 'MAE': '1747.51 ± 0.00', 'MAPE': '715932120.91 ± 0.00', 'R2': '0.960 ± 0.000'}
XGBoost — Test : {'MSE': '9131063.16 ± 0.00', 'RMSE': '3021.76 ± 0.00', 'MAE': '1766.86 ± 0.00', 'MAPE': '531546132.87 ± 0.00', 'R2': '0.961 ± 0.000'}


In [None]:
# Save the paper-style (mean ± std) table to the cumulative CSV
row_valid = fmt_pm(mean_val, std_val)
row_test  = fmt_pm(mean_test, std_test)
df_pm = pd.DataFrame([row_valid, row_test],
                     index=["XGBoost (Valid)", "XGBoost (Test)"])

csv_pm = SAVE_DIR / "resultados_resumen_pm.csv"
try:
    old = pd.read_csv(csv_pm, index_col=0)
except FileNotFoundError:
    out = df_pm
else:
    out = pd.concat([old, df_pm])
    # De-duplicate on the row label, keeping the newest entry.
    # drop_duplicates() compares only the metric values: it would keep stale
    # rows when a model is re-run and could drop a different model whose
    # formatted metrics happen to coincide.
    out = out[~out.index.duplicated(keep="last")]
out.to_csv(csv_pm)
print("✓ Resumen ± guardado en", csv_pm)

✓ Resumen ± guardado en /content/drive/MyDrive/Modelo/outputs/resultados_resumen_pm.csv


BLOQUE DE PREPARACIÓN DE LOS MODELOS RF y KNN

In [None]:
# Mini-prep para modelos que no aceptan sparse (RF y kNN)
from scipy import sparse
import time, numpy as np
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

Xtr_d = Xtr.toarray() if sparse.issparse(Xtr) else Xtr
Xva_d = Xva.toarray() if sparse.issparse(Xva) else Xva
Xte_d = Xte.toarray() if sparse.issparse(Xte) else Xte

def eval_on_mats_dense(name, model):
    """Fit ``model`` on the dense training matrix and score valid and test.

    Dense counterpart of ``eval_on_mats``: uses the module-level ``Xtr_d``,
    ``Xva_d`` and ``Xte_d`` arrays together with ``y_train``, ``y_valid`` and
    ``y_test``. Prints the fit time in minutes.

    Returns:
        ``(valid_metrics, test_metrics)``, each a dict with the keys
        MSE, RMSE, MAE, MAPE and R2.
    """
    def _score(features, target):
        predicted = model.predict(features)
        mse = mean_squared_error(target, predicted)
        return {
            "MSE": mse,
            "RMSE": np.sqrt(mse),
            "MAE": mean_absolute_error(target, predicted),
            "MAPE": mape(target, predicted),
            "R2": r2_score(target, predicted),
        }

    started = time.time()
    model.fit(Xtr_d, y_train)
    elapsed_min = (time.time() - started) / 60
    # Only the fit is timed; prediction happens after this message.
    print(f"[{name}] entrenado en {elapsed_min:.1f} min")

    valid_metrics = _score(Xva_d, y_valid)
    test_metrics = _score(Xte_d, y_test)
    return valid_metrics, test_metrics

## BLOQUE 3 - KNN

### BLOQUE DE PRIMERA EJECUCIÓN

In [None]:
from sklearn.neighbors import KNeighborsRegressor
import pandas as pd

# Fit/evaluate a default KNeighborsRegressor 15 times and report mean ± std.
# Uses eval_on_mats, i.e. the Xtr/Xva/Xte matrices (not the *_d dense copies).
# No checkpoint is written inside this loop, so an interruption leaves the
# summary below unexecuted.
runs_val, runs_test = [], []
for _ in range(15):
    v, t = eval_on_mats("kNN_baseline", KNeighborsRegressor())
    runs_val.append(v); runs_test.append(t)

mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

print("kNN — Valid:", fmt_pm(mean_val, std_val))
print("kNN — Test :", fmt_pm(mean_test, std_test))

[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min


KeyboardInterrupt: 

In [None]:
# Guardar tabla estilo paper (±) en CSV acumulativo
row_valid = fmt_pm(mean_val, std_val)
row_test  = fmt_pm(mean_test, std_test)
df_pm = pd.DataFrame([row_valid, row_test],
                     index=["kNN (Valid)", "kNN (Test)"])

csv_pm = SAVE_DIR / "resultados_resumen_pm.csv"
try:
    old = pd.read_csv(csv_pm, index_col=0)
    out = pd.concat([old, df_pm]).drop_duplicates()
except FileNotFoundError:
    out = df_pm
out.to_csv(csv_pm)
print("✓ Resumen ± guardado en", csv_pm)

### BLOQUE DE EJECUCIÓN OTRO DÍA

In [None]:
load_artifacts()  # restores preprocess, Xtr/Xva/Xte and y_* from SAVE_DIR
import joblib
# Load the kNN checkpoint from SAVE_DIR. There is no existence check here, so
# this presumably fails when knn_progress.joblib has not been written yet.
prog = joblib.load(SAVE_DIR / "knn_progress.joblib")
runs_val  = prog.get("runs_val", [])
runs_test = prog.get("runs_test", [])
start_i   = len(runs_val)  # resume from this run index

In [None]:
load_artifacts()

✓ Artifacts recargados desde /content/drive/MyDrive/Modelo/outputs


In [None]:
# Resume kNN runs from the checkpoint if one exists, otherwise start fresh.
import joblib, os
prog_path = SAVE_DIR / "knn_progress.joblib"
if os.path.exists(prog_path):
    prog = joblib.load(prog_path)
    # Missing keys fall back to empty lists.
    runs_val  = prog.get("runs_val", [])
    runs_test = prog.get("runs_test", [])
    print(f"Progreso cargado: {len(runs_val)} corridas")
else:
    runs_val, runs_test = [], []
    print("No había progreso previo. Empezando desde 0.")

start_i = len(runs_val)  # resume from this run index

Progreso cargado: 5 corridas


In [None]:
from sklearn.neighbors import KNeighborsRegressor
import joblib

# Complete the 15 kNN runs, continuing after the `start_i` already stored.
remaining = max(0, 15 - start_i)
print(f"Ya tienes {start_i}/15 corridas. Ejecutando {remaining} más...")

for i in range(start_i, 15):
    v, t = eval_on_mats("kNN_baseline", KNeighborsRegressor())
    runs_val.append(v); runs_test.append(t)

    # Checkpoint every 5 runs and after the last one; runs finished since the
    # previous checkpoint are not on disk if the loop is interrupted.
    if ((i + 1) % 5 == 0) or (i == 14):
        joblib.dump({"runs_val": runs_val, "runs_test": runs_test},
                    SAVE_DIR / "knn_progress.joblib")
        print(f"✓ Progreso guardado tras {i+1} corridas")

# Mean ± std summary (uses the summarize_runs and fmt_pm helpers)
mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

print("kNN — Valid:", fmt_pm(mean_val, std_val))
print("kNN — Test :", fmt_pm(mean_test, std_test))

# Also store the results in CSV/JSON; only the means are passed, not the stds.
save_metrics("kNN_15runs_mean±std", mean_val, mean_test)

Ya tienes 5/15 corridas. Ejecutando 10 más...
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
✓ Progreso guardado tras 10 corridas
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
[kNN_baseline] entrenado en 0.0 min
✓ Progreso guardado tras 15 corridas
kNN — Valid: {'MSE': '16809389.82 ± 0.00', 'RMSE': '4099.93 ± 0.00', 'MAE': '1999.61 ± 0.00', 'MAPE': '630020630.93 ± 0.00', 'R2': '0.931 ± 0.000'}
kNN — Test : {'MSE': '12931535.93 ± 0.00', 'RMSE': '3596.04 ± 0.00', 'MAE': '1854.84 ± 0.00', 'MAPE': '292653810.88 ± 0.00', 'R2': '0.945 ± 0.000'}
✓ Métricas guardadas en /content/drive/MyDrive/Modelo/outputs/resultados_modelos.csv y en metrics_kNN_15runs_mean±std.json


### GUARDADO DEL PROGRESO

In [None]:
save_artifacts()

✓ Preprocesador, matrices y metadatos guardados en /content/drive/MyDrive/Modelo/outputs


In [None]:
import joblib
joblib.dump(
    {"runs_val": runs_val, "runs_test": runs_test},
    SAVE_DIR / "knn_progress.joblib"
)
print("✓ Progreso kNN guardado:", SAVE_DIR / "knn_progress.joblib")

✓ Progreso kNN guardado: /content/drive/MyDrive/Modelo/outputs/knn_progress.joblib


## BLOQUE 4 - Random Forest

In [None]:
from sklearn.ensemble import RandomForestRegressor
from joblib import Parallel, delayed

def _rf_fast_run(seed: int):
    """Fit and evaluate one speed-oriented random forest seeded with ``seed``.

    Returns whatever ``eval_on_mats`` returns: ``(valid_metrics, test_metrics)``.
    """
    # Hyper-parameters chosen for speed.
    fast_params = dict(
        n_estimators=100,       # 100 trees (fewer than 200/300)
        max_depth=15,           # depth cap (avoids very large trees)
        min_samples_leaf=5,     # "thicker" leaves = fewer nodes
        max_features="sqrt",    # fewer candidate variables per split
        bootstrap=True,
        max_samples=0.7,        # each tree sees 70% of the rows (subsampling)
        n_jobs=1,               # keep at 1: parallelism is applied by the caller
        random_state=seed,
    )
    forest = RandomForestRegressor(**fast_params)
    return eval_on_mats("RandomForest", forest)

# 15 corridas en paralelo (1 proceso por corrida)
results = Parallel(n_jobs=-1, backend="loky", verbose=10)(
    delayed(_rf_fast_run)(s) for s in range(15)
)
runs_val  = [v for v, t in results]
runs_test = [t for v, t in results]

mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

print("RF_fast — Valid:", fmt_pm(mean_val, std_val))
print("RF_fast — Test :", fmt_pm(mean_test, std_test))
save_metrics("RandomForest_pm", mean_val, mean_test)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  4.8min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  9.7min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed: 23.6min
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 36.0min finished


RF_fast — Valid: {'MSE': '17583209.31 ± 158282.68', 'RMSE': '4193.19 ± 18.90', 'MAE': '2212.91 ± 22.70', 'MAPE': '2774615691.69 ± 221478438.53', 'R2': '0.928 ± 0.001'}
RF_fast — Test : {'MSE': '12793924.50 ± 97889.60', 'RMSE': '3576.84 ± 13.69', 'MAE': '1995.35 ± 19.27', 'MAPE': '1073470015.90 ± 137684590.52', 'R2': '0.945 ± 0.000'}
✓ Métricas guardadas en /content/drive/MyDrive/Modelo/outputs/resultados_modelos.csv y en metrics_RandomForest_pm.json


## BLOQUE 5 - GRU-based model

In [None]:
# =========================
# BLOQUE 5 — GRU (15 corridas, paralelo CPU, rápido)
# =========================

# 0) Modo CPU y evitar problemas de Dynamo en Colab
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""       # fuerza CPU
os.environ["TORCHDYNAMO_DISABLE"] = "1"       # evita errores de torch._dynamo

# 1) Imports
import numpy as np
from scipy import sparse
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from joblib import Parallel, delayed

# 2) Fallbacks por si no existen en el entorno
if 'mape' not in globals():
    def mape(y_true, y_pred, eps=1e-8):
        """Mean absolute percentage error (in percent), denominator floored at ``eps``."""
        actual, forecast = np.asarray(y_true), np.asarray(y_pred)
        scale = np.maximum(np.abs(actual), eps)
        return np.mean(np.abs((actual - forecast) / scale)) * 100

def _ensure_save_metrics():
    """Install a no-op ``save_metrics`` global when none is defined yet."""
    if 'save_metrics' in globals():
        return

    def save_metrics(name, val_dict, test_dict):
        print(f"(Nota) save_metrics no definido; omitiendo guardado para {name}.")

    globals()['save_metrics'] = save_metrics
_ensure_save_metrics()

# 3) Densificar matrices si vienen en sparse (GRU trabaja en float denso)
Xtr_d = Xtr.toarray() if sparse.issparse(Xtr) else Xtr
Xva_d = Xva.toarray() if sparse.issparse(Xva) else Xva
Xte_d = Xte.toarray() if sparse.issparse(Xte) else Xte

# 4) DataLoaders (cada fila como secuencia de longitud 1: (B, 1, F))
def make_loader(X, y, bs=1024, shuffle=False):
    """Wrap ``X`` and ``y`` in a DataLoader of float32 tensors.

    Every row of ``X`` becomes a length-1 sequence: the feature tensor gets an
    extra axis at position 1, so a 2-D ``(N, F)`` input yields ``(N, 1, F)``.

    Args:
        X: Array-like features.
        y: Array-like targets.
        bs: Batch size.
        shuffle: Whether the loader reshuffles the rows.
    """
    features = torch.from_numpy(np.asarray(X, dtype=np.float32)).unsqueeze(1)  # (N, 1, F)
    targets = torch.from_numpy(np.asarray(y, dtype=np.float32))                # (N,)
    return DataLoader(
        TensorDataset(features, targets),
        batch_size=bs,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=False,
    )

train_loader = make_loader(Xtr_d, y_train, bs=1024, shuffle=True)
valid_loader = make_loader(Xva_d, y_valid, bs=2048, shuffle=False)
test_loader  = make_loader(Xte_d, y_test,  bs=2048, shuffle=False)

# 5) Modelo GRU compacto (rápido en CPU)
class GRUTabular(nn.Module):
    """GRU followed by a small MLP head that outputs one value per sample.

    Args:
        input_size: Number of features per time step.
        hidden: GRU hidden size.
        layers: Number of stacked GRU layers.
        dropout: Dropout passed to the GRU; forced to 0.0 when ``layers == 1``.
    """

    def __init__(self, input_size, hidden=64, layers=1, dropout=0.0):
        super().__init__()
        gru_dropout = dropout if layers != 1 else 0.0
        self.gru = nn.GRU(
            input_size,
            hidden,
            num_layers=layers,
            batch_first=True,
            dropout=gru_dropout,
        )
        self.head = nn.Sequential(nn.Linear(hidden, 64), nn.ReLU(), nn.Linear(64, 1))

    def forward(self, x):
        """Map ``x`` of shape (B, T, F) to predictions of shape (B,)."""
        sequence_out, _ = self.gru(x)         # (B, T, H)
        last_step = sequence_out[:, -1, :]    # (B, H): output at the final step
        prediction = self.head(last_step)     # (B, 1)
        return prediction.squeeze(1)

# 6) Loop de entrenamiento y evaluación
def train_epoch(model, loader, opt, lossf, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    The per-batch loss is weighted by the batch size and the total is divided
    by the dataset length.
    """
    model.train()
    weighted_loss = 0.0
    for features, targets in loader:
        features = features.to(device)
        targets = targets.to(device)

        opt.zero_grad()
        batch_loss = lossf(model(features), targets)
        batch_loss.backward()
        opt.step()

        weighted_loss += batch_loss.item() * features.size(0)
    return weighted_loss / len(loader.dataset)

@torch.no_grad()
def eval_metrics(model, loader, device):
    """Predict over ``loader`` in eval mode and return regression metrics.

    Returns:
        Dict with the keys MSE, RMSE, MAE, MAPE and R2.
    """
    model.eval()
    targets, predictions = [], []
    for xb, yb in loader:
        predictions.append(model(xb.to(device)).cpu().numpy())
        targets.append(yb.numpy())

    y = np.concatenate(targets)
    p = np.concatenate(predictions)

    mse = mean_squared_error(y, p)
    return {
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "MAE": mean_absolute_error(y, p),
        "MAPE": mape(y, p),
        "R2": r2_score(y, p),
    }

# 7) Helpers para media ± desviación
def summarize_runs(dicts_list):
    """Return per-metric mean and sample std (ddof=1) as plain floats.

    The metric names are taken from the first dict in ``dicts_list``.
    """
    mean, std = {}, {}
    for metric in dicts_list[0].keys():
        values = [run[metric] for run in dicts_list]
        mean[metric] = float(np.mean(values))
        std[metric] = float(np.std(values, ddof=1))
    return mean, std

def fmt_pm(mean, std, r2_dec=3):
    """Format each metric as ``"mean ± std"``.

    R2 uses ``r2_dec`` decimals; every other metric uses two.
    """
    def _cell(metric):
        digits = r2_dec if metric == "R2" else 2
        return f"{mean[metric]:.{digits}f} ± {std[metric]:.{digits}f}"

    return {metric: _cell(metric) for metric in mean.keys()}

# 8) Función de una corrida (CPU) — parámetros “rápidos”
def run_one_gru_cpu(seed: int, epochs=3, hidden=64):
    """Train one single-layer GRU on CPU and return its valid/test metrics.

    Uses the module-level ``train_loader``, ``valid_loader``, ``test_loader``
    and ``Xtr_d`` (for the input width).

    Args:
        seed: Seed applied to both torch and NumPy before the model is built.
        epochs: Number of passes over ``train_loader``.
        hidden: GRU hidden size.

    Returns:
        ``(valid_metrics, test_metrics)`` as produced by ``eval_metrics``.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    cpu = torch.device("cpu")

    n_features = Xtr_d.shape[1]
    net = GRUTabular(input_size=n_features, hidden=hidden, layers=1).to(cpu)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    criterion = torch.nn.MSELoss()

    for _epoch in range(epochs):
        train_epoch(net, train_loader, optimizer, criterion, cpu)

    valid_metrics = eval_metrics(net, valid_loader, cpu)
    test_metrics = eval_metrics(net, test_loader, cpu)
    return valid_metrics, test_metrics

# 9) Ejecutar 15 corridas en paralelo (CPU)
results = Parallel(n_jobs=-1, backend="loky", verbose=10)(
    delayed(run_one_gru_cpu)(s, epochs=3, hidden=64) for s in range(15)
)
runs_val  = [v for v, t in results]
runs_test = [t for v, t in results]

mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

print("GRU_fast — Valid:", fmt_pm(mean_val, std_val))
print("GRU_fast — Test :", fmt_pm(mean_test, std_test))

# 10) Guardar métricas (usando tu save_metrics del BLOQUE 0 si existe)
save_metrics("GRU_fast_15runs_pm", mean_val, mean_test)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.0min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  1.9min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  3.7min
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  5.3min finished


GRU_fast — Valid: {'MSE': '395330048.00 ± 4051131.10', 'RMSE': '19882.67 ± 101.73', 'MAE': '12537.66 ± 104.65', 'MAPE': '1761501568.00 ± 272532832.00', 'R2': '-0.614 ± 0.017'}
GRU_fast — Test : {'MSE': '372576597.33 ± 3881662.34', 'RMSE': '19302.00 ± 100.41', 'MAE': '12032.03 ± 102.54', 'MAPE': '1811410304.00 ± 280254496.00', 'R2': '-0.588 ± 0.017'}
✓ Métricas guardadas en /content/drive/MyDrive/Modelo/outputs/resultados_modelos.csv y en metrics_GRU_fast_15runs_pm.json


## BLOQUE 6 - LSTM-based model

In [None]:
# =========================
# BLOQUE 6 — LSTM (15 corridas, paralelo CPU, rápido)
# =========================

# 0) Forzar CPU y evitar problemas de Dynamo en Colab
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""   # fuerza CPU
os.environ["TORCHDYNAMO_DISABLE"] = "1"   # evita errores de torch._dynamo

# 1) Imports
import numpy as np
from scipy import sparse
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from joblib import Parallel, delayed

# 2) Fallbacks si faltan helpers del BLOQUE 0
if 'mape' not in globals():
    def mape(y_true, y_pred, eps=1e-8):
        """Mean absolute percentage error (in percent), denominator floored at ``eps``."""
        actual, forecast = np.asarray(y_true), np.asarray(y_pred)
        scale = np.maximum(np.abs(actual), eps)
        return np.mean(np.abs((actual - forecast) / scale)) * 100

if 'summarize_runs' not in globals():
    def summarize_runs(dicts_list):
        """Per-metric mean and sample std (ddof=1); keys come from the first dict."""
        mean, std = {}, {}
        for metric in dicts_list[0].keys():
            values = [run[metric] for run in dicts_list]
            mean[metric] = float(np.mean(values))
            std[metric] = float(np.std(values, ddof=1))
        return mean, std

if 'fmt_pm' not in globals():
    def fmt_pm(mean, std, r2_dec=3):
        """Format each metric as "mean ± std" (R2: ``r2_dec`` decimals, others: 2)."""
        def _cell(metric):
            digits = r2_dec if metric == "R2" else 2
            return f"{mean[metric]:.{digits}f} ± {std[metric]:.{digits}f}"

        return {metric: _cell(metric) for metric in mean.keys()}

if 'save_metrics' not in globals():
    def save_metrics(name, val_dict, test_dict):
        """No-op stand-in: only reports that nothing was saved."""
        print(f"(Nota) save_metrics no definido; omitiendo guardado para {name}.")

# 3) Densificar matrices si vienen en sparse (LSTM trabaja en float denso)
Xtr_d = Xtr.toarray() if sparse.issparse(Xtr) else Xtr
Xva_d = Xva.toarray() if sparse.issparse(Xva) else Xva
Xte_d = Xte.toarray() if sparse.issparse(Xte) else Xte

# 4) DataLoaders (cada fila como secuencia de longitud 1: (B, 1, F))
def make_loader(X, y, bs=1024, shuffle=False):
    """Wrap features and targets in a DataLoader of length-1 sequences.

    Both inputs are converted to float32. A sequence axis is inserted at
    position 1 of the features, so a 2-D ``X`` of shape (N, F) is served as
    (N, 1, F). Targets keep their shape.

    Args:
        X: array-like feature matrix.
        y: array-like targets.
        bs: batch size.
        shuffle: whether the loader reshuffles on every pass.
    """
    features = torch.from_numpy(np.asarray(X, dtype=np.float32))
    targets = torch.from_numpy(np.asarray(y, dtype=np.float32))
    dataset = TensorDataset(features.unsqueeze(1), targets)
    return DataLoader(
        dataset,
        batch_size=bs,
        shuffle=shuffle,
        num_workers=0,
        pin_memory=False,
    )

# Always (re)build the loaders for this block.
# Reusing whatever `train_loader` / `valid_loader` / `test_loader` happen to be in
# the notebook's globals is unsafe: BLOQUE 7 rebinds those same names to loaders
# that yield three tensors per batch, which the two-tensor loops of this block
# (`for xb, yb in loader`) cannot unpack. Rebuilding guarantees the loaders match
# the (B, 1, F) layout produced by `make_loader`.
train_loader = make_loader(Xtr_d, y_train, bs=1024, shuffle=True)
valid_loader = make_loader(Xva_d, y_valid, bs=2048, shuffle=False)
test_loader  = make_loader(Xte_d, y_test,  bs=2048, shuffle=False)

# 5) Modelo LSTM compacto (rápido en CPU)
class LSTMTabular(nn.Module):
    """Compact LSTM regressor: an LSTM followed by a two-layer MLP head.

    Args:
        input_size: number of features per time step.
        hidden: LSTM hidden size.
        layers: number of stacked LSTM layers.
        dropout: dropout passed to the LSTM; replaced by 0.0 when ``layers == 1``.
    """

    def __init__(self, input_size, hidden=64, layers=1, dropout=0.0):
        super().__init__()
        # With a single layer the dropout argument is ignored and 0.0 is used.
        lstm_dropout = dropout if layers != 1 else 0.0
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden,
            num_layers=layers,
            batch_first=True,
            dropout=lstm_dropout,
        )
        regressor = [nn.Linear(hidden, 64), nn.ReLU(), nn.Linear(64, 1)]
        self.head = nn.Sequential(*regressor)

    def forward(self, x):
        """Return one prediction per batch row, shape (B,), for ``x`` of shape (B, T, F).

        Only the LSTM output at the last time step feeds the head.
        """
        seq_out, _state = self.lstm(x)   # (B, T, H) since batch_first=True
        last_step = seq_out[:, -1]       # (B, H)
        preds = self.head(last_step)     # (B, 1)
        return preds.squeeze(1)

# 6) Train y evaluación
def train_epoch(model, loader, opt, lossf, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Each batch loss is weighted by its batch size, and the total is divided by
    ``len(loader.dataset)``.

    Args:
        model: module called as ``model(xb)``.
        loader: iterable of ``(xb, yb)`` batches exposing ``.dataset``.
        opt: optimizer stepped once per batch.
        lossf: callable ``lossf(pred, target)`` returning a scalar tensor.
        device: device the batches are moved to.
    """
    model.train()
    weighted_sum = 0.0
    for features, targets in loader:
        features = features.to(device)
        targets = targets.to(device)
        opt.zero_grad()
        batch_loss = lossf(model(features), targets)
        batch_loss.backward()
        opt.step()
        weighted_sum += batch_loss.item() * features.size(0)
    return weighted_sum / len(loader.dataset)

@torch.no_grad()
def eval_metrics(model, loader, device):
    """Score ``model`` on every batch of ``loader`` without tracking gradients.

    Predictions and targets are gathered over all batches and compared once.

    Returns:
        dict with keys "MSE", "RMSE", "MAE", "MAPE" and "R2".
    """
    model.eval()
    targets, preds = [], []
    for xb, yb in loader:
        batch_pred = model(xb.to(device)).cpu().numpy()
        preds.append(batch_pred)
        targets.append(yb.numpy())
    y_all = np.concatenate(targets)
    p_all = np.concatenate(preds)
    mse = mean_squared_error(y_all, p_all)
    return {
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "MAE": mean_absolute_error(y_all, p_all),
        "MAPE": mape(y_all, p_all),
        "R2": r2_score(y_all, p_all),
    }

# 7) Una corrida rápida (CPU): 3 épocas, hidden=64
def run_one_lstm_cpu(seed: int, epochs=3, hidden=64):
    """Train one single-layer LSTMTabular on CPU from a seeded start and score it.

    Reads the notebook-level ``Xtr_d`` (for the feature count) and the
    ``train_loader`` / ``valid_loader`` / ``test_loader`` objects.

    Args:
        seed: value used to seed both torch and NumPy.
        epochs: number of passes over ``train_loader``.
        hidden: LSTM hidden size.

    Returns:
        ``(val_metrics, test_metrics)`` as produced by ``eval_metrics``.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)

    cpu = torch.device("cpu")
    n_features = Xtr_d.shape[1]
    net = LSTMTabular(input_size=n_features, hidden=hidden, layers=1).to(cpu)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    criterion = torch.nn.MSELoss()

    for _epoch in range(epochs):
        train_epoch(net, train_loader, optimizer, criterion, cpu)

    val_scores = eval_metrics(net, valid_loader, cpu)
    test_scores = eval_metrics(net, test_loader, cpu)
    return val_scores, test_scores

# 8) Launch 15 runs (seeds 0..14) in parallel on CPU; each returns (val, test).
results = Parallel(n_jobs=-1, backend="loky", verbose=10)(
    delayed(run_one_lstm_cpu)(seed, epochs=3, hidden=64) for seed in range(15)
)
runs_val  = [pair[0] for pair in results]
runs_test = [pair[1] for pair in results]

# 9) Mean ± standard deviation across runs, per split.
mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

valid_report = fmt_pm(mean_val, std_val)
test_report = fmt_pm(mean_test, std_test)
print("LSTM_fast — Valid:", valid_report)
print("LSTM_fast — Test :", test_report)

# 10) Persist the aggregated means (only the means are passed to save_metrics).
save_metrics("LSTM_fast_15runs_pm", mean_val, mean_test)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  1.3min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:  4.6min
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  6.6min finished


LSTM_fast — Valid: {'MSE': '409117060.27 ± 2659818.39', 'RMSE': '20226.54 ± 65.74', 'MAE': '12907.99 ± 75.69', 'MAPE': '850084864.00 ± 171774976.00', 'R2': '-0.670 ± 0.011'}
LSTM_fast — Test : {'MSE': '385796844.80 ± 2553018.34', 'RMSE': '19641.61 ± 64.98', 'MAE': '12396.30 ± 74.90', 'MAPE': '874170368.00 ± 176641920.00', 'R2': '-0.644 ± 0.011'}
✓ Métricas guardadas en /content/drive/MyDrive/Modelo/outputs/resultados_modelos.csv y en metrics_LSTM_fast_15runs_pm.json


## BLOQUE 7 TabTransformer

In [None]:
# =========================
# BLOQUE 7 — TabTransformer (15 corridas, CPU, rápido)
# =========================

# 0) Force CPU and disable TorchDynamo (meant to avoid errors on Colab with torch 2.x).
import os

# NOTE(review): presumably only effective if set before torch is first imported
# in the session — confirm when re-running a live notebook.
os.environ.update(
    CUDA_VISIBLE_DEVICES="",
    TORCHDYNAMO_DISABLE="1",
)

# 1) Imports
import numpy as np
import torch
from torch import nn, einsum
from torch.utils.data import Dataset, DataLoader
from joblib import Parallel, delayed
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# 2) Fallbacks por si faltan helpers del BLOQUE 0
if 'mape' not in globals():
    def mape(y_true, y_pred, eps=1e-8):
        """Return the mean absolute percentage error, expressed in percent.

        Target magnitudes are floored at ``eps`` before dividing, which avoids a
        division by zero for zero targets.
        """
        y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
        scale = np.maximum(np.abs(y_true), eps)
        pct_error = np.abs((y_true - y_pred) / scale)
        return np.mean(pct_error) * 100

if 'summarize_runs' not in globals():
    def summarize_runs(dicts_list):
        """Collapse a list of per-run metric dicts into ``(mean, std)`` dicts.

        Metric names come from the first run. ``std`` is the sample standard
        deviation (``ddof=1``). All values are returned as Python floats.
        """
        def _samples(metric):
            return [run[metric] for run in dicts_list]

        metrics = dicts_list[0].keys()
        mean = {name: float(np.mean(_samples(name))) for name in metrics}
        std = {name: float(np.std(_samples(name), ddof=1)) for name in metrics}
        return mean, std

if 'fmt_pm' not in globals():
    def fmt_pm(mean, std, r2_dec=3):
        """Render each metric as a ``"mean ± std"`` string.

        ``"R2"`` uses ``r2_dec`` decimals; all other metrics use two. The result
        has the same keys, in the same order, as ``mean``.
        """
        out = {}
        for k in mean.keys():
            if k == "R2":
                out[k] = f"{mean[k]:.{r2_dec}f} ± {std[k]:.{r2_dec}f}"
                continue
            out[k] = f"{mean[k]:.2f} ± {std[k]:.2f}"
        return out

if 'save_metrics' not in globals():
    def save_metrics(name, val_dict, test_dict):
        """Stand-in used when no real ``save_metrics`` exists: print a notice, save nothing.

        ``val_dict`` and ``test_dict`` are accepted only to match the expected
        signature; they are not used.
        """
        message = f"(Nota) save_metrics no definido; omitiendo guardado para {name}."
        print(message)

# 3) Dataset tabular (categorías + numéricas) desde tus DataFrames ya preprocesados
class TabDataset(Dataset):
    """Dataset serving ``(categorical ids, numeric features, target)`` per row.

    Args:
        X_df: DataFrame holding the feature columns.
        y: array-like targets, stored as float32.
        cat_cols: columns cast to int64 and stored in ``Xc``.
        num_cols: columns cast to float32 and stored in ``Xn``; when empty,
            ``Xn`` is an ``(n_rows, 0)`` float32 array.
    """

    def __init__(self, X_df, y, cat_cols, num_cols):
        self.Xc = X_df[cat_cols].astype(np.int64).to_numpy()
        if len(num_cols):
            self.Xn = X_df[num_cols].astype(np.float32).to_numpy()
        else:
            self.Xn = np.empty((len(X_df), 0), np.float32)
        self.y = np.asarray(y, dtype=np.float32)

    def __len__(self):
        return len(self.y)

    def __getitem__(self, i):
        cat_row = torch.from_numpy(self.Xc[i])
        num_row = torch.from_numpy(self.Xn[i])
        target = torch.tensor(self.y[i])
        return cat_row, num_row, target

# 4) DataLoaders rápidos (CPU)
def make_loaders(Xtr_df, ytr, Xva_df, yva, Xte_df, yte, bs=1024):
    """Build the train/valid/test DataLoaders over ``TabDataset`` objects.

    Column selection comes from the notebook-level ``cat_features`` and
    ``num_features``. The training loader shuffles with batch size ``bs``; the
    validation and test loaders keep their order and use ``2 * bs``.

    Returns:
        ``(train_loader, valid_loader, test_loader)``.
    """
    splits = ((Xtr_df, ytr), (Xva_df, yva), (Xte_df, yte))
    train_ds, valid_ds, test_ds = [
        TabDataset(frame, target, cat_features, num_features)
        for frame, target in splits
    ]
    common = {"num_workers": 0, "pin_memory": False}
    eval_bs = 2 * bs
    return (
        DataLoader(train_ds, batch_size=bs, shuffle=True, **common),
        DataLoader(valid_ds, batch_size=eval_bs, shuffle=False, **common),
        DataLoader(test_ds, batch_size=eval_bs, shuffle=False, **common),
    )

# Build the three loaders from the X_*/y_* splits; this rebinds the notebook-level
# train_loader / valid_loader / test_loader names.
train_loader, valid_loader, test_loader = make_loaders(
    Xtr_df=X_train, ytr=y_train,
    Xva_df=X_valid, yva=y_valid,
    Xte_df=X_test, yte=y_test,
    bs=1024,
)

# 5) Bloques básicos del TabTransformer (compacto para ir rápido)
class PreNorm(nn.Module):
    """Apply ``LayerNorm(dim)`` to the input, then the wrapped module ``fn``."""

    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn

    def forward(self, x):
        normed = self.norm(x)
        return self.fn(normed)

class Attention(nn.Module):
    """Multi-head scaled dot-product self-attention over a token axis.

    Args:
        dim: feature size of the input and of the output.
        heads: number of attention heads.
        dim_head: feature size per head; scores are scaled by ``dim_head ** -0.5``.
        dropout: dropout probability applied to the attention weights.
    """

    def __init__(self, dim, heads=4, dim_head=16, dropout=0.0):
        super().__init__()
        inner = heads * dim_head
        self.heads = heads
        self.scale = dim_head ** -0.5
        # A single bias-free projection produces queries, keys and values together.
        self.to_qkv = nn.Linear(dim, inner * 3, bias=False)
        self.to_out = nn.Linear(inner, dim)
        self.drop = nn.Dropout(dropout)

    def forward(self, x):
        """Attend over axis 1 of ``x`` (B, N, D) and return a tensor of shape (B, N, D)."""
        batch, n_tok = x.size(0), x.size(1)
        n_heads = self.heads

        def split_heads(t):
            # (B, N, H*d) -> (B, H, N, d)
            return t.view(t.size(0), t.size(1), n_heads, -1).transpose(1, 2)

        queries, keys, values = (
            split_heads(part) for part in self.to_qkv(x).chunk(3, dim=-1)
        )
        scores = (queries @ keys.transpose(-2, -1)) * self.scale
        weights = self.drop(scores.softmax(dim=-1))
        context = weights @ values                      # (B, H, N, d)
        merged = context.transpose(1, 2).contiguous().view(batch, n_tok, -1)
        return self.to_out(merged)

class FeedForward(nn.Module):
    """Position-wise MLP: Linear(dim -> dim*mult), ReLU, Dropout, Linear(dim*mult -> dim)."""

    def __init__(self, dim, mult=4, dropout=0.0):
        super().__init__()
        hidden = dim * mult
        stages = [
            nn.Linear(dim, hidden),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden, dim),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        return self.net(x)

class Transformer(nn.Module):
    """Stack of ``depth`` pre-norm blocks: attention then feed-forward, each with a residual.

    The feed-forward sub-layer always uses ``mult=2``.
    """

    def __init__(self, dim, depth=2, heads=4, dim_head=16, dropout=0.0):
        super().__init__()
        blocks = []
        for _ in range(depth):
            attn_block = PreNorm(
                dim, Attention(dim, heads=heads, dim_head=dim_head, dropout=dropout)
            )
            ff_block = PreNorm(dim, FeedForward(dim, mult=2, dropout=dropout))
            blocks.append(nn.ModuleList([attn_block, ff_block]))
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        for block in self.layers:
            attend, feed = block
            x = x + attend(x)   # residual around attention
            x = x + feed(x)     # residual around feed-forward
        return x

# 6) Modelo TabTransformer compacto:
#    - embeddings por campo categórico
#    - transformer sobre tokens categóricos
#    - concat con numéricas normalizadas (LayerNorm)
#    - MLP a salida escalar
class TabTransformer(nn.Module):
    """Compact TabTransformer regressor.

    Each categorical field gets its own embedding table. The embedded tokens go
    through a Transformer, are flattened, concatenated with the layer-normalised
    continuous features (when there are any), and fed to an MLP that outputs one
    scalar per row.

    Args:
        cardinalities: number of embedding rows per categorical field, in column order.
        num_cont: number of continuous features.
        dim: embedding / token size.
        depth, heads, dim_head, dropout: forwarded to ``Transformer``.
    """

    def __init__(self, cardinalities, num_cont, dim=32, depth=2, heads=4, dim_head=16, dropout=0.0):
        super().__init__()
        self.num_cat = len(cardinalities)
        self.num_cont = num_cont
        has_cont = num_cont > 0

        # One embedding table per categorical field.
        field_tables = [nn.Embedding(card, dim) for card in cardinalities]
        self.embeds = nn.ModuleList(field_tables)
        self.transformer = Transformer(
            dim=dim, depth=depth, heads=heads, dim_head=dim_head, dropout=dropout
        )
        self.norm_cont = nn.LayerNorm(num_cont) if has_cont else nn.Identity()

        cont_width = num_cont if has_cont else 0
        mlp_in = dim * self.num_cat + cont_width
        self.head = nn.Sequential(
            nn.Linear(mlp_in, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
        )

    def forward(self, x_categ, x_cont):
        """Return predictions of shape (B,).

        ``x_categ`` holds integer ids indexed as ``x_categ[:, i]`` per field;
        ``x_cont`` holds the continuous features and is ignored when ``num_cont <= 0``.
        """
        per_field = [table(x_categ[:, idx]) for idx, table in enumerate(self.embeds)]
        tokens = torch.stack(per_field, dim=1)        # (B, N_cat, D)
        tokens = self.transformer(tokens)             # (B, N_cat, D)
        flat = tokens.reshape(tokens.size(0), -1)     # (B, N_cat * D)
        if self.num_cont <= 0:
            return self.head(flat).squeeze(1)
        joined = torch.cat([flat, self.norm_cont(x_cont)], dim=-1)
        return self.head(joined).squeeze(1)

# 7) Entrenamiento/evaluación (rápidos en CPU)
def train_epoch(model, loader, opt, lossf, device):
    """Run one optimisation pass over ``loader`` and return the mean loss.

    Each batch loss is weighted by its batch size, and the total is divided by
    ``len(loader.dataset)``.

    Args:
        model: module called as ``model(x_categ, x_cont)``.
        loader: iterable of ``(x_categ, x_cont, y)`` batches exposing ``.dataset``.
        opt: optimizer stepped once per batch.
        lossf: callable ``lossf(pred, target)`` returning a scalar tensor.
        device: device the batches are moved to.
    """
    model.train()
    weighted_sum = 0.0
    for cat_ids, cont_feats, targets in loader:
        cat_ids = cat_ids.to(device)
        cont_feats = cont_feats.to(device)
        targets = targets.to(device)
        opt.zero_grad()
        batch_loss = lossf(model(cat_ids, cont_feats), targets)
        batch_loss.backward()
        opt.step()
        weighted_sum += batch_loss.item() * targets.size(0)
    return weighted_sum / len(loader.dataset)

@torch.no_grad()
def evaluate(model, loader, device):
    """Score ``model`` on every batch of ``loader`` without tracking gradients.

    ``loader`` yields ``(x_categ, x_cont, y)`` batches. Predictions and targets
    are gathered over all batches and compared once.

    Returns:
        dict with keys "MSE", "RMSE", "MAE", "MAPE" and "R2".
    """
    model.eval()
    targets, preds = [], []
    for cat_ids, cont_feats, batch_y in loader:
        batch_pred = model(cat_ids.to(device), cont_feats.to(device)).cpu().numpy()
        preds.append(batch_pred)
        targets.append(batch_y.numpy())
    y_all = np.concatenate(targets)
    p_all = np.concatenate(preds)
    mse = mean_squared_error(y_all, p_all)
    return {
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "MAE": mean_absolute_error(y_all, p_all),
        "MAPE": mape(y_all, p_all),
        "R2": r2_score(y_all, p_all),
    }

# 8) Una corrida rápida (3 épocas, dim=32, depth=2)
def run_one_tabtx(seed: int, epochs=3, dim=32, depth=2, heads=4):
    """Train one TabTransformer on CPU from a seeded start and score it.

    Reads the notebook-level ``num_of_unique``, ``cat_features``, ``num_features``
    and the ``train_loader`` / ``valid_loader`` / ``test_loader`` objects.

    Args:
        seed: value used to seed both torch and NumPy.
        epochs: number of passes over ``train_loader``.
        dim, depth, heads: forwarded to ``TabTransformer``.

    Returns:
        ``(val_metrics, test_metrics)`` as produced by ``evaluate``.
    """
    torch.manual_seed(seed)
    np.random.seed(seed)
    cpu = torch.device("cpu")

    # Align cardinalities with cat_features by column name.
    # NOTE(review): assumes num_of_unique is ordered as Store, Dept, Type and that
    # the categorical ids are integers in 0..card-1 — confirm against pre_process.
    card_by_name = {
        name: card for name, card in zip(['Store','Dept','Type'], num_of_unique)
    }
    cards = [int(card_by_name[col]) for col in cat_features]

    net = TabTransformer(
        cardinalities=cards,
        num_cont=len(num_features),
        dim=dim,
        depth=depth,
        heads=heads,
    ).to(cpu)
    optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
    criterion = nn.MSELoss()

    for _epoch in range(epochs):
        train_epoch(net, train_loader, optimizer, criterion, cpu)

    val_scores = evaluate(net, valid_loader, cpu)
    test_scores = evaluate(net, test_loader, cpu)
    return val_scores, test_scores

# 9) Launch 15 runs (seeds 0..14) in parallel on CPU; each returns (val, test).
results = Parallel(n_jobs=-1, backend="loky", verbose=10)(
    delayed(run_one_tabtx)(seed, epochs=3, dim=32, depth=2, heads=4)
    for seed in range(15)
)
runs_val  = [pair[0] for pair in results]
runs_test = [pair[1] for pair in results]

# 10) Mean ± standard deviation across runs, per split.
mean_val, std_val   = summarize_runs(runs_val)
mean_test, std_test = summarize_runs(runs_test)

valid_report = fmt_pm(mean_val, std_val)
test_report = fmt_pm(mean_test, std_test)
print("TabTransformer_fast — Valid:", valid_report)
print("TabTransformer_fast — Test :", test_report)

# 11) Persist the aggregated means (only the means are passed to save_metrics).
save_metrics("TabTransformer_fast_15runs_pm", mean_val, mean_test)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:  4.5min
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed: 11.1min
[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 16.9min finished


TabTransformer_fast — Valid: {'MSE': '47061047.87 ± 8648948.59', 'RMSE': '6833.17 ± 628.60', 'MAE': '4311.93 ± 415.70', 'MAPE': '6249863168.00 ± 5541392896.00', 'R2': '0.808 ± 0.035'}
TabTransformer_fast — Test : {'MSE': '41051326.67 ± 7601130.12', 'RMSE': '6381.41 ± 593.66', 'MAE': '4060.59 ± 388.34', 'MAPE': '3969446656.00 ± 2926473984.00', 'R2': '0.825 ± 0.032'}
✓ Métricas guardadas en /content/drive/MyDrive/Modelo/outputs/resultados_modelos.csv y en metrics_TabTransformer_fast_15runs_pm.json
