# **Modelo TabTransformer**
con Pesos por clase

In [None]:
import os
# Environment switches set before the ML libraries are imported. The notes on the
# Lightning-related keys restate the notebook author's intent; whether the installed
# Lightning release honours each key has not been verified here.
_runtime_env = {
    "PYTORCH_LIGHTNING_DISABLE_PROGRESS_BAR": "1",  # intended to switch off the progress bar
    "LIGHTNING_LAUNCHER": "none",                   # intended to avoid the distributed launcher
    "WORLD_SIZE": "1",                              # single process
    "PL_TORCH_DISTRIBUTED_BACKEND": "",             # no distributed backend
    "CUDA_VISIBLE_DEVICES": "",                     # empty list hides every CUDA device (CPU only)
}
os.environ.update(_runtime_env)


NameError: name 'os' is not defined

In [None]:
import time
import numpy as np
import pandas as pd
import torch
import random

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report, ConfusionMatrixDisplay

from pytorch_tabular import TabularModel
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models.tab_transformer.config import TabTransformerConfig
from pytorch_lightning import seed_everything

import matplotlib.pyplot as plt

In [None]:
# ---------- 1) Load and prepare the data (same recipe as the TabNet notebook) ----------
# The CSV is looked up in a "dataset" folder next to the current working directory.
ruta_carpeta_actual = os.getcwd()
ruta_carpeta_raiz   = os.path.dirname(ruta_carpeta_actual)
csv_path  = os.path.join(ruta_carpeta_raiz, "dataset", "PuntosMuestra_CR_2023.csv")
df = pd.read_csv(csv_path)

# Drop category 2 and the non-predictor columns (silently skipped when absent).
df = df[df["CATEGORIA"] != 2].copy()
df = df.drop(columns=["lon", "lat", "year"], errors="ignore")

# Re-index the remaining class labels to 0..K-1, preserving their sorted order.
clases_ordenadas = sorted(df["CATEGORIA"].unique())
mapa = {old: new for new, old in enumerate(clases_ordenadas)}
df["CATEGORIA"] = df["CATEGORIA"].map(mapa)

# Separate predictors (every column except the target) from the target.
feat_cols = [c for c in df.columns if c != "CATEGORIA"]
X = df[feat_cols].values
y = df["CATEGORIA"].values

# Stratified 70/30 train/test split, then a stratified 80/20 train/validation split
# inside the training portion.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=42
)
X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train, test_size=0.20, stratify=y_train, random_state=42
)

# Fit the scaler only on the rows the model actually trains on. Fitting it before the
# train/validation split would let validation rows influence the normalisation
# statistics (preprocessing leakage). The split itself is unaffected by this ordering:
# it depends only on the labels, the sizes and the random seed.
scaler = StandardScaler()
X_tr    = scaler.fit_transform(X_tr)
X_val   = scaler.transform(X_val)
X_test  = scaler.transform(X_test)
X_train = scaler.transform(X_train)  # kept in scaled form, as it was before this change

# Wrap the arrays in DataFrames (feature columns + target) for TabularModel.
train_df = pd.DataFrame(X_tr, columns=feat_cols)
train_df["CATEGORIA"] = y_tr
valid_df = pd.DataFrame(X_val, columns=feat_cols)
valid_df["CATEGORIA"] = y_val
test_df = pd.DataFrame(X_test, columns=feat_cols)
test_df["CATEGORIA"] = y_test

num_classes = len(np.unique(y))
print("Clases (mapeadas):", clases_ordenadas, "=>", list(range(num_classes)))

# ---------- 2) Configure TabTransformer ----------
# Trainer: fixed batch size, learning-rate finder disabled. max_epochs=1 is a smoke-test
# value; raise it for real runs (100 to match the TabNet run, 10-20 for quick experiments).
trainer_config = TrainerConfig(
    max_epochs=1,
    batch_size=256,
    auto_lr_find=False,
)

# Optimizer settings are left at the library defaults.
optimizer_config = OptimizerConfig()

# Only accuracy is tracked as the built-in metric; F1 is computed afterwards with sklearn.
model_config = TabTransformerConfig(
    metrics=["accuracy"],
    task="classification",
)

# Every predictor is declared continuous; categorical columns, if there were any,
# would be listed in categorical_cols.
data_config = DataConfig(
    categorical_cols=[],
    continuous_cols=feat_cols,
    target=["CATEGORIA"],
)

tabular_model = TabularModel(
    trainer_config=trainer_config,
    optimizer_config=optimizer_config,
    model_config=model_config,
    data_config=data_config,
)

# ---------- 3) Train ----------
# Wall-clock time of the fit call is kept in fit_s for the report below.
t0 = time.perf_counter()
tabular_model.fit(train=train_df, validation=valid_df, callbacks=[])
fit_s = time.perf_counter() - t0

# ---------- 4) Evaluate on TEST ----------
# Wall-clock time of the predict call is kept in pred_s.
t0 = time.perf_counter()
preds = tabular_model.predict(test_df)
pred_s = time.perf_counter() - t0

y_true = test_df["CATEGORIA"].to_numpy()
# Depending on the installed pytorch_tabular release, the predicted-label column is
# named either "prediction" or "<target>_prediction". Accept both so the cell does not
# fail with a KeyError after a library upgrade.
pred_col = "prediction" if "prediction" in preds.columns else "CATEGORIA_prediction"
y_pred = np.asarray(preds[pred_col])

# Overall accuracy, macro-averaged F1 and the confusion matrix over all mapped classes.
oa  = accuracy_score(y_true, y_pred)
f1m = f1_score(y_true, y_pred, average="macro")
cm  = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))

print(f"\n⏱ Entrenamiento: {fit_s:.2f}s | ⏱ Inferencia: {pred_s:.2f}s ({pred_s/len(y_true)*1000:.2f} ms/muestra)")
print(f"OA: {oa:.4f} | F1-macro: {f1m:.4f}")
print("Matriz de confusión:\n", cm)
print("\nReporte por clase:\n", classification_report(y_true, y_pred, digits=4))

In [None]:
# Recompute the test-set metrics from y_true / y_pred and derive a row-normalised
# (percentage) confusion matrix for plotting.
class_ids = list(range(num_classes))
oa = accuracy_score(y_true, y_pred)
f1m = f1_score(y_true, y_pred, average="macro")
cm = confusion_matrix(y_true, y_pred, labels=class_ids)

# Each row is divided by its own total, so every cell becomes a percentage of that row.
# A row that sums to zero yields NaN, which nan_to_num turns into 0.
row_totals = cm.sum(axis=1, keepdims=True)
cm_row_percentage = np.nan_to_num(cm.astype('float') / row_totals * 100)

timing_line = f"\n⏱ Entrenamiento: {fit_s:.2f}s | ⏱ Inferencia: {pred_s:.2f}s ({pred_s/len(y_true)*1000:.2f} ms/muestra)"
score_line = f"OA: {oa:.4f} | F1-macro: {f1m:.4f}"
per_class_report = classification_report(y_true, y_pred, digits=4)

print(timing_line)
print(score_line)
print("Matriz de confusión:\n", cm)
print("\nReporte por clase:\n", per_class_report)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Confusion matrix with raw counts for the test set.
ConfusionMatrixDisplay(cm).plot(cmap='Blues')
plt.title("Matriz de confusión - TabTransformer (Test)")
plt.show()

# Row-percentage confusion matrix. The matrix holds floats, and the default cell format
# for floats ('.2g') keeps only two significant digits and renders 100.0 as "1e+02",
# so an explicit fixed-point format is passed instead.
ConfusionMatrixDisplay(cm_row_percentage).plot(cmap='Blues', values_format='.1f')
plt.title("Matriz de confusión (%) - TabTransformer (Test)")
plt.show()