# 📊 Scoring Masivo — Champion & Challenger

Este notebook aplica un scoring robusto, diseñado para tolerar columnas ausentes en los datos de entrada:

1. Carga de nuevas solicitudes.
2. Carga de artefactos entrenados (Champion & Challenger).
3. Transformación y codificación de datos.
4. Scoring Champion (WOE + Regresión Logística) sin errores de dimensiones.
5. Scoring Challenger (RiskNN) con normalización y calibración.
6. Conversión de probabilidades a score 300–900.
7. Métricas opcionales si existe `target`.
8. Exportación de resultados.

**Versión:** Junio 2025 · *Pipeline listo para producción*

In [1]:
# %% 1) IMPORTACIONES & CONFIGURACIÓN GLOBAL
import json
import os
import random
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import torch
from torch import nn

def resolve_proj_root(default: str) -> Path:
    """Return the project root directory.

    The CREDIT_RISK_PROJ_ROOT environment variable, when set to a non-empty
    value, overrides `default`, so the notebook can run on machines where the
    repository is not checked out at the original author's absolute path.

    Parameters
    ----------
    default : str
        Path used when the environment variable is unset or empty.

    Returns
    -------
    pathlib.Path
    """
    return Path(os.environ.get("CREDIT_RISK_PROJ_ROOT") or default)


# Directories (everything is derived from PROJ_ROOT)
PROJ_ROOT  = resolve_proj_root(r"C:/GitHub/Trading/credit-risk-ml-nn")
DATA_DIR   = PROJ_ROOT / "data/new_solicitude"
REPORT_DIR = PROJ_ROOT / "reports"
MODEL_DIR  = REPORT_DIR / "modeloFinal"

# Seed and device
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Ask cuDNN for deterministic kernels and disable its auto-tuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark     = False

print("Configuración lista — Dispositivo:", DEVICE)

Configuración lista — Dispositivo: cuda


In [2]:
# %% 2) UTILIDADES TEMPERATURE SCALING
import torch
from torch import nn

class TempScaling(nn.Module):
    """Temperature scaling for logits: calibrated_logit = original_logit / T."""

    def __init__(self):
        super().__init__()
        # Scalar (0-dim) learnable temperature; 1.0 leaves the logits unchanged.
        self.T = nn.Parameter(torch.tensor(1.0))

    def forward(self, logits: torch.Tensor) -> torch.Tensor:
        # Floor the temperature at 1e-3 so the division never uses a value
        # at or below zero.
        safe_temperature = torch.clamp(self.T, min=1e-3)
        return torch.div(logits, safe_temperature)

def optimize_temperature(logits: torch.Tensor,
                         labels: torch.Tensor,
                         init_temp: float = 1.0,
                         max_iter: int = 50):
    """Fit the temperature of a `TempScaling` module with LBFGS.

    Minimises binary cross-entropy between `logits / T` and `labels`.

    Parameters
    ----------
    logits : torch.Tensor
        Uncalibrated logits. They are detached and moved to `DEVICE`, so the
        caller's autograd graph is never touched.
    labels : torch.Tensor
        Binary targets with the same shape as `logits`. Any numeric or bool
        dtype is accepted; they are cast to the dtype of `logits`.
    init_temp : float
        Starting value for the temperature.
    max_iter : int
        Maximum LBFGS iterations for the single optimizer step.

    Returns
    -------
    (float, TempScaling)
        The fitted raw temperature parameter and the fitted module.
    """
    ts = TempScaling().to(DEVICE)
    ts.T.data.fill_(init_temp)

    # Keep inputs on the same device as the module. Detach them because the
    # closure below may call backward several times, and only T should
    # receive gradients.
    logits = logits.detach().to(DEVICE)
    # BCEWithLogitsLoss needs floating-point targets matching the input dtype.
    labels = labels.detach().to(device=DEVICE, dtype=logits.dtype)

    optimizer = torch.optim.LBFGS([ts.T], lr=0.01, max_iter=max_iter)
    criterion = nn.BCEWithLogitsLoss()

    def closure():
        # LBFGS re-evaluates the loss through this closure as needed.
        optimizer.zero_grad(set_to_none=True)
        loss = criterion(ts(logits), labels)
        loss.backward()
        return loss

    optimizer.step(closure)
    return ts.T.item(), ts

In [3]:
# %% 3) CARGA DE NUEVAS SOLICITUDES
# Input file with the new credit applications to score.
CSV_IN = DATA_DIR.joinpath("nuevas_solicitudes.csv")
# Fail early with a readable message if the file is missing.
assert CSV_IN.exists(), f"No existe el archivo: {CSV_IN}"

df_new = pd.read_csv(CSV_IN)
print("Nuevas solicitudes cargadas:", df_new.shape)

Nuevas solicitudes cargadas: (250, 14)


In [4]:
# %% 4) CARGA DE ARTEFACTOS
import sys

# Make the project root importable so that `train.RiskNN` can be resolved.
_proj_root_str = str(PROJ_ROOT)
if _proj_root_str not in sys.path:
    sys.path.append(_proj_root_str)
from train import RiskNN

# NOTE(security): joblib.load and torch.load are pickle-based loaders.
# Only point them at artifact files you trust.

# Champion artifacts: fitted logistic regression and its WOE bins
_champion_dir = REPORT_DIR / "champion"
logreg_model = joblib.load(_champion_dir / "logreg_woe.pkl")
bins_woe = joblib.load(_champion_dir / "bins_woe.pkl")

# Challenger artifacts: metadata, category maps and numeric scaler
_metadata_text = (MODEL_DIR / "model_metadata.json").read_text()
meta = json.loads(_metadata_text)
cat_maps = joblib.load(MODEL_DIR / "cat_maps.pkl")
scaler = joblib.load(MODEL_DIR / "scaler.pkl")
cat_cols = meta["categorical_cols"]
numerical_cols = meta["numerical_cols"]
# Calibration temperature stored in the metadata; 1.0 (no scaling) if absent.
T_init = meta.get("opt_temperature", 1.0)

# Rebuild the network from the hyper-parameters recorded in the metadata,
# then restore its weights.
_risknn_kwargs = {
    "num_features": len(numerical_cols),
    "cat_dims": meta["cat_dims"],
    "emb_dims": meta["emb_dims"],
    "hidden": meta["hidden_layers"],
    "dropout": meta["dropout"],
}
model = RiskNN(**_risknn_kwargs).to(DEVICE)
_state_dict = torch.load(MODEL_DIR / "best_model_final.pth", map_location=DEVICE)
model.load_state_dict(_state_dict)
model.eval()  # inference mode
print("Artefactos cargados correctamente")

  from .autonotebook import tqdm as notebook_tqdm


Artefactos cargados correctamente


In [5]:
# %% 5) FUNCIÓN DE CODIFICACIÓN ROBUSTA
import numpy as np

def encode_df(df: pd.DataFrame):
    """Encode a raw applications frame into the arrays the challenger consumes.

    Reads the module-level `numerical_cols`, `cat_cols`, `scaler` and
    `cat_maps` loaded with the model artifacts. `df` is not modified.

    Numeric features
        Columns listed in `numerical_cols`, in that order. A column missing
        from `df`, and any NaN value, is filled with 0 before
        `scaler.transform` is applied.
    Categorical features
        Each column in `cat_cols` is mapped through `cat_maps[col]`. Values
        not present in the map use the map's "__unk__" entry (0 if the map
        has none). A column missing from `df` is encoded entirely as that
        unknown index instead of raising `KeyError`.

    Parameters
    ----------
    df : pd.DataFrame
        Raw input rows.

    Returns
    -------
    (np.ndarray, np.ndarray)
        `X_num` as float32 with one column per entry of `numerical_cols`, and
        `X_cat` as int64 with shape (len(df), len(cat_cols)). `X_cat` has
        zero columns when `cat_cols` is empty.
    """
    # reindex selects the numeric columns in model order and creates any
    # absent one as NaN, which fillna then turns into 0.
    num_frame = df.reindex(columns=numerical_cols).fillna(0)
    X_num = scaler.transform(num_frame)

    n_rows = len(df)
    encoded = []
    for col in cat_cols:
        mapping = cat_maps[col]
        unk_idx = mapping.get("__unk__", 0)
        if col in df.columns:
            codes = df[col].map(lambda v: mapping.get(v, unk_idx)).to_numpy()
        else:
            # Column absent from the input: every row is "unknown".
            codes = np.full(n_rows, unk_idx)
        encoded.append(codes)

    if encoded:
        X_cat = np.stack(encoded, axis=1).astype("int64")
    else:
        # np.stack rejects an empty list; return an explicit (n, 0) array.
        X_cat = np.empty((n_rows, 0), dtype="int64")

    return X_num.astype("float32"), X_cat

In [6]:
# %% 6) SCORING CHAMPION robusto (WOE + RegLog)
import scorecardpy as sc

# Apply the stored WOE bins to a copy of the applications.
df_woe = sc.woebin_ply(df_new.copy(), bins_woe)

# Align to the exact feature list the logistic regression was fitted on.
# Features missing from the WOE frame, and any NaN, become 0.0.
exp_cols = list(logreg_model.feature_names_in_)
X_woe = df_woe.reindex(columns=exp_cols).fillna(0.0)

# Column 1 of predict_proba is used as the default ("bad") probability.
champion_proba = logreg_model.predict_proba(X_woe)
p_bad = champion_proba[:, 1]
df_new["P_default_CH"] = p_bad
df_new["P_pay_CH"] = 1 - p_bad

print("Scoring Champion completado sin errores")

  import pkg_resources


[INFO] converting into woe values ...
Scoring Champion completado sin errores


In [7]:
# %% 7) SCORING CHALLENGER (RiskNN) con calibración
# Encode the applications into the (numeric, categorical) arrays for RiskNN.
num, cat = encode_df(df_new)

# Apply the same 1e-3 floor that TempScaling.forward uses, so a zero or
# negative temperature in the metadata cannot blow up or flip the logits.
temperature = max(float(T_init), 1e-3)

with torch.no_grad():
    logits = model(torch.from_numpy(num).to(DEVICE),
                   torch.from_numpy(cat).to(DEVICE)) / temperature
    # Flatten to 1-D so one probability per row is assigned below.
    # NOTE(review): RiskNN's output shape is not visible here. reshape(-1)
    # is a no-op for (N,) and flattens (N, 1).
    p_bad_nn = torch.sigmoid(logits).reshape(-1).cpu().numpy()

df_new["P_default_NN"] = p_bad_nn
df_new["P_pay_NN"]     = 1 - p_bad_nn

print("Scoring Challenger completado")



Scoring Challenger completado


In [8]:
# %% 8) CONVERTIR PROBABILIDADES A SCORE [300–900]
# Scorecard scaling: BASE_SCORE points at good:bad odds of BASE_ODDS, and
# every doubling of the odds adds PDO points.
BASE_SCORE, PDO, BASE_ODDS = 600, 20, 50
factor = PDO / np.log(2)
offset = BASE_SCORE - factor * np.log(BASE_ODDS)

def pd_to_score(pd, eps=1e-12):
    """Convert default probabilities into scores clipped to [300, 900].

    score = offset + factor * ln((1 - pd) / pd), rounded, then clipped.

    Parameters
    ----------
    pd : float, np.ndarray or pandas Series
        Probability of default. The name shadows the pandas alias inside this
        function; it is kept so keyword callers do not break.
    eps : float
        Probabilities are clipped to [eps, 1 - eps] before taking odds. A
        probability of exactly 0 or 1 then maps to the 900 / 300 bounds,
        with no ZeroDivisionError for scalars and no divide-by-zero warning
        for arrays.

    Returns
    -------
    Same kind of container as the input (Series in, Series out), holding
    rounded scores. NaN inputs stay NaN.
    """
    # Promote to float64 first: in float32, 1 - eps rounds back to 1.0 and
    # the clip would not protect the upper end.
    p = pd.astype("float64") if hasattr(pd, "astype") else np.float64(pd)
    p = np.clip(p, eps, 1.0 - eps)
    odds = (1.0 - p) / p
    return np.clip(np.round(offset + factor * np.log(odds)), 300, 900)

# Turn each model's default probability into a scorecard-style score.
champion_scores = pd_to_score(df_new["P_default_CH"])
challenger_scores = pd_to_score(df_new["P_default_NN"])
df_new["score_CH"] = champion_scores
df_new["score_NN"] = challenger_scores
print(" Conversión a score completada")

 Conversión a score completada


In [9]:
# %% 9) MÉTRICAS OPCIONALES (si existe 'target')
import sklearn.metrics as skm

# Metrics can only be computed when the input file carries the observed label.
if "target" not in df_new.columns:
    print(" Columna 'target' no detectada; métricas omitidas")
else:
    y_true = df_new["target"].astype(int)
    # ROC AUC of each model's default probability against the observed target.
    roc_ch = skm.roc_auc_score(y_true, df_new["P_default_CH"])
    roc_nn = skm.roc_auc_score(y_true, df_new["P_default_NN"])
    # NOTE(review): the recorded run of this cell printed a Challenger AUC of
    # 0.4299, below 0.5. Verify the challenger's label polarity and its
    # preprocessing before relying on its scores.
    metrics = {"ROC_CH": float(roc_ch), "ROC_NN": float(roc_nn)}
    # Persist the metrics next to the other reports.
    (REPORT_DIR/"test_metrics.json").write_text(json.dumps(metrics, indent=2))
    print(f" ROC Champion: {roc_ch:.4f} | ROC Challenger: {roc_nn:.4f}")

 ROC Champion: 0.7918 | ROC Challenger: 0.4299


In [10]:
# %% 10) EXPORTAR RESULTADOS
# Write the scored applications to a timestamped CSV under reports/predictions.
predictions_dir = REPORT_DIR/"predictions"
predictions_dir.mkdir(parents=True, exist_ok=True)
out = predictions_dir/f"predicciones_{pd.Timestamp.now():%Y%m%d_%H%M%S}.csv"
df_new.to_csv(out, index=False)
print("Exportado a:", out)

Exportado a: C:\GitHub\Trading\credit-risk-ml-nn\reports\predictions\predicciones_20250614_040631.csv
