In [None]:
# ============================================================
# 1️⃣ Cargar datos procesados y preprocesador
# ============================================================

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ks_2samp  # Kolmogorov-Smirnov Test
import joblib
import os

base_path = os.path.abspath('../data')

# Cargar datasets
X_train = np.load(os.path.join(base_path, 'X_train.npy'), allow_pickle=True)
X_test = np.load(os.path.join(base_path, 'X_test.npy'), allow_pickle=True)
y_train = np.load(os.path.join(base_path, 'y_train.npy'), allow_pickle=True)
y_test = np.load(os.path.join(base_path, 'y_test.npy'), allow_pickle=True)

# Cargar preprocesador
preprocessor = joblib.load(os.path.join(base_path, 'preprocessor.pkl'))

print("✅ Datos cargados correctamente.")
print(f"X_train: {X_train.shape}, X_test: {X_test.shape}")




In [17]:
np.random.seed(42)
X_new = X_test.copy().astype(float)
X_new[:, 0] *= np.random.normal(1.1, 0.05, len(X_new))
X_new[:, 1] += np.random.normal(0, 0.2, len(X_new))

st.info("Nuevos datos simulados para análisis de drift.")



DeltaGenerator()

In [18]:
def ks_test(base, new):
    """KS-test entre base y nuevos datos"""
    results = []
    for i in range(base.shape[1]):
        stat, p_val = ks_2samp(base[:, i], new[:, i])
        results.append({
            'Feature_Index': i,
            'KS_Statistic': round(stat, 4),
            'P_Value': round(p_val, 4),
            'Drift': 'Sí' if p_val < 0.05 else 'No'
        })
    return pd.DataFrame(results)


def calculate_psi(base, new, bins=10):
    """Calcula PSI (Population Stability Index)"""
    def psi_single(base_col, new_col, bins):
        base_perc, _ = np.histogram(base_col, bins=bins)
        new_perc, _ = np.histogram(new_col, bins=bins)
        base_perc = base_perc / len(base_col)
        new_perc = new_perc / len(new_col)
        new_perc = np.where(new_perc == 0, 1e-6, new_perc)
        base_perc = np.where(base_perc == 0, 1e-6, base_perc)
        psi = np.sum((base_perc - new_perc) * np.log(base_perc / new_perc))
        return psi

    psi_values = [psi_single(base[:, i], new[:, i], bins) for i in range(base.shape[1])]
    df_psi = pd.DataFrame({'Feature_Index': range(len(psi_values)), 'PSI': np.round(psi_values, 4)})
    df_psi['Nivel'] = pd.cut(df_psi['PSI'], bins=[-1, 0.1, 0.25, 1],
                             labels=['Estable', 'Moderado', 'Inestable'])
    return df_psi


In [19]:
df_ks = ks_test(X_train, X_new)
df_psi = calculate_psi(X_train, X_new)
df_drift = pd.merge(df_ks, df_psi, on="Feature_Index")

st.subheader("Resultados del análisis de drift (KS-test + PSI)")
st.dataframe(df_drift.head(10), use_container_width=True)

2025-11-10 20:48:02.506 Please replace `use_container_width` with `width`.

`use_container_width` will be removed after 2025-12-31.

For `use_container_width=True`, use `width='stretch'`. For `use_container_width=False`, use `width='content'`.


DeltaGenerator()

In [20]:
st.markdown("### Distribuciones comparadas")

num_features_to_plot = 4
fig, axs = plt.subplots(2, 2, figsize=(12, 8))
for i in range(num_features_to_plot):
    sns.kdeplot(X_train[:, i], label="Entrenamiento", fill=True, ax=axs[i//2, i%2], alpha=0.4)
    sns.kdeplot(X_new[:, i], label="Nuevos datos", fill=True, ax=axs[i//2, i%2], alpha=0.4)
    axs[i//2, i%2].set_title(f"Feature {i} | Drift: {df_drift.loc[i, 'Drift']} | PSI: {df_drift.loc[i, 'PSI']}")
    axs[i//2, i%2].legend()

plt.tight_layout()
st.pyplot(fig)



DeltaGenerator()

In [21]:
st.markdown("### Reporte visual tipo semáforo (PSI)")

fig, ax = plt.subplots(figsize=(8, 4))
colors = df_drift['Nivel'].map({'Estable': 'green', 'Moderado': 'gold', 'Inestable': 'red'})
ax.barh(df_drift['Feature_Index'], df_drift['PSI'], color=colors)
ax.set_xlabel("PSI (Population Stability Index)")
ax.set_ylabel("Feature Index")
ax.set_title("Monitoreo de estabilidad por variable")
st.pyplot(fig)



DeltaGenerator()



DeltaGenerator()