# Definición de parámetros

In [None]:
# === HANUMI — Percentiles of Number (S3N) and Factor (S3N/AVG177N) ============
# Calibration cell. For the selected sub-subsegments it builds, per customer,
# the rolling 3-day count of inbound cash transactions (S3N) and the rolling
# mean of the lagged S3N (AVG177N), pools every (customer, day) point and
# reports the percentiles listed in NUMBER_QS / FACTOR_QS.
import math

import numpy as np
import pandas as pd

pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

# -------- Parameters --------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-1", "I-2"]
NUMBER_QS = [0.85, 0.90, 0.95, 0.97, 0.99]   # percentiles for {var.Number}
FACTOR_QS = [0.95, 0.97, 0.99]               # percentiles for {var.Factor}


def _percentile_label(q):
    """Return the display label for quantile *q* (0.95 -> "p95", 0.995 -> "p99.5")."""
    # q * 100 is not exact in binary floating point (0.29 * 100 == 28.999...),
    # so truncating with int() can name the wrong percentile. The "g" format
    # rounds to 6 significant digits and keeps fractional percentiles readable.
    return f"p{q * 100:g}"


def _ceil_or_nan(value):
    """Return *value* rounded up to an int, or NaN when it is missing or infinite."""
    if pd.notna(value) and np.isfinite(value):
        return int(math.ceil(value))
    return np.nan


# -------- Load + HANUMI filter (Inbound Cash) --------
# low_memory=False lets pandas infer every column dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns on load.
df = pd.read_csv(
    PATH,
    dtype={"customer_id": "string"},
    encoding="utf-8-sig",
    low_memory=False,
)
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
df["tx_type"] = df["tx_type"].astype(str).str.title()

# SUBSUBSEGMENTS may be a single label or an iterable of labels.
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# Rows without a customer or without a parseable timestamp cannot be placed in
# a per-customer daily series, so they are excluded up front.
mask = (
    df["tx_direction"].eq("Inbound")
    & df["tx_type"].eq("Cash")
    & df["customer_id"].notna()
    & df["tx_date_time"].notna()
)
g = df.loc[mask, ["customer_id", "tx_date_time"]].copy()

if g.empty:
    print("No hay transacciones elegibles para HANUMI.")
else:
    num_points = []
    fac_points = []

    for cid, sub in g.groupby("customer_id", sort=False):
        # Daily transaction count. resample("D") yields a gap-free,
        # chronologically ordered index and sums empty days to 0.
        daily_cnt = sub.set_index("tx_date_time").assign(x=1)["x"].resample("D").sum()

        if daily_cnt.empty:
            continue

        # S3N: sum of daily counts over a 3-day window ending on each day.
        S3N = daily_cnt.rolling("3D").sum()

        # AVG177N: mean of S3N lagged by 3 daily rows over a 177-day window,
        # intended to approximate the history in [t-180d, t-3d].
        # min_periods=1 means a full 180 days of history is not required.
        AVG177N = S3N.shift(3).rolling("177D", min_periods=1).mean()

        # Number: points with S3N > 0.
        ok_num = S3N.notna() & (S3N > 0)
        if ok_num.any():
            num_points.append(S3N.loc[ok_num])

        # Factor: points with S3N > 0 and AVG177N > 0.
        ok_fac = ok_num & AVG177N.notna() & (AVG177N > 0)
        if ok_fac.any():
            ratio = S3N.loc[ok_fac] / AVG177N.loc[ok_fac]
            fac_points.append(ratio.replace([np.inf, -np.inf], np.nan).dropna())

    S_num = pd.concat(num_points) if num_points else pd.Series(dtype=float)
    S_fac = pd.concat(fac_points) if fac_points else pd.Series(dtype=float)

    # With no points, fall back to an all-NaN series so the tables still render.
    num_q = S_num.quantile(NUMBER_QS) if len(S_num) else pd.Series(index=NUMBER_QS, dtype=float)
    fac_q = S_fac.quantile(FACTOR_QS) if len(S_fac) else pd.Series(index=FACTOR_QS, dtype=float)

    df_number = pd.DataFrame({
        "percentil":      [_percentile_label(q) for q in NUMBER_QS],
        "Number_raw_S3N": [num_q.get(q, np.nan) for q in NUMBER_QS],
        "Number_ceiled":  [_ceil_or_nan(num_q.get(q, np.nan)) for q in NUMBER_QS],
    })

    df_factor = pd.DataFrame({
        "percentil":     [_percentile_label(q) for q in FACTOR_QS],
        "Factor_raw":    [fac_q.get(q, np.nan) for q in FACTOR_QS],
        "Factor_ceiled": [_ceil_or_nan(fac_q.get(q, np.nan)) for q in FACTOR_QS],
    })

    print("=== HANUMI — Resultados (sin requisito estricto de 180d) ===")
    print(f"Puntos Number (S3N): {len(S_num):,} | Puntos Factor (S3N/AVG177N): {len(S_fac):,}\n")
    print("— Number (S3N = # IN Cash en 3 días) —")
    display(df_number)
    print("— Factor (S3N / AVG177N) —")
    display(df_factor)



  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== HANUMI — Resultados (sin requisito estricto de 180d) ===
Puntos Number (S3N): 15,350 | Puntos Factor (S3N/AVG177N): 13,804

— Number (S3N = # IN Cash en 3 días) —


Unnamed: 0,percentil,Number_raw_S3N,Number_ceiled
0,p85,2.0,2
1,p90,2.0,2
2,p95,3.0,3
3,p97,3.0,3
4,p99,4.0,4


— Factor (S3N / AVG177N) —


Unnamed: 0,percentil,Factor_raw,Factor_ceiled
0,p95,10.89,11
1,p97,13.78,14
2,p99,22.25,23


# Simulación de alertas

In [None]:
# === HANUMI — Alert sensitivity (Actual vs p95/p97/p99) ========================
# Rule: within 3 days, # IN > Factor × historical 3-day average (177 prior days),
#       and additionally # IN over 3 days >= Number.
# Counting unit = (customer, day) windows that satisfy the condition.

import numpy as np
import pandas as pd

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = "I-1"               # <-- adjust the sub-subsegment
# One entry per scenario; every table below is derived from this dict, so
# scenarios can be added, removed or renamed here without touching the code.
PARAMS = {
    "Actual": {"Number": 2, "Factor": 59},
    "p95":    {"Number": 3, "Factor": 38},
    "p97":    {"Number": 3, "Factor": 52},
    "p99":    {"Number": 3, "Factor": 76},
}
# -------------------------------------------------------------------------------


def mask_hanumi(dfm: pd.DataFrame, Number: float, Factor: float) -> pd.Series:
    """Return a boolean mask of the rows of *dfm* that trigger the HANUMI rule.

    A row triggers when S3N >= Number, AVG177N > 0 and the row's "Factor"
    column is strictly greater than the *Factor* threshold. *dfm* must have
    the columns "S3N", "AVG177N" and "Factor".
    """
    m = (
        (dfm["S3N"] >= Number)
        & (dfm["AVG177N"] > 0)
        & (dfm["Factor"] > Factor)
    )
    return m.fillna(False)


# --- Load base data and apply the HANUMI filter (Inbound Cash) ---
# low_memory=False lets pandas infer every column dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns on load.
df = pd.read_csv(
    PATH,
    dtype={"customer_id": "string"},
    encoding="utf-8-sig",
    low_memory=False,
)
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
df["tx_type"] = df["tx_type"].astype(str).str.title()

# Sub-subsegment filter; SUBSUBSEGMENTS may be one label or an iterable of labels.
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

mask = (
    df["tx_direction"].eq("Inbound")
    & df["tx_type"].eq("Cash")
    & df["customer_id"].notna()
    & df["tx_date_time"].notna()
)
g = df.loc[mask, ["customer_id", "tx_date_time"]].copy()

if g.empty:
    print("No hay transacciones elegibles para HANUMI en el archivo dado.")
else:
    # --- Build daily per-customer metrics: S3N and AVG177N ---
    parts = []
    for cid, sub in g.groupby("customer_id", sort=False):
        # Daily transaction count on a gap-free daily index (empty days sum to 0).
        daily_n = (sub.set_index("tx_date_time")
                      .assign(x=1)["x"]
                      .resample("D").sum()
                      .astype(float))

        if daily_n.empty:
            continue

        # Number of inbound transactions in the 3-day window ending on each day.
        S3N = daily_n.rolling("3D").sum()
        # Historical 3-day average: mean of S3N lagged by 3 daily rows over a
        # 177-day window; min_periods=1 accepts a partial history.
        AVG177N = S3N.shift(3).rolling("177D", min_periods=1).mean()
        parts.append(pd.DataFrame({
            "customer_id": cid,
            "date": daily_n.index,
            "S3N": S3N.values,
            "AVG177N": AVG177N.values,
        }))

    M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","date","S3N","AVG177N"])

    if M.empty:
        print("No se pudieron construir ventanas de 3 días (M vacío).")
    else:
        # Factor is left as NaN where there is no positive history to divide by.
        M["Factor"] = np.where(M["AVG177N"] > 0, M["S3N"] / M["AVG177N"], np.nan)

        # Scenario order follows the insertion order of PARAMS.
        scenarios = list(PARAMS)

        # --- Parameter table (shows what is being tested) ---
        param_tbl = (pd.DataFrame(PARAMS).T
                        .loc[scenarios]
                        .rename_axis("escenario")
                        .reset_index())
        print("=== HANUMI — Parámetros por escenario ===")
        display(param_tbl)

        # --- Alert count per scenario: unique (customer, day) windows ---
        counts = {}
        for k, v in PARAMS.items():
            m = mask_hanumi(M, **v)
            counts[k] = int(M.loc[m, ["customer_id", "date"]].drop_duplicates().shape[0])

        # Wide table with one "alertas_<scenario>" column per PARAMS entry.
        alerts_wide = pd.DataFrame([
            {f"alertas_{name.lower()}": counts[name] for name in scenarios}
        ])

        print("=== HANUMI — Alertas por escenario (ventanas cliente–día) ===")
        display(alerts_wide)


=== HANUMI — Parámetros por escenario ===


Unnamed: 0,escenario,Number,Factor
0,Actual,2,59
1,p95,3,38
2,p97,3,52
3,p99,3,76


=== HANUMI — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p95,alertas_p97,alertas_p99
0,4,7,4,0
