# Definición de parámetros

In [None]:
# === HANUMO — Percentiles of Number (S3N) and Factor (S3N/AVG177N) ============
# Calibration cell. For the selected sub-subsegment(s) it keeps the Outbound
# Cash transactions, builds per-customer daily transaction counts and reports
# percentiles of:
#   * S3N    = number of transactions inside a rolling 3-day window;
#   * Factor = S3N / AVG177N, where AVG177N is the mean of the S3N values in
#              the 177-day window that ends 3 days before the current day.
import math

import numpy as np
import pandas as pd

pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

# -------- Parameters --------
PATH = "../../data/tx/datos_trx__with_subsub.csv"
SUBSUBSEGMENTS = "I-1"                # <-- sub-subsegment: one label or an iterable of labels
NUMBER_QS = [0.85, 0.90, 0.95, 0.97, 0.99]
FACTOR_QS = [0.95, 0.97, 0.99]


def percentile_label(q):
    """Return the display label for quantile ``q`` (0.97 -> "p97", 0.995 -> "p99.5").

    ``int(q * 100)`` truncates, so binary float error turns e.g. 0.57 into
    "p56" and fractional percentiles collapse onto their neighbour. The ``g``
    format rounds to 6 significant digits and drops trailing zeros instead.
    """
    return f"p{q * 100:g}"


def ceil_or_nan(value):
    """Return ``ceil(value)`` as an int, or NaN when ``value`` is missing or not finite."""
    if pd.isna(value) or not np.isfinite(value):
        return np.nan
    return int(math.ceil(value))


def as_label_set(labels):
    """Normalise a single label or an iterable of labels to a set of strings."""
    if isinstance(labels, str):
        return {labels}
    return set(map(str, labels))


def rolling_s3n_avg177n(timestamps):
    """Build the daily S3N and AVG177N series for one customer.

    Parameters
    ----------
    timestamps : datetime-like values, one per transaction. Missing values
        (NaT) are ignored.

    Returns
    -------
    (S3N, AVG177N) : two float Series indexed by calendar day, covering every
        day from the first to the last transaction. Both are empty when there
        are no valid timestamps.
    """
    stamps = pd.DatetimeIndex(timestamps)
    stamps = stamps[stamps.notna()]
    if stamps.empty:
        return pd.Series(dtype=float), pd.Series(dtype=float)

    # One unit per transaction, summed per calendar day; days without
    # transactions inside the observed range come out as 0.
    daily_cnt = pd.Series(1.0, index=stamps).sort_index().resample("D").sum()

    s3n = daily_cnt.rolling("3D").sum()
    # shift(3) moves the baseline back 3 daily rows so it does not overlap the
    # 3-day window being evaluated; min_periods=1 lets customers with a short
    # history still get a baseline.
    avg177n = s3n.shift(3).rolling("177D", min_periods=1).mean()
    return s3n, avg177n


def hanumo_points(tx):
    """Collect the S3N and Factor observations of every customer in ``tx``.

    Parameters
    ----------
    tx : DataFrame with columns ``customer_id`` and ``tx_date_time``.

    Returns
    -------
    (S_num, S_fac) : Series of S3N values that are > 0, and Series of
        S3N / AVG177N ratios for the days where AVG177N is also > 0.
    """
    num_points = []
    fac_points = []

    for _, sub in tx.groupby("customer_id", sort=False):
        s3n, avg177n = rolling_s3n_avg177n(sub["tx_date_time"])

        ok_num = s3n.notna() & (s3n > 0)
        if ok_num.any():
            num_points.append(s3n.loc[ok_num])

        ok_fac = ok_num & avg177n.notna() & (avg177n > 0)
        if ok_fac.any():
            ratio = s3n.loc[ok_fac] / avg177n.loc[ok_fac]
            fac_points.append(ratio.replace([np.inf, -np.inf], np.nan).dropna())

    s_num = pd.concat(num_points) if num_points else pd.Series(dtype=float)
    s_fac = pd.concat(fac_points) if fac_points else pd.Series(dtype=float)
    return s_num, s_fac


# In a notebook kernel __name__ is "__main__", so the cell runs as before and
# leaves the same variables in the namespace; the guard only keeps the helpers
# above importable without touching the CSV.
if __name__ == "__main__":
    # -------- Load + HANUMO filter (Outbound Cash) --------
    # low_memory=False makes pandas infer each column dtype from the whole
    # file rather than per chunk. NOTE(review): the recorded cell output shows
    # a warning pointing at this call, presumably a mixed-type DtypeWarning.
    df = pd.read_csv(
        PATH,
        dtype={"customer_id": "string"},
        encoding="utf-8-sig",
        low_memory=False,
    )
    df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
    df["tx_direction"] = df["tx_direction"].astype(str).str.title()
    df["tx_type"] = df["tx_type"].astype(str).str.title()

    target_labels = as_label_set(SUBSUBSEGMENTS)

    df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

    # Rows without customer id are excluded explicitly (groupby would drop
    # them anyway), matching the filter used by the alert-simulation cell.
    mask = (
        df["tx_direction"].eq("Outbound") &
        df["tx_type"].eq("Cash") &
        df["customer_id"].notna() &
        df["tx_date_time"].notna()
    )
    g = df.loc[mask, ["customer_id", "tx_date_time"]].copy()

    if g.empty:
        print("No hay transacciones elegibles para HANUMO.")
    else:
        S_num, S_fac = hanumo_points(g)

        num_q = S_num.quantile(NUMBER_QS) if len(S_num) else pd.Series(index=NUMBER_QS, dtype=float)
        fac_q = S_fac.quantile(FACTOR_QS) if len(S_fac) else pd.Series(index=FACTOR_QS, dtype=float)

        # Look each quantile up once and reuse it for the raw and ceiled columns.
        num_vals = [num_q.get(q, np.nan) for q in NUMBER_QS]
        fac_vals = [fac_q.get(q, np.nan) for q in FACTOR_QS]

        df_number = pd.DataFrame({
            "percentil":      [percentile_label(q) for q in NUMBER_QS],
            "Number_raw_S3N": num_vals,
            "Number_ceiled":  [ceil_or_nan(v) for v in num_vals],
        })

        df_factor = pd.DataFrame({
            "percentil":     [percentile_label(q) for q in FACTOR_QS],
            "Factor_raw":    fac_vals,
            "Factor_ceiled": [ceil_or_nan(v) for v in fac_vals],
        })

        print("=== HANUMO — Resultados (sin requisito estricto de 180d) ===")
        print(f"Puntos Number (S3N): {len(S_num):,} | Puntos Factor (S3N/AVG177N): {len(S_fac):,}\n")
        print("— Number (S3N = # OUT Cash en 3 días) —")
        display(df_number)
        print("— Factor (S3N / AVG177N) —")
        display(df_factor)


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== HANUMO — Resultados (sin requisito estricto de 180d) ===
Puntos Number (S3N): 1,267 | Puntos Factor (S3N/AVG177N): 1,213

— Number (S3N = # OUT Cash en 3 días) —


Unnamed: 0,percentil,Number_raw_S3N,Number_ceiled
0,p85,10.0,10
1,p90,13.0,13
2,p95,19.0,19
3,p97,21.02,22
4,p99,30.0,30


— Factor (S3N / AVG177N) —


Unnamed: 0,percentil,Factor_raw,Factor_ceiled
0,p95,7.63,8
1,p97,9.98,10
2,p99,15.64,16


# Simulación de alertas

In [5]:
# === HANUMO — Alert sensitivity (Actual vs p85/p90/p95/p97/p99) ===============
# Rule: within 3 days, # OUT > Factor × historical 3-day average (roughly the
#       previous 177 days), and additionally # OUT over 3 days ≥ Number.
# Counting unit = (customer, day) windows that satisfy the condition.

import numpy as np
import pandas as pd

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDIT HERE ----------------------------------------------------------------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-2"]               # <-- sub-subsegment: one label or an iterable of labels

# One entry per scenario; every scenario listed here is evaluated and reported.
# NOTE(review): the p95/p97/p99 thresholds equal the percentile output recorded
# for sub-subsegment "I-1"; confirm they are the intended ones for the segment
# selected above.
PARAMS = {
    "Actual": {"Number": 2, "Factor": 45},
    "p95":    {"Number": 19, "Factor": 8},
    "p97":    {"Number": 22, "Factor": 10},
    "p99":    {"Number": 30, "Factor": 16},
}
# -----------------------------------------------------------------------------


def as_label_set(labels):
    """Normalise a single label or an iterable of labels to a set of strings."""
    if isinstance(labels, str):
        return {labels}
    return set(map(str, labels))


def rolling_s3n_avg177n(timestamps):
    """Build the daily S3N and AVG177N series for one customer.

    Parameters
    ----------
    timestamps : datetime-like values, one per transaction. Missing values
        (NaT) are ignored.

    Returns
    -------
    (S3N, AVG177N) : two float Series indexed by calendar day, covering every
        day from the first to the last transaction. Both are empty when there
        are no valid timestamps.
    """
    stamps = pd.DatetimeIndex(timestamps)
    stamps = stamps[stamps.notna()]
    if stamps.empty:
        return pd.Series(dtype=float), pd.Series(dtype=float)

    # One unit per transaction, summed per calendar day; days without
    # transactions inside the observed range come out as 0.
    daily_n = pd.Series(1.0, index=stamps).sort_index().resample("D").sum()

    s3n = daily_n.rolling("3D").sum()
    # shift(3) moves the baseline back 3 daily rows so it does not overlap the
    # 3-day window being evaluated; min_periods=1 lets customers with a short
    # history still get a baseline.
    avg177n = s3n.shift(3).rolling("177D", min_periods=1).mean()
    return s3n, avg177n


def build_metrics(tx):
    """Return one row per (customer, day) with S3N, AVG177N and Factor.

    Parameters
    ----------
    tx : DataFrame with columns ``customer_id`` and ``tx_date_time``.

    Returns
    -------
    DataFrame with columns customer_id, date, S3N, AVG177N, Factor. Factor is
    S3N / AVG177N where AVG177N > 0 and NaN elsewhere. The frame is empty when
    no customer yields any day.
    """
    columns = ["customer_id", "date", "S3N", "AVG177N", "Factor"]
    parts = []
    for cid, sub in tx.groupby("customer_id", sort=False):
        s3n, avg177n = rolling_s3n_avg177n(sub["tx_date_time"])
        if s3n.empty:
            continue
        parts.append(pd.DataFrame({
            "customer_id": cid,
            "date": s3n.index,
            "S3N": s3n.values,
            "AVG177N": avg177n.values,
        }))

    if not parts:
        return pd.DataFrame(columns=columns)

    metrics = pd.concat(parts, ignore_index=True)
    metrics["Factor"] = np.where(
        metrics["AVG177N"] > 0, metrics["S3N"] / metrics["AVG177N"], np.nan
    )
    return metrics


def mask_hanumo(dfm: pd.DataFrame, Number: float, Factor: float) -> pd.Series:
    """Flag the rows of ``dfm`` that would raise a HANUMO alert.

    A row alerts when S3N >= Number, the baseline AVG177N is positive and the
    row's Factor column is strictly greater than the ``Factor`` threshold.
    ``dfm`` must contain the columns S3N, AVG177N and Factor.
    """
    m = (
        (dfm["S3N"] >= Number) &
        (dfm["AVG177N"] > 0) &
        (dfm["Factor"] > Factor)
    )
    return m.fillna(False)


def count_alerts(metrics, params):
    """Count distinct alerting (customer, day) windows for every scenario.

    ``params`` maps scenario name -> {"Number": ..., "Factor": ...}; the result
    maps the same names to integer counts.
    """
    counts = {}
    for name, thresholds in params.items():
        hit = mask_hanumo(metrics, **thresholds)
        counts[name] = int(
            metrics.loc[hit, ["customer_id", "date"]].drop_duplicates().shape[0]
        )
    return counts


def scenario_sequence(params, order):
    """List the scenarios of ``params``: those named in ``order`` first (in that
    order), then any remaining ones in their insertion order, so a scenario
    that is evaluated is never left out of the report."""
    known = [name for name in order if name in params]
    extra = [name for name in params if name not in order]
    return known + extra


def alerts_table(counts, scenarios):
    """Build the one-row wide table with an ``alertas_<scenario>`` column per scenario."""
    return pd.DataFrame([
        {f"alertas_{name.lower()}": counts[name] for name in scenarios}
    ])


# In a notebook kernel __name__ is "__main__", so the cell runs as before and
# leaves the same variables in the namespace; the guard only keeps the helpers
# above importable without touching the CSV.
if __name__ == "__main__":
    # --- Load and HANUMO filter (Outbound Cash) ---
    # low_memory=False makes pandas infer each column dtype from the whole
    # file rather than per chunk. NOTE(review): the recorded cell output shows
    # a warning pointing at this call, presumably a mixed-type DtypeWarning.
    df = pd.read_csv(
        PATH,
        dtype={"customer_id": "string"},
        encoding="utf-8-sig",
        low_memory=False,
    )
    df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
    df["tx_direction"] = df["tx_direction"].astype(str).str.title()
    df["tx_type"] = df["tx_type"].astype(str).str.title()

    # Filter by sub-subsegment
    target_labels = as_label_set(SUBSUBSEGMENTS)

    df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

    mask = (
        df["tx_direction"].eq("Outbound") &
        df["tx_type"].eq("Cash") &
        df["customer_id"].notna() &
        df["tx_date_time"].notna()
    )
    g = df.loc[mask, ["customer_id", "tx_date_time"]].copy()

    if g.empty:
        print("No hay transacciones elegibles para HANUMO en el archivo dado.")
    else:
        # --- Daily per-customer metrics: S3N, AVG177N and Factor ---
        M = build_metrics(g)

        if M.empty:
            print("No se pudieron construir ventanas de 3 días (M vacío).")
        else:
            # Parameter table
            order = ["Actual", "p85", "p90", "p95", "p97", "p99"]
            scenarios = scenario_sequence(PARAMS, order)
            param_tbl = (pd.DataFrame(PARAMS).T
                            .loc[scenarios]
                            .rename_axis("escenario")
                            .reset_index())
            print("=== HANUMO — Parámetros por escenario ===")
            display(param_tbl)

            # Counts: the columns follow PARAMS, so adding p85/p90 (or any
            # other scenario) shows up here without further edits.
            counts = count_alerts(M, PARAMS)
            alerts_wide = alerts_table(counts, scenarios)

            print("=== HANUMO — Alertas por escenario (ventanas cliente–día) ===")
            display(alerts_wide)


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== HANUMO — Parámetros por escenario ===


Unnamed: 0,escenario,Number,Factor
0,Actual,2,45
1,p95,19,8
2,p97,22,10
3,p99,30,16


=== HANUMO — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p95,alertas_p97,alertas_p99
0,0,7,2,0
