# Definición de parámetros

In [3]:
# === HASUMO — Percentiles de Amount (S3) y Factor (S3/AVG177) — sin 180d ======
import pandas as pd
import numpy as np
import math

# Render floats with thousands separators and two decimals in displayed tables.
pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

# -------- Parameters --------
PATH = "../../data/tx_retail_core.csv"     # <-- adjust the path
AMOUNT_QS = [0.85, 0.90, 0.95, 0.97, 0.99]  # quantiles reported for the 3-day amount (S3)
FACTOR_QS = [0.95, 0.97, 0.99]              # quantiles reported for the S3 / AVG177 ratio

# -------- Load + HASUMO filter --------
# customer_id is read with the pandas string dtype; "utf-8-sig" tolerates a
# leading byte-order mark in the CSV.
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
# errors="coerce" turns unparseable values into NaT/NaN; the mask below drops them.
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
# Normalise capitalisation so "OUTBOUND"/"outbound" match the literals below.
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
df["tx_type"]        = df["tx_type"].astype(str).str.title()

# Eligible rows: outbound cash transactions with a customer, a valid timestamp
# and a numeric amount. Rows without customer_id cannot be attributed to any
# customer window, so they are excluded here (the sensitivity cell of this
# notebook applies the same condition); this also lets the "no eligible
# transactions" check see an empty frame when nothing usable remains.
mask = (
    df["tx_direction"].eq("Outbound") &
    df["tx_type"].eq("Cash") &
    df["customer_id"].notna() &
    df["tx_date_time"].notna() &
    df["tx_base_amount"].notna()
)
g = df.loc[mask, ["customer_id","tx_date_time","tx_base_amount"]].copy()

# Compute, per customer and calendar day, the trailing 3-day outbound cash
# amount (S3) and its ratio to a historical baseline (AVG177), then report the
# configured percentiles of both distributions across all customer-days.
if g.empty:
    print("No hay transacciones elegibles para HASUMO.")
else:
    amt_points = []   # per-customer Series of positive S3 values
    fac_points = []   # per-customer Series of S3 / AVG177 ratios

    for cid, sub in g.groupby("customer_id", sort=False):
        sub = sub.sort_values("tx_date_time")
        # Absolute amounts summed per calendar day; resample("D") fills days
        # without transactions with 0, so the index is a contiguous daily range.
        daily = sub.set_index("tx_date_time")["tx_base_amount"].abs().resample("D").sum()
        if daily.empty:
            continue

        # Trailing 3-day sum, current day included.
        S3 = daily.rolling("3D").sum()
        # Baseline: mean of the 3-day sums over the 177 days that end 3 days
        # before the current day (shift(3) on a daily index lags by 3 days), so
        # the current 3-day window is left out of its own baseline.
        # NOTE(review): with min_periods=1 this averages over whatever history
        # exists, while the sensitivity cell of this notebook uses
        # (S180 - S3) / 59 with a fixed divisor. The two baselines can diverge
        # for customers with short history -- confirm the calibration is meant
        # to differ from the rule.
        AVG177 = S3.shift(3).rolling("177D", min_periods=1).mean()

        # Only customer-days with some activity in the 3-day window count.
        ok_amt = (S3 > 0) & S3.notna()
        if ok_amt.any():
            amt_points.append(S3.loc[ok_amt])

        # The ratio additionally needs a strictly positive baseline.
        ok_fac = ok_amt & (AVG177 > 0) & AVG177.notna()
        if ok_fac.any():
            fac_points.append((S3.loc[ok_fac] / AVG177.loc[ok_fac]).replace([np.inf, -np.inf], np.nan).dropna())

    S_amt = pd.concat(amt_points) if amt_points else pd.Series(dtype=float)
    S_fac = pd.concat(fac_points) if fac_points else pd.Series(dtype=float)

    amount_q = S_amt.quantile(AMOUNT_QS) if len(S_amt) else pd.Series(index=AMOUNT_QS, dtype=float)
    factor_q = S_fac.quantile(FACTOR_QS) if len(S_fac) else pd.Series(index=FACTOR_QS, dtype=float)

    # Labels use ":g" rather than int(): int() truncates float noise
    # (0.29 * 100 == 28.999999999999996 -> "p28") and collapses fractional
    # percentiles (0.995 -> "p99"); ":g" yields "p29" and "p99.5".
    df_amount = pd.DataFrame({
        "percentil": [f"p{q * 100:g}" for q in AMOUNT_QS],
        "Amount_S3": [amount_q.get(q, np.nan) for q in AMOUNT_QS]
    })

    # Recommended factor = raw percentile rounded up to the next integer;
    # missing or non-finite percentiles stay NaN.
    factor_raw = [factor_q.get(q, np.nan) for q in FACTOR_QS]
    factor_recommend = [
        int(math.ceil(raw)) if pd.notna(raw) and np.isfinite(raw) else np.nan
        for raw in factor_raw
    ]

    df_factor = pd.DataFrame({
        "percentil":        [f"p{q * 100:g}" for q in FACTOR_QS],
        "Factor_raw":       factor_raw,
        "Factor_recommend": factor_recommend
    })

    print("=== HASUMO — Resultados (sin requisito de 180 días) ===")
    print(f"Puntos Amount (S3): {len(S_amt):,} | Puntos Factor (S3/AVG177): {len(S_fac):,}\n")
    print("— Amount (S3 en 3 días, CLP) —")
    display(df_amount)
    print("— Factor (S3 / AVG177) —")
    display(df_factor)


=== HASUMO — Resultados (sin requisito de 180 días) ===
Puntos Amount (S3): 26,058 | Puntos Factor (S3/AVG177): 17,811

— Amount (S3 en 3 días, CLP) —


Unnamed: 0,percentil,Amount_S3
0,p85,2297120.6
1,p90,3000000.0
2,p95,4001673.0
3,p97,5120765.0
4,p99,14059095.0


— Factor (S3 / AVG177) —


Unnamed: 0,percentil,Factor_raw,Factor_recommend
0,p95,61.19,62
1,p97,108.51,109
2,p99,352.32,353


# Simulación de alertas

In [7]:
# === HASUMO — Sensibilidad (Actual vs propuestos) ==============================
# LÓGICA EXACTA (sin exigir “historia >180d”):
#   tx_direction = Outbound
#   AND sum base en 3d (S3) > [Amount]
#   AND S3 > [Factor] * { [sum en 180d (S180) - S3] / 59 }
# Notas:
# - Ventanas móviles por cliente que incluyen el día actual (rolling "3D" y "180D").
# - No imponemos el requisito “count all-time > count en 180d” por falta de 180d en BBDD.
# - Se usa |tx_base_amount| para evitar signos contables.
# Unidad = ventanas cliente–día

import pandas as pd, numpy as np
# Show floats as whole numbers with thousands separators in displayed tables.
pd.set_option("display.float_format", lambda value: f"{value:,.0f}")

# ---- EDIT HERE ----------------------------------------------------------------
PATH = "../../data/tx_retail_whale.csv"
# Scenario name -> thresholds of the rule (Amount on S3, Factor on the baseline).
# Examples (replace with your own; no values are invented here).
PARAMS = {
    name: {"Amount": amount, "Factor": factor}
    for name, amount, factor in [
        ("Actual", 16_000_000, 169),
        ("p95", 624_101_788, 71),
        ("p97", 940_120_287, 131),
        ("p99", 1_175_759_000, 675),
    ]
}
# -------------------------------------------------------------------------------

# --- Load + Outbound Cash filter ---
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")

# Coerce types; values that cannot be parsed become NaT/NaN and are filtered
# out by the completeness condition below.
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
# Normalise capitalisation before comparing against the literals.
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
df["tx_type"] = df["tx_type"].astype(str).str.title()

# Keep outbound cash rows that have a customer, a timestamp and an amount.
mask = (
    (df["tx_direction"] == "Outbound")
    & (df["tx_type"] == "Cash")
    & df[["customer_id", "tx_date_time", "tx_base_amount"]].notna().all(axis=1)
)
g = df[mask][["customer_id","tx_date_time","tx_base_amount"]].copy()

# Count, for every scenario in PARAMS, the customer-day windows that satisfy
#   S3 > Amount  AND  S3 > Factor * (S180 - S3) / 59
# where S3 / S180 are trailing 3- and 180-day sums of absolute outbound cash
# amounts per customer (current day included).
if g.empty:
    print("No hay transacciones elegibles para HASUMO.")
else:
    parts = []
    for cid, sub in g.groupby("customer_id", sort=False):
        # Absolute amounts summed per calendar day; days without transactions
        # are filled with 0 by resample, giving one row per customer-day.
        daily = (sub.set_index("tx_date_time")["tx_base_amount"].abs()
                    .resample("D").sum())
        if daily.empty:
            continue
        S3   = daily.rolling("3D").sum()
        S180 = daily.rolling("180D").sum()
        parts.append(pd.DataFrame({
            "customer_id": cid,
            "date": daily.index,
            "S3": S3.values,
            "S180": S180.values
        }))

    M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","date","S3","S180"])

    if M.empty:
        print("No se pudieron construir ventanas 3d (M vacío).")
    else:
        # Historical average per 3-day window: the 180-day total minus the
        # current window, spread over the (180 - 3) / 3 = 59 remaining windows.
        M["avg3_hist"] = (M["S180"] - M["S3"]) / 59.0

        # Display order: known scenario names first, in this fixed order, then
        # any other scenario in PARAMS in its definition order. Previously a
        # scenario whose name was not listed here was silently left out of both
        # tables whenever at least one listed name was present.
        order = ["Actual","p85","p90","p95","p97","p99"]
        scenarios = [k for k in order if k in PARAMS] + [k for k in PARAMS if k not in order]

        if PARAMS:
            param_tbl = (pd.DataFrame(PARAMS).T
                           .loc[scenarios]
                           .rename_axis("escenario")
                           .reset_index())
            print("=== HASUMO — Parámetros (Amount, Factor) ==="); display(param_tbl)
        else:
            print("⚠️  Define PARAMS (Amount, Factor) antes de ejecutar conteos.")

        counts = {}
        for k, v in PARAMS.items():
            A = float(v["Amount"])
            F = float(v["Factor"])
            m = (
                (M["S3"] > A) &
                # Require a positive baseline: with no prior activity the
                # factor comparison would be trivially true.
                (M["avg3_hist"] > 0) &
                (M["S3"] > F * M["avg3_hist"])
            )
            # Unit of count = distinct (customer, day) windows.
            counts[k] = int(M.loc[m, ["customer_id","date"]].drop_duplicates().shape[0])

        if PARAMS:
            alerts_wide = pd.DataFrame([{f"alertas_{name.lower()}": counts.get(name, 0)
                                         for name in scenarios}])
            print("=== HASUMO — Alertas por escenario (ventanas cliente–día) ==="); display(alerts_wide)


=== HASUMO — Parámetros (Amount, Factor) ===


Unnamed: 0,escenario,Amount,Factor
0,Actual,16000000,169
1,p95,624101788,71
2,p97,940120287,131
3,p99,1175759000,675


=== HASUMO — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p95,alertas_p97,alertas_p99
0,22,6,3,2
