In [3]:
# === P-HSUMI — Percentiles de la MÁXIMA suma 30d por cliente (Inbound Cash) ===
# LÓGICA EXACTA (parametrización):
#   tx_direction = Inbound AND tx_type = Cash
#   Por cliente: S30(t) = suma base en los últimos 30 días (incluye t)
#   Tomamos max_{t}(S30) por cliente y calculamos percentiles sobre esos máximos.

import pandas as pd, numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# -------- EDITA AQUÍ -----------------------------------------------------------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = "I-2"     # <-- ajusta el sub-subsegmento
PCTS = [0.85, 0.90, 0.95, 0.97, 0.99]     # percentiles a reportar
# ------------------------------------------------------------------------------

# Carga y limpieza mínima
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"]   = df.get("tx_direction","").astype(str).str.title()
df["tx_type"]        = df.get("tx_type","").astype(str).str.title()

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# Filtro: IN + Cash + datos válidos
g = df[
    df["tx_direction"].eq("Inbound")
    & df["tx_type"].eq("Cash")
    & df["tx_date_time"].notna()
    & df["tx_base_amount"].notna()
][["customer_id","tx_date_time","tx_base_amount"]].copy()

if g.empty:
    print("P-HSUMI: No hay transacciones elegibles.")
else:
    max_rows = []
    for cid, sub in g.groupby("customer_id", sort=False):
        daily = (sub.set_index("tx_date_time")["tx_base_amount"]
                   .abs()
                   .resample("D").sum())
        if daily.empty:
            continue
        S30 = daily.rolling("30D").sum()
        max_rows.append({"customer_id": cid, "S30_max": float(S30.max())})

    R = pd.DataFrame(max_rows)
    if R.empty:
        print("P-HSUMI: No se pudieron construir ventanas 30d.")
    else:
        s = R["S30_max"].astype(float)
        q = s.quantile(PCTS) if len(s) else pd.Series(index=PCTS, dtype=float)

        out = pd.DataFrame({
            "percentil": [f"p{int(p*100)}" for p in PCTS],
            "Amount_30d_max_per_customer_CLP": [q.get(p, np.nan) for p in PCTS]
        })

        print("=== P-HSUMI — Máxima suma CLP en 30 días por cliente (percentiles) ===")
        print(f"Clientes considerados: {s.shape[0]:,}")
        display(out)

        if pd.notna(q.get(0.95, np.nan)):
            print(f"\nSugerencia {{var.Amount}} (p95 máx-30d por cliente): {q.get(0.95):,.0f} CLP")


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== P-HSUMI — Máxima suma CLP en 30 días por cliente (percentiles) ===
Clientes considerados: 18


Unnamed: 0,percentil,Amount_30d_max_per_customer_CLP
0,p85,15194239794
1,p90,15746323788
2,p95,16861936597
3,p97,17640863445
4,p99,18419790292



Sugerencia {var.Amount} (p95 máx-30d por cliente): 16,861,936,597 CLP


# Simulación alertas

In [6]:
# === P-HSUMI — Sensibilidad (Actual vs propuestos) =============================
# Lógica: tx_direction=Inbound & tx_type=Cash; SUM_30d > Amount
# Unidad = ventanas cliente–día

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx_retail_whale.csv"
PARAMS = {
    #"Actual": {"Amount": 299_000_073},
   # "p85":    {"Amount":   402_334_913},
    #"p90":    {"Amount":   603_000_000},
    "p95":    {"Amount":1_652_627_419},
    "p97":    {"Amount": 2_274_301_629},
    "p99":    {"Amount": 3_824_000_816},
}

df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
df["tx_type"]        = df["tx_type"].astype(str).str.title()

IN_ = df[(df["tx_direction"].eq("Inbound")) & (df["tx_type"].eq("Cash")) & df["tx_base_amount"].notna() & df["tx_date_time"].notna()]
parts=[]
for cid, sub in IN_.groupby("customer_id", sort=False):
    daily = sub.set_index("tx_date_time")["tx_base_amount"].abs().resample("D").sum()
    S30  = daily.rolling("30D").sum()
    parts.append(pd.DataFrame({"customer_id": cid, "date": S30.index, "S30": S30.values}))
M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","date","S30"])

order = ["Actual","p85","p90","p95","p97","p99"]
param_tbl = pd.DataFrame(PARAMS).T.loc[[k for k in order if k in PARAMS]].rename_axis("escenario").reset_index()
print("=== P-HSUMI — Parámetros (Amount) ==="); display(param_tbl)

counts = {k: int(M.loc[M["S30"] > v["Amount"], ["customer_id","date"]].drop_duplicates().shape[0]) for k,v in PARAMS.items()}

out = pd.DataFrame([{
    "alertas_actual": counts.get("Actual", 0),
    "alertas_p85":    counts.get("p85", 0),
    "alertas_p90":    counts.get("p90", 0),
    "alertas_p95":    counts.get("p95", 0),
    "alertas_p97":    counts.get("p97", 0),
    "alertas_p99":    counts.get("p99", 0),
}])
print("=== P-HSUMI — Alertas por escenario (ventanas cliente–día) ==="); display(out)


=== P-HSUMI — Parámetros (Amount) ===


Unnamed: 0,escenario,Amount
0,p95,1652627419
1,p97,2274301629
2,p99,3824000816


=== P-HSUMI — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p85,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,0,0,0,37,31,1


In [7]:
# === P-HSUMI — Simulación de alertas (Actual vs propuestos) ====================
# LÓGICA EXACTA (simulación):
#   tx_direction = Inbound AND tx_type = Cash
#   sum of tx_base_amount per {customer_id & tx_direction} in 30 days > [Amount]
# Unidad = ventanas (customer_id, día) que cumplen.

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDITA AQUÍ ----------------------------------------------------------------
PATH = "../../data/tx_iv.csv"   # <-- usa el MISMO CSV que en la parametrización
PARAMS = {
    "Actual": {"Amount": 500_000_000},
    "p95":    {"Amount": 1_255_000_000},
    "p97":    {"Amount": 2_102_000_000},
    "p99":    {"Amount": 4_609_000_000},
}
# -----------------------------------------------------------------------------

# Carga + filtros mínimos
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df.get("tx_date_time"), errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df.get("tx_base_amount"), errors="coerce")
df["tx_direction"]   = df.get("tx_direction","").astype(str).str.title()
df["tx_type"]        = df.get("tx_type","").astype(str).str.title()

g = df[
    df["tx_direction"].eq("Inbound")
    & df["tx_type"].eq("Cash")
    & df["tx_date_time"].notna()
    & df["tx_base_amount"].notna()
    & df["customer_id"].notna()
][["customer_id","tx_date_time","tx_base_amount"]].copy()

if g.empty:
    print("No hay transacciones elegibles para P-HSUMI.")
else:
    parts = []
    for cid, sub in g.groupby("customer_id", sort=False):
        daily = (sub.set_index("tx_date_time")["tx_base_amount"]
                   .abs()
                   .resample("D").sum())
        if daily.empty:
            continue
        S30 = daily.rolling("30D", min_periods=1).sum()
        parts.append(pd.DataFrame({
            "customer_id": cid,
            "date": S30.index,        # DatetimeIndex diario
            "S30":  S30.values
        }))

    M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","date","S30"])

    # Tabla de parámetros (solo para mostrar qué se está probando)
    order = ["Actual","p95","p97","p99"]
    param_tbl = (pd.DataFrame(PARAMS).T
                    .loc[[k for k in order if k in PARAMS]]
                    .rename_axis("escenario")
                    .reset_index())
    print("=== P-HSUMI — Parámetros (Amount) ==="); display(param_tbl)

    # Conteos (ventanas cliente–día)
    counts = {}
    for k, v in PARAMS.items():
        A = v["Amount"]
        m_ok = (M["S30"] > A)
        counts[k] = int(M.loc[m_ok, ["customer_id","date"]].drop_duplicates().shape[0])

    out = pd.DataFrame([{
        "alertas_actual": counts.get("Actual",0),
        "alertas_p95":    counts.get("p95",0),
        "alertas_p97":    counts.get("p97",0),
        "alertas_p99":    counts.get("p99",0),
    }])
    print("=== P-HSUMI — Alertas por escenario (ventanas cliente–día) ==="); display(out)


=== P-HSUMI — Parámetros (Amount) ===


Unnamed: 0,escenario,Amount
0,Actual,500000000
1,p95,1255000000
2,p97,2102000000
3,p99,4609000000


=== P-HSUMI — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p95,alertas_p97,alertas_p99
0,5074,2107,1221,491
