In [None]:
# === P-HVO — Max 30-day window per customer (Outbound Cash) ====================
# Regla: When a Customer sends more than {var.Number} Outbound Cash transactions in 30 days...
# Parámetro recomendado: Whole Number = ceil(p95) del máximo # de transacciones en 30 días por cliente

import pandas as pd
import numpy as np
import math

# -------- Parámetros editables --------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = "I-2"               # <-- ajusta el sub-subsegmento
WINDOW_DAYS = 30
PCTS = [90, 95, 97, 99]                   # percentiles a reportar

# -------- Carga mínima --------
df = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# -------- Filtro según regla --------
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")

mask = (
    (df["tx_direction"].astype(str).str.title() == "Outbound") &
    (df["tx_type"].astype(str).str.title() == "Cash") &
    (df["tx_date_time"].notna()) &
    (df["customer_id"].notna())
)
g = df.loc[mask, ["customer_id", "tx_date_time"]].copy()

if g.empty:
    print("No hay transacciones elegibles para P-HVO con los filtros dados.")
else:
    # --- Para cada cliente: máximo conteo en cualquier ventana [t, t+30d] ---
    def max_count_30d(group: pd.DataFrame) -> int:
        dates = np.sort(group["tx_date_time"].values)
        n = len(dates)
        j = 0
        best = 0
        for i in range(n):
            end = dates[i] + np.timedelta64(WINDOW_DAYS, "D")
            while j < n and dates[j] <= end:
                j += 1
            best = max(best, j - i)
        return best

    max_per_customer = g.sort_values(["customer_id", "tx_date_time"]) \
                        .groupby("customer_id", as_index=False) \
                        .apply(lambda sub: pd.Series({"max_30d": max_count_30d(sub)})) \
                        .reset_index(drop=True)

    s = pd.to_numeric(max_per_customer["max_30d"], errors="coerce").dropna()
    stats = {f"p{p}": (float(np.percentile(s, p)) if len(s) else np.nan) for p in PCTS}
    recommended = int(math.ceil(stats["p95"])) if np.isfinite(stats.get("p95", np.nan)) else np.nan

    print("=== P-HVO — Máximo # de transacciones en 30 días por cliente (Outbound Cash) ===")
    print(f"Clientes con ≥1 tx elegible: {max_per_customer.shape[0]}")
    for p in PCTS:
        v = stats[f"p{p}"]
        print(f"p{p:>2}: {v:.2f}" if np.isfinite(v) else f"p{p:>2}: NA")
    print(f"\nWhole Number recomendado (ceil p95): {recommended}")


  df = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")


=== P-HVO — Máximo # de transacciones en 30 días por cliente (Outbound Cash) ===
Clientes con ≥1 tx elegible: 18
p90: 68.30
p95: 95.45
p97: 123.67
p99: 151.89

Whole Number recomendado (ceil p95): 96


  .apply(lambda sub: pd.Series({"max_30d": max_count_30d(sub)})) \


# Simulación alertas

In [3]:
# === P-HVO — Sensibilidad (Actual vs propuestos) ===============================
# Lógica: tx_direction=Outbound & tx_type=Cash; COUNT_30d > Number
# Unidad = ventanas cliente–día

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-2"]                # <-- ajusta el sub-subsegmento
PARAMS = {
    "Actual": {"Number": 121},
    #"p90":    {"Number": 5},
    "p95":    {"Number": 95},
    "p97":    {"Number": 124},
    "p99":    {"Number": 152},
}

df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
df["tx_type"]      = df["tx_type"].astype(str).str.title()

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

g = df[(df["tx_direction"].eq("Outbound")) & (df["tx_type"].eq("Cash")) & df["customer_id"].notna() & df["tx_date_time"].notna()][["customer_id","tx_date_time"]]

parts=[]
for cid, sub in g.groupby("customer_id", sort=False):
    daily = sub.set_index("tx_date_time").assign(x=1)["x"].resample("D").sum().fillna(0)
    C30   = daily.rolling("30D").sum()
    parts.append(pd.DataFrame({"customer_id": cid, "date": C30.index, "C30": C30.values}))
M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","date","C30"])

order = ["Actual","p90","p95","p97","p99"]
param_tbl = pd.DataFrame(PARAMS).T.loc[[k for k in order if k in PARAMS]].rename_axis("escenario").reset_index()
print("=== P-HVO — Parámetros (Number) ==="); display(param_tbl)

counts = {k: int(M.loc[M["C30"] > v["Number"], ["customer_id","date"]].drop_duplicates().shape[0]) for k,v in PARAMS.items()}

out = pd.DataFrame([{
    "alertas_actual": counts.get("Actual", 0),
    "alertas_p90":    counts.get("p90", 0),
    "alertas_p95":    counts.get("p95", 0),
    "alertas_p97":    counts.get("p97", 0),
    "alertas_p99":    counts.get("p99", 0),
}])
print("=== P-HVO — Alertas por escenario (ventanas cliente–día) ==="); display(out)


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== P-HVO — Parámetros (Number) ===


Unnamed: 0,escenario,Number
0,Actual,121
1,p95,95
2,p97,124
3,p99,152


=== P-HVO — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,112,0,149,107,12
