# Definición de parámetros

In [2]:
# === HNR-OUT — Parametrización del Number (30 días) ============================
# LÓGICA EXACTA:
#   tx_direction = Outbound
#   AND tx_type = Cash
#   AND tx_base_amount > 1000        (high-value)
#   AND mod( tx_amount [default: 0.0001] , 1000 ) == 0   (redonda)
#   Parámetro a fijar: [Number] = máximo # de tx en cualquier ventana 30d (por cliente)

import pandas as pd, numpy as np, math
# Render floats in displayed tables with thousands separators and no decimals.
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# -------- EDIT HERE ------------------------------------------------------------
PATH = "../../data/tx/datos_trx__with_subsub.csv"
SUBSUBSEGMENTS = ["R-High"]            # <-- sub-sub-segment label(s) to analyse
PCTS = [95, 97, 99]                    # percentiles reported for the parameter
# ------------------------------------------------------------------------------

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk. NOTE(review): the saved notebook output echoes a
# warning raised on this read_csv line, presumably the mixed-type DtypeWarning;
# confirm it is gone after this change.
df = pd.read_csv(
    PATH,
    dtype={"customer_id": "string"},
    encoding="utf-8-sig",
    low_memory=False,
)

# Coerce to usable dtypes; unparseable values become NaT/NaN and are excluded
# by the eligibility mask below.
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_amount"]      = pd.to_numeric(df["tx_amount"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
# Title-case so the comparisons against "Outbound" / "Cash" ignore letter case.
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
df["tx_type"]        = df["tx_type"].astype(str).str.title()

# Accept either a single label or an iterable of labels.
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

# Keep only the customers of the selected sub-sub-segment(s).
df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# Missing amounts default to 0.0001, which can never be a multiple of 1000,
# so rows without an amount are never flagged as round.
amt_orig = df["tx_amount"].fillna(0.0001)
is_round = np.isfinite(amt_orig) & np.isclose(amt_orig % 1000, 0, atol=1e-9)

# Eligibility: outbound cash, base amount above 1000, round original amount,
# with a usable timestamp and customer id.
m = (
    df["tx_direction"].eq("Outbound") &
    df["tx_type"].eq("Cash") &
    df["tx_date_time"].notna() &
    df["customer_id"].notna() &
    (df["tx_base_amount"] > 1000) &
    is_round
)
g = df.loc[m, ["customer_id", "tx_date_time"]].copy()

def max_count_30d(group: pd.DataFrame) -> int:
    """Return the largest number of rows falling in any 30-day window.

    The window is anchored at each timestamp ``t`` of ``group["tx_date_time"]``
    and spans ``[t, t + 30 days]`` with both ends included. Timestamps are
    sorted internally, so the input order does not matter. An empty group
    yields 0.
    """
    stamps = np.sort(group["tx_date_time"].values)
    total = len(stamps)
    window = np.timedelta64(30, "D")

    right = 0   # first index not yet absorbed by the current window
    peak = 0
    for left, start in enumerate(stamps):
        limit = start + window
        # The right edge only ever moves forward, so the scan is linear.
        while right < total and stamps[right] <= limit:
            right += 1
        if right - left > peak:
            peak = right - left
    return peak

if g.empty:
    print("HNR-OUT: no hay transacciones elegibles.")
else:
    # One row per customer with the busiest 30-day window. Iterating the
    # groups explicitly replaces groupby(...).apply(lambda ...).
    # NOTE(review): the saved notebook output echoes a warning on that
    # .apply line, presumably the pandas deprecation of apply operating on
    # the grouping columns; confirm it is gone.
    # max_count_30d sorts the timestamps itself, so no pre-sort is needed.
    max_per_cust = pd.DataFrame(
        [(cid, max_count_30d(sub)) for cid, sub in g.groupby("customer_id")],
        columns=["customer_id", "max_30d"],
    )

    s = pd.to_numeric(max_per_cust["max_30d"], errors="coerce").dropna()
    stats = {f"p{p}": (float(np.percentile(s, p)) if len(s) else np.nan) for p in PCTS}

    # The suggested Number is always the ceiling of p95; it stays NaN when 95
    # is not among PCTS or no value could be computed.
    p95 = stats.get("p95", np.nan)
    recommend = int(math.ceil(p95)) if np.isfinite(p95) else np.nan

    print("=== HNR-OUT — Máximo #tx redondas & high-value (30d) por cliente ===")
    print(f"Clientes con ≥1 tx elegible: {max_per_cust.shape[0]:,}")
    for p in PCTS:
        v = stats[f"p{p}"]
        print(f"p{p:>2}: {v:.2f}" if np.isfinite(v) else f"p{p:>2}: —")
    print(f"\nSugerencia Number (ceil p95): {recommend}")


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== HNR-OUT — Máximo #tx redondas & high-value (30d) por cliente ===
Clientes con ≥1 tx elegible: 136
p95: 4.00
p97: 4.95
p99: 8.95

Sugerencia Number (ceil p95): 4


  .apply(lambda sub: pd.Series({"max_30d": max_count_30d(sub)}))


# Simulación de alertas

In [5]:
# === HNR-OUT — Simulación de alertas (Actual vs propuestos) ====================
# LÓGICA EXACTA:
# tx_direction = Outbound
# AND tx_type = Cash
# AND tx_base_amount > 1000
# AND mod( tx_amount [default: 0.0001] , 1000 ) = 0
# AND count de tx por {customer_id & tx_direction} en 30 días > [Number]
# Unidad = ventanas cliente–día que cumplen

import pandas as pd, numpy as np
# Render floats in displayed tables with thousands separators and no decimals.
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDIT HERE -----------------------------------------------------------------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-2"]               # <-- sub-sub-segment label(s) to simulate
# Threshold ("Number") per scenario: the current value and the candidates.
PARAMS = {
    "Actual": {"Number": 26},
    "p95":    {"Number": 12},
    "p97":    {"Number": 14},
    "p99":    {"Number": 16},
}
# -------------------------------------------------------------------------------

# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk. NOTE(review): the saved notebook output echoes a
# warning raised on this read_csv line, presumably the mixed-type DtypeWarning;
# confirm it is gone after this change.
df = pd.read_csv(
    PATH,
    dtype={"customer_id": "string"},
    encoding="utf-8-sig",
    low_memory=False,
)

# Fail early and clearly when the file lacks a column the rule needs. The
# previous df.get(col, "") fallbacks either crashed with an unrelated
# AttributeError (a str has no .astype) or substituted None for the column.
_REQUIRED_COLUMNS = [
    "customer_id", "customer_sub_sub_type", "tx_date_time",
    "tx_amount", "tx_base_amount", "tx_direction", "tx_type",
]
_missing_columns = [c for c in _REQUIRED_COLUMNS if c not in df.columns]
if _missing_columns:
    raise KeyError(f"Missing required column(s) in {PATH}: {_missing_columns}")

# Coerce to usable dtypes; unparseable values become NaT/NaN and are excluded
# by the eligibility mask below.
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_amount"]      = pd.to_numeric(df["tx_amount"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
# Title-case so the comparisons against "Outbound" / "Cash" ignore letter case.
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
df["tx_type"]        = df["tx_type"].astype(str).str.title()

# Accept either a single label or an iterable of labels.
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

# Keep only the customers of the selected sub-sub-segment(s).
df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# Missing amounts default to 0.0001, which can never be a multiple of 1000,
# so rows without an amount are never flagged as round.
amt_orig = df["tx_amount"].fillna(0.0001)
is_round = np.isfinite(amt_orig) & np.isclose(amt_orig % 1000, 0, atol=1e-9)

# Eligibility: outbound cash, round original amount, base amount above 1000,
# with a usable timestamp and customer id.
m = (
    df["tx_direction"].eq("Outbound") &
    df["tx_type"].eq("Cash") &
    df["tx_date_time"].notna() &
    df["customer_id"].notna() &
    is_round &
    (df["tx_base_amount"] > 1000)
)
g = df.loc[m, ["customer_id", "tx_date_time"]].copy()

if not g.empty:
    # Per customer: daily count of eligible transactions, then a rolling
    # 30-day sum evaluated on every calendar day between the customer's
    # first and last eligible transaction.
    parts = []
    for customer, txs in g.groupby("customer_id", sort=False):
        ones = txs.set_index("tx_date_time").assign(x=1)["x"]
        per_day = ones.resample("D").sum().fillna(0.0)
        window_total = per_day.rolling("30D").sum()
        parts.append(
            pd.DataFrame(
                {
                    "customer_id": customer,
                    "date": window_total.index,
                    "CNT30": window_total.values,
                }
            )
        )

    if parts:
        M = pd.concat(parts, ignore_index=True)
    else:
        M = pd.DataFrame(columns=["customer_id", "date", "CNT30"])

    # Fixed presentation order; only scenarios present in PARAMS are listed
    # in the parameter table.
    order = ["Actual", "p90", "p95", "p97", "p99"]
    configured = [name for name in order if name in PARAMS]
    param_tbl = pd.DataFrame(PARAMS).T.loc[configured]
    param_tbl = param_tbl.rename_axis("escenario").reset_index()
    print("=== HNR-OUT — Parámetros (Number) ===")
    display(param_tbl)

    # An alert is a distinct (customer, day) whose rolling count is strictly
    # above the scenario's Number.
    counts = {
        scenario: int(
            M.loc[M["CNT30"] > cfg["Number"], ["customer_id", "date"]]
            .drop_duplicates()
            .shape[0]
        )
        for scenario, cfg in PARAMS.items()
    }

    # The output always carries one column per name in `order`; a scenario
    # that is not configured in PARAMS is reported as 0.
    out = pd.DataFrame(
        [{f"alertas_{name.lower()}": counts.get(name, 0) for name in order}]
    )
    print("=== HNR-OUT — Alertas por escenario (ventanas cliente–día) ===")
    display(out)
else:
    print("No hay transacciones elegibles para HNR-OUT.")


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== HNR-OUT — Parámetros (Number) ===


Unnamed: 0,escenario,Number
0,Actual,26
1,p95,12
2,p97,14
3,p99,16


=== HNR-OUT — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,0,0,31,2,0
