In [2]:
# === P-HVI — Max 30-day window per customer (Inbound Cash) =====================
# Regla: When a Customer receives more than {var.Number} Inbound Cash transactions in 30 days...
# Parámetro recomendado: Whole Number = ceil(p95) del máximo # de transacciones en 30 días por cliente

import pandas as pd
import numpy as np
import math

# -------- Parámetros editables --------
PATH = "../../data/tx/datos_trx__with_subsub.csv"
SUBSUBSEGMENTS = "R-Low"         # <-- ajusta el sub-subsegmento
WINDOW_DAYS = 30
PCTS = [90, 95, 97, 99]         # percentiles a reportar

# -------- Carga mínima --------
df = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# -------- Filtro según regla --------
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")

mask = (
    (df["tx_direction"].astype(str).str.title() == "Inbound") &
    (df["tx_type"].astype(str).str.title() == "Cash") &
    (df["tx_date_time"].notna()) &
    (df["customer_id"].notna())
)
g = df.loc[mask, ["customer_id", "tx_date_time"]].copy()

if g.empty:
    print("No hay transacciones elegibles para P-HVI con los filtros dados.")
else:
    # --- Para cada cliente: máximo conteo en cualquier ventana [t, t+30d] ---
    def max_count_30d(group: pd.DataFrame) -> int:
        dates = np.sort(group["tx_date_time"].values)
        n = len(dates)
        j = 0
        best = 0
        for i in range(n):
            end = dates[i] + np.timedelta64(WINDOW_DAYS, "D")
            while j < n and dates[j] <= end:
                j += 1
            best = max(best, j - i)
        return best

    max_per_customer = g.sort_values(["customer_id", "tx_date_time"]) \
                        .groupby("customer_id", as_index=False) \
                        .apply(lambda sub: pd.Series({"max_30d": max_count_30d(sub)})) \
                        .reset_index(drop=True)

    s = pd.to_numeric(max_per_customer["max_30d"], errors="coerce").dropna()
    stats = {f"p{p}": (float(np.percentile(s, p)) if len(s) else np.nan) for p in PCTS}
    recommended = int(math.ceil(stats["p95"])) if np.isfinite(stats.get("p95", np.nan)) else np.nan

    print("=== P-HVI — Máximo # de transacciones en 30 días por cliente (Inbound Cash) ===")
    print(f"Clientes con ≥1 tx elegible: {max_per_customer.shape[0]}")
    for p in PCTS:
        v = stats[f"p{p}"]
        print(f"p{p:>2}: {v:.2f}" if np.isfinite(v) else f"p{p:>2}: NA")
    print(f"\nWhole Number recomendado (ceil p95): {recommended}")


  df = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")


=== P-HVI — Máximo # de transacciones en 30 días por cliente (Inbound Cash) ===
Clientes con ≥1 tx elegible: 8065
p90: 5.00
p95: 7.00
p97: 8.00
p99: 12.00

Whole Number recomendado (ceil p95): 7


  .apply(lambda sub: pd.Series({"max_30d": max_count_30d(sub)})) \


# Simulación alertas

In [3]:
# === P-HVI — Sensibilidad (Actual vs propuestos) ===============================
# Lógica: tx_direction=Inbound & tx_type=Cash; COUNT_30d > Number
# Unidad = ventanas cliente–día

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-2"]               # <-- ajusta el sub-subsegmento
PARAMS = {
   "Actual": {"Number": 26},
   "p90":    {"Number": 21},
    "p95":    {"Number": 21},
    "p97":    {"Number": 21},
    "p99":    {"Number": 22},
}

df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
df["tx_type"]      = df["tx_type"].astype(str).str.title()

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

g = df[(df["tx_direction"].eq("Inbound")) & (df["tx_type"].eq("Cash")) & df["customer_id"].notna() & df["tx_date_time"].notna()][["customer_id","tx_date_time"]]

parts=[]
for cid, sub in g.groupby("customer_id", sort=False):
    daily = sub.set_index("tx_date_time").assign(x=1)["x"].resample("D").sum().fillna(0)
    C30   = daily.rolling("30D").sum()
    parts.append(pd.DataFrame({"customer_id": cid, "date": C30.index, "C30": C30.values}))
M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","date","C30"])

order = ["Actual","p90","p95","p97","p99"]
param_tbl = pd.DataFrame(PARAMS).T.loc[[k for k in order if k in PARAMS]].rename_axis("escenario").reset_index()
print("=== P-HVI — Parámetros (Number) ==="); display(param_tbl)

counts = {k: int(M.loc[M["C30"] > v["Number"], ["customer_id","date"]].drop_duplicates().shape[0]) for k,v in PARAMS.items()}

out = pd.DataFrame([{
    "alertas_actual": counts.get("Actual", 0),
    "alertas_p90":    counts.get("p90", 0),
    "alertas_p95":    counts.get("p95", 0),
    "alertas_p97":    counts.get("p97", 0),
    "alertas_p99":    counts.get("p99", 0),
}])
print("=== P-HVI — Alertas por escenario (ventanas cliente–día) ==="); display(out)


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== P-HVI — Parámetros (Number) ===


Unnamed: 0,escenario,Number
0,Actual,26
1,p90,21
2,p95,21
3,p97,21
4,p99,22


=== P-HVI — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,0,2,2,2,0


In [2]:
# === P-HVI — Sensibilidad (Actual vs propuestos) ============================
# Lógica: tx_direction=Inbound & tx_type=Cash; COUNT_30d >= Number
# Unidad = transacciones (cada alerta se asocia a un tx_id)
 
import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")
pd.set_option("display.max_rows", None)
 
# Soporte para scripts sin notebook
try:
    from IPython.display import display  # noqa
except Exception:
    def display(x):  # fallback simple
        print(x.to_string(index=False) if hasattr(x, "to_string") else x)
 
PATH = "../../data/tx/datos_trx__with_subsub_oficial.csv"
SUBSEGMENT = "Investment Vehicle"   # customer_sub_type
 
PARAMS = {
   "Actual": {"Number": 37},
   "p90":    {"Number": 6},
   "p95":    {"Number": 8},
   "p97":    {"Number": 9},
   "p99":    {"Number": 15},
}
 
# Ventana de análisis (sólo se cuentan alertas de transacciones dentro de este rango)
START_DATE = pd.Timestamp("2025-02-21", tz="UTC")
END_DATE   = pd.Timestamp("2025-08-21", tz="UTC")
 
# ---------------------------------------------------------------------------
 
df = pd.read_csv(
    PATH,
    dtype={"customer_id": "string", "tx_id": "string"},
    encoding="utf-8-sig"
)
 
# Normalizaciones mínimas
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce", utc=True)
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
df["tx_type"]      = df["tx_type"].astype(str).str.title()
 
# Filtrado por sub-segmento (customer_sub_type)
if isinstance(SUBSEGMENT, str):
    target_labels = {SUBSEGMENT}
else:
    target_labels = set(map(str, SUBSEGMENT))
df = df[df["customer_sub_type"].astype(str).isin(target_labels)].copy()
 
# Base relevante (sólo Inbound/Cash con ids y fecha válidas)
base = df[
    (df["tx_direction"].eq("Inbound")) &
    (df["tx_type"].eq("Cash")) &
    df["customer_id"].notna() &
    df["tx_date_time"].notna()
][["customer_id","customer_name","tx_date_time","tx_id"]].rename(
    columns={"tx_date_time":"tx_dt"}
).copy()
 
# ------------------ Rolling 30D sólo en eventos (no por día) ----------------
# Para cada cliente, ordenamos por fecha y computamos COUNT_30d en la marca de tiempo de CADA transacción.
parts = []
for cid, sub in base.groupby("customer_id", sort=False):
    sub = sub.sort_values("tx_dt", kind="mergesort")  # estable por si hay empates
    cname = sub["customer_name"].dropna().astype(str).mode()
    cname = cname.iloc[0] if len(cname) else np.nan
 
    # Serie de unos indexada por la marca de tiempo de cada transacción
    s = pd.Series(1, index=sub["tx_dt"])
    # Rolling 30D inclusivo al final: cuenta transacciones en [tx_dt-29d, tx_dt]
    c30 = s.rolling("30D").sum().astype(int)
 
    tmp = sub.copy()
    tmp["C30_at_tx"]     = c30.values
    tmp["customer_id"]   = cid
    tmp["customer_name"] = cname
    parts.append(tmp)
 
E = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(
    columns=["customer_id","customer_name","tx_dt","tx_id","C30_at_tx"]
)
 
# ------------------ Resumen: alertas por escenario en la ventana ------------
 
order = ["Actual","p90","p95","p97","p99"]
param_tbl = (pd.DataFrame(PARAMS).T
             .loc[[k for k in order if k in PARAMS]]
             .rename_axis("escenario")
             .reset_index())
print("=== P-HVI — Parámetros (Number) ===")
display(param_tbl)
 
# Consideramos alertas sólo para transacciones dentro de la ventana de análisis
mask_tx_win = E["tx_dt"].between(START_DATE, END_DATE)
 
counts = {
    k: int((E.loc[mask_tx_win, "C30_at_tx"] >= v["Number"]).sum())
    for k, v in PARAMS.items()
}
 
out = pd.DataFrame([{
    "alertas_actual": counts.get("Actual", 0),
    "alertas_p90":    counts.get("p90", 0),
    "alertas_p95":    counts.get("p95", 0),
    "alertas_p97":    counts.get("p97", 0),
    "alertas_p99":    counts.get("p99", 0),
}])
 
print("=== P-HVI — Alertas por escenario (sólo transacciones en ventana) ===")
display(out)
 
# ---------------------- DESGLOSE: escenario Actual --------------------------
# Cada fila = una TRANSACCIÓN que gatilla (sin colapsar por día).
if "Actual" in PARAMS:
    thr = PARAMS["Actual"]["Number"]
    hits = E.loc[mask_tx_win & (E["C30_at_tx"] >= thr)].copy()
 
    if not hits.empty:
        # Ventana inclusiva de 30 días por transacción: [tx_dt-29d, tx_dt]
        hits["window_start"] = hits["tx_dt"] - pd.Timedelta(days=29)
        hits["window_end"]   = hits["tx_dt"]
        hits["n_tx_30d"]     = hits["C30_at_tx"].astype(int)
 
        # Para auditoría: ids de transacción, y primera/última fecha en la ventana por (cliente, tx_id gatillante)
        base2 = base.rename(columns={"tx_dt":"dt"})
        tx_lists, first_last = [], []
        for _, r in hits.iterrows():
            wstart, wend = r["window_start"], r["window_end"]
            subw = base2[
                (base2["customer_id"] == r["customer_id"]) &
                (base2["dt"].between(wstart, wend))
            ].sort_values("dt", kind="mergesort")
            tx_lists.append(", ".join(subw["tx_id"].dropna().astype(str).tolist()))
            first_last.append((subw["dt"].min(), subw["dt"].max()))
 
        fl_df = pd.DataFrame(first_last, columns=["first_tx_in_window","last_tx_in_window"])
        ids_df = pd.DataFrame({"tx_ids_in_window": tx_lists})
        hits = pd.concat([hits.reset_index(drop=True), fl_df, ids_df], axis=1)
 
        cols = [
            "customer_name","customer_id",
            "tx_dt","tx_id",
            "window_start","window_end","n_tx_30d",
            "first_tx_in_window","last_tx_in_window",
            "tx_ids_in_window"
        ]
        actual_breakdown = (hits[cols]
                            .sort_values(["tx_dt","customer_name","customer_id"])
                            .reset_index(drop=True))
 
        print("=== P-HVI — Desglose de alertas (ESCENARIO: Actual) ===")
        print("Cada fila es una TRANSACCIÓN que gatilla. Ventana: [tx_dt-29d, tx_dt].")
        display(actual_breakdown)
    else:
        print("=== P-HVI — Desglose de alertas (ESCENARIO: Actual) ===")
        print("No hay alertas en el rango seleccionado.")

=== P-HVI — Parámetros (Number) ===


Unnamed: 0,escenario,Number
0,Actual,37
1,p90,6
2,p95,8
3,p97,9
4,p99,15


=== P-HVI — Alertas por escenario (sólo transacciones en ventana) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,167,1442,1014,876,504


=== P-HVI — Desglose de alertas (ESCENARIO: Actual) ===
Cada fila es una TRANSACCIÓN que gatilla. Ventana: [tx_dt-29d, tx_dt].


Unnamed: 0,customer_name,customer_id,tx_dt,tx_id,window_start,window_end,n_tx_30d,first_tx_in_window,last_tx_in_window,tx_ids_in_window
0,OKANE CAPITAL SPA ...,76824018,2025-02-21 00:00:00+00:00,201662027.0,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,59,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,"200189854.0, 200195916.0, 200236932.0, 2002981..."
1,OKANE CAPITAL SPA ...,76824018,2025-02-21 00:00:00+00:00,201678850.0,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,60,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,"200189854.0, 200195916.0, 200236932.0, 2002981..."
2,OKANE CAPITAL SPA ...,76824018,2025-02-21 00:00:00+00:00,201679433.0,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,61,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,"200189854.0, 200195916.0, 200236932.0, 2002981..."
3,OKANE CAPITAL SPA ...,76824018,2025-02-21 00:00:00+00:00,201687463.0,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,62,2025-01-23 00:00:00+00:00,2025-02-21 00:00:00+00:00,"200189854.0, 200195916.0, 200236932.0, 2002981..."
4,OKANE CAPITAL SPA ...,76824018,2025-02-24 00:00:00+00:00,201720502.0,2025-01-26 00:00:00+00:00,2025-02-24 00:00:00+00:00,60,2025-01-27 00:00:00+00:00,2025-02-24 00:00:00+00:00,"200298191.0, 200299593.0, 200322818.0, 2003562..."
5,OKANE CAPITAL SPA ...,76824018,2025-02-24 00:00:00+00:00,201743420.0,2025-01-26 00:00:00+00:00,2025-02-24 00:00:00+00:00,61,2025-01-27 00:00:00+00:00,2025-02-24 00:00:00+00:00,"200298191.0, 200299593.0, 200322818.0, 2003562..."
6,OKANE CAPITAL SPA ...,76824018,2025-02-25 00:00:00+00:00,201754977.0,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,62,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,"200298191.0, 200299593.0, 200322818.0, 2003562..."
7,OKANE CAPITAL SPA ...,76824018,2025-02-25 00:00:00+00:00,201801915.0,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,63,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,"200298191.0, 200299593.0, 200322818.0, 2003562..."
8,OKANE CAPITAL SPA ...,76824018,2025-02-25 00:00:00+00:00,201810406.0,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,64,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,"200298191.0, 200299593.0, 200322818.0, 2003562..."
9,OKANE CAPITAL SPA ...,76824018,2025-02-25 00:00:00+00:00,201828614.0,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,65,2025-01-27 00:00:00+00:00,2025-02-25 00:00:00+00:00,"200298191.0, 200299593.0, 200322818.0, 2003562..."
