In [5]:
# === P-2nd — Percentiles de monto (segunda transacción ≤7 días desde apertura) ==
import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# Parámetros
PATH = "../../data/tx/datos_trx__with_subsub.csv"
SUBSUBSEGMENTS = "R-High"  # <-- ajusta el sub-subsegmento
PCTS = [0.85, 0.90, 0.95, 0.97, 0.99]
FILTER_TO_CASH = True
WINDOW_DAYS = 7

# Carga mínima
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["customer_account_creation_date"] = pd.to_datetime(df["customer_account_creation_date"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")

if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# Filtrado básico
m = df["tx_date_time"].notna() & df["customer_account_creation_date"].notna() & df["tx_base_amount"].notna()
if FILTER_TO_CASH and "tx_type" in df.columns:
    m &= df["tx_type"].eq("Cash")

g = df.loc[m, ["customer_id","tx_date_time","customer_account_creation_date","tx_base_amount"]].copy()
if g.empty:
    print("No hay transacciones elegibles para P-2nd.")
else:
    g = g.sort_values(["customer_id","tx_date_time"])
    g["tx_order"] = g.groupby("customer_id").cumcount() + 1

    # Segunda transacción dentro de 7 días desde la apertura (inclusive)
    within = (g["tx_date_time"] - g["customer_account_creation_date"]).dt.days.between(0, WINDOW_DAYS)
    second_tx = g[(g["tx_order"] == 2) & within & (g["tx_base_amount"] > 0)]["tx_base_amount"].astype(float)

    if second_tx.empty:
        print("No hay segundas transacciones dentro de los 7 días.")
    else:
        q = second_tx.quantile(PCTS)
        out = pd.DataFrame({"percentil":[f"p{int(p*100)}" for p in PCTS],
                            "Amount_CLP":[q.get(p, np.nan) for p in PCTS]})
        print(f"=== P-2nd — Percentiles (n={len(second_tx):,}) ===")
        display(out)


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== P-2nd — Percentiles (n=8) ===


Unnamed: 0,percentil,Amount_CLP
0,p85,151531030
1,p90,226128127
2,p95,313064063
3,p97,347838438
4,p99,382612813


# Simulación alertas

In [None]:
# === P-2nd — Sensibilidad (Actual vs propuestos) ===============================
# LÓGICA EXACTA:
# customer_account_creation_date - tx_date_time < [Days]
# AND count de transacciones por {customer_id} = 2
# AND tx_base_amount > [Amount]
# Unidad = segundas transacciones que cumplen

import pandas as pd
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-1", "I-2"]              # <-- ajusta el sub-subsegmento
PARAMS={
    "Actual":{"Days":7, "Amount":990_300_000},
    "p95":   {"Days":7, "Amount":195_000_000},
    "p97":   {"Days":7, "Amount":199_100_000},
    "p99":   {"Days":7, "Amount":203_300_000},
}

df=pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]=pd.to_datetime(df["tx_date_time"], errors="coerce")
df["customer_account_creation_date"]=pd.to_datetime(df["customer_account_creation_date"], errors="coerce")
df["tx_base_amount"]=pd.to_numeric(df["tx_base_amount"], errors="coerce")

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

g=df[df["tx_date_time"].notna() & df["customer_account_creation_date"].notna() & df["tx_base_amount"].notna()].copy()
g=g.sort_values(["customer_id","tx_date_time"])
g["tx_order"]=g.groupby("customer_id").cumcount()+1
g["days_from_open"]=(g["tx_date_time"]-g["customer_account_creation_date"]).dt.total_seconds()/86400.0

param_tbl=pd.DataFrame(PARAMS).T.rename_axis("escenario").reset_index()
print("=== P-2nd — Parámetros ==="); display(param_tbl)

counts={}
for k,v in PARAMS.items():
    D,A=v["Days"], v["Amount"]
    m=(g["tx_order"].eq(2) & (g["days_from_open"]>=0) & (g["days_from_open"]<=D) & (g["tx_base_amount"]>A))
    counts[k]=int(m.sum())

out=pd.DataFrame([{
    "alertas_actual":counts.get("Actual",0),
    "alertas_p95":counts.get("p95",0),
    "alertas_p97":counts.get("p97",0),
    "alertas_p99":counts.get("p99",0),
}])
print("=== P-2nd — Alertas por escenario (tx) ==="); display(out)


=== P-2nd — Parámetros ===


Unnamed: 0,escenario,Days,Amount
0,Actual,7,990300000
1,p95,7,195000000
2,p97,7,199100000
3,p99,7,203300000


=== P-2nd — Alertas por escenario (tx) ===


Unnamed: 0,alertas_actual,alertas_p95,alertas_p97,alertas_p99
0,0,1,1,1
