In [1]:
# === P-%BAL — Balance threshold (Amount) from the per-customer 95th percentile ===
# Rule: When the account balance (prior to a transaction) is over [Amount] {var.Balance} CLP
#       and an Outbound Cash transaction is > {var.Percentage}% (fixed at 95%) of the balance, apply {action}.
# Rationale: {var.Balance} = p95 of the distribution of customer holdings/balances.
# Column used: customer_account_balance

import pandas as pd
import numpy as np

# -------- Editable parameters --------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = "I-2"         # <-- sub-subsegment label (a single string or an iterable of labels)
PCTS = [90, 95, 97, 99]         # percentiles to report

# -------- Load --------
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids mixed-type columns.
# NOTE(review): the previous run emitted a warning on this read_csv call,
# presumably the mixed-type DtypeWarning — confirm it is gone after re-running.
df = pd.read_csv(
    PATH,
    dtype={"customer_id": "string"},
    encoding="utf-8-sig",
    low_memory=False,
)

# -------- Filter by sub-subsegment --------
# Accept either one label or a collection of labels; compare everything as str.
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# -------- Field preparation --------
# Unparseable balances become NaN and unparseable timestamps become NaT.
df["customer_account_balance"] = pd.to_numeric(df["customer_account_balance"], errors="coerce")
has_time = "tx_date_time" in df.columns
if has_time:
    df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")

# -------- One balance per customer (most recent one when a timestamp exists) --------
# The rule fixes {var.Balance} at p95, so the recommendation is computed at this
# percentile directly and keeps working even if 95 is removed from PCTS.
RECOMMENDED_PCT = 95

cols = ["customer_id", "customer_account_balance"] + (["tx_date_time"] if has_time else [])
g = df[cols].dropna(subset=["customer_id", "customer_account_balance"]).copy()

if g.empty:
    print("No hay saldos válidos en customer_account_balance.")
else:
    if has_time:
        # na_position="first": rows whose timestamp is NaT must not win over
        # properly dated rows. With the default ("last") an undated row would
        # be picked by tail(1) as the "most recent" balance.
        g = g.sort_values(["customer_id", "tx_date_time"], na_position="first")
    # Last row per customer: the latest dated row when sorted above, otherwise
    # simply the last occurrence in file order.
    g = g.groupby("customer_id", as_index=False).tail(1)

    # Only strictly positive balances take part in the distribution.
    s = g.loc[g["customer_account_balance"] > 0, "customer_account_balance"].astype(float)

    if s.empty:
        print("No hay saldos positivos para calcular percentiles.")
    else:
        stats = {f"p{p}": float(np.percentile(s, p)) for p in PCTS}
        recommended_amount = int(round(float(np.percentile(s, RECOMMENDED_PCT))))

        # NOTE(review): with very few customers the upper percentiles are
        # interpolated between a handful of points — check the customer count
        # printed below before adopting the value.
        print("=== P-%BAL — Percentiles de 'customer_account_balance' por cliente (CLP) ===")
        print(f"Clientes considerados: {s.shape[0]}")
        for p in PCTS:
            v = stats[f"p{p}"]
            print(f"p{p:>2}: {v:,.0f}")
        print(f"\n{'{var.Percentage}'} fijo = 95%")
        print(f"{'{var.Balance}'} recomendado (p{RECOMMENDED_PCT}): {recommended_amount:,.0f} CLP")


  df = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")


=== P-%BAL — Percentiles de 'customer_account_balance' por cliente (CLP) ===
Clientes considerados: 11
p90: 900,944,537
p95: 1,264,371,558
p97: 1,409,742,366
p99: 1,555,113,175

{var.Percentage} fijo = 95%
{var.Balance} recomendado (p95): 1,264,371,558 CLP


# Simulación de alertas

In [None]:
# === P-%BAL — Sensitivity (current vs. proposed thresholds) ====================
# EXACT LOGIC:
# tx_direction = Outbound
# AND customer_base_account_balance [Default: -1e16] ≥ [Balance]
# AND (
#        customer_base_account_balance - tx_base_amount ≤ 0
#      OR
#        (customer_base_account_balance - tx_base_amount > 0
#         AND tx_base_amount > customer_base_account_balance * ([Percentage]/100))
#     )
# Unit = transactions that satisfy the rule

import pandas as pd, numpy as np
# Render floats as thousands-separated integers in displayed tables.
pd.set_option("display.float_format", "{:,.0f}".format)

PATH = "../../data/tx_retail_core.csv"

# One entry per scenario: the balance threshold (CLP) and the percentage of the
# balance that an outbound amount must exceed.
PARAMS = {
    "Actual": dict(Balance=7_305_200_400, Percentage=95),
    "p90":    dict(Balance=2_012_533_361, Percentage=95),
    "p95":    dict(Balance=3_624_167_233, Percentage=95),
    "p97":    dict(Balance=5_307_767_468, Percentage=95),
    "p99":    dict(Balance=10_466_651_357, Percentage=95),
}

df = pd.read_csv(PATH, encoding="utf-8-sig", dtype={"customer_id": "string"})

# Normalise the categorical text columns to Title Case so that comparisons such
# as == "Outbound" do not depend on the casing used in the file.
for _col in ("tx_direction", "tx_type"):
    df[_col] = df[_col].astype(str).str.title()

df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")

# Balance prior to the transaction: taken from the base-currency column, with
# the rule's default of -1e16 for missing/unparseable values so those rows can
# never reach any positive [Balance] threshold.
bal = pd.to_numeric(df["customer_base_account_balance"], errors="coerce")
bal = bal.fillna(-1e16)
df["_bal_prev"] = bal

# Candidate transactions: outbound movements with a parseable amount.
# NOTE(review): the rule text speaks of "Outbound Cash" transactions, but the
# exact logic only constrains tx_direction; no tx_type filter is applied here —
# confirm that the input file already contains cash transactions only.
g = df[
    df["tx_direction"].eq("Outbound") &
    df["tx_base_amount"].notna()
].copy()

# Scenarios to report: the known ones first, in their conventional order, then
# any extra scenario added to PARAMS, so nothing is silently dropped.
order = ["Actual","p90","p95","p97","p99"]
scenarios = [k for k in order if k in PARAMS] + [k for k in PARAMS if k not in order]

param_tbl = pd.DataFrame(PARAMS).T.loc[scenarios].rename_axis("escenario").reset_index()
print("=== P-%BAL — Parámetros ==="); display(param_tbl)

# Loop-invariant pieces of the rule: what would be left in the account after the
# transaction, and whether the transaction empties (or overdraws) the account.
remaining = g["_bal_prev"] - g["tx_base_amount"]
cond1 = remaining <= 0
still_positive = remaining > 0

counts = {}
for k in scenarios:
    B, P = PARAMS[k]["Balance"], PARAMS[k]["Percentage"]
    # Prior balance must reach the scenario threshold.
    pre = g["_bal_prev"] >= B
    # Account keeps a positive balance but the amount exceeds P% of it.
    cond2 = still_positive & (g["tx_base_amount"] > g["_bal_prev"] * (P/100.0))
    counts[k] = int((pre & (cond1 | cond2)).sum())

# One row, one "alertas_<scenario>" column per scenario actually evaluated.
out = pd.DataFrame([{f"alertas_{k.lower()}": counts[k] for k in scenarios}])
print("=== P-%BAL — Alertas por escenario (tx) ==="); display(out)


=== P-%BAL — Parámetros ===


Unnamed: 0,escenario,Balance,Percentage
0,Actual,7305200400,95
1,p90,2012533361,95
2,p95,3624167233,95
3,p97,5307767468,95
4,p99,10466651357,95


=== P-%BAL — Alertas por escenario (tx) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,0,0,0,0,0
