In [None]:
# === OUT>AVG — Percentiles de Amount y Factor (Outbound Cash) ==================
import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

# -------- Parámetros editables --------
PATH = "../../data/tx/datos_trx__with_subsub.csv"
SUBSUBSEGMENTS = "I"     # <-- ajusta el sub-subsegmento
MIN_PREV_TX = 1                # mínimo de transacciones previas para calcular Factor
MIN_AMOUNT = 0                 # umbral mínimo (CLP) para considerar Amount en la muestra (0 = todas >0)

# -------- Carga mínima --------
tx = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")
tx["tx_date_time"]   = pd.to_datetime(tx["tx_date_time"], errors="coerce")
tx["tx_base_amount"] = pd.to_numeric(tx["tx_base_amount"], errors="coerce")

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))   

tx = tx[tx["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# -------- Selección: Outbound + Cash --------
mask_out = (
    tx["tx_direction"].astype(str).str.upper().str.startswith("OUT") &
    tx["tx_type"].astype(str).str.upper().str.startswith("CASH") &
    tx["tx_date_time"].notna() &
    tx["tx_base_amount"].notna()
)
g = tx.loc[mask_out, ["customer_id","tx_date_time","tx_base_amount"]].copy()
g = g[g["tx_base_amount"] > MIN_AMOUNT]  # Amount base para la muestra
g = g.sort_values(["customer_id","tx_date_time"])

if g.empty:
    print("No hay transacciones OUT Cash elegibles.")
else:
    # Promedio previo (excluye la fila actual)
    g["prev_avg"] = (
        g.groupby("customer_id")["tx_base_amount"]
         .transform(lambda s: s.shift().expanding().mean())
    )
    # Conteo previo para elegibilidad del Factor
    g["prev_cnt"] = g.groupby("customer_id").cumcount()

    # Factor solo donde hay suficiente histórico y promedio previo > 0
    elig = (g["prev_cnt"] >= MIN_PREV_TX) & (g["prev_avg"] > 0)
    g["factor"] = np.where(elig, g["tx_base_amount"] / g["prev_avg"], np.nan)

    # Percentiles requeridos
    Q = [0.85, 0.90, 0.95, 0.97, 0.99]
    amount_s = g["tx_base_amount"].astype(float).dropna()
    factor_s = pd.to_numeric(g["factor"], errors="coerce").replace([np.inf, -np.inf], np.nan).dropna()

    amount_q = amount_s.quantile(Q) if len(amount_s) else pd.Series(index=Q, dtype=float)
    factor_q = factor_s.quantile(Q) if len(factor_s) else pd.Series(index=Q, dtype=float)

    out = pd.DataFrame({
        "percentil": [f"p{int(q*100)}" for q in Q],
        "Amount": [amount_q.get(q, np.nan) for q in Q],
        "Factor": [factor_q.get(q, np.nan) for q in Q],
    })

    print("=== OUT>AVG — Percentiles (Amount y Factor) ===")
    print(f"Transacciones OUT usadas (Amount): {len(amount_s):,}")
    print(f"Transacciones OUT con Factor válido: {len(factor_s):,} (prev_cnt ≥ {MIN_PREV_TX}, prev_avg > 0)\n")
    display(out)


  tx = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")


=== OUT>AVG — Percentiles (Amount y Factor) ===
Transacciones OUT usadas (Amount): 1,977
Transacciones OUT con Factor válido: 1,959 (prev_cnt ≥ 1, prev_avg > 0)



Unnamed: 0,percentil,Amount,Factor
0,p85,426474642.4,1.89
1,p90,744523200.0,2.42
2,p95,1272172000.0,3.81
3,p97,1589859702.4,5.42
4,p99,2391201468.8,9.51


: 

# Sensibilidad alertas

In [9]:
# === OUT>AVG — Sensibilidad (Actual vs propuestos, por transacción) ============
# Lógica:
#   tx_direction=Outbound & tx_type=Cash
#   tx_base_amount >= Amount
#   prev_cnt (t-1) > Number
#   factor = tx_base_amount / promedio_previo (excluye la actual) >= Factor
# Unidad = transacciones que cumplen

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-2"]              # <-- ajusta el sub-subsegmento
PARAMS = {
   "Actual": {"Amount": 45_402_580, "Factor": 4, "Number": 65},
   "p90":    {"Amount": 98_005_949, "Factor": 2.5, "Number": 65},
    "p95":    {"Amount": 167_328_171, "Factor": 4, "Number": 65},
    "p97":    {"Amount": 227_056_563, "Factor": 5.5, "Number": 65},
    "p99":    {"Amount": 401_946_233, "Factor": 10.8,"Number": 65},
}

tx = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
tx["tx_date_time"]   = pd.to_datetime(tx["tx_date_time"], errors="coerce")
tx["tx_base_amount"] = pd.to_numeric(tx["tx_base_amount"], errors="coerce")

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

tx = tx[tx["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

mask = (
    tx["tx_direction"].astype(str).str.title().eq("Outbound") &
    tx["tx_type"].astype(str).str.title().eq("Cash") &
    tx["tx_date_time"].notna() & tx["tx_base_amount"].notna()
)
g = tx.loc[mask, ["customer_id","tx_date_time","tx_base_amount"]].sort_values(["customer_id","tx_date_time"])

if g.empty:
    print("No hay transacciones OUT Cash elegibles.")
else:
    g["prev_avg"] = g.groupby("customer_id")["tx_base_amount"].transform(lambda s: s.shift().expanding().mean())
    g["prev_cnt"] = g.groupby("customer_id").cumcount()
    g["factor"]   = np.where((g["prev_cnt"]>=1) & (g["prev_avg"]>0), g["tx_base_amount"]/g["prev_avg"], np.nan)

    order = ["Actual","p90","p95","p97"]
    param_tbl = pd.DataFrame(PARAMS).T.loc[[k for k in order if k in PARAMS]].rename_axis("escenario").reset_index()
    print("=== OUT>AVG — Parámetros (Amount, Factor, Number) ==="); display(param_tbl)

    counts = {}
    for k, v in PARAMS.items():
        A, F, N = v["Amount"], v["Factor"], v["Number"]
        elig = (g["tx_base_amount"] >= A) & (g["prev_cnt"] > N) & np.isfinite(g["factor"])
        m_ok = elig & (g["factor"] >= F)
        counts[k] = int(m_ok.sum())

    out = pd.DataFrame([{
        "alertas_actual": counts.get("Actual", 0),
        "alertas_p90":    counts.get("p90", 0),
        "alertas_p95":    counts.get("p95", 0),
        "alertas_p97":    counts.get("p97", 0),
        "alertas_p99":    counts.get("p99", 0),
    }])
    print("=== OUT>AVG — Alertas por escenario (transacciones) ==="); display(out)


  tx = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== OUT>AVG — Parámetros (Amount, Factor, Number) ===


Unnamed: 0,escenario,Amount,Factor,Number
0,Actual,45402580,4,65
1,p90,98005949,2,65
2,p95,167328171,4,65
3,p97,227056563,6,65


=== OUT>AVG — Alertas por escenario (transacciones) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,64,62,31,17,5
