# Definición parámetros

In [2]:
# === IN>AVG — Percentiles de Amount y Factor (Inbound Cash) ====================
import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.2f}")

# -------- Parámetros editables --------
PATH = "../../data/tx/datos_trx__with_subsub.csv"
SUBSUBSEGMENTS = "R-High"               # <-- ajusta el sub-subsegmento
MIN_PREV_TX = 1                # mínimo de transacciones previas para calcular Factor
MIN_AMOUNT = 0                 # umbral mínimo (CLP) para considerar Amount en la muestra (0 = todas >0)

# -------- Carga mínima --------
tx = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")
tx["tx_date_time"]   = pd.to_datetime(tx["tx_date_time"], errors="coerce")
tx["tx_base_amount"] = pd.to_numeric(tx["tx_base_amount"], errors="coerce")

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

tx = tx[tx["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# -------- Selección: Inbound + Cash --------
mask_in = (
    tx["tx_direction"].astype(str).str.upper().str.startswith("IN") &
    tx["tx_type"].astype(str).str.upper().str.startswith("CASH") &
    tx["tx_date_time"].notna() &
    tx["tx_base_amount"].notna()
)
g = tx.loc[mask_in, ["customer_id","tx_date_time","tx_base_amount"]].copy()
g = g[g["tx_base_amount"] > MIN_AMOUNT]  # Amount base para la muestra
g = g.sort_values(["customer_id","tx_date_time"])

if g.empty:
    print("No hay transacciones IN Cash elegibles.")
else:
    # Promedio previo (excluye la fila actual): shift().expanding().mean()
    g["prev_avg"] = (
        g.groupby("customer_id")["tx_base_amount"]
         .transform(lambda s: s.shift().expanding().mean())
    )
    # Conteo previo para elegibilidad del Factor
    g["prev_cnt"] = g.groupby("customer_id").cumcount()

    # Factor solo donde hay suficiente histórico y promedio previo > 0
    elig = (g["prev_cnt"] >= MIN_PREV_TX) & (g["prev_avg"] > 0)
    g["factor"] = np.where(elig, g["tx_base_amount"] / g["prev_avg"], np.nan)

    # Percentiles requeridos
    Q = [0.85, 0.90, 0.95, 0.97, 0.99]
    amount_s = g["tx_base_amount"].astype(float).dropna()
    factor_s = pd.to_numeric(g["factor"], errors="coerce").replace([np.inf, -np.inf], np.nan).dropna()

    amount_q = amount_s.quantile(Q) if len(amount_s) else pd.Series(index=Q, dtype=float)
    factor_q = factor_s.quantile(Q) if len(factor_s) else pd.Series(index=Q, dtype=float)

    out = pd.DataFrame({
        "percentil": [f"p{int(q*100)}" for q in Q],
        "Amount": [amount_q.get(q, np.nan) for q in Q],
        "Factor": [factor_q.get(q, np.nan) for q in Q],
    })

    print("=== IN>AVG — Percentiles (Amount y Factor) ===")
    print(f"Transacciones IN usadas (Amount): {len(amount_s):,}")
    print(f"Transacciones IN con Factor válido: {len(factor_s):,} (prev_cnt ≥ {MIN_PREV_TX}, prev_avg > 0)\n")
    display(out)


  tx = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")


=== IN>AVG — Percentiles (Amount y Factor) ===
Transacciones IN usadas (Amount): 885
Transacciones IN con Factor válido: 691 (prev_cnt ≥ 1, prev_avg > 0)



Unnamed: 0,percentil,Amount,Factor
0,p85,400000000.0,2.18
1,p90,516848600.0,3.31
2,p95,735000000.0,8.19
3,p97,971617600.0,22.87
4,p99,1507594798.24,222.27


# Simulación alertas

In [3]:
# === IN>AVG — Sensibilidad (Actual vs propuestos, por transacción) =============
# Lógica:
#   tx_direction=Inbound & tx_type=Cash
#   tx_base_amount >= Amount
#   prev_cnt (t-1 sobre all-time) > Number
#   factor = tx_base_amount / promedio_previo (excluye la actual)  >= Factor
# Unidad = transacciones que cumplen

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-1"]          # <-- ajusta el sub-subsegmento
PARAMS = {
    "Actual": {"Amount": 134_712_184, "Factor": 2,  "Number": 38},
    "p90":    {"Amount": 140_212_000, "Factor": 2.3,  "Number": 38},
    "p95":    {"Amount": 215_009_088, "Factor": 3.33,  "Number": 38},
    "p97":    {"Amount": 285_346_710, "Factor": 4.5,  "Number": 38},
    "p99":    {"Amount": 475_601_052, "Factor": 9, "Number": 38},
}

tx = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
tx["tx_date_time"]   = pd.to_datetime(tx["tx_date_time"], errors="coerce")
tx["tx_base_amount"] = pd.to_numeric(tx["tx_base_amount"], errors="coerce")

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))   

tx = tx[tx["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

mask = (
    tx["tx_direction"].astype(str).str.title().eq("Inbound") &
    tx["tx_type"].astype(str).str.title().eq("Cash") &
    tx["tx_date_time"].notna() & tx["tx_base_amount"].notna()
)
g = tx.loc[mask, ["customer_id","tx_date_time","tx_base_amount"]].sort_values(["customer_id","tx_date_time"])

if g.empty:
    print("No hay transacciones IN Cash elegibles.")
else:
    g["prev_avg"] = g.groupby("customer_id")["tx_base_amount"].transform(lambda s: s.shift().expanding().mean())
    g["prev_cnt"] = g.groupby("customer_id").cumcount()
    g["factor"]   = np.where((g["prev_cnt"]>=1) & (g["prev_avg"]>0), g["tx_base_amount"]/g["prev_avg"], np.nan)

    order = ["Actual","p85","p90","p95","p97","p99"]
    param_tbl = pd.DataFrame(PARAMS).T.loc[[k for k in order if k in PARAMS]].rename_axis("escenario").reset_index()
    print("=== IN>AVG — Parámetros (Amount, Factor, Number) ==="); display(param_tbl)

    counts = {}
    for k, v in PARAMS.items():
        A, F, N = v["Amount"], v["Factor"], v["Number"]
        elig = (g["tx_base_amount"] >= A) & (g["prev_cnt"] > N) & np.isfinite(g["factor"])
        m_ok = elig & (g["factor"] >= F)
        counts[k] = int(m_ok.sum())

    out = pd.DataFrame([{
        "alertas_actual": counts.get("Actual", 0),
        "alertas_p85":    counts.get("p85", 0),
        "alertas_p90":    counts.get("p90", 0),
        "alertas_p95":    counts.get("p95", 0),
        "alertas_p97":    counts.get("p97", 0),
        "alertas_p99":    counts.get("p99", 0),
    }])
    print("=== IN>AVG — Alertas por escenario (transacciones) ==="); display(out)


  tx = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== IN>AVG — Parámetros (Amount, Factor, Number) ===


Unnamed: 0,escenario,Amount,Factor,Number
0,Actual,134712184,2,38
1,p90,140212000,2,38
2,p95,215009088,3,38
3,p97,285346710,4,38
4,p99,475601052,9,38


=== IN>AVG — Alertas por escenario (transacciones) ===


Unnamed: 0,alertas_actual,alertas_p85,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,43,0,35,10,5,1
