# Definición parámetros

In [6]:
# === IN-OUT-1 — Parametrización de Amount (Outbound Cash) ======================
# Regla (parte Amount): el umbral es un monto en CLP; aquí calculamos sus percentiles.
# Criterio sugerido: Amount = p90 de la distribución de montos OUT Cash.

import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# -------- Parámetros editables --------
PATH = "../../data/tx_retail_whale.csv"      # <-- cambia a tu CSV
PCTS = [0.85, 0.90, 0.95, 0.97, 0.99]

# -------- Carga mínima --------
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")

# -------- Filtro: Outbound + Cash + montos positivos --------
mask = (
    df["tx_direction"].astype(str).str.title().eq("Outbound") &
    df["tx_type"].astype(str).str.title().eq("Cash") &
    df["tx_base_amount"].notna() &
    (df["tx_base_amount"] > 0)
)
g = df.loc[mask, ["tx_base_amount"]].copy()

if g.empty:
    print("No hay transacciones OUT Cash con monto positivo para parametrizar Amount.")
else:
    s = g["tx_base_amount"].astype(float)
    q = s.quantile(PCTS)

    out = pd.DataFrame({
        "percentil": [f"p{int(p*100)}" for p in PCTS],
        "Amount_CLP": [q.get(p, np.nan) for p in PCTS]
    })

    # Sugerencia según tu práctica: p90
    amount_recommended = int(round(q.get(0.90, np.nan))) if not np.isnan(q.get(0.90, np.nan)) else np.nan

    print("=== IN-OUT-1 — Percentiles de Amount (CLP) para OUT Cash ===")
    print(f"Transacciones consideradas: {len(s):,}\n")
    display(out)

    print(f"\nAmount recomendado (p90): {amount_recommended:,.0f} CLP")


=== IN-OUT-1 — Percentiles de Amount (CLP) para OUT Cash ===
Transacciones consideradas: 335



Unnamed: 0,percentil,Amount_CLP
0,p85,319556169
1,p90,400000000
2,p95,521250215
3,p97,587195362
4,p99,941841000



Amount recomendado (p90): 400,000,000 CLP


# Simulación alertas

In [10]:
# === IN-OUT-1 — Sensibilidad (Actual vs propuestos, por transacción) ==========
# Lógica:
#   tx_direction=Outbound & tx_type=Cash
#   tx_base_amount > Amount
#   IN_cnt_14d > Number
#   tx_base_amount >= (Percentage/100) * IN_sum_14d
# Unidad = transacciones OUT que cumplen

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

PATH = "../../data/tx_retail_whale.csv"
PARAMS = {
    "Actual": {"Amount": 135_093_183, "Number": 2, "Percentage": 80},
    "p95":    {"Amount": 521_250_006, "Number": 2, "Percentage": 80},
    "p97":    {"Amount": 587_200_000, "Number": 2, "Percentage": 80},
    "p99":    {"Amount": 941_847_647, "Number": 2, "Percentage": 80},
}

df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
df["tx_type"]        = df["tx_type"].astype(str).str.title()

IN_  = df[(df["tx_direction"].eq("Inbound"))  & (df["tx_type"].eq("Cash")) & df["tx_base_amount"].notna() & df["tx_date_time"].notna()]
OUT_ = df[(df["tx_direction"].eq("Outbound")) & (df["tx_type"].eq("Cash")) & df["tx_base_amount"].notna() & df["tx_date_time"].notna()]

# Precalcular series IN por cliente (suma y conteo 14d)
def in_metrics(sub):
    daily = sub.set_index("tx_date_time")["tx_base_amount"].abs().resample("D").agg(['sum','count']).rename(columns={'sum':'IN_sum','count':'IN_cnt'})
    return daily

IN_daily = {cid: in_metrics(sub) for cid, sub in IN_.groupby("customer_id", sort=False)}

order = ["Actual","p90","p95","p97"]
param_tbl = pd.DataFrame(PARAMS).T.loc[[k for k in order if k in PARAMS]].rename_axis("escenario").reset_index()
print("=== IN-OUT-1 — Parámetros (Amount, Number, Percentage) ==="); display(param_tbl)

counts = {}

for k, v in PARAMS.items():
    A, N, P = v["Amount"], v["Number"], v["Percentage"]

    ok = []
    for cid, sub in OUT_.groupby("customer_id", sort=False):
        out_sub = sub.sort_values("tx_date_time")
        # si no hay IN para el cliente, nunca gatilla
        ind = IN_daily.get(cid)
        if ind is None or ind.empty:
            continue
        # rolling 14d sobre IN
        IN14_sum = ind["IN_sum"].rolling("14D").sum()
        IN14_cnt = ind["IN_cnt"].rolling("14D").sum()

        # Evaluar por tx OUT en su fecha (usar fecha de la tx en índice diario)
        for t, amt in zip(out_sub["tx_date_time"], out_sub["tx_base_amount"].abs()):
            d = pd.Timestamp(t.normalize())
            if d not in IN14_sum.index:
                # extender con 0s si falta
                val_sum = 0.0; val_cnt = 0.0
            else:
                val_sum = float(IN14_sum.loc[:d].iloc[-1]) if not IN14_sum.loc[:d].empty else 0.0
                val_cnt = float(IN14_cnt.loc[:d].iloc[-1]) if not IN14_cnt.loc[:d].empty else 0.0

            cond = (amt > A) & (val_cnt > N) & (amt >= (P/100.0)*val_sum)
            ok.append(bool(cond))

    counts[k] = int(np.sum(ok))

out = pd.DataFrame([{
    "alertas_actual": counts.get("Actual", 0),
    "alertas_p90":    counts.get("p90", 0),
    "alertas_p95":    counts.get("p95", 0),
    "alertas_p97":    counts.get("p97", 0),
    "alertas_p99":    counts.get("p99", 0),
}])
print("=== IN-OUT-1 — Alertas por escenario (transacciones OUT) ==="); display(out)


=== IN-OUT-1 — Parámetros (Amount, Number, Percentage) ===


Unnamed: 0,escenario,Amount,Number,Percentage
0,Actual,135093183,2,80
1,p95,521250006,2,80
2,p97,587200000,2,80


=== IN-OUT-1 — Alertas por escenario (transacciones OUT) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,6,0,4,0,0
