# Definición de parámetros

In [1]:
# === OUT>%IN — Amount-OUT-30d: percentiles of the rolling 30-day OUT Cash sum ===
# For each customer, builds a daily series of Outbound amounts (optionally Cash
# only) spanning that customer's first to last eligible transaction, takes the
# trailing WINDOW_DAYS sum on every day, and reports the PCTS quantiles of all
# customer-day sums as candidate values for the Amount threshold.
import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# Parameters
PATH = "../../data/tx_iv.csv"   # <-- adjust path
WINDOW_DAYS = 30                # trailing window length, in days
PCTS = [0.95, 0.97, 0.98, 0.99]
FILTER_TO_CASH = True
USE_ABS = True  # True recommended, to avoid accounting signs

# Minimal load: unparseable dates/amounts become NaT/NaN (errors="coerce") and
# are dropped by the mask below; text columns are title-cased so the
# comparisons against "Outbound"/"Cash" are case-insensitive.
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
if FILTER_TO_CASH and "tx_type" in df.columns:
    df["tx_type"] = df["tx_type"].astype(str).str.title()

# Filter: OUTBOUND (+ optional Cash) with valid date and amount
mask = (
    df["tx_direction"].eq("Outbound") &
    df["tx_date_time"].notna() &
    df["tx_base_amount"].notna()
)
if FILTER_TO_CASH and "tx_type" in df.columns:
    mask &= df["tx_type"].eq("Cash")

g = df.loc[mask, ["customer_id","tx_date_time","tx_base_amount"]].copy()
if g.empty:
    print(f"No hay transacciones elegibles para OUT>%IN (Amount-OUT-{WINDOW_DAYS}d).")
else:
    g["amt"] = g["tx_base_amount"].abs() if USE_ABS else g["tx_base_amount"]

    # One rolling series per customer. resample("D").sum() yields 0 for days
    # without transactions, so every calendar day between the customer's first
    # and last transaction contributes one window. Rows with a null customer_id
    # are skipped by groupby's default dropna behaviour.
    window = f"{WINDOW_DAYS}D"
    parts = []
    for cid, sub in g.groupby("customer_id", sort=False):
        daily = sub.set_index("tx_date_time")["amt"].resample("D").sum()
        roll_sum = daily.rolling(window).sum()
        parts.append(roll_sum.rename(cid))

    if not parts:
        print("Sin series diarias para calcular la ventana.")
    else:
        # Pool every customer-day window into a single sample for the quantiles.
        s = pd.concat([ser.dropna().astype(float) for ser in parts], axis=0)
        q = s.quantile(PCTS) if len(s) else pd.Series(index=PCTS, dtype=float)

        out = pd.DataFrame({
            # ":g" formatting instead of int(p*100): int() truncates, so values
            # such as 0.29 (0.29*100 == 28.999999999999996) would be labelled
            # "p28"; it also keeps fractional percentiles (0.995 -> "p99.5").
            "percentil": [f"p{p * 100:g}" for p in PCTS],
            # Column name follows WINDOW_DAYS ("Amount_OUT_30d" for 30).
            f"Amount_OUT_{WINDOW_DAYS}d": [q.get(p, np.nan) for p in PCTS],
        })

        print(f"=== OUT>%IN — Percentiles de suma OUT {WINDOW_DAYS}d (CLP) ===")
        print(f"Clientes con OUT: {g['customer_id'].nunique():,} | Ventanas: {len(s):,}\n")
        display(out)


=== OUT>%IN — Percentiles de suma OUT 30d (CLP) ===
Clientes con OUT: 2,191 | Ventanas: 155,109



Unnamed: 0,percentil,Amount_OUT_30d
0,p95,368566817
1,p97,652451721
2,p98,1006045476
3,p99,1940081511


# Simulación de alertas

In [4]:
# === OUT>%IN — Sensitivity (current vs. proposed thresholds) ===================
# EXACT LOGIC (customer-day windows) — a window counts as an alert when:
#   sum(OUT) over the window > Amount
#   AND sum(IN) over the window > 0
#   AND sum(OUT) ∈ [Low%, High%] of sum(IN)
# Notes: the rolling "<WINDOW_DAYS>D" window includes the current day;
#        |tx_base_amount| is used for both directions.

import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDIT HERE -----------------------------------------------------------------
PATH = "../../data/tx_iv.csv"
FILTER_TO_CASH = True
WINDOW_DAYS = 30  # trailing window length, in days (result columns stay named OUT30/IN30)
PARAMS = {
    #"Actual": {"Amount_OUT_30d": 45_604_357, "Low": 90, "High": 110},
    "p95":    {"Amount_OUT_30d":   368_566_817, "Low": 90, "High": 110},
    "p97":    {"Amount_OUT_30d":   652_451_721, "Low": 90, "High": 110},
    # NOTE(review): the recorded output of the percentile cell shows
    # p98 = 1,006,045,476, not 1,006,045,208 — confirm which value is intended.
    "p98":    {"Amount_OUT_30d": 1_006_045_208, "Low": 90, "High": 110},
    "p99":    {"Amount_OUT_30d": 1_940_081_511, "Low": 90, "High": 110},
}
# -------------------------------------------------------------------------------

# Load: unparseable dates/amounts become NaT/NaN (errors="coerce") and are
# excluded by base_mask; text columns are title-cased so the comparisons
# against "Outbound"/"Inbound"/"Cash" are case-insensitive.
df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
if FILTER_TO_CASH and "tx_type" in df.columns:
    df["tx_type"] = df["tx_type"].astype(str).str.title()

base_mask = df["tx_date_time"].notna() & df["tx_base_amount"].notna() & df["customer_id"].notna()
if FILTER_TO_CASH and "tx_type" in df.columns:
    base_mask &= df["tx_type"].eq("Cash")

OUT_ = df[base_mask & df["tx_direction"].eq("Outbound")][["customer_id","tx_date_time","tx_base_amount"]].copy()
IN_  = df[base_mask & df["tx_direction"].eq("Inbound")][["customer_id","tx_date_time","tx_base_amount"]].copy()

if OUT_.empty:
    print("No hay transacciones Outbound elegibles.")
else:
    # Index the Inbound rows by customer once, instead of rescanning all of IN_
    # for every Outbound customer inside the loop.
    in_by_customer = {cid: sub for cid, sub in IN_.groupby("customer_id", sort=False)}
    window = f"{WINDOW_DAYS}D"

    parts=[]
    for cid, sub_out in OUT_.groupby("customer_id", sort=False):
        out_daily = (sub_out.set_index("tx_date_time")["tx_base_amount"]
                            .abs()
                            .resample("D")
                            .sum())
        sub_in = in_by_customer.get(cid)
        if sub_in is None:
            # Customer has no Inbound rows: an all-zero IN series on the OUT calendar.
            in_daily = pd.Series(0.0, index=out_daily.index)
        else:
            in_daily = (sub_in.set_index("tx_date_time")["tx_base_amount"]
                               .abs()
                               .resample("D")
                               .sum())

        # Put both series on one gap-free daily calendar covering the
        # customer's earliest to latest transaction in either direction.
        start = min(out_daily.index.min(), in_daily.index.min())
        end   = max(out_daily.index.max(), in_daily.index.max())
        idx   = pd.date_range(start, end, freq="D")

        out_daily = out_daily.reindex(idx, fill_value=0.0)
        in_daily  = in_daily.reindex(idx,  fill_value=0.0)

        OUT30 = out_daily.rolling(window, min_periods=1).sum()
        IN30  = in_daily.rolling(window, min_periods=1).sum()

        parts.append(pd.DataFrame({"customer_id": cid, "date": idx, "OUT30": OUT30.values, "IN30": IN30.values}))

    M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","date","OUT30","IN30"])

    if M.empty:
        print(f"No se pudieron construir ventanas {WINDOW_DAYS}d (M vacío).")
    else:
        # Preferred display order. Scenarios not listed here are appended in
        # their PARAMS order, so none is dropped from the tables below.
        order = ["Actual","p85","p90","p95","p97","p98","p99"]
        scenarios = [k for k in order if k in PARAMS] + [k for k in PARAMS if k not in order]

        if PARAMS:
            param_tbl = (pd.DataFrame(PARAMS).T
                           .loc[scenarios]
                           .rename_axis("escenario").reset_index())
            print("=== OUT>%IN — Parámetros (Amount_OUT_30d, Low, High) ==="); display(param_tbl)

        counts = {}
        for k, v in PARAMS.items():
            A, L, H = float(v["Amount_OUT_30d"]), float(v["Low"]), float(v["High"])
            m = (
                (M["OUT30"] > A) &
                (M["IN30"] > 0) &
                (M["OUT30"] >= M["IN30"] * (L/100.0)) &
                (M["OUT30"] <= M["IN30"] * (H/100.0))
            )
            # M has exactly one row per (customer_id, date): each customer is
            # visited once and idx is a unique daily range, so the number of
            # flagged rows is the number of distinct customer-day windows.
            counts[k] = int(m.sum())

        if PARAMS:
            alerts = pd.DataFrame([{f"alertas_{name.lower()}": counts.get(name, 0)
                                    for name in scenarios}])
            print("=== OUT>%IN — Alertas por escenario (ventanas cliente–día) ==="); display(alerts)


=== OUT>%IN — Parámetros (Amount_OUT_30d, Low, High) ===


Unnamed: 0,escenario,Amount_OUT_30d,Low,High
0,p95,368566817,90,110
1,p97,652451721,90,110
2,p98,1006045208,90,110
3,p99,1940081511,90,110


=== OUT>%IN — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_p95,alertas_p97,alertas_p98,alertas_p99
0,2583,1758,1096,618
