# Definición de parámetros

In [3]:
# === IN>%OUT — Amount-IN-30d: percentiles of the 30-day inbound cash sum ======
# Purpose: for the selected customer sub-sub-segment(s), build one daily series
# of inbound amounts per customer, take a rolling WINDOW_DAYS-day sum over it,
# pool every customer-day window and report the PCTS quantiles of that pool.
import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# Parameters
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = "I-2"  # a single label (str) or an iterable of labels
WINDOW_DAYS = 30
PCTS = [0.85, 0.90, 0.95, 0.97, 0.98, 0.99]
FILTER_TO_CASH = True
USE_ABS = True  # True recommended so accounting signs do not matter

# Minimal load.
# NOTE(review): the recorded run emitted a warning on this read, presumably a
# DtypeWarning for mixed-type columns; low_memory=False infers each column's
# dtype from the whole file instead of chunk by chunk. Confirm against the data.
df = pd.read_csv(
    PATH,
    dtype={"customer_id": "string"},
    encoding="utf-8-sig",
    low_memory=False,
)
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
if FILTER_TO_CASH and "tx_type" in df.columns:
    df["tx_type"] = df["tx_type"].astype(str).str.title()

# Accept either one label or a collection of labels.
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

# Filter: inbound rows (optionally cash only) with a usable customer, date and
# amount. Rows without a customer_id would be dropped by groupby anyway; they
# are excluded here explicitly so the emptiness check below is accurate.
mask = (
    df["tx_direction"].eq("Inbound")
    & df["customer_id"].notna()
    & df["tx_date_time"].notna()
    & df["tx_base_amount"].notna()
)
if FILTER_TO_CASH and "tx_type" in df.columns:
    mask &= df["tx_type"].eq("Cash")

g = df.loc[mask, ["customer_id", "tx_date_time", "tx_base_amount"]].copy()
if g.empty:
    print("No hay transacciones elegibles para IN>%OUT (Amount-IN-30d).")
else:
    g["amt"] = g["tx_base_amount"].abs() if USE_ABS else g["tx_base_amount"]

    # Daily sum per customer, then a rolling sum over WINDOW_DAYS days
    # (the time-based window includes the current day).
    window = f"{WINDOW_DAYS}D"
    parts = []
    for cid, sub in g.groupby("customer_id", sort=False):
        daily = sub.set_index("tx_date_time")["amt"].resample("D").sum()
        parts.append(daily.rolling(window).sum().rename(cid))

    if not parts:
        print("Sin series diarias para calcular la ventana.")
    else:
        # Pool every customer-day window into one sample.
        s = pd.concat([ser.dropna().astype(float) for ser in parts], axis=0)
        q = s.quantile(PCTS) if len(s) else pd.Series(index=PCTS, dtype=float)

        out = pd.DataFrame({
            # ":g" avoids the truncation of int(p*100), which would label
            # e.g. 0.57 as p56 (0.57*100 == 56.99999999999999) and would
            # collapse 0.995 onto p99.
            "percentil": [f"p{p * 100:g}" for p in PCTS],
            # q is indexed by PCTS in the same order, so values line up.
            "Amount_IN_30d": q.to_numpy(),
        })

        print(f"=== IN>%OUT — Percentiles de suma IN {WINDOW_DAYS}d (CLP) ===")
        print(f"Clientes con IN: {g['customer_id'].nunique():,} | Ventanas: {len(s):,}\n")
        display(out)


  df = pd.read_csv(PATH, dtype={"customer_id":"string"}, encoding="utf-8-sig")


=== IN>%OUT — Percentiles de suma IN 30d (CLP) ===
Clientes con IN: 18 | Ventanas: 2,805



Unnamed: 0,percentil,Amount_IN_30d
0,p85,7102291855
1,p90,9167878980
2,p95,11161573028
3,p97,12646696272
4,p98,13061585000
5,p99,14058194904


# Simulación de alertas

In [5]:
# === IN>%OUT — Sensitivity (current vs proposed thresholds) ====================
# EXACT LOGIC (customer-day windows):
#   sum(IN) over the window > Amount  AND  IN30 within [Low%, High%] of OUT30
# Notes: the time-based rolling window includes the current day; amounts are
# taken in absolute value (|tx_base_amount|).

import pandas as pd
import numpy as np

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDIT HERE ------------------------------------------------------------------
PATH = "../../data/tx_iv.csv"
FILTER_TO_CASH = True
WINDOW_DAYS = 30  # length of the rolling window, in days
PARAMS = {
    #"Actual": {"Amount_IN_30d": 49_084_140, "Low": 80, "High": 100},
    "p95":    {"Amount_IN_30d":   824_400_000, "Low": 80, "High": 100},
    "p97":    {"Amount_IN_30d": 936_000_111, "Low": 80, "High": 100},
    # NOTE(review): the p98 amount is larger than the p99 amount, so p98 yields
    # fewer alerts than p99. Looks like a typo; confirm the intended values.
    "p98":    {"Amount_IN_30d": 1_776_500_000, "Low": 80, "High": 100},
    "p99":    {"Amount_IN_30d": 1_416_700_000, "Low": 80, "High": 100},
}
# ---------------------------------------------------------------------------------

df = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"] = df["tx_direction"].astype(str).str.title()
if FILTER_TO_CASH and "tx_type" in df.columns:
    df["tx_type"] = df["tx_type"].astype(str).str.title()

# Rows usable for either direction: valid date, amount and customer.
base_mask = df["tx_date_time"].notna() & df["tx_base_amount"].notna() & df["customer_id"].notna()
if FILTER_TO_CASH and "tx_type" in df.columns:
    base_mask &= df["tx_type"].eq("Cash")

cols = ["customer_id", "tx_date_time", "tx_base_amount"]
IN_ = df.loc[base_mask & df["tx_direction"].eq("Inbound"), cols].copy()
OUT_ = df.loc[base_mask & df["tx_direction"].eq("Outbound"), cols].copy()

# Display order: known scenarios first, then any other key of PARAMS in
# insertion order, so a scenario outside `order` is never silently dropped.
order = ["Actual", "p85", "p90", "p95", "p97", "p98", "p99"]
scenarios = [k for k in order if k in PARAMS] + [k for k in PARAMS if k not in order]

# Sanity check: percentile thresholds are expected to grow with the percentile.
pct_keys = [k for k in order[1:] if k in PARAMS]
for lower, upper in zip(pct_keys, pct_keys[1:]):
    if PARAMS[lower]["Amount_IN_30d"] > PARAMS[upper]["Amount_IN_30d"]:
        print(f"AVISO: Amount_IN_30d de {lower} es mayor que el de {upper}; revisa PARAMS.")

if IN_.empty:
    print("No hay transacciones Inbound elegibles.")
else:
    window = f"{WINDOW_DAYS}D"

    # Group outbound rows once instead of rescanning OUT_ for every customer.
    out_by_customer = dict(tuple(OUT_.groupby("customer_id", sort=False)))

    parts = []
    for cid, sub_in in IN_.groupby("customer_id", sort=False):
        in_daily = (sub_in.set_index("tx_date_time")["tx_base_amount"]
                          .abs()
                          .resample("D")
                          .sum())
        # Outbound rows of the same customer (may not exist).
        sub_out = out_by_customer.get(cid)
        if sub_out is None:
            # No outbound activity: all-zero series on the inbound index.
            out_daily = pd.Series(0.0, index=in_daily.index)
        else:
            out_daily = (sub_out.set_index("tx_date_time")["tx_base_amount"]
                                .abs()
                                .resample("D")
                                .sum())

        # Explicit daily index covering both series (guarantees a DatetimeIndex).
        start = min(in_daily.index.min(), out_daily.index.min())
        end = max(in_daily.index.max(), out_daily.index.max())
        idx = pd.date_range(start, end, freq="D")

        in_daily = in_daily.reindex(idx, fill_value=0.0)
        out_daily = out_daily.reindex(idx, fill_value=0.0)

        IN30 = in_daily.rolling(window, min_periods=1).sum()
        OUT30 = out_daily.rolling(window, min_periods=1).sum()

        parts.append(pd.DataFrame({
            "customer_id": cid,
            "date": idx,
            "IN30": IN30.to_numpy(),
            "OUT30": OUT30.to_numpy(),
        }))

    if parts:
        M = pd.concat(parts, ignore_index=True)
    else:
        M = pd.DataFrame(columns=["customer_id", "date", "IN30", "OUT30"])

    if M.empty:
        print("No se pudieron construir ventanas 30d (M vacío).")
    else:
        if PARAMS:
            param_tbl = (pd.DataFrame(PARAMS).T
                           .loc[scenarios]
                           .rename_axis("escenario").reset_index())
            print("=== IN>%OUT — Parámetros (Amount_IN_30d, Low, High) ==="); display(param_tbl)

        counts = {}
        for k, v in PARAMS.items():
            A, L, H = float(v["Amount_IN_30d"]), float(v["Low"]), float(v["High"])
            m = (
                (M["IN30"] > A) &
                (M["OUT30"] > 0) &
                (M["IN30"] >= M["OUT30"] * (L / 100.0)) &
                (M["IN30"] <= M["OUT30"] * (H / 100.0))
            )
            # M holds exactly one row per (customer, day), so the number of
            # True entries is the number of alerted customer-day windows.
            counts[k] = int(m.sum())

        if PARAMS:
            alerts = pd.DataFrame([{f"alertas_{name.lower()}": counts.get(name, 0)
                                    for name in scenarios}])
            print("=== IN>%OUT — Alertas por escenario (ventanas cliente–día) ==="); display(alerts)


=== IN>%OUT — Parámetros (Amount_IN_30d, Low, High) ===


Unnamed: 0,escenario,Amount_IN_30d,Low,High
0,p95,824400000,80,100
1,p97,936000111,80,100
2,p98,1776500000,80,100
3,p99,1416700000,80,100


=== IN>%OUT — Alertas por escenario (ventanas cliente–día) ===


Unnamed: 0,alertas_p95,alertas_p97,alertas_p98,alertas_p99
0,911,821,516,572
