In [2]:
# === SUMCCO — Parametrización de Amount (mejor ventana 14d por par) ============
# LÓGICA (parametrización):
#   tx_direction = Outbound
#   AND tx_type = [Type]
#   Mejor suma en 14 días por (customer_id, counterparty_id), percentiles globales.
#   Sugerencia típica: p95 (ajústalo a tu política).

import pandas as pd, numpy as np
import math

pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# -------- EDITA AQUÍ -----------------------------------------------------------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = "I-2"                     # <-- ajusta el sub-subsegmento
TYPE = "Cash"
PCTS = [0.90, 0.95, 0.97, 0.99]
# ------------------------------------------------------------------------------

df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df["tx_base_amount"], errors="coerce")
df["tx_direction"]   = df["tx_direction"].astype(str).str.title()
df["tx_type"]        = df["tx_type"].astype(str).str.title()

# -------- Filtrado por sub-subsegmento --------
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

m = (
    df["tx_direction"].eq("Outbound") &
    df["tx_type"].eq(TYPE) &
    df["customer_id"].notna() &
    df["counterparty_id"].notna() &
    df["tx_date_time"].notna() &
    df["tx_base_amount"].notna()
)

g = df.loc[m, ["customer_id","counterparty_id","tx_date_time","tx_base_amount"]].copy()
if g.empty:
    print("SUMCCO: no hay transacciones elegibles.")
else:
    g["amt"] = g["tx_base_amount"].abs().astype(float)
    out_max = []

    for (cid, cpid), sub in g.groupby(["customer_id","counterparty_id"], sort=False):
        ts = sub["tx_date_time"].sort_values().values
        am = sub.loc[sub["tx_date_time"].sort_values().index, "amt"].values
        if ts.size == 0:
            continue
        j = 0
        pref = np.concatenate([[0.0], np.cumsum(am)])
        best_s = 0.0
        delta = np.timedelta64(14, "D")
        for i in range(ts.size):
            end = ts[i] + delta
            while j < ts.size and ts[j] <= end:
                j += 1
            s = pref[j] - pref[i]
            if s > best_s:
                best_s = s
        out_max.append(best_s)

    s = pd.Series(out_max, dtype=float)
    if s.empty:
        print("SUMCCO: no se pudieron construir ventanas 14d por par.")
    else:
        q = s.quantile(PCTS)
        tbl = pd.DataFrame({
            "percentil":[f"p{int(p*100)}" for p in PCTS],
            "Amount_CLP":[q.get(p, np.nan) for p in PCTS]
        })
        print(f"=== SUMCCO — Percentiles de mejor suma 14d por par (Type={TYPE}) ===")
        print(f"Pares evaluados: {len(s):,}\n")
        display(tbl)
        if 0.95 in PCTS and pd.notna(q.get(0.95)):
            print(f"Sugerencia Amount (p95): {q.get(0.95):,.0f} CLP")


  df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")


=== SUMCCO — Percentiles de mejor suma 14d por par (Type=Cash) ===
Pares evaluados: 347



Unnamed: 0,percentil,Amount_CLP
0,p90,780638319
1,p95,2446673529
2,p97,3558083905
3,p99,5947660063


Sugerencia Amount (p95): 2,446,673,529 CLP


# Simulación alertas

In [3]:
# === SUMCCO — Simulación de alertas (Actual vs propuestos) ======================
# LÓGICA EXACTA (simulación):
#   tx_direction = Outbound
#   AND tx_type = [Type]
#   AND sum base por {customer_id, counterparty_id, tx_direction, tx_type} en 14 días > [Amount]
# Unidad = ventanas (customer_id, counterparty_id, día) que cumplen.

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDITA AQUÍ ----------------------------------------------------------------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-2"]                # <-- ajusta el sub-subsegmento
TYPE = "Cash"
PARAMS = {
    "Actual": {"Type": "Cash", "Amount": 1_000_000},
    "p95":    {"Type": "Cash", "Amount": 2_446_673_529},
    "p97":    {"Type": "Cash", "Amount": 3_558_083_905},
    "p99":    {"Type": "Cash", "Amount": 5_947_660_063},
}
# -------------------------------------------------------------------------------

df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"]   = pd.to_datetime(df.get("tx_date_time"), errors="coerce")
df["tx_base_amount"] = pd.to_numeric(df.get("tx_base_amount"), errors="coerce")
df["tx_direction"]   = df.get("tx_direction","").astype(str).str.title()
df["tx_type"]        = df.get("tx_type","").astype(str).str.title()

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

m = (
    df["tx_direction"].eq("Outbound") &
    df["tx_type"].eq(TYPE) &
    df["customer_id"].notna() &
    df["counterparty_id"].notna() &
    df["tx_date_time"].notna() &
    df["tx_base_amount"].notna()
)
g = df.loc[m, ["customer_id","counterparty_id","tx_date_time","tx_base_amount"]].copy()

if g.empty:
    print("No hay transacciones elegibles para SUMCCO.")
else:
    g["amt"] = g["tx_base_amount"].abs().astype(float)

    parts=[]
    for (cid,cpid), sub in g.groupby(["customer_id","counterparty_id"], sort=False):
        daily_sum = (sub.set_index("tx_date_time")["amt"]
                        .resample("D").sum()
                        .astype(float))
        if daily_sum.empty:
            continue
        S14 = daily_sum.rolling("14D").sum()
        parts.append(pd.DataFrame({
            "customer_id": cid,
            "counterparty_id": cpid,
            "date": S14.index,
            "S14": S14.values
        }))

    M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","counterparty_id","date","S14"])

    order = ["Actual","p85","p90","p95","p97","p99"]
    param_tbl = (pd.DataFrame(PARAMS).T
                    .loc[[k for k in order if k in PARAMS]]
                    .rename_axis("escenario")
                    .reset_index())
    print(f"=== SUMCCO — Parámetros (Type={TYPE}) ==="); display(param_tbl)

    counts={}
    for k,v in PARAMS.items():
        A = v["Amount"]
        m_ok = (M["S14"] > A)
        counts[k] = int(M.loc[m_ok, ["customer_id","counterparty_id","date"]].drop_duplicates().shape[0])

    out = pd.DataFrame([{
        "alertas_actual": counts.get("Actual",0),
        "alertas_p85":    counts.get("p85",0),
        "alertas_p90":    counts.get("p90",0),
        "alertas_p95":    counts.get("p95",0),
        "alertas_p97":    counts.get("p97",0),
        "alertas_p99":    counts.get("p99",0),
    }])
    print("=== SUMCCO — Alertas por escenario (ventanas cliente–contraparte–día) ==="); display(out)


  df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")


=== SUMCCO — Parámetros (Type=Cash) ===


Unnamed: 0,escenario,Type,Amount
0,Actual,Cash,1000000
1,p95,Cash,2446673529
2,p97,Cash,3558083905
3,p99,Cash,5947660063


=== SUMCCO — Alertas por escenario (ventanas cliente–contraparte–día) ===


Unnamed: 0,alertas_actual,alertas_p85,alertas_p90,alertas_p95,alertas_p97,alertas_p99
0,13855,0,0,268,107,29
