# Definicióm parámetros

In [None]:
# === NUMCCI — Parametrización de Number (conteo por contraparte en 14 días) ===
# LÓGICA EXACTA (parametrización):
#   tx_direction = Inbound
#   AND tx_type = [Type]
#   Count en 14d por {customer_id, counterparty_id, tx_direction, tx_type}
# Método:
#   - Para cada par (cliente, contraparte) Inbound+Type, contar tx por día.
#   - Rolling "14D" del conteo diario => C14 (incluye el día actual).
#   - Percentiles de C14 apilando todas las ventanas de todos los pares.
#   - Sugerencia típica: Number = ceil(p95) o según lo que definan.

import pandas as pd, numpy as np, math
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDITA AQUÍ ---------------------------------------------------------------
PATH = "../../data/tx/datos_trx__with_subsub.csv"
SUBSUBSEGMENTS = ["I-1", "I-2"]   # <-- ajusta el sub-subsegmento
TYPE = "Cash"
WINDOW_DAYS = 14
NUM_QS = [0.50, 0.75, 0.90, 0.95, 0.97,0.98, 0.99]
# -------------------------------------------------------------------------------

df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"] = pd.to_datetime(df.get("tx_date_time"), errors="coerce")
df["tx_direction"] = df.get("tx_direction","").astype(str).str.title()
df["tx_type"]      = df.get("tx_type","").astype(str).str.title()
df["counterparty_id"] = df.get("counterparty_id","").astype(str).str.strip()

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

m = (
    df["tx_direction"].eq("Inbound") &
    df["tx_type"].eq(TYPE) &
    df["customer_id"].notna() &
    df["counterparty_id"].notna() &
    df["counterparty_id"].ne("NA") &
    df["tx_date_time"].notna()
)
g = df.loc[m, ["customer_id","counterparty_id","tx_date_time"]].copy()

if g.empty:
    print("NUMCCI: no hay transacciones elegibles con los filtros dados.")
else:
    parts = []
    pairs = 0
    for (cid, cpid), sub in g.groupby(["customer_id","counterparty_id"], sort=False):
        daily_cnt = (sub.set_index("tx_date_time")
                        .assign(x=1)["x"]
                        .resample("D").sum()
                        .astype(float))
        if daily_cnt.empty:
            continue
        C14 = daily_cnt.rolling(f"{WINDOW_DAYS}D", min_periods=1).sum()
        C14.name = (cid, cpid)
        parts.append(C14)
        pairs += 1

    if not parts:
        print("NUMCCI: no se pudieron construir ventanas 14d.")
    else:
        s = pd.concat([ser.dropna().astype(float) for ser in parts], axis=0)
        q = s.quantile(NUM_QS) if len(s) else pd.Series(index=NUM_QS, dtype=float)

        out = pd.DataFrame({
            "percentil":    [f"p{int(p*100)}" for p in NUM_QS],
            "Number_raw":   [q.get(p, np.nan) for p in NUM_QS],
            "Number_ceil":  [int(math.ceil(q.get(p))) if pd.notna(q.get(p, np.nan)) else np.nan for p in NUM_QS],
        })

        print(f"=== NUMCCI — Percentiles de conteo 14d por contraparte (IN, Type={TYPE}) ===")
        print(f"Pares cliente–contraparte: {pairs:,} | Ventanas evaluadas: {len(s):,}\n")
        display(out)


  df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")


=== NUMCCI — Percentiles de conteo 14d por contraparte (IN, Type=Cash) ===
Pares cliente–contraparte: 27 | Ventanas evaluadas: 3,176



Unnamed: 0,percentil,Number_raw,Number_ceil
0,p50,2,2
1,p75,4,4
2,p90,6,6
3,p95,7,7
4,p97,8,8
5,p98,8,8
6,p99,9,9


# Simulación alertas

In [3]:
# === NUMCCI — Simulación de alertas (Actual vs propuestos) =====================
# LÓGICA EXACTA (simulación):
#   tx_direction = Inbound
#   AND tx_type = [Type]
#   AND count por {customer_id, counterparty_id, tx_direction, tx_type} en 14 días > [Number]
# Unidad = ventanas (customer_id, counterparty_id, día) que cumplen.

import pandas as pd, numpy as np
pd.set_option("display.float_format", lambda x: f"{x:,.0f}")

# ---- EDITA AQUÍ ----------------------------------------------------------------
PATH = "../../data/tx/transacciones_cash_2025__with_subsub.csv"
SUBSUBSEGMENTS = ["I-2"]               # <-- ajusta el sub-subsegmento
TYPE = "Cash"
PARAMS = {
    "Actual": {"Number": 2},
    "p95":    {"Number": 7},
    "p97":    {"Number": 8},
    "p99":    {"Number": 9},
}
WINDOW_DAYS = 14
# -------------------------------------------------------------------------------

df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")
df["tx_date_time"] = pd.to_datetime(df.get("tx_date_time"), errors="coerce")
df["tx_direction"] = df.get("tx_direction","").astype(str).str.title()
df["tx_type"]      = df.get("tx_type","").astype(str).str.title()
df["counterparty_id"] = df.get("counterparty_id","").astype(str).str.strip()

# Filtrado por sub-subsegmento
if isinstance(SUBSUBSEGMENTS, str):
    target_labels = {SUBSUBSEGMENTS}
else:
    target_labels = set(map(str, SUBSUBSEGMENTS))

df = df[df["customer_sub_sub_type"].astype(str).isin(target_labels)].copy()

m = (
    df["tx_direction"].eq("Inbound") &
    df["tx_type"].eq(TYPE) &
    df["customer_id"].notna() &
    df["counterparty_id"].notna() &
    df["counterparty_id"].ne("NA") &
    df["tx_date_time"].notna()
)
g = df.loc[m, ["customer_id","counterparty_id","tx_date_time"]].copy()

if g.empty:
    print("No hay transacciones elegibles para NUMCCI.")
else:
    parts=[]
    for (cid, cpid), sub in g.groupby(["customer_id","counterparty_id"], sort=False):
        daily_cnt = (sub.set_index("tx_date_time")
                        .assign(x=1)["x"]
                        .resample("D").sum()
                        .astype(float))
        if daily_cnt.empty:
            continue
        C14 = daily_cnt.rolling(f"{WINDOW_DAYS}D", min_periods=1).sum()
        parts.append(pd.DataFrame({
            "customer_id": cid,
            "counterparty_id": cpid,
            "date": C14.index,
            "C14": C14.values
        }))

    M = pd.concat(parts, ignore_index=True) if parts else pd.DataFrame(columns=["customer_id","counterparty_id","date","C14"])

    order = ["Actual","p85","p90","p95","p97","p99"]
    param_tbl = (pd.DataFrame(PARAMS).T
                    .loc[[k for k in order if k in PARAMS]]
                    .rename_axis("escenario")
                    .reset_index())
    print(f"=== NUMCCI — Parámetros (Type={TYPE}) ==="); display(param_tbl)

    counts={}
    for k,v in PARAMS.items():
        N = v["Number"]
        m_ok = (M["C14"] > N)
        counts[k] = int(M.loc[m_ok, ["customer_id","counterparty_id","date"]].drop_duplicates().shape[0])

    out = pd.DataFrame([{
        "alertas_actual": counts.get("Actual",0),
        "alertas_p90":    counts.get("p90",0),
        "alertas_p95":    counts.get("p95",0),
        "alertas_p97":    counts.get("p97",0),
        "alertas_p98":    counts.get("p98",0),
        "alertas_p99":    counts.get("p99",0),
    }])
    print("=== NUMCCI — Alertas por escenario (ventanas cliente–contraparte–día) ===")
    display(out)


  df = pd.read_csv(PATH, dtype={"customer_id":"string","counterparty_id":"string"}, encoding="utf-8-sig")


=== NUMCCI — Parámetros (Type=Cash) ===


Unnamed: 0,escenario,Number
0,Actual,2
1,p95,7
2,p97,8
3,p99,9


=== NUMCCI — Alertas por escenario (ventanas cliente–contraparte–día) ===


Unnamed: 0,alertas_actual,alertas_p90,alertas_p95,alertas_p97,alertas_p98,alertas_p99
0,1168,0,97,53,0,26
