In [None]:
# === STROTEUR — OUT / EUR / 9.950–10.000 en 7 días ============================
import pandas as pd, numpy as np, math

# Input file and the percentiles reported as threshold candidates.
PATH = "../../data/tx/datos_trx__with_subsub.csv"
PCTS = [0.85, 0.90, 0.95, 0.97, 0.99]

# Load the transactions; customer_id is read as a string column, and
# unparseable timestamps/amounts become NaT/NaN instead of raising.
df = pd.read_csv(PATH, dtype={"customer_id": "string"}, encoding="utf-8-sig")
df["tx_date_time"] = pd.to_datetime(df["tx_date_time"], errors="coerce")
df["tx_amount"] = pd.to_numeric(df["tx_amount"], errors="coerce")

# One named predicate per filter criterion; labels are compared after case
# normalisation only (surrounding whitespace is not stripped).
# NOTE(review): the output recorded for this cell shows 0 matching rows --
# confirm the labels "Outbound" / "Cash" / "EUR" against the actual data.
is_outbound = df["tx_direction"].astype(str).str.title() == "Outbound"
is_cash = df["tx_type"].astype(str).str.title() == "Cash"
is_eur = df["tx_currency"].astype(str).str.upper() == "EUR"
has_values = df["tx_date_time"].notna() & df["tx_amount"].notna()
# Absolute amount inside the closed band [9950, 10000].
in_band = df["tx_amount"].abs().between(9950, 10000)
mask = is_outbound & is_cash & is_eur & has_values & in_band

# Keep only what the window count needs, ordered by customer and then time.
g = df.loc[mask, ["customer_id", "tx_date_time"]]
g = g.sort_values(["customer_id", "tx_date_time"])

def counts_7d(dates, window_days=7):
    """Count, for each event, the events inside its forward-looking window.

    For every timestamp ``t`` in *dates* the window is the closed interval
    ``[t, t + window_days]``. The count includes the event itself plus every
    later event whose timestamp is at or before the end of the window.

    Args:
        dates: Timestamps sorted in ascending order. Either an object exposing
            ``.values`` (e.g. a pandas Series) or any array-like that NumPy can
            convert to ``datetime64[ns]``.
        window_days: Window length in whole days (an integer). Defaults to 7.

    Returns:
        A list of ints, one per input timestamp, in input order. An empty
        input yields an empty list.
    """
    raw = getattr(dates, "values", dates)
    stamps = np.asarray(raw, dtype="datetime64[ns]")
    window_ends = stamps + np.timedelta64(window_days, "D")
    # On sorted input, side="right" yields the number of timestamps <= each
    # window end; subtracting the event's own position leaves the events that
    # fall in [t, t + window].
    upper = np.searchsorted(stamps, window_ends, side="right")
    return (upper - np.arange(len(stamps))).tolist()

# Pool the per-transaction window counts of every customer into one sample.
counts = []
for _, sub in g.groupby("customer_id", sort=False):
    counts.extend(counts_7d(sub["tx_date_time"]))

s = pd.Series(counts, dtype=float)
# With no qualifying transactions, fall back to an all-NaN series indexed by
# the requested percentiles so the table below still renders.
q = s.quantile(PCTS) if len(s) else pd.Series(index=PCTS, dtype=float)

tbl = pd.DataFrame({
    "percentil": [f"p{int(p*100)}" for p in PCTS],
    "X_candidatos": [q.get(p, np.nan) for p in PCTS],
})
print("=== STROTEUR — Candidatos para X (conteos en 7 días) ===")
print(f"Ventanas evaluadas: {len(s):,} | Clientes: {g['customer_id'].nunique():,}")
display(tbl)

# q.get returns None when 0.95 is not one of PCTS; pd.isna treats both None
# and NaN as missing, whereas np.isnan(None) would raise TypeError.
p95 = q.get(0.95)
suggestion = "NA" if pd.isna(p95) else int(math.ceil(p95))
print(f"Sugerencia (ceil p95): {suggestion}")


=== STROTEUR — Candidatos para X (conteos en 7 días) ===
Ventanas evaluadas: 0 | Clientes: 0


Unnamed: 0,percentil,X_candidatos
0,p85,
1,p90,
2,p95,
3,p97,
4,p99,


Sugerencia (ceil p95): NA
