In [2]:
%pip install -U caas_jupyter_tools

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement caas_jupyter_tools (from versions: none)
ERROR: No matching distribution found for caas_jupyter_tools


In [5]:
# === IV alerts: volumen y tasa de falsos positivos por grupo y por regla ===
import pandas as pd, numpy as np
from IPython.display import display

# 1) Carga ---------------------------------------------------------------
PATH_ALERTS = "../data/high_alerts_enriched_with_tx.csv"  # <-- ajusta si es necesario
alerts = pd.read_csv(PATH_ALERTS, encoding="utf-8-sig")
alerts.columns = [str(c).strip().replace("\ufeff","") for c in alerts.columns]

# Normalizaciones mínimas
alerts["rule_code"] = alerts["rule_code"].astype(str).str.strip().str.upper()
alerts["status"] = alerts["status"].astype(str).str.strip().str.title()        # "Suspicious"/"Not Suspicious"
alerts["customer_sub_type"] = alerts["customer_sub_type"].astype(str).str.strip()

# Filtrar a Investment Vehicle
iv = alerts[alerts["customer_sub_type"].str.upper().eq("INVESTMENT VEHICLE")].copy()

# 2) Taxonomía de reglas -------------------------------------------------
# Mapa: rule_code -> (grupo, subgrupo)
G = {}

# Monto transaccional
for rc in ["P-TLI","P-HSUMI","IN>AVG","PGAV-IN"]:                      G[rc] = ("Monto transaccional","Inbound")
for rc in ["P-TLO","P-HSUMO","OUT>AVG","PGAV-OUT"]:                    G[rc] = ("Monto transaccional","Outbound")
for rc in ["P-LVAL","P-LBAL","P-%BAL"]:                                G[rc] = ("Monto transaccional","N/A")

# Frecuencia
for rc in ["HANUMI","HNR-IN","P-HVI","RVT-IN"]:                        G[rc] = ("Frecuencia","Inbound")
for rc in ["HANUMO","HNR-OUT","P-HVO","RVT-OUT"]:                      G[rc] = ("Frecuencia","Outbound")

# Contraparte
for rc in ["OCMC_1","MCOC_1","NCU","NBCOU","NCOU","SUMCCI","SUMCCO","NUMCCI","NUMCCO"]:
    G[rc] = ("Contraparte","—")

# Comportamiento transaccional
for rc in ["OUT>%IN","IN>%OUT","IN-OUT-1","SEC"]:                      G[rc] = ("Comportamiento transaccional","—")

# Otros
for rc in ["P-1ST","P-2ND","CDC01","CDC02","CDC03","VC","DORMANT"]:    G[rc] = ("Otros (descriptivos)","—")

# IA (si existen en tu CSV)
for rc in ["AAD-IO","AAD-LA","AAD-RD","AAD-SMUR","AAD-RS"]:            G[rc] = ("IA","—")

iv["group"]    = iv["rule_code"].map(lambda x: G.get(x, ("No clasificado","—"))[0])
iv["subgroup"] = iv["rule_code"].map(lambda x: G.get(x, ("No clasificado","—"))[1])

# 3) Métricas ------------------------------------------------------------
def summarize(df, by_cols):
    out = (df.assign(is_fp = df["status"].eq("Not Suspicious").astype(int),
                     is_tp = df["status"].eq("Suspicious").astype(int))
             .groupby(by_cols, dropna=False)
             .agg(total_alerts=("alert_id","nunique"),
                  suspicious=("is_tp","sum"),
                  not_suspicious=("is_fp","sum"))
             .reset_index())
    out["fp_rate%"]   = np.where(out["total_alerts"]>0, 100*out["not_suspicious"]/out["total_alerts"], np.nan).round(2)
    out["precision%"] = np.where(out["total_alerts"]>0, 100*out["suspicious"]/out["total_alerts"], np.nan).round(2)
    return out.sort_values(["total_alerts"], ascending=False)

# a) Grupo/Subgrupo
group_summary = summarize(iv, ["group","subgroup"]).sort_values(["group","total_alerts"], ascending=[True,False])

# b) Regla dentro de grupo
rule_summary  = summarize(iv, ["group","rule_code"]).sort_values(["group","total_alerts"], ascending=[True,False])

# c) Top reglas general
top_rules = rule_summary.sort_values("total_alerts", ascending=False).head(20).reset_index(drop=True)

# 4) Mostrar tablas en el notebook --------------------------------------
print(f"Total alertas IV: {len(iv):,} | Reglas mapeadas a grupos: {iv['group'].ne('No clasificado').mean()*100:.1f}%")
print("\n=== Resumen por GRUPO / SUBGRUPO ===")
display(group_summary)

print("\n=== Resumen por REGLA dentro de grupo ===")
display(rule_summary)

print("\n=== Top 20 reglas por Nº de alertas (global) ===")
display(top_rules)


Total alertas IV: 181 | Reglas mapeadas a grupos: 89.5%

=== Resumen por GRUPO / SUBGRUPO ===


Unnamed: 0,group,subgroup,total_alerts,suspicious,not_suspicious,fp_rate%,precision%
0,Comportamiento transaccional,—,29,1,27,93.1,3.45
1,Contraparte,—,14,1,13,92.86,7.14
2,Frecuencia,Inbound,14,3,11,78.57,21.43
3,Frecuencia,Outbound,12,4,8,66.67,33.33
4,IA,—,6,1,5,83.33,16.67
7,Monto transaccional,Outbound,41,0,40,97.56,0.0
6,Monto transaccional,,35,3,32,91.43,8.57
5,Monto transaccional,Inbound,8,0,8,100.0,0.0
8,No clasificado,—,19,0,19,100.0,0.0
9,Otros (descriptivos),—,3,1,2,66.67,33.33



=== Resumen por REGLA dentro de grupo ===


Unnamed: 0,group,rule_code,total_alerts,suspicious,not_suspicious,fp_rate%,precision%
2,Comportamiento transaccional,OUT>%IN,22,1,21,95.45,4.55
1,Comportamiento transaccional,IN>%OUT,4,0,4,100.0,0.0
0,Comportamiento transaccional,IN-OUT-1,3,0,2,66.67,0.0
3,Contraparte,NCU,14,1,13,92.86,7.14
4,Frecuencia,HANUMI,11,1,10,90.91,9.09
5,Frecuencia,HANUMO,9,1,8,88.89,11.11
6,Frecuencia,HNR-IN,3,2,1,33.33,66.67
7,Frecuencia,HNR-OUT,3,3,0,0.0,100.0
9,IA,AAD-SMUR,5,1,4,80.0,20.0
8,IA,AAD-IO,1,0,1,100.0,0.0



=== Top 20 reglas por Nº de alertas (global) ===


Unnamed: 0,group,rule_code,total_alerts,suspicious,not_suspicious,fp_rate%,precision%
0,Monto transaccional,P-LVAL,31,3,28,90.32,9.68
1,Comportamiento transaccional,OUT>%IN,22,1,21,95.45,4.55
2,Monto transaccional,PGAV-OUT,18,0,18,100.0,0.0
3,Contraparte,NCU,14,1,13,92.86,7.14
4,Frecuencia,HANUMI,11,1,10,90.91,9.09
5,No clasificado,HASUMI,10,0,10,100.0,0.0
6,Frecuencia,HANUMO,9,1,8,88.89,11.11
7,No clasificado,HASUMO,9,0,9,100.0,0.0
8,Monto transaccional,P-TLO,8,0,8,100.0,0.0
9,Monto transaccional,P-HSUMO,8,0,8,100.0,0.0
