In [1]:
import pandas as pd, numpy as np, re
from pathlib import Path

CONFIG = {
    "alerts_csv": "../../data/high_alerts_enriched_with_tx.csv", 
    "segments_csv": "../../data/retail_behavior_segments.csv",
}

pd.set_option("display.max_rows", 50)
pd.set_option("display.float_format", lambda x: f"{x:,.4f}")

def _norm_col(c):
    c = str(c).replace("\ufeff","").strip()
    c = re.sub(r'^[\'"]|[\'"]$', "", c)
    c = re.sub(r';+$', "", c)
    c = re.sub(r'\s+', '_', c)
    return c

def _primary_subject_id(s):
    """Intenta obtener un id cliente limpio desde subject_ids (toma el 1º token, quita caracteres no dígito)."""
    if pd.isna(s): 
        return None
    s = str(s).strip()
    if not s:
        return None
    first = re.split(r'[,\s]+', s)[0]
    digits = re.sub(r"[^\d]", "", first)
    return digits or first

def _status_bin(s):
    """1 si la alerta fue 'Suspicious', 0 si fue No-Suspicious/Closed sin hallazgo, NaN si desconocido."""
    if pd.isna(s): 
        return np.nan
    s = str(s).strip().lower()
    if s in {"suspicious", "true positive", "tp"}:
        return 1
    if s in {"not suspicious", "false positive", "fp", "closed", "dismissed"}:
        return 0
    return np.nan


In [2]:
alerts = pd.read_csv(CONFIG["alerts_csv"], encoding="utf-8-sig")
alerts.columns = [_norm_col(c) for c in alerts.columns]

# Campos esperados (según lo que compartiste)
expected = {"alert_id","rule_code","subject_ids","subject_names","number_of_transactions",
            "created_at","status","external_transaction_ids","tx_direction","tx_base_amount",
            "customer_type","customer_account_balance","customer_networth","customer_income",
            "customer_expected_amount","customer_sub_type"}
missing = [c for c in expected if c not in alerts.columns]
print("Faltan columnas (si alguna):", missing)

# Normalizaciones
alerts["created_at"] = pd.to_datetime(alerts.get("created_at"), errors="coerce")
alerts["tx_direction"] = alerts.get("tx_direction","").astype(str).str.strip().str.capitalize()
alerts["status_bin"] = alerts.get("status").map(_status_bin)
alerts["client_id"]  = alerts.get("subject_ids").map(_primary_subject_id)

# Señal “sin segmento” según columna del sistema origen
alerts["is_segmented_now"] = alerts.get("customer_sub_type").astype(str).str.strip().replace({"": np.nan, "NA": np.nan}).notna()

n_alerts   = len(alerts)
n_clients  = alerts["client_id"].nunique()
print(f"Alerts high: {n_alerts:,} | Clientes con alerts high: {n_clients:,}")
alerts.head(3)


Faltan columnas (si alguna): []
Alerts high: 2,147 | Clientes con alerts high: 765


Unnamed: 0,alert_id,rule_code,subject_ids,subject_names,number_of_transactions,created_at,status,external_transaction_ids,tx_direction,tx_base_amount,customer_type,customer_account_balance,customer_networth,customer_income,customer_expected_amount,customer_sub_type,status_bin,client_id,is_segmented_now
0,82271,AAD-LA,26375,MOREL BULICIC JORGE RAFAEL,2,2025-03-07 12:27:52.142219+00:00,Not Suspicious,"201955096, 68816944","Na, na","NA, NA","NA, NA","NA, NA","NA, NA","NA, NA","NA, NA",SIN_SEGMENTO,0.0,26375,True
1,83230,P-TLO,24618,GARCIA LABORA WALDO,1,2025-03-11 12:33:06.388175+00:00,Not Suspicious,202249097,Outbound,230017346.0,Individual,366027051.0,SIN CLASIFICACION,Entre 5 y 10 millones,1500000000.0,Retail,0.0,24618,True
2,83231,PGAV-OUT,24618,GARCIA LABORA WALDO,1,2025-03-11 12:33:06.388175+00:00,Not Suspicious,202249097,Outbound,230017346.0,Individual,366027051.0,SIN CLASIFICACION,Entre 5 y 10 millones,1500000000.0,Retail,0.0,24618,True


In [3]:
# Métricas por cliente
by_client = (alerts.groupby("client_id", dropna=False)
                    .agg(alerts=("alert_id","count"),
                         suspicious_cnt=("status_bin", lambda s: int((s==1).sum())),
                         fp_cnt=("status_bin",  lambda s: int((s==0).sum())),
                         any_segmented=("is_segmented_now","max"),
                         name=("subject_names","last"))
                    .reset_index())

by_client["fp_rate_%"] = np.where(by_client["alerts"]>0, 100*by_client["fp_cnt"]/by_client["alerts"], np.nan)
by_client = by_client.sort_values("alerts", ascending=False)

# Top-15 y “100% falsos positivos”
top15 = by_client.head(15).copy()
top15_all_fp = (top15["suspicious_cnt"] == 0).sum()

# 1% de clientes y % de alerts concentradas
k = max(1, int(round(0.01 * n_clients)))
topK = by_client.head(k)
share_alerts_topK = 100 * topK["alerts"].sum() / by_client["alerts"].sum()

# % no segmentados dentro del topK
pct_not_segmented_topK = 100 * (topK["any_segmented"] == False).mean()

print(f"Top 15 clientes: {len(top15)} | de ellos, {top15_all_fp} tienen 100% falsos positivos.")
print(f"Top {k} clientes (~1%): concentran {share_alerts_topK:.1f}% de todas las alerts high.")
print(f"Dentro de ese top {k}, {pct_not_segmented_topK:.1f}% no están segmentados (customer_sub_type vacío).")

# Muestra de tabla para el anexo / revisión
cols_show = ["client_id","name","alerts","suspicious_cnt","fp_cnt","fp_rate_%","any_segmented"]
top15[cols_show]


Top 15 clientes: 15 | de ellos, 12 tienen 100% falsos positivos.
Top 8 clientes (~1%): concentran 13.6% de todas las alerts high.
Dentro de ese top 8, 0.0% no están segmentados (customer_sub_type vacío).


Unnamed: 0,client_id,name,alerts,suspicious_cnt,fp_cnt,fp_rate_%,any_segmented
558,48643,AGRICOLA SIEMEL LIMITADA,57,0,57,100.0,True
599,52043,SEGURO INVERSIONES SPA,54,0,51,94.4444,True
521,43614,NUTRICION Y ALIMENTO S.A.,45,2,43,95.5556,True
538,45737,CHILEAN MARRONI FROZEN SPA,38,0,38,100.0,True
227,1855,MARTIN TARUD CAMILA,29,0,29,100.0,True
454,366,KURASZ ZAJACZKOWSKA ARTURO,28,0,28,100.0,True
581,50297,JELVES CARVAJAL EMILIO ALFONSO,21,7,14,66.6667,True
335,243,MARTIN TARUD VICENTE REINALDO,21,0,21,100.0,True
182,16710,GARIBALDI FRITIS MAURIZIO,21,2,19,90.4762,True
14,10508,GTD GRUPO TELEDUCTOS S.A.,21,0,21,100.0,True


In [4]:
# === Top clientes (más→menos alertas), Top-5 reglas por cliente, % sobre su total y segmento ORIGINAL ===
import pandas as pd, numpy as np, re

# 1) Asegurar 'alerts' cargado y normalizado mínimamente
if "alerts" not in globals():
    CONFIG = CONFIG if "CONFIG" in globals() else {"alerts_csv": "../../data/high_alerts_enriched_with_tx.csv"}
    alerts = pd.read_csv(CONFIG["alerts_csv"], encoding="utf-8-sig")

def _norm_col(c):
    c = str(c).replace("\ufeff","").strip()
    c = re.sub(r'^[\'"]|[\'"]$', "", c)
    c = re.sub(r';+$', "", c)
    c = re.sub(r'\s+', '_', c)
    return c

def _primary_subject_id(s):
    if pd.isna(s): return ""
    s = str(s).strip()
    if not s: return ""
    first = re.split(r'[,\s]+', s)[0]
    digits = re.sub(r"[^\d]", "", first)
    return digits or first

alerts = alerts.copy()
alerts.columns = [_norm_col(c) for c in alerts.columns]
alerts["client_id"] = alerts.get("subject_ids").map(_primary_subject_id).astype(str)
alerts["subject_names"] = alerts.get("subject_names").astype(str)

# segmento ORIGINAL normalizado (no usamos la nueva segmentación)
seg = alerts.get("customer_sub_type").astype(str).str.strip()
seg = seg.replace({"": np.nan, "NA": np.nan, "N/A": np.nan, "None": np.nan})
alerts["segment_original"] = seg.fillna("SIN SEGMENTO")

# 2) Totales por cliente (para ordenar y para %)
tot_client = (alerts
              .groupby("client_id", as_index=False)
              .agg(client_total_alerts=("alert_id","count"),
                   name=("subject_names","last"),
                   # segmento modal del cliente (si varios, toma el más frecuente)
                   segment_original=("segment_original",
                                     lambda s: s.value_counts().idxmax() if len(s) else "SIN SEGMENTO"))
              .sort_values("client_total_alerts", ascending=False))

TOP_N = 15  # puedes cambiarlo si quieres ver más clientes
top_ids = tot_client.head(TOP_N)["client_id"].tolist()

# 3) Conteos por regla dentro de cada cliente (solo Top-N clientes)
rc = (alerts[alerts["client_id"].isin(top_ids)]
      .groupby(["client_id","rule_code"], as_index=False)
      .agg(alerts_for_rule=("alert_id","count")))

# 4) % de cada regla sobre el total del cliente + ranking Top-5 por cliente
rc = rc.merge(tot_client[["client_id","client_total_alerts","name","segment_original"]],
              on="client_id", how="left")
rc["share_of_client_%"] = 100 * rc["alerts_for_rule"] / rc["client_total_alerts"]

# rank y filtro Top-5 por cliente
rc["rank_rule_by_client"] = (rc.groupby("client_id")["alerts_for_rule"]
                               .rank(method="dense", ascending=False).astype(int))
rc_top5 = rc[rc["rank_rule_by_client"] <= 5].copy()

# 5) Orden final: clientes (más→menos alertas) y dentro de cada cliente por rank
order_map = {cid:i for i, cid in enumerate(tot_client["client_id"].tolist())}
rc_top5["client_order"] = rc_top5["client_id"].map(order_map)
rc_top5 = rc_top5.sort_values(["client_order","rank_rule_by_client","alerts_for_rule"],
                              ascending=[True, True, False])

# 6) Columnas finales
final_cols = ["client_id","name","segment_original","client_total_alerts",
              "rank_rule_by_client","rule_code","alerts_for_rule","share_of_client_%"]
out_table = rc_top5[final_cols].reset_index(drop=True)

# 7) Mostrar con formato bonito
display(
    out_table.style.format({
        "client_total_alerts":"{:,.0f}",
        "alerts_for_rule":"{:,.0f}",
        "share_of_client_%":"{:,.2f}"
    })
)


Unnamed: 0,client_id,name,segment_original,client_total_alerts,rank_rule_by_client,rule_code,alerts_for_rule,share_of_client_%
0,48643,AGRICOLA SIEMEL LIMITADA,SIN_SEGMENTO,57,1,OUT>%IN,12,21.05
1,48643,AGRICOLA SIEMEL LIMITADA,SIN_SEGMENTO,57,1,P-HSUMI,12,21.05
2,48643,AGRICOLA SIEMEL LIMITADA,SIN_SEGMENTO,57,2,P-TLI,11,19.3
3,48643,AGRICOLA SIEMEL LIMITADA,SIN_SEGMENTO,57,3,IN>%OUT,7,12.28
4,48643,AGRICOLA SIEMEL LIMITADA,SIN_SEGMENTO,57,4,HNR-IN,6,10.53
5,48643,AGRICOLA SIEMEL LIMITADA,SIN_SEGMENTO,57,5,RVT-IN,5,8.77
6,52043,SEGURO INVERSIONES SPA,SIN_SEGMENTO,54,1,HNR-IN,14,25.93
7,52043,SEGURO INVERSIONES SPA,SIN_SEGMENTO,54,1,RVT-IN,14,25.93
8,52043,SEGURO INVERSIONES SPA,SIN_SEGMENTO,54,2,P-HVI,12,22.22
9,52043,SEGURO INVERSIONES SPA,SIN_SEGMENTO,54,3,HANUMI,8,14.81


In [5]:
# === ¿Saltan más alertas en clientes SIN SEGMENTO? (versión robusta) ===
import pandas as pd, numpy as np, re, unicodedata

def _status_bin(s):
    if pd.isna(s): return np.nan
    s = str(s).strip().lower()
    if s in {"suspicious","true positive","tp"}: return 1
    if s in {"not suspicious","false positive","fp","closed","dismissed"}: return 0
    return np.nan

def _primary_subject_id(s):
    """Toma el primer token de subject_ids y deja solo dígitos."""
    if pd.isna(s): return ""
    s = str(s).strip()
    if not s: return ""
    first = re.split(r'[,\s]+', s)[0]
    digits = re.sub(r"[^\d]", "", first)
    return digits or first

def _norm_text(s):
    """Lower, sin tildes, sin símbolos, espacios normalizados."""
    if pd.isna(s): s = ""
    s = str(s).strip().lower()
    s = ''.join(ch for ch in unicodedata.normalize('NFKD', s) if not unicodedata.combining(ch))
    s = re.sub(r'[^a-z0-9\s]', ' ', s)
    s = re.sub(r'\s+', ' ', s).strip()
    return s

UNSEG_PATTERNS = {
    "", "sin segmento", "sinsegmento", "sin segmentacion", "sin segmentación",
    "no segmentado", "no segmentada", "no segment", "no clasificado",
    "sin clasificacion", "na", "n a", "n/a", "none", "null"
}

# --- usar el DataFrame alerts ya cargado, o recargar si no existe ---
try:
    alerts
except NameError:
    alerts = pd.read_csv(CONFIG["alerts_csv"], encoding="utf-8-sig")

alerts = alerts.copy()

# Campos necesarios
if "status_bin" not in alerts.columns:
    alerts["status_bin"] = alerts.get("status").map(_status_bin)

# ID de cliente: subject_ids -> si vacío, cae a nombre normalizado
alerts["client_id"] = alerts.get("subject_ids").map(_primary_subject_id).astype(str)
name_key = alerts.get("subject_names").map(_norm_text).fillna("")
alerts.loc[alerts["client_id"].eq(""), "client_id"] = name_key[alerts["client_id"].eq("")]

# Normalizar el segmento ORIGINAL (de la propia tabla de alertas)
seg_raw = alerts.get("customer_sub_type")
if seg_raw is None:
    raise RuntimeError("No encuentro la columna 'customer_sub_type' en high_alerts_enriched_with_tx.")
seg_norm_key = seg_raw.map(_norm_text)

alerts["segment_original"] = seg_raw.astype(str).str.strip()
alerts.loc[seg_norm_key.isin(UNSEG_PATTERNS), "segment_original"] = "SIN SEGMENTO"
alerts["seg_group"] = np.where(alerts["segment_original"].eq("SIN SEGMENTO"),
                               "SIN SEGMENTO", "CON SEGMENTO")

# --- Métricas por grupo ---
g = (alerts.groupby("seg_group", dropna=False)
            .agg(
                alerts=("alert_id","count"),
                clients=("client_id","nunique"),
                suspicious=("status_bin", lambda s: int((s==1).sum()))
            )
     .assign(
         fp=lambda d: d["alerts"] - d["suspicious"],
         alerts_per_client=lambda d: d["alerts"] / d["clients"].replace(0, np.nan),
         susp_rate_pct=lambda d: 100 * d["suspicious"] / d["alerts"],
         fp_rate_pct=lambda d: 100 * d["fp"] / d["alerts"],
         alerts_share_pct=lambda d: 100 * d["alerts"] / d["alerts"].sum(),
         clients_share_pct=lambda d: 100 * d["clients"] / d["clients"].sum()
     )
     .reindex(["SIN SEGMENTO","CON SEGMENTO"])
)

g["alert_density_index"] = g["alerts_share_pct"] / g["clients_share_pct"]

# --- Resultados rápidos + muestra de valores clasificados como “sin segmento” ---
tot_alerts = int(g["alerts"].sum(skipna=True))
tot_clients = int(g["clients"].sum(skipna=True))
sin_alr = int(g.loc["SIN SEGMENTO","alerts"]) if "SIN SEGMENTO" in g.index and pd.notna(g.loc["SIN SEGMENTO","alerts"]) else 0
con_alr = int(g.loc["CON SEGMENTO","alerts"]) if "CON SEGMENTO" in g.index and pd.notna(g.loc["CON SEGMENTO","alerts"]) else 0
sin_cli = int(g.loc["SIN SEGMENTO","clients"]) if "SIN SEGMENTO" in g.index and pd.notna(g.loc["SIN SEGMENTO","clients"]) else 0
con_cli = int(g.loc["CON SEGMENTO","clients"]) if "CON SEGMENTO" in g.index and pd.notna(g.loc["CON SEGMENTO","clients"]) else 0

print(f"Alertas totales: {tot_alerts:,} | Clientes totales con alertas: {tot_clients:,}")
print(f"- SIN SEGMENTO: {sin_alr:,} alertas ({(sin_alr/tot_alerts*100 if tot_alerts else 0):,.1f}%) "
      f"sobre {sin_cli:,} clientes ({(sin_cli/tot_clients*100 if tot_clients else 0):,.1f}%).")
print(f"- CON SEGMENTO: {con_alr:,} alertas ({(con_alr/tot_alerts*100 if tot_alerts else 0):,.1f}%) "
      f"sobre {con_cli:,} clientes ({(con_cli/tot_clients*100 if tot_clients else 0):,.1f}%).")

display(
    g.style.format({
        "alerts":"{:,.0f}","clients":"{:,.0f}",
        "suspicious":"{:,.0f}","fp":"{:,.0f}",
        "alerts_per_client":"{:,.2f}",
        "susp_rate_pct":"{:,.2f}","fp_rate_pct":"{:,.2f}",
        "alerts_share_pct":"{:,.2f}","clients_share_pct":"{:,.2f}",
        "alert_density_index":"{:,.2f}"
    }).set_caption("Comparativa de alertas y clientes por grupo de segmentación (segmento original)")
)

# Muestra de cómo lucen los valores que fueron clasificados como “sin segmento”
ejemplos_unseg = (alerts.loc[alerts["seg_group"]=="SIN SEGMENTO","customer_sub_type"]
                  .value_counts(dropna=False)
                  .head(10))
if not ejemplos_unseg.empty:
    print("\nEjemplos de 'customer_sub_type' que se clasificaron como SIN SEGMENTO:")
    display(ejemplos_unseg.to_frame("ocurrencias"))
else:
    print("\nNo se encontraron filas clasificadas como SIN SEGMENTO; revisa los valores exactos de 'customer_sub_type'.")


Alertas totales: 2,147 | Clientes totales con alertas: 769
- SIN SEGMENTO: 684 alertas (31.9%) sobre 182 clientes (23.7%).
- CON SEGMENTO: 1,463 alertas (68.1%) sobre 587 clientes (76.3%).


Unnamed: 0_level_0,alerts,clients,suspicious,fp,alerts_per_client,susp_rate_pct,fp_rate_pct,alerts_share_pct,clients_share_pct,alert_density_index
seg_group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
SIN SEGMENTO,684,182,25,659,3.76,3.65,96.35,31.86,23.67,1.35
CON SEGMENTO,1463,587,96,1367,2.49,6.56,93.44,68.14,76.33,0.89



Ejemplos de 'customer_sub_type' que se clasificaron como SIN SEGMENTO:


Unnamed: 0_level_0,ocurrencias
customer_sub_type,Unnamed: 1_level_1
SIN_SEGMENTO,684


In [6]:
# Let's implement the simulation described by the user.

import pandas as pd
import numpy as np
from datetime import timedelta

# Paths
csv_path = "../../data/transacciones_cash_2025.csv"

# Parameters
customer_exact = "AGRICOLA SIEMEL LIMITADA                          "  # with trailing spaces

legacy_params = {"Amount": 30_000_000, "Percentage_Low": 90, "Percentage_High": 110}
corrected_params = {"Amount": 804_076_122, "Percentage_Low": 125, "Percentage_High": 200}

# Helpers
def simulate_alerts(df_daily, params):
    # Rolling 30-day sums (inclusive window ending each day)
    df_roll = df_daily.rolling(window=30, min_periods=1).sum()
    out30 = df_roll['OUT']
    in30 = df_roll['IN']

    # Conditions
    amt_ok = out30 > params["Amount"]
    # Avoid division by zero
    ratio = np.where(in30 > 0, (out30 / in30) * 100.0, np.nan)
    pct_ok = (ratio >= params["Percentage_Low"]) & (ratio <= params["Percentage_High"])
    alerts = amt_ok & pct_ok

    # Assemble results per day where alert is True
    result = pd.DataFrame({
        "date": df_daily.index,
        "out30_sum": out30,
        "in30_sum": in30,
        "ratio_pct": ratio,
        "amount_threshold": params["Amount"],
        "pct_low": params["Percentage_Low"],
        "pct_high": params["Percentage_High"],
        "alert": alerts
    })
    return result[result["alert"]].copy(), result

try:
    # Load data
    df = pd.read_csv(csv_path, parse_dates=["tx_date_time"], low_memory=False)
except FileNotFoundError:
    raise FileNotFoundError("No encontré el archivo /mnt/data/transacciones_cash_2025.csv. ¿Puedes cargarlo a la conversación?")

# Filter to the requested customer (try exact first, then fallback to stripped equality / contains)
df_client = df[df["customer_name"] == customer_exact].copy()

if df_client.empty:
    # Fallback: try stripping trailing spaces
    df2 = df.copy()
    df2["customer_name_stripped"] = df2["customer_name"].astype(str).str.strip()
    df_client = df2[df2["customer_name_stripped"].str.upper() == "AGRICOLA SIEMEL LIMITADA"].copy()

if df_client.empty:
    # Fallback: case-insensitive contains
    df_client = df[df["customer_name"].astype(str).str.contains("AGRICOLA SIEMEL LIMITADA", case=False, na=False)].copy()

# If still empty, stop with a helpful error
if df_client.empty:
    raise RuntimeError("No encontré transacciones para 'AGRICOLA SIEMEL LIMITADA'. Revisa el nombre exacto (incluyendo espacios) o comparte un identificador.")

# Keep only fields we need
df_client = df_client[["tx_date_time", "tx_direction", "tx_base_amount"]].copy()

# Normalize directions (expecting IN / OUT). Show unique for sanity:
unique_dirs = sorted(df_client["tx_direction"].astype(str).str.upper().unique())

# Map common variants to IN/OUT
dir_map = {
    "IN": "IN",
    "OUT": "OUT",
    "CREDIT": "IN",
    "DEBIT": "OUT",
    "INBOUND": "IN",
    "OUTBOUND": "OUT"
}
df_client["tx_direction_norm"] = df_client["tx_direction"].astype(str).str.upper().map(dir_map)

# Filter to known directions only
df_client = df_client[df_client["tx_direction_norm"].isin(["IN", "OUT"])].copy()

# Build daily sums by direction
df_client["date"] = df_client["tx_date_time"].dt.floor("D")

daily = (
    df_client
    .groupby(["date", "tx_direction_norm"])["tx_base_amount"]
    .sum()
    .unstack(fill_value=0)
    .rename(columns={"IN": "IN", "OUT": "OUT"})
)

# Ensure both columns exist
for col in ["IN", "OUT"]:
    if col not in daily.columns:
        daily[col] = 0.0

# Reindex to continuous daily frequency to capture rolling windows properly
full_index = pd.date_range(daily.index.min(), daily.index.max(), freq="D")
daily = daily.reindex(full_index, fill_value=0.0)
daily.index.name = "date"

# Run simulations
legacy_alerts, legacy_detail = simulate_alerts(daily[["OUT", "IN"]], legacy_params)
corrected_alerts, corrected_detail = simulate_alerts(daily[["OUT", "IN"]], corrected_params)

# Prepare concise summaries
legacy_count = len(legacy_alerts)
corrected_count = len(corrected_alerts)

# Add window start for readability (30-day trailing)
def add_window_bounds(df_alerts):
    df_alerts = df_alerts.copy()
    df_alerts["window_start"] = df_alerts["date"] - pd.Timedelta(days=29)
    df_alerts["window_end"] = df_alerts["date"]
    cols = ["window_start", "window_end", "out30_sum", "in30_sum", "ratio_pct", "amount_threshold", "pct_low", "pct_high"]
    return df_alerts[cols].reset_index(drop=True)

legacy_alerts_view = add_window_bounds(legacy_alerts)
corrected_alerts_view = add_window_bounds(corrected_alerts)

# Also provide a short textual summary
summary_text = f"""
Direcciones detectadas en datos: {unique_dirs}

Alertas con parámetros vigentes (Amount={legacy_params["Amount"]:,}, {legacy_params["Percentage_Low"]}-{legacy_params["Percentage_High"]}%): {legacy_count}
Alertas con parámetros corregidos (Amount={corrected_params["Amount"]:,}, {corrected_params["Percentage_Low"]}-{corrected_params["Percentage_High"]}%): {corrected_count}
"""
summary_text


"\nDirecciones detectadas en datos: ['INBOUND', 'OUTBOUND']\n\nAlertas con parámetros vigentes (Amount=30,000,000, 90-110%): 92\nAlertas con parámetros corregidos (Amount=804,076,122, 125-200%): 0\n"