# 02 – KPI Calculations (CV, CVM, IQR Outliers, Quartiles)

**Entrada:** `/lakehouse_sim/Files/raw/ops_daily.parquet`

**Salidas:**
- `/lakehouse_sim/Tables/agent_stability.parquet` (parquet file)
- `/lakehouse_sim/Tables/weekly_flags.parquet` (parquet file)


In [None]:
from pathlib import Path

# Every location is derived from the directory the notebook is run from.
PROJ_ROOT = Path(".").resolve()
LH_ROOT = PROJ_ROOT.joinpath("lakehouse_sim")

# Input: raw daily operations data.
RAW_PATH = LH_ROOT.joinpath("Files", "raw", "ops_daily.parquet")

# Outputs: make sure the tables directory exists before anything is written.
TABLES_DIR = LH_ROOT.joinpath("Tables")
TABLES_DIR.mkdir(parents=True, exist_ok=True)

OUT_AGENT = TABLES_DIR.joinpath("agent_stability.parquet")
OUT_WEEKLY = TABLES_DIR.joinpath("weekly_flags.parquet")

# Notebook display of the resolved paths.
RAW_PATH, OUT_AGENT, OUT_WEEKLY

In [None]:
import numpy as np
import pandas as pd

def coef_variacion(series: pd.Series) -> float:
    """Return the coefficient of variation (sample std / mean) of *series*.

    The standard deviation uses ``ddof=1``. When the mean is exactly zero
    the ratio is undefined and ``np.nan`` is returned instead.
    """
    mean_value = series.mean()
    if mean_value == 0:
        return np.nan
    return float(series.std(ddof=1) / mean_value)

def coef_variacion_mediana(series: pd.Series) -> float:
    """Return a median-based coefficient of variation of *series*.

    Computed as ``1.4826 * MAD / median``, where MAD is the median of the
    absolute deviations from the median. When the median is exactly zero
    the ratio is undefined and ``np.nan`` is returned instead.
    """
    center = series.median()
    if center == 0:
        return np.nan
    abs_deviation = (series - center).abs()
    return float(1.4826 * abs_deviation.median() / center)

def iqr_bounds(series: pd.Series, k: float = 1.5):
    """Return the ``(lower, upper)`` IQR fences of *series*.

    The fences are ``Q1 - k * IQR`` and ``Q3 + k * IQR``, where Q1 and Q3 are
    the 0.25 and 0.75 quantiles and ``IQR = Q3 - Q1``.
    """
    lower_quartile = series.quantile(0.25)
    upper_quartile = series.quantile(0.75)
    spread = upper_quartile - lower_quartile
    return lower_quartile - k * spread, upper_quartile + k * spread

In [None]:
# Load the raw daily data and derive the week key.
df = pd.read_parquet(RAW_PATH)
df["date"] = pd.to_datetime(df["date"])
# NOTE(review): this is the ISO week number only (no year). If the input
# covers more than one calendar year, same-numbered weeks from different
# years fall into the same group — confirm the data is single-year.
df["week"] = df["date"].dt.isocalendar().week.astype(int)

# Weekly aggregate per agent: mean productive hours and mean cases closed.
weekly = df.groupby(["agent_id", "team_id", "week"], as_index=False).agg(
    hours_mean=("productive_hours", "mean"),
    cases_mean=("cases_closed", "mean"),
)

# Per-agent KPIs: dispersion of the weekly means across weeks.
by_agent = weekly.groupby(["agent_id", "team_id"]).agg(
    cv_hours=("hours_mean", coef_variacion),
    cvm_hours=("hours_mean", coef_variacion_mediana),
    cv_cases=("cases_mean", coef_variacion),
    cvm_cases=("cases_mean", coef_variacion_mediana),
).reset_index()

# Quartiles of cv_hours (lower CV = more stable, so quartile 1 is the most
# stable). Missing CVs are replaced by the median CV before binning.
_cv_hours_filled = by_agent["cv_hours"].fillna(by_agent["cv_hours"].median())
# With duplicates="drop", tied quantile edges are merged and fewer than four
# bins may remain; passing a fixed list of four labels then raises
# ValueError. Bin with integer codes instead and map them to 1..k, keeping
# the same ordered categorical dtype the four-label call would produce.
_quartile_codes = pd.qcut(_cv_hours_filled, 4, labels=False, duplicates="drop")
by_agent["quartile_efficiency"] = pd.Categorical(
    _quartile_codes + 1, categories=[1, 2, 3, 4], ordered=True
)

# Outlier flags per team/week
def flags_outliers(group_df: pd.DataFrame, col: str) -> pd.Series:
    """Return a 0/1 integer Series marking IQR outliers in ``group_df[col]``.

    A value is flagged (1) when it lies strictly below the lower fence or
    strictly above the upper fence returned by ``iqr_bounds``. The result
    keeps the index of ``group_df``.
    """
    values = group_df[col]
    lower_fence, upper_fence = iqr_bounds(values)
    is_outlier = (values < lower_fence) | (values > upper_fence)
    return is_outlier.astype(int)

# Flag each weekly row as an outlier relative to its own (team_id, week) group.
# transform() returns a Series aligned to the index of `weekly`, so the result
# can be assigned directly. The earlier apply(group_keys=False) produced a
# single-level index, on which reset_index(level=[0, 1]) fails because there
# is no second level to drop.
weekly["out_hours_flag"] = (
    weekly.groupby(["team_id", "week"])["hours_mean"]
    .transform(
        lambda s: flags_outliers(s.to_frame(name="hours_mean"), "hours_mean")
    )
    .astype(int)  # keep the 0/1 integer dtype regardless of pandas casting
)
weekly["out_cases_flag"] = (
    weekly.groupby(["team_id", "week"])["cases_mean"]
    .transform(
        lambda s: flags_outliers(s.to_frame(name="cases_mean"), "cases_mean")
    )
    .astype(int)  # keep the 0/1 integer dtype regardless of pandas casting
)

# Notebook preview of both result tables.
by_agent.head(), weekly.head()

In [None]:
# Persist both result tables, then report row counts. All writes happen
# before any message is printed, so nothing is reported if a write fails.
_outputs = ((by_agent, OUT_AGENT), (weekly, OUT_WEEKLY))

for _table, _destination in _outputs:
    _table.to_parquet(_destination, index=False)

for _table, _destination in _outputs:
    print(f"✅ Wrote {len(_table)} rows → {_destination}")