# 02 – KPI Calculations (CV, CVM, IQR Outliers, Quartiles)

**Entrada:** `/lakehouse/default/Files/raw/ops_daily.parquet`

**Salidas:**
- `/lakehouse/default/Tables/agent_stability` (parquet files)
- `/lakehouse/default/Tables/weekly_flags` (parquet files)


In [None]:

import pandas as pd
import numpy as np
import os

# Load the raw daily operations extract into memory. The steps below read the
# `date`, `agent_id`, `team_id`, `productive_hours` and `cases_closed` columns.
raw_path = "/lakehouse/default/Files/raw/ops_daily.parquet"
df = pd.read_parquet(raw_path)

def coef_variacion(series: pd.Series) -> float:
    """Return the coefficient of variation (sample std / mean) of *series*.

    The standard deviation uses ``ddof=1``. When the mean is exactly zero the
    ratio is undefined and ``np.nan`` is returned instead.
    """
    mean_value = series.mean()
    if mean_value == 0:
        return np.nan
    return float(series.std(ddof=1) / mean_value)

def coef_variacion_mediana(series: pd.Series) -> float:
    """Return a median-based coefficient of variation of *series*.

    Computed as ``1.4826 * MAD / median``, where MAD is the median of the
    absolute deviations from the median. When the median is exactly zero the
    ratio is undefined and ``np.nan`` is returned instead.
    """
    mad_scale = 1.4826
    center = series.median()
    if center == 0:
        return np.nan
    deviations = series.sub(center).abs()
    return float(mad_scale * deviations.median() / center)

def iqr_bounds(series: pd.Series, k: float = 1.5):
    """Return the ``(lower, upper)`` IQR fences of *series*.

    The fences sit ``k`` interquartile ranges below the first quartile and
    above the third quartile.
    """
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    spread = third_quartile - first_quartile
    lower_fence = first_quartile - k * spread
    upper_fence = third_quartile + k * spread
    return lower_fence, upper_fence

# Parse the date column and bucket every row by its ISO week number.
# NOTE(review): only the week number is kept, not the ISO year, so the same
# week number from two different years would share a bucket — confirm the
# extract never spans more than one year.
df['date'] = pd.to_datetime(df['date'])
df['week'] = df['date'].dt.isocalendar()['week'].astype(int)

# Collapse the daily rows to one row per agent, team and week, holding the
# mean productive hours and the mean cases closed for that week.
weekly = (
    df.groupby(['agent_id', 'team_id', 'week'], as_index=False)
    .agg(
        hours_mean=pd.NamedAgg(column='productive_hours', aggfunc='mean'),
        cases_mean=pd.NamedAgg(column='cases_closed', aggfunc='mean'),
    )
)

# Stability per agent: dispersion of the weekly means across all of the
# agent's weeks, measured with both the classic CV and the median-based CV.
by_agent = (
    weekly.groupby(['agent_id', 'team_id'])
    .agg(
        cv_hours=pd.NamedAgg(column='hours_mean', aggfunc=coef_variacion),
        cvm_hours=pd.NamedAgg(column='hours_mean', aggfunc=coef_variacion_mediana),
        cv_cases=pd.NamedAgg(column='cases_mean', aggfunc=coef_variacion),
        cvm_cases=pd.NamedAgg(column='cases_mean', aggfunc=coef_variacion_mediana),
    )
    .reset_index()
)

def quartile(s: pd.Series):
    """Assign each value of *s* to a quantile bucket labelled 1 (lowest) to 4.

    Missing values are replaced by the median of *s* before binning.

    Passing four fixed labels to ``pd.qcut`` together with
    ``duplicates='drop'`` raises ``ValueError`` as soon as tied values make
    two quantile edges coincide, because fewer than four bins remain. To avoid
    that, the values are binned by integer position first and the labels are
    attached afterwards; with dropped edges only the lowest labels are used.

    Returns a Series aligned to ``s.index`` with an ordered categorical dtype
    whose categories are always ``[1, 2, 3, 4]``. If every value is missing,
    every result is missing too.
    """
    filled = s.fillna(s.median())
    if filled.notna().any():
        # labels=False yields 0-based bin positions regardless of how many
        # bins survive the duplicate-edge drop; shift them to 1-based.
        labels = pd.qcut(filled, 4, labels=False, duplicates='drop') + 1
    else:
        # Nothing to bin (empty or all-missing input).
        labels = pd.Series(np.nan, index=s.index, dtype='float64')
    return pd.Series(
        pd.Categorical(labels, categories=[1, 2, 3, 4], ordered=True),
        index=s.index,
        name=s.name,
    )

# Rank agents by the variability of their weekly hours: bucket 1 holds the
# lowest cv_hours values. Agents with a missing cv_hours are binned at the
# median, because quartile() fills missing values with it.
by_agent['quartile_efficiency'] = quartile(by_agent['cv_hours'])

def outlier_flags(df_week, col, group='team_id'):
    """Flag the rows of *df_week* whose *col* value is an IQR outlier in its group.

    The fences are computed per value of the *group* column with
    ``iqr_bounds`` (default ``k``). A row is flagged when its value lies
    strictly below the lower fence or strictly above the upper fence.

    Args:
        df_week: Frame containing the *col* and *group* columns.
        col: Name of the numeric column to test.
        group: Name of the column that defines the comparison groups.

    Returns:
        An integer Series aligned to ``df_week.index``: 1 for outliers, 0
        otherwise. Rows with a missing value, or whose group has no fences
        (for example a missing group key), get 0 because comparisons against
        NaN are false.
    """
    # One (lower, upper) tuple per group, from the shared IQR helper.
    bounds = df_week.groupby(group)[col].apply(iqr_bounds)
    fences = pd.DataFrame(bounds.tolist(), index=bounds.index, columns=['lo', 'hi'])

    # Broadcast each group's fences back onto its rows, then compare whole
    # columns at once instead of calling a Python function per row.
    keys = df_week[group]
    lower = keys.map(fences['lo'])
    upper = keys.map(fences['hi'])
    values = df_week[col]
    return ((values < lower) | (values > upper)).astype(int)

# Mark each agent-week whose weekly mean falls outside the IQR fences of its
# team (outlier_flags groups by team_id by default): 1 = outlier, 0 = not.
weekly['out_hours_flag'] = outlier_flags(weekly, 'hours_mean')
weekly['out_cases_flag'] = outlier_flags(weekly, 'cases_mean')

# Output folders: one for the per-agent stability table, one for the weekly
# rows carrying the outlier flags.
# NOTE(review): these are plain parquet files written under Tables/ — confirm
# the lakehouse registers them as tables in this form.
out1 = "/lakehouse/default/Tables/agent_stability"
out2 = "/lakehouse/default/Tables/weekly_flags"
os.makedirs(out1, exist_ok=True)
os.makedirs(out2, exist_ok=True)

# Each table is written as a single part file; an existing part-0000.parquet
# at the same path is replaced on re-run.
by_agent.to_parquet(os.path.join(out1, "part-0000.parquet"), index=False)
weekly.to_parquet(os.path.join(out2, "part-0000.parquet"), index=False)

# Report row counts so the run can be sanity-checked from the cell output.
print(f"Wrote {len(by_agent)} rows to {out1}")
print(f"Wrote {len(weekly)} rows to {out2}")
