In [1]:
import pandas as pd
from pathlib import Path

In [2]:
def _find_col(df, candidates, required=True):
    """
    Return the first name in ``candidates`` that is a column of ``df``.

    Candidates are tried in the order given, so earlier names take priority.
    When none of them is present, a ``KeyError`` listing the candidates is
    raised if ``required`` is true; otherwise ``None`` is returned.
    """
    # Private sentinel so that "nothing matched" cannot be confused with a
    # legitimately matched value.
    not_found = object()
    hit = next((name for name in candidates if name in df.columns), not_found)
    if hit is not not_found:
        return hit
    if not required:
        return None
    raise KeyError(f"No se encontró ninguna de las columnas: {candidates}")

def load_customers_std(path: str | Path) -> pd.DataFrame:
    """
    Load a customers CSV and add standardized columns.

    Every original column is kept; the following are added or overwritten:

    - ``outcome_norm``: outcome label as a stripped, lower-cased string, with
      the short forms ``abandon`` / ``balk`` mapped to ``abandoned`` /
      ``balked``. Read from ``outcome_norm`` if the file already has it,
      otherwise from ``outcome`` or ``status``.
    - ``profile``: customer profile as a string (from ``profile`` or
      ``perfil``).
    - ``items``: item count as ``int`` (from ``items``, ``n_items`` or
      ``num_items``); values that cannot be parsed become 0.

    Raises:
        KeyError: if no outcome, profile or items column is found.
    """
    df = pd.read_csv(path)

    # outcome_norm: a pre-existing outcome_norm column goes through the same
    # clean-up as a raw outcome column. Otherwise values such as "Served" or
    # "abandon" would not match the canonical served / abandoned / balked
    # labels and would silently fall out of any count based on them.
    out_col = _find_col(df, ["outcome_norm", "outcome", "status"])
    df["outcome_norm"] = (
        df[out_col]
        .astype(str)
        .str.strip()
        .str.lower()
        .replace({"abandon": "abandoned", "balk": "balked"})
    )

    # profile
    prof_col = _find_col(df, ["profile", "perfil"])
    df["profile"] = df[prof_col].astype(str)

    # items: unparseable or missing values are coerced to NaN and then to 0 so
    # the column can be a plain int.
    items_col = _find_col(df, ["items", "n_items", "num_items"])
    df["items"] = pd.to_numeric(df[items_col], errors="coerce").fillna(0).astype(int)

    return df


def _lane_labels(df, lane_col):
    """Return ``df[lane_col]`` as string labels, keeping missing values missing."""
    lanes = df[lane_col]
    # A bare astype(str) turns NaN into the literal label "nan", which would
    # then appear as a lane of its own; masking restores the missing value so
    # that groupby (which drops missing keys by default) leaves those rows out.
    return lanes.astype(str).where(lanes.notna())


def _mean_by(df, value_col, keys, out_name):
    """Average ``value_col`` (coerced to numeric) per group of ``keys``."""
    values = pd.to_numeric(df[value_col], errors="coerce")
    return values.groupby(keys).mean().to_frame(name=out_name)


def compute_metrics(df: pd.DataFrame, label: str) -> dict:
    """
    Compute the KPI tables for one standardized customers frame.

    ``df`` must already contain the ``outcome_norm``, ``profile`` and
    ``items`` columns. ``label`` is embedded in the output column names so
    that two results can be placed side by side.

    Returns a dict with these keys:

    - ``status_global``: Series with the served / abandoned / balked counts.
    - ``status_by_profile``: the same counts per profile.
    - ``items_by_profile``: mean number of items per profile.
    - ``people_by_lane``: served customers per lane.
    - ``service_by_lane_profile``: mean service time per lane and profile.
    - ``patience_by_profile``: mean patience per profile.
    - ``queue_interval``: min / max / mean queue length at arrival per profile.

    The last four depend on optional columns and are ``None`` when the
    corresponding column is not present in ``df``.
    """
    metrics = {}

    # 1) How many customers are served, abandon or balk (global and by profile)
    status_cols = ["served", "abandoned", "balked"]
    status_df = df[df["outcome_norm"].isin(status_cols)]

    # reindex guarantees all three labels appear, in a fixed order, even when
    # one of them never occurs in the data.
    status_global = status_df["outcome_norm"].value_counts().reindex(status_cols, fill_value=0)
    status_global.name = label
    metrics["status_global"] = status_global

    metrics["status_by_profile"] = (
        status_df.groupby(["profile", "outcome_norm"])
        .size()
        .unstack(fill_value=0)
        .reindex(columns=status_cols, fill_value=0)
    )

    # 2) Mean number of items per profile
    metrics["items_by_profile"] = (
        df.groupby("profile")["items"].mean().to_frame(name=f"items_mean_{label}")
    )

    # 3) Customers handled by each lane (served only)
    lane_col = _find_col(df, ["lane_type", "lane", "cashier_type", "tipo_caja"], required=False)
    if lane_col is not None:
        served_df = df[df["outcome_norm"] == "served"]
        # Build the grouping key from the filtered frame itself rather than
        # from the full frame, so no implicit index alignment is needed.
        metrics["people_by_lane"] = (
            served_df.groupby(_lane_labels(served_df, lane_col))
            .size()
            .to_frame(name=f"served_{label}")
        )
    else:
        metrics["people_by_lane"] = None

    # 4) Mean service time per lane and customer profile
    service_col = _find_col(df, ["service_time_s", "service_time", "tiempo_servicio_s"], required=False)
    if lane_col is not None and service_col is not None:
        metrics["service_by_lane_profile"] = _mean_by(
            df,
            service_col,
            [_lane_labels(df, lane_col), df["profile"]],
            f"service_time_mean_{label}_s",
        )
    else:
        metrics["service_by_lane_profile"] = None

    # 5) Mean patience per profile
    patience_col = _find_col(df, ["patience_s", "paciencia_s", "patience"], required=False)
    if patience_col is not None:
        metrics["patience_by_profile"] = _mean_by(
            df, patience_col, df["profile"], f"patience_mean_{label}_s"
        )
    else:
        metrics["patience_by_profile"] = None

    # 6) Per-profile range of the queue length seen when joining the queue
    qlen_col = _find_col(
        df,
        ["queue_len_at_arrival", "queue_length_at_arrival", "n_in_queue_at_arrival", "cola_al_llegar"],
        required=False,
    )
    if qlen_col is not None:
        qlen = pd.to_numeric(df[qlen_col], errors="coerce")
        # Named aggregation: each keyword is the output column name.
        metrics["queue_interval"] = qlen.groupby(df["profile"]).agg(
            **{
                f"queue_min_{label}": "min",
                f"queue_max_{label}": "max",
                f"queue_mean_{label}": "mean",
            }
        )
    else:
        metrics["queue_interval"] = None

    return metrics



def _join_optional(base, sim, counts=False):
    """
    Combine the base and sim versions of one KPI table side by side.

    Either argument may be ``None`` (KPI not available for that file). Returns
    ``None`` when both are missing, the single available table when only one
    exists, and otherwise their outer join on the index. With ``counts=True``
    gaps left by the join are filled with 0 and the table is cast back to int.
    """
    if base is None and sim is None:
        return None
    if base is None or sim is None:
        # Only one side has this KPI: show it as-is. Joining it against an
        # empty placeholder frame is unreliable when the table is indexed by
        # several levels, because the placeholder shares no index names.
        table = base if base is not None else sim
    else:
        if base.index.nlevels == sim.index.nlevels:
            # The two files may name the lane column differently; align the
            # index names so the join matches rows level by level.
            sim = sim.set_axis(sim.index.set_names(list(base.index.names)), axis=0)
        table = base.join(sim, lsuffix="_base", rsuffix="_sim", how="outer")
    if counts:
        # An outer join introduces NaN (and therefore floats) for rows present
        # on one side only; counts should stay integers.
        table = table.fillna(0).astype(int)
    return table


def compare_customers(base_path: str | Path, sim_path: str | Path):
    """
    Compare a base-case customers CSV against a simulated one.

    Both files are loaded and standardized, the KPI tables are computed for
    each, and one comparison table per KPI is printed to stdout. KPIs that
    depend on optional columns are skipped when neither file provides them
    and shown for a single file when only one does. Returns ``None``.
    """
    df_base = load_customers_std(base_path)
    df_sim = load_customers_std(sim_path)

    m_base = compute_metrics(df_base, "base")
    m_sim = compute_metrics(df_sim, "sim")

    print("\n=== 1) Gente atendida / abandonada / balked (GLOBAL) ===")
    status_global = pd.concat([m_base["status_global"], m_sim["status_global"]], axis=1)
    print(status_global.to_string())

    print("\n=== 2) Gente atendida / abandonada / balked POR PERFIL ===")
    # Both tables share the column names served/abandoned/balked, so the
    # suffixes are what tells base and sim apart here.
    status_by_profile = (
        m_base["status_by_profile"]
        .join(
            m_sim["status_by_profile"],
            lsuffix="_base",
            rsuffix="_sim",
            how="outer",
        )
        .fillna(0)
        .astype(int)  # profiles present in one file only would otherwise yield floats
    )
    print(status_by_profile.to_string())

    print("\n=== 3) Items promedio por perfil ===")
    items_compare = m_base["items_by_profile"].join(
        m_sim["items_by_profile"],
        how="outer",
    )
    print(items_compare.to_string())

    # Sections 4-7 rely on optional columns: (title, metrics key, is a count table)
    optional_sections = [
        ("\n=== 4) Gente atendida por tipo de caja (solo served) ===", "people_by_lane", True),
        (
            "\n=== 5) Tiempo promedio de atención por tipo de caja y perfil (segundos) ===",
            "service_by_lane_profile",
            False,
        ),
        ("\n=== 6) Paciencia promedio por perfil (segundos) ===", "patience_by_profile", False),
        (
            "\n=== 7) Intervalo y promedio de personas en fila al encolarse, por perfil ===",
            "queue_interval",
            False,
        ),
    ]
    for title, key, is_count in optional_sections:
        table = _join_optional(m_base[key], m_sim[key], counts=is_count)
        if table is None:
            continue
        print(title)
        print(table.to_string())

In [4]:
# Compare the day-1 base-case file against the simulated Week-01/Day-01 output.
compare_customers("customers_base_dia1.csv",
                   "outputs_sample/Week-01/Day-01/customers.csv")


=== 1) Gente atendida / abandonada / balked (GLOBAL) ===
              base   sim
outcome_norm            
served        3124  2774
abandoned      431   150
balked           2    52

=== 2) Gente atendida / abandonada / balked POR PERFIL ===
outcome_norm       served_base  abandoned_base  balked_base  served_sim  abandoned_sim  balked_sim
profile                                                                                           
deal_hunter                273              40            1         265              6           8
express_basket            1077              41            1         932             11           8
family_cart                450              98            0         381             40           0
regular                    540             136            0         467             30          33
self_checkout_fan          331              15            0         286              6           3
weekly_planner             453             101            0     

In [None]:
# Compare the day-3 base-case file against the simulated Week-01/Day-03 output.
compare_customers("customers_base_dia3.csv",
                   "outputs_sample/Week-01/Day-03/customers.csv")


=== 1) Gente atendida / abandonada / balked (GLOBAL) ===
              base   sim
outcome_norm            
served        3415  3120
abandoned      471   202
balked           2   151

=== 2) Gente atendida / abandonada / balked POR PERFIL ===
outcome_norm       served_base  abandoned_base  balked_base  served_sim  abandoned_sim  balked_sim
profile                                                                                           
deal_hunter                956             159            2         898             70          71
express_basket             937              62            0         818             17          30
family_cart                339              67            0         377             37           0
regular                    459              80            0         386             29          42
self_checkout_fan          307              19            0         260              3           8
weekly_planner             417              84            0     