In [1]:
import argparse
import os
import re
import sys
import pandas as pd
from pathlib import Path
import csv
import numpy as np

GROUP_PREFIXES = [
    "outdoor_only",
    "indoor_only",
    "garage_only",
    "outdoor_and_indoor",
    "outdoor_and_garage",
    "outdoor_indoor_and_garage",
    "all_data",
]

REQUIRED_COLS = {
    "dataset","input_size","mae","model_file","mse","output_size","r2","rmse","val_loss"
}
METRICS = ["mae", "mse", "r2", "rmse","val_loss"]

csv_path = Path("/home/admindi/sbenites/WirelessLocation/validation/model_per_dataset_validation/eval_results.csv")  # <-- change if needed
round_decimals = 2



def infer_model_group(model_file: str) -> str:
    # normalize
    base = os.path.basename(str(model_file or "")).lower().strip()
    base = os.path.splitext(base)[0]                 # drop extension
    base = re.sub(r"[\s\-]+", "_", base)             # spaces/hyphens -> underscore

    # longest-first to avoid overlaps
    for p in sorted(GROUP_PREFIXES, key=len, reverse=True):
        if base == p or base.startswith(p + "_"):
            return p
    return "unknown"

def split_by_group(csv_path: str) -> dict[str, pd.DataFrame]:
    df = pd.read_csv(csv_path, sep=None, engine="python", skipinitialspace=True)
    df["model_type"] = df["model_file"].apply(infer_model_group)

    groups = {p: df[df["model_type"] == p].copy() for p in GROUP_PREFIXES}
    groups["unknown"] = df[df["model_type"] == "unknown"].copy()

    
    return groups

def coerce_and_sanity_check(df: pd.DataFrame) -> pd.DataFrame:
    # Coerce to numeric
    for c in METRICS:
        df[c] = pd.to_numeric(df[c], errors="coerce")
    # Remove inf/NaN rows in metrics or required fields
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.dropna(subset=list(REQUIRED_COLS))
    # Basic physical/range checks
    df = df[(df["mae"] >= 0) & (df["mse"] >= 0) & (df["rmse"] >= 0)]
    # R^2 can be negative, but >1 is invalid
    df = df[df["r2"] <= 1]
    return df

def iqr_bounds(s: pd.Series, k: float = 1.5) -> tuple[float, float]:
    q1 = s.quantile(0.25)
    q3 = s.quantile(0.75)
    iqr = q3 - q1
    lo = q1 - k * iqr
    hi = q3 + k * iqr
    return lo, hi

def drop_outliers_iqr(df: pd.DataFrame,
                      cols: list[str],
                      group_cols: list[str],
                      k: float = 1.5) -> tuple[pd.DataFrame, pd.Series]:
    """
    Remove outliers per (group_cols) using IQR on each column in cols.
    Returns (filtered_df, outlier_mask) aligned to original df index.
    """
    mask = pd.Series(False, index=df.index)
    for col in cols:
        # compute per-group bounds and flag outside
        lo = df.groupby(group_cols)[col].transform(lambda s: iqr_bounds(s, k)[0])
        hi = df.groupby(group_cols)[col].transform(lambda s: iqr_bounds(s, k)[1])
        mask = mask | (df[col] < lo) | (df[col] > hi)
    return df.loc[~mask].copy(), mask


In [2]:
dataframe_group = split_by_group(csv_path)

for group_name in dataframe_group.keys():  # (this is a model_type bucket, not 'dataset')
    df = dataframe_group[group_name].copy()

    # Infer model_group consistently
    df["model_group"] = df["model_file"].apply(infer_model_group)

    # 1) Clean + sanity checks
    df = coerce_and_sanity_check(df)

    # 2) Optional: keep only best N by RMSE **after** cleaning (avoid bias)
    #    You can remove this if you prefer to use all rows.
    df = df.sort_values("rmse", ascending=True).head(1000)

    # 3) Per-group outlier removal (IQR) over all metrics
    clean_df, out_mask = drop_outliers_iqr(
        df,
        cols=METRICS,
        group_cols=["model_group", "dataset"],
        k=1.5,  # make this 2.0 if you want a looser filter
    )

    removed = int(out_mask.sum())
    kept = len(clean_df)
    total = len(df)
    print(f"[{group_name}] outliers removed: {removed} / {total} (kept {kept})")

    # 4) Aggregate (you can swap mean->median if you want extra robustness)
    grouped = (
        clean_df
        .groupby(["model_group", "dataset"], dropna=False)[METRICS]
        .mean()
        .sort_index()
    )

    wide = grouped.unstack("dataset")
    wide.columns = [f"{metric}__{dataset}" for metric, dataset in wide.columns]
    wide = wide.sort_index()

    # 5) Pretty display
    from IPython.display import display
    print(f"=== Averages by (model_group, dataset) — cleaned (IQR) — group: {group_name} ===")
    display(
        grouped.style
        .format(precision=round_decimals)
        .background_gradient(cmap="RdYlGn_r", subset=["mae","mse","rmse","val_loss"])
        .background_gradient(cmap="RdYlGn", subset=["r2"])
    )

[outdoor_only] outliers removed: 2 / 23 (kept 21)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: outdoor_only ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
outdoor_only,outdoor,5.52,49.82,0.44,6.97,50.0


[indoor_only] outliers removed: 0 / 0 (kept 0)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: indoor_only ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


[garage_only] outliers removed: 0 / 0 (kept 0)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: garage_only ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


[outdoor_and_indoor] outliers removed: 0 / 0 (kept 0)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: outdoor_and_indoor ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


[outdoor_and_garage] outliers removed: 0 / 0 (kept 0)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: outdoor_and_garage ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


[outdoor_indoor_and_garage] outliers removed: 0 / 0 (kept 0)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: outdoor_indoor_and_garage ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


[all_data] outliers removed: 0 / 0 (kept 0)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: all_data ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


[unknown] outliers removed: 0 / 0 (kept 0)
=== Averages by (model_group, dataset) — cleaned (IQR) — group: unknown ===


Unnamed: 0_level_0,Unnamed: 1_level_0,mae,mse,r2,rmse,val_loss
model_group,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
