In [None]:
import os
import json
import pandas as pd
from src.config import MODELS_DIR, MODEL_ALIASES

# Fields copied out of every results file. A double underscore separates the levels
# of a nested lookup: "classification_report__dia__recall" is read as
# data["classification_report"]["dia"]["recall"].
target_keys = [
    "timestamp",
    "model_purpose",
    "predicts",
    "special_features",
    "f1",
    "recall",
    "precision",
    "bal_accuracy",
    "roc_auc_score",
    # Per-class entries of the classification report: "no dia" first, then "dia".
    *(
        f"classification_report__{label}__{metric}"
        for label in ("no dia", "dia")
        for metric in ("f1-score", "recall")
    ),
    "model_class",
]

# Names of the non-hidden sub-directories of MODELS_DIR (one per model run).
# os.listdir() returns bare entry names, so each name has to be joined with MODELS_DIR
# before the isdir() test; testing the bare name would resolve it against the current
# working directory instead.
subdirs = [
    d
    for d in os.listdir(MODELS_DIR)
    if not d.startswith(".") and os.path.isdir(os.path.join(MODELS_DIR, d))
]


# One dict per model directory that contains a "<dir>.results.json" file.
rows = []

for d in subdirs:
    result_path = os.path.join(MODELS_DIR, d, f"{d}.results.json")
    if not os.path.exists(result_path):
        # Directories without a results file are skipped entirely.
        continue

    with open(result_path, "r") as fh:
        data = json.load(fh)

    row = {}
    for key in target_keys:
        # Walk the nested dicts along the "__"-separated path; a missing step yields None.
        node = data
        for segment in key.split("__"):
            if not isinstance(node, dict) or segment not in node:
                node = None
                break
            node = node[segment]
        row[key] = node

    row["dir"] = d
    rows.append(row)


# Build the results table, indexed by run timestamp and ordered by descending f1.
# Passing `columns=` fixes the schema even when no results file was found; without it
# an empty `rows` list gives a frame without columns and the "timestamp" lookup below
# raises a KeyError.
df = pd.DataFrame(rows, columns=target_keys + ["dir"])
df["timestamp"] = pd.to_datetime(df["timestamp"])
df = df.set_index("timestamp")

if "roc_auc_score" in df.columns:
    # Values that cannot be parsed as numbers become NaN instead of raising.
    df["roc_auc_score"] = pd.to_numeric(df["roc_auc_score"], errors="coerce")

# Shorten the classification-report column names and make them space-free.
df = df.rename(
    columns={
        col: col.replace("classification_report__", "cl_rep__").replace(" ", "_")
        for col in df.columns
        if col.startswith("classification_report")
    }
)

df = df.sort_values(by="f1", ascending=False)
display(df)

In [None]:
# df as markdown table
from tabulate import tabulate

# reset_index() turns the timestamp index into a regular column so it shows up in the table.
markdown_table = tabulate(
    df.reset_index(),
    headers="keys",
    tablefmt="github",
    showindex=False,
)
print(markdown_table)

In [None]:
# Labels of all columns with a numeric dtype; these are the metrics that get plotted.
num_cols = df.select_dtypes(include=["number"]).columns

# top3_mask = df[num_cols].apply(lambda col: col.isin(col.nlargest(2)))
# df_top = df[top3_mask.any(axis=1)].copy()

In [None]:
import matplotlib.pyplot as plt

# Building blocks for the per-model line styles, combined by style_generator() below.
markers = ["o", "s", "D", "^", "v", "<", ">", "x", "+", "*", "p", "h"]
linestyles = ["-", "--", "-.", ":"]

# Colours of matplotlib's "tab20" colormap as a plain list.
cmap = plt.get_cmap("tab20")
colors = [*cmap.colors]

def style_generator():
    """Yield one plot-style dict per (marker, linestyle, color) combination.

    The color varies fastest, then the line style, then the marker. Each yielded dict
    has the keys "color", "linestyle" and "marker". The generator is finite: it ends
    once every combination of the module-level `markers`, `linestyles` and `colors`
    lists has been produced.
    """
    yield from (
        {"color": color, "linestyle": linestyle, "marker": marker}
        for marker in markers
        for linestyle in linestyles
        for color in colors
    )

In [None]:
def _predicts_exactly(labels):
    """Return a predicate that is True when a `predicts` entry holds exactly `labels`."""
    expected = set(labels)

    def _matches(predicted):
        # The results loader stores None when a results file has no "predicts" entry;
        # set(None) raises TypeError, so such a run is treated as "does not match"
        # instead of aborting the whole cell.
        try:
            return set(predicted) == expected
        except TypeError:
            return False

    return _matches


df_max = df

# astype(bool) keeps the masks boolean for an empty frame as well, where map() would
# otherwise return an object-dtype Series that is not usable as a row mask.
mask_2cls = df_max["predicts"].map(_predicts_exactly(["dia", "no dia"])).astype(bool)
mask_3cls = (
    df_max["predicts"].map(_predicts_exactly(["dia", "no dia", "pre"])).astype(bool)
)
# Only the two-class mask is applied; mask_3cls is computed but not used in this cell.
df_max = df_max[mask_2cls]

In [None]:
# Frame used by the comparison plot below; here it is simply the filtered df_max.
df_show = df_max

In [None]:
def _is_present(value):
    """Return True when `value` carries label text: not None/NaN, not "" and not empty."""
    if isinstance(value, str):
        return value != ""
    if pd.api.types.is_list_like(value):
        # pd.notna() on a list returns an array, which cannot be used in an `if`.
        return len(value) > 0
    return bool(pd.notna(value))


# Fallback for row labels
def get_row_label(row):
    """Build the legend label for one model row.

    The label is "<MM-DD HH:MM:SS> [<model class or its alias> ]<details> (<labels>)",
    where <details> is `special_features` when present and `model_purpose` otherwise,
    and <labels> are the entries of `predicts` joined by ", ".

    Parameters
    ----------
    row : pandas.Series
        One row of the results frame. `row.name` must support `strftime`; the row must
        contain "predicts" (an iterable of labels). "model_class", "special_features"
        and "model_purpose" are optional.

    Returns
    -------
    str
        The assembled label.
    """
    name = row.name.strftime("%m-%d %H:%M:%S") + " "

    model_class = row.get("model_class")
    if _is_present(model_class):
        # Test and read MODEL_ALIASES with the same string key. Testing the raw value
        # but reading with str(value) raises KeyError (or misses an alias) whenever the
        # stored model class is not already a string.
        model_key = str(model_class)
        name += MODEL_ALIASES[model_key] if model_key in MODEL_ALIASES else model_key
        name += " "

    special_features = row.get("special_features")
    if _is_present(special_features):
        name += str(special_features)
    else:
        name += str(row.get("model_purpose", ""))

    pred_lab = row["predicts"]
    return f"{name} ({', '.join(str(x) for x in pred_lab)})"

In [None]:
df_show = df_show.copy()
styles = style_generator()

# Plot the models in chronological order. "timestamp" is the *name* of the index (or of
# a column), never one of the index values, so the axis names have to be checked; the
# previous test against the index values was always False and the sort never ran.
if "timestamp" in df_show.index.names or "timestamp" in df_show.columns:
    df_show = df_show.sort_values(by="timestamp", ascending=True)

# Create figure and axis
fig, ax = plt.subplots(figsize=(16, 7))
alpha = 0.7
# (index label, Line2D, row) for every plotted model, used later to look up line colours.
lines = []

# Plot each row as one line across the metric columns, each with the next free style.
for idx, row in df_show.iterrows():
    (line,) = ax.plot(
        num_cols,
        row[num_cols],
        label=get_row_label(row),
        alpha=alpha,
        **next(styles),
    )
    lines.append((idx, line, row))

# Mark maximum value per column with a "v" marker drawn 0.05 above the best value.
for x, col in enumerate(num_cols):
    max_idx = df_show[col].idxmax()
    row = df_show.loc[max_idx]
    y = row[col] + 0.05

    # Reuse the colour of the line that belongs to the best row; black if none is found.
    line_color = "black"
    for i, l, r in lines:
        if i == max_idx:
            line_color = l.get_color()
            break

    ax.plot(x, y, marker="v", color=line_color, markersize=8, alpha=alpha)

# Axis and legend settings
ax.set_title("Comparison of models by selected metrics")

# One tick per metric column, labelled with the column name.
tick_positions = range(len(num_cols))
ax.set_xticks(tick_positions)
ax.set_xticklabels(num_cols, rotation=20, ha="right")

ax.set_ylim(0, 1)
ax.grid(axis="y", linestyle="--", linewidth=0.5)

# Legend is placed outside the axes, to the right of the plot.
legend_columns = min(1, len(df_show))
ax.legend(bbox_to_anchor=(1.01, 1), loc="upper left", ncol=legend_columns)

plt.tight_layout()
plt.show()

In [None]:
def plot_single_model(data, label, styles, ax):
    """Plot the first row of `data` as one line and annotate every point with its value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns are the x categories; only row 0 is drawn.
    label : str
        Legend label of the line.
    styles : dict
        Extra keyword arguments forwarded to `ax.plot` (may be empty).
    ax : matplotlib axes
        Axes to draw on.
    """
    metric_names = list(data.columns)
    first_row = data.iloc[0]

    (line,) = ax.plot(metric_names, first_row, label=label, **styles)

    for position, metric in enumerate(metric_names):
        value = float(first_row[metric])
        # Two-decimal value next to the point, in the same colour as the line.
        ax.annotate(
            f"{value:.2f}",
            xy=(position, value),
            xytext=(5, 5),
            textcoords="offset points",
            ha="left",
            color=line.get_color(),
            fontsize=9,
        )


In [None]:
# Comparison of a single model vs all
# Timestamp of the run to highlight; None or "" disables the highlight.
target_model_timestamp = "2025-07-09 20:22:44"

plt.rcParams["font.size"] = 14
fig, ax = plt.subplots(figsize=(16, 7))

# Names of the numeric (metric) columns, as a plain list.
col_present = df.select_dtypes(include="number").columns.tolist()

# Canonical "a,b,c" key of each model's predicted label set. It does not depend on the
# group being plotted, so it is computed once rather than on every loop pass.
models_by_labels = df.reset_index()
models_by_labels["rowp"] = models_by_labels["predicts"].map(
    lambda x: ",".join(sorted([str(y) for y in x]))
)

for predicts in ["dia,no dia", "dia,no dia,pre"]:
    # Names used here are specific to this cell: `df_max` and `mask_2cls` are left
    # alone so that re-running the cells that define and read them still works.
    included_models = models_by_labels[models_by_labels["rowp"] == predicts].copy()

    # Metric columns only, indexed by the label key and converted to float.
    group_metrics = included_models.set_index("rowp")[col_present].astype(
        {c: "float" for c in col_present}, errors="ignore"
    )

    df_agg = group_metrics.groupby(group_metrics.index.get_level_values("rowp")).agg(
        ["min", "max", "median"]
    )

    # Across all groups (the filter above leaves at most one)
    for group in df_agg.index[::-1]:
        min_vals = df_agg.loc[group].xs("min", level=1)
        max_vals = df_agg.loc[group].xs("max", level=1)
        median_vals = df_agg.loc[group].xs("median", level=1)

        # Dashed median line plus a shaded min/max band for the group.
        x = list(range(len(median_vals)))
        (line,) = ax.plot(
            x, median_vals, label=f"{group} median", linestyle="--", alpha=0.7
        )
        ax.fill_between(x, min_vals, max_vals, alpha=0.3, label=f"{group} min/max")

        # "Best" model of the group: smallest sum of squared distances to 1 over all
        # metric columns.
        df_row_dist1 = (
            included_models.loc[:, col_present].sub(1).abs().pow(2).sum(axis=1)
        )
        best_row = df_row_dist1.idxmin()
        df_ = included_models.loc[best_row, :].to_frame().T

        plot_single_model(
            df_[col_present],
            predicts + " best (" + str(df_.loc[best_row, "timestamp"]) + ")",
            {"color": line.get_color()},
            ax=ax,
        )

# All two- and three-class models, with the timestamp as a regular column.
all_models = df.reset_index()
label_key = all_models["predicts"].map(
    lambda labels: ",".join(sorted(map(str, labels)))
)
all_models = all_models[label_key.isin(["dia,no dia", "dia,no dia,pre"])]

# Draw the most recent model (largest timestamp) with the default line style.
latest_model = all_models.nlargest(1, "timestamp")
for idx, row in latest_model.iterrows():
    plot_single_model(
        row.to_frame().T.loc[:, col_present],
        f"last ({row['dir']!s})",
        {},
        ax=ax,
    )

# Draw the explicitly selected model, if a timestamp is configured and a model with
# exactly that timestamp exists.
selected_ts = None
if target_model_timestamp is not None and target_model_timestamp != "":
    selected_ts = pd.to_datetime(target_model_timestamp)

if selected_ts is not None and selected_ts in all_models["timestamp"].values:
    selected_rows = all_models[all_models["timestamp"].values == selected_ts]
    for idx, row in selected_rows.iterrows():
        plot_single_model(
            row.to_frame().T.loc[:, col_present],
            f"selected ({row['dir']!s})",
            {},
            ax=ax,
        )

# Title, ticks, grid and legend of the comparison figure.
ax.set_title("Comparison of a selected models vs all other by selected metrics")

# One x tick per metric column, labelled with the column name.
ax.set_xticks(range(len(col_present)))
ax.set_xticklabels(col_present, rotation=20, ha="right")

# Fixed 0..1 range with a tick and a dashed grid line every 0.1.
ax.set_ylim(0, 1)
ax.set_yticks([tick / 10 for tick in range(11)])
ax.yaxis.grid(True, linestyle="--", alpha=0.5)

ax.legend()

plt.tight_layout()
plt.show()

In [None]:
# `value in series` looks the value up in the Series *index*, not in its values, and the
# raw string was never converted to a timestamp, so the old check could not match a
# model. Compare against the column values instead.
all_models["timestamp"].eq(pd.to_datetime(target_model_timestamp)).any()

In [None]:
    # Same check with the None / "" guards. The membership test is done on the column
    # values, because `value in series` only inspects the Series index.
    (target_model_timestamp is not None
    and target_model_timestamp != ""
    and all_models["timestamp"].eq(pd.to_datetime(target_model_timestamp)).any())