In [17]:
import glob
import re
from pathlib import Path
from typing import Tuple
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# ------------------------------------------------------------
#  Media e CI95% tra repliche per CSV "scope,metric,value,...,time"
# ------------------------------------------------------------

def plot_rt_mean_ci(
    files_glob: str,
    metric_name: str = "mean_response_time",
    scopes: Tuple[str, ...] = ("OVERALL",),
    show_runs: bool = False,
    grid_dt: float | None = None,
    figsize=(10, 6),
    legend_ncol: int = 2
):
    """
    Legge N file CSV e, per ciascuno scope richiesto, calcola e disegna
    la media tra repliche e la CI95% nel tempo.

    Parametri:
    - files_glob : pattern dei file (es. "out/conv_obj1_run*_seed*.csv")
    - metric_name: metrica da tracciare (es. "mean_response_time", "std_response_time")
    - scopes     : tuple di scope (es. ("OVERALL",) oppure ("NODE_A","NODE_B","NODE_P"))
    - show_runs  : se True, mostra anche le singole repliche in trasparenza
    - grid_dt    : passo della griglia temporale comune. Se None -> auto (mediana dei passi)
    - figsize, legend_ncol: estetica
    """
    files = sorted(glob.glob(files_glob))
    if not files:
        raise FileNotFoundError(f"Nessun file trovato per: {files_glob}")

    # ---------- helper: fattore t a 95% (due code) senza SciPy
    def _t_multiplier(df: int) -> float:
        # valori critici t_0.975,df per df=1..30; oltre, approx normale 1.96
        t95 = {
            1:12.706, 2:4.303, 3:3.182, 4:2.776, 5:2.571, 6:2.447, 7:2.365, 8:2.306, 9:2.262, 10:2.228,
            11:2.201, 12:2.179, 13:2.160, 14:2.145, 15:2.131, 16:2.120, 17:2.110, 18:2.101, 19:2.093, 20:2.086,
            21:2.080, 22:2.074, 23:2.069, 24:2.064, 25:2.060, 26:2.056, 27:2.052, 28:2.048, 29:2.045, 30:2.042
        }
        return t95.get(int(df), 1.96)

    # ---------- helper: carica e filtra (scope, metric) -> (t, v) per una replica
    def _load_one_series(path: str, scope_sel: str, metric_sel: str) -> tuple[np.ndarray, np.ndarray]:
        df = pd.read_csv(path)
        # tipizzazione e igiene
        for c in ["scope", "metric"]:
            df[c] = df[c].astype(str)
        df["time"]  = pd.to_numeric(df["time"],  errors="coerce")
        df["value"] = pd.to_numeric(df["value"], errors="coerce")
        df = df.dropna(subset=["time", "value"])

        sub = df[(df["scope"] == scope_sel) & (df["metric"].str.lower() == metric_sel.lower())]
        if sub.empty:
            return np.array([]), np.array([])
        sub = sub.sort_values("time")

        # in caso di time duplicati, prendo l'ultimo (o potresti fare mean)
        sub = sub.groupby("time", as_index=False)["value"].last()

        return sub["time"].to_numpy(), sub["value"].to_numpy()

    # ---------- helper: determina passo griglia automatico
    def _auto_dt(all_times: list[np.ndarray]) -> float:
        diffs = []
        for t in all_times:
            if t.size >= 2:
                d = np.diff(t)
                # filtro outlier negativi/zero e valori troppo strani
                d = d[(d > 0) & np.isfinite(d)]
                if d.size:
                    diffs.append(np.median(d))
        if not diffs:
            return 1.0
        # mediana delle mediane
        return float(np.median(diffs))

    # ---------- figura con un asse per scope
    n = len(scopes)
    fig, axes = plt.subplots(n, 1, figsize=(figsize[0], max(figsize[1], 3*n)), sharex=True)
    if n == 1:
        axes = [axes]

    for ax, scope_sel in zip(axes, scopes):
        # carico tutte le serie (una per file) per questo scope
        series = []   # lista di tuple (t, v)
        labels = []   # nomi run (per opzionale show_runs)
        for f in files:
            t, v = _load_one_series(f, scope_sel, metric_name)
            if t.size == 0:
                continue
            series.append((t, v))
            stem = Path(f).stem
            m = re.search(r"(run\d+)", stem, re.IGNORECASE)
            labels.append(m.group(1) if m else stem)

        if not series:
            ax.text(0.5, 0.5, f"Nessuna serie trovata per {scope_sel}/{metric_name}",
                    ha="center", va="center")
            continue

        # griglia temporale comune
        if grid_dt is None:
            dt = _auto_dt([t for t, _ in series])
        else:
            dt = float(grid_dt)

        t_min = min(t[0] for t, _ in series if t.size)
        t_max = max(t[-1] for t, _ in series if t.size)
        grid = np.arange(t_min, t_max + dt/2, dt, dtype=float)

        # matrice valori: righe=repliche, colonne=tempo
        mat = np.full((len(series), grid.size), np.nan, dtype=float)

        for i, (t, v) in enumerate(series):
            # interpolazione lineare solo nel range [t.min, t.max], fuori -> NaN
            vi = np.interp(grid, t, v)
            mask_out = (grid < t.min()) | (grid > t.max())
            vi[mask_out] = np.nan
            mat[i, :] = vi

            if show_runs:
                ax.plot(grid, vi, alpha=0.25, linewidth=0.8)

        # statistiche tra repliche (ignorando NaN per punti fuori range)
        with np.errstate(invalid='ignore'):
            mean = np.nanmean(mat, axis=0)
            # std campionaria (ddof=1) solo dove ci sono almeno 2 repliche
            count = np.sum(~np.isnan(mat), axis=0)
            # somma dei quadrati centrati
            diffsq = (mat - mean) ** 2
            sse = np.nansum(diffsq, axis=0)
            std = np.full_like(mean, np.nan)
            ok = count >= 2
            std[ok] = np.sqrt(sse[ok] / (count[ok] - 1))
            se = np.full_like(mean, np.nan)
            se[ok] = std[ok] / np.sqrt(count[ok])

            # fattore t per ciascun punto (df = n-1)
            tmult = np.array([_t_multiplier(int(n-1)) if n>=2 else np.nan for n in count], dtype=float)
            half_width = tmult * se

        # plot media e CI
        ax.plot(grid, mean, label=f"mean {scope_sel}/{metric_name}", linewidth=1.8)
        ax.fill_between(grid, mean - half_width, mean + half_width,
                        alpha=0.25, label="CI 95%")

        ax.set_ylabel(f"{scope_sel}\n{metric_name}")
        ax.grid(True, linestyle="--", linewidth=0.5, alpha=0.6)
        ax.set_xlim(0,86400)

    axes[-1].set_xlabel("Tempo")
    axes[0].set_title(f"Media e CI95% tra repliche — {metric_name}")
    axes[0].legend(ncol=legend_ncol)
    plt.tight_layout()
    plt.show()


In [None]:
# Plot the across-replica mean and CI95% of the overall mean response time.
plot_rt_mean_ci(
    ".output_simulation/conv_obj1_run*_seed*.csv",
    metric_name="mean_response_time",
    scopes=("OVERALL",),
    # Set to True to also draw each individual run as a faint line.
    show_runs=False,
    # None lets the function estimate the grid step automatically.
    grid_dt=None,
)
