In [1]:
########################################
#             PATH SETUP
########################################

import sys
import glob
import os
sys.path.insert(0, os.path.abspath("."))

########################################
#             LIBRERIES SETUP
########################################
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import time
from pathlib import Path
from collections.abc import Iterable
from typing import Dict
import seaborn as sns
import math, re
########################################
#             SIMULATION LIBRARIES
########################################
from lemer.rngs import MultiStreamRNG
from lemer.rvms import *
from typing import List, Optional, Tuple
from simulator.simulation import Simulation

In [2]:

BASE_DIR = ".output_simulation"

# configurazione all'inizio del notebook
OBJ = 3   # scegli 1, 2 o 3

# costruisco dinamicamente il nome del file
config_file = f"obj{OBJ}.json"
config_path = f"analytic_sweep_lambda.csv"
TITLE_PREFIX = f"OBJ{OBJ}"



import pandas as pd
import numpy as np

In [3]:
def find_csv_for_config(config_path: str, base_dir=BASE_DIR, target="results") -> List[str]:
    cfg_name = Path(config_path).stem
    pattern  = os.path.join(base_dir, f"{target}_{cfg_name}*.csv")
    return sorted(glob.glob(pattern))




def _parse_lambda_from_filename(fname: str) -> Optional[float]:
    base = Path(fname).stem.lower()
    m = re.search(r"(?:lam|lambda|gamma|load)[=_]?([0-9]+(?:\.[0-9]+)?)", base)
    if m: return float(m.group(1))
    # fallback prudente
    for tok in re.findall(r"([0-9]+\.[0-9]+)", base):
        val = float(tok)
        if 0.1 <= val <= 5.0:
            return val
    return None

def load_runs(csv_files: List[str]) -> pd.DataFrame:
    dfs = []
    for i, f in enumerate(csv_files):
        df = pd.read_csv(f)
        df["source"]  = os.path.basename(f)
        df["replica"] = i
        if "arrival_rate" not in df.columns:
            df["arrival_rate"] = _parse_lambda_from_filename(f)
        dfs.append(df)
    if not dfs:
        raise FileNotFoundError("Nessun CSV.")
    df = pd.concat(dfs, ignore_index=True)

    if "mean_response_time" not in df.columns:
        for alt in ("response_time_mean", "rt_mean"):
            if alt in df.columns:
                df = df.rename(columns={alt: "mean_response_time"})
                break
    return df


In [5]:
files = find_csv_for_config(config_file, base_dir=BASE_DIR)

In [6]:
df_all = load_runs(files)


In [8]:
import numpy as np
import pandas as pd

def extract_replica_means_from_summary(
    df_all: pd.DataFrame,
    lam: float,
    scope: str = "OVERALL",
    column: str = "mean_response_time",
):
    """
    Estrae un valore per replica dal CSV RIASSUNTIVO (una riga per run/scope),
    filtrando per λ e scope. Ritorna un array y con un valore per replica.
    """
    d = df_all.copy()
    d.columns = d.columns.str.strip().str.lower()

    required = {"scope", "arrival_rate", column}
    missing = required - set(d.columns)
    if missing:
        raise KeyError(f"Mancano colonne {sorted(missing)} nel DataFrame")

    mask = (
        (d["scope"].astype(str).str.upper() == scope.upper()) &
        (np.isclose(d["arrival_rate"].astype(float), float(lam), rtol=1e-6, atol=1e-9))
    )
    y = d.loc[mask, column].astype(float).to_numpy()

    if y.size == 0:
        raise ValueError(f"Nessun valore trovato per λ={lam}, scope={scope}, col={column}")
    return y

# t-critico 95% (df<=30 tabella, altrimenti 1.96)
_T95 = {
    1:12.706, 2:4.303, 3:3.182, 4:2.776, 5:2.571, 6:2.447, 7:2.365, 8:2.306, 9:2.262,
    10:2.228, 11:2.201, 12:2.179, 13:2.160, 14:2.145, 15:2.131, 16:2.120, 17:2.110,
    18:2.101, 19:2.093, 20:2.086, 21:2.080, 22:2.074, 23:2.069, 24:2.064, 25:2.060,
    26:2.056, 27:2.052, 28:2.048, 29:2.045, 30:2.042
}
def tcrit95(n: int) -> float:
    df = max(1, n-1)
    return _T95.get(min(df, 30), 1.96)

def ci95_from_samples(y: np.ndarray):
    """
    CI 95% su campioni indipendenti (repliche).
    Ritorna (mean, hw, lo, hi, rel_hw_pct).
    """
    y = np.asarray(y, dtype=float)
    n = y.size
    mean = float(np.mean(y))
    s = float(np.std(y, ddof=1)) if n > 1 else 0.0
    se = s / np.sqrt(n) if n > 0 else np.nan
    hw = tcrit95(n) * se
    lo, hi = mean - hw, mean + hw
    rel_hw_pct = (hw/mean*100.0) if mean != 0 else np.inf
    return mean, hw, lo, hi, rel_hw_pct

def required_R_for_relative_hw(y_pilot: np.ndarray, target_rel=0.05, min_R=8):
    """
    Stima quante repliche servono per avere half-width relativo <= target_rel.
    Usa varianza della pilota.
    """
    y = np.asarray(y_pilot, dtype=float)
    n = y.size
    mean = float(np.mean(y))
    s = float(np.std(y, ddof=1)) if n > 1 else 0.0
    if mean == 0 or s == 0:
        return max(min_R, n)  # fallback
    # iteriamo su R crescenti finché tcrit* s/sqrt(R) <= target_rel*mean
    R = max(min_R, n)
    while True:
        if tcrit95(R) * (s/np.sqrt(R)) <= target_rel * mean:
            return R
        R += 1


In [9]:
# OVERALL, λ=1.4, stimiamo CI su mean_response_time
vals = extract_replica_means_from_summary(df_all, lam=1.4, scope="OVERALL", column="mean_response_time")
m, hw, lo, hi, rel = ci95_from_samples(vals)
print(f"N={len(vals)} | mean={m:.6g} | CI95=[{lo:.6g}, {hi:.6g}]  ±{hw:.6g} (±{rel:.2f}%)")

# Quante repliche servono per ±5%?
target_rel=0.02
R_star = required_R_for_relative_hw(vals, target_rel=target_rel, min_R=8)
print(f"Repliche consigliate per {target_rel*100}% ≈ {R_star}")


N=40 | mean=2369.21 | CI95=[2328.05, 2410.37]  ±41.1622 (±1.74%)
Repliche consigliate per 2.0% ≈ 40
