In [54]:
import json
from pathlib import Path
from typing import Dict, Any

import pandas as pd


def _read_run_stats(results_path: Path) -> Dict[str, Any]:
    """
    Extract the global run statistics from a trial's ``results.txt``.

    The file is scanned line by line for the two labelled entries
    ``Tiempo total (s): <float>`` and ``Iteraciones ejecutadas: <int>``.

    Returns a dict with the keys ``runtime_seconds`` and
    ``iterations_executed``. A value that is absent (missing file, missing
    line or unparsable number) is reported as NaN instead of None so that the
    DataFrame columns built from it keep a numeric dtype even when no trial
    provides the value.
    """
    stats: Dict[str, Any] = {
        "runtime_seconds": float("nan"),
        "iterations_executed": float("nan"),
    }
    if not results_path.exists():
        return stats

    # Label prefix -> (output key, converter applied to the text after the label).
    fields = {
        "Tiempo total (s):": ("runtime_seconds", float),
        "Iteraciones ejecutadas:": ("iterations_executed", int),
    }

    with open(results_path, "r", encoding="utf-8") as f:
        for raw_line in f:
            line = raw_line.strip()
            for prefix, (key, convert) in fields.items():
                if not line.startswith(prefix):
                    continue
                try:
                    # Take everything after the label, so a value that itself
                    # contains ":" is not silently truncated.
                    stats[key] = convert(line[len(prefix):])
                except ValueError:
                    # Best effort: keep the previous value, but say so.
                    print(f"⚠️  Valor no numérico en {results_path}: {line!r}")
                break

    return stats


def analyze_experiments(base_dir: str) -> pd.DataFrame:
    """
    Analyze every trial stored under ``base_dir``.

    Expected layout:
        base_dir/ExpID_trial_000/
            - config.json
            - metrics.csv
            - results.txt
            - trajectories.csv (not used here)

    For each trial folder the last row of ``metrics.csv`` is taken as the
    final state of the algorithm, the runtime and iteration count are read
    from ``results.txt`` (optional), and the run parameters are read from
    ``config.json``. Trials are then grouped by experiment id, i.e. the part
    of ``experiment_name`` that precedes ``"_trial_"``.

    Folders lacking ``config.json`` or ``metrics.csv``, or whose
    ``metrics.csv`` has no data rows, are reported and skipped.

    Returns:
        A DataFrame with one row per ``experiment_id`` and, for every numeric
        column, a ``<column>_mean`` and a ``<column>_std`` column.

    Raises:
        ValueError: if no valid trial is found in ``base_dir``.
        KeyError: if a config or metrics file lacks a required field.
    """
    base_path = Path(base_dir)
    # Sorted so that warnings are emitted in a stable, reproducible order.
    experiment_folders = sorted(f for f in base_path.iterdir() if f.is_dir())

    aggregated: list[Dict[str, Any]] = []

    for folder in experiment_folders:
        config_path = folder / "config.json"
        metrics_path = folder / "metrics.csv"
        results_path = folder / "results.txt"

        if not metrics_path.exists() or not config_path.exists():
            print(f"⚠️  Faltan archivos en {folder}, se ignora.")
            continue

        # ----------------------------
        # Config
        # ----------------------------
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)

        # Base experiment name (A1, B2, ...)
        exp_id = config["experiment_name"].split("_trial_")[0]

        # ----------------------------
        # Per-iteration metrics
        # ----------------------------
        try:
            df_metrics = pd.read_csv(metrics_path)
        except pd.errors.EmptyDataError:
            # A zero-byte file has no header to parse; treat it as "no rows".
            df_metrics = pd.DataFrame()

        if df_metrics.empty:
            # Without data rows there is no final state to report; skip the
            # trial instead of aborting the whole analysis on iloc[-1].
            print(f"⚠️  metrics.csv vacío en {folder}, se ignora.")
            continue

        # Last row = final state of the algorithm
        final_row = df_metrics.iloc[-1]

        # ----------------------------
        # Runtime and number of iterations (results.txt)
        # ----------------------------
        run_stats = _read_run_stats(results_path)

        aggregated.append(
            {
                "experiment_id": exp_id,
                "trial_name": config["experiment_name"],

                # ---- final metrics from metrics.csv ----
                "final_best_fit": float(final_row["best_fit"]),
                "final_avg_velocity": float(final_row["avg_velocity"]),
                "final_diversity": float(final_row["diversity"]),
                "final_num_improving": float(final_row["num_improving"]),

                # ---- global metrics (results.txt) ----
                "runtime_seconds": run_stats["runtime_seconds"],
                "iterations_executed": run_stats["iterations_executed"],

                # ---- config parameters ----
                "w": float(config["w"]),
                "c1": float(config["c1"]),
                "c2": float(config["c2"]),
                "vmax": float(config["vmax"]),
                "swarm_size": int(config["swarm_size"]),
                "initializer_vmax_fraction": float(
                    config.get("initializer_vmax_fraction", 0.0)
                ),
            }
        )

    if not aggregated:
        raise ValueError(f"No se han encontrado trials válidos en {base_dir}")

    df_trials = pd.DataFrame(aggregated)

    # -----------------------------------
    # Aggregate per experiment (numeric columns only)
    # -----------------------------------
    numeric_cols = df_trials.select_dtypes(include="number").columns

    summary = (
        df_trials.groupby("experiment_id")[numeric_cols]
        .agg(["mean", "std"])
        .reset_index()
    )

    # Flatten the column MultiIndex: final_best_fit_mean, final_best_fit_std, ...
    # The grouping key comes back from reset_index() with an empty second level.
    summary.columns = [
        col if stat == "" else f"{col}_{stat}" for col, stat in summary.columns
    ]

    return summary


In [55]:
# Experiment selector: family letter and experiment number within that family.
letra, numero = "D", "4"

# Folder holding the trials of the selected experiment; the summary CSV is
# written into that same folder.
# NOTE(review): `dir` shadows the Python builtin of the same name. It is kept
# here because other cells may rely on it; consider renaming it notebook-wide.
dir = f"./results/experimentos_{letra}/experimento_{letra}{numero}/"
csv_file = dir + f"summary_experiment_{letra}{numero}.csv"

# Aggregate all trials of the experiment and persist the per-experiment summary.
summary = analyze_experiments(dir)
summary.to_csv(csv_file, index=False)


In [56]:
# Display the per-experiment summary DataFrame returned by analyze_experiments.
summary

Unnamed: 0,experiment_id,final_best_fit_mean,final_best_fit_std,final_avg_velocity_mean,final_avg_velocity_std,final_diversity_mean,final_diversity_std,final_num_improving_mean,final_num_improving_std,runtime_seconds_mean,...,c1_mean,c1_std,c2_mean,c2_std,vmax_mean,vmax_std,swarm_size_mean,swarm_size_std,initializer_vmax_fraction_mean,initializer_vmax_fraction_std
0,D4,2.315992,1.615177,4.504911e-08,1.892007e-07,3.904209,0.432896,0.166667,0.379049,0.043411,...,2.0,0.0,0.0,0.0,0.5,0.0,20.0,0.0,1.0,0.0
