In [1]:
import sys
import os
# Append the parent of the current working directory to the module search path.
# NOTE(review): presumably this is what lets `mask_generator` (imported below)
# resolve when the notebook is run from its own sub-folder — confirm.
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))

import yaml
import pandas as pd
import mask_generator.settings as settings

In [2]:
# Directory whose immediate sub-directories are scanned as experiment runs.
# The path is relative, so it is resolved against the current working directory.
EXPERIMENTS_DIR = "../experiments"

def load_yaml(path: str) -> dict:
    """Read a YAML file and return its parsed content.

    Args:
        path: Location of the YAML file to read.

    Returns:
        The object produced by ``yaml.safe_load``. When the file contains no
        document, ``safe_load`` yields ``None``; an empty dict is returned
        instead so the declared return type holds.

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform's locale
    # encoding, which differs between operating systems.
    with open(path, "r", encoding="utf-8") as f:
        data = yaml.safe_load(f)
    # Only the "empty file" case is normalised; any other parsed value is
    # handed back untouched.
    return {} if data is None else data

def load_run_yaml(run_path: str) -> dict:
    """Load the config and results YAML files stored in one run directory.

    The file names come from ``settings.config_filename`` and
    ``settings.results_filename``.

    Args:
        run_path: Directory of a single run.

    Returns:
        ``{"config": ..., "results": ...}`` holding the parsed files, or an
        empty dict when either file does not exist.
    """
    yaml_paths = {
        "config": os.path.join(run_path, settings.config_filename),
        "results": os.path.join(run_path, settings.results_filename),
    }

    # Both files are required; a run lacking either one is reported as empty.
    if not all(os.path.exists(p) for p in yaml_paths.values()):
        return {}

    return {section: load_yaml(p) for section, p in yaml_paths.items()}

def flatten_dict(d: dict, parent_key: str = '', sep: str = '.') -> dict:
    """Collapse a nested dict into a single level with joined key paths.

    Args:
        d: Mapping to flatten; values that are dicts are descended into.
        parent_key: Prefix placed before every key of ``d`` (joined with
            ``sep``). When falsy, keys are used unchanged.
        sep: Separator inserted between path components.

    Returns:
        A new dict mapping each joined path to its non-dict value. Nested
        dicts that are empty contribute no entry. Lists and other containers
        are kept as values and not descended into.
    """
    flat = {}
    for key, value in d.items():
        path = f"{parent_key}{sep}{key}" if parent_key else key
        if not isinstance(value, dict):
            flat[path] = value
            continue
        # Recurse with the accumulated path as the prefix for the sub-keys.
        flat.update(flatten_dict(value, path, sep=sep))
    return flat

def load_experiments(experiments_dir: str = EXPERIMENTS_DIR) -> pd.DataFrame:
    """Collect every run under ``experiments_dir`` into one summary table.

    Each immediate sub-directory is treated as a run. Runs for which
    ``load_run_yaml`` returns nothing (config or results file missing) are
    skipped. For the others, the config and results are flattened and merged
    into one row.

    Args:
        experiments_dir: Directory containing one sub-directory per run.

    Returns:
        A DataFrame with one row per loaded run. Columns are the flattened
        config and results keys plus ``run_dir``. On a name clash, results
        keys overwrite config keys, and ``run_dir`` overwrites both.

    Raises:
        ValueError: If no run could be loaded.
    """
    # os.listdir yields entries in arbitrary order; sorting makes the row
    # order (and anything exported from the table) reproducible.
    run_dirs = sorted(
        path
        for path in (os.path.join(experiments_dir, name)
                     for name in os.listdir(experiments_dir))
        if os.path.isdir(path)
    )

    data = []
    for run_dir in run_dirs:
        run_data = load_run_yaml(run_dir)
        if not run_data:
            continue
        # `or {}` tolerates an empty YAML file whose parsed content is None.
        summary = {
            **flatten_dict(run_data["config"] or {}),
            **flatten_dict(run_data["results"] or {}),
        }
        summary["run_dir"] = run_dir
        data.append(summary)

    if not data:
        raise ValueError("No valid experiment data found.")
    # A frame built from a list of records already has a 0..n-1 index, so no
    # index reset is needed.
    return pd.DataFrame(data)

In [26]:
# Build the summary table from every run found under EXPERIMENTS_DIR.
df = load_experiments(EXPERIMENTS_DIR)

# Write the table as CSV inside the experiments directory, without the index.
df.to_csv(os.path.join(EXPERIMENTS_DIR, "experiments_summary.csv"), index=False)

# Kept as the final expression: a notebook only displays the value of a cell's
# last expression, so placed mid-cell this listing was never shown.
df.columns

In [25]:
def _stringify_lists(value):
    """Return ``str(value)`` for a list; hand any other value back unchanged."""
    if isinstance(value, list):
        return str(value)
    return value


def _best_test_dice(frame, key):
    """Maximum ``test.dice`` per distinct value of ``key``, largest first."""
    dice_by_group = frame.groupby(key)["test.dice"]
    return dice_by_group.max().sort_values(ascending=False)


# Work on a copy so the helper column below is not added to `df`.
df1 = df.copy()
# Lists are not hashable, so they are turned into their string form before
# being used as group keys.
df1["augment_str"] = df1["training.augmentations"].apply(_stringify_lists)

top_augments = _best_test_dice(df1, "augment_str")
print("Top augmentations by max test.dice:")
print(top_augments)

top_dropout = _best_test_dice(df1, "model.dropout")
print("\nTop dropout values:")
print(top_dropout)

# Disabled draft (was commented out, originally in French): grouped df1 by an
# "augmentations_key" column, which this cell does not create, and printed the
# top 10 groups by mean test.dice and by best test.dice.

Top augmentations by max test.dice:
augment_str
['geometry', 'dropout', 'color_invariance', 'color_variation', 'blur', 'noise', 'weather']    0.571323
['geometry', 'color_variation']                                                               0.506237
['geometry', 'color_invariance', 'color_variation']                                           0.501950
['geometry', 'weather', 'color_variation', 'blur']                                            0.493002
['geometry', 'dropout', 'noise']                                                              0.478952
['geometry', 'weather', 'blur']                                                               0.475796
['geometry', 'weather']                                                                       0.439259
['geometry', 'color_invariance']                                                              0.425963
['geometry', 'dropout']                                                                       0.373711
['geometry', 'blur']     