# Imports

In [219]:
import numpy as np
import pandas as pd
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns

# General variables

 ## Filenames

In [220]:
# Define the pattern of filenames

GameTheoretic_filename_pattern_DQN =  re.compile(r"results_(?P<simulation_index>\d{3})_(?P<episodes>\d+)_DQN_"
                                                r"(?P<emotion>[^_]+)_(?P<see_emotions>[^_]+)_"
                                                r"(?P<alpha>[\d.]+)_(?P<beta>[\d.]+)_(?P<smoothing>[^_]+)_(?P<threshold>[\d.]+)_(?P<rounder>[\d.]+)_"
                                                r"(?P<learning_rate>[\d.]+)_(?P<gamma>[\d.]+)_(?P<epsilon>[\d.]+)_(?P<epsilon_decay>[\d.]+)_(?P<epsilon_min>[\d.]+)_"
                                                r"(?P<batch_size>[\d.]+)_(?P<hidden_size>[\d.]+)_(?P<update_target_every>[\d.]+)_"
                                                r"(?P<random_suffix>\d{6})_(?P<suffix>[a-zA-Z]+_[a-zA-Z]+)\.csv"
)

GameTheoretic_filename_pattern_QL = re.compile(r"results_(?P<simulation_index>\d{3})_(?P<episodes>\d+)_QLearning_"
                                              r"(?P<emotion>[^_]+)_(?P<see_emotions>[^_]+)_"
                                              r"(?P<alpha>[\d.]+)_(?P<beta>[\d.]+)_(?P<smoothing>[^_]+)_(?P<threshold>[\d.]+)_(?P<rounder>[\d.]+)_"
                                              r"(?P<learning_rate>[\d.]+)_(?P<gamma>[\d.]+)_(?P<epsilon>[\d.]+)_(?P<epsilon_decay>[\d.]+)_(?P<epsilon_min>[\d.]+)_"
                                              r"(?P<random_suffix>\d{6})_(?P<suffix>[a-zA-Z]+_[a-zA-Z]+)\.csv"
)


Maze2D_filename_order_QL = re.compile(
    r"maze2d_results_(?P<simulation_index>\d{3})_(?P<episodes>\d+)_QLearning_"
    r"(?P<emotion>[^_]+)_(?P<see_emotions>[^_]+)_"
    r"(?P<alpha>[\d.]+)_(?P<beta>[\d.]+)_(?P<smoothing>[^_]+)_(?P<threshold>[\d.]+)_(?P<rounder>[\d.]+)_"
    r"(?P<learning_rate>[\d.]+)_(?P<gamma>[\d.]+)_(?P<epsilon>[\d.]+)_(?P<epsilon_decay>[\d.]+)_(?P<epsilon_min>[\d.]+)_"
    r"(?P<random_suffix>\d{6})_(?P<suffix>[a-zA-Z]+_[a-zA-Z]+)\.csv"
)

Maze2D_filename_order_DQN = re.compile(
    r"maze2d_results_(?P<simulation_index>\d{3})_(?P<episodes>\d+)_DQN_"
    r"(?P<emotion>[^_]+)_(?P<see_emotions>[^_]+)_"
    r"(?P<alpha>[\d.]+)_(?P<beta>[\d.]+)_(?P<smoothing>[^_]+)_(?P<threshold>[\d.]+)_(?P<rounder>[\d.]+)_"
    r"(?P<learning_rate>[\d.]+)_(?P<gamma>[\d.]+)_(?P<epsilon>[\d.]+)_(?P<epsilon_decay>[\d.]+)_(?P<epsilon_min>[\d.]+)_"
    r"(?P<batch_size>[\d.]+)_(?P<hidden_size>[\d.]+)_(?P<update_target_every>[\d.]+)_"
    r"(?P<random_suffix>\d{6})_(?P<suffix>[a-zA-Z]+_[a-zA-Z]+)\.csv"
)

FILENAME_PATTERNS = [
    GameTheoretic_filename_pattern_DQN,
    GameTheoretic_filename_pattern_QL,
    Maze2D_filename_order_DQN,
    Maze2D_filename_order_QL
]

FILENAME_PATTERNS_PAIR = [
    ("Gametheoretic", GameTheoretic_filename_pattern_DQN),
    ("Gametheoretic", GameTheoretic_filename_pattern_QL),
    ("maze2d", Maze2D_filename_order_DQN),
    ("maze2d", Maze2D_filename_order_QL)
]

# Functions

## CSV processing

### Parameter recovery from filenames

In [221]:
import os
import pandas as pd

def parse_results_filenames(folder_path: str, filename_patterns=None) -> pd.DataFrame:
    """
    Scans a folder for result filenames and extracts simulation parameters into a DataFrame.

    Args:
        folder_path (str): Path to the folder containing result CSV files.
        filename_patterns (list): List of compiled regex patterns to match filenames.
                                  If None, use global FILENAME_PATTERNS.

    Returns:
        pd.DataFrame: DataFrame containing parsed parameters from filenames.
    """
    if filename_patterns is None:
        filename_patterns = FILENAME_PATTERNS

    data = []

    for filename in os.listdir(folder_path):
        if not filename.endswith(".csv"):
            continue

        matched = False
        for pattern in filename_patterns:
            match = pattern.match(filename)
            if match:
                file_data = match.groupdict()
                file_data["filename"] = filename
                data.append(file_data)
                matched = True
                break  # Stop at the first match
        
        if not matched:
            print(f"Warning: filename did not match any pattern: {filename}")

    if not data:
        print("No matching filenames found.")
        return pd.DataFrame()

    df = pd.DataFrame(data)

    # Optional: convert numeric fields from str to float/int
    for col in df.columns:
        if col not in {"filename", "emotion", "see_emotions", "suffix"}:
            try:
                df[col] = pd.to_numeric(df[col])
            except Exception:
                pass  # leave as string if conversion fails

    return df

In [222]:
def print_unique_parameter_values(df: pd.DataFrame, exclude: list = None):
    """
    Print a table with parameter names and their unique values.

    Args:
        df (pd.DataFrame): The input DataFrame with simulation parameters.
        exclude (list): Optional list of column names to exclude (e.g., ['filename', 'simulation_index']).
    """
    if exclude is None:
        exclude = ['filename', 'simulation_index']

    param_cols = [col for col in df.columns if col not in exclude]

    summary = {
        "parameter": [],
        "unique_values": []
    }

    for col in param_cols:
        summary["parameter"].append(col)
        summary["unique_values"].append(sorted(df[col].dropna().unique().tolist()))

    summary_df = pd.DataFrame(summary)
    print(summary_df)


### Aggregation of csv

In [223]:
def aggregate_results_by_suffix(folder_path: str, target_suffix: str, source_filter: str = None) -> pd.DataFrame:
    all_data = []

    for filename in os.listdir(folder_path):
        if not filename.endswith(".csv"):
            continue

        for source_type, pattern in FILENAME_PATTERNS_PAIR:
            if source_filter and source_type.lower() != source_filter.lower():
                continue

            match = pattern.match(filename)
            if match:
                metadata = match.groupdict()
                if metadata.get("suffix", "").strip() == target_suffix.strip():
                    file_path = os.path.join(folder_path, filename)
                    try:
                        df = pd.read_csv(file_path)
                        for key, value in metadata.items():
                            df[key] = value
                        df["source"] = source_type.lower()  # normalize
                        all_data.append(df)
                    except Exception as e:
                        print(f"Error reading {filename}: {e}")
                break

    if not all_data:
        print(f"No matching files found for suffix '{target_suffix}' and source '{source_filter}'.")
        return pd.DataFrame()

    final_df = pd.concat(all_data, ignore_index=True)

    for col in final_df.columns:
        if col not in {"emotion", "see_emotions", "suffix", "filename", "source"}:
            try:
                final_df[col] = pd.to_numeric(final_df[col])
            except:
                pass

    filtered_tag = f"_{source_filter.lower()}" if source_filter else ""
    output_filename = f"aggregated_{target_suffix}{filtered_tag}.csv"
    output_path = os.path.join(folder_path, output_filename)
    final_df.to_csv(output_path, index=False)
    print(f"Saved aggregated data to: {output_path}")

    return final_df


## Data analysis

 ### Learning verification

In [224]:
import pandas as pd
import matplotlib.pyplot as plt

def windowed_avg_combined_reward(
    df: pd.DataFrame,
    reward_prefix: str = "total_combined_reward_",
    episode_column: str = "episode",
    simulation_id_column: str = "simulation_index",
    window_size: int = 5,
    aggregation_mode: str = "mean",  # or "best"
    plot: bool = False
) -> pd.DataFrame:
    """
    Computes a windowed moving average of combined rewards per episode across simulations.

    Args:
        df (pd.DataFrame): Input dataframe.
        reward_prefix (str): Prefix of reward columns per agent.
        episode_column (str): Column name for episodes.
        simulation_id_column (str): Column indicating different simulations.
        window_size (int): Window size for moving average.
        aggregation_mode (str): 'mean' for average across agents, 'best' for max reward among agents.
        plot (bool): Whether to plot the result.

    Returns:
        pd.DataFrame: DataFrame with ['episode', 'aggregated_reward', 'moving_avg'].
    """
    reward_cols = [col for col in df.columns if col.startswith(reward_prefix)]
    if not reward_cols:
        raise ValueError(f"No columns found with prefix '{reward_prefix}'")

    if aggregation_mode == "mean":
        df["aggregated_reward"] = df[reward_cols].mean(axis=1)
    elif aggregation_mode == "best":
        df["aggregated_reward"] = df[reward_cols].max(axis=1)
    else:
        raise ValueError("aggregation_mode must be 'mean' or 'best'")

    episode_avg = (
        df.groupby(episode_column)["aggregated_reward"]
        .mean()
        .reset_index()
        .rename(columns={"aggregated_reward": "mean_reward"})
    )

    # Apply moving average
    episode_avg["moving_avg"] = (
        episode_avg["mean_reward"].rolling(window=window_size, min_periods=1, center=True).mean()
    )

    if plot:
        plt.figure(figsize=(10, 5))
        plt.plot(episode_avg[episode_column], episode_avg["moving_avg"], label=f"Moving Avg ({aggregation_mode})")
        plt.xlabel("Episode")
        plt.ylabel("Reward")
        plt.title(f"{aggregation_mode.capitalize()} Agent Reward (Window={window_size})")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()

    return episode_avg


### Variable_calculation

#### Gini coefficient

In [225]:
def gini_coefficient(arr: np.ndarray) -> float:
    """Compute Gini coefficient of a 1D numpy array."""
    arr = arr.flatten()
    if np.amin(arr) < 0:
        arr = arr - np.amin(arr)  # Shift if negative values present
    mean = np.mean(arr)
    if mean == 0:
        return 0.0
    n = len(arr)
    diff_sum = np.sum(np.abs(np.subtract.outer(arr, arr)))
    gini = diff_sum / (2 * n**2 * mean)
    return gini

def compute_gini_for_df(df: pd.DataFrame, prefix: str) -> pd.Series:
    """
    Compute Gini coefficient across columns starting with prefix for each row in df.
    
    Args:
        df: pandas DataFrame.
        prefix: string prefix for target columns.
        
    Returns:
        pandas Series with Gini coefficients per row.
    """
    cols = [col for col in df.columns if col.startswith(prefix)]
    if not cols:
        raise ValueError(f"No columns found starting with prefix '{prefix}'")

    gini_series = df[cols].apply(lambda row: gini_coefficient(row.values), axis=1)
    return gini_series

#### Efficiency

In [226]:
def compute_efficiency_for_df(df: pd.DataFrame, prefix: str, new_column_name: str) -> pd.DataFrame:
    """
    Compute the average across columns starting with a given prefix for each row in df.

    Args:
        df (pd.DataFrame): Input DataFrame.
        prefix (str): Prefix for selecting target columns.
        new_column_name (str): Name of the new column to store the computed sum.

    Returns:
        pd.DataFrame: DataFrame with the new column added.
    """
    cols = [col for col in df.columns if col.startswith(prefix)]
    if not cols:
        raise ValueError(f"No columns found starting with prefix '{prefix}'")

    df[new_column_name] = df[cols].mean(axis=1)
    return df


#### efficiency of agents : to discuss

In [227]:
import ast
import numpy as np
import pandas as pd

def _clean_initial_resources_column(df: pd.DataFrame, col_name: str) -> None: # Added because sometimes they were str rather than int
    """Convert list-like string in initial_resources column to numeric."""
    def extract_number(x):
        if pd.isna(x):
            return np.nan
        try:
            parsed = ast.literal_eval(x)
            if isinstance(parsed, (list, tuple)) and len(parsed) > 0:
                return float(parsed[0])
            else:
                return np.nan
        except:
            return np.nan

    if col_name in df.columns and df[col_name].dtype == object:
        df[col_name] = df[col_name].apply(extract_number)
        df[col_name] = pd.to_numeric(df[col_name], errors='coerce')

def GT_compute_and_merge_depletion_from_step(df: pd.DataFrame) -> None:
    _clean_initial_resources_column(df, 'initial_resources')

    group_cols = ['simulation_index', 'episode']
    grouped = df.groupby(group_cols)

    depletion_metrics = grouped.agg(
        final_resource=('resource_remaining', 'last'),
        avg_resource=('resource_remaining', 'mean'),
        initial_resource=('initial_resources', 'first')
    ).reset_index()

    depletion_metrics['depletion_final'] = 1 - depletion_metrics['final_resource'] / depletion_metrics['initial_resource']
    depletion_metrics['depletion_cumulative'] = 1 - depletion_metrics['avg_resource'] / depletion_metrics['initial_resource']

    merged = df.merge(
        depletion_metrics[group_cols + ['depletion_final', 'depletion_cumulative']],
        on=group_cols, how='left'
    )

    df.loc[:, 'depletion_final'] = merged['depletion_final']
    df.loc[:, 'depletion_cumulative'] = merged['depletion_cumulative']


def GT_compute_depletion_from_summary(df: pd.DataFrame) -> None:
    # Clean initial_resources column if needed
    _clean_initial_resources_column(df, 'initial_resources')

    df.loc[:, 'depletion_final'] = 1 - df['resource_remaining'] / df['initial_resources']
    df.loc[:, 'depletion_early'] = 1 - df['total_steps'] / df['max_steps']


### Data visualization

In [228]:
# plot the fluctuation of a value over episodes or steps
def plot_value_fluctuation_by_simulation(
    df: pd.DataFrame,
    value_col: str, # name of the column over which we study the fluctuation
    simulation_col: str = 'seed',
    episode_col: str = 'episode',
    step_col: str = 'step',
    is_step_csv: bool = False,
    title: str = None,
    ylabel: str = None,
    rolling_window: int = None,
):
    """
    Plot the fluctuation of a value averaged per simulation over episodes (summary) or (step, episode) (step CSV).

    Args:
        df (pd.DataFrame): The input DataFrame (step or summary).
        value_col (str): The column to track (e.g. 'resource_remaining', 'reward').
        simulation_col (str): Column identifying the simulation (default 'seed').
        episode_col (str): Column identifying the episode (default 'episode').
        step_col (str): Column identifying the step (default 'step').
        is_step_csv (bool): True if using step CSV; False if using summary CSV.
        title (str): Optional plot title.
        ylabel (str): Label for y-axis (default = value_col).
        rolling_window (int): Optional window for smoothing (rolling mean).
    """
    if is_step_csv:
        group_cols = [simulation_col, episode_col, step_col]
    else:
        group_cols = [simulation_col, episode_col]

    grouped = df.groupby(group_cols)[value_col].mean().reset_index()

    plt.figure(figsize=(10, 6))
    for sim_id, sim_df in grouped.groupby(simulation_col):
        x = sim_df[episode_col] if not is_step_csv else sim_df.groupby([episode_col])[step_col].apply(list)
        y = sim_df[value_col]

        if rolling_window:
            y = y.rolling(rolling_window, min_periods=1).mean()

        label = f"Sim {sim_id}"
        plt.plot(sim_df[episode_col] if not is_step_csv else range(len(y)), y, label=label)

    plt.xlabel('Episode' if not is_step_csv else 'Time (by episode/step)')
    plt.ylabel(ylabel if ylabel else value_col)
    plt.title(title or f"Fluctuation of {value_col} over time")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [229]:
# density function of the depletion per episode
def plot_depletion_density_across_simulations(
    df: pd.DataFrame,
    depletion_col: str = 'depletion_final',
    episode_col: str = 'episode',
    method: str = 'kde',  # 'kde' or 'hist'
    bandwidth: float = 0.3,
    bins: int = 30,
    title: str = "efficiency of agents Density Across Simulations",
    figsize: tuple = (12, 6)
):
    """
    Plot a 2D density or histogram of depletion values across all simulations over episodes.

    Args:
        df (pd.DataFrame): DataFrame containing at least 'episode' and 'depletion_final'.
        depletion_col (str): Column for efficiency of agents.
        episode_col (str): Column for episode index.
        method (str): 'kde' for smoothed density, 'hist' for 2D histogram.
        bandwidth (float): Smoothing parameter for KDE (ignored for hist).
        bins (int): Number of bins for histogram (ignored for KDE).
        title (str): Plot title.
        figsize (tuple): Size of the figure.
    """
    plt.figure(figsize=figsize)

    if method == 'kde':
        sns.kdeplot(
            data=df,
            x=episode_col,
            y=depletion_col,
            fill=True,
            cmap="mako",
            bw_adjust=bandwidth,
            levels=100,
            thresh=0.05
        )
    elif method == 'hist':
        sns.histplot(
            data=df,
            x=episode_col,
            y=depletion_col,
            bins=bins,
            pmax=0.95,
            cbar=True,
            cmap="mako"
        )
    else:
        raise ValueError("method must be 'kde' or 'hist'")

    plt.title(title)
    plt.xlabel("Episode Number")
    plt.ylabel("efficiency of agents")
    plt.grid(True)
    plt.tight_layout()
    plt.show()


# Analysis of data

In [230]:
# --- 1. Gather metadata from filenames ---
folder_path = "."  # Folder containing the CSV results

parameter_dataframe = parse_results_filenames(folder_path=folder_path)

# Define the specific RL parameters of interest
desired_params = [
    "emotion", "see_emotions", "alpha", "beta", "smoothing", "threshold",
    "learning_rate", "gamma", "epsilon", "epsilon_decay", "epsilon_min",
    "batch_size", "hidden_size", "update_target_every"
]

param_cols = [col for col in parameter_dataframe.columns if col in desired_params]
unique_values_per_param = {col: sorted(parameter_dataframe[col].unique()) for col in param_cols}

# Convert to DataFrame with param names as columns and unique set of values as rows
max_len = max(len(v) for v in unique_values_per_param.values())
for k in unique_values_per_param:
    unique_values_per_param[k] += [None] * (max_len - len(unique_values_per_param[k]))

# Display of the unique sets of parameter values
summary_df = pd.DataFrame(unique_values_per_param)
print(summary_df.to_string(index=False))

emotion see_emotions  alpha  beta smoothing  threshold  learning_rate  gamma  epsilon  epsilon_decay  epsilon_min  batch_size  hidden_size  update_target_every
average         True    0.5   0.5    linear        0.5          0.001   0.99      1.0          0.995         0.01          16           64                    5


In [231]:
# --- Aggregate data for Maze2D ---
df_maze_summary = aggregate_results_by_suffix(
    folder_path=folder_path,
    target_suffix="episode_summary",
    source_filter="maze2d"
)

df_maze_step = aggregate_results_by_suffix(
    folder_path=folder_path,
    target_suffix="step_data",
    source_filter="maze2d"
)

# --- Aggregate data for GameTheoretic environment only ---
df_gt_summary = aggregate_results_by_suffix(
    folder_path=folder_path,
    target_suffix="episode_summary",
    source_filter="Gametheoretic"
)

df_gt_step = aggregate_results_by_suffix(
    folder_path=folder_path,
    target_suffix="step_data",
    source_filter="Gametheoretic"
)

No matching files found for suffix 'episode_summary' and source 'maze2d'.
No matching files found for suffix 'step_data' and source 'maze2d'.
Saved aggregated data to: .\aggregated_episode_summary_gametheoretic.csv
Saved aggregated data to: .\aggregated_step_data_gametheoretic.csv


## Calculation of Dependent Variables

In [232]:
print(df_gt_summary.shape)
print(df_gt_step.shape)

(45, 32)
(16357, 36)


In [233]:
# sets the name of the columns to compute the gini coefficient and utility over
personal_column_gt_step_prefix = "personal_reward_"
personal_column_gt_summary_prefix = "total_personal_reward_"
personal_column_maze_step_prefix = "personal_reward_"
personal_column_maze_summary_prefix = "total_personal_reward_"


try:
    print([col for col in df_maze_summary.columns if col.startswith(personal_column_maze_step_prefix)])
    df_maze_summary["gini_personal_reward"] = compute_gini_for_df(df_maze_summary, prefix="total_personal_reward_")
    print('succesfull for 2D summary')
except Exception as e:
    print(f"Could not compute Gini for df_maze_summary: {e}")

try:
    print([col for col in df_maze_step.columns if col.startswith(personal_column_maze_step_prefix)])
    df_maze_step["gini_personal_reward"] = compute_gini_for_df(df_maze_step, prefix=personal_column_gt_step_prefix)
    print('succesfull for 2D step')
except Exception as e:
    print(f"Could not compute Gini for df_maze_step: {e}")

try:
    df_gt_summary["gini_personal_reward"] = compute_gini_for_df(df_gt_summary, prefix=personal_column_gt_summary_prefix)
    print('succesfull for GameTheoretic summary')
except Exception as e:
    print(f"Could not compute Gini for df_gt_summary: {e}")

try:
    df_gt_step["gini_personal_reward"] = compute_gini_for_df(df_gt_step, prefix=personal_column_gt_step_prefix)
    print('succesfull for GameTheoretic step')
except Exception as e:
    print(f"Could not compute Gini for df_gt_step: {e}")


[]
Could not compute Gini for df_maze_summary: No columns found starting with prefix 'total_personal_reward_'
[]
Could not compute Gini for df_maze_step: No columns found starting with prefix 'personal_reward_'
succesfull for GameTheoretic summary
Could not compute Gini for df_gt_step: '<=' not supported between instances of 'float' and 'str'


In [234]:
try:
    GT_compute_and_merge_depletion_from_step(df_gt_step)  # creates "depletion_final" and "depletion_cumulative"
    print("Successfully computed resource depletion from df_gt_step.")
except Exception as e:
    print(f"Could not compute resource depletion from df_gt_step: {e}")

try:
    GT_compute_depletion_from_summary(df_gt_summary)  # creates "depletion_cumulative" and "depletion_early"
    print("Successfully computed resource depletion from df_gt_summary.")
except Exception as e:
    print(f"Could not compute resource depletion from df_gt_summary: {e}")

# Will maybe need another formula for the 2D ?

try:
    GT_compute_and_merge_depletion_from_step(df_maze_step) 
    print("Successfully computed resource depletion from df_maze_step.")
except Exception as e:
    print(f"Could not compute resource depletion from df_maze_step: {e}")

try:
    GT_compute_depletion_from_summary(df_maze_summary)
    print("Successfully computed resource depletion from df_maze_summary.")
except Exception as e:
    print(f"Could not compute resource depletion from df_maze_summary: {e}")

Successfully computed resource depletion from df_gt_step.
Successfully computed resource depletion from df_gt_summary.
Could not compute resource depletion from df_maze_step: 'simulation_index'
Could not compute resource depletion from df_maze_summary: 'resource_remaining'


In [235]:
# Compute the Efficiency of agents: set to the personal reward (dependent on resource consumption and not the social reward)
prefix_step = personal_column_gt_step_prefix
prefix_summary = personal_column_gt_summary_prefix

try:
    compute_efficiency_for_df(df=df_gt_step, prefix=prefix_step, new_column_name=f"{prefix_step}_averaged_efficiency")
    print("Successfully computed efficiency of agents from df_gt_step.")
except Exception as e:
    print(f"Could not compute efficiency of agents from df_gt_step: {e}")

try:
    compute_efficiency_for_df(df=df_gt_summary, prefix=prefix_summary, new_column_name=f"{prefix_summary}_averaged_efficiency")
    print("Successfully computed efficiency of agents from df_gt_summary.")
except Exception as e:
    print(f"Could not compute efficiency of agents from df_gt_summary: {e}")

# Will maybe need another formula for the 2D?

try:
    compute_efficiency_for_df(df=df_maze_step, prefix=prefix_step, new_column_name=f"{prefix_step}_averaged_efficiency")
    print("Successfully computed efficiency of agents from df_maze_step.")
except Exception as e:
    print(f"Could not compute efficiency of agents from df_maze_step: {e}")

try:
    compute_efficiency_for_df(df=df_maze_summary, prefix=prefix_summary, new_column_name=f"{prefix_summary}_averaged_efficiency")
    print("Successfully computed efficiency of agents from df_maze_summary.")
except Exception as e:
    print(f"Could not compute efficiency of agents from df_maze_summary: {e}")


Could not compute efficiency of agents from df_gt_step: unsupported operand type(s) for +: 'float' and 'str'
Successfully computed efficiency of agents from df_gt_summary.
Could not compute efficiency of agents from df_maze_step: No columns found starting with prefix 'personal_reward_'
Could not compute efficiency of agents from df_maze_summary: No columns found starting with prefix 'total_personal_reward_'


## Learning verification

In [236]:
df = df_gt_summary

df = df.merge(parameter_dataframe, on="simulation_index", how="left")

# 2. Define which parameters define a unique experiment setting
core_params = [col for col in parameter_dataframe.columns if col not in ['filepath', 'random_suffix', 'suffix']]

# 3. Group by unique experimental configurations
for param_values, group in df.groupby(core_params):
    # Build readable label for logging
    label = ', '.join(f"{k}={v}" for k, v in zip(core_params, param_values))

    print(f"\n--- Processing Group: {label} ---")

    # 4. Call your existing function for each group
    windowed_df = windowed_avg_combined_reward(
        df=group,
        reward_prefix="total_combined_reward_",
        episode_column="episode",
        simulation_id_column="simulation_index",
        window_size=10,
        aggregation_mode="mean",
        plot=True  # Or False if you're running in batch
    )


KeyError: 'episodes'

## Data Vizualization

In [None]:
# Plot fluctuation of ending ressources per episode
plot_value_fluctuation_by_simulation(df_gt_summary, value_col='resource_remaining', is_step_csv=False)


In [None]:
# Plot density function of the proportion of depletion
plot_depletion_density_across_simulations(df_gt_summary)

## Data Analysis

In [None]:
# Stats : t-test to compare the empathic comparisons

# Summary