In [None]:
import ast
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
def _read_melted(csv_path, run_id, var_name, drop_columns=()):
    """Read one per-run CSV, tag it with ``run_id`` and melt it to long format."""
    frame = pd.read_csv(csv_path)
    if drop_columns:
        # DataFrame.drop raises KeyError when a listed column is absent, so a
        # change in the CSV schema is noticed instead of silently ignored.
        frame = frame.drop(columns=list(drop_columns))
    frame['run_id'] = run_id
    return frame.melt(id_vars=['generation', 'run_id'], var_name=var_name, value_name='value')


def get_run_data(run_id, results_dir='./results'):
    """Load the condition, action and performance CSVs of a single run.

    Each file is read from ``{results_dir}/{run_id}/``, gets a ``run_id``
    column and is melted so that every column other than ``generation`` and
    ``run_id`` becomes one row per (generation, variable) pair.

    Args:
        run_id: Identifier of the run; used as the sub-directory name and as
            the value of the added ``run_id`` column.
        results_dir: Directory that contains one sub-directory per run.
            Defaults to ``'./results'``.

    Returns:
        A tuple ``(conditions, actions, performance)`` of long-format frames
        with the columns ``generation``, ``run_id``, ``value`` and
        ``condition`` / ``action`` / ``metric`` respectively. The
        ``best_fitness`` and ``std`` columns of ``performance.csv`` are
        dropped before melting.

    Raises:
        FileNotFoundError: If one of the three CSV files does not exist.
        KeyError: If ``performance.csv`` lacks ``best_fitness`` or ``std``,
            or a file lacks the ``generation`` column.
    """
    run_dir = f'{results_dir}/{run_id}'

    df_conditions_melted = _read_melted(f'{run_dir}/conditions.csv', run_id, 'condition')
    df_actions_melted = _read_melted(f'{run_dir}/actions.csv', run_id, 'action')
    df_performance_melted = _read_melted(
        f'{run_dir}/performance.csv',
        run_id,
        'metric',
        drop_columns=('best_fitness', 'std'),
    )

    return df_conditions_melted, df_actions_melted, df_performance_melted

In [None]:
def parse_data(run_ids: list[int]) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame, int]:
    """Load several runs and stack their long-format frames.

    Args:
        run_ids: Identifiers of the runs to load; each one is passed to
            ``get_run_data``.

    Returns:
        ``(combined_conditions, combined_actions, combined_performance,
        final_generation)`` where the three frames are the row-wise
        concatenation of the per-run frames (with a fresh 0..n-1 index) and
        ``final_generation`` is the largest ``generation`` value found in the
        combined conditions frame.

    Raises:
        ValueError: If ``run_ids`` is empty.
    """
    run_ids = list(run_ids)
    if not run_ids:
        raise ValueError("run_ids must contain at least one run id")

    per_run_frames = [get_run_data(run_id) for run_id in run_ids]
    conditions_parts, actions_parts, performance_parts = (list(part) for part in zip(*per_run_frames))

    # Every run contributes its own 0..n-1 index; without ignore_index the
    # combined frames would carry duplicate index labels.
    combined_conditions = pd.concat(conditions_parts, ignore_index=True)
    combined_actions = pd.concat(actions_parts, ignore_index=True)
    combined_performance = pd.concat(performance_parts, ignore_index=True)

    final_generation_df = combined_conditions['generation'].max()

    return combined_conditions, combined_actions, combined_performance, final_generation_df


In [None]:
def create_condition_graph(combined_conditions: pd.DataFrame, final_generation_df: int) -> None:
    """Plot the mean value of every condition per generation, aggregated across runs.

    The legend is ordered by each condition's mean value at
    ``final_generation_df`` (largest first) and every legend entry is suffixed
    with that mean. The same ranking is printed after the figure is shown.

    Args:
        combined_conditions: Long-format frame with the columns
            ``generation``, ``condition`` and ``value``. It is not modified.
        final_generation_df: Generation whose per-condition means define the
            legend order and the values shown in the legend.
    """
    # Sorted once and reused for the hue order, the legend and the printout.
    final_means = (
        combined_conditions.loc[combined_conditions['generation'] == final_generation_df]
        .groupby('condition')['value']
        .mean()
        .sort_values(ascending=False)
    )
    ordered_conditions = final_means.index.tolist()

    # Work on a copy: assigning the Categorical onto the caller's frame would
    # change the dtype of its 'condition' column as a side effect.
    # Conditions with no row at the final generation are not in the category
    # list and therefore become NaN here.
    plot_data = combined_conditions.assign(
        condition=pd.Categorical(
            combined_conditions['condition'], categories=ordered_conditions, ordered=True
        )
    ).sort_values(by='condition')

    fig_conditions, ax_conditions = plt.subplots(figsize=(15, 6))

    sns.lineplot(
        data=plot_data,
        x='generation',
        y='value',
        hue='condition',
        ax=ax_conditions,
        estimator='mean',  # mean value of each condition per generation
        errorbar='sd',     # shaded band of one standard deviation
    )

    ax_conditions.set_title("Mean Condition Frequencies Over Generations (Aggregated Across Runs)")
    ax_conditions.set_xlabel("Generation")
    ax_conditions.set_ylabel("Mean Frequency")

    handles, labels = ax_conditions.get_legend_handles_labels()

    # Membership test instead of comparing a float against a sentinel string.
    legend_labels = [
        f"{label} ({final_means[label]:.0f})" if label in final_means.index else f"{label} (N/A)"
        for label in labels
    ]

    # Legend sits outside the axes, to the right of the plot.
    ax_conditions.legend(handles=handles, labels=legend_labels, bbox_to_anchor=(1.05, 1), loc='upper left')
    fig_conditions.tight_layout()
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig_conditions)

    print("Final Generation conditions (mean across runs):")
    print(final_means.to_string())

In [None]:
def create_action_graph(combined_actions: pd.DataFrame, final_generation_df: int) -> None:
    """Plot the mean value of every action per generation, aggregated across runs.

    The legend is ordered by each action's mean value at
    ``final_generation_df`` (largest first) and every legend entry is suffixed
    with that mean. The same ranking is printed after the figure is shown.

    Args:
        combined_actions: Long-format frame with the columns ``generation``,
            ``action`` and ``value``. It is not modified.
        final_generation_df: Generation whose per-action means define the
            legend order and the values shown in the legend.
    """
    # Sorted once and reused for the hue order, the legend and the printout.
    final_means = (
        combined_actions.loc[combined_actions['generation'] == final_generation_df]
        .groupby('action')['value']
        .mean()
        .sort_values(ascending=False)
    )
    ordered_actions = final_means.index.tolist()

    # Work on a copy: assigning the Categorical onto the caller's frame would
    # change the dtype of its 'action' column as a side effect.
    # Actions with no row at the final generation are not in the category
    # list and therefore become NaN here.
    plot_data = combined_actions.assign(
        action=pd.Categorical(
            combined_actions['action'], categories=ordered_actions, ordered=True
        )
    ).sort_values(by='action')

    fig_actions, ax_actions = plt.subplots(figsize=(15, 6))

    sns.lineplot(
        data=plot_data,
        x='generation',
        y='value',
        hue='action',
        ax=ax_actions,
        estimator='mean',  # mean value of each action per generation
        errorbar='sd',     # shaded band of one standard deviation
    )

    ax_actions.set_title("Mean Action Frequencies Over Generations (Aggregated Across Runs)")
    ax_actions.set_xlabel("Generation")
    ax_actions.set_ylabel("Mean Frequency")

    handles, labels = ax_actions.get_legend_handles_labels()

    # Membership test instead of comparing a float against a sentinel string.
    legend_labels = [
        f"{label} ({final_means[label]:.0f})" if label in final_means.index else f"{label} (N/A)"
        for label in labels
    ]

    # Legend sits outside the axes, to the right of the plot.
    ax_actions.legend(handles=handles, labels=legend_labels, bbox_to_anchor=(1.05, 1), loc='upper left')
    fig_actions.tight_layout()
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig_actions)

    print("Final Generation actions (mean across runs):")
    print(final_means.to_string())

In [None]:
def create_performance_graph(
    combined_performance: pd.DataFrame,
    final_generation_df: int,
    graph_location: str,
    tick_step: int = 10,
) -> None:
    """Plot the mean of every performance metric per generation and save the figure.

    Metrics are ordered in the legend by their mean value at
    ``final_generation_df`` (largest first). The figure is written to
    ``graph_location`` and then shown.

    Args:
        combined_performance: Long-format frame with the columns
            ``generation``, ``metric`` and ``value``. It is not modified.
        final_generation_df: Generation whose per-metric means define the
            legend order.
        graph_location: Path of the image file to write; missing parent
            directories are created.
        tick_step: Distance between x-axis ticks, in generations.

    Raises:
        ValueError: If ``tick_step`` is not positive.
    """
    if tick_step <= 0:
        raise ValueError("tick_step must be a positive integer")

    final_means = (
        combined_performance.loc[combined_performance['generation'] == final_generation_df]
        .groupby('metric')['value']
        .mean()
        .sort_values(ascending=False)
    )
    ordered_metrics = final_means.index.tolist()

    # Work on a copy: assigning the Categorical onto the caller's frame would
    # change the dtype of its 'metric' column as a side effect.
    plot_data = combined_performance.assign(
        metric=pd.Categorical(
            combined_performance['metric'], categories=ordered_metrics, ordered=True
        )
    ).sort_values(by='metric')

    fig_performance, ax_performance = plt.subplots(figsize=(15, 6))

    sns.lineplot(
        data=plot_data,
        x='generation',
        y='value',
        hue='metric',
        ax=ax_performance,
        estimator='mean',
        errorbar='sd',
    )

    ax_performance.set_title("Mean Performance Metrics Over Generations (Aggregated Across Runs)")
    ax_performance.set_xlabel("Generation")
    ax_performance.set_ylabel("Mean Value")

    first_generation = combined_performance['generation'].min()
    last_generation = combined_performance['generation'].max()
    ax_performance.set_xlim(first_generation, last_generation)

    # Ticks on multiples of tick_step inside the data range. set_xticks widens
    # the view limits to include every tick, so ticks outside the range would
    # override the limits set just above.
    first_tick = int(np.ceil(first_generation / tick_step)) * tick_step
    ax_performance.set_xticks(np.arange(first_tick, last_generation + 1, tick_step))

    # Legend inside the axes, centred along the top edge.
    ax_performance.legend(
        loc='upper center',
        ncol=3,        # entries per legend row
        frameon=True,
    )

    fig_performance.tight_layout()

    # savefig does not create directories; make sure the target folder exists.
    Path(graph_location).parent.mkdir(parents=True, exist_ok=True)
    fig_performance.savefig(graph_location, dpi=300, bbox_inches='tight')
    plt.show()
    # Release the figure; this function is called once per experiment in a loop.
    plt.close(fig_performance)

In [None]:
# Each entry pairs the output image path with the run ids aggregated into it.
runs = [
    ("graphs/baseline-fitness.png", list(range(1000, 1020))),
    ("graphs/low-mutation-fitness.png", list(range(2000, 2020))),
    ("graphs/high-mutation-fitness.png", list(range(3000, 3020))),
    ("graphs/singlepoint-crossover-fitness.png", list(range(4000, 4020))),
    ("graphs/uniform-crossover-fitness.png", list(range(5000, 5020))),
    ("graphs/simpleagents-fitness.png", list(range(7000, 7010))),
]

# Load every experiment's runs and write its performance figure.
for graph_location, run_ids in runs:
    print(f"Processing runs: {run_ids} for graph: {graph_location}")
    (
        combined_conditions,
        combined_actions,
        combined_performance,
        final_generation_df,
    ) = parse_data(run_ids)

    create_performance_graph(
        combined_performance=combined_performance,
        final_generation_df=final_generation_df,
        graph_location=graph_location,
    )