In [7]:
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tabulate import tabulate

In [8]:
# Root directory that holds one sub-directory per (experiment, seed) run.
data_path = "../logs"
# os.listdir returns entries in arbitrary order; sort so runs are visited
# (and seeds appended) in the same order on every execution.
exps = sorted(os.listdir(data_path))
# Maps experiment name (seed suffix stripped) -> list of per-seed reward Series.
results = {}

for exp in exps:
    try:
        # train.log lives one directory level below the run directory.
        # Sort the nested entries and take the first one that actually
        # contains train.log, so the choice does not depend on filesystem
        # enumeration order or on stray files sitting next to the run folder.
        nested_ = sorted(os.listdir(os.path.join(data_path, exp)))
        candidates = [
            os.path.join(data_path, exp, sub, "train.log") for sub in nested_
        ]
        file = next((path for path in candidates if os.path.isfile(path)), None)
        if file is None:
            raise FileNotFoundError("no nested train.log found")
        data = pd.read_csv(file)

        # take all but last _s1, or _s2, or _s3 which are the seeds
        exp_name = "_".join(exp.split("_")[:-1])

        # Skip the first 100 rows of the reward column. Extract the column
        # BEFORE registering the experiment so a log without
        # "episode_reward" does not leave an empty list behind in results.
        seed_rewards = data["episode_reward"][100:]
        results.setdefault(exp_name, []).append(seed_rewards)
    except Exception as e:
        # Best-effort loading: report which run failed and keep going.
        print(f"Skipping {exp!r}: {e}")
        continue

In [9]:
# Prepare summary table: one row per experiment, aggregated over all seeds.
summary_data = {
    "Experiment": [],
    "Mean Reward": [],
    "Std Dev Reward": [],
    "Num Seeds": []
}

for exp_name, rewards in results.items():
    try:
        # Pool the per-seed reward series into one array and compute every
        # statistic up front. Nothing is appended until all of them succeed,
        # so a failure cannot leave the column lists with unequal lengths
        # (which would make the DataFrame construction below raise).
        all_rewards = np.concatenate(rewards)
        mean_reward = np.mean(all_rewards)
        # np.std defaults to the population standard deviation (ddof=0),
        # taken here over the pooled rewards of all seeds.
        std_reward = np.std(all_rewards)
    except Exception as e:
        # Best-effort: report the experiment that could not be summarised.
        print(f"Skipping {exp_name!r}: {e}")
        continue

    summary_data["Experiment"].append(exp_name)
    summary_data["Mean Reward"].append(mean_reward)
    summary_data["Std Dev Reward"].append(std_reward)
    summary_data["Num Seeds"].append(len(rewards))

# Convert to DataFrame, best-performing experiments first
summary_df = pd.DataFrame(summary_data)
summary_df = summary_df.sort_values(by="Mean Reward", ascending=False)

In [10]:
# Re-order the rows alphabetically by experiment name, then write the
# summary to a CSV file in the working directory (row index omitted).
summary_df = summary_df.sort_values("Experiment")
summary_df.to_csv(path_or_buf="experiment_summary.csv", index=False)

In [11]:
# Split every experiment name on "_" once and derive both columns from the
# pieces: the third piece is the algorithm, the second piece (with any
# "-v5" text removed) is the environment.
name_parts = summary_df["Experiment"].map(lambda name: name.split("_"))
summary_df["Algorithm"] = name_parts.map(lambda parts: parts[2])
summary_df["Environment"] = name_parts.map(lambda parts: parts[1].replace("-v5", ""))

In [12]:

# Pivot so each environment is a row and each (statistic, algorithm) pair is
# a column; 'first' keeps the first value found for every cell.
table = summary_df.pivot_table(
    values=['Mean Reward', 'Std Dev Reward'],
    index='Environment',
    columns='Algorithm',
    aggfunc='first',
)

# One "mean ± std" string column per algorithm (second level of the
# pivoted column index).
formatted_table = pd.DataFrame({
    algo: (
        table[('Mean Reward', algo)].astype(str)
        + " ± "
        + table[('Std Dev Reward', algo)].astype(str)
    )
    for algo in table.columns.levels[1]
})

# Turn the Environment index into an ordinary column
formatted_table = formatted_table.reset_index()

# Render the final table as plain text
print(tabulate(formatted_table, headers='keys', tablefmt='pretty'))

+---+------------------+-----------------------------------------+----------------------------------------+-----------------------------------------+--------------------------------------------+-----------------------------------------+-----------------------------------------+-----------------------------------------+------------------------------------------+
|   |   Environment    |              disagreement               |                  e3b                   |                extrinsic                |                    icm                     |              pseudocounts               |                   re3                   |                  ride                   |                   rnd                    |
+---+------------------+-----------------------------------------+----------------------------------------+-----------------------------------------+--------------------------------------------+-----------------------------------------+------------------------------------