In [15]:
import pandas as pd
import wandb
import os
from datetime import datetime
from rich.progress import Progress

def _resolve_setting(explicit, env_var, label):
    """Resolve a W&B setting from an explicit value or an environment variable.

    Args:
        explicit: Value set directly in the notebook; used when non-empty.
        env_var: Name of the environment variable consulted when ``explicit``
            is empty or ``None``.
        label: Capitalised setting name used in the error message.

    Returns:
        A ``(value, source)`` tuple, where ``source`` is a short human-readable
        description of where the value came from.

    Raises:
        ValueError: If neither the explicit value nor the environment variable
            provides a non-empty value.
    """
    if explicit:
        return explicit, "notebook configuration"
    from_env = os.environ.get(env_var)
    if from_env:
        return from_env, f"environment variable '{env_var}'"
    raise ValueError(f"{label} not provided and environment variable '{env_var}' is not set.")


api = wandb.Api(timeout=60)

# Set these explicitly, or leave them empty ('' / None) to fall back to the
# WANDB_ENTITY / WANDB_PROJECT environment variables.
entity = 'jayden-teoh'
project = 'MORL-Baselines'

# Report where each value really came from instead of always claiming that it
# was read from the environment.
entity, entity_source = _resolve_setting(entity, 'WANDB_ENTITY', 'Entity')
print(f"Using entity from {entity_source}: {entity}.")

project, project_source = _resolve_setting(project, 'WANDB_PROJECT', 'Project')
print(f"Using project from {project_source}: {project}.")

Fetched entity from environment variable 'WANDB_ENTITY': jayden-teoh.
Fetched project from environment variable 'WANDB_PROJECT': MORL-Baselines.


In [16]:
# Filter describing the runs of interest (group + environment tag).
# NOTE: it is not passed to the probe query below, so `total_runs` counts the
# result of the unfiltered project query, not the runs matching this filter.
filters = {"group": "domain_randomization", "tags": {"$in": ["MOHopperDR-v5"]}}

# Probe the project with the smallest possible page size. Taking `len()` of the
# result is what makes a wrong entity/project or missing credentials surface
# here (assumes `api.runs` is lazily evaluated, as described in the W&B docs).
try:
    runs_sample = api.runs(path=f"{entity}/{project}", per_page=1)
    total_runs = len(runs_sample)
except Exception as e:
    # Chain explicitly so the underlying W&B error stays attached as __cause__.
    raise ValueError(
        f"Invalid entity '{entity}' or project '{project}': {e}\n\n"
        "Also, make sure you are properly authenticated. You can authenticate by using "
        "'wandb.login()' or setting the environment variable 'WANDB_API_KEY'."
    ) from e

In [17]:
# Default CSV file name format: <entity>-<project>-<MMDDYY>.csv
date_str = datetime.now().strftime("%m%d%y")
output_file = f"{entity}-{project}-{date_str}.csv"

# Only runs belonging to this W&B group are exported.
filters = {"group": "domain_randomization"}

# A single query is enough: per the W&B public API docs the returned collection
# fetches further pages of `per_page` runs on demand while it is iterated, so
# one pass visits every matching run exactly once. Re-querying in an outer
# loop keyed on the progress bar could either never terminate or collect the
# same runs again.
runs = api.runs(path=f"{entity}/{project}", per_page=100, order="created_at", filters=filters)

all_runs_data = []
with Progress() as progress:
    # Size the bar from the *filtered* query and tick it once per run visited
    # (finished or not) so that it always reaches 100%.
    task = progress.add_task("[cyan]Fetching runs...", total=len(runs))

    for run in runs:
        progress.update(task, advance=1)
        # Unfinished (running / crashed / failed) runs are not exported.
        if run.state != "finished":
            continue
        all_runs_data.append(
            {
                "name": run.name,
                "state": run.state,
                # Every summary entry becomes its own column in the CSV.
                **run.summary._json_dict,
            }
        )

df = pd.DataFrame(all_runs_data)
df.to_csv(output_file, index=False)
print(f"Saved {len(df)} runs to {output_file}")

Output()

Saved 743 runs to jayden-teoh-MORL-Baselines-090524.csv


In [18]:
# Display the DataFrame of fetched runs as this cell's output.
df

Unnamed: 0,name,state,_runtime,_step,_timestamp,_wandb,charts/SPS,charts_0/episode_time,charts_0/timesteps_per_episode,charts_1/episode_time,...,losses_8/qf2_values,losses_8/qf_loss,losses_9/actor_loss,losses_9/alpha,losses_9/alpha_loss,losses_9/qf1_loss,losses_9/qf1_values,losses_9/qf2_loss,losses_9/qf2_values,losses_9/qf_loss
0,MOLunarLanderDR-v0__MORL-D(MOSACDiscrete)-SB__...,finished,28487.975864,88247,1.725460e+09,{'runtime': 31933},105.0,6.5000,1000.0,6.7500,...,,,,,,,,,,
1,MOLunarLanderDR-v0__SAC Discrete Action__92__1...,finished,14183.913646,61399,1.725407e+09,{'runtime': 31790},213.0,,,,...,,,,,,,,,,
2,MOLunarLanderDR-v0__MORL-D(MOSACDiscrete)-SB__...,finished,36261.477193,97535,1.725428e+09,{'runtime': 36455},83.0,7.2500,1000.0,7.5000,...,,,,,,,,,,
3,MOHopperDR-v5__MORL-D(MOSAC)-SB+PSA__5__172539...,finished,24873.086976,102704,1.725416e+09,{'runtime': 37142},121.0,5.5625,1000.0,5.5625,...,,,,,,,,,,
4,MOLunarLanderDR-v0__MORL-D(MOSACDiscrete)-SB__...,finished,41071.670322,97007,1.725426e+09,{'runtime': 42722},73.0,7.7500,1000.0,7.7500,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,MOLunarLanderDR-v0__GPI-PD__92__1723548458,finished,177403.848539,34736,1.723726e+09,{'runtime': 177439},,,,,...,,,,,,,,,,
739,MOLunarLanderDR-v0__GPI-PD__76__1723548435,finished,172651.707014,34736,1.723721e+09,{'runtime': 172685},,,,,...,,,,,,,,,,
740,MOLunarLanderDR-v0__GPI-PD__47__1723548398,finished,180677.328578,34736,1.723729e+09,{'runtime': 180713},,,,,...,,,,,,,,,,
741,MOLunarLanderDR-v0__GPI-PD__5__1723548376,finished,177532.494611,34762,1.723726e+09,{'runtime': 177563},,,,,...,,,,,,,,,,


In [19]:
# Split the run name "<env_id>__<algorithm>__<seed>__<time>" into its parts.
# The algorithm part is stored back into the 'name' column (overwriting the
# full run name); 'env_id', 'seed' and 'time' become new columns.
name_parts = df['name'].str.split('__', expand=True)
if name_parts.shape[1] != 4:
    # Fail with an actionable message instead of pandas' generic
    # "Columns must be same length as key" error.
    raise ValueError(
        "Expected run names of the form '<env_id>__<algorithm>__<seed>__<time>' "
        f"but splitting on '__' produced {name_parts.shape[1]} part(s). "
        "If this cell has already been executed, re-run the fetch cell first "
        "to restore the original 'name' column."
    )
df = df.assign(
    env_id=name_parts[0],
    name=name_parts[1],
    seed=name_parts[2],
    time=name_parts[3],
)

# Drop rows with missing global_step
df = df.dropna(subset=['global_step'])

# Convert global_step to integer to ensure correct sorting.
# `astype` returns a new frame, which avoids assigning into the result of
# `dropna` (the source of pandas' SettingWithCopyWarning).
df = df.astype({'global_step': int})

# Keep only columns whose name starts with "eval/", "name", "global_step",
# "env_id" or "seed" ('time' and all other summary columns are dropped).
df = df.filter(regex='^(eval/|name|global_step|env_id|seed)')

# Remove the "eval/" prefix from the column names
df.columns = df.columns.str.replace('^eval/', '', regex=True)

# Base directory where everything will be saved; created even when there is
# nothing to export. Layout: data/<env_id>/<algorithm>/seed_<seed>.csv
output_directory = 'data'
os.makedirs(output_directory, exist_ok=True)

# Loop through each unique combination of 'env_id' and algorithm
# (the algorithm lives in the 'name' column at this point).
for (env_id, algorithm), group in df.groupby(['env_id', 'name']):
    env_dir = os.path.join(output_directory, env_id)
    algo_dir = os.path.join(env_dir, algorithm)
    # Creates the env_id folder as well; exist_ok avoids the check-then-create race.
    os.makedirs(algo_dir, exist_ok=True)

    # Loop through each seed and save the corresponding data as a CSV file
    for seed, seed_data in group.groupby('seed'):
        # Sort by global_step in ascending order and use it as the index.
        seed_data = seed_data.sort_values(by='global_step').set_index('global_step')

        # Collapse rows sharing the same global_step into one row, taking the
        # first non-null value of each column.
        seed_data = seed_data.groupby('global_step').first()

        # Save the data for this seed
        seed_file_path = os.path.join(algo_dir, f'seed_{seed}.csv')
        seed_data.to_csv(seed_file_path, index=True)

print("Files have been successfully created.")


Files have been successfully created.
