In [1]:
import pandas as pd

from mujoco_playground import registry

import sys
sys.path.append("..")
from algorithms.utils.wrapper_gym import GymWrapper
from algorithms.offline.any_percent_bc import get_actor_from_checkpoint as get_actor_from_checkpoint_bc, eval_actor as eval_actor_bc
from algorithms.offline.td3_bc import get_actor_from_checkpoint as get_actor_from_checkpoint_td3_bc, eval_actor as eval_actor_td3_bc
from algorithms.offline.sac_n import get_actor_from_checkpoint as get_actor_from_checkpoint_sac_n, eval_actor as eval_actor_sac_n
# Dispatch tables keyed by algorithm name: the first maps to the function that
# restores an actor from a checkpoint, the second to the matching evaluation
# routine. Both tables share the same keys.
get_actor_from_checkpoint = dict([
    ("BC", get_actor_from_checkpoint_bc),
    ("TD3-BC", get_actor_from_checkpoint_td3_bc),
    ("SAC-N", get_actor_from_checkpoint_sac_n),
])
eval_actor = dict([
    ("BC", eval_actor_bc),
    ("TD3-BC", eval_actor_td3_bc),
    ("SAC-N", eval_actor_sac_n),
])


In [2]:
# Set the variable in the kernel's own environment. The previous
# `!export ...` ran in a throwaway subshell, so the setting never reached
# this Python process and had no effect.
# NOTE(review): this has to run before JAX initialises its GPU backend;
# confirm that the imports in the first cell do not already trigger that.
import os

os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"

In [3]:
import os
import yaml
from pathlib import Path

def collect_checkpoint_info(base_path="../checkpoints"):
    """Collect one record per experiment directory that has a ``config.yaml``.

    The expected layout is ``<base_path>/<algorithm>/<experiment>/config.yaml``.
    Entries that are not directories, experiment directories without a
    ``config.yaml``, and config files that do not parse to a mapping
    (e.g. an empty file) are skipped.

    Args:
        base_path: Root directory to scan. Defaults to ``"../checkpoints"``.

    Returns:
        A list of dicts with the keys:
            ``checkpoint_path``: the config's ``checkpoints_path`` value (or None).
            ``dataset_id``: the config's ``dataset_id`` value (or None).
            ``env``: the config's ``env`` value (or None).
            ``model``: the experiment directory name up to its first ``"-"``.
            ``difficulty``: the second ``"-"``-separated token of ``dataset_id``,
                or None when ``dataset_id`` is missing, not a string, or has
                no ``"-"``.
        Records are ordered by sorted algorithm directory, then sorted
        experiment directory, so repeated scans give the same order.

    Raises:
        FileNotFoundError: If ``base_path`` does not exist.
    """
    records = []

    # iterdir() yields entries in arbitrary order; sort for reproducible output.
    for algo_dir in sorted(Path(base_path).iterdir()):
        if not algo_dir.is_dir():
            continue

        for exp_dir in sorted(algo_dir.iterdir()):
            config_file = exp_dir / "config.yaml"
            if not (exp_dir.is_dir() and config_file.exists()):
                continue

            with open(config_file, 'r') as f:
                config = yaml.safe_load(f)

            # safe_load returns None for an empty file; anything that is not a
            # mapping cannot provide the fields read below.
            if not isinstance(config, dict):
                continue

            dataset_id = config.get('dataset_id')
            # Guard the split so a missing or dash-less id yields None instead
            # of raising AttributeError / IndexError.
            id_parts = dataset_id.split("-") if isinstance(dataset_id, str) else []

            records.append({
                'checkpoint_path': config.get('checkpoints_path'),
                'dataset_id': dataset_id,
                'env': config.get('env'),
                'model': exp_dir.name.split("-")[0],
                'difficulty': id_parts[1] if len(id_parts) > 1 else None,
            })

    return records

# Scan the checkpoint tree once; `path_model` holds one record (dict) per
# experiment directory that has a config.yaml.
path_model = collect_checkpoint_info()


In [None]:
# Inspect the collected checkpoint records.
path_model

In [5]:
import pandas as pd

# Gather the checkpoint paths that already have a row in a previous results
# file; when that file does not exist yet, nothing has been evaluated.
try:
    existing_results = pd.read_csv("results_offline.csv")
except FileNotFoundError:
    existing_checkpoints = set()
else:
    existing_checkpoints = set(existing_results['checkpoint_path'].values)

# Keep only the records whose checkpoint is not in the results file yet.
path_model = list(
    filter(lambda record: record["checkpoint_path"] not in existing_checkpoints, path_model)
)


In [None]:
# Number of checkpoint records remaining after the filter above.
len(path_model)

In [None]:
# Evaluate every record in `path_model`: build the environment, restore the
# actor from its checkpoint, run the evaluation, and write the mean and std of
# the returned episode rewards back into the record itself.
# NOTE(review): `results` is never read in the visible cells; outcomes are
# stored on each `p` instead.
results = []
for p in path_model:
    print("-"*100)
    print(f"ENV: {p['env']}")
    print("-"*100)
    print()
    # Stored checkpoint paths are resolved against the parent directory.
    checkpoint_path = os.path.join("..", p["checkpoint_path"])
    # Re-read the run's config; only its "device" entry is used below.
    with open(os.path.join(checkpoint_path, "config.yaml")) as f:
        config = yaml.safe_load(f)

    # `model` is the experiment directory name up to its first "-", so
    # "TD3-BC" and "SAC-N" runs arrive as "TD3" / "SAC". Map them onto the
    # keys of the dispatch dicts.
    if p["model"] == "TD3":
        p["model"] = "TD3-BC"
    if p["model"] == "SAC":
        p["model"] = "SAC-N"

    # Environment, its default config and its domain randomizer are all
    # looked up in the registry by environment name.
    env = registry.load(p["env"])
    env_cfg = registry.get_default_config(p["env"])
    randomizer = registry.get_domain_randomizer(p["env"])

    # Fresh list per checkpoint; the callback appends every state it is given.
    # NOTE(review): the list is not read in the visible cells.
    render_trajectory = []

    def render_callback(_, state):
        render_trajectory.append(state)

    # NOTE(review): the wrapper is pinned to device="cuda" and seed=1, while
    # evaluation below uses config["device"] and seed=0 — confirm intended.
    env_wrapped = GymWrapper(
        env,
        num_actors=1,
        seed=1,
        episode_length=env_cfg.episode_length,
        action_repeat=1,
        render_callback=render_callback,
        randomization_fn=randomizer,
        device="cuda",
    )

    # Actor dimensions come from the first axis of the wrapped env's spaces.
    state_dim = env_wrapped.observation_space.shape[0] 
    action_dim = env_wrapped.action_space.shape[0]
    # presumably actions are normalised to [-1, 1] — TODO confirm
    max_action = 1.0

    # ------------- OFFLINE RL EVALUATION
    # Pick the loader and evaluator for this algorithm from the dispatch dicts.
    actor = get_actor_from_checkpoint[p["model"]](checkpoint_path=checkpoint_path, state_dim=state_dim, action_dim=action_dim, max_action=max_action)
    episode_rewards = eval_actor[p["model"]](actor=actor, env=env_wrapped, device=config["device"], n_episodes=10, seed=0)
    # presumably an array with one return per episode — verify against eval_actor
    p["episode_rewards_mean"] = episode_rewards.mean()
    p["episode_rewards_std"] = episode_rewards.std()
    

In [None]:
# Tabulate the records, including any reward statistics written by the
# evaluation loop.
pd.DataFrame.from_dict(path_model)

In [9]:
# Append the newly evaluated records to results_offline.csv.
#
# Only records that actually carry an evaluation result are kept. When no
# record has been evaluated the column is absent, so guard the dropna call
# instead of letting it raise KeyError.
df_new = pd.DataFrame.from_dict(path_model)
if "episode_rewards_mean" in df_new.columns:
    df_new = df_new.dropna(subset=["episode_rewards_mean"])
else:
    df_new = df_new.iloc[0:0]

# On the first run there is no results file yet; treat that as "no previous
# results", matching how the filtering cell handles the same situation.
try:
    df = pd.read_csv("results_offline.csv")
except FileNotFoundError:
    df = None

# Re-running this cell must not append the same checkpoints a second time.
if df is not None and "checkpoint_path" in df.columns and "checkpoint_path" in df_new.columns:
    df_new = df_new[~df_new["checkpoint_path"].isin(df["checkpoint_path"])]

if df is None:
    df = df_new.reset_index(drop=True)
elif not df_new.empty:
    df = pd.concat([df, df_new], ignore_index=True)

# Write only when there is something new, so an empty file is never created.
if not df_new.empty:
    df.to_csv("results_offline.csv", index=False)