In [None]:
import os

import jax

from ulee_repo.evaluations.diayn_evals import eval_diayn_finetune
from ulee_repo.evaluations.meta_learner_evals import eval_meta_learner_finetune, eval_meta_learner_finetune_on_meta_rl
from ulee_repo.evaluations.rollouts_on_trained import rollout_on_trained_weights
from ulee_repo.experiments.paths import build_best_weights_rollouts_path, build_finetuned_on_meta_rl_path, build_finetuned_weights_path, build_trained_weights_path


In [None]:
# Ask Weights & Biases to stay silent for the evaluations launched from this notebook.
os.environ.update(WANDB_SILENT="true")

## Rollouts on trained weights

In [None]:
# Environment / benchmark pair used by every rollout cell in this section.
env_id = "XLand-MiniGrid-R4-13x13"
benchmark_id = "small-1m"

# Rollout budget, forwarded as `num_envs` / `num_episodes` to `rollout_on_trained_weights`.
eval_num_envs = 16384
eval_num_episodes = 30

# A single PRNG key, derived from a fixed seed, is handed to all rollouts below.
eval_seed = 42
eval_rng = jax.random.key(eval_seed)

In [None]:
# ULEE

seeds = [10, 20, 30, 40]
goal_search_algorithm = "random"
goal_sampling_method = "uniform"
# NOTE(review): the other ULEE cells in this notebook use "ppo" / "bounded_uniform";
# confirm that the "random" / "uniform" variant is the one intended for these rollouts.

# One trained-weights path (input) and one rollouts path (output) per training seed.
ulee_trained_weights_paths = [
    build_trained_weights_path("ulee", env_id, benchmark_id, train_seed, goal_search_algorithm, goal_sampling_method)
    for train_seed in seeds
]
ulee_best_weights_rollouts_paths = [
    build_best_weights_rollouts_path("ulee", env_id, benchmark_id, train_seed, goal_search_algorithm, goal_sampling_method)
    for train_seed in seeds
]

# Arguments that are identical for every seed; only the two paths change per call.
ulee_rollout_kwargs = dict(
    rng=eval_rng,
    num_envs=eval_num_envs,
    num_episodes=eval_num_episodes,
    algorithm_id="ulee",
    env_id=env_id,
    benchmark_id=benchmark_id,
    eval_on_test_benchmark=True,
)

for weight_path, result_path in zip(ulee_trained_weights_paths, ulee_best_weights_rollouts_paths, strict=True):
    rollout_on_trained_weights(weights_path=weight_path, results_path=result_path, **ulee_rollout_kwargs)


In [None]:
# RANDOM POLICY

seeds = [10, 20, 30, 40]

# No weights are loaded for this baseline (weights_path=None), so only result paths are built.
random_rollouts_paths = [build_best_weights_rollouts_path("random", env_id, benchmark_id, seed) for seed in seeds]

for result_path in random_rollouts_paths:
    rollout_on_trained_weights(
        rng=eval_rng,
        num_envs=eval_num_envs,
        num_episodes=eval_num_episodes,
        algorithm_id="random",
        env_id=env_id,
        benchmark_id=benchmark_id,
        weights_path=None,
        results_path=result_path,
        # Request the same benchmark split as the ULEE / DIAYN / PPO rollouts in this section,
        # so the random baseline is scored on the same tasks as the methods it is compared to.
        eval_on_test_benchmark=True,
    )

In [None]:
# DIAYN

seeds = [10, 20, 30, 40]

# One trained-weights path (input) and one rollouts path (output) per training seed.
diayn_trained_weights_paths = [
    build_trained_weights_path("diayn", env_id, benchmark_id, train_seed)
    for train_seed in seeds
]
diayn_best_weights_rollouts_paths = [
    build_best_weights_rollouts_path("diayn", env_id, benchmark_id, train_seed)
    for train_seed in seeds
]

# Arguments that are identical for every seed; only the two paths change per call.
diayn_rollout_kwargs = dict(
    rng=eval_rng,
    num_envs=eval_num_envs,
    num_episodes=eval_num_episodes,
    algorithm_id="diayn",
    env_id=env_id,
    benchmark_id=benchmark_id,
    eval_on_test_benchmark=True,
)

for weight_path, result_path in zip(diayn_trained_weights_paths, diayn_best_weights_rollouts_paths, strict=True):
    rollout_on_trained_weights(weights_path=weight_path, results_path=result_path, **diayn_rollout_kwargs)

In [None]:
# PPO

seeds = [10, 20, 30, 40]

# One trained-weights path (input) and one rollouts path (output) per training seed.
ppo_trained_weights_paths = [
    build_trained_weights_path("ppo", env_id, benchmark_id, train_seed)
    for train_seed in seeds
]
ppo_best_weights_rollouts_paths = [
    build_best_weights_rollouts_path("ppo", env_id, benchmark_id, train_seed)
    for train_seed in seeds
]

# Arguments that are identical for every seed; only the two paths change per call.
# NOTE(review): paths are built with the "ppo" label while the rollout is dispatched with
# algorithm_id="standard_ppo" — presumably two different id namespaces; confirm.
ppo_rollout_kwargs = dict(
    rng=eval_rng,
    num_envs=eval_num_envs,
    num_episodes=eval_num_episodes,
    algorithm_id="standard_ppo",
    env_id=env_id,
    benchmark_id=benchmark_id,
    eval_on_test_benchmark=True,
)

for weight_path, result_path in zip(ppo_trained_weights_paths, ppo_best_weights_rollouts_paths, strict=True):
    rollout_on_trained_weights(weights_path=weight_path, results_path=result_path, **ppo_rollout_kwargs)


## Fine-tuning on fixed tasks

In [None]:
# Environment / benchmark pair used by the fine-tuning cells in this section.
env_id = "XLand-MiniGrid-R4-13x13"
benchmark_id = "small-1m"

# Fine-tuning budget, forwarded under the same names to the fine-tuning evaluators below.
num_envs = 2048
total_timesteps = 1_000_000_000
num_steps_per_env = 5120
num_steps_per_update = 256

# Number of evaluation episodes, passed to the evaluators through their `extra_configs`.
eval_num_episodes = 30

# A single PRNG key, derived from a fixed seed, is handed to every fine-tuning run below.
eval_seed = 42
eval_rng = jax.random.key(eval_seed)


In [None]:
# ULEE
goal_search_algorithm = "ppo"
goal_sampling_method = "bounded_uniform"
train_seeds = [10, 20, 30, 40]  # seeds on which pre-training was performed
finetune_seeds = [210, 220, 230, 240]  # seeds associated with each fine-tuning run
# NOTE(review): `finetune_seeds` only enter the results path in this cell; every run receives
# the same `eval_rng`. Confirm that sharing one key across runs is intended.

# Pre-trained weights to start from, one per training seed.
ulee_trained_weights_paths = [
    build_trained_weights_path("ulee", env_id, benchmark_id, pretrain_seed, goal_search_algorithm, goal_sampling_method)
    for pretrain_seed in train_seeds
]

# Output locations, one per (training seed, fine-tuning seed) pair taken position by position.
ulee_finetuned_weights_paths = [
    build_finetuned_weights_path("ulee", env_id, benchmark_id, pretrain_seed, ft_seed, goal_search_algorithm, goal_sampling_method)
    for pretrain_seed, ft_seed in zip(train_seeds, finetune_seeds, strict=True)
]

# Extra configuration forwarded verbatim to the fine-tuning evaluator.
extra_configs = {
    "eval_num_episodes": eval_num_episodes,
}

# Arguments shared by every fine-tuning run; only the two paths change per call.
ulee_finetune_kwargs = dict(
    rng=eval_rng,
    env_id=env_id,
    benchmark_id=benchmark_id,
    num_envs=num_envs,
    total_timesteps=total_timesteps,
    num_steps_per_env=num_steps_per_env,
    num_steps_per_update=num_steps_per_update,
    eval_on_test_benchmark=True,
    **extra_configs,
)

# Run the fine-tuning evaluation for each pre-trained checkpoint.
for weight_path, result_path in zip(ulee_trained_weights_paths, ulee_finetuned_weights_paths, strict=True):
    eval_meta_learner_finetune(weights_path=weight_path, results_path=result_path, **ulee_finetune_kwargs)


In [None]:
# DIAYN
train_seeds = [10, 20, 30, 40]  # seeds on which pre-training was performed
finetune_seeds = [210, 220, 230, 240]  # seeds associated with each fine-tuning run
# NOTE(review): `finetune_seeds` only enter the results path in this cell; every run receives
# the same `eval_rng`. Confirm that sharing one key across runs is intended.

# Pre-trained weights to start from, one per training seed.
diayn_trained_weights_paths = [
    build_trained_weights_path("diayn", env_id, benchmark_id, pretrain_seed)
    for pretrain_seed in train_seeds
]

# Output locations, one per (training seed, fine-tuning seed) pair taken position by position.
diayn_finetuned_weights_paths = [
    build_finetuned_weights_path("diayn", env_id, benchmark_id, pretrain_seed, ft_seed)
    for pretrain_seed, ft_seed in zip(train_seeds, finetune_seeds, strict=True)
]

# Extra configuration forwarded verbatim to the fine-tuning evaluator.
extra_configs = {
    # When fine-tuning, this controls the total number of eval episodes executed per environment.
    "num_eval_episodes_with_best_skill": eval_num_episodes,
    # When fine-tuning, this controls the number of episodes per skill on each env used to determine
    # the best skill for that env (which remains fixed throughout the fine-tuning process).
    "num_eval_episodes_per_skill": 10,
}

# Arguments shared by every fine-tuning run; only the two paths change per call.
# NOTE(review): unlike the ULEE fine-tuning cell, `eval_on_test_benchmark` is not passed here —
# confirm whether `eval_diayn_finetune` needs it to evaluate on the same benchmark split.
diayn_finetune_kwargs = dict(
    rng=eval_rng,
    env_id=env_id,
    benchmark_id=benchmark_id,
    num_envs=num_envs,
    total_timesteps=total_timesteps,
    num_steps_per_env=num_steps_per_env,
    num_steps_per_update=num_steps_per_update,
    **extra_configs,
)

# Run the fine-tuning evaluation for each pre-trained checkpoint.
for weight_path, result_path in zip(diayn_trained_weights_paths, diayn_finetuned_weights_paths, strict=True):
    eval_diayn_finetune(weights_path=weight_path, results_path=result_path, **diayn_finetune_kwargs)


## Method 4 - Evaluation of fine-tuning on meta-RL

In [None]:
# Environment / benchmark pair used by the meta-RL fine-tuning cell in this section.
env_id = "XLand-MiniGrid-R4-13x13"
benchmark_id = "small-1m"

# Fine-tuning budget, forwarded under the same names to the meta-RL fine-tuning evaluator below.
num_envs = 2048
total_timesteps = 5_000_000_000
num_steps_per_env = 5120
num_steps_per_update = 256

# Number of evaluation episodes, passed to the evaluator through its `extra_configs`.
eval_num_episodes = 25

# A single PRNG key, derived from a fixed seed, is handed to every fine-tuning run below.
eval_seed = 42
eval_rng = jax.random.key(eval_seed)


In [None]:
# ULEE
goal_search_algorithm = "ppo"
goal_sampling_method = "bounded_uniform"
train_seeds = [10, 20, 30, 40]  # seeds on which pre-training was performed
finetune_seeds = [210, 220, 230, 240]  # seeds associated with each fine-tuning run
# NOTE(review): `finetune_seeds` only enter the results path in this cell; every run receives
# the same `eval_rng`. Confirm that sharing one key across runs is intended.

# Pre-trained weights to start from, one per training seed.
ulee_trained_weights_paths = [
    build_trained_weights_path("ulee", env_id, benchmark_id, pretrain_seed, goal_search_algorithm, goal_sampling_method)
    for pretrain_seed in train_seeds
]

# Output locations, one per (training seed, fine-tuning seed) pair taken position by position.
ulee_finetuned_on_meta_rl_weights_paths = [
    build_finetuned_on_meta_rl_path("ulee", env_id, benchmark_id, pretrain_seed, ft_seed, goal_search_algorithm, goal_sampling_method)
    for pretrain_seed, ft_seed in zip(train_seeds, finetune_seeds, strict=True)
]

# Extra configuration forwarded verbatim to the evaluator.
extra_configs = {
    "eval_num_episodes": eval_num_episodes,
}

# Arguments shared by every run; only the two paths change per call.
# NOTE(review): `eval_on_test_benchmark` is not passed here, unlike `eval_meta_learner_finetune`
# in the fixed-task section — confirm this evaluator does not need it.
meta_rl_finetune_kwargs = dict(
    rng=eval_rng,
    env_id=env_id,
    benchmark_id=benchmark_id,
    num_envs=num_envs,
    total_timesteps=total_timesteps,
    num_steps_per_env=num_steps_per_env,
    num_steps_per_update=num_steps_per_update,
    **extra_configs,
)

# Run the meta-RL fine-tuning evaluation for each pre-trained checkpoint.
for weight_path, result_path in zip(ulee_trained_weights_paths, ulee_finetuned_on_meta_rl_weights_paths, strict=True):
    eval_meta_learner_finetune_on_meta_rl(weights_path=weight_path, results_path=result_path, **meta_rl_finetune_kwargs)


## Eval on MiniGrid environments

In [None]:
# Benchmark id forwarded to the path builders and to `rollout_on_trained_weights` below.
benchmark_id = "small-1m"

# MiniGrid environments to roll out on; the evaluation cell iterates them in this order.
envs_ids = [
    "MiniGrid-BlockedUnlockPickUp",
    "MiniGrid-DoorKey-5x5",
    "MiniGrid-DoorKey-8x8",
    "MiniGrid-DoorKey-16x16",
    "MiniGrid-Empty-8x8",
    "MiniGrid-Empty-16x16",
    "MiniGrid-EmptyRandom-8x8",
    "MiniGrid-EmptyRandom-16x16",
    "MiniGrid-FourRooms",
    "MiniGrid-LockedRoom",
    "MiniGrid-MemoryS8",
    "MiniGrid-MemoryS16",
    "MiniGrid-MemoryS64",
    "MiniGrid-Unlock",
    "MiniGrid-UnlockPickUp",
]

# Rollout budget, forwarded as `num_envs` / `num_episodes` to `rollout_on_trained_weights`.
eval_num_envs = 2048
eval_num_episodes = 30

# A single PRNG key, derived from a fixed seed, is handed to all rollouts below.
eval_seed = 42
eval_rng = jax.random.key(eval_seed)

In [None]:
# ULEE

goal_search_algorithm = "ppo"
goal_sampling_method = "bounded_uniform"
seeds = [10, 20, 30, 40]

# Trained-weight paths are always resolved for this XLand-MiniGrid env / benchmark,
# independently of which MiniGrid environment is being evaluated.
pretrain_env_id = "XLand-MiniGrid-R4-13x13"
pretrain_benchmark_id = "small-1m"

ulee_trained_weights_paths = [
    build_trained_weights_path("ulee", pretrain_env_id, pretrain_benchmark_id, seed, goal_search_algorithm, goal_sampling_method)
    for seed in seeds
]

# Use a dedicated loop variable: iterating with `env_id` would overwrite the notebook-level
# `env_id` that the earlier sections read, leaving stale state if those cells are re-run.
for minigrid_env_id in envs_ids:
    # Rollout results are stored per evaluated MiniGrid environment and per training seed.
    ulee_best_weights_rollouts_paths = [
        build_best_weights_rollouts_path("ulee", minigrid_env_id, benchmark_id, seed, goal_search_algorithm, goal_sampling_method)
        for seed in seeds
    ]

    for weight_path, result_path in zip(ulee_trained_weights_paths, ulee_best_weights_rollouts_paths, strict=True):
        rollout_on_trained_weights(
            rng=eval_rng,
            num_envs=eval_num_envs,
            num_episodes=eval_num_episodes,
            algorithm_id="ulee",
            env_id=minigrid_env_id,
            benchmark_id=benchmark_id,
            weights_path=weight_path,
            results_path=result_path,
            eval_on_test_benchmark=True,
        )
