In [3]:
import itertools
import os
import shlex
import subprocess

# Experiment grid: the three axes that are combined into individual runs.
algorithms = [
    "pi",
    "vi",
    "cvi",
    "c51",
]
envs = [
    "taxi",
    "frozenlake",
]
seeds = [
    0,
    42,
    100,
]

In [4]:
# Environment to config mapping: env name -> algorithm name -> YAML config path.
# Every path has the form config/<env>_<algo>.yaml, so the table is generated
# rather than spelled out entry by entry.
env_config_map = {
    env_name: {
        algo_name: f"config/{env_name}_{algo_name}.yaml"
        for algo_name in ("pi", "vi", "cvi", "c51")
    }
    for env_name in ("taxi", "frozenlake")
}

In [5]:
import time

# Launch `train.py` once for every (algorithm, environment, seed) combination,
# relay each run's output into the notebook, and keep a tally of how many runs
# finished and how many failed.
total_experiments = len(algorithms) * len(envs) * len(seeds)
completed = 0
failed = 0

print(f"Starting {total_experiments} experiments...")
print(f"Algorithms: {algorithms}")
print(f"Environments: {envs}")
print(f"Seeds: {seeds}")

start_time = time.time()

# Ask a Python child process to flush its output immediately; otherwise
# writing to a pipe makes the log arrive in large delayed chunks.
child_env = {**os.environ, "PYTHONUNBUFFERED": "1"}

for algo, env, seed in itertools.product(algorithms, envs, seeds):
    config_file = env_config_map[env][algo]

    run_name = f"{env}_{algo}_seed{seed}"
    tags = [f"algo_{algo}", f"env_{env}", f"seed_{seed}"]
    # Built outside an f-string on purpose: a backslash inside an f-string
    # replacement field is a SyntaxError before Python 3.12.
    tags_str = '"[' + ",".join(f"'{tag}'" for tag in tags) + ']"'

    print(f"\n{'='*60}")
    print(f"🚀 Running: {algo} on {env} with seed {seed}")
    print(f"{'='*60}")

    cmd = " ".join([
        "python train.py",
        config_file,
        "logger.do.online=true",
        "logger.project_name=CVI-RL",
        f"logger.run_name={run_name}",
        f"logger.tags={tags_str}",
        f"seed={seed}",
    ])
    print(f"Command: {cmd}\n")

    # IPython's `!cmd` does not raise when the command exits non-zero, so the
    # process is started explicitly and its return code inspected.
    # shlex.split applies POSIX shell quoting rules, so train.py receives the
    # same argv as it would from the shell (outer double quotes removed).
    try:
        with subprocess.Popen(
            shlex.split(cmd),
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,  # keep stderr interleaved with stdout
            text=True,
            errors="replace",
            bufsize=1,
            env=child_env,
        ) as proc:
            for line in proc.stdout:
                print(line, end="")
        # Leaving the `with` block waits for the process, so returncode is set.
        failure = None if proc.returncode == 0 else f"exit code {proc.returncode}"
    except OSError as e:
        # The process could not be started at all (e.g. `python` not on PATH).
        failure = f"error: {e}"

    if failure is None:
        print(f"✅ Completed: {algo} on {env} with seed {seed}")
    else:
        print(f"⚠️  WARNING: Experiment failed with {failure}")
        failed += 1

    completed += 1
    print(f"\nProgress: {completed}/{total_experiments} completed ({failed} failed)")

    # Short pause between consecutive runs.
    time.sleep(2)

elapsed_time = time.time() - start_time
print(f"\n{'='*60}")
print("All experiments completed!")
print(f"Total time: {elapsed_time/60:.2f} minutes")
print(f"Successful: {completed - failed}/{total_experiments}")
print(f"Failed: {failed}/{total_experiments}")
print(f"{'='*60}")

Starting 24 experiments...
Algorithms: ['pi', 'vi', 'cvi', 'c51']
Environments: ['taxi', 'frozenlake']
Seeds: [0, 42, 100]

üöÄ Running: pi on taxi with seed 0
Command: python train.py config/taxi_pi.yaml logger.do.online=true logger.project_name=CVI-RL logger.run_name=taxi_pi_seed0 logger.tags="['algo_pi','env_taxi','seed_0']" seed=0

Random seed set to: 0
Random seed set to: 0
[34m[1mwandb[0m: Currently logged in as: [33mthom-mousseau[0m ([33mfatty_data[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Currently logged in as: [33mthom-mousseau[0m ([33mfatty_data[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.17.9
[34m[1mwandb[0m: Tracking run with wandb version 0.17.9
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/Users/thomasmousseau/School/Maitrise/RL/CVI/wandb/run-20251213_165917-s2df513z[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[