# Benchmark

## Available Methods

The complete list of available methods is in the documentation: https://nathangavenski.github.io/IL-Datasets/

In [1]:
from benchmark.registers import get_methods

# List the method classes registered under the "all" selector; the returned
# list is the cell's output.
get_methods(["all"])

[benchmark.methods.bc.BC,
 benchmark.methods.bco.BCO,
 benchmark.methods.abco.ABCO,
 benchmark.methods.iupe.IUPE]

## Running a single benchmark method for an environment

In [2]:
from benchmark.methods import BC
from benchmark.registers import benchmark_environments
import gymnasium as gym
from imitation_datasets.dataset import BaselineDataset
from torch.utils.data import DataLoader

In [3]:
# Train Behavioural Cloning (BC) on one environment for a single epoch, then
# print its performance and average episodic reward.

# Settings a reader is most likely to change. They are defined once so the
# dataset, the gym environment and the random-reward lookup stay in sync.
ENV_NAME = "LunarLander-v2"
DATASET_ID = f"NathanGavenski/{ENV_NAME}"  # dataset repository; here it is named after the environment
N_EPISODES = 700
BATCH_SIZE = 32

# Teacher demonstrations: one dataset for training, one for the "eval" split.
dataset_train = BaselineDataset(DATASET_ID, source="hf", n_episodes=N_EPISODES)
dataset_eval = BaselineDataset(DATASET_ID, source="hf", n_episodes=N_EPISODES, split="eval")
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, shuffle=True)
# NOTE(review): shuffling the eval loader is probably unnecessary; kept as-is
# to preserve the original behaviour. Confirm against how BC.train uses it.
dataloader_eval = DataLoader(dataset_eval, batch_size=BATCH_SIZE, shuffle=True)

# Reference rewards passed to the evaluation step below.
teacher_reward = dataset_train.average_reward
# NOTE(review): index 0 is presumably the first group of registered
# environments; verify against benchmark.registers.
random_reward = benchmark_environments[0][ENV_NAME]["random_reward"]

bc = BC(gym.make(ENV_NAME), enjoy_criteria=100, verbose=True)
# train() returns an object whose load() result replaces `bc`.
bc = bc.train(
    n_epochs=1,
    train_dataset=dataloader_train,
    eval_dataset=dataloader_eval
).load()

# NOTE(review): `_enjoy` is private by naming convention; switch to a public
# evaluation entry point if the library exposes one.
metrics = bc._enjoy(teacher_reward=teacher_reward, random_reward=random_reward)

print()
print(f"Performance: {round(metrics['performance'], 4)} ± {round(metrics['performance_std'], 4)}")
print(f"Average Episodic Reward: {round(metrics['aer'], 4)} ± {round(metrics['aer_std'], 4)}")

Creating dataset: 100%|████████████████████████████████████████████| 700/700 [00:03<00:00, 204.31it/s]
Creating dataset: 100%|███████████████████████████████████████████| 300/300 [00:00<00:00, 1319.04it/s]
Behavioural Cloning: 100%|██████████████████████████████████████████████| 1/1 [00:21<00:00, 21.04s/it]



Performance: 0.9787 ± 0.047
Average Episodic Reward: 247.7645 ± 20.3482


## Running the benchmark on all environments for multiple methods

In [None]:
from benchmark.benchmark import benchmark
from benchmark.registers import get_methods

# Resolve the method classes by name first, then pass them to the benchmark
# runner.
selected_methods = get_methods(["BC", "BCO"])
benchmark(selected_methods)