In [1]:
import torch

# Fail fast when no CUDA device is visible to torch.
assert torch.cuda.device_count() >= 1, "This notebook needs a GPU to run!"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from warp_drive.env_wrapper import EnvWrapper
from warp_drive.training.trainer import Trainer
from warp_drive.utils.common import get_project_root

from example_envs.tag_continuous.generate_rollout_animation import (
    generate_tag_env_rollout_animation,
)

In [3]:
from gym.spaces import Discrete, MultiDiscrete
from IPython.display import HTML
import yaml
import numpy as np

In [4]:
# Only show messages at ERROR level or above on the root logger.
# Other levels that can be used here: logging.DEBUG, logging.INFO, logging.WARNING.
import logging

root_logger = logging.getLogger()
root_logger.setLevel(logging.ERROR)

In [5]:
# Load the run configuration.
#
# The example configuration is embedded below as a YAML string. Besides the
# top-level `name`, it has four sections: `env`, `trainer`, `policy` (one
# entry each for `prey` and `predator`) and `saving`.

CFG = """
# Sample YAML configuration for the tag continuous environment
name: "tag_continuous"

# Environment settings
env:
    num_preys: 50
    num_predators: 1
    stage_size: 30
    episode_length: 500
    preparation_length: 100
    max_acceleration: 0.1
    max_turn: 2.35  # 3*pi/4 radians
    num_acceleration_levels: 10
    num_turn_levels: 10
    eating_reward_for_predator: 10.0
    eating_penalty_for_prey: -10.0
    edge_hit_penalty: -0.0
    end_of_game_penalty : -1.0
    end_of_game_reward: 1.0
    use_full_observation: False
    eating_distance: 0.02
    seed: 274880
    env_backend: "numba"

# Trainer settings
trainer:
    num_envs: 400 # number of environment replicas
    train_batch_size: 10000 # total batch size used for training per iteration (across all the environments)
    num_episodes: 500 # number of episodes to run the training for (can be arbitrarily high)
# Policy network settings
policy: # list all the policies below
    prey:
        to_train: True # flag indicating whether the model needs to be trained
        algorithm: "A2C" # algorithm used to train the policy
        gamma: 0.98 # discount rate gamms
        lr: 0.005 # learning rate
        vf_loss_coeff: 1 # loss coefficient for the value function loss
        entropy_coeff:
        - [0, 0.5]
        - [2000000, 0.05]
        model: # policy model settings
            type: "fully_connected" # model type
            fc_dims: [256, 256] # dimension(s) of the fully connected layers as a list
            model_ckpt_filepath: "" # filepath (used to restore a previously saved model)
    predator:
        to_train: True
        algorithm: "A2C"
        gamma: 0.98
        lr: 0.002
        vf_loss_coeff: 1
        model:
            type: "fully_connected"
            fc_dims: [256, 256]
            model_ckpt_filepath: ""

# Checkpoint saving setting
saving:
    metrics_log_freq: 100 # how often (in iterations) to print the metrics
    model_params_save_freq: 5000 # how often (in iterations) to save the model parameters
    basedir: "/tmp" # base folder used for saving
    name: "collective_v0"
    tag: "50preys_1predator"

"""

# Parse the YAML text with the safe loader. Later cells index the result as
# run_config["env"] and run_config["trainer"]["num_envs"].
run_config = yaml.safe_load(CFG)

In [6]:
# Import the environment class explicitly instead of using a wildcard import,
# so that it is clear which name comes from custom_env and nothing else is
# silently pulled into (or shadowed in) the notebook namespace.
from custom_env import CustomEnv

In [7]:
from warp_drive.utils.env_registrar import EnvironmentRegistrar
from custom_env import CustomEnv

# Register the source path "custom_env_step_numba" under CustomEnv.name for
# the numba backend.
env_registrar = EnvironmentRegistrar()
env_registrar.add_cuda_env_src_path(
    CustomEnv.name,
    "custom_env_step_numba",
    env_backend="numba",
)

# Display the registrar's internal table of registered source paths to
# confirm the registration (private attribute, read here for inspection only).
env_registrar._customized_cuda_env_src_paths

{'pycuda': {}, 'numba': {'customenv': 'custom_env_step_numba'}}

In [8]:
# Build the environment wrapper around a CustomEnv instance created from the
# `env` section of the run config, replicated `trainer.num_envs` times.
#
# The backend is read from the run config (the same value CustomEnv receives
# through **run_config["env"]) rather than being hard-coded a second time, so
# the environment object and the wrapper cannot drift apart when the config
# is edited.
env_wrapper = EnvWrapper(
    env_obj=CustomEnv(**run_config["env"]),
    num_envs=run_config["trainer"]["num_envs"],
    env_backend=run_config["env"]["env_backend"],
    env_registrar=env_registrar,
)

  deprecation(


function_manager: Setting Numba to use CUDA device 0


In [14]:
from warp_drive.env_cpu_gpu_consistency_checker import EnvironmentCPUvsGPU


# CPU-vs-GPU consistency check for CustomEnv on one small scenario
# (4 preys, 1 predator), using 2 environments and 2 episodes.
#
# NOTE(review): the output recorded for this cell ends in a ValueError about
# mismatched array sizes in a concatenation. The cause is not visible from
# this notebook and must be resolved before this check can pass.
env_configs = {"test1": dict(num_preys=4, num_predators=1)}

testing_class = EnvironmentCPUvsGPU(
    env_registrar=env_registrar,
    dual_mode_env_class=CustomEnv,
    env_configs=env_configs,
    num_episodes=2,
    num_envs=2,
)
testing_class.test_env_reset_and_step()

Performing the consistency checks for scenario: test1...


ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 1, the array at index 0 has size 5 and the array at index 3 has size 1

In [9]:
# Map each policy tag to a list built from the wrapped env's `predators` /
# `preys` collections (presumably agent ids; confirm against CustomEnv).
policy_tag_to_agent_id_map = dict(
    predator=[*env_wrapper.env.predators],
    prey=[*env_wrapper.env.preys],
)

In [10]:
# Create the trainer from the wrapped environment, the parsed run config and
# the policy-to-agent mapping.
#
# NOTE(review): the output recorded for this cell is an AttributeError
# ('CustomEnv' object has no attribute 'eating_reward_for_predator'); the
# missing attribute has to be addressed in custom_env, which is not visible
# from this notebook.
trainer = Trainer(
    env_wrapper=env_wrapper,
    config=run_config,
    policy_tag_to_agent_id_map=policy_tag_to_agent_id_map,
)

AttributeError: 'CustomEnv' object has no attribute 'eating_reward_for_predator'