In [13]:
from typing import TypeVar

import numpy as np
from gymnasium import spaces
import torch
import gym
from lasertag import Lasertag

from syllabus.core import (
    TaskWrapper,
    PettingZooMultiProcessingSyncWrapper,
    make_multiprocessing_curriculum,
)
from syllabus.task_space import TaskSpace
from syllabus.curricula import DomainRandomization

# Generic placeholders used by the PettingZoo-style type annotations on the
# wrapper's `reset` and `step` methods.
ObsType = TypeVar("ObsType")
ActionType = TypeVar("ActionType")
AgentID = TypeVar("AgentID")

In [14]:
# Raw (unwrapped) two-agent Lasertag environment, used by the inspection cells below.
env = Lasertag(n_agents=2)

In [15]:
# Display the raw environment's action space.
env.action_space

[Discrete(5), Discrete(5)]

In [65]:
# Display the raw environment's observation space.
env.observation_space

Dict('image': Box(0, 255, (2, 3, 5, 5), uint8))

In [69]:
# Smoke test of the raw environment: reset, take a single step, inspect the result.
# NOTE: this first `obs` is overwritten by the `env.step` call two lines below.
obs = env.reset()["image"]
# Keys 0 and 1 are presumably agent ids -- TODO confirm the action format
# that `Lasertag.step` expects.
actions = {0: [1, 1], 1: [1, 1]}
# `step` is unpacked into four values here (there is no separate `truncated` flag).
obs, reward, done, info = env.step(actions)
# Shape of the "image" entry of the post-step observation.
obs["image"].shape

(2, 3, 5, 5)

In [84]:
def _np_array_to_pz_dict(array: np.ndarray) -> dict[int : np.ndarray]:
        """
        Returns a dictionary containing individual observations for each agent.
        Assumes that the batch dimension represents individual agents.
        """
        out = {}
        for idx, i in enumerate(array):
            out[idx] = i
        return out

def _singleton_to_pz_dict(value: bool) -> dict[int:bool]:
    """
    Broadcasts the `done` and `trunc` flags to dictionaries keyed by agent id.
    """
    return {idx: value for idx in range(2)}

In [94]:
class LasertagParallelWrapper(TaskWrapper):
    """
    Wrapper ensuring compatibility with the PettingZoo Parallel API.

    Converts the batched outputs of the wrapped environment into
    dictionaries keyed by agent index (0 .. `n_agents` - 1).

    Lasertag Environment:
        * Action shape:  `n_agents` * `Discrete(5)`
        * Observation shape: Dict('image': Box(0, 255, (`n_agents`, 3, 5, 5), uint8))
    """

    def __init__(self, n_agents, *args, **kwargs):
        """
        Args:
            n_agents: Number of agents in the wrapped environment; determines
                the keys of every per-agent dictionary this wrapper returns.
            *args, **kwargs: Forwarded unchanged to `TaskWrapper.__init__`.
        """
        super().__init__(*args, **kwargs)
        self.n_agents = n_agents
        self.task = None
        self.episode_return = 0
        # NOTE(review): `nvec` is passed as a 2-D array ([[2], [5]]) -- confirm
        # that `TaskSpace` handles this shape as intended.
        self.task_space = TaskSpace(spaces.MultiDiscrete(np.array([[2], [5]])))
        self.possible_agents = np.arange(self.n_agents)

    def _np_array_to_pz_dict(self, array: np.ndarray) -> dict[int, np.ndarray]:
        """
        Returns a dictionary containing individual entries for each agent.
        Assumes that the batch (first) dimension represents individual agents.
        """
        return dict(enumerate(array))

    def _singleton_to_pz_dict(self, value: bool) -> dict[int, bool]:
        """
        Broadcasts a single value (e.g. the `done` and `trunc` flags) to a
        dictionary keyed by agent index.
        """
        return {idx: value for idx in range(self.n_agents)}

    def reset(self) -> dict[AgentID, ObsType]:
        """
        Resets the environment and returns a dictionary of observations
        keyed by agent index.

        Only the observations are returned (no info dictionary).
        """
        obs = self.env.reset()
        # The wrapped environment returns a mapping; only its "image" entry
        # is exposed to the agents.
        return self._np_array_to_pz_dict(obs["image"])

    def step(self, action: dict[AgentID, ActionType]) -> tuple[
        dict[AgentID, ObsType],
        dict[AgentID, float],
        dict[AgentID, bool],
        dict[AgentID, bool],
        dict[AgentID, dict],
    ]:
        """
        Takes inputs in the PettingZoo (PZ) Parallel API format, performs a step and
        returns outputs in PZ format.

        Returns:
            A 5-tuple `(obs, rew, done, trunc, info)` of per-agent dictionaries.
            `obs` is passed through `self.observation` before being returned.
            `done` and `trunc` hold the same value for every agent.
        """
        # `action` is forwarded to the wrapped environment unchanged.
        obs, rew, done, info = self.env.step(action)
        obs = obs['image']
        trunc = False  # there is no `truncated` flag in this environment
        self.task_completion = self._task_completion(obs, rew, done, trunc, info)
        # convert outputs back to PZ format
        obs = self._np_array_to_pz_dict(obs)
        rew = self._np_array_to_pz_dict(rew)
        done = self._singleton_to_pz_dict(done)
        trunc = self._singleton_to_pz_dict(trunc)
        # NOTE(review): the environment's own `info` is discarded here; each
        # agent's info entry is the task-completion value rather than a dict,
        # despite the return annotation -- confirm this is what consumers expect.
        info = self._singleton_to_pz_dict(self.task_completion)

        return self.observation(obs), rew, done, trunc, info

In [100]:
if __name__ == "__main__":
    # Builds a Lasertag environment, wraps it for the PettingZoo Parallel API,
    # and connects it to a domain-randomization curriculum through the
    # multiprocessing sync wrapper.
    #
    # NOTE(review): the recorded run of this cell raised a TypeError in the
    # curriculum update thread (inside `TaskSpace.list_tasks`). The cause is
    # not visible from this notebook -- confirm the task space built in
    # `LasertagParallelWrapper.__init__` is supported by the installed
    # syllabus version.

    # ---- ALGO PARAMS ----
    # These values are only defined here; nothing in this cell reads them
    # apart from `n_agents` further down.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    ent_coef = 0.1
    vf_coef = 0.1
    clip_coef = 0.1
    gamma = 0.99
    batch_size = 32
    stack_size = 3
    frame_size = (96, 96)
    action_size = 3
    max_cycles = 125
    total_episodes = 100

    # PLR Params
    num_steps = 128

    # ---- ENV SETUP ----
    n_agents = 2
    env = Lasertag(n_agents=n_agents)

    # ---- CURRICULUM SETUP ----
    # `env` is rebound twice: first to the Parallel-API wrapper, then to the
    # multiprocessing sync wrapper that shares the curriculum's queues.
    env = LasertagParallelWrapper(env=env, n_agents=n_agents)
    curriculum = DomainRandomization(env.task_space)
    curriculum, task_queue, update_queue = make_multiprocessing_curriculum(curriculum)
    env = PettingZooMultiProcessingSyncWrapper(env, task_queue, update_queue, task_space=env.task_space)

Exception in thread update:
Traceback (most recent call last):
  File "c:\Users\ryanp\AppData\Local\Programs\Python\Python310\lib\threading.py", line 1016, in _bootstrap_inner


    self.run()
  File "c:\Users\ryanp\AppData\Local\Programs\Python\Python310\lib\threading.py", line 953, in run
    self._target(*self._args, **self._kwargs)
  File "c:\Users\ryanp\OneDrive\Documents\Syllabus-1\syllabus\core\curriculum_sync_wrapper.py", line 120, in _update_queues
    new_tasks = self.task_space.list_tasks()[self.num_assigned_tasks:self.num_assigned_tasks + requested_tasks]
  File "c:\Users\ryanp\OneDrive\Documents\Syllabus-1\syllabus\task_space\task_space.py", line 159, in list_tasks
    elif isinstance(gym_space, MultiDiscrete):
TypeError: 'NoneType' object is not iterable
