In [1]:
import os
import time

from scenic.zoo import ScenicZooEnv
from scenic.simulators.metadrive import MetaDriveSimulator
import scenic
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.utils import set_random_seed
import supersuit as ss



In [2]:
# Home directory of the current user; the map path below is built relative to it.
root_user = os.path.expanduser("~")
root_user  # bare last expression so the notebook displays the resolved path

'/Users/kxu'

In [3]:
# SUMO road network for CARLA Town01, located under the user's home directory.
sumo_map = root_user + "/ScenicGym/assets/maps/CARLA/Town01.net.xml"

# Per-agent observation spaces: a 249-element float32 vector bounded to [0, 1].
obs_space_dict = {
    agent_id: gym.spaces.Box(low=-0.0, high=1.0, shape=(249,), dtype=np.float32)
    for agent_id in ("agent0", "agent1")
}

# Per-agent action spaces: a 2-element float32 vector bounded to [-1, 1].
action_space_dict = {
    agent_id: gym.spaces.Box(low=-1.0, high=1.0, shape=(2,), dtype=np.float32)
    for agent_id in ("agent0", "agent1")
}

In [4]:
def create_env(need_monitor=False):
    """Build a fresh two-agent Scenic/MetaDrive environment.

    Compiles ``intersect_drive.scenic`` in 2D mode against the MetaDrive world
    model and wraps the scenario in a ``ScenicZooEnv`` backed by a
    ``MetaDriveSimulator`` created with ``render=False`` and
    ``real_time=False``.

    Args:
        need_monitor: Currently ignored. The ``Monitor`` wrapping it used to
            toggle is disabled, so the raw environment is always returned.

    Returns:
        The newly constructed ``ScenicZooEnv``.
    """
    scenario = scenic.scenarioFromFile(
        "intersect_drive.scenic",
        model="scenic.simulators.metadrive.model",
        mode2D=True,
    )
    simulator = MetaDriveSimulator(sumo_map=sumo_map, render=False, real_time=False)

    env_options = dict(
        max_steps=50,
        observation_space=obs_space_dict,
        action_space=action_space_dict,
        agents=["agent0", "agent1"],
    )
    # NOTE(review): the meaning of the third positional argument (None) is not
    # visible from this notebook -- confirm against ScenicZooEnv's signature.
    return ScenicZooEnv(scenario, simulator, None, **env_options)

def train(env_fn, steps: int = 10_000, seed: int | None = 0, **env_kwargs):
    """Train a single PPO policy shared by every agent of a multi-agent env.

    The environment produced by ``env_fn`` is wrapped with SuperSuit so that
    Stable-Baselines3 can treat it as a vectorised environment, PPO is trained
    on it, and the resulting model is saved in the working directory under
    ``<environment name>_<YYYYmmdd-HHMMSS>``.

    Args:
        env_fn: Callable that builds and returns the environment to train on.
        steps: Total number of timesteps passed to ``PPO.learn``.
        seed: Seed handed to ``env.reset``; ``None`` is passed through as-is.
        **env_kwargs: Keyword arguments forwarded to ``env_fn``.
    """
    env = env_fn(**env_kwargs)

    try:
        # Add black death wrapper so the number of agents stays constant.
        # MarkovVectorEnv does not support environments with varying numbers of
        # active agents unless black_death is set to True.
        env = ss.black_death_v3(env)

        env.reset(seed=seed)

        print(f"Starting training on {str(env.metadata['name'])}.")

        # NOTE: this conversion asserts that the environment exposes a
        # `possible_agents` attribute; the environment class must provide it.
        env = ss.pettingzoo_env_to_vec_env_v1(env)
        env = ss.concat_vec_envs_v1(env, 8, num_cpus=1, base_class="stable_baselines3")

        # No visual pre-processing is applied in this function, so the MLP
        # policy is always used. The previous `CnnPolicy if visual_observation`
        # expression referenced two names that were never defined.
        model = PPO(
            MlpPolicy,
            env,
            verbose=3,
            batch_size=256,
        )

        model.learn(total_timesteps=steps)

        model.save(f"{env.unwrapped.metadata.get('name')}_{time.strftime('%Y%m%d-%H%M%S')}")

        print("Model has been saved.")

        print(f"Finished training on {str(env.unwrapped.metadata['name'])}.")
    finally:
        # Release the environment even when wrapping or training fails.
        env.close()

In [5]:
# Kick off training on the Scenic/MetaDrive environment; seed=None is passed
# straight through to env.reset.
# NOTE(review): the recorded output of this cell ends in an AssertionError
# because the environment lacks the `possible_agents` attribute that
# supersuit's pettingzoo_env_to_vec_env requires.
train(create_env, seed=None)

AGENTS: ['agent0', 'agent1']
AGENTS: ['agent0', 'agent1']
black agents: ['agent0', 'agent1']
OBSS: {'agent0': array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 0.30923638,
       0.30387354, 0.2988948 , 0.29427534, 0.29650518, 0.31154627,
       0.32843173, 0.3475295 , 0.36928037, 1.        , 1.        ,
       1.        , 1.        , 1.        , 1.        , 1.        ,
       1.        , 

AssertionError: environment passed to pettingzoo_env_to_vec_env must have possible_agents attribute.

In [5]:
from functools import partial
from IPython.display import clear_output

In [6]:
# Fix the global RNG seed before building any environment.
set_random_seed(0)

# Four environment factories, each calling create_env(need_monitor=True).
# NOTE(review): the recorded run of this cell raised a ValueError because
# ScenicZooEnv is not a Gymnasium environment, which DummyVecEnv requires.
train_env = DummyVecEnv(
    [partial(create_env, need_monitor=True) for _ in range(4)]
)

model = PPO(
    policy="MlpPolicy",
    env=train_env,
    n_steps=4096,
    verbose=1,
)
model.learn(total_timesteps=1000)

# Drop the training log from the cell output once learning has finished.
clear_output()

ValueError: The environment is of type <class 'scenic.zoo.envs.scenic_zoo.ScenicZooEnv'>, not a Gymnasium environment. In this case, we expect OpenAI Gym to be installed and the environment to be an OpenAI Gym environment.

In [10]:
# Fixed joint action, keyed by agent id, that is re-sent on every step.
# NOTE(review): presumably each pair matches the 2-element action space
# declared earlier in this notebook -- confirm the meaning of each component.
action = dict(agent0 = [1, 0], agent1=[0, 0])
# NOTE(review): `env` is not created in any visible cell of this notebook, so
# this cell relies on kernel state left over from elsewhere and will fail on a
# fresh Restart & Run All.
for _ in range(2000):
    o, r, te, tc, info = env.step(action)
    # Stop as soon as the step reports termination or truncation.
    # NOTE(review): if step() returns per-agent dicts, a non-empty dict is
    # always truthy and the loop exits after the first step -- confirm the
    # return types.
    if te or tc:
        break
        
env.close()
print("We done")
    

We done
