In [3]:
from copy import deepcopy
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.rllib.agents.ppo.ppo_policy import PPOTFPolicy
from ray import tune
from ray.tune.registry import register_env
from ray.tune import run_experiments

from flow.envs.multiagent import MultiAgentAccelPOEnv, MultiAgentEightEnv
from flow.networks import FigureEightNetwork
from flow.controllers import ContinuousRouter
from flow.controllers import IDMController
from flow.controllers import RLController
from flow.core.params import EnvParams
from flow.core.params import InitialConfig
from flow.core.params import NetParams
from flow.core.params import SumoParams
from flow.core.params import SumoCarFollowingParams
from flow.core.params import VehicleParams
from flow.networks.figure_eight import ADDITIONAL_NET_PARAMS
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

import numpy as np
# time horizon of a single rollout (passed to EnvParams as `horizon`)
HORIZON = 1000
# number of rollouts per training iteration
N_ROLLOUTS = 1
# number of parallel workers
N_CPUS = 1
# NOTE(review): the three constants below are not referenced in the visible
# cells; the meanings given are guesses from their names -- confirm where used.
# presumably the total number of training iterations
N_ITER = 100000
# presumably the checkpoint interval, in training iterations
N_CHECK = 100
# purpose not evident from the name alone -- TODO document
EP_MAX = 2
# Vehicle fleet: ten RL-controlled vehicles. No human-driven vehicles are
# added; the human-driven definition is kept below, commented out, for
# reference.
vehicles = VehicleParams()
# vehicles.add(
#     veh_id='human',
#     acceleration_controller=(IDMController, {
#         'noise': 0.2
#     }),
#     routing_controller=(ContinuousRouter, {}),
#     car_following_params=SumoCarFollowingParams(
#         speed_mode='obey_safe_speed',
#     ),
#     num_vehicles=0)
vehicles.add(
    veh_id='rl',
    num_vehicles=10,
    acceleration_controller=(RLController, {}),
    routing_controller=(ContinuousRouter, {}),
    # alternative speed mode that was tried here: "obey_safe_speed"
    car_following_params=SumoCarFollowingParams(speed_mode='aggressive'),
)

# SUMO simulator settings (see flow.core.params.SumoParams): a simulation
# step of 0.1 with rendering switched off.
sim_params = SumoParams(sim_step=0.1, render=False)

# Environment settings: the rollout horizon plus the environment-specific
# extras that are handed through as `additional_params`.
env_params = EnvParams(
    horizon=HORIZON,
    additional_params=dict(
        target_velocity=20,
        max_accel=3,
        max_decel=3,
        # perturb_weight=0.03,  (currently disabled)
        sort_vehicles=False,
    ),
)

# Network parameters built from a deep copy of the figure-eight defaults, so
# the imported ADDITIONAL_NET_PARAMS object itself is never shared or mutated.
net = NetParams(additional_params=deepcopy(ADDITIONAL_NET_PARAMS))
# Figure-eight network instance the experiment runs on, populated with the
# vehicles and network parameters defined earlier in this cell.
network = FigureEightNetwork(
    name="marl_eight", vehicles=vehicles, net_params=net)

# Instantiate the multi-agent figure-eight environment from the environment
# parameters, simulator parameters and network built earlier in this cell.
env = MultiAgentEightEnv(env_params, sim_params, network)

In [4]:
# Reset the environment and keep what it returns; later cells index this
# object by vehicle id (e.g. 'rl_0').
state = env.reset()

In [13]:
# Ids of the RL vehicles as reported by the environment's vehicle kernel;
# the bare name on the last line displays them as the cell output.
ids = env.k.vehicle.get_rl_ids()
ids

['rl_0',
 'rl_1',
 'rl_2',
 'rl_3',
 'rl_4',
 'rl_5',
 'rl_6',
 'rl_7',
 'rl_8',
 'rl_9']

In [12]:
# Tenth RL vehicle id (index 9).
# NOTE(review): this cell's execution count (12) is lower than that of the
# cell assigning `ids` (13), so the notebook was run out of order; re-run
# top to bottom on a fresh kernel before relying on the recorded output.
ids[9]

'rl_9'

In [14]:
# Inspect the entry of the reset state that belongs to vehicle 'rl_0'.
state['rl_0']

array([0.0006636 , 0.        , 0.        , 0.08359956, 0.        ,
       0.1088496 ])

In [16]:
# Shape of the per-vehicle array for 'rl_0'.
# NOTE(review): `a` is a scratch name; a descriptive one would read better if
# this value is kept.
a = state['rl_0'].shape

In [17]:
# Size of the first dimension of the 'rl_0' array (6 in the recorded output).
a[0]

6

In [19]:
# Scratch check of list repetition: a list holding the value 1 three times.
b = [1] * 3
b

[1, 1, 1]

In [27]:
# Scratch draw of a random integer in [0, 3).
# NOTE(review): no seed is set in the visible cells, so the recorded output
# will not reproduce on re-run.
np.random.randint(3)

0