In [1]:
"""Runs the environments located in flow/benchmarks.

The environment file can be modified in the imports to change the environment
this runner script is executed on. This file runs the PPO algorithm in rllib
and utilizes the hyper-parameters specified in:
Proximal Policy Optimization Algorithms by Schulman et al.
"""
import argparse
import copy
import json
import os

import ray
import ray.tune as tune
from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from algorithms.envs.flow.utils.registry import make_create_env
from algorithms.envs.flow.utils.rllib import FlowParamsEncoder


In [2]:

# Name of the benchmark module to load from algorithms.envs.flow.benchmarks.
benchmark_name = 'grid0'
# number of rollouts per training iteration
num_rollouts = 1
# number of parallel workers
num_cpus = 1
# Import the benchmark and fetch its flow_params
benchmark = __import__(
    "algorithms.envs.flow.benchmarks.%s" % benchmark_name, fromlist=["flow_params"])
flow_params = benchmark.flow_params
# Set the env params' `evaluate` flag (presumably switches the environment to
# evaluation mode -- confirm against the Flow environment implementation).
flow_params['env'].evaluate = True

# get the env name and a creator for the environment
create_env, env_name = make_create_env(params=flow_params, version=0)

# initialize a ray instance; tolerate re-running this cell in a live kernel,
# where a plain ray.init() would raise because Ray is already initialized
ray.init(ignore_reinit_error=True)

alg_run = "PPO"

horizon = flow_params["env"].horizon
agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults: the nested "model" and "env_config" dicts are
# modified below, and a shallow copy would leak those edits into the agent
# class's shared default config.
config = copy.deepcopy(agent_cls._default_config)
config["num_workers"] = min(num_cpus, num_rollouts)
config["train_batch_size"] = horizon * num_rollouts
config["use_gae"] = True
config["horizon"] = horizon
# Default GAE lambda / learning rate, overridden per benchmark below.
gae_lambda = 0.97
step_size = 5e-4
if benchmark_name == "grid0":
    gae_lambda = 0.5
    step_size = 5e-5
elif benchmark_name == "grid1":
    gae_lambda = 0.3
config["lambda"] = gae_lambda
config["lr"] = step_size
config["vf_clip_param"] = 1e6
config["num_sgd_iter"] = 10
config['clip_actions'] = False  # FIXME(ev) temporary ray bug
config["model"]["fcnet_hiddens"] = [100, 50, 25]
config["observation_filter"] = "NoFilter"

# save the flow params for replay
flow_json = json.dumps(
    flow_params, cls=FlowParamsEncoder, sort_keys=True, indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Register as rllib env
register_env(env_name, create_env)



In [3]:
# Directory containing the trial folders to evaluate.
base_dir = '/root/ray_results/grid_0/'
dir_ls = os.listdir(base_dir)
# Keep only entries whose name has no '.' and contains the '08-30' date tag,
# and that are actual directories: the next step lists their contents, so a
# stray dot-less file would otherwise raise NotADirectoryError there.
subdir = [
    direc for direc in dir_ls
    if '.' not in direc
    and '08-30' in direc
    and os.path.isdir(os.path.join(base_dir, direc))
]
print(subdir)

# Turn the bare folder names into full paths.
subdir = [os.path.join(base_dir, direc) for direc in subdir]

['PPO_TrafficLightGridBenchmarkEnv-v0_e3367_00000_0_2021-08-30_18-12-50', 'PPO_TrafficLightGridBenchmarkEnv-v0_e3367_00001_1_2021-08-30_18-12-50', 'PPO_TrafficLightGridBenchmarkEnv-v0_e3367_00002_2_2021-08-30_18-12-50', 'PPO_TrafficLightGridBenchmarkEnv-v0_f4028_00000_0_2021-08-30_18-13-19', 'PPO_TrafficLightGridBenchmarkEnv-v0_f4028_00001_1_2021-08-30_18-13-19', 'PPO_TrafficLightGridBenchmarkEnv-v0_f4028_00002_2_2021-08-30_18-13-19', 'PPO_TrafficLightGridBenchmarkEnv-v0_02039_00000_0_2021-08-30_18-13-42', 'PPO_TrafficLightGridBenchmarkEnv-v0_02039_00001_1_2021-08-30_18-13-42', 'PPO_TrafficLightGridBenchmarkEnv-v0_02039_00002_2_2021-08-30_18-13-42']


In [5]:
# Checkpoint folders are named '<prefix><iteration>'; the iteration number is
# parsed from the part after this prefix.
CKPT_PREFIX = 'checkpoint_'

# One dict per trial directory (same order as `subdir`), mapping the
# checkpoint iteration (as a string) to the value returned by
# tune.run_experiments for that checkpoint.
results = []
exp_tag = {
    "run": alg_run,
    "env": env_name,
    "config": {
        **config
    },
    "max_failures": 999,
    "stop": {
        "training_iteration": 1
    },
}
for direc in subdir:
    test_result = {}
    for sdir in os.listdir(direc):
        if not sdir.startswith(CKPT_PREFIX):
            continue
        suffix = sdir[len(CKPT_PREFIX):]
        # Skip checkpoint-like entries without a purely numeric suffix
        # instead of crashing on int().
        if not suffix.isdigit():
            continue
        # Named ckpt_iter (not `iter`) so the builtin is not shadowed for the
        # rest of the kernel session.
        ckpt_iter = int(suffix)
        ckpt_base_dir = os.path.join(direc, sdir)
        # The checkpoint file inside the folder is addressed as
        # 'checkpoint-<iteration>'.
        exp_tag['restore'] = os.path.join(
            ckpt_base_dir, 'checkpoint-{}'.format(ckpt_iter))
        trials = tune.run_experiments({
            flow_params["exp_tag"]: exp_tag
        })
        test_result[str(ckpt_iter)] = trials
    results.append(test_result)

2021-09-07 05:07:46,368	ERROR syncer.py:72 -- Log sync requires rsync to be installed.


Trial name,status,loc
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00000,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00001,PENDING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00002,PENDING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00003,PENDING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00004,PENDING,


[2m[36m(pid=89057)[0m 2021-09-07 05:07:49,780	INFO trainer.py:714 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=89057)[0m 2021-09-07 05:07:49,780	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=89057)[0m 2021-09-07 05:07:49,781	INFO trainer.py:728 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.
[2m[36m(pid=89047)[0m 2021-09-07 05:07:49,882	INFO trainer.py:714 -- Tip: set framework=tfe or the --eager flag to enable TensorFlow eager execution
[2m[36m(pid=89047)[0m 2021-09-07 05:07:49,883	INFO ppo.py:159 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
[2m[36m(pid=89047)[0m 2021-09-07 05:07:49,883	INFO traine

Trial name,status,loc
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00000,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00001,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00002,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00003,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00004,PENDING,


[2m[36m(pid=89061)[0m Instructions for updating:
[2m[36m(pid=89061)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=89061)[0m Instructions for updating:
[2m[36m(pid=89061)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=89058)[0m Instructions for updating:
[2m[36m(pid=89058)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=89058)[0m Instructions for updating:
[2m[36m(pid=89058)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=89059)[0m Instructions for updating:
[2m[36m(pid=89059)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=89059)[0m Instructions for updating:
[2m[36m(pid=89059)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=89048)[0m Instructions for updating:
[2m[36m(pid=89048)[0m If using Keras pass *_constraint arguments to layers.
[2m[36m(pid=89048)[0m Instructions for updating:
[2m[36m(pid=89048)[0m If usi

Trial name,status,loc
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00000,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00001,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00002,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00003,RUNNING,
PPO_TrafficLightGridBenchmarkEnv-v0_8a00f_00004,PENDING,


[2m[36m(pid=89057)[0m 2021-09-07 05:07:54,838	ERROR worker.py:428 -- SystemExit was raised from the worker
[2m[36m(pid=89057)[0m Traceback (most recent call last):
[2m[36m(pid=89057)[0m   File "python/ray/_raylet.pyx", line 640, in ray._raylet.task_execution_handler
[2m[36m(pid=89057)[0m   File "python/ray/_raylet.pyx", line 488, in ray._raylet.execute_task
[2m[36m(pid=89057)[0m   File "python/ray/_raylet.pyx", line 525, in ray._raylet.execute_task
[2m[36m(pid=89057)[0m   File "python/ray/_raylet.pyx", line 532, in ray._raylet.execute_task
[2m[36m(pid=89057)[0m   File "python/ray/_raylet.pyx", line 536, in ray._raylet.execute_task
[2m[36m(pid=89057)[0m   File "python/ray/_raylet.pyx", line 486, in ray._raylet.execute_task.function_executor
[2m[36m(pid=89057)[0m   File "/root/miniconda3/envs/flow/lib/python3.7/site-packages/ray/_private/function_manager.py", line 563, in actor_method_executor
[2m[36m(pid=89057)[0m     return method(__ray_actor, *args, **kwa

Trial name,status,loc
PPO_TrafficLightGridBenchmarkEnv-v0_8f577_00000,PENDING,
PPO_TrafficLightGridBenchmarkEnv-v0_8f577_00001,PENDING,
PPO_TrafficLightGridBenchmarkEnv-v0_8f577_00002,PENDING,
PPO_TrafficLightGridBenchmarkEnv-v0_8f577_00003,PENDING,
PPO_TrafficLightGridBenchmarkEnv-v0_8f577_00004,PENDING,
