# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout, defined once so that every cell of the notebook uses
# the same value.
# Original author's note: 103 is the maximum horizon if the rollout should
# stop before leaving (presumably before the vehicles leave the network);
# the default was 500.
HORIZON = 120

# Name of the experiment.
experiment_name = "IntersectionExample"

# Scenario class: show what the package exports, then pick one by name.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
scenario_name = "IntersectionTWScenario_2"

# Environment class: show what the package exports, then pick one by name.
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv_sharedPolicy_4veh"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW', 'TenaciousDScenario', 'IntersectionTWScenario_2']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv', 'MultiAgentIntersectionEnv_baseline_1', 'MultiAgentIntersectionEnv_baseline_2', 'MultiAgentIntersectionEnv_baseline_3', 'MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit', 'MultiTenaciousDEnv', 'MultiAgentIntersectionEnv_sharedPolicy_2veh', 'MultiAgentIntersectionEnv_sharedPolicy_4veh']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Extra network parameters, handed to NetParams through `additional_params`.
additionalNetParams = {
    "edge_length": 80,
    "lanes": 1,
    "speed_limit": 30,
}

# Network parameters. Entries marked "changed" deviate from the default noted
# beside them.
net_params = NetParams(
    no_internal_links=False,                # default: True (changed, so that intersections are not skipped)
    inflows=None,                           # default: None
    osm_path=None,                          # default: None
    netfile=None,                           # default: None
    additional_params=additionalNetParams,  # default: None (changed)
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial-configuration parameters. Entries marked "changed" deviate from the
# default noted beside them.
initial_config = InitialConfig(
    shuffle=True,                     # default: False (changed)
    spacing="custom",                 # default: "uniform" (changed)
    min_gap=0,                        # default: 0
    perturbation=29.99,               # default: 0.0 (changed)
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# SUMO simulation parameters. Every value is spelled out explicitly; the
# default recorded by the original author is noted beside each entry.
sumo_params = SumoParams(
    port=None,                # default: None
    sim_step=0.1,             # default: 0.1
    emission_path=None,       # default: None
    lateral_resolution=None,  # default: None
    no_step_log=True,         # default: True
    render=False,             # default: False
    save_render=False,        # default: False
    sight_radius=25,          # default: 25
    show_radius=False,        # default: False
    pxpm=2,                   # default: 2
    overtake_right=False,     # default: False
    seed=None,                # default: None
    restart_instance=False,   # default: False
    print_warnings=True,      # default: True
    teleport_time=-1,         # default: -1
    num_clients=1,            # default: 1
    sumo_binary=None,         # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Extra environment parameters, handed to EnvParams through `additional_params`.
additionalEnvParams = {
    "max_accel": 3,   # maximum acceleration of autonomous vehicles
    "max_decel": 3,   # maximum deceleration of autonomous vehicles
    "target_velocity": 30,
}

# Environment parameters. Entries marked "changed" deviate from the default
# noted beside them.
env_params = EnvParams(
    additional_params=additionalEnvParams,  # default: None (changed)
    horizon=HORIZON,                        # default: 500 (changed)
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import; no name from `random` is referenced in the
# cells visible here. If a later cell needs it, prefer an explicit import
# (e.g. `import random`) so the origin of each name stays clear.
from random import *

# Container for the vehicle definitions; vehicle types are registered on it
# with `vehicles.add(...)`.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters (default: None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (default: None); defined here but not passed below.
lc_parameter = None

# Register the "rl" vehicle type.
# Keyword arguments the original author left disabled: speed_mode
# (default "right_of_way"), lane_change_mode (default "no_lat_collide"),
# sumo_car_following_params and sumo_lc_params (both default None).
vehicles.add(
    veh_id="rl",                                     # name of the vehicle
    acceleration_controller=(RLController, {}),      # default: (SumoCarFollowingController, {})
    lane_change_controller=(StaticLaneChanger, {}),  # default: (SumoLaneChangeController, {})
    routing_controller=(ContinuousRouter, {}),       # default: None
    initial_speed=0,                                 # default: 0
    num_vehicles=4,                                  # default: 1
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Bundle all settings into flow_params. The dictionary keys must be exactly
# the ones used below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Number of parallel workers.
N_CPUS = 2
# Number of rollouts per training iteration.
N_ROLLOUTS = 200

# Start Ray with one CPU more than there are workers (presumably for the
# process that drives training — confirm).
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-05-02_10-37-15_29615/logs.
Waiting for redis server at 127.0.0.1:45900 to respond...
Waiting for redis server at 127.0.0.1:56386 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8891/notebooks/ray_ui.ipynb?token=531b4b319102dd18351d8d87b240733bacbc0a59861293a8



{'node_ip_address': '172.16.123.117',
 'object_store_addresses': ['/tmp/ray/session_2019-05-02_10-37-15_29615/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-05-02_10-37-15_29615/sockets/raylet'],
 'redis_address': '172.16.123.117:45900',
 'webui_url': 'http://localhost:8891/notebooks/ray_ui.ipynb?token=531b4b319102dd18351d8d87b240733bacbc0a59861293a8'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or a user-defined trainable function
# or class registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults. A shallow `.copy()` shares the nested "model" and
# "env_config" dicts with the agent class, so the in-place edits below would
# leak into the class-level default config (and accumulate when this cell is
# re-run or another experiment is configured in the same kernel).
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size: rollout length times rollouts per iteration
config["gamma"] = 0.999  # discount rate (author's note: default 0.999)
config["model"].update({"fcnet_hiddens": [100, 50, 25]})  # hidden layer sizes (author's note: default 64, 32)
config["use_gae"] = True  # use generalized advantage estimation
config["lambda"] = 0.97  # presumably the GAE lambda parameter — confirm against the RLlib docs
# Options left disabled by the original author:
# config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
# config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"]  # author's note: 200 is the default — still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize them to a JSON string and store
# it, together with the algorithm name, in the environment config.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to obtain the environment
# constructor and its Gym name for this experiment.
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register the environment with RLlib under its Gym name.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy setup: build one environment instance just to read its
# observation and action spaces.
# NOTE(review): this instance is not shut down in this cell — confirm whether
# it should be terminated once the spaces have been read.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space

def gen_policy():
    """Return the policy specification tuple for the shared PPO policy.

    The tuple is (PPOPolicyGraph, obs_space, act_space, {}); the trailing
    empty dict presumably holds per-policy config overrides — confirm against
    the RLlib multi-agent documentation.
    """
    policy_cls = PPOPolicyGraph
    extra_config = {}
    return (policy_cls, obs_space, act_space, extra_config)

# Only one policy graph, 'rl_0', is defined.
policy_graphs = dict(rl_0=gen_policy())
    
def policy_mapping_fn(agent_id):
    """Return the policy id for an agent.

    The agent id is ignored: every agent is mapped to the policy 'rl_0'.
    """
    shared_policy_id = 'rl_0'
    return shared_policy_id

# Multi-agent settings: the available policy graphs, the function that maps
# an agent id to a policy id, and the list of policies to train.
config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
    'policies_to_train': ['rl_0'],
}

 Starting SUMO on port 49175


New Teamspirit:
0.04880195980933988
-0.5698350356183577
[('bottom_intersection', 33.435266406731216), ('bottom_intersection', 49.43253509925844), ('top_intersection', 13.348699041252347), ('top_intersection', 52.85898909213359)]


In [None]:
# Launch training. The experiment is keyed by its tag from flow_params.
trials = run_experiments(
    {
        flow_params["exp_tag"]: {
            # RL algorithm to run
            "run": alg_run,
            # environment name generated earlier
            "env": gym_name,
            # configuration params (must match the "run" value); passed as a
            # shallow copy of `config`
            "config": dict(config),
            # number of iterations between checkpoints
            "checkpoint_freq": 1,
            "max_failures": 999,
            # stopping conditions: number of iterations to stop after
            "stop": {"training_iteration": 1000},
        },
    }
)

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0_2019-05-02_10-37-18hbo7ufcc -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_10-39-22
  done: false
  episode_len_mean: 119.25
  episode_reward_max: 44.71218454332695
  episode_reward_mean: 18.806414594426464
  episode_reward_min: -389.4910954789854
  episodes_this_iter: 200
  episodes_total: 200
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 20570.786
    load_time_ms: 4

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 470 s, 5 iter, 120000 ts, -62.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_10-47-05
  done: false
  episode_len_mean: 106.5911111111111
  episode_reward_max: 210.9356255193253
  episode_reward_mean: -46.558701744159364
  episode_reward_min: -350.2312174305498
  episodes_this_iter: 225
  episodes_total: 1258
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 20545.255
    load_time_ms: 11.05
    num_steps_sampled: 144000
    num_steps_trained: 144000
    rl_0:
      cur_kl_coeff: 0.01250000111758709
      cur_lr: 4.999999873689376e-05
      entropy: 1.3857861757278442
      kl: 0.0058530461974442005
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_10-56-40
  done: false
  episode_len_mean: 95.95617529880478
  episode_reward_max: 306.7063225335668
  episode_reward_mean: -59.62094419283321
  episode_reward_min: -357.6593962816094
  episodes_this_iter: 251
  episodes_total: 2464
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 23900.515
    load_time_ms: 4.205
    num_steps_sampled: 264000
    num_steps_trained: 264000
    rl_0:
      cur_kl_coeff: 0.00039062503492459655
      cur_lr: 4.999999873689376e-05
      entropy: 1.352575659751892
      kl: 0.00715090986341238
      policy_loss: -0.0019051320850849152
      total_loss: 589.0093994140625
      vf_explained_var: 0.5111197829246521
      vf_loss: 589.01123046875
    sample_time_ms: 79718.244
    update_time_ms: 6.567
  iterations_since_restore: 11
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  policy_r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 1660 s, 16 iter, 384000 ts, -11.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_11-06-54
  done: false
  episode_len_mean: 90.2593984962406
  episode_reward_max: 375.74816309415246
  episode_reward_mean: -45.605702323018804
  episode_reward_min: -362.0269074270523
  episodes_this_iter: 266
  episodes_total: 4013
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 23838.648
    load_time_ms: 4.084
    num_steps_sampled: 408000
    num_steps_trained: 408000
    rl_0:
      cur_kl_coeff: 6.103516170696821e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.314544439315796
      kl: 0.004707448184490204

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_11-14-24
  done: false
  episode_len_mean: 90.3796992481203
  episode_reward_max: 443.45326225471337
  episode_reward_mean: 17.117335466812612
  episode_reward_min: -358.6292672963119
  episodes_this_iter: 266
  episodes_total: 5340
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 19337.344
    load_time_ms: 3.914
    num_steps_sampled: 528000
    num_steps_trained: 528000
    rl_0:
      cur_kl_coeff: 1.9073488033427566e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.2755107879638672
      kl: 0.005140467546880245
      policy_loss: -0.0012375077931210399
      total_loss: 539.9653930664062
      vf_explained_var: 0.7504706382751465
      vf_loss: 539.9666137695312
    sample_time_ms: 74500.491
    update_time_ms: 6.277
  iterations_since_restore: 22
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  poli

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 2701 s, 27 iter, 648000 ts, 55 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_11-24-18
  done: false
  episode_len_mean: 89.97752808988764
  episode_reward_max: 482.1485572816804
  episode_reward_mean: 74.45601876789944
  episode_reward_min: -357.5074132481072
  episodes_this_iter: 267
  episodes_total: 6939
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 20464.328
    load_time_ms: 3.775
    num_steps_sampled: 672000
    num_steps_trained: 672000
    rl_0:
      cur_kl_coeff: 2.980232505223057e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.2340079545974731
      kl: 0.0046685910783708096
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_11-31-35
  done: false
  episode_len_mean: 88.45018450184502
  episode_reward_max: 523.5683042821342
  episode_reward_mean: 83.5853741624893
  episode_reward_min: -358.3969620523684
  episodes_this_iter: 271
  episodes_total: 8262
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 19918.444
    load_time_ms: 3.637
    num_steps_sampled: 792000
    num_steps_trained: 792000
    rl_0:
      cur_kl_coeff: 9.313226578822054e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.1857696771621704
      kl: 0.004802929703146219
      policy_loss: -0.001211196300573647
      total_loss: 535.0654907226562
      vf_explained_var: 0.829883873462677
      vf_loss: 535.0667724609375
    sample_time_ms: 73429.448
    update_time_ms: 5.467
  iterations_since_restore: 33
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  policy_re

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 3669 s, 38 iter, 912000 ts, 150 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_11-40-22
  done: false
  episode_len_mean: 86.35971223021583
  episode_reward_max: 540.0922585309808
  episode_reward_mean: 71.16264994079128
  episode_reward_min: -356.86085659097273
  episodes_this_iter: 278
  episodes_total: 9896
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18294.987
    load_time_ms: 3.637
    num_steps_sampled: 936000
    num_steps_trained: 936000
    rl_0:
      cur_kl_coeff: 1.4551916529409459e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1290558576583862
      kl: 0.0062414114363491535

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_11-47-40
  done: false
  episode_len_mean: 84.89752650176679
  episode_reward_max: 562.4580069108613
  episode_reward_mean: 96.71122057745224
  episode_reward_min: -356.52151953633074
  episodes_this_iter: 283
  episodes_total: 11261
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18305.934
    load_time_ms: 3.626
    num_steps_sampled: 1056000
    num_steps_trained: 1056000
    rl_0:
      cur_kl_coeff: 4.547473915440456e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.0904924869537354
      kl: 0.005477508530020714
      policy_loss: -0.0010644840076565742
      total_loss: 550.9598388671875
      vf_explained_var: 0.8588311076164246
      vf_loss: 550.9609375
    sample_time_ms: 69204.958
    update_time_ms: 5.283
  iterations_since_restore: 44
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  policy_r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 4631 s, 49 iter, 1176000 ts, 133 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_11-56-24
  done: false
  episode_len_mean: 92.11538461538461
  episode_reward_max: 559.2702026021935
  episode_reward_mean: 169.3328276658146
  episode_reward_min: -360.12429927852486
  episodes_this_iter: 260
  episodes_total: 12889
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18246.268
    load_time_ms: 3.523
    num_steps_sampled: 1200000
    num_steps_trained: 1200000
    rl_0:
      cur_kl_coeff: 7.105427992875712e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.0614876747131348
      kl: 0.0058499826118350

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_12-04-04
  done: false
  episode_len_mean: 89.54681647940075
  episode_reward_max: 563.987105526123
  episode_reward_mean: 156.96635014525665
  episode_reward_min: -353.0255586805662
  episodes_this_iter: 267
  episodes_total: 14241
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18330.787
    load_time_ms: 3.512
    num_steps_sampled: 1320000
    num_steps_trained: 1320000
    rl_0:
      cur_kl_coeff: 2.22044624777366e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.006966233253479
      kl: 0.007145662792026997
      policy_loss: -0.001686106319539249
      total_loss: 604.7735595703125
      vf_explained_var: 0.8491467833518982
      vf_loss: 604.7752685546875
    sample_time_ms: 71116.406
    update_time_ms: 5.529
  iterations_since_restore: 55
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  policy

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 5618 s, 60 iter, 1440000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_12-12-54
  done: false
  episode_len_mean: 91.48669201520913
  episode_reward_max: 573.1614023058575
  episode_reward_mean: 198.68009349864778
  episode_reward_min: -353.34787937350745
  episodes_this_iter: 263
  episodes_total: 15864
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18396.78
    load_time_ms: 3.59
    num_steps_sampled: 1464000
    num_steps_trained: 1464000
    rl_0:
      cur_kl_coeff: 3.469447262146344e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9605714678764343
      kl: 0.00617224490270018

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_12-20-17
  done: false
  episode_len_mean: 85.53928571428571
  episode_reward_max: 597.2733717735116
  episode_reward_mean: 143.57204275578667
  episode_reward_min: -356.73142563469673
  episodes_this_iter: 280
  episodes_total: 17260
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18653.075
    load_time_ms: 3.561
    num_steps_sampled: 1584000
    num_steps_trained: 1584000
    rl_0:
      cur_kl_coeff: 1.0842022694207325e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.9013547897338867
      kl: 0.006985373329371214
      policy_loss: -0.001549607957713306
      total_loss: 565.0505981445312
      vf_explained_var: 0.8756411671638489
      vf_loss: 565.0521850585938
    sample_time_ms: 69658.293
    update_time_ms: 5.351
  iterations_since_restore: 66
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  p

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 6650 s, 71 iter, 1704000 ts, 138 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_12-30-16
  done: false
  episode_len_mean: 86.13718411552347
  episode_reward_max: 596.3323714235129
  episode_reward_mean: 142.325333492792
  episode_reward_min: -356.3762550431547
  episodes_this_iter: 277
  episodes_total: 18932
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 20191.94
    load_time_ms: 3.678
    num_steps_sampled: 1728000
    num_steps_trained: 1728000
    rl_0:
      cur_kl_coeff: 1.6940660459698945e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.8460237383842468
      kl: 0.007563835009932518

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_12-37-36
  done: false
  episode_len_mean: 84.06315789473685
  episode_reward_max: 603.9137109201827
  episode_reward_mean: 130.19107802066264
  episode_reward_min: -350.2070850393542
  episodes_this_iter: 285
  episodes_total: 20310
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 19576.046
    load_time_ms: 3.707
    num_steps_sampled: 1848000
    num_steps_trained: 1848000
    rl_0:
      cur_kl_coeff: 5.29395639365592e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.7983914613723755
      kl: 0.0064638229086995125
      policy_loss: -0.001161759253591299
      total_loss: 829.5853881835938
      vf_explained_var: 0.8245661854743958
      vf_loss: 829.5866088867188
    sample_time_ms: 73901.213
    update_time_ms: 5.673
  iterations_since_restore: 77
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  pol

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 7631 s, 82 iter, 1968000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_12-46-26
  done: false
  episode_len_mean: 87.83088235294117
  episode_reward_max: 609.9127199055969
  episode_reward_mean: 182.69472809351458
  episode_reward_min: -358.0130672277744
  episodes_this_iter: 272
  episodes_total: 21964
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18237.116
    load_time_ms: 3.709
    num_steps_sampled: 1992000
    num_steps_trained: 1992000
    rl_0:
      cur_kl_coeff: 8.271806865087375e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.7669594883918762
      kl: 0.0069916788488626

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_12-53-47
  done: false
  episode_len_mean: 81.9795918367347
  episode_reward_max: 607.8535976044473
  episode_reward_mean: 115.06395252975283
  episode_reward_min: -353.66131395245475
  episodes_this_iter: 294
  episodes_total: 23348
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18224.39
    load_time_ms: 3.689
    num_steps_sampled: 2112000
    num_steps_trained: 2112000
    rl_0:
      cur_kl_coeff: 2.5849396453398048e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.7184059023857117
      kl: 0.008777911774814129
      policy_loss: -0.001307475147768855
      total_loss: 827.1179809570312
      vf_explained_var: 0.8439996838569641
      vf_loss: 827.1193237304688
    sample_time_ms: 69875.332
    update_time_ms: 5.034
  iterations_since_restore: 88
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  pol

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 8620 s, 93 iter, 2232000 ts, 214 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_13-02-57
  done: false
  episode_len_mean: 87.33090909090909
  episode_reward_max: 602.067230282257
  episode_reward_mean: 178.24238886820206
  episode_reward_min: -357.3984137124486
  episodes_this_iter: 275
  episodes_total: 24992
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18501.322
    load_time_ms: 3.846
    num_steps_sampled: 2256000
    num_steps_trained: 2256000
    rl_0:
      cur_kl_coeff: 4.038968195843445e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.6888623833656311
      kl: 0.00913595873862505

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_13-10-19
  done: false
  episode_len_mean: 86.89492753623189
  episode_reward_max: 612.1446614736902
  episode_reward_mean: 183.84050650356252
  episode_reward_min: -350.8423403858942
  episodes_this_iter: 276
  episodes_total: 26329
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18528.873
    load_time_ms: 3.865
    num_steps_sampled: 2376000
    num_steps_trained: 2376000
    rl_0:
      cur_kl_coeff: 1.2621775612010766e-30
      cur_lr: 4.999999873689376e-05
      entropy: 0.6643702983856201
      kl: 0.007227424532175064
      policy_loss: -0.000945896958000958
      total_loss: 688.9576416015625
      vf_explained_var: 0.8646296262741089
      vf_loss: 688.9586791992188
    sample_time_ms: 71724.448
    update_time_ms: 5.181
  iterations_since_restore: 99
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  po

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 9589 s, 104 iter, 2496000 ts, 256 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_13-19-05
  done: false
  episode_len_mean: 86.35740072202167
  episode_reward_max: 619.6959049030495
  episode_reward_mean: 165.41321704015022
  episode_reward_min: -350.0202822118905
  episodes_this_iter: 277
  episodes_total: 27943
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18265.336
    load_time_ms: 3.569
    num_steps_sampled: 2520000
    num_steps_trained: 2520000
    rl_0:
      cur_kl_coeff: 1.9721524393766821e-32
      cur_lr: 4.999999873689376e-05
      entropy: 0.6088237166404724
      kl: 0.00887087918817

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_13-27-39
  done: false
  episode_len_mean: 92.39382239382239
  episode_reward_max: 619.1814056824721
  episode_reward_mean: 254.1291519462642
  episode_reward_min: -356.7521997174043
  episodes_this_iter: 259
  episodes_total: 29278
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 20429.588
    load_time_ms: 3.595
    num_steps_sampled: 2640000
    num_steps_trained: 2640000
    rl_0:
      cur_kl_coeff: 1.2325952746104263e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.5580199360847473
      kl: 0.009211837314069271
      policy_loss: -0.0013614711351692677
      total_loss: 685.732421875
      vf_explained_var: 0.8440340161323547
      vf_loss: 685.7337646484375
    sample_time_ms: 74698.963
    update_time_ms: 6.09
  iterations_since_restore: 110
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  policy

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 11017 s, 115 iter, 2760000 ts, 203 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_13-43-34
  done: false
  episode_len_mean: 89.28252788104089
  episode_reward_max: 618.1243016385981
  episode_reward_mean: 210.60651914218835
  episode_reward_min: -354.4143967028126
  episodes_this_iter: 269
  episodes_total: 30900
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 30316.04
    load_time_ms: 4.363
    num_steps_sampled: 2784000
    num_steps_trained: 2784000
    rl_0:
      cur_kl_coeff: 1.9259301165787911e-35
      cur_lr: 4.999999873689376e-05
      entropy: 0.5081906914710999
      kl: 0.01008611079305

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_13-51-04
  done: false
  episode_len_mean: 87.46909090909091
  episode_reward_max: 623.1209844791413
  episode_reward_mean: 199.52108357090893
  episode_reward_min: -354.0600038655473
  episodes_this_iter: 275
  episodes_total: 32233
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 26802.915
    load_time_ms: 4.283
    num_steps_sampled: 2904000
    num_steps_trained: 2904000
    rl_0:
      cur_kl_coeff: 2.407412645723489e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.47941499948501587
      kl: 0.0116651002317667
      policy_loss: -0.0015540955355390906
      total_loss: 706.50048828125
      vf_explained_var: 0.8583970665931702
      vf_loss: 706.5020751953125
    sample_time_ms: 96948.297
    update_time_ms: 8.131
  iterations_since_restore: 121
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  poli

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 12031 s, 126 iter, 3024000 ts, 203 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_14-00-14
  done: false
  episode_len_mean: 89.08208955223881
  episode_reward_max: 625.2198671369011
  episode_reward_mean: 216.82436051270287
  episode_reward_min: -357.43406202871927
  episodes_this_iter: 268
  episodes_total: 33873
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 20502.549
    load_time_ms: 3.617
    num_steps_sampled: 3048000
    num_steps_trained: 3048000
    rl_0:
      cur_kl_coeff: 1.2037063228617445e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.4116303324699402
      kl: 0.011129845865

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_14-07-35
  done: false
  episode_len_mean: 89.3731343283582
  episode_reward_max: 625.1173804937074
  episode_reward_mean: 222.51785878022446
  episode_reward_min: -349.25432913032563
  episodes_this_iter: 268
  episodes_total: 35240
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 20337.526
    load_time_ms: 3.695
    num_steps_sampled: 3168000
    num_steps_trained: 3168000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.38431286811828613
      kl: 0.011162007227540016
      policy_loss: -0.0005629229708574712
      total_loss: 772.8761596679688
      vf_explained_var: 0.8386759161949158
      vf_loss: 772.8768310546875
    sample_time_ms: 69939.165
    update_time_ms: 5.546
  iterations_since_restore: 132
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 13031 s, 137 iter, 3288000 ts, 245 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_14-16-28
  done: false
  episode_len_mean: 84.16140350877193
  episode_reward_max: 632.4086830725822
  episode_reward_mean: 170.21214877304988
  episode_reward_min: -354.94430612328614
  episodes_this_iter: 285
  episodes_total: 36865
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18464.089
    load_time_ms: 3.683
    num_steps_sampled: 3312000
    num_steps_trained: 3312000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.34644612669944763
      kl: 0.011477303691

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_14-25-34
  done: false
  episode_len_mean: 91.55172413793103
  episode_reward_max: 635.4223816470782
  episode_reward_mean: 251.39871900354626
  episode_reward_min: -354.28111247535713
  episodes_this_iter: 261
  episodes_total: 38217
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 19636.336
    load_time_ms: 3.803
    num_steps_sampled: 3432000
    num_steps_trained: 3432000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.341434121131897
      kl: 0.011419259011745453
      policy_loss: -0.0008403187384828925
      total_loss: 682.2095947265625
      vf_explained_var: 0.8581069111824036
      vf_loss: 682.2103881835938
    sample_time_ms: 79370.049
    update_time_ms: 7.403
  iterations_since_restore: 143
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  p

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 14217 s, 148 iter, 3552000 ts, 239 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_14-36-26
  done: false
  episode_len_mean: 91.22053231939164
  episode_reward_max: 633.2347522454295
  episode_reward_mean: 256.0649512061719
  episode_reward_min: -352.5121633312857
  episodes_this_iter: 263
  episodes_total: 39831
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 21610.272
    load_time_ms: 4.007
    num_steps_sampled: 3576000
    num_steps_trained: 3576000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.32601457834243774
      kl: 0.01234877761453

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_14-44-55
  done: false
  episode_len_mean: 87.66788321167883
  episode_reward_max: 631.2411965239359
  episode_reward_mean: 207.45396656040236
  episode_reward_min: -356.4847781707215
  episodes_this_iter: 274
  episodes_total: 41207
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 21746.648
    load_time_ms: 3.998
    num_steps_sampled: 3696000
    num_steps_trained: 3696000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.2958919405937195
      kl: 0.011483888141810894
      policy_loss: 4.702170826931251e-06
      total_loss: 835.0214233398438
      vf_explained_var: 0.8367815613746643
      vf_loss: 835.021484375
    sample_time_ms: 84845.127
    update_time_ms: 6.477
  iterations_since_restore: 154
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  policy

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 15275 s, 159 iter, 3816000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_14-53-54
  done: false
  episode_len_mean: 90.56439393939394
  episode_reward_max: 634.9628674536231
  episode_reward_mean: 245.9334480092195
  episode_reward_min: -352.9140692205035
  episodes_this_iter: 264
  episodes_total: 42850
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 19867.523
    load_time_ms: 3.791
    num_steps_sampled: 3840000
    num_steps_trained: 3840000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.2623051404953003
      kl: 0.012673323042690

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_15-01-22
  done: false
  episode_len_mean: 86.00358422939068
  episode_reward_max: 633.3031617660454
  episode_reward_mean: 186.86793519539907
  episode_reward_min: -356.14287738855904
  episodes_this_iter: 279
  episodes_total: 44243
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 18889.033
    load_time_ms: 3.677
    num_steps_sampled: 3960000
    num_steps_trained: 3960000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.21115796267986298
      kl: 0.012431297451257706
      policy_loss: 0.00016732708900235593
      total_loss: 610.74609375
      vf_explained_var: 0.8934885859489441
      vf_loss: 610.7459106445312
    sample_time_ms: 70660.017
    update_time_ms: 5.355
  iterations_since_restore: 165
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  poli

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=29660], 16387 s, 170 iter, 4080000 ts, 213 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_15-13-02
  done: false
  episode_len_mean: 86.52688172043011
  episode_reward_max: 640.691631062095
  episode_reward_mean: 199.4859566112759
  episode_reward_min: -355.1839836238594
  episodes_this_iter: 279
  episodes_total: 45891
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 23198.541
    load_time_ms: 4.099
    num_steps_sampled: 4104000
    num_steps_trained: 4104000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.16033411026000977
      kl: 0.015169168822467

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_15-24-15
  done: false
  episode_len_mean: 87.78832116788321
  episode_reward_max: 641.5453994260478
  episode_reward_mean: 206.55034429643183
  episode_reward_min: -353.6894277979851
  episodes_this_iter: 274
  episodes_total: 47256
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 28740.947
    load_time_ms: 4.754
    num_steps_sampled: 4224000
    num_steps_trained: 4224000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.12763839960098267
      kl: 0.013762842863798141
      policy_loss: 0.0004640940169338137
      total_loss: 612.9729614257812
      vf_explained_var: 0.8857985138893127
      vf_loss: 612.9724731445312
    sample_time_ms: 98764.702
    update_time_ms: 10.037
  iterations_since_restore: 176
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  

  custom_metrics: {}
  date: 2019-05-02_15-33-34
  done: false
  episode_len_mean: 89.46468401486989
  episode_reward_max: 642.8047096518987
  episode_reward_mean: 240.57656092214492
  episode_reward_min: -349.508659526333
  episodes_this_iter: 269
  episodes_total: 48639
  experiment_id: 617513c78ba74eb29654585d798eef12
  hostname: Gandalf
  info:
    grad_time_ms: 28013.175
    load_time_ms: 4.546
    num_steps_sampled: 4344000
    num_steps_trained: 4344000
    rl_0:
      cur_kl_coeff: 6.018531614308722e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.10201722383499146
      kl: 0.014210670255124569
      policy_loss: 0.001091307494789362
      total_loss: 740.3943481445312
      vf_explained_var: 0.8542518019676208
      vf_loss: 740.3931884765625
    sample_time_ms: 95004.589
    update_time_ms: 11.693
  iterations_since_restore: 181
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 29660
  policy_reward_mean:
    rl_0: 60.144140230536244
  time_since_resto