# TRAINING I3W


# A) Create Environment, Vehicles etc

### General Parameter

In [1]:
# Length of one rollout. Kept in a single variable so that every cell of the
# notebook uses the same value.
# (Author's note, translated: 103 is the maximum horizon if the rollout should
# stop before the vehicles leave; the default was 500.)
HORIZON = 500

# Name under which the experiment is tagged
experiment_name = "IntersectionExample"

# Scenario class, selected by name
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the list printed by this cell spells the entry
# 'IntersectionScenarioTW' -- confirm that "IntersectionTWScenario" is the
# attribute flow.scenarios really exposes.
scenario_name = "IntersectionTWScenario"

# Environment class, selected by name
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, passed on through NetParams.additional_params
additionalNetParams = dict(edge_length=40, lanes=1, speed_limit=30)

# Only `no_internal_links` and `additional_params` differ from the defaults noted
# by the author; the other arguments are written out with their default values.
net_params = NetParams(
    no_internal_links=False,                # default: True -- changed so that intersections are not skipped
    additional_params=additionalNetParams,  # default: None
    inflows=None,                           # default: None
    osm_path=None,                          # default: None
    netfile=None,                           # default: None
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Configuration of the vehicle placement used by InitialConfig.
initial_config = InitialConfig(
    # --- values that differ from the defaults noted by the author ---
    shuffle=True,                      # default: False
    spacing="custom",                  # default: "uniform"
    min_gap=10,                        # default: 0
    perturbation=29.99,                # default: 0.0
    # --- values equal to the defaults, listed for reference ---
    x0=0,
    bunching=0,
    lanes_distribution=float("inf"),
    edges_distribution="all",
    additional_params=None,
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. Every value below equals the default the author noted for
# SumoParams; the arguments are written out so they are easy to tune.
# NOTE(review): `seed` is None, so no simulator seed is fixed here -- set one if
# reproducible rollouts are required.
sumo_params = SumoParams(
    # connection / process
    port=None,
    num_clients=1,
    sumo_binary=None,
    restart_instance=False,
    seed=None,
    # simulation
    sim_step=0.1,
    lateral_resolution=None,
    overtake_right=False,
    teleport_time=-1,
    # logging / output
    emission_path=None,
    no_step_log=True,
    print_warnings=True,
    # rendering
    render=False,
    save_render=False,
    sight_radius=25,
    show_radius=False,
    pxpm=2,
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, passed on through EnvParams.additional_params
additionalEnvParams = dict(
    max_accel=3,         # maximum acceleration of autonomous vehicles
    max_decel=3,         # maximum deceleration of autonomous vehicles
    target_velocity=30,
)

# `additional_params` and `horizon` are set explicitly for this experiment; the
# remaining arguments carry the defaults noted by the author.
env_params = EnvParams(
    horizon=HORIZON,                        # default: 500
    additional_params=additionalEnvParams,  # default: None
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
from random import *

# Vehicle description object of the experiment; vehicle types are registered on
# it with vehicles.add(...) in the next cell and it is passed on as flow_params["veh"].
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters for the RL vehicles (default: None)
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (default: None); defined but not passed to vehicles.add
lc_parameter = None

# Register the RL-controlled vehicles. Arguments that are not listed keep the
# defaults of vehicles.add.
# NOTE(review): presumably every call to vehicles.add registers further vehicles,
# so re-running this cell without re-creating `vehicles` may add them twice --
# confirm.
vehicles.add(
    veh_id="rl",                                     # name of the vehicle type
    acceleration_controller=(RLController, {}),      # default: (SumoCarFollowingController, {})
    lane_change_controller=(StaticLaneChanger, {}),  # default: (SumoLaneChangeController, {})
    routing_controller=(ContinuousRouter, {}),       # default: None
    initial_speed=0,                                 # default: 0
    num_vehicles=2,                                  # default: 1
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Collect all settings in flow_params. The dictionary keys must be spelled
# exactly as below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (the flow.core.params.VehicleParams object built above)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
N_CPUS = 2       # number of parallel workers
N_ROLLOUTS = 20  # number of rollouts per training iteration

# Start ray with one CPU more than the number of workers. The call stays the
# last expression of the cell so that its return value is displayed.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-14_14-59-27_15055/logs.
Waiting for redis server at 127.0.0.1:41538 to respond...
Waiting for redis server at 127.0.0.1:42848 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=5529d516fdc919df987ccd1ccf94fefb3de0cf496ff147bb



{'node_ip_address': '172.16.123.117',
 'object_store_addresses': ['/tmp/ray/session_2019-03-14_14-59-27_15055/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-14_14-59-27_15055/sockets/raylet'],
 'redis_address': '172.16.123.117:41538',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=5529d516fdc919df987ccd1ccf94fefb3de0cf496ff147bb'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or to a user-defined trainable function
# or class registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep copy on purpose: the nested "model" and "env_config" dictionaries are
# modified below. A shallow .copy() would share them with
# agent_cls._default_config, so these edits would leak into the agent class's
# default configuration (and into every later config derived from it).
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # default is 200; still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize flow_params to a JSON string and
# store it, together with the algorithm name, in the env_config section.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping.
# One environment instance is created only to read its observation and action
# spaces.
# NOTE(review): this instance is never closed in this cell -- confirm whether it
# should be shut down once the spaces have been read.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space


def gen_policy():
    """Return the (policy class, observation space, action space, config) tuple of one PPO policy."""
    policy_spec = (PPOPolicyGraph, obs_space, act_space, {})
    return policy_spec


# One separate policy graph per policy id
policy_graphs = {policy_id: gen_policy() for policy_id in ('rl_0', 'rl_1')}


def policy_mapping_fn(agent_id):
    """Use the agent id itself as the id of the policy that controls the agent."""
    return agent_id


config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
}

 Starting SUMO on port 44893


11.609282163268665
6.485841348992942


In [None]:
# Specification of the single experiment, keyed below by the experiment tag
experiment_spec = {
    "run": alg_run,            # RL algorithm to run
    "env": gym_name,           # environment name generated earlier
    "config": dict(config),    # configuration params (must match "run" value)
    "checkpoint_freq": 1,      # number of iterations between checkpoints
    "max_failures": 999,
    "stop": {                  # stopping conditions
        "training_iteration": 1000,  # number of iterations to stop after
    },
}

trials = run_experiments({flow_params["exp_tag"]: experiment_spec})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.6/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-14_14-59-309afvmegl -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-00-51
  done: false
  episode_len_mean: 461.4761904761905
  episode_reward_max: 225.63778774179508
  episode_reward_mean: 50.44007541538655
  episode_reward_min: -152.90116484063756
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 8428.011
    load_time_ms: 205.795
    num_steps_sampled: 10000
    num_s

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-02-42
  done: false
  episode_len_mean: 452.82
  episode_reward_max: 282.4621064690395
  episode_reward_mean: 79.4900714626834
  episode_reward_min: -161.11675332149227
  episodes_this_iter: 23
  episodes_total: 109
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 4372.321
    load_time_ms: 43.36
    num_steps_sampled: 50000
    num_steps_trained: 50000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4114465713500977
      kl: 0.003478548489511013
      policy_loss: -0.0024012215435504913
      total_loss: 161.74497985839844
      vf_explained_var: 0.10363620519638062
      vf_loss: 161.74734497070312
    rl_1:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.421002745628357
      kl: 0.000883040833286941
      policy_loss: -0.0005220648599788547
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 258 s, 9 iter, 90000 ts, 73.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-04-51
  done: false
  episode_len_mean: 369.99
  episode_reward_max: 352.88021066119705
  episode_reward_mean: 112.82567416225835
  episode_reward_min: -161.6657970811671
  episodes_this_iter: 27
  episodes_total: 244
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3870.91
    load_time_ms: 23.153
    num_steps_sampled: 100000
    num_steps_trained: 100000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.388471245765686
      kl: 0.0014920317335054278
      policy_loss: -0.0014330461854115129
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 377 s, 14 iter, 140000 ts, 162 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-06-49
  done: false
  episode_len_mean: 293.84
  episode_reward_max: 353.29070915225674
  episode_reward_mean: 182.0457125177287
  episode_reward_min: -159.23639323454432
  episodes_this_iter: 36
  episodes_total: 409
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3308.827
    load_time_ms: 2.693
    num_steps_sampled: 150000
    num_steps_trained: 150000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.3777779340744019
      kl: 0.009372466243803501
      policy_loss: -0.003262387355789542
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 495 s, 19 iter, 190000 ts, 219 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-08-48
  done: false
  episode_len_mean: 230.05
  episode_reward_max: 374.8026812543819
  episode_reward_mean: 215.33033365358585
  episode_reward_min: -159.30313442704693
  episodes_this_iter: 47
  episodes_total: 621
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3236.785
    load_time_ms: 2.541
    num_steps_sampled: 200000
    num_steps_trained: 200000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.4009674787521362
      kl: 0.0044362470507621765
      policy_loss: -0.0015330089954659343
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 615 s, 24 iter, 240000 ts, 203 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-10-49
  done: false
  episode_len_mean: 179.48
  episode_reward_max: 312.8545125531083
  episode_reward_mean: 226.33432489181908
  episode_reward_min: -161.88239958521294
  episodes_this_iter: 57
  episodes_total: 886
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3218.172
    load_time_ms: 2.71
    num_steps_sampled: 250000
    num_steps_trained: 250000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.4180691242218018
      kl: 0.005351012106984854
      policy_loss: -0.002654706360772252
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 735 s, 29 iter, 290000 ts, 242 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-12-49
  done: false
  episode_len_mean: 162.33
  episode_reward_max: 284.62871949598616
  episode_reward_mean: 243.27058905925128
  episode_reward_min: -141.8048505700731
  episodes_this_iter: 63
  episodes_total: 1184
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3208.885
    load_time_ms: 2.837
    num_steps_sampled: 300000
    num_steps_trained: 300000
    rl_0:
      cur_kl_coeff: 3.7252903539730653e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.3863162994384766
      kl: 0.01059985812753439
      policy_loss: -0.005376099608838558
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 872 s, 34 iter, 340000 ts, 261 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-15-11
  done: false
  episode_len_mean: 149.5
  episode_reward_max: 306.4606188595306
  episode_reward_mean: 266.8796624125178
  episode_reward_min: 226.62915356633152
  episodes_this_iter: 68
  episodes_total: 1511
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3637.797
    load_time_ms: 2.926
    num_steps_sampled: 350000
    num_steps_trained: 350000
    rl_0:
      cur_kl_coeff: 4.6566129424663316e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.335817575454712
      kl: 0.00470380112528801
      policy_loss: -0.0009689115104265511
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 1042 s, 39 iter, 390000 ts, 265 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-18-09
  done: false
  episode_len_mean: 142.92
  episode_reward_max: 311.1173947305601
  episode_reward_mean: 248.70658965697078
  episode_reward_min: -145.41234501312942
  episodes_this_iter: 71
  episodes_total: 1851
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3917.057
    load_time_ms: 2.974
    num_steps_sampled: 400000
    num_steps_trained: 400000
    rl_0:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.2958980798721313
      kl: 0.004384838044643402
      policy_loss: -0.0027951200027018785
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 1180 s, 44 iter, 440000 ts, 273 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-20-14
  done: false
  episode_len_mean: 139.19
  episode_reward_max: 303.56098750293785
  episode_reward_mean: 280.37871407784843
  episode_reward_min: 255.10111813973037
  episodes_this_iter: 72
  episodes_total: 2210
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3488.174
    load_time_ms: 2.743
    num_steps_sampled: 450000
    num_steps_trained: 450000
    rl_0:
      cur_kl_coeff: 3.6379788613018216e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.2789556980133057
      kl: 0.017654959112405777
      policy_loss: -0.0030077595729380846
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 1302 s, 49 iter, 490000 ts, 289 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-22-17
  done: false
  episode_len_mean: 134.94
  episode_reward_max: 319.369354569989
  episode_reward_mean: 291.95916607741884
  episode_reward_min: 263.0934669273648
  episodes_this_iter: 75
  episodes_total: 2573
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3253.628
    load_time_ms: 2.568
    num_steps_sampled: 500000
    num_steps_trained: 500000
    rl_0:
      cur_kl_coeff: 1.8189894306509108e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.258284091949463
      kl: 0.007650570012629032
      policy_loss: -0.0021321531385183334
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 1425 s, 54 iter, 540000 ts, 295 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-24-20
  done: false
  episode_len_mean: 129.89
  episode_reward_max: 333.9256552885023
  episode_reward_mean: 301.8222666392396
  episode_reward_min: 269.5807270589719
  episodes_this_iter: 77
  episodes_total: 2948
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3297.292
    load_time_ms: 2.58
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 4.547473576627277e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.212695598602295
      kl: 0.012561974115669727
      policy_loss: -0.003497864119708538
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 1557 s, 59 iter, 590000 ts, 304 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-26-48
  done: false
  episode_len_mean: 126.48
  episode_reward_max: 338.285089864742
  episode_reward_mean: 305.0278891222402
  episode_reward_min: 271.2475624672532
  episodes_this_iter: 79
  episodes_total: 3343
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3634.957
    load_time_ms: 2.749
    num_steps_sampled: 600000
    num_steps_trained: 600000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.1468698978424072
      kl: 0.0046281698159873486
      policy_loss: -0.0029053145553916693
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 1721 s, 64 iter, 640000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-29-18
  done: false
  episode_len_mean: 125.62
  episode_reward_max: 346.6889135744394
  episode_reward_mean: 313.4344862340645
  episode_reward_min: 276.34340356988724
  episodes_this_iter: 79
  episodes_total: 3741
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3832.209
    load_time_ms: 2.897
    num_steps_sampled: 650000
    num_steps_trained: 650000
    rl_0:
      cur_kl_coeff: 1.421085492696024e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.0633472204208374
      kl: 0.0074365208856761456
      policy_loss: -0.002822505310177803
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 1865 s, 69 iter, 690000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-31-48
  done: false
  episode_len_mean: 122.03
  episode_reward_max: 346.16845472064034
  episode_reward_mean: 310.99605858098334
  episode_reward_min: 275.18381929286824
  episodes_this_iter: 82
  episodes_total: 4148
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3734.495
    load_time_ms: 2.69
    num_steps_sampled: 700000
    num_steps_trained: 700000
    rl_0:
      cur_kl_coeff: 1.77635686587003e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.9144368171691895
      kl: 0.010570749640464783
      policy_loss: -0.003826047293841839
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2009 s, 74 iter, 740000 ts, 310 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-34-07
  done: false
  episode_len_mean: 123.01
  episode_reward_max: 343.0279838522836
  episode_reward_mean: 313.35142070725857
  episode_reward_min: 280.9083265223367
  episodes_this_iter: 81
  episodes_total: 4556
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3671.204
    load_time_ms: 2.798
    num_steps_sampled: 750000
    num_steps_trained: 750000
    rl_0:
      cur_kl_coeff: 2.2204460823375376e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.8577484488487244
      kl: 0.005945899989455938
      policy_loss: -0.001601989846676588
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2154 s, 79 iter, 790000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-37-13
  done: false
  episode_len_mean: 123.62
  episode_reward_max: 348.35193854162475
  episode_reward_mean: 319.1435194543915
  episode_reward_min: 285.62221895774127
  episodes_this_iter: 82
  episodes_total: 4958
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 4037.329
    load_time_ms: 2.857
    num_steps_sampled: 800000
    num_steps_trained: 800000
    rl_0:
      cur_kl_coeff: 2.775557602921922e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.7387590408325195
      kl: 0.011686421930789948
      policy_loss: -0.0022722615394741297
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2319 s, 84 iter, 840000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-39-16
  done: false
  episode_len_mean: 125.24
  episode_reward_max: 350.5130494120937
  episode_reward_mean: 316.2166081417861
  episode_reward_min: 284.8094890076721
  episodes_this_iter: 80
  episodes_total: 5359
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3852.921
    load_time_ms: 2.658
    num_steps_sampled: 850000
    num_steps_trained: 850000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.6735144853591919
      kl: 0.008607061579823494
      policy_loss: -0.0013328040950000286
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2440 s, 89 iter, 890000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-41-17
  done: false
  episode_len_mean: 126.28
  episode_reward_max: 349.87702007821133
  episode_reward_mean: 315.82920664260126
  episode_reward_min: 278.6916216588692
  episodes_this_iter: 80
  episodes_total: 5757
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3206.333
    load_time_ms: 2.714
    num_steps_sampled: 900000
    num_steps_trained: 900000
    rl_0:
      cur_kl_coeff: 1.0842021886413758e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.6214117407798767
      kl: 0.011848149821162224
      policy_loss: 0.002416628645732999
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2562 s, 94 iter, 940000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-43-19
  done: false
  episode_len_mean: 126.55
  episode_reward_max: 351.55891099155707
  episode_reward_mean: 318.943997362948
  episode_reward_min: 286.14800215869195
  episodes_this_iter: 78
  episodes_total: 6153
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3225.897
    load_time_ms: 2.704
    num_steps_sampled: 950000
    num_steps_trained: 950000
    rl_0:
      cur_kl_coeff: 1.3552527358017197e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.4961884319782257
      kl: 0.00988792348653078
      policy_loss: 0.000776217901147902
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2684 s, 99 iter, 990000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-45-22
  done: false
  episode_len_mean: 127.09
  episode_reward_max: 350.24931071400687
  episode_reward_mean: 319.0207300167894
  episode_reward_min: 284.1041248085353
  episodes_this_iter: 79
  episodes_total: 6548
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3220.028
    load_time_ms: 2.634
    num_steps_sampled: 1000000
    num_steps_trained: 1000000
    rl_0:
      cur_kl_coeff: 1.6940659197521496e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.43828126788139343
      kl: 0.005542269442230463
      policy_loss: 0.0032982409466058016
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2804 s, 104 iter, 1040000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-47-22
  done: false
  episode_len_mean: 128.04
  episode_reward_max: 353.0927211807667
  episode_reward_mean: 320.1064397680283
  episode_reward_min: 282.31084455440316
  episodes_this_iter: 78
  episodes_total: 6938
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3200.813
    load_time_ms: 2.57
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.4317009747028351
      kl: 0.013635065406560898
      policy_loss: 0.00501002324745059
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 2926 s, 109 iter, 1090000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-49-24
  done: false
  episode_len_mean: 129.15
  episode_reward_max: 360.1907607705892
  episode_reward_mean: 318.9074958099684
  episode_reward_min: 285.76808058687914
  episodes_this_iter: 78
  episodes_total: 7327
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3202.014
    load_time_ms: 2.639
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.31722551584243774
      kl: 0.013085901737213135
      policy_loss: 0.0016319149872288108
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 3047 s, 114 iter, 1140000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-51-26
  done: false
  episode_len_mean: 129.7
  episode_reward_max: 358.10600306364006
  episode_reward_mean: 318.8393864130172
  episode_reward_min: 283.19074838473136
  episodes_this_iter: 77
  episodes_total: 7716
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3211.599
    load_time_ms: 2.729
    num_steps_sampled: 1150000
    num_steps_trained: 1150000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.28965887427330017
      kl: 0.010879440233111382
      policy_loss: 0.0063300770707428455
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 3192 s, 119 iter, 1190000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-53-57
  done: false
  episode_len_mean: 129.55
  episode_reward_max: 363.46390969943644
  episode_reward_mean: 316.9087487295968
  episode_reward_min: 287.0361001436934
  episodes_this_iter: 77
  episodes_total: 8100
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3475.632
    load_time_ms: 2.6
    num_steps_sampled: 1200000
    num_steps_trained: 1200000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.2731184959411621
      kl: 0.022384418174624443
      policy_loss: 0.01098710298538208
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 3352 s, 124 iter, 1240000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-57-03
  done: false
  episode_len_mean: 130.1
  episode_reward_max: 367.8516282212098
  episode_reward_mean: 320.6403648847514
  episode_reward_min: 285.4066313086209
  episodes_this_iter: 77
  episodes_total: 8485
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 4664.724
    load_time_ms: 4.483
    num_steps_sampled: 1250000
    num_steps_trained: 1250000
    rl_0:
      cur_kl_coeff: 7.940931829470712e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.279278427362442
      kl: 0.027184681966900826
      policy_loss: 0.004711077082902193
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 3517 s, 129 iter, 1290000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_15-59-21
  done: false
  episode_len_mean: 130.24
  episode_reward_max: 361.5330222600573
  episode_reward_mean: 321.5106852723445
  episode_reward_min: 286.69050301741686
  episodes_this_iter: 77
  episodes_total: 8869
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 4563.125
    load_time_ms: 4.427
    num_steps_sampled: 1300000
    num_steps_trained: 1300000
    rl_0:
      cur_kl_coeff: 1.1911402871801952e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.2197788506746292
      kl: 0.023089325055480003
      policy_loss: 0.00540499622002244
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 3672 s, 134 iter, 1340000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-01-58
  done: false
  episode_len_mean: 130.77
  episode_reward_max: 363.0940171055902
  episode_reward_mean: 318.27082205786064
  episode_reward_min: 281.0260132923256
  episodes_this_iter: 76
  episodes_total: 9252
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3724.477
    load_time_ms: 2.993
    num_steps_sampled: 1350000
    num_steps_trained: 1350000
    rl_0:
      cur_kl_coeff: 4.020098597423056e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.21968834102153778
      kl: 0.02682606317102909
      policy_loss: 0.0067974720150232315
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 3828 s, 139 iter, 1390000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-04-29
  done: false
  episode_len_mean: 132.7
  episode_reward_max: 358.187602548145
  episode_reward_mean: 317.62074203736097
  episode_reward_min: 283.55452322493943
  episodes_this_iter: 75
  episodes_total: 9633
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3804.099
    load_time_ms: 3.249
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
    rl_0:
      cur_kl_coeff: 4.020098597423056e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.20338605344295502
      kl: 0.015923021361231804
      policy_loss: 0.0009787207236513495
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4027 s, 144 iter, 1440000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-08-00
  done: false
  episode_len_mean: 131.93
  episode_reward_max: 360.47915012770557
  episode_reward_mean: 318.82473809989494
  episode_reward_min: 280.8859661768723
  episodes_this_iter: 76
  episodes_total: 10014
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 4631.473
    load_time_ms: 3.238
    num_steps_sampled: 1450000
    num_steps_trained: 1450000
    rl_0:
      cur_kl_coeff: 1.3567833279062402e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.2594224512577057
      kl: 0.028265738859772682
      policy_loss: 0.003208385780453682
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4167 s, 149 iter, 1490000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-10-08
  done: false
  episode_len_mean: 131.58
  episode_reward_max: 354.7747898679599
  episode_reward_mean: 321.0298876683201
  episode_reward_min: 287.3893799166414
  episodes_this_iter: 77
  episodes_total: 10389
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 4360.197
    load_time_ms: 3.251
    num_steps_sampled: 1500000
    num_steps_trained: 1500000
    rl_0:
      cur_kl_coeff: 3.3919583197656005e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.17033007740974426
      kl: 0.009174066595733166
      policy_loss: 0.00016366060299333185
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4288 s, 154 iter, 1540000 ts, 316 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-12-07
  done: false
  episode_len_mean: 134.46
  episode_reward_max: 352.3179935288737
  episode_reward_mean: 316.1724650697139
  episode_reward_min: 278.4456618978549
  episodes_this_iter: 74
  episodes_total: 10765
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3123.919
    load_time_ms: 2.714
    num_steps_sampled: 1550000
    num_steps_trained: 1550000
    rl_0:
      cur_kl_coeff: 1.6959791598828003e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.30316218733787537
      kl: 0.01622210070490837
      policy_loss: 0.001000070944428444
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4407 s, 159 iter, 1590000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-14-06
  done: false
  episode_len_mean: 136.71
  episode_reward_max: 360.71119539164346
  episode_reward_mean: 318.5556147065802
  episode_reward_min: 280.0225547472259
  episodes_this_iter: 74
  episodes_total: 11132
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3092.118
    load_time_ms: 2.495
    num_steps_sampled: 1600000
    num_steps_trained: 1600000
    rl_0:
      cur_kl_coeff: 2.5439677143050236e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.26094529032707214
      kl: 0.02733551897108555
      policy_loss: 0.0031220854725688696
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4528 s, 164 iter, 1640000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-16-08
  done: false
  episode_len_mean: 134.01
  episode_reward_max: 356.72427673060804
  episode_reward_mean: 318.79452436329024
  episode_reward_min: 283.4274240951088
  episodes_this_iter: 74
  episodes_total: 11501
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3133.685
    load_time_ms: 2.39
    num_steps_sampled: 1650000
    num_steps_trained: 1650000
    rl_0:
      cur_kl_coeff: 3.815952123660169e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.18664288520812988
      kl: 0.016583584249019623
      policy_loss: 0.0028547022957354784
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4653 s, 169 iter, 1690000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-18-14
  done: false
  episode_len_mean: 132.6
  episode_reward_max: 358.2735176618807
  episode_reward_mean: 321.9342831904561
  episode_reward_min: 275.3589962830865
  episodes_this_iter: 75
  episodes_total: 11869
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3381.374
    load_time_ms: 2.457
    num_steps_sampled: 1700000
    num_steps_trained: 1700000
    rl_0:
      cur_kl_coeff: 5.723929920984245e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.12601986527442932
      kl: 0.020427528768777847
      policy_loss: 0.0011489071184769273
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4772 s, 174 iter, 1740000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-20-13
  done: false
  episode_len_mean: 137.45
  episode_reward_max: 355.1923158888531
  episode_reward_mean: 315.95691711348235
  episode_reward_min: 279.6732908961781
  episodes_this_iter: 73
  episodes_total: 12238
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3365.035
    load_time_ms: 2.46
    num_steps_sampled: 1750000
    num_steps_trained: 1750000
    rl_0:
      cur_kl_coeff: 8.58589077939966e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.2696291506290436
      kl: 0.027404919266700745
      policy_loss: 0.003363653551787138
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 4899 s, 179 iter, 1790000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-22-20
  done: false
  episode_len_mean: 137.51
  episode_reward_max: 351.1968090346899
  episode_reward_mean: 315.7129316022581
  episode_reward_min: 278.1023573176204
  episodes_this_iter: 72
  episodes_total: 12596
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3284.572
    load_time_ms: 2.493
    num_steps_sampled: 1800000
    num_steps_trained: 1800000
    rl_0:
      cur_kl_coeff: 4.29294538969983e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.2589666545391083
      kl: 0.006507233716547489
      policy_loss: 0.001275925082154572
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 5018 s, 184 iter, 1840000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-24-22
  done: false
  episode_len_mean: 140.62
  episode_reward_max: 346.29698737655207
  episode_reward_mean: 315.6784571765328
  episode_reward_min: 279.74752402977816
  episodes_this_iter: 72
  episodes_total: 12954
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3533.787
    load_time_ms: 2.583
    num_steps_sampled: 1850000
    num_steps_trained: 1850000
    rl_0:
      cur_kl_coeff: 1.0732363474249576e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.21886497735977173
      kl: 0.015416436828672886
      policy_loss: -0.001124072354286909
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15100], 5141 s, 189 iter, 1890000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-14_16-26-23
  done: false
  episode_len_mean: 145.24
  episode_reward_max: 350.1908647660648
  episode_reward_mean: 314.38821084049965
  episode_reward_min: 10.400580736107893
  episodes_this_iter: 72
  episodes_total: 13298
  experiment_id: a07979e1ce864d8ea1fbe22a9b6262d7
  hostname: Gandalf
  info:
    grad_time_ms: 3458.755
    load_time_ms: 2.444
    num_steps_sampled: 1900000
    num_steps_trained: 1900000
    rl_0:
      cur_kl_coeff: 5.366181737124788e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.15816277265548706
      kl: 0.014654543250799179
      policy_loss: 0.00043008619104512036
  