# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameters

In [1]:
import flow.scenarios as scenarios
import flow.multiagent_envs as flowenvs

# Length of one rollout; defined once so that every cell of the notebook
# uses the same value.
HORIZON = 500

# Name of the experiment.
experiment_name = "IntersectionExample"

# Scenario class, referenced by name.
# NOTE(review): the `__all__` listing printed by this cell shows
# 'IntersectionScenarioTW', not 'IntersectionTWScenario' -- confirm that this
# spelling resolves to the intended class.
scenario_name = "IntersectionTWScenario"

# Environment class, referenced by name (listed in `flowenvs.__all__`).
env_name = "MultiAgentIntersectionEnv"

# Show which scenario / environment names the installed Flow package exports.
print("Available scenarios:", scenarios.__all__, sep="\n")
print("\nAvailable environments:", flowenvs.__all__, sep="\n")

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv']


### Net Parameters

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, handed to the scenario through
# `additional_params` below.
additionalNetParams = dict(edge_length=40, lanes=1, speed_limit=30)

# Trailing comments give the default of each argument as noted by the author;
# "!!" marks a value that was deliberately changed.
net_params = NetParams(
    no_internal_links=False,  # default: True  !! keep internal links so that junctions are not skipped
    inflows=None,             # default: None
    osm_path=None,            # default: None
    netfile=None,             # default: None
    additional_params=additionalNetParams,  # default: None  !!
)

### InitialConfig Parameters

In [3]:
from flow.core.params import InitialConfig

# Placement of the vehicles at the start of a rollout / on reset.
# Trailing comments give the default of each argument as noted by the author;
# "!!" marks a value that was deliberately changed.
# NOTE(review): perturbation=39.99 sits just below the edge_length of 40 used
# in the net parameters -- presumably intentional; confirm.
initial_config = InitialConfig(
    shuffle=True,                     # default: False      !!
    spacing="custom",                 # default: "uniform"  !!
    min_gap=10,                       # default: 0
    perturbation=39.99,               # default: 0.0        !!
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameters

In [4]:
from flow.core.params import SumoParams

# Simulator settings. According to the author's notes every value below equals
# its default; the arguments are spelled out so they are easy to tune.
# NOTE(review): seed=None presumably leaves the simulation without a fixed
# seed -- set one if runs need to be reproducible.
sumo_params = SumoParams(
    port=None,                # default: None
    sim_step=0.1,             # default: 0.1
    emission_path=None,       # default: None
    lateral_resolution=None,  # default: None
    no_step_log=True,         # default: True
    render=False,             # default: False
    save_render=False,        # default: False
    sight_radius=25,          # default: 25
    show_radius=False,        # default: False
    pxpm=2,                   # default: 2
    overtake_right=False,     # default: False
    seed=None,                # default: None
    restart_instance=False,   # default: False
    print_warnings=True,      # default: True
    teleport_time=-1,         # default: -1
    num_clients=1,            # default: 1
    sumo_binary=None,         # default: None
)

### Environment Parameters

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, handed to the environment through
# `additional_params` below.
additionalEnvParams = dict(
    max_accel=3,          # maximum acceleration of autonomous vehicles
    max_decel=3,          # maximum deceleration of autonomous vehicles
    target_velocity=30,
)

# Trailing comments give the default of each argument as noted by the author;
# "!!" marks a value that was deliberately set.
env_params = EnvParams(
    additional_params=additionalEnvParams,  # default: None  !!
    horizon=HORIZON,                        # default: 500   !!
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicle Parameters

In [6]:
# Container class describing the vehicles placed in the network.
from flow.core.params import VehicleParams

# Import vehicle dynamics models (controllers). The two commented-out imports
# are the SUMO-based controllers that are not used in this notebook's visible
# cells.
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import; no name from `random` appears to be used in
# the visible cells -- confirm and replace with an explicit import or drop it.
from random import *

# Empty vehicle container; vehicles are added to it in a later cell.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Start from an empty container inside this cell. `vehicles.add` appends to
# the container, so re-running the cell on an already filled container would
# add a second pair of "rl" vehicles, while the multi-agent setup further down
# only defines policies for 'rl_0' and 'rl_1'.
vehicles = VehicleParams()

# Car-following parameters (default: None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (default: None); currently not passed to
# `vehicles.add`.
lc_parameter = None

vehicles.add(
    # name of the vehicle type
    veh_id="rl",
    # acceleration controller, default: (SumoCarFollowingController, {})
    acceleration_controller=(RLController, {}),
    # lane change controller, default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # routing controller, default: None
    routing_controller=(ContinuousRouter, {}),
    # initial speed, default: 0
    initial_speed=0,
    # number of vehicles of this type, default: 1
    num_vehicles=2,
    # car-following parameters (carry the "aggressive" speed mode)
    car_following_params=cf_parameter,
)

### Flow Parameters

In [8]:
# Bundle everything defined above into the `flow_params` dictionary.
# The key names must be exactly as written here.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the Flow environment the experiment runs on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator used by the experiment
    "simulator": 'traci',
    # SUMO-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or its ADDITIONAL_NET_PARAMS)
    "net": net_params,
    # vehicles placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) positioning of the vehicles upon initialization / reset
    # (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Number of parallel rollout workers.
N_CPUS = 2
# Number of rollouts collected per training iteration.
N_ROLLOUTS = 20

# Start Ray with one CPU more than there are workers (presumably for the
# process that drives training -- TODO confirm). The returned address info is
# the cell's output.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-02-22_00-55-48_9744/logs.
Waiting for redis server at 127.0.0.1:34548 to respond...
Waiting for redis server at 127.0.0.1:26306 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=29bcce1bdc62dd4c2b2de9bde51d3a1d039d9d2877165009



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-02-22_00-55-48_9744/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-02-22_00-55-48_9744/sockets/raylet'],
 'redis_address': '192.168.2.102:34548',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=29bcce1bdc62dd4c2b2de9bde51d3a1d039d9d2877165009'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or to a user-defined trainable function
# or class registered in the Tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep copy on purpose: the nested "model" and "env_config" dicts are modified
# below, and a shallow `.copy()` would write those changes into the agent
# class's shared `_default_config`.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97
# Tuning options that are currently disabled:
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # 200 by default, still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize them to a JSON string and store
# that string, together with the algorithm name, in the env config.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to obtain the env constructor and
# the name under which the Flow env for this experiment is registered.
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as RLlib env with Gym.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping: build one throw-away environment only to read
# the observation and action spaces that the policy definitions need.
test_env = create_env()
try:
    obs_space = test_env.observation_space
    act_space = test_env.action_space
finally:
    # Creating the env starts a SUMO instance (see "Starting SUMO on port ..."
    # in this cell's output). It is not needed once the spaces are read, so
    # shut it down instead of leaving it running during training.
    test_env.terminate()

def gen_policy():
    """Return the policy specification for one agent.

    The result is the tuple (PPOPolicyGraph, obs_space, act_space, {}): the
    policy graph class, the observation and action spaces read from the test
    environment, and an empty dict (presumably per-policy config overrides,
    following the RLlib multi-agent convention).
    """
    policy_config = {}
    return (PPOPolicyGraph, obs_space, act_space, policy_config)

# One separate PPO policy graph per RL agent, keyed by the names 'rl_0' and
# 'rl_1'.
policy_graphs = {agent_id: gen_policy() for agent_id in ('rl_0', 'rl_1')}
    
def policy_mapping_fn(agent_id):
    """Map an agent to the name of the policy that controls it.

    The mapping is the identity: the policy name is the agent id itself.
    """
    policy_name = agent_id
    return policy_name

# Add the multi-agent section to the trainer config: the available policy
# graphs and the function that assigns each agent to one of them.
config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
}

 Starting SUMO on port 41527


18.60796873449424
1.6004638567410308


In [13]:
# Launch the training run through Tune. The experiment is keyed by its tag;
# the trial list returned by `run_experiments` is kept in `trials`.
trials = run_experiments({
    flow_params["exp_tag"]: dict(
        run=alg_run,              # RL algorithm to run
        env=gym_name,             # environment name generated earlier
        config=dict(config),      # configuration params (must match "run" value)
        checkpoint_freq=1,        # number of iterations between checkpoints
        max_failures=999,
        stop=dict(                # stopping conditions
            training_iteration=1000,  # number of iterations to stop after
        ),
    ),
})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-02-22_00-55-50s2hylffo -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_00-57-26
  done: false
  episode_len_mean: 452.3333333333333
  episode_reward_max: 150.5330812030182
  episode_reward_mean: 58.40426735411326
  episode_reward_min: -133.99335420565546
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 6746.774
    load_time_ms: 195.097
    num_steps_sampled: 10000
    num_st

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_00-59-42
  done: false
  episode_len_mean: 410.83
  episode_reward_max: 231.32417470785919
  episode_reward_mean: 43.186451504935505
  episode_reward_min: -181.00289670748407
  episodes_this_iter: 26
  episodes_total: 118
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 5356.996
    load_time_ms: 41.405
    num_steps_sampled: 50000
    num_steps_trained: 50000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4217296838760376
      kl: 0.0008324080263264477
      policy_loss: -0.001443819492124021
      total_loss: 90.03878784179688
      vf_explained_var: 0.1024768203496933
      vf_loss: 90.04022979736328
    rl_1:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4223064184188843
      kl: 0.008418683893978596
      policy_loss: -0.005134430713951588
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 317 s, 9 iter, 90000 ts, 57.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-02-23
  done: false
  episode_len_mean: 278.2
  episode_reward_max: 228.1251415289585
  episode_reward_mean: 59.67285583145381
  episode_reward_min: -178.0155407905042
  episodes_this_iter: 35
  episodes_total: 282
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 4935.097
    load_time_ms: 22.135
    num_steps_sampled: 100000
    num_steps_trained: 100000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.4096665382385254
      kl: 0.0052779545076191425
      policy_loss: -0.0030526025220751762
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 442 s, 14 iter, 140000 ts, 34.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-04-34
  done: false
  episode_len_mean: 230.33
  episode_reward_max: 210.0741878541329
  episode_reward_mean: 26.773882147799007
  episode_reward_min: -181.22632574289983
  episodes_this_iter: 41
  episodes_total: 482
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 4156.522
    load_time_ms: 3.024
    num_steps_sampled: 150000
    num_steps_trained: 150000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.4177895784378052
      kl: 0.001920485869050026
      policy_loss: -0.0008132904767990112
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 596 s, 19 iter, 190000 ts, 83 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-07-02
  done: false
  episode_len_mean: 192.6
  episode_reward_max: 217.9487933486108
  episode_reward_mean: 71.53229679893875
  episode_reward_min: -182.99226451535193
  episodes_this_iter: 55
  episodes_total: 720
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3829.584
    load_time_ms: 2.998
    num_steps_sampled: 200000
    num_steps_trained: 200000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.4036473035812378
      kl: 0.002852262929081917
      policy_loss: -0.0009723320254124701
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 714 s, 24 iter, 240000 ts, 57.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-09-01
  done: false
  episode_len_mean: 142.62
  episode_reward_max: 191.06416004405324
  episode_reward_mean: 45.65777919020197
  episode_reward_min: -177.4451894578212
  episodes_this_iter: 70
  episodes_total: 1038
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3546.303
    load_time_ms: 2.607
    num_steps_sampled: 250000
    num_steps_trained: 250000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.352170705795288
      kl: 0.00601216359063983
      policy_loss: -0.002343680476769805
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 832 s, 29 iter, 290000 ts, 83.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-11-00
  done: false
  episode_len_mean: 126.46
  episode_reward_max: 196.00777381878666
  episode_reward_mean: 88.30811412007628
  episode_reward_min: -171.46730875455376
  episodes_this_iter: 80
  episodes_total: 1390
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3244.299
    load_time_ms: 2.498
    num_steps_sampled: 300000
    num_steps_trained: 300000
    rl_0:
      cur_kl_coeff: 3.7252903539730653e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.309882402420044
      kl: 0.006305908318608999
      policy_loss: -0.0023808313999325037
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 952 s, 34 iter, 340000 ts, 78.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-13-00
  done: false
  episode_len_mean: 133.04
  episode_reward_max: 191.34156187989777
  episode_reward_mean: 92.7033461381825
  episode_reward_min: -166.96971562038738
  episodes_this_iter: 74
  episodes_total: 1773
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3255.744
    load_time_ms: 2.492
    num_steps_sampled: 350000
    num_steps_trained: 350000
    rl_0:
      cur_kl_coeff: 1.1641532356165829e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3345189094543457
      kl: 0.0043305582366883755
      policy_loss: -0.0024988208897411823
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1072 s, 39 iter, 390000 ts, 91.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-14-59
  done: false
  episode_len_mean: 123.17
  episode_reward_max: 183.76372176653925
  episode_reward_mean: 107.89954889743233
  episode_reward_min: -174.8045651300975
  episodes_this_iter: 81
  episodes_total: 2167
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3254.583
    load_time_ms: 2.324
    num_steps_sampled: 400000
    num_steps_trained: 400000
    rl_0:
      cur_kl_coeff: 7.275957722603643e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.300398588180542
      kl: 0.002726372331380844
      policy_loss: -0.0017250390956178308
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1191 s, 44 iter, 440000 ts, 94.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-16-59
  done: false
  episode_len_mean: 124.83
  episode_reward_max: 177.61336910425362
  episode_reward_mean: 97.97900039431347
  episode_reward_min: -173.63214305032636
  episodes_this_iter: 78
  episodes_total: 2580
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3252.311
    load_time_ms: 2.324
    num_steps_sampled: 450000
    num_steps_trained: 450000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.3546369075775146
      kl: 0.0035981046967208385
      policy_loss: -0.00208056322298944
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1310 s, 49 iter, 490000 ts, 119 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-18-58
  done: false
  episode_len_mean: 99.76
  episode_reward_max: 183.20735750867684
  episode_reward_mean: 95.3049512976476
  episode_reward_min: -178.28803995755143
  episodes_this_iter: 100
  episodes_total: 3007
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3250.333
    load_time_ms: 2.558
    num_steps_sampled: 500000
    num_steps_trained: 500000
    rl_0:
      cur_kl_coeff: 7.10542746348012e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.218662977218628
      kl: 0.008256793953478336
      policy_loss: -0.0021999841555953026
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1429 s, 54 iter, 540000 ts, 120 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-20-58
  done: false
  episode_len_mean: 119.43
  episode_reward_max: 166.82631410536604
  episode_reward_mean: 111.75185505431448
  episode_reward_min: -175.94042179534165
  episodes_this_iter: 85
  episodes_total: 3434
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3245.659
    load_time_ms: 2.621
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 2.2204460823375376e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.3379099369049072
      kl: 0.0026477437932044268
      policy_loss: -0.00229283282533288
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1549 s, 59 iter, 590000 ts, 103 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-22-58
  done: false
  episode_len_mean: 121.01
  episode_reward_max: 177.6724142742527
  episode_reward_mean: 119.51392545446436
  episode_reward_min: -184.60027983653436
  episodes_this_iter: 79
  episodes_total: 3869
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3249.082
    load_time_ms: 2.467
    num_steps_sampled: 600000
    num_steps_trained: 600000
    rl_0:
      cur_kl_coeff: 6.938894007304805e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.3544281721115112
      kl: 0.0037880586460232735
      policy_loss: -0.0017219346482306719
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1669 s, 64 iter, 640000 ts, 123 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-24-58
  done: false
  episode_len_mean: 116.53
  episode_reward_max: 185.61868709532342
  episode_reward_mean: 117.51709928127646
  episode_reward_min: -164.22868930936664
  episodes_this_iter: 83
  episodes_total: 4297
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3254.363
    load_time_ms: 2.329
    num_steps_sampled: 650000
    num_steps_trained: 650000
    rl_0:
      cur_kl_coeff: 4.336808754565503e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.3345259428024292
      kl: 0.002412100788205862
      policy_loss: -0.0008797586197033525
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1788 s, 69 iter, 690000 ts, 130 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-26-57
  done: false
  episode_len_mean: 103.38
  episode_reward_max: 192.28956044282694
  episode_reward_mean: 106.02320458531928
  episode_reward_min: -169.51618376640812
  episodes_this_iter: 95
  episodes_total: 4759
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3256.346
    load_time_ms: 2.283
    num_steps_sampled: 700000
    num_steps_trained: 700000
    rl_0:
      cur_kl_coeff: 1.3552527358017197e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.3520702123641968
      kl: 0.0028067873790860176
      policy_loss: -0.0009513999102637172
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 1908 s, 74 iter, 740000 ts, 124 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-28-58
  done: false
  episode_len_mean: 93.89719626168224
  episode_reward_max: 194.61435917495743
  episode_reward_mean: 119.91462911940617
  episode_reward_min: -164.0726252878084
  episodes_this_iter: 107
  episodes_total: 5229
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3263.367
    load_time_ms: 2.413
    num_steps_sampled: 750000
    num_steps_trained: 750000
    rl_0:
      cur_kl_coeff: 4.235164799380374e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.221577525138855
      kl: 0.002850819379091263
      policy_loss: -0.00233428180217742

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2028 s, 79 iter, 790000 ts, 115 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-30-59
  done: false
  episode_len_mean: 106.88
  episode_reward_max: 197.80522678870406
  episode_reward_mean: 137.96776481653148
  episode_reward_min: -155.4265601379507
  episodes_this_iter: 94
  episodes_total: 5750
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3273.581
    load_time_ms: 2.401
    num_steps_sampled: 800000
    num_steps_trained: 800000
    rl_0:
      cur_kl_coeff: 1.323488999806367e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.3067679405212402
      kl: 0.004810798447579145
      policy_loss: -0.0020466588903218508
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2149 s, 84 iter, 840000 ts, 148 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-32-59
  done: false
  episode_len_mean: 95.73076923076923
  episode_reward_max: 197.625680390365
  episode_reward_mean: 136.3229222583298
  episode_reward_min: -169.6557618109527
  episodes_this_iter: 104
  episodes_total: 6274
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3265.646
    load_time_ms: 2.385
    num_steps_sampled: 850000
    num_steps_trained: 850000
    rl_0:
      cur_kl_coeff: 4.1359031243948966e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.1536918878555298
      kl: 0.006888661067932844
      policy_loss: -0.003233571071177721

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2270 s, 89 iter, 890000 ts, 136 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-35-00
  done: false
  episode_len_mean: 95.61682242990655
  episode_reward_max: 200.5341628117068
  episode_reward_mean: 136.1765847871039
  episode_reward_min: -161.97623648490605
  episodes_this_iter: 107
  episodes_total: 6800
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3257.665
    load_time_ms: 2.644
    num_steps_sampled: 900000
    num_steps_trained: 900000
    rl_0:
      cur_kl_coeff: 2.5849394527468104e-27
      cur_lr: 4.999999873689376e-05
      entropy: 1.1423107385635376
      kl: 0.008469866588711739
      policy_loss: -0.0028184142429381

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2390 s, 94 iter, 940000 ts, 141 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-37-00
  done: false
  episode_len_mean: 92.3302752293578
  episode_reward_max: 202.27794971693302
  episode_reward_mean: 135.2832726993241
  episode_reward_min: -173.24682042904897
  episodes_this_iter: 109
  episodes_total: 7312
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3243.943
    load_time_ms: 2.624
    num_steps_sampled: 950000
    num_steps_trained: 950000
    rl_0:
      cur_kl_coeff: 8.077935789833782e-29
      cur_lr: 4.999999873689376e-05
      entropy: 1.0847843885421753
      kl: 0.008772800676524639
      policy_loss: -0.00157787767238914

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2509 s, 99 iter, 990000 ts, 143 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-39-00
  done: false
  episode_len_mean: 95.35238095238095
  episode_reward_max: 199.7523566971562
  episode_reward_mean: 143.77085820215322
  episode_reward_min: -157.4879959620276
  episodes_this_iter: 105
  episodes_total: 7840
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3217.093
    load_time_ms: 2.491
    num_steps_sampled: 1000000
    num_steps_trained: 1000000
    rl_0:
      cur_kl_coeff: 2.524354934323057e-30
      cur_lr: 4.999999873689376e-05
      entropy: 1.0698421001434326
      kl: 0.0029913298785686493
      policy_loss: -0.00278805289417

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2629 s, 104 iter, 1040000 ts, 140 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-41-00
  done: false
  episode_len_mean: 91.23636363636363
  episode_reward_max: 203.0704429355403
  episode_reward_mean: 127.6976301554117
  episode_reward_min: -157.79176197465821
  episodes_this_iter: 110
  episodes_total: 8360
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3216.487
    load_time_ms: 2.411
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
    rl_0:
      cur_kl_coeff: 7.888609169759553e-32
      cur_lr: 4.999999873689376e-05
      entropy: 1.0163953304290771
      kl: 0.004330936353653669
      policy_loss: -0.0024453932419

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2748 s, 109 iter, 1090000 ts, 136 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-43-00
  done: false
  episode_len_mean: 93.26666666666667
  episode_reward_max: 210.88944635949395
  episode_reward_mean: 147.91999434651288
  episode_reward_min: -152.42524727820813
  episodes_this_iter: 105
  episodes_total: 8883
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3217.594
    load_time_ms: 2.382
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
    rl_0:
      cur_kl_coeff: 2.4651903655498604e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.9759768843650818
      kl: 0.007545963395386934
      policy_loss: -0.0037575322

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2867 s, 114 iter, 1140000 ts, 154 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-44-59
  done: false
  episode_len_mean: 101.13
  episode_reward_max: 208.4654511647646
  episode_reward_mean: 146.4024077637333
  episode_reward_min: -159.21002897069718
  episodes_this_iter: 98
  episodes_total: 9376
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3215.031
    load_time_ms: 2.561
    num_steps_sampled: 1150000
    num_steps_trained: 1150000
    rl_0:
      cur_kl_coeff: 1.5407439784686627e-34
      cur_lr: 4.999999873689376e-05
      entropy: 1.062874674797058
      kl: 0.007326159160584211
      policy_loss: -0.004514588974416256
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 2986 s, 119 iter, 1190000 ts, 152 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-46-57
  done: false
  episode_len_mean: 94.12380952380953
  episode_reward_max: 210.92661764470157
  episode_reward_mean: 155.09882058904358
  episode_reward_min: -151.4583109369103
  episodes_this_iter: 105
  episodes_total: 9898
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3189.345
    load_time_ms: 2.557
    num_steps_sampled: 1200000
    num_steps_trained: 1200000
    rl_0:
      cur_kl_coeff: 4.814824932714571e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.9081196784973145
      kl: 0.00774905551224947
      policy_loss: -0.0025208829902

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3105 s, 124 iter, 1240000 ts, 163 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-48-56
  done: false
  episode_len_mean: 100.4950495049505
  episode_reward_max: 208.51627507865103
  episode_reward_mean: 150.225537627938
  episode_reward_min: -127.90528628016834
  episodes_this_iter: 101
  episodes_total: 10412
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3157.058
    load_time_ms: 2.452
    num_steps_sampled: 1250000
    num_steps_trained: 1250000
    rl_0:
      cur_kl_coeff: 3.009265582946607e-37
      cur_lr: 4.999999873689376e-05
      entropy: 1.0646454095840454
      kl: 0.015234129503369331
      policy_loss: -0.007100748829

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3223 s, 129 iter, 1290000 ts, 149 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-50-55
  done: false
  episode_len_mean: 94.62264150943396
  episode_reward_max: 211.47166334904776
  episode_reward_mean: 154.05684372416968
  episode_reward_min: -138.84916721172465
  episodes_this_iter: 106
  episodes_total: 10937
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3146.801
    load_time_ms: 2.218
    num_steps_sampled: 1300000
    num_steps_trained: 1300000
    rl_0:
      cur_kl_coeff: 3.7615819786832586e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.9800622463226318
      kl: 0.00941486656665802
      policy_loss: -0.0051525528

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3341 s, 134 iter, 1340000 ts, 157 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-52-52
  done: false
  episode_len_mean: 92.27777777777777
  episode_reward_max: 214.83843914616887
  episode_reward_mean: 160.63596060970852
  episode_reward_min: 110.01537044831736
  episodes_this_iter: 108
  episodes_total: 11470
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.812
    load_time_ms: 2.43
    num_steps_sampled: 1350000
    num_steps_trained: 1350000
    rl_0:
      cur_kl_coeff: 2.3509892621639607e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.9036900401115417
      kl: 0.009391574189066887
      policy_loss: -0.00483608571

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3458 s, 139 iter, 1390000 ts, 163 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-54-50
  done: false
  episode_len_mean: 91.53211009174312
  episode_reward_max: 213.01073204493125
  episode_reward_mean: 152.96498665151236
  episode_reward_min: -118.24823525485651
  episodes_this_iter: 109
  episodes_total: 12006
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3140.988
    load_time_ms: 2.558
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
    rl_0:
      cur_kl_coeff: 7.346867718608583e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.9320637583732605
      kl: 0.007695375941693783
      policy_loss: -0.0040323999

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3575 s, 144 iter, 1440000 ts, 164 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-56-48
  done: false
  episode_len_mean: 97.28571428571429
  episode_reward_max: 212.95652179969258
  episode_reward_mean: 144.37572063655637
  episode_reward_min: -147.43210515292972
  episodes_this_iter: 105
  episodes_total: 12546
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3135.128
    load_time_ms: 2.385
    num_steps_sampled: 1450000
    num_steps_trained: 1450000
    rl_0:
      cur_kl_coeff: 2.2953268845640504e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.9516928195953369
      kl: 0.006627310533076525
      policy_loss: -0.004734646

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3691 s, 149 iter, 1490000 ts, 160 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_01-58-44
  done: false
  episode_len_mean: 90.36936936936937
  episode_reward_max: 207.06467006501225
  episode_reward_mean: 156.49971680513815
  episode_reward_min: -136.67140530528525
  episodes_this_iter: 111
  episodes_total: 13076
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.449
    load_time_ms: 2.582
    num_steps_sampled: 1500000
    num_steps_trained: 1500000
    rl_0:
      cur_kl_coeff: 7.146622168056567e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.889825701713562
      kl: 0.0059203216806054115
      policy_loss: -0.0037762124

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3807 s, 154 iter, 1540000 ts, 156 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-00-39
  done: false
  episode_len_mean: 91.02752293577981
  episode_reward_max: 208.7400335752029
  episode_reward_mean: 155.62530180425966
  episode_reward_min: -130.26837932078308
  episodes_this_iter: 109
  episodes_total: 13628
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.126
    load_time_ms: 2.499
    num_steps_sampled: 1550000
    num_steps_trained: 1550000
    rl_0:
      cur_kl_coeff: 2.802596928649634e-45
      cur_lr: 4.999999873689376e-05
      entropy: 0.8481937050819397
      kl: 0.00461295573040843
      policy_loss: -0.001007138169

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 3923 s, 159 iter, 1590000 ts, 157 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-02-36
  done: false
  episode_len_mean: 92.52777777777777
  episode_reward_max: 213.40632648116653
  episode_reward_mean: 158.73113021733937
  episode_reward_min: -122.36234082954762
  episodes_this_iter: 108
  episodes_total: 14169
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3140.277
    load_time_ms: 2.381
    num_steps_sampled: 1600000
    num_steps_trained: 1600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7690914869308472
      kl: 0.0071768066845834255
      policy_loss: -0.0025051177944988012
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4039 s, 164 iter, 1640000 ts, 164 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-04-32
  done: false
  episode_len_mean: 90.08108108108108
  episode_reward_max: 211.94422031385798
  episode_reward_mean: 161.2660109031587
  episode_reward_min: -132.94352168218984
  episodes_this_iter: 111
  episodes_total: 14710
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3142.58
    load_time_ms: 2.538
    num_steps_sampled: 1650000
    num_steps_trained: 1650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7975943088531494
      kl: 0.008673271164298058
      policy_loss: -0.007300197146832943
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4155 s, 169 iter, 1690000 ts, 162 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-06-29
  done: false
  episode_len_mean: 93.20560747663552
  episode_reward_max: 214.6981636039369
  episode_reward_mean: 157.70760085172293
  episode_reward_min: -142.39986846586845
  episodes_this_iter: 107
  episodes_total: 15260
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3131.486
    load_time_ms: 2.46
    num_steps_sampled: 1700000
    num_steps_trained: 1700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8442091345787048
      kl: 0.007426272612065077
      policy_loss: -0.00477562565356493
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4271 s, 174 iter, 1740000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-08-24
  done: false
  episode_len_mean: 89.29464285714286
  episode_reward_max: 209.41640501583998
  episode_reward_mean: 157.79880242512436
  episode_reward_min: -160.6989924356549
  episodes_this_iter: 112
  episodes_total: 15810
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.557
    load_time_ms: 2.165
    num_steps_sampled: 1750000
    num_steps_trained: 1750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7811149954795837
      kl: 0.008918333798646927
      policy_loss: -0.0027872263453900814
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4387 s, 179 iter, 1790000 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-10-21
  done: false
  episode_len_mean: 87.91228070175438
  episode_reward_max: 214.4643142481328
  episode_reward_mean: 152.9905630824355
  episode_reward_min: -178.1845085440903
  episodes_this_iter: 114
  episodes_total: 16363
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.043
    load_time_ms: 2.367
    num_steps_sampled: 1800000
    num_steps_trained: 1800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7593287229537964
      kl: 0.011253923177719116
      policy_loss: -0.005245225969702005
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4504 s, 184 iter, 1840000 ts, 165 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-12-18
  done: false
  episode_len_mean: 88.2566371681416
  episode_reward_max: 215.70688841037867
  episode_reward_mean: 162.99075053207537
  episode_reward_min: -142.41165538848648
  episodes_this_iter: 113
  episodes_total: 16927
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3120.558
    load_time_ms: 2.505
    num_steps_sampled: 1850000
    num_steps_trained: 1850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7158814072608948
      kl: 0.008593151345849037
      policy_loss: -0.0031834898982197046
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4620 s, 189 iter, 1890000 ts, 159 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-14-15
  done: false
  episode_len_mean: 89.32142857142857
  episode_reward_max: 213.39380890285258
  episode_reward_mean: 167.3347243311568
  episode_reward_min: -119.85187765873303
  episodes_this_iter: 112
  episodes_total: 17491
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3121.737
    load_time_ms: 2.507
    num_steps_sampled: 1900000
    num_steps_trained: 1900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6389119029045105
      kl: 0.0073425378650426865
      policy_loss: -0.0025706165470182896
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4737 s, 194 iter, 1940000 ts, 164 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-16-12
  done: false
  episode_len_mean: 89.16964285714286
  episode_reward_max: 205.95488989382395
  episode_reward_mean: 165.39601860505394
  episode_reward_min: -176.09920575306677
  episodes_this_iter: 112
  episodes_total: 18050
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.606
    load_time_ms: 2.529
    num_steps_sampled: 1950000
    num_steps_trained: 1950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6852743625640869
      kl: 0.005021451972424984
      policy_loss: -0.00333999702706933
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4854 s, 199 iter, 1990000 ts, 157 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-18-09
  done: false
  episode_len_mean: 88.38938053097345
  episode_reward_max: 213.39408298804491
  episode_reward_mean: 165.73051647228849
  episode_reward_min: -171.81886684627446
  episodes_this_iter: 113
  episodes_total: 18616
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.809
    load_time_ms: 2.318
    num_steps_sampled: 2000000
    num_steps_trained: 2000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6303781867027283
      kl: 0.005323466844856739
      policy_loss: -0.0010842429473996162
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 4970 s, 204 iter, 2040000 ts, 161 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-20-05
  done: false
  episode_len_mean: 88.54867256637168
  episode_reward_max: 216.03828538440723
  episode_reward_mean: 167.9409984701639
  episode_reward_min: 120.98124761841059
  episodes_this_iter: 113
  episodes_total: 19180
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3126.321
    load_time_ms: 2.396
    num_steps_sampled: 2050000
    num_steps_trained: 2050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6296117901802063
      kl: 0.009111898951232433
      policy_loss: -0.005573394242674112
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5087 s, 209 iter, 2090000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-22-02
  done: false
  episode_len_mean: 89.28318584070796
  episode_reward_max: 215.43264681485306
  episode_reward_mean: 171.37471126086675
  episode_reward_min: -169.27765497633354
  episodes_this_iter: 113
  episodes_total: 19745
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.007
    load_time_ms: 2.366
    num_steps_sampled: 2100000
    num_steps_trained: 2100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6355801820755005
      kl: 0.016208697110414505
      policy_loss: -0.005895405076444149
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5203 s, 214 iter, 2140000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-23-59
  done: false
  episode_len_mean: 88.72566371681415
  episode_reward_max: 216.25672660648056
  episode_reward_mean: 168.3986482114569
  episode_reward_min: -120.55745288477479
  episodes_this_iter: 113
  episodes_total: 20305
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3140.282
    load_time_ms: 2.372
    num_steps_sampled: 2150000
    num_steps_trained: 2150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6372653245925903
      kl: 0.008053267374634743
      policy_loss: -0.0017769323894754052
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5320 s, 219 iter, 2190000 ts, 169 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-25-55
  done: false
  episode_len_mean: 88.26548672566372
  episode_reward_max: 214.95855746998544
  episode_reward_mean: 164.4818139851543
  episode_reward_min: -153.26395639859612
  episodes_this_iter: 113
  episodes_total: 20872
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3124.239
    load_time_ms: 2.334
    num_steps_sampled: 2200000
    num_steps_trained: 2200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6182886958122253
      kl: 0.012506430968642235
      policy_loss: -0.006767910905182362
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5436 s, 224 iter, 2240000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-27-52
  done: false
  episode_len_mean: 89.27927927927928
  episode_reward_max: 216.49950202873845
  episode_reward_mean: 172.49361782486667
  episode_reward_min: -130.5235689517239
  episodes_this_iter: 111
  episodes_total: 21433
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.838
    load_time_ms: 2.242
    num_steps_sampled: 2250000
    num_steps_trained: 2250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6343989968299866
      kl: 0.0055646738037467
      policy_loss: -0.00028175354236736894
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5552 s, 229 iter, 2290000 ts, 164 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-29-48
  done: false
  episode_len_mean: 89.67857142857143
  episode_reward_max: 216.4180974424489
  episode_reward_mean: 170.41257368626125
  episode_reward_min: 126.48260782381655
  episodes_this_iter: 112
  episodes_total: 21997
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.024
    load_time_ms: 2.211
    num_steps_sampled: 2300000
    num_steps_trained: 2300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6438950300216675
      kl: 0.00710958382114768
      policy_loss: -0.0017577593680471182
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5667 s, 234 iter, 2340000 ts, 153 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-31-44
  done: false
  episode_len_mean: 87.59649122807018
  episode_reward_max: 212.96921928267483
  episode_reward_mean: 159.58978653553973
  episode_reward_min: -185.32658151955755
  episodes_this_iter: 114
  episodes_total: 22563
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3143.938
    load_time_ms: 2.176
    num_steps_sampled: 2350000
    num_steps_trained: 2350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6337860822677612
      kl: 0.013494334183633327
      policy_loss: -0.004278395790606737
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5784 s, 239 iter, 2390000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-33-40
  done: false
  episode_len_mean: 89.24107142857143
  episode_reward_max: 209.4308556065503
  episode_reward_mean: 171.7754825593106
  episode_reward_min: -180.35609823887475
  episodes_this_iter: 112
  episodes_total: 23123
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3131.481
    load_time_ms: 2.223
    num_steps_sampled: 2400000
    num_steps_trained: 2400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6445105075836182
      kl: 0.004254501778632402
      policy_loss: -0.0035444865934550762
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 5901 s, 244 iter, 2440000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-35-38
  done: false
  episode_len_mean: 89.34234234234235
  episode_reward_max: 213.86126144808532
  episode_reward_mean: 170.82422736869466
  episode_reward_min: 125.05916355513554
  episodes_this_iter: 111
  episodes_total: 23684
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.187
    load_time_ms: 2.25
    num_steps_sampled: 2450000
    num_steps_trained: 2450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6669011116027832
      kl: 0.008227027021348476
      policy_loss: -0.004457819741219282
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6018 s, 249 iter, 2490000 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-37-35
  done: false
  episode_len_mean: 89.41071428571429
  episode_reward_max: 214.02481687695834
  episode_reward_mean: 168.2568484553799
  episode_reward_min: -169.13180010937953
  episodes_this_iter: 112
  episodes_total: 24245
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.212
    load_time_ms: 2.27
    num_steps_sampled: 2500000
    num_steps_trained: 2500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6126981377601624
      kl: 0.007137584965676069
      policy_loss: -0.005718528758734465
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6134 s, 254 iter, 2540000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-39-32
  done: false
  episode_len_mean: 89.19469026548673
  episode_reward_max: 211.53673357919152
  episode_reward_mean: 168.5192429632058
  episode_reward_min: -177.18100213471223
  episodes_this_iter: 113
  episodes_total: 24804
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.375
    load_time_ms: 2.295
    num_steps_sampled: 2550000
    num_steps_trained: 2550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7008447051048279
      kl: 0.012733625248074532
      policy_loss: -0.00780462846159935
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6250 s, 259 iter, 2590000 ts, 170 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-41-27
  done: false
  episode_len_mean: 88.94690265486726
  episode_reward_max: 219.68104680473482
  episode_reward_mean: 168.8125832211654
  episode_reward_min: 122.01421556845864
  episodes_this_iter: 113
  episodes_total: 25363
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.836
    load_time_ms: 2.633
    num_steps_sampled: 2600000
    num_steps_trained: 2600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6630837917327881
      kl: 0.006612969562411308
      policy_loss: -0.0024585432838648558
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6366 s, 264 iter, 2640000 ts, 169 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-43-24
  done: false
  episode_len_mean: 88.12389380530973
  episode_reward_max: 216.52906728010015
  episode_reward_mean: 160.0746825881564
  episode_reward_min: -178.52399071026036
  episodes_this_iter: 113
  episodes_total: 25926
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3134.113
    load_time_ms: 2.757
    num_steps_sampled: 2650000
    num_steps_trained: 2650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6461805105209351
      kl: 0.008876198902726173
      policy_loss: -0.0020405149552971125
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6483 s, 269 iter, 2690000 ts, 164 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-45-21
  done: false
  episode_len_mean: 89.55855855855856
  episode_reward_max: 213.21614317424687
  episode_reward_mean: 167.92653557300753
  episode_reward_min: -127.84779385938893
  episodes_this_iter: 111
  episodes_total: 26486
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3146.256
    load_time_ms: 2.462
    num_steps_sampled: 2700000
    num_steps_trained: 2700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6546441316604614
      kl: 0.01099341083317995
      policy_loss: -0.004004354123026133
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6599 s, 274 iter, 2740000 ts, 151 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-47-17
  done: false
  episode_len_mean: 89.08035714285714
  episode_reward_max: 216.91030349092202
  episode_reward_mean: 173.3921550850399
  episode_reward_min: 124.60408327459305
  episodes_this_iter: 112
  episodes_total: 27047
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3125.85
    load_time_ms: 2.586
    num_steps_sampled: 2750000
    num_steps_trained: 2750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6880559325218201
      kl: 0.0075601194985210896
      policy_loss: -0.004142117220908403
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6716 s, 279 iter, 2790000 ts, 143 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-49-14
  done: false
  episode_len_mean: 88.14035087719299
  episode_reward_max: 214.82773182609589
  episode_reward_mean: 161.72545037367078
  episode_reward_min: -176.54230870552857
  episodes_this_iter: 114
  episodes_total: 27613
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3122.021
    load_time_ms: 2.507
    num_steps_sampled: 2800000
    num_steps_trained: 2800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6652410626411438
      kl: 0.010341600514948368
      policy_loss: -0.0038652473594993353
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6833 s, 284 iter, 2840000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-51-11
  done: false
  episode_len_mean: 88.04385964912281
  episode_reward_max: 214.59446876793805
  episode_reward_mean: 161.9523342220224
  episode_reward_min: -166.5295077457677
  episodes_this_iter: 114
  episodes_total: 28175
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3126.323
    load_time_ms: 2.242
    num_steps_sampled: 2850000
    num_steps_trained: 2850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7255606055259705
      kl: 0.008680031634867191
      policy_loss: -0.00446805777028203
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 6948 s, 289 iter, 2890000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-53-08
  done: false
  episode_len_mean: 90.10810810810811
  episode_reward_max: 218.5015231050061
  episode_reward_mean: 169.36807588678127
  episode_reward_min: 125.72170862525151
  episodes_this_iter: 111
  episodes_total: 28731
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3131.18
    load_time_ms: 2.4
    num_steps_sampled: 2900000
    num_steps_trained: 2900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7984143495559692
      kl: 0.007247145287692547
      policy_loss: -0.004240608308464289
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7066 s, 294 iter, 2940000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-55-05
  done: false
  episode_len_mean: 90.41441441441441
  episode_reward_max: 217.23479857419653
  episode_reward_mean: 169.0478029887739
  episode_reward_min: 125.49611511699567
  episodes_this_iter: 111
  episodes_total: 29289
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.312
    load_time_ms: 2.417
    num_steps_sampled: 2950000
    num_steps_trained: 2950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8017580509185791
      kl: 0.01056694146245718
      policy_loss: -0.004478799179196358
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7182 s, 299 iter, 2990000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-57-02
  done: false
  episode_len_mean: 89.5625
  episode_reward_max: 214.07123570826852
  episode_reward_mean: 167.2805968719251
  episode_reward_min: -161.61709725860595
  episodes_this_iter: 112
  episodes_total: 29847
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3147.566
    load_time_ms: 2.279
    num_steps_sampled: 3000000
    num_steps_trained: 3000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8271234035491943
      kl: 0.008794151246547699
      policy_loss: -0.006158644799143076
      total_loss: 7.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7299 s, 304 iter, 3040000 ts, 158 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_02-58-59
  done: false
  episode_len_mean: 90.73873873873873
  episode_reward_max: 210.22176557047328
  episode_reward_mean: 166.50631608449768
  episode_reward_min: 118.09181204038654
  episodes_this_iter: 111
  episodes_total: 30401
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3137.84
    load_time_ms: 2.42
    num_steps_sampled: 3050000
    num_steps_trained: 3050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8879607915878296
      kl: 0.011541498824954033
      policy_loss: -0.005447035189718008
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7415 s, 309 iter, 3090000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-00-55
  done: false
  episode_len_mean: 89.88288288288288
  episode_reward_max: 215.7307557782544
  episode_reward_mean: 167.24428931596378
  episode_reward_min: -175.50051461618517
  episodes_this_iter: 111
  episodes_total: 30955
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3128.876
    load_time_ms: 2.467
    num_steps_sampled: 3100000
    num_steps_trained: 3100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8468536138534546
      kl: 0.005166919436305761
      policy_loss: -0.0017222166061401367
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7531 s, 314 iter, 3140000 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-02-52
  done: false
  episode_len_mean: 91.58715596330275
  episode_reward_max: 222.1042942983031
  episode_reward_mean: 170.65823684772536
  episode_reward_min: 132.8508287782268
  episodes_this_iter: 109
  episodes_total: 31504
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3149.232
    load_time_ms: 2.317
    num_steps_sampled: 3150000
    num_steps_trained: 3150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9361534118652344
      kl: 0.004039893392473459
      policy_loss: -0.0023788867983967066
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7648 s, 319 iter, 3190000 ts, 158 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-04-48
  done: false
  episode_len_mean: 91.15454545454546
  episode_reward_max: 223.77559607669684
  episode_reward_mean: 169.68855123863338
  episode_reward_min: -129.7445160754146
  episodes_this_iter: 110
  episodes_total: 32047
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3122.098
    load_time_ms: 2.3
    num_steps_sampled: 3200000
    num_steps_trained: 3200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8471457362174988
      kl: 0.005764192435890436
      policy_loss: -0.004712502937763929
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7764 s, 324 iter, 3240000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-06-45
  done: false
  episode_len_mean: 91.55454545454545
  episode_reward_max: 220.03401809093657
  episode_reward_mean: 172.26211982520135
  episode_reward_min: 129.0082135957488
  episodes_this_iter: 110
  episodes_total: 32596
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.714
    load_time_ms: 2.197
    num_steps_sampled: 3250000
    num_steps_trained: 3250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9386898875236511
      kl: 0.006844683550298214
      policy_loss: -0.003537952434271574
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7881 s, 329 iter, 3290000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-08-42
  done: false
  episode_len_mean: 93.33333333333333
  episode_reward_max: 217.64595243704343
  episode_reward_mean: 168.6467802811921
  episode_reward_min: -165.6701758954618
  episodes_this_iter: 108
  episodes_total: 33142
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.744
    load_time_ms: 2.198
    num_steps_sampled: 3300000
    num_steps_trained: 3300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9754568338394165
      kl: 0.014492548070847988
      policy_loss: -0.006666089408099651
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 7996 s, 334 iter, 3340000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-10-38
  done: false
  episode_len_mean: 90.09909909909909
  episode_reward_max: 215.39511591696382
  episode_reward_mean: 174.6088501994413
  episode_reward_min: 133.22153190413184
  episodes_this_iter: 111
  episodes_total: 33690
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3137.858
    load_time_ms: 2.28
    num_steps_sampled: 3350000
    num_steps_trained: 3350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8697085380554199
      kl: 0.008524708449840546
      policy_loss: -0.0043764798901975155
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8112 s, 339 iter, 3390000 ts, 170 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-12-34
  done: false
  episode_len_mean: 90.2072072072072
  episode_reward_max: 221.0472507907882
  episode_reward_mean: 172.4569133568074
  episode_reward_min: -121.54529584562424
  episodes_this_iter: 111
  episodes_total: 34240
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.588
    load_time_ms: 2.245
    num_steps_sampled: 3400000
    num_steps_trained: 3400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8864238262176514
      kl: 0.004896250087767839
      policy_loss: -0.0013370501110330224
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8228 s, 344 iter, 3440000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-14-29
  done: false
  episode_len_mean: 89.58558558558559
  episode_reward_max: 214.86063568168345
  episode_reward_mean: 164.93700971508034
  episode_reward_min: -167.90928427641416
  episodes_this_iter: 111
  episodes_total: 34799
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.691
    load_time_ms: 2.162
    num_steps_sampled: 3450000
    num_steps_trained: 3450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9353564381599426
      kl: 0.011141346767544746
      policy_loss: -0.008544322103261948
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8344 s, 349 iter, 3490000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-16-26
  done: false
  episode_len_mean: 91.55454545454545
  episode_reward_max: 217.44159340065872
  episode_reward_mean: 170.48518142781177
  episode_reward_min: -106.57552666145676
  episodes_this_iter: 110
  episodes_total: 35343
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3151.94
    load_time_ms: 2.204
    num_steps_sampled: 3500000
    num_steps_trained: 3500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9168961644172668
      kl: 0.005857947748154402
      policy_loss: -0.0021300597582012415
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8460 s, 354 iter, 3540000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-18-22
  done: false
  episode_len_mean: 88.52212389380531
  episode_reward_max: 218.87540136493044
  episode_reward_mean: 166.02374538733196
  episode_reward_min: -177.26884699833062
  episodes_this_iter: 113
  episodes_total: 35897
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3142.709
    load_time_ms: 2.169
    num_steps_sampled: 3550000
    num_steps_trained: 3550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8614920377731323
      kl: 0.004345229361206293
      policy_loss: -0.0008581596193835139
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8577 s, 359 iter, 3590000 ts, 170 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-20-19
  done: false
  episode_len_mean: 91.62385321100918
  episode_reward_max: 221.9167510937725
  episode_reward_mean: 163.93750793039766
  episode_reward_min: -156.21565967833163
  episodes_this_iter: 109
  episodes_total: 36450
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.172
    load_time_ms: 2.206
    num_steps_sampled: 3600000
    num_steps_trained: 3600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8526279926300049
      kl: 0.007506038993597031
      policy_loss: -0.0019221729598939419
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8693 s, 364 iter, 3640000 ts, 158 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-22-15
  done: false
  episode_len_mean: 89.63392857142857
  episode_reward_max: 219.76302575003714
  episode_reward_mean: 174.44912581040555
  episode_reward_min: -130.29801348983884
  episodes_this_iter: 112
  episodes_total: 36994
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.985
    load_time_ms: 2.385
    num_steps_sampled: 3650000
    num_steps_trained: 3650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7229860424995422
      kl: 0.00911990087479353
      policy_loss: -0.004237710498273373
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8809 s, 369 iter, 3690000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-24-11
  done: false
  episode_len_mean: 91.52727272727273
  episode_reward_max: 223.64063987127233
  episode_reward_mean: 175.0166281687626
  episode_reward_min: 126.1446778296198
  episodes_this_iter: 110
  episodes_total: 37542
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3138.895
    load_time_ms: 2.391
    num_steps_sampled: 3700000
    num_steps_trained: 3700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.760453999042511
      kl: 0.008261550217866898
      policy_loss: -0.004960278049111366
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 8925 s, 374 iter, 3740000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-26-08
  done: false
  episode_len_mean: 89.67567567567568
  episode_reward_max: 224.04998347773747
  episode_reward_mean: 167.85972984462407
  episode_reward_min: -141.02467080795407
  episodes_this_iter: 111
  episodes_total: 38093
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3141.27
    load_time_ms: 2.275
    num_steps_sampled: 3750000
    num_steps_trained: 3750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7508754730224609
      kl: 0.006689560599625111
      policy_loss: -0.0037755637895315886
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9041 s, 379 iter, 3790000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-28-04
  done: false
  episode_len_mean: 91.19090909090909
  episode_reward_max: 215.62344958537236
  episode_reward_mean: 173.15056730162294
  episode_reward_min: 126.64993645419118
  episodes_this_iter: 110
  episodes_total: 38635
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3125.777
    load_time_ms: 2.311
    num_steps_sampled: 3800000
    num_steps_trained: 3800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.763934314250946
      kl: 0.007054861634969711
      policy_loss: -0.0033007990568876266
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9158 s, 384 iter, 3840000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-30-02
  done: false
  episode_len_mean: 89.89189189189189
  episode_reward_max: 223.6493767166172
  episode_reward_mean: 178.55720923522787
  episode_reward_min: 135.95643987496277
  episodes_this_iter: 111
  episodes_total: 39189
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3130.131
    load_time_ms: 2.348
    num_steps_sampled: 3850000
    num_steps_trained: 3850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6763870716094971
      kl: 0.011580422520637512
      policy_loss: -0.004997448064386845
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9274 s, 389 iter, 3890000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-31-57
  done: false
  episode_len_mean: 90.26126126126127
  episode_reward_max: 222.27175108318525
  episode_reward_mean: 173.85509371216835
  episode_reward_min: -152.89136950361365
  episodes_this_iter: 111
  episodes_total: 39740
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3134.766
    load_time_ms: 2.423
    num_steps_sampled: 3900000
    num_steps_trained: 3900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6778016090393066
      kl: 0.010861286893486977
      policy_loss: -0.004668664652854204
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9390 s, 394 iter, 3940000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-33-54
  done: false
  episode_len_mean: 95.15094339622641
  episode_reward_max: 217.63571481362277
  episode_reward_mean: 169.3581843330206
  episode_reward_min: -149.21330104005722
  episodes_this_iter: 106
  episodes_total: 40285
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3141.677
    load_time_ms: 2.473
    num_steps_sampled: 3950000
    num_steps_trained: 3950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.733020007610321
      kl: 0.013373903930187225
      policy_loss: -0.005012885667383671
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9506 s, 399 iter, 3990000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-35-51
  done: false
  episode_len_mean: 91.54128440366972
  episode_reward_max: 220.11177205213247
  episode_reward_mean: 175.47028699917772
  episode_reward_min: -50.236900424206254
  episodes_this_iter: 109
  episodes_total: 40827
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3177.174
    load_time_ms: 2.443
    num_steps_sampled: 4000000
    num_steps_trained: 4000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6475359201431274
      kl: 0.01200912520289421
      policy_loss: -0.002916331635788083
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9624 s, 404 iter, 4040000 ts, 176 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-37-48
  done: false
  episode_len_mean: 91.84545454545454
  episode_reward_max: 219.1497931557059
  episode_reward_mean: 174.99705837239816
  episode_reward_min: -33.91378072095736
  episodes_this_iter: 110
  episodes_total: 41380
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3167.064
    load_time_ms: 2.411
    num_steps_sampled: 4050000
    num_steps_trained: 4050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6595895290374756
      kl: 0.007855185307562351
      policy_loss: -0.0025626271963119507
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9740 s, 409 iter, 4090000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-39-44
  done: false
  episode_len_mean: 90.89090909090909
  episode_reward_max: 213.14563648602265
  episode_reward_mean: 178.7947572788275
  episode_reward_min: 147.30713210760254
  episodes_this_iter: 110
  episodes_total: 41922
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3146.448
    load_time_ms: 2.357
    num_steps_sampled: 4100000
    num_steps_trained: 4100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.635381817817688
      kl: 0.007579364813864231
      policy_loss: -0.0031263886485248804
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9856 s, 414 iter, 4140000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-41-41
  done: false
  episode_len_mean: 90.6
  episode_reward_max: 219.26209639818205
  episode_reward_mean: 178.2855143857363
  episode_reward_min: -173.62786975626793
  episodes_this_iter: 110
  episodes_total: 42469
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.734
    load_time_ms: 2.343
    num_steps_sampled: 4150000
    num_steps_trained: 4150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6274426579475403
      kl: 0.010119442827999592
      policy_loss: -0.0033470934722572565
      total_loss: 32.2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 9973 s, 419 iter, 4190000 ts, 181 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-43-38
  done: false
  episode_len_mean: 91.31192660550458
  episode_reward_max: 221.60790877066296
  episode_reward_mean: 180.8749423761388
  episode_reward_min: 138.56718121079928
  episodes_this_iter: 109
  episodes_total: 43017
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3142.294
    load_time_ms: 2.437
    num_steps_sampled: 4200000
    num_steps_trained: 4200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6292871236801147
      kl: 0.006143894046545029
      policy_loss: -0.0018451922805979848
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10089 s, 424 iter, 4240000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-45-35
  done: false
  episode_len_mean: 91.28181818181818
  episode_reward_max: 222.55450822279215
  episode_reward_mean: 178.4300250488726
  episode_reward_min: 143.4525074716325
  episodes_this_iter: 110
  episodes_total: 43570
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3148.561
    load_time_ms: 2.326
    num_steps_sampled: 4250000
    num_steps_trained: 4250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.628645122051239
      kl: 0.013787872157990932
      policy_loss: -0.0039597442373633385
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10206 s, 429 iter, 4290000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-47-32
  done: false
  episode_len_mean: 90.31531531531532
  episode_reward_max: 219.9250820404108
  episode_reward_mean: 173.4173858788434
  episode_reward_min: -140.37599857721867
  episodes_this_iter: 111
  episodes_total: 44122
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.086
    load_time_ms: 2.096
    num_steps_sampled: 4300000
    num_steps_trained: 4300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6135630011558533
      kl: 0.019288288429379463
      policy_loss: -0.009510987438261509
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10323 s, 434 iter, 4340000 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-49-28
  done: false
  episode_len_mean: 90.68181818181819
  episode_reward_max: 223.3286132780479
  episode_reward_mean: 179.4034253780402
  episode_reward_min: 139.77925798567276
  episodes_this_iter: 110
  episodes_total: 44674
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.381
    load_time_ms: 2.334
    num_steps_sampled: 4350000
    num_steps_trained: 4350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5208104252815247
      kl: 0.011642680503427982
      policy_loss: -0.006713186856359243
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10439 s, 439 iter, 4390000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-51-25
  done: false
  episode_len_mean: 91.26363636363637
  episode_reward_max: 221.2574467006446
  episode_reward_mean: 171.8776794081289
  episode_reward_min: -146.96018693101047
  episodes_this_iter: 110
  episodes_total: 45224
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3137.081
    load_time_ms: 2.625
    num_steps_sampled: 4400000
    num_steps_trained: 4400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5391667485237122
      kl: 0.010651148855686188
      policy_loss: -0.0014424179680645466
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10554 s, 444 iter, 4440000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-53-21
  done: false
  episode_len_mean: 90.97272727272727
  episode_reward_max: 222.46650373422838
  episode_reward_mean: 176.91903558594868
  episode_reward_min: 139.31766787947478
  episodes_this_iter: 110
  episodes_total: 45773
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3143.039
    load_time_ms: 2.515
    num_steps_sampled: 4450000
    num_steps_trained: 4450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5186630487442017
      kl: 0.00947515107691288
      policy_loss: -0.003928683698177338
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10672 s, 449 iter, 4490000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-55-19
  done: false
  episode_len_mean: 91.68807339449542
  episode_reward_max: 218.43286792645083
  episode_reward_mean: 178.74197775216228
  episode_reward_min: 146.7490329595033
  episodes_this_iter: 109
  episodes_total: 46317
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.069
    load_time_ms: 2.315
    num_steps_sampled: 4500000
    num_steps_trained: 4500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5102900266647339
      kl: 0.008670409210026264
      policy_loss: -0.00462861405685544
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10790 s, 454 iter, 4540000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-57-16
  done: false
  episode_len_mean: 90.28181818181818
  episode_reward_max: 219.09193339896592
  episode_reward_mean: 178.24756724911154
  episode_reward_min: -119.006356785115
  episodes_this_iter: 110
  episodes_total: 46869
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3130.151
    load_time_ms: 2.334
    num_steps_sampled: 4550000
    num_steps_trained: 4550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.43363332748413086
      kl: 0.011432794854044914
      policy_loss: -0.00195028237067163
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 10904 s, 459 iter, 4590000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_03-59-11
  done: false
  episode_len_mean: 90.9
  episode_reward_max: 219.0674262546999
  episode_reward_mean: 178.2266232751606
  episode_reward_min: 144.18236331311374
  episodes_this_iter: 110
  episodes_total: 47422
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3116.306
    load_time_ms: 2.611
    num_steps_sampled: 4600000
    num_steps_trained: 4600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.39389869570732117
      kl: 0.006300134584307671
      policy_loss: -0.0023860507644712925
      total_loss: 3.29

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11021 s, 464 iter, 4640000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-01-09
  done: false
  episode_len_mean: 95.05714285714286
  episode_reward_max: 219.72722297313334
  episode_reward_mean: 172.69976848297387
  episode_reward_min: -126.15883867096544
  episodes_this_iter: 105
  episodes_total: 47967
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.902
    load_time_ms: 2.583
    num_steps_sampled: 4650000
    num_steps_trained: 4650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.48916366696357727
      kl: 0.01490121427923441
      policy_loss: -0.005439485423266888
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11138 s, 469 iter, 4690000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-03-06
  done: false
  episode_len_mean: 90.32142857142857
  episode_reward_max: 216.07087294985146
  episode_reward_mean: 177.80827797339455
  episode_reward_min: 142.34112943292132
  episodes_this_iter: 112
  episodes_total: 48519
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3159.989
    load_time_ms: 2.189
    num_steps_sampled: 4700000
    num_steps_trained: 4700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.4256884753704071
      kl: 0.006373842246830463
      policy_loss: -0.003073599189519882
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11254 s, 474 iter, 4740000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-05-02
  done: false
  episode_len_mean: 90.84684684684684
  episode_reward_max: 220.57471937238435
  episode_reward_mean: 176.5974557161089
  episode_reward_min: 142.41877863501088
  episodes_this_iter: 111
  episodes_total: 49069
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3169.97
    load_time_ms: 2.224
    num_steps_sampled: 4750000
    num_steps_trained: 4750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3751617968082428
      kl: 0.009506159462034702
      policy_loss: -0.0024755308404564857
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11370 s, 479 iter, 4790000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-06-58
  done: false
  episode_len_mean: 91.68807339449542
  episode_reward_max: 224.65807048759692
  episode_reward_mean: 178.92851113460037
  episode_reward_min: 142.17216071652004
  episodes_this_iter: 109
  episodes_total: 49614
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.914
    load_time_ms: 2.416
    num_steps_sampled: 4800000
    num_steps_trained: 4800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.39998120069503784
      kl: 0.007498788647353649
      policy_loss: -0.0026596705429255962
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11486 s, 484 iter, 4840000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-08-55
  done: false
  episode_len_mean: 90.2
  episode_reward_max: 216.83283177808386
  episode_reward_mean: 162.85802391730985
  episode_reward_min: -173.7708405562255
  episodes_this_iter: 110
  episodes_total: 50163
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.697
    load_time_ms: 2.32
    num_steps_sampled: 4850000
    num_steps_trained: 4850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3960415720939636
      kl: 0.009415820240974426
      policy_loss: -0.0012629283592104912
      total_loss: 174.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11603 s, 489 iter, 4890000 ts, 157 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-10-51
  done: false
  episode_len_mean: 91.21818181818182
  episode_reward_max: 222.51537865987376
  episode_reward_mean: 168.04975832559978
  episode_reward_min: -155.81379901106288
  episodes_this_iter: 110
  episodes_total: 50720
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3152.313
    load_time_ms: 2.322
    num_steps_sampled: 4900000
    num_steps_trained: 4900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3576447069644928
      kl: 0.009567847475409508
      policy_loss: -0.0006522359326481819
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11719 s, 494 iter, 4940000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-12-47
  done: false
  episode_len_mean: 92.60185185185185
  episode_reward_max: 225.01905508842415
  episode_reward_mean: 175.73068556685973
  episode_reward_min: -149.965587135356
  episodes_this_iter: 108
  episodes_total: 51262
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3123.246
    load_time_ms: 2.47
    num_steps_sampled: 4950000
    num_steps_trained: 4950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.345965176820755
      kl: 0.011131174862384796
      policy_loss: -0.002360375365242362
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11836 s, 499 iter, 4990000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-14-45
  done: false
  episode_len_mean: 91.65137614678899
  episode_reward_max: 220.63935278925138
  episode_reward_mean: 178.65604678951576
  episode_reward_min: -171.71827823057066
  episodes_this_iter: 109
  episodes_total: 51804
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3138.416
    load_time_ms: 2.403
    num_steps_sampled: 5000000
    num_steps_trained: 5000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.2800767719745636
      kl: 0.012391932308673859
      policy_loss: -0.0036662076599895954
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 11953 s, 504 iter, 5040000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-16-42
  done: false
  episode_len_mean: 91.56880733944953
  episode_reward_max: 224.97899293332648
  episode_reward_mean: 180.85416850227188
  episode_reward_min: 145.40970876378734
  episodes_this_iter: 109
  episodes_total: 52345
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3153.082
    load_time_ms: 2.256
    num_steps_sampled: 5050000
    num_steps_trained: 5050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.25259774923324585
      kl: 0.008018746972084045
      policy_loss: -0.0015420118579640985
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12068 s, 509 iter, 5090000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-18-38
  done: false
  episode_len_mean: 92.32407407407408
  episode_reward_max: 222.89462874671543
  episode_reward_mean: 182.82765083376222
  episode_reward_min: 143.8233608131533
  episodes_this_iter: 108
  episodes_total: 52883
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3150.734
    load_time_ms: 2.124
    num_steps_sampled: 5100000
    num_steps_trained: 5100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.17913872003555298
      kl: 0.008171743713319302
      policy_loss: -0.002312474185600877
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12183 s, 514 iter, 5140000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-20-33
  done: false
  episode_len_mean: 93.79439252336448
  episode_reward_max: 221.3147731264678
  episode_reward_mean: 175.7681950265926
  episode_reward_min: -153.9789495979148
  episodes_this_iter: 107
  episodes_total: 53422
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3141.433
    load_time_ms: 2.327
    num_steps_sampled: 5150000
    num_steps_trained: 5150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.22660742700099945
      kl: 0.012144886888563633
      policy_loss: -0.0018563541816547513
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12299 s, 519 iter, 5190000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-22-29
  done: false
  episode_len_mean: 92.94444444444444
  episode_reward_max: 220.1956973374762
  episode_reward_mean: 176.82903981953334
  episode_reward_min: -105.95159625984266
  episodes_this_iter: 108
  episodes_total: 53963
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3127.589
    load_time_ms: 2.533
    num_steps_sampled: 5200000
    num_steps_trained: 5200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.1418997049331665
      kl: 0.01181873306632042
      policy_loss: -0.001393117825500667
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12416 s, 524 iter, 5240000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-24-26
  done: false
  episode_len_mean: 93.3177570093458
  episode_reward_max: 219.57587030049078
  episode_reward_mean: 172.9899404286537
  episode_reward_min: -56.35198996873956
  episodes_this_iter: 107
  episodes_total: 54503
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.337
    load_time_ms: 2.341
    num_steps_sampled: 5250000
    num_steps_trained: 5250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.1507939100265503
      kl: 0.01577414944767952
      policy_loss: -0.0004926433903165162
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12532 s, 529 iter, 5290000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-26-22
  done: false
  episode_len_mean: 92.18348623853211
  episode_reward_max: 222.69679777874552
  episode_reward_mean: 177.54087574463313
  episode_reward_min: -164.6488133486579
  episodes_this_iter: 109
  episodes_total: 55042
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.792
    load_time_ms: 2.311
    num_steps_sampled: 5300000
    num_steps_trained: 5300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.039727065712213516
      kl: 0.01507138554006815
      policy_loss: -0.0061575621366500854
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12649 s, 534 iter, 5340000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-28-20
  done: false
  episode_len_mean: 92.74074074074075
  episode_reward_max: 218.01183543278822
  episode_reward_mean: 178.88036809325607
  episode_reward_min: 146.1065232014531
  episodes_this_iter: 108
  episodes_total: 55581
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3151.917
    load_time_ms: 2.349
    num_steps_sampled: 5350000
    num_steps_trained: 5350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.06989459693431854
      kl: 0.016688993200659752
      policy_loss: 0.0006168138352222741
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12765 s, 539 iter, 5390000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-30-16
  done: false
  episode_len_mean: 92.26851851851852
  episode_reward_max: 222.35898900454706
  episode_reward_mean: 184.41676116861996
  episode_reward_min: 148.96996532210585
  episodes_this_iter: 108
  episodes_total: 56121
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3138.421
    load_time_ms: 2.352
    num_steps_sampled: 5400000
    num_steps_trained: 5400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.02027786895632744
      kl: 0.014234197326004505
      policy_loss: -0.0027849699836224318
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12882 s, 544 iter, 5440000 ts, 181 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-32-14
  done: false
  episode_len_mean: 92.4770642201835
  episode_reward_max: 222.57803347910883
  episode_reward_mean: 179.88918082654368
  episode_reward_min: 146.34618583810777
  episodes_this_iter: 109
  episodes_total: 56662
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3143.313
    load_time_ms: 2.386
    num_steps_sampled: 5450000
    num_steps_trained: 5450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.01893557608127594
      kl: 0.014733193442225456
      policy_loss: -0.0020073633641004562
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 12999 s, 549 iter, 5490000 ts, 184 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-34-10
  done: false
  episode_len_mean: 91.72222222222223
  episode_reward_max: 217.26638170210174
  episode_reward_mean: 178.6132681165067
  episode_reward_min: 147.05676017953306
  episodes_this_iter: 108
  episodes_total: 57202
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.284
    load_time_ms: 2.268
    num_steps_sampled: 5500000
    num_steps_trained: 5500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.02869790606200695
      kl: 0.015352091751992702
      policy_loss: 0.00020632702216971666
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13117 s, 554 iter, 5540000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-36-08
  done: false
  episode_len_mean: 92.99074074074075
  episode_reward_max: 220.0591182042873
  episode_reward_mean: 176.0974154531276
  episode_reward_min: -151.79706103113227
  episodes_this_iter: 108
  episodes_total: 57744
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3143.434
    load_time_ms: 2.084
    num_steps_sampled: 5550000
    num_steps_trained: 5550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.02935134805738926
      kl: 0.024639127776026726
      policy_loss: -0.0061862231232225895
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13233 s, 559 iter, 5590000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-38-05
  done: false
  episode_len_mean: 92.66666666666667
  episode_reward_max: 226.86630944059053
  episode_reward_mean: 177.48794211357037
  episode_reward_min: -124.7164676195646
  episodes_this_iter: 108
  episodes_total: 58280
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3146.432
    load_time_ms: 2.178
    num_steps_sampled: 5600000
    num_steps_trained: 5600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.017728056758642197
      kl: 0.02481766603887081
      policy_loss: -0.004247876815497875
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13349 s, 564 iter, 5640000 ts, 183 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-40-01
  done: false
  episode_len_mean: 93.25
  episode_reward_max: 222.51739039306247
  episode_reward_mean: 181.76729031927553
  episode_reward_min: -136.2034229449707
  episodes_this_iter: 108
  episodes_total: 58816
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3148.356
    load_time_ms: 2.312
    num_steps_sampled: 5650000
    num_steps_trained: 5650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.05126745626330376
      kl: 0.01309160701930523
      policy_loss: -0.00029808320687152445
      total_loss: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13466 s, 569 iter, 5690000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-41-58
  done: false
  episode_len_mean: 96.08653846153847
  episode_reward_max: 224.01393971412676
  episode_reward_mean: 175.4821660244137
  episode_reward_min: -157.6275353043024
  episodes_this_iter: 104
  episodes_total: 59346
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3150.454
    load_time_ms: 2.404
    num_steps_sampled: 5700000
    num_steps_trained: 5700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.06363976001739502
      kl: 0.01781683973968029
      policy_loss: -0.0034934182185679674
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13583 s, 574 iter, 5740000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-43-56
  done: false
  episode_len_mean: 92.63888888888889
  episode_reward_max: 221.29599655242956
  episode_reward_mean: 175.8987769629947
  episode_reward_min: -153.24465291500084
  episodes_this_iter: 108
  episodes_total: 59880
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3142.58
    load_time_ms: 2.439
    num_steps_sampled: 5750000
    num_steps_trained: 5750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.09735173732042313
      kl: 0.01065945066511631
      policy_loss: -0.0020944576244801283
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13699 s, 579 iter, 5790000 ts, 162 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-45-52
  done: false
  episode_len_mean: 91.44954128440367
  episode_reward_max: 221.1320585541779
  episode_reward_mean: 144.18186906584975
  episode_reward_min: -159.21936536168775
  episodes_this_iter: 109
  episodes_total: 60420
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3152.098
    load_time_ms: 2.356
    num_steps_sampled: 5800000
    num_steps_trained: 5800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.07664763927459717
      kl: 0.017159758135676384
      policy_loss: -0.0007446068921126425
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13814 s, 584 iter, 5840000 ts, 152 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-47-47
  done: false
  episode_len_mean: 90.1891891891892
  episode_reward_max: 224.88768520841566
  episode_reward_mean: 141.65608534083515
  episode_reward_min: -159.2481724993712
  episodes_this_iter: 111
  episodes_total: 60962
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3128.968
    load_time_ms: 2.265
    num_steps_sampled: 5850000
    num_steps_trained: 5850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.03229796886444092
      kl: 0.01633453741669655
      policy_loss: -0.0035540079697966576
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 13932 s, 589 iter, 5890000 ts, 150 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-49-45
  done: false
  episode_len_mean: 91.31192660550458
  episode_reward_max: 221.28527641887183
  episode_reward_mean: 163.13907935003252
  episode_reward_min: -164.02893392293822
  episodes_this_iter: 109
  episodes_total: 61517
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3127.378
    load_time_ms: 2.317
    num_steps_sampled: 5900000
    num_steps_trained: 5900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.07809716463088989
      kl: 0.014943043701350689
      policy_loss: -0.0020793976727873087
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14049 s, 594 iter, 5940000 ts, 156 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-51-42
  done: false
  episode_len_mean: 92.03669724770643
  episode_reward_max: 220.452450690874
  episode_reward_mean: 148.9573740026162
  episode_reward_min: -161.9398654777641
  episodes_this_iter: 109
  episodes_total: 62057
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3127.476
    load_time_ms: 2.372
    num_steps_sampled: 5950000
    num_steps_trained: 5950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.012886254116892815
      kl: 0.017995843663811684
      policy_loss: -0.0025999583303928375
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14165 s, 599 iter, 5990000 ts, 156 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-53-39
  done: false
  episode_len_mean: 92.89814814814815
  episode_reward_max: 218.68428649193328
  episode_reward_mean: 167.65138427723144
  episode_reward_min: -169.63324871454842
  episodes_this_iter: 108
  episodes_total: 62598
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3135.617
    load_time_ms: 2.362
    num_steps_sampled: 6000000
    num_steps_trained: 6000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.07871878147125244
      kl: 0.016901804134249687
      policy_loss: -0.004777256399393082
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14283 s, 604 iter, 6040000 ts, 183 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-55-37
  done: false
  episode_len_mean: 93.14018691588785
  episode_reward_max: 219.36221550036313
  episode_reward_mean: 175.30356234474132
  episode_reward_min: -150.94484291721275
  episodes_this_iter: 107
  episodes_total: 63139
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3162.18
    load_time_ms: 2.399
    num_steps_sampled: 6050000
    num_steps_trained: 6050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.10584156215190887
      kl: 0.023823311552405357
      policy_loss: 0.00046599635970778763
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14399 s, 609 iter, 6090000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-57-33
  done: false
  episode_len_mean: 93.12037037037037
  episode_reward_max: 221.8800957647205
  episode_reward_mean: 179.54241663953314
  episode_reward_min: 147.36755345171846
  episodes_this_iter: 108
  episodes_total: 63675
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3140.613
    load_time_ms: 2.225
    num_steps_sampled: 6100000
    num_steps_trained: 6100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.046770721673965454
      kl: 0.018999213352799416
      policy_loss: 0.001376982661895454
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14516 s, 614 iter, 6140000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_04-59-30
  done: false
  episode_len_mean: 98.03921568627452
  episode_reward_max: 220.56776752323324
  episode_reward_mean: 178.11066594756892
  episode_reward_min: 5.357862552018303
  episodes_this_iter: 102
  episodes_total: 64207
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.393
    load_time_ms: 2.426
    num_steps_sampled: 6150000
    num_steps_trained: 6150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.09896938502788544
      kl: 0.012421480379998684
      policy_loss: -0.0016649439930915833
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14632 s, 619 iter, 6190000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-01-26
  done: false
  episode_len_mean: 91.66666666666667
  episode_reward_max: 222.250995521105
  episode_reward_mean: 180.6705334858827
  episode_reward_min: 141.27274115645872
  episodes_this_iter: 108
  episodes_total: 64743
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.482
    load_time_ms: 2.405
    num_steps_sampled: 6200000
    num_steps_trained: 6200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.1773919314146042
      kl: 11.454299926757812
      policy_loss: 0.08509930223226547
      total_los

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14748 s, 624 iter, 6240000 ts, 154 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-03-22
  done: false
  episode_len_mean: 87.7280701754386
  episode_reward_max: 225.19078616071252
  episode_reward_mean: 153.30463847290744
  episode_reward_min: -168.04343249442377
  episodes_this_iter: 114
  episodes_total: 65316
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3143.681
    load_time_ms: 2.28
    num_steps_sampled: 6250000
    num_steps_trained: 6250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.14347708225250244
      kl: 0.03446187078952789
      policy_loss: 0.0036578660365194082
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14863 s, 629 iter, 6290000 ts, 158 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-05-18
  done: false
  episode_len_mean: 89.99090909090908
  episode_reward_max: 222.97657865405847
  episode_reward_mean: 167.537524037891
  episode_reward_min: -162.34235554014
  episodes_this_iter: 110
  episodes_total: 65872
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3134.459
    load_time_ms: 2.423
    num_steps_sampled: 6300000
    num_steps_trained: 6300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.13680624961853027
      kl: 0.03506672382354736
      policy_loss: 0.001170328469015658
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 14980 s, 634 iter, 6340000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-07-15
  done: false
  episode_len_mean: 89.41441441441441
  episode_reward_max: 222.0891457754197
  episode_reward_mean: 175.21989573255684
  episode_reward_min: -165.84527118055257
  episodes_this_iter: 111
  episodes_total: 66429
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3125.986
    load_time_ms: 2.314
    num_steps_sampled: 6350000
    num_steps_trained: 6350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.1701761931180954
      kl: 0.014452364295721054
      policy_loss: -0.00027444795705378056
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15096 s, 639 iter, 6390000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-09-11
  done: false
  episode_len_mean: 91.02727272727273
  episode_reward_max: 222.1132908119571
  episode_reward_mean: 172.58847687248516
  episode_reward_min: -154.47142065991048
  episodes_this_iter: 110
  episodes_total: 66978
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3122.854
    load_time_ms: 2.372
    num_steps_sampled: 6400000
    num_steps_trained: 6400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.08161045610904694
      kl: 0.02304127626121044
      policy_loss: -0.0007905973470769823
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15212 s, 644 iter, 6440000 ts, 181 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-11-08
  done: false
  episode_len_mean: 91.81481481481481
  episode_reward_max: 224.30374629564204
  episode_reward_mean: 180.28800021903626
  episode_reward_min: 148.82605192478619
  episodes_this_iter: 108
  episodes_total: 67525
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3129.485
    load_time_ms: 2.413
    num_steps_sampled: 6450000
    num_steps_trained: 6450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.06661023944616318
      kl: 0.01831035129725933
      policy_loss: 0.00015255837934091687
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15329 s, 649 iter, 6490000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-13-05
  done: false
  episode_len_mean: 91.31192660550458
  episode_reward_max: 223.78455717027902
  episode_reward_mean: 177.27104169003456
  episode_reward_min: 143.86107493206907
  episodes_this_iter: 109
  episodes_total: 68074
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3130.757
    load_time_ms: 2.416
    num_steps_sampled: 6500000
    num_steps_trained: 6500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.11936338990926743
      kl: 0.018680747598409653
      policy_loss: 0.0006752089248038828
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15446 s, 654 iter, 6540000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-15-03
  done: false
  episode_len_mean: 91.38181818181818
  episode_reward_max: 215.98103785828707
  episode_reward_mean: 174.99390516914127
  episode_reward_min: -153.2166100682798
  episodes_this_iter: 110
  episodes_total: 68621
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.449
    load_time_ms: 2.32
    num_steps_sampled: 6550000
    num_steps_trained: 6550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.13047093152999878
      kl: 0.03480612859129906
      policy_loss: -0.008637750521302223
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15563 s, 659 iter, 6590000 ts, 152 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-17-00
  done: false
  episode_len_mean: 90.05454545454545
  episode_reward_max: 225.14177759715818
  episode_reward_mean: 165.36094009796452
  episode_reward_min: -158.78255422816954
  episodes_this_iter: 110
  episodes_total: 69175
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3166.703
    load_time_ms: 2.357
    num_steps_sampled: 6600000
    num_steps_trained: 6600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.08921685069799423
      kl: 0.02064974047243595
      policy_loss: -0.0008990292553789914
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15680 s, 664 iter, 6640000 ts, 163 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-18-56
  done: false
  episode_len_mean: 89.90990990990991
  episode_reward_max: 217.7529109281921
  episode_reward_mean: 164.74717937041774
  episode_reward_min: -154.07153307877672
  episodes_this_iter: 111
  episodes_total: 69732
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.491
    load_time_ms: 2.561
    num_steps_sampled: 6650000
    num_steps_trained: 6650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.05971936136484146
      kl: 0.023165855556726456
      policy_loss: -0.000576937454752624
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15796 s, 669 iter, 6690000 ts, 181 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-20-53
  done: false
  episode_len_mean: 91.68807339449542
  episode_reward_max: 220.92207435873522
  episode_reward_mean: 178.5587174706033
  episode_reward_min: 142.15933751331974
  episodes_this_iter: 109
  episodes_total: 70281
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3135.645
    load_time_ms: 2.475
    num_steps_sampled: 6700000
    num_steps_trained: 6700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.0831599086523056
      kl: 0.01744709350168705
      policy_loss: -0.0004627641465049237
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 15912 s, 674 iter, 6740000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-22-49
  done: false
  episode_len_mean: 91.28440366972477
  episode_reward_max: 221.67693049791126
  episode_reward_mean: 180.2422602407744
  episode_reward_min: 143.7158546881561
  episodes_this_iter: 109
  episodes_total: 70829
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3127.271
    load_time_ms: 2.468
    num_steps_sampled: 6750000
    num_steps_trained: 6750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.06304167956113815
      kl: 0.01722095161676407
      policy_loss: 0.0007054475136101246
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16028 s, 679 iter, 6790000 ts, 162 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-24-46
  done: false
  episode_len_mean: 90.23423423423424
  episode_reward_max: 221.8132498134088
  episode_reward_mean: 169.98824527450472
  episode_reward_min: -137.35135543962954
  episodes_this_iter: 111
  episodes_total: 71383
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3116.57
    load_time_ms: 2.427
    num_steps_sampled: 6800000
    num_steps_trained: 6800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.05080167576670647
      kl: 0.025006461888551712
      policy_loss: -0.0012417647521942854
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16144 s, 684 iter, 6840000 ts, 181 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-26-42
  done: false
  episode_len_mean: 90.61818181818182
  episode_reward_max: 222.66934995152656
  episode_reward_mean: 178.39364468382925
  episode_reward_min: 145.884800750026
  episodes_this_iter: 110
  episodes_total: 71931
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3119.694
    load_time_ms: 2.404
    num_steps_sampled: 6850000
    num_steps_trained: 6850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.017358461394906044
      kl: 0.013726494275033474
      policy_loss: -0.0022203971166163683
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16261 s, 689 iter, 6890000 ts, 176 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-28-39
  done: false
  episode_len_mean: 90.27927927927928
  episode_reward_max: 226.22188268974006
  episode_reward_mean: 170.56891026470413
  episode_reward_min: -163.4780770622483
  episodes_this_iter: 111
  episodes_total: 72476
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3126.571
    load_time_ms: 2.441
    num_steps_sampled: 6900000
    num_steps_trained: 6900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.06586579978466034
      kl: 0.015218468382954597
      policy_loss: -0.004484444856643677
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16377 s, 694 iter, 6940000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-30-35
  done: false
  episode_len_mean: 90.52678571428571
  episode_reward_max: 223.45705366110593
  episode_reward_mean: 182.1329359638472
  episode_reward_min: 145.3819880127242
  episodes_this_iter: 112
  episodes_total: 73024
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3122.779
    load_time_ms: 2.414
    num_steps_sampled: 6950000
    num_steps_trained: 6950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.0836658701300621
      kl: 0.009956288151443005
      policy_loss: -0.00026253730175085366
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16493 s, 699 iter, 6990000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-32-31
  done: false
  episode_len_mean: 92.21296296296296
  episode_reward_max: 222.8401624720855
  episode_reward_mean: 176.54996600911468
  episode_reward_min: 144.21149133121483
  episodes_this_iter: 108
  episodes_total: 73567
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3134.116
    load_time_ms: 2.436
    num_steps_sampled: 7000000
    num_steps_trained: 7000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.1038510873913765
      kl: 0.016165856271982193
      policy_loss: -0.002331838011741638
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16609 s, 704 iter, 7040000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-34-28
  done: false
  episode_len_mean: 92.35185185185185
  episode_reward_max: 224.2069435773369
  episode_reward_mean: 179.19188704456852
  episode_reward_min: 142.99900566828313
  episodes_this_iter: 108
  episodes_total: 74110
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3137.276
    load_time_ms: 2.413
    num_steps_sampled: 7050000
    num_steps_trained: 7050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.16580216586589813
      kl: 0.01754380576312542
      policy_loss: 0.00014615636609960347
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16727 s, 709 iter, 7090000 ts, 176 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-36-26
  done: false
  episode_len_mean: 91.91743119266054
  episode_reward_max: 223.5204336628174
  episode_reward_mean: 179.10091943286957
  episode_reward_min: 145.5228419220738
  episodes_this_iter: 109
  episodes_total: 74655
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.776
    load_time_ms: 2.39
    num_steps_sampled: 7100000
    num_steps_trained: 7100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.17525765299797058
      kl: 0.016026634722948074
      policy_loss: -0.0020791650749742985
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16844 s, 714 iter, 7140000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-38-23
  done: false
  episode_len_mean: 90.86363636363636
  episode_reward_max: 223.77537813640677
  episode_reward_mean: 182.57165161345458
  episode_reward_min: 145.34089431151244
  episodes_this_iter: 110
  episodes_total: 75201
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3160.922
    load_time_ms: 2.337
    num_steps_sampled: 7150000
    num_steps_trained: 7150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2824693024158478
      kl: 0.016784917563199997
      policy_loss: -0.002655167831107974
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 16961 s, 719 iter, 7190000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-40-20
  done: false
  episode_len_mean: 91.26605504587155
  episode_reward_max: 220.21611666325532
  episode_reward_mean: 179.31780523492338
  episode_reward_min: 148.12464377826936
  episodes_this_iter: 109
  episodes_total: 75743
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3156.747
    load_time_ms: 2.426
    num_steps_sampled: 7200000
    num_steps_trained: 7200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.25937938690185547
      kl: 0.03582847863435745
      policy_loss: 0.0031089261174201965
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17077 s, 724 iter, 7240000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-42-16
  done: false
  episode_len_mean: 93.1574074074074
  episode_reward_max: 220.41204191011795
  episode_reward_mean: 173.554684517262
  episode_reward_min: -152.35227974024602
  episodes_this_iter: 108
  episodes_total: 76282
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.488
    load_time_ms: 2.622
    num_steps_sampled: 7250000
    num_steps_trained: 7250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.19954290986061096
      kl: 0.01485726423561573
      policy_loss: -0.0006322638364508748
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17194 s, 729 iter, 7290000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-44-13
  done: false
  episode_len_mean: 92.29629629629629
  episode_reward_max: 220.85121838385732
  episode_reward_mean: 176.97592564348167
  episode_reward_min: 147.23320090491046
  episodes_this_iter: 108
  episodes_total: 76823
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3148.309
    load_time_ms: 2.49
    num_steps_sampled: 7300000
    num_steps_trained: 7300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.27340519428253174
      kl: 0.023582259193062782
      policy_loss: 0.002860462758690119
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17310 s, 734 iter, 7340000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-46-09
  done: false
  episode_len_mean: 90.63636363636364
  episode_reward_max: 221.74998957349842
  episode_reward_mean: 176.21053479331917
  episode_reward_min: -177.20617429547073
  episodes_this_iter: 110
  episodes_total: 77369
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3142.267
    load_time_ms: 2.309
    num_steps_sampled: 7350000
    num_steps_trained: 7350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.24847939610481262
      kl: 0.027227338403463364
      policy_loss: 0.0006476971902884543
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17426 s, 739 iter, 7390000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-48-06
  done: false
  episode_len_mean: 91.55963302752293
  episode_reward_max: 219.41344687741292
  episode_reward_mean: 179.94739325634586
  episode_reward_min: 147.98414333365693
  episodes_this_iter: 109
  episodes_total: 77917
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3128.984
    load_time_ms: 2.371
    num_steps_sampled: 7400000
    num_steps_trained: 7400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.23687365651130676
      kl: 0.01511272881180048
      policy_loss: -0.0010953368619084358
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17543 s, 744 iter, 7440000 ts, 169 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-50-03
  done: false
  episode_len_mean: 91.60550458715596
  episode_reward_max: 220.097755721328
  episode_reward_mean: 171.81893100817217
  episode_reward_min: -175.84557633796123
  episodes_this_iter: 109
  episodes_total: 78466
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3129.275
    load_time_ms: 2.453
    num_steps_sampled: 7450000
    num_steps_trained: 7450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.20764392614364624
      kl: 0.016968922689557076
      policy_loss: 0.0014369008131325245
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17659 s, 749 iter, 7490000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-51-59
  done: false
  episode_len_mean: 91.45871559633028
  episode_reward_max: 223.33234598105352
  episode_reward_mean: 181.85175332948674
  episode_reward_min: 144.09937476151802
  episodes_this_iter: 109
  episodes_total: 79011
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3137.951
    load_time_ms: 2.389
    num_steps_sampled: 7500000
    num_steps_trained: 7500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2768994867801666
      kl: 0.01603863574564457
      policy_loss: -0.0017581498250365257
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17775 s, 754 iter, 7540000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-53-55
  done: false
  episode_len_mean: 92.10909090909091
  episode_reward_max: 220.16528832197167
  episode_reward_mean: 171.35727061070799
  episode_reward_min: -163.50787164730275
  episodes_this_iter: 110
  episodes_total: 79555
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3168.054
    load_time_ms: 2.259
    num_steps_sampled: 7550000
    num_steps_trained: 7550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2269664704799652
      kl: 0.03184931352734566
      policy_loss: -0.0033201612532138824
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 17891 s, 759 iter, 7590000 ts, 184 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-55-52
  done: false
  episode_len_mean: 91.42201834862385
  episode_reward_max: 218.34576177361183
  episode_reward_mean: 181.34799578229428
  episode_reward_min: 143.5919719849726
  episodes_this_iter: 109
  episodes_total: 80099
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3154.895
    load_time_ms: 2.374
    num_steps_sampled: 7600000
    num_steps_trained: 7600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.28729048371315
      kl: 0.021372202783823013
      policy_loss: 0.001637088949792087
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18008 s, 764 iter, 7640000 ts, 181 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-57-50
  done: false
  episode_len_mean: 91.1
  episode_reward_max: 221.01739479499224
  episode_reward_mean: 179.325691576646
  episode_reward_min: 145.61857210534714
  episodes_this_iter: 110
  episodes_total: 80651
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3146.29
    load_time_ms: 2.41
    num_steps_sampled: 7650000
    num_steps_trained: 7650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2563319504261017
      kl: 0.023108532652258873
      policy_loss: 0.0016250688349828124
      total_loss: 1.58898

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18124 s, 769 iter, 7690000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_05-59-46
  done: false
  episode_len_mean: 89.82142857142857
  episode_reward_max: 220.30834021900097
  episode_reward_mean: 176.65597660186975
  episode_reward_min: -149.27418602375698
  episodes_this_iter: 112
  episodes_total: 81206
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3135.55
    load_time_ms: 2.301
    num_steps_sampled: 7700000
    num_steps_trained: 7700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.29543638229370117
      kl: 0.01766241155564785
      policy_loss: -0.003496799385175109
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18240 s, 774 iter, 7740000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-01-42
  done: false
  episode_len_mean: 89.20535714285714
  episode_reward_max: 218.9829261032319
  episode_reward_mean: 172.5370099606956
  episode_reward_min: -175.7670076258406
  episodes_this_iter: 112
  episodes_total: 81765
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3117.25
    load_time_ms: 2.3
    num_steps_sampled: 7750000
    num_steps_trained: 7750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3373015522956848
      kl: 0.011108504608273506
      policy_loss: -0.0016420785104855895
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18356 s, 779 iter, 7790000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-03-38
  done: false
  episode_len_mean: 89.15044247787611
  episode_reward_max: 222.90081408891552
  episode_reward_mean: 164.66877451544372
  episode_reward_min: -157.59686354271844
  episodes_this_iter: 113
  episodes_total: 82324
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3130.743
    load_time_ms: 2.472
    num_steps_sampled: 7800000
    num_steps_trained: 7800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.28313204646110535
      kl: 0.012840967625379562
      policy_loss: -0.00503659900277853
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18472 s, 784 iter, 7840000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-05-34
  done: false
  episode_len_mean: 88.29203539823008
  episode_reward_max: 211.20047747092337
  episode_reward_mean: 172.87004784698615
  episode_reward_min: -155.2585250382166
  episodes_this_iter: 113
  episodes_total: 82884
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3128.374
    load_time_ms: 2.439
    num_steps_sampled: 7850000
    num_steps_trained: 7850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.31146329641342163
      kl: 0.016456443816423416
      policy_loss: -0.003766549052670598
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18588 s, 789 iter, 7890000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-07-30
  done: false
  episode_len_mean: 89.97272727272727
  episode_reward_max: 224.0485990968463
  episode_reward_mean: 181.7052581382619
  episode_reward_min: 144.9719493518001
  episodes_this_iter: 110
  episodes_total: 83442
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3138.519
    load_time_ms: 2.287
    num_steps_sampled: 7900000
    num_steps_trained: 7900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.36864423751831055
      kl: 0.020896902307868004
      policy_loss: -0.0020364795345813036
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18703 s, 794 iter, 7940000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-09-26
  done: false
  episode_len_mean: 89.95495495495496
  episode_reward_max: 220.625371259626
  episode_reward_mean: 179.90494478388442
  episode_reward_min: 147.4972671443526
  episodes_this_iter: 111
  episodes_total: 83999
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3155.006
    load_time_ms: 2.361
    num_steps_sampled: 7950000
    num_steps_trained: 7950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.36393457651138306
      kl: 0.03423184156417847
      policy_loss: -6.918948201928288e-05
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18820 s, 799 iter, 7990000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-11-23
  done: false
  episode_len_mean: 90.54464285714286
  episode_reward_max: 220.72215398982866
  episode_reward_mean: 176.9643686137028
  episode_reward_min: 140.28921785377318
  episodes_this_iter: 112
  episodes_total: 84558
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3136.561
    load_time_ms: 2.311
    num_steps_sampled: 8000000
    num_steps_trained: 8000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.31011763215065
      kl: 0.019441528245806694
      policy_loss: -0.0007729888893663883
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 18937 s, 804 iter, 8040000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-13-20
  done: false
  episode_len_mean: 90.0
  episode_reward_max: 218.80439379120457
  episode_reward_mean: 173.60114722675493
  episode_reward_min: -150.05600910190594
  episodes_this_iter: 111
  episodes_total: 85115
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.97
    load_time_ms: 2.288
    num_steps_sampled: 8050000
    num_steps_trained: 8050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.37014517188072205
      kl: 0.014954652637243271
      policy_loss: -0.0035031007137149572
      total_loss: 2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19054 s, 809 iter, 8090000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-15-17
  done: false
  episode_len_mean: 89.22522522522523
  episode_reward_max: 215.24795441548704
  episode_reward_mean: 173.14853096047557
  episode_reward_min: -154.75489245589927
  episodes_this_iter: 111
  episodes_total: 85673
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3133.451
    load_time_ms: 2.245
    num_steps_sampled: 8100000
    num_steps_trained: 8100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3862001597881317
      kl: 0.013036680407822132
      policy_loss: -0.0009314786293543875
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19170 s, 814 iter, 8140000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-17-14
  done: false
  episode_len_mean: 89.76785714285714
  episode_reward_max: 219.22667693625732
  episode_reward_mean: 176.12513185508462
  episode_reward_min: 144.33094384446758
  episodes_this_iter: 112
  episodes_total: 86232
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3127.874
    load_time_ms: 2.337
    num_steps_sampled: 8150000
    num_steps_trained: 8150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.42589691281318665
      kl: 0.02063382975757122
      policy_loss: 0.00017419907089788467
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19286 s, 819 iter, 8190000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-19-11
  done: false
  episode_len_mean: 89.32432432432432
  episode_reward_max: 217.93389498347392
  episode_reward_mean: 174.335092734104
  episode_reward_min: 145.49283166329582
  episodes_this_iter: 111
  episodes_total: 86791
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3134.647
    load_time_ms: 2.653
    num_steps_sampled: 8200000
    num_steps_trained: 8200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4245564043521881
      kl: 0.025637630373239517
      policy_loss: -0.0030846791341900826
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19404 s, 824 iter, 8240000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-21-08
  done: false
  episode_len_mean: 90.46846846846847
  episode_reward_max: 219.00934513464762
  episode_reward_mean: 179.2243628386112
  episode_reward_min: 142.07473303573872
  episodes_this_iter: 111
  episodes_total: 87349
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.511
    load_time_ms: 2.615
    num_steps_sampled: 8250000
    num_steps_trained: 8250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4696820080280304
      kl: 0.021395424380898476
      policy_loss: 0.0009912388632073998
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19520 s, 829 iter, 8290000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-23-05
  done: false
  episode_len_mean: 89.29464285714286
  episode_reward_max: 219.44186599859162
  episode_reward_mean: 175.22676694475928
  episode_reward_min: -149.71409281880068
  episodes_this_iter: 112
  episodes_total: 87907
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3145.611
    load_time_ms: 2.417
    num_steps_sampled: 8300000
    num_steps_trained: 8300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5268866419792175
      kl: 0.0140357855707407
      policy_loss: -0.006036193110048771
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19637 s, 834 iter, 8340000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-25-02
  done: false
  episode_len_mean: 89.59821428571429
  episode_reward_max: 216.2271439971493
  episode_reward_mean: 174.14678116597133
  episode_reward_min: 136.96212395954032
  episodes_this_iter: 112
  episodes_total: 88466
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3151.033
    load_time_ms: 2.295
    num_steps_sampled: 8350000
    num_steps_trained: 8350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.48428794741630554
      kl: 0.021191412582993507
      policy_loss: -0.002487204037606716
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19753 s, 839 iter, 8390000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-26-58
  done: false
  episode_len_mean: 89.73873873873873
  episode_reward_max: 221.75416158124574
  episode_reward_mean: 174.06698722586864
  episode_reward_min: 139.81846908207592
  episodes_this_iter: 111
  episodes_total: 89023
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3141.446
    load_time_ms: 2.285
    num_steps_sampled: 8400000
    num_steps_trained: 8400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.48794862627983093
      kl: 0.016479989513754845
      policy_loss: 0.0022083139047026634
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19870 s, 844 iter, 8440000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-28-54
  done: false
  episode_len_mean: 89.66666666666667
  episode_reward_max: 217.31310784135525
  episode_reward_mean: 177.7458119615533
  episode_reward_min: 145.68370889792763
  episodes_this_iter: 111
  episodes_total: 89579
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3135.138
    load_time_ms: 2.454
    num_steps_sampled: 8450000
    num_steps_trained: 8450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.578363835811615
      kl: 0.019382594153285027
      policy_loss: 0.00046377486432902515
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 19985 s, 849 iter, 8490000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-30-51
  done: false
  episode_len_mean: 89.19642857142857
  episode_reward_max: 219.8790740052961
  episode_reward_mean: 175.45813956405354
  episode_reward_min: 138.61901526813526
  episodes_this_iter: 112
  episodes_total: 90136
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3156.125
    load_time_ms: 2.445
    num_steps_sampled: 8500000
    num_steps_trained: 8500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5733966827392578
      kl: 1.08303964138031
      policy_loss: 0.010883764363825321
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20102 s, 854 iter, 8540000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-32-48
  done: false
  episode_len_mean: 89.10619469026548
  episode_reward_max: 221.10072754301285
  episode_reward_mean: 175.28932025877802
  episode_reward_min: 134.19090013502026
  episodes_this_iter: 113
  episodes_total: 90694
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3131.91
    load_time_ms: 2.257
    num_steps_sampled: 8550000
    num_steps_trained: 8550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6177864670753479
      kl: 0.029921675100922585
      policy_loss: 0.0022981560323387384
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20219 s, 859 iter, 8590000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-34-45
  done: false
  episode_len_mean: 89.25892857142857
  episode_reward_max: 218.29739043352012
  episode_reward_mean: 168.76083537237324
  episode_reward_min: -158.41400734981852
  episodes_this_iter: 112
  episodes_total: 91249
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.746
    load_time_ms: 2.137
    num_steps_sampled: 8600000
    num_steps_trained: 8600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6006514430046082
      kl: 0.0235245693475008
      policy_loss: -0.008886785246431828
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20336 s, 864 iter, 8640000 ts, 170 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-36-42
  done: false
  episode_len_mean: 88.929203539823
  episode_reward_max: 220.5033557409695
  episode_reward_mean: 175.9342591681071
  episode_reward_min: 143.63713674378394
  episodes_this_iter: 113
  episodes_total: 91806
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3152.454
    load_time_ms: 2.21
    num_steps_sampled: 8650000
    num_steps_trained: 8650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6630882620811462
      kl: 0.03247763589024544
      policy_loss: 0.004951892886310816
      total_los

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20454 s, 869 iter, 8690000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-38-40
  done: false
  episode_len_mean: 89.59821428571429
  episode_reward_max: 215.6376407020544
  episode_reward_mean: 170.52061478535794
  episode_reward_min: -150.10780932826833
  episodes_this_iter: 112
  episodes_total: 92364
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3138.187
    load_time_ms: 2.46
    num_steps_sampled: 8700000
    num_steps_trained: 8700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6696062088012695
      kl: 0.03446514904499054
      policy_loss: 0.0012294587213546038
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20570 s, 874 iter, 8740000 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-40-37
  done: false
  episode_len_mean: 90.25
  episode_reward_max: 216.42500008271145
  episode_reward_mean: 174.75130072303935
  episode_reward_min: 138.15459344217592
  episodes_this_iter: 112
  episodes_total: 92920
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3146.587
    load_time_ms: 2.434
    num_steps_sampled: 8750000
    num_steps_trained: 8750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6426583528518677
      kl: 0.09418211877346039
      policy_loss: 0.013170046731829643
      total_loss: 1.35

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20686 s, 879 iter, 8790000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-42-34
  done: false
  episode_len_mean: 90.7909090909091
  episode_reward_max: 218.29542089417043
  episode_reward_mean: 170.64670615596833
  episode_reward_min: 137.78611371729275
  episodes_this_iter: 110
  episodes_total: 93475
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3135.153
    load_time_ms: 2.18
    num_steps_sampled: 8800000
    num_steps_trained: 8800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6193140745162964
      kl: 0.03452860563993454
      policy_loss: 0.000130237007397227
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20804 s, 884 iter, 8840000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-44-31
  done: false
  episode_len_mean: 89.52678571428571
  episode_reward_max: 212.15651203957123
  episode_reward_mean: 172.09708676963288
  episode_reward_min: 137.7787668064128
  episodes_this_iter: 112
  episodes_total: 94034
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3129.656
    load_time_ms: 2.377
    num_steps_sampled: 8850000
    num_steps_trained: 8850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7113729119300842
      kl: 0.026757655665278435
      policy_loss: 0.005819134879857302
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 20920 s, 889 iter, 8890000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-46-27
  done: false
  episode_len_mean: 89.21428571428571
  episode_reward_max: 219.3332217406815
  episode_reward_mean: 172.8101576755608
  episode_reward_min: 141.93093341047447
  episodes_this_iter: 112
  episodes_total: 94590
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3129.502
    load_time_ms: 2.671
    num_steps_sampled: 8900000
    num_steps_trained: 8900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6562435030937195
      kl: 0.0276467427611351
      policy_loss: 0.00044390998664312065
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21036 s, 894 iter, 8940000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-48-23
  done: false
  episode_len_mean: 90.04504504504504
  episode_reward_max: 217.92179219667855
  episode_reward_mean: 174.85111286635728
  episode_reward_min: 142.33045691068017
  episodes_this_iter: 111
  episodes_total: 95145
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3137.642
    load_time_ms: 2.533
    num_steps_sampled: 8950000
    num_steps_trained: 8950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7121616005897522
      kl: 0.022890079766511917
      policy_loss: 0.0020627945195883512
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21152 s, 899 iter, 8990000 ts, 176 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-50-19
  done: false
  episode_len_mean: 90.74545454545455
  episode_reward_max: 214.19334018832052
  episode_reward_mean: 174.2729290173771
  episode_reward_min: 138.52569962123798
  episodes_this_iter: 110
  episodes_total: 95701
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3141.364
    load_time_ms: 2.232
    num_steps_sampled: 9000000
    num_steps_trained: 9000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7364770174026489
      kl: 0.038428209722042084
      policy_loss: 0.002137688221409917
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21268 s, 904 iter, 9040000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-52-16
  done: false
  episode_len_mean: 89.78378378378379
  episode_reward_max: 218.74194303213116
  episode_reward_mean: 174.72055547734834
  episode_reward_min: 134.96071641451778
  episodes_this_iter: 111
  episodes_total: 96256
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3118.632
    load_time_ms: 2.165
    num_steps_sampled: 9050000
    num_steps_trained: 9050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.733126699924469
      kl: 0.04322052001953125
      policy_loss: 0.006493557710200548
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21385 s, 909 iter, 9090000 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-54-13
  done: false
  episode_len_mean: 89.13392857142857
  episode_reward_max: 214.41374656827838
  episode_reward_mean: 173.0878647147853
  episode_reward_min: 139.01506180995122
  episodes_this_iter: 112
  episodes_total: 96814
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.499
    load_time_ms: 2.205
    num_steps_sampled: 9100000
    num_steps_trained: 9100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6903122067451477
      kl: 0.20512259006500244
      policy_loss: 0.00010818128066603094
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21500 s, 914 iter, 9140000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-56-09
  done: false
  episode_len_mean: 89.90090090090091
  episode_reward_max: 219.76051226880267
  episode_reward_mean: 170.5919083921555
  episode_reward_min: 137.65445917564887
  episodes_this_iter: 111
  episodes_total: 97369
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3159.048
    load_time_ms: 2.152
    num_steps_sampled: 9150000
    num_steps_trained: 9150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6580333113670349
      kl: 0.030019909143447876
      policy_loss: -0.001354491920210421
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21616 s, 919 iter, 9190000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_06-58-05
  done: false
  episode_len_mean: 89.57142857142857
  episode_reward_max: 215.85846778304185
  episode_reward_mean: 176.4264684118609
  episode_reward_min: 139.35202652854846
  episodes_this_iter: 112
  episodes_total: 97927
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3129.538
    load_time_ms: 2.165
    num_steps_sampled: 9200000
    num_steps_trained: 9200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7676118612289429
      kl: 0.025806402787566185
      policy_loss: 0.001885516569018364
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21734 s, 924 iter, 9240000 ts, 171 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-00-03
  done: false
  episode_len_mean: 89.63963963963964
  episode_reward_max: 220.19769830283377
  episode_reward_mean: 171.35469505215593
  episode_reward_min: 133.24585616963427
  episodes_this_iter: 111
  episodes_total: 98484
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3123.676
    load_time_ms: 2.337
    num_steps_sampled: 9250000
    num_steps_trained: 9250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7297329902648926
      kl: 0.045421864837408066
      policy_loss: 0.0036605708301067352
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21850 s, 929 iter, 9290000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-01-59
  done: false
  episode_len_mean: 89.70535714285714
  episode_reward_max: 221.4722178642461
  episode_reward_mean: 170.07980674043102
  episode_reward_min: -149.79264420856447
  episodes_this_iter: 112
  episodes_total: 99041
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.688
    load_time_ms: 2.432
    num_steps_sampled: 9300000
    num_steps_trained: 9300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6833673715591431
      kl: 0.0452534519135952
      policy_loss: -0.011126845143735409
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 21966 s, 934 iter, 9340000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-03-55
  done: false
  episode_len_mean: 89.41071428571429
  episode_reward_max: 220.22151181519655
  episode_reward_mean: 171.6850313364642
  episode_reward_min: 138.88575963396357
  episodes_this_iter: 112
  episodes_total: 99598
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.388
    load_time_ms: 2.358
    num_steps_sampled: 9350000
    num_steps_trained: 9350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7092666029930115
      kl: 0.03858180716633797
      policy_loss: 0.004062363412231207
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22081 s, 939 iter, 9390000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-05-51
  done: false
  episode_len_mean: 90.09009009009009
  episode_reward_max: 216.15560923822133
  episode_reward_mean: 172.72405972091738
  episode_reward_min: 139.4447079029593
  episodes_this_iter: 111
  episodes_total: 100155
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3146.861
    load_time_ms: 2.256
    num_steps_sampled: 9400000
    num_steps_trained: 9400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6847039461135864
      kl: 0.028594855219125748
      policy_loss: 0.001390133867971599
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22197 s, 944 iter, 9440000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-07-47
  done: false
  episode_len_mean: 90.33333333333333
  episode_reward_max: 219.02046223332752
  episode_reward_mean: 173.8791540925772
  episode_reward_min: 138.76292444185296
  episodes_this_iter: 111
  episodes_total: 100711
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3147.808
    load_time_ms: 2.32
    num_steps_sampled: 9450000
    num_steps_trained: 9450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6724333167076111
      kl: 0.08338326960802078
      policy_loss: 0.01107343751937151
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22313 s, 949 iter, 9490000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-09-43
  done: false
  episode_len_mean: 89.7909090909091
  episode_reward_max: 216.97031583841078
  episode_reward_mean: 171.62611971597363
  episode_reward_min: -149.1599609532459
  episodes_this_iter: 110
  episodes_total: 101269
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3130.256
    load_time_ms: 2.375
    num_steps_sampled: 9500000
    num_steps_trained: 9500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7343094944953918
      kl: 0.043330319225788116
      policy_loss: -0.011356105096638203
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22428 s, 954 iter, 9540000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-11-38
  done: false
  episode_len_mean: 89.13513513513513
  episode_reward_max: 219.31720539652108
  episode_reward_mean: 173.76379354522192
  episode_reward_min: 138.5180651411349
  episodes_this_iter: 111
  episodes_total: 101828
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3131.493
    load_time_ms: 2.336
    num_steps_sampled: 9550000
    num_steps_trained: 9550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7206405997276306
      kl: 9.651677131652832
      policy_loss: 0.09054666012525558
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22543 s, 959 iter, 9590000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-13-34
  done: false
  episode_len_mean: 89.57142857142857
  episode_reward_max: 212.99534527361394
  episode_reward_mean: 176.81482732112778
  episode_reward_min: 141.52527668851965
  episodes_this_iter: 112
  episodes_total: 102389
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3120.927
    load_time_ms: 2.268
    num_steps_sampled: 9600000
    num_steps_trained: 9600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7459509968757629
      kl: 0.028316160663962364
      policy_loss: 0.00020423397654667497
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22660 s, 964 iter, 9640000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-15-31
  done: false
  episode_len_mean: 89.04464285714286
  episode_reward_max: 216.83154893179906
  episode_reward_mean: 170.98015103440318
  episode_reward_min: -155.6829073779354
  episodes_this_iter: 112
  episodes_total: 102951
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3143.795
    load_time_ms: 2.216
    num_steps_sampled: 9650000
    num_steps_trained: 9650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6860938668251038
      kl: 0.02561785839498043
      policy_loss: 0.00041383475763723254
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22777 s, 969 iter, 9690000 ts, 170 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-17-28
  done: false
  episode_len_mean: 88.58771929824562
  episode_reward_max: 213.56813476722436
  episode_reward_mean: 170.91902781967826
  episode_reward_min: 142.34331440690673
  episodes_this_iter: 114
  episodes_total: 103514
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3144.246
    load_time_ms: 2.433
    num_steps_sampled: 9700000
    num_steps_trained: 9700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6667861938476562
      kl: 0.025461550801992416
      policy_loss: 0.0006003379821777344
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 22893 s, 974 iter, 9740000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-19-24
  done: false
  episode_len_mean: 88.15789473684211
  episode_reward_max: 222.3585348376351
  episode_reward_mean: 172.7185947855199
  episode_reward_min: 139.20943846759582
  episodes_this_iter: 114
  episodes_total: 104078
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3139.584
    load_time_ms: 2.416
    num_steps_sampled: 9750000
    num_steps_trained: 9750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.68463534116745
      kl: 0.028995037078857422
      policy_loss: 0.0023553892970085144
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 23010 s, 979 iter, 9790000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-21-21
  done: false
  episode_len_mean: 89.00877192982456
  episode_reward_max: 219.92691782574195
  episode_reward_mean: 172.35301989271431
  episode_reward_min: 134.57191434726897
  episodes_this_iter: 114
  episodes_total: 104643
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3148.575
    load_time_ms: 2.532
    num_steps_sampled: 9800000
    num_steps_trained: 9800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6927513480186462
      kl: 0.021913006901741028
      policy_loss: 0.0011805612593889236
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 23126 s, 984 iter, 9840000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-23-19
  done: false
  episode_len_mean: 88.72566371681415
  episode_reward_max: 219.15082119139663
  episode_reward_mean: 166.0422927508677
  episode_reward_min: -155.67650538992325
  episodes_this_iter: 113
  episodes_total: 105205
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3127.128
    load_time_ms: 2.598
    num_steps_sampled: 9850000
    num_steps_trained: 9850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6548426747322083
      kl: 0.010501096956431866
      policy_loss: -0.0025811451487243176
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 23244 s, 989 iter, 9890000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-25-16
  done: false
  episode_len_mean: 87.79824561403508
  episode_reward_max: 220.51451930363538
  episode_reward_mean: 173.54931023102603
  episode_reward_min: 140.49670728590198
  episodes_this_iter: 114
  episodes_total: 105770
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3119.179
    load_time_ms: 2.374
    num_steps_sampled: 9900000
    num_steps_trained: 9900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7044300436973572
      kl: 0.0344371497631073
      policy_loss: 0.0049403090961277485
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 23362 s, 994 iter, 9940000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-27-14
  done: false
  episode_len_mean: 88.54867256637168
  episode_reward_max: 218.4291441224203
  episode_reward_mean: 172.25061893527476
  episode_reward_min: 138.9276787990522
  episodes_this_iter: 113
  episodes_total: 106335
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3138.589
    load_time_ms: 2.409
    num_steps_sampled: 9950000
    num_steps_trained: 9950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6861922144889832
      kl: 0.026977520436048508
      policy_loss: 0.0005824969848617911
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=9795], 23479 s, 999 iter, 9990000 ts, 172 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-22_07-29-11
  done: true
  episode_len_mean: 88.12389380530973
  episode_reward_max: 216.21244233528506
  episode_reward_mean: 170.08159574685854
  episode_reward_min: -151.20701492835047
  episodes_this_iter: 113
  episodes_total: 106896
  experiment_id: e9a0a10898fb45ea8bdfb380c7f1a8bb
  hostname: Gandalf
  info:
    grad_time_ms: 3132.1
    load_time_ms: 2.481
    num_steps_sampled: 10000000
    num_steps_trained: 10000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6723099946975708
      kl: 0.01799936592578888
      policy_loss: -0.005848770495504141
      to