# TRAINING I3W


# A) Create Environment, Vehicles etc

### General Parameter

In [1]:
# Length of one rollout; kept in a single variable so every cell of the notebook uses the same value
HORIZON = 500

# Name of the experiment
experiment_name = "IntersectionExample"

# Scenario class (referenced by name)
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the `__all__` list printed by this cell shows 'IntersectionScenarioTW',
# not 'IntersectionTWScenario' -- confirm which attribute flow.scenarios actually exports.
scenario_name = "IntersectionTWScenario"

# Environment class (referenced by name)
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, handed to NetParams as `additional_params`
additionalNetParams = dict(
    edge_length=40,
    lanes=1,
    speed_limit=30,
)

# The NetParams default of each argument is noted next to it; "(changed)" marks
# values that deviate from that default.
net_params = NetParams(
    # default: True (changed) -- disabled so that intersections are not skipped
    no_internal_links=False,
    inflows=None,                           # default: None
    osm_path=None,                          # default: None
    netfile=None,                           # default: None
    additional_params=additionalNetParams,  # default: None (changed)
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Positioning of the vehicles upon initialization/reset.
# The InitialConfig default of each argument is noted next to it; "(changed)"
# marks values that deviate from that default.
initial_config = InitialConfig(
    shuffle=True,                     # default: False (changed)
    spacing="custom",                 # default: "uniform" (changed)
    min_gap=10,                       # default: 0 (changed)
    perturbation=29.99,               # default: 0.0 (changed)
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# SUMO simulator settings. Every argument is spelled out together with its
# SumoParams default so that individual values can be tuned in place.
sumo_params = SumoParams(
    port=None,                 # default: None
    sim_step=0.1,              # default: 0.1
    emission_path=None,        # default: None
    lateral_resolution=None,   # default: None
    no_step_log=True,          # default: True
    render=False,              # default: False
    save_render=False,         # default: False
    sight_radius=25,           # default: 25
    show_radius=False,         # default: False
    pxpm=2,                    # default: 2
    overtake_right=False,      # default: False
    seed=None,                 # default: None
    restart_instance=False,    # default: False
    print_warnings=True,       # default: True
    teleport_time=-1,          # default: -1
    num_clients=1,             # default: 1
    sumo_binary=None,          # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, handed to EnvParams as `additional_params`
additionalEnvParams = dict(
    max_accel=3,          # maximum acceleration of autonomous vehicles
    max_decel=3,          # maximum deceleration of autonomous vehicles
    target_velocity=30,
)

# The EnvParams default of each argument is noted next to it.
env_params = EnvParams(
    additional_params=additionalEnvParams,  # default: None (changed)
    horizon=HORIZON,                        # default: 500; taken from the shared HORIZON variable
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import; no name from `random` is used in the visible cells.
# Prefer an explicit import (or drop it) once it is confirmed that no later cell relies on it.
from random import *

# Empty vehicle container; vehicle types are added to it in the following cell(s)
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters for the RL vehicles. The speed mode is supplied here
# (through `car_following_params`) rather than as a separate argument of vehicles.add().
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters: left unset and not passed to vehicles.add() below
lc_parameter = None

vehicles.add(
    # name of the vehicle type
    veh_id="rl",
    # acceleration controller, default: (SumoCarFollowingController, {})
    acceleration_controller=(RLController, {}),
    # lane change controller, default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # routing controller, default: None
    routing_controller=(ContinuousRouter, {}),
    # initial speed, default: 0
    initial_speed=0,
    # number of vehicles of this type, default: 1
    num_vehicles=2,
    # car-following parameters, default: None
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Bundle everything into flow_params. The dictionary keys must be exactly as specified here.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or its ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (the VehicleParams object built in the cells above)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Rollouts collected per training iteration
N_ROLLOUTS = 20
# Parallel rollout workers
N_CPUS = 2

# Start Ray with one CPU more than there are workers
# (presumably for the driver/trainer process -- TODO confirm).
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-15_19-46-27_6072/logs.
Waiting for redis server at 127.0.0.1:10965 to respond...
Waiting for redis server at 127.0.0.1:25163 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=1e041811bcf829f6ccbd0bd889d94173ae35b399dc0d81bc



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-03-15_19-46-27_6072/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-15_19-46-27_6072/sockets/raylet'],
 'redis_address': '192.168.2.102:10965',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=1e041811bcf829f6ccbd0bd889d94173ae35b399dc0d81bc'}

In [11]:
def executeTraining():
    """Train one PPO policy per agent ('rl_0' and 'rl_1') on the Flow environment.

    Steps:
      1. Build the PPO configuration from RLlib's default config.
      2. Attach the JSON-serialised ``flow_params`` to ``env_config`` (kept for replay).
      3. Register the Flow environment with Tune.
      4. Declare one policy graph per agent id and map every agent id to the
         policy of the same name.
      5. Launch the experiment through ``run_experiments``.

    Relies on the notebook-level names ``N_CPUS``, ``N_ROLLOUTS``, ``HORIZON``
    and ``flow_params`` defined in earlier cells.

    Returns:
        None. Results and checkpoints are written by Tune.
    """
    # Function-scope import so this cell does not depend on an edit to the import cell.
    from copy import deepcopy

    # Algorithm to train: the name of a built-in RLlib algorithm (e.g. DQN or PPO)
    # or of a user-defined trainable registered in the Tune registry.
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    # Deep copy on purpose: the nested "model" and "env_config" dicts are modified
    # below, and a shallow .copy() would write those changes into the agent
    # class's shared default config.
    config = deepcopy(agent_cls._default_config)
    config["num_workers"] = N_CPUS  # number of parallel workers
    config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
    config["use_gae"] = True  # using generalized advantage estimation
    config["lambda"] = 0.97
    # Optional overrides, currently disabled:
    #config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
    #config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # 200 by default, still too high?
    config["kl_target"] = 0.02  # target KL divergence
    config["num_sgd_iter"] = 10  # number of SGD iterations
    config["horizon"] = HORIZON  # rollout horizon

    # Save the flow params for replay
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                           indent=4)  # generating a string version of flow_params
    config['env_config']['flow_params'] = flow_json  # adding the flow_params to config dict
    config['env_config']['run'] = alg_run

    # Call the utility function make_create_env to be able to
    # register the Flow env for this experiment
    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env with Gym
    register_env(gym_name, create_env)

    # Multi-agent policy mapping: a throw-away environment instance is created
    # only to read the observation and action spaces.
    # NOTE(review): test_env is never closed in this function -- confirm whether
    # it should be terminated once the spaces have been read.
    test_env = create_env()
    obs_space = test_env.observation_space
    act_space = test_env.action_space

    def gen_policy():
        """Return the policy-graph spec (class, obs space, action space, extra config)."""
        return (PPOPolicyGraph, obs_space, act_space, {})

    # One independent PPO policy graph per agent id
    policy_graphs = {'rl_0': gen_policy(), 'rl_1': gen_policy()}

    def policy_mapping_fn(agent_id):
        """Map an agent to the policy that carries the same name as its id."""
        return agent_id

    config.update({
            'multiagent': {
                'policy_graphs': policy_graphs,
                'policy_mapping_fn': tune.function(policy_mapping_fn)
            }
        })

    # The returned trial list is not used, so it is not bound to a name.
    run_experiments({
        flow_params["exp_tag"]: {
            "run": alg_run,  # RL algorithm to run
            "env": gym_name,  # environment name generated earlier
            "config": {  # configuration params (must match "run" value)
                **config
            },
            "checkpoint_freq": 1,  # number of iterations between checkpoints
            "max_failures": 999,
            "stop": {  # stopping conditions
                "training_iteration": 250,  # number of iterations to stop after
            },
        },
    })

In [12]:
# Launch the training run; Tune prints its status and per-iteration results below
executeTraining()

 Starting SUMO on port 35983
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 2.6/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-15_19-46-30g0u_vvol -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 2.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



14.185246391238888
20.975612210853335


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_19-47-25
  done: false
  episode_len_mean: 444.0952380952381
  episode_reward_max: 222.678072652914
  episode_reward_mean: 44.649078989591274
  episode_reward_min: -150.1624173850891
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 4278.209
    load_time_ms: 148.835
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4204784631729126
      kl: 0.0006464617908932269
      policy_loss: -0.001489831367507577
      total_loss: 125.28028869628906
      vf_explained_var: 0.040251243859529495
      vf_loss: 125.28165435791016
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4194120168685913
      kl: 0.0007001225021667778
      policy_loss: -0.000922587234

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 120 s, 5 iter, 50000 ts, 102 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_19-49-24
  done: false
  episode_len_mean: 422.51
  episode_reward_max: 320.92394146143164
  episode_reward_mean: 127.84448235504853
  episode_reward_min: -164.6448944023465
  episodes_this_iter: 27
  episodes_total: 138
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3360.487
    load_time_ms: 27.139
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4370638132095337
      kl: 0.01881728507578373
      policy_loss: -0.004027616698294878
      total_loss

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 238 s, 10 iter, 100000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_19-51-21
  done: false
  episode_len_mean: 248.56
  episode_reward_max: 336.25555493786123
  episode_reward_mean: 157.23802503195415
  episode_reward_min: -163.62544917168447
  episodes_this_iter: 42
  episodes_total: 329
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3167.169
    load_time_ms: 2.743
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.0007812500116415322
      cur_lr: 4.999999873689376e-05
      entropy: 1.4655736684799194
      kl: 0.00417287228628993
      policy_loss: -0.0023978312965482473
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 355 s, 15 iter, 150000 ts, 176 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_19-53-19
  done: false
  episode_len_mean: 195.26
  episode_reward_max: 343.42339037495935
  episode_reward_mean: 169.7631790185138
  episode_reward_min: -162.0439022632803
  episodes_this_iter: 52
  episodes_total: 567
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3158.051
    load_time_ms: 2.881
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 2.441406286379788e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.4498140811920166
      kl: 0.005784483160823584
      policy_loss: -0.0018718900391831994
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 474 s, 20 iter, 200000 ts, 176 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_19-55-18
  done: false
  episode_len_mean: 156.92
  episode_reward_max: 387.25996003801174
  episode_reward_mean: 188.77956721914455
  episode_reward_min: -157.66415117182817
  episodes_this_iter: 66
  episodes_total: 875
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3154.935
    load_time_ms: 2.853
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 1.5258789289873675e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.3782248497009277
      kl: 0.003847062587738037
      policy_loss: -0.0013614809140563011
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 593 s, 25 iter, 250000 ts, 129 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_19-57-17
  done: false
  episode_len_mean: 134.22
  episode_reward_max: 375.4305560670486
  episode_reward_mean: 176.21423244280834
  episode_reward_min: -162.74678151018816
  episodes_this_iter: 72
  episodes_total: 1237
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3152.087
    load_time_ms: 2.614
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 4.7683716530855236e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.4035781621932983
      kl: 0.0020864924881607294
      policy_loss: -0.0006687003769911826
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 712 s, 30 iter, 300000 ts, 185 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_19-59-16
  done: false
  episode_len_mean: 129.08
  episode_reward_max: 345.2799965609105
  episode_reward_mean: 195.177041952402
  episode_reward_min: -159.06041731022214
  episodes_this_iter: 78
  episodes_total: 1618
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3154.472
    load_time_ms: 2.483
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 2.9802322831784522e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.4185521602630615
      kl: 0.005693553015589714
      policy_loss: -0.0008630232186987996
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 831 s, 35 iter, 350000 ts, 192 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-01-16
  done: false
  episode_len_mean: 120.9
  episode_reward_max: 361.2534625593972
  episode_reward_mean: 181.13430201835686
  episode_reward_min: -155.2557519524413
  episodes_this_iter: 83
  episodes_total: 2019
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3152.872
    load_time_ms: 2.422
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 9.313225884932663e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3701233863830566
      kl: 0.005829905159771442
      policy_loss: -0.0034680634271353483
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 951 s, 40 iter, 400000 ts, 215 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-03-17
  done: false
  episode_len_mean: 123.48
  episode_reward_max: 367.75483239220534
  episode_reward_mean: 225.75384347707742
  episode_reward_min: -155.27530890447292
  episodes_this_iter: 82
  episodes_total: 2421
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3153.252
    load_time_ms: 2.509
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.316955804824829
      kl: 0.0010025127558037639
      policy_loss: -0.0008732013520784676
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1071 s, 45 iter, 450000 ts, 208 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-05-16
  done: false
  episode_len_mean: 118.77
  episode_reward_max: 362.95785254371884
  episode_reward_mean: 208.87492217854867
  episode_reward_min: -165.0400072687794
  episodes_this_iter: 83
  episodes_total: 2835
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3159.656
    load_time_ms: 2.597
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 1.8189894306509108e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.3486456871032715
      kl: 0.0021137623116374016
      policy_loss: 0.0001312130771111697
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1190 s, 50 iter, 500000 ts, 262 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-07-16
  done: false
  episode_len_mean: 128.57
  episode_reward_max: 359.6311616440817
  episode_reward_mean: 257.82675355195397
  episode_reward_min: -147.58168040177742
  episodes_this_iter: 81
  episodes_total: 3233
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3162.998
    load_time_ms: 2.562
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 5.684341970784096e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.3957316875457764
      kl: 0.010703399777412415
      policy_loss: -0.0023218877613544464
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1310 s, 55 iter, 550000 ts, 288 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-09-16
  done: false
  episode_len_mean: 126.43
  episode_reward_max: 355.00465384887565
  episode_reward_mean: 281.0540728684131
  episode_reward_min: -139.79746027324416
  episodes_this_iter: 79
  episodes_total: 3630
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3165.935
    load_time_ms: 2.515
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 3.55271373174006e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.330775260925293
      kl: 0.011040101759135723
      policy_loss: -0.005760569591075182
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1429 s, 60 iter, 600000 ts, 302 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-11-15
  done: false
  episode_len_mean: 126.9
  episode_reward_max: 365.0543379990529
  episode_reward_mean: 303.36904160075454
  episode_reward_min: 238.71241592556456
  episodes_this_iter: 79
  episodes_total: 4027
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3163.002
    load_time_ms: 2.596
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 8.88178432935015e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.279351830482483
      kl: 0.002019092906266451
      policy_loss: -0.0017999073024839163
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1548 s, 65 iter, 650000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-13-15
  done: false
  episode_len_mean: 128.43
  episode_reward_max: 378.19584817266593
  episode_reward_mean: 314.8605435557809
  episode_reward_min: 259.9738962189277
  episodes_this_iter: 78
  episodes_total: 4423
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3160.48
    load_time_ms: 2.765
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 5.551115205843844e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.2112810611724854
      kl: 0.018392013385891914
      policy_loss: -0.008236470632255077
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1669 s, 70 iter, 700000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-15-15
  done: false
  episode_len_mean: 127.63
  episode_reward_max: 366.7217526649231
  episode_reward_mean: 309.99194859502745
  episode_reward_min: -118.34271817872643
  episodes_this_iter: 79
  episodes_total: 4812
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3167.614
    load_time_ms: 2.633
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 3.4694470036524025e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1862714290618896
      kl: 0.006175288464874029
      policy_loss: -0.0036226452793926
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1789 s, 75 iter, 750000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-17-16
  done: false
  episode_len_mean: 129.35
  episode_reward_max: 397.41354415072175
  episode_reward_mean: 323.9653746270608
  episode_reward_min: 278.99905163255806
  episodes_this_iter: 78
  episodes_total: 5202
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3174.19
    load_time_ms: 2.556
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 4.336808754565503e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.085440754890442
      kl: 0.005955588538199663
      policy_loss: -0.0013468739343807101
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 1909 s, 80 iter, 800000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-19-16
  done: false
  episode_len_mean: 130.99
  episode_reward_max: 382.12543687979974
  episode_reward_mean: 330.5973598094089
  episode_reward_min: 281.2132404334613
  episodes_this_iter: 75
  episodes_total: 5584
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3188.962
    load_time_ms: 2.451
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 2.7105054716034394e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.0142618417739868
      kl: 0.006661172956228256
      policy_loss: -0.0018115115817636251
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2027 s, 85 iter, 850000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-21-14
  done: false
  episode_len_mean: 130.82
  episode_reward_max: 375.81436801513695
  episode_reward_mean: 327.10293872955646
  episode_reward_min: 278.59365298381886
  episodes_this_iter: 76
  episodes_total: 5966
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3135.072
    load_time_ms: 2.225
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 8.470329598760748e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.9927891492843628
      kl: 0.008320878259837627
      policy_loss: -0.002933378564193845
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2144 s, 90 iter, 900000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-23-11
  done: false
  episode_len_mean: 131.78
  episode_reward_max: 383.0632358740377
  episode_reward_mean: 331.4956774500336
  episode_reward_min: 281.98124047574555
  episodes_this_iter: 76
  episodes_total: 6348
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3073.491
    load_time_ms: 2.302
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 4.235164799380374e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.9036825299263
      kl: 0.008814498782157898
      policy_loss: -0.0033573557157069445
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2260 s, 95 iter, 950000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-25-07
  done: false
  episode_len_mean: 132.45
  episode_reward_max: 372.89747973089686
  episode_reward_mean: 333.93535231116834
  episode_reward_min: 289.592649394188
  episodes_this_iter: 75
  episodes_total: 6726
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3053.185
    load_time_ms: 2.402
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.8283362984657288
      kl: 0.005277267191559076
      policy_loss: -0.0014051728649064898
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2376 s, 100 iter, 1000000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-27-03
  done: false
  episode_len_mean: 132.83
  episode_reward_max: 376.0222344449236
  episode_reward_mean: 332.720624378788
  episode_reward_min: 292.8262153119884
  episodes_this_iter: 76
  episodes_total: 7106
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3047.405
    load_time_ms: 2.571
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 1.6543612497579586e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.7971827387809753
      kl: 0.005901138763874769
      policy_loss: -0.0006846529431641102
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2490 s, 105 iter, 1050000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-28-57
  done: false
  episode_len_mean: 135.57
  episode_reward_max: 384.4544842231421
  episode_reward_mean: 333.5272487447092
  episode_reward_min: 294.779011880667
  episodes_this_iter: 73
  episodes_total: 7479
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3044.369
    load_time_ms: 2.476
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 1.0339757810987241e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.8669157028198242
      kl: 0.006750829052180052
      policy_loss: -0.0013842827174812555
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2606 s, 110 iter, 1100000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-30-53
  done: false
  episode_len_mean: 134.49
  episode_reward_max: 375.8604772640205
  episode_reward_mean: 332.34846450154095
  episode_reward_min: 295.71025088292276
  episodes_this_iter: 74
  episodes_total: 7853
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3046.784
    load_time_ms: 2.418
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 2.5849394527468104e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.732043981552124
      kl: 0.017100000753998756
      policy_loss: -0.0012121709296479821
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2720 s, 115 iter, 1150000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-32-47
  done: false
  episode_len_mean: 133.88
  episode_reward_max: 381.49568973889956
  episode_reward_mean: 332.4200608386967
  episode_reward_min: 291.7881376854919
  episodes_this_iter: 74
  episodes_total: 8226
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3056.336
    load_time_ms: 2.435
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 1.2924697263734052e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.6708748936653137
      kl: 0.018741093575954437
      policy_loss: -0.0023535951040685177
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2834 s, 120 iter, 1200000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-34-42
  done: false
  episode_len_mean: 135.14
  episode_reward_max: 378.94631180308767
  episode_reward_mean: 331.5703213468746
  episode_reward_min: 293.9090492596479
  episodes_this_iter: 74
  episodes_total: 8596
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3037.547
    load_time_ms: 2.335
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 3.231174315933513e-28
      cur_lr: 4.999999873689376e-05
      entropy: 0.6093357801437378
      kl: 0.01077259425073862
      policy_loss: -0.0015331193571910262
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 2950 s, 125 iter, 1250000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-36-38
  done: false
  episode_len_mean: 133.62
  episode_reward_max: 373.521371408883
  episode_reward_mean: 333.5783020391419
  episode_reward_min: 290.3319339667117
  episodes_this_iter: 75
  episodes_total: 8968
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3043.474
    load_time_ms: 2.274
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 4.038967894916891e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.4399690330028534
      kl: 0.016306916251778603
      policy_loss: 0.004811305087059736
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3066 s, 130 iter, 1300000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-38-34
  done: false
  episode_len_mean: 133.68
  episode_reward_max: 378.1944044327105
  episode_reward_mean: 331.2555392183848
  episode_reward_min: 291.89522519428374
  episodes_this_iter: 75
  episodes_total: 9341
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3043.672
    load_time_ms: 2.41
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 4.038967894916891e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.3607267141342163
      kl: 0.022755125537514687
      policy_loss: 0.010349126532673836
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3180 s, 135 iter, 1350000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-40-29
  done: false
  episode_len_mean: 134.73
  episode_reward_max: 376.5184157608375
  episode_reward_mean: 331.51464183222066
  episode_reward_min: 291.71871213510997
  episodes_this_iter: 74
  episodes_total: 9712
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3046.566
    load_time_ms: 2.55
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 4.038967894916891e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.321259081363678
      kl: 0.02002248726785183
      policy_loss: 0.012116656638681889
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3295 s, 140 iter, 1400000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-42-24
  done: false
  episode_len_mean: 134.82
  episode_reward_max: 367.7526667707244
  episode_reward_mean: 328.9107142881871
  episode_reward_min: 292.5497534841355
  episodes_this_iter: 74
  episodes_total: 10082
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3069.816
    load_time_ms: 2.384
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 9.087679192964136e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.2912539839744568
      kl: 0.018399124965071678
      policy_loss: 0.01331794261932373
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3411 s, 145 iter, 1450000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-44-20
  done: false
  episode_len_mean: 133.01
  episode_reward_max: 377.67645400793145
  episode_reward_mean: 332.6802484132224
  episode_reward_min: 286.50191598186916
  episodes_this_iter: 75
  episodes_total: 10452
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3053.905
    load_time_ms: 2.319
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 3.067091825426526e-28
      cur_lr: 4.999999873689376e-05
      entropy: 0.1925295889377594
      kl: 0.04963141679763794
      policy_loss: 0.027564076706767082
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3525 s, 150 iter, 1500000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-46-14
  done: false
  episode_len_mean: 137.42
  episode_reward_max: 376.424924322624
  episode_reward_mean: 334.3858637051363
  episode_reward_min: 297.3118118588166
  episodes_this_iter: 73
  episodes_total: 10819
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3051.466
    load_time_ms: 2.342
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 5.175717651009522e-28
      cur_lr: 4.999999873689376e-05
      entropy: 0.2671734094619751
      kl: 216.17823791503906
      policy_loss: 0.152012899518013
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3640 s, 155 iter, 1550000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-48-10
  done: false
  episode_len_mean: 136.94
  episode_reward_max: 376.45777053137533
  episode_reward_mean: 332.79350919836975
  episode_reward_min: 297.86884775744386
  episodes_this_iter: 73
  episodes_total: 11184
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3079.566
    load_time_ms: 2.29
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.7468047854566178e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.2901708781719208
      kl: 77.27303314208984
      policy_loss: 0.07403235882520676
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3755 s, 160 iter, 1600000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-50-05
  done: false
  episode_len_mean: 137.86
  episode_reward_max: 372.8290520174024
  episode_reward_mean: 332.75091718005865
  episode_reward_min: 297.0842867070762
  episodes_this_iter: 73
  episodes_total: 11547
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3052.083
    load_time_ms: 2.409
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 5.895464152763768e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.26984506845474243
      kl: 1.170664668083191
      policy_loss: 0.050632722675800323
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3871 s, 165 iter, 1650000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-52-01
  done: false
  episode_len_mean: 135.68
  episode_reward_max: 378.60764414084196
  episode_reward_mean: 333.84001930607513
  episode_reward_min: 296.621144242277
  episodes_this_iter: 74
  episodes_total: 11915
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3071.44
    load_time_ms: 2.421
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 4.4768685291540484e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.24513275921344757
      kl: 0.9184761047363281
      policy_loss: 0.03928543999791145
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 3986 s, 170 iter, 1700000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-53-56
  done: false
  episode_len_mean: 136.97
  episode_reward_max: 372.9094748668597
  episode_reward_mean: 333.0183676379504
  episode_reward_min: 287.00529095225687
  episodes_this_iter: 73
  episodes_total: 12280
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3074.538
    load_time_ms: 2.286
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 6.715301715210304e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.31205442547798157
      kl: 6.254944801330566
      policy_loss: 0.03370391204953194
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4101 s, 175 iter, 1750000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-55-52
  done: false
  episode_len_mean: 138.39
  episode_reward_max: 378.5598206637609
  episode_reward_mean: 333.01235178634795
  episode_reward_min: 298.5064530341851
  episodes_this_iter: 72
  episodes_total: 12644
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3041.404
    load_time_ms: 2.311
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 3.3996224485864687e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.27960026264190674
      kl: 0.02032110095024109
      policy_loss: 0.004010998178273439
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4216 s, 180 iter, 1800000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-57-47
  done: false
  episode_len_mean: 135.04
  episode_reward_max: 390.3620816030128
  episode_reward_mean: 334.7309891346987
  episode_reward_min: 299.99441902976474
  episodes_this_iter: 74
  episodes_total: 13009
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3067.274
    load_time_ms: 2.233
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 1.1473720556264762e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.2763991355895996
      kl: 0.021969208493828773
      policy_loss: 0.007820222526788712
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4332 s, 185 iter, 1850000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_20-59-42
  done: false
  episode_len_mean: 133.97
  episode_reward_max: 384.7281779040131
  episode_reward_mean: 336.090087560904
  episode_reward_min: 290.8048700921197
  episodes_this_iter: 74
  episodes_total: 13380
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3058.933
    load_time_ms: 2.475
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 1.1473720556264762e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.2834087312221527
      kl: 0.017401617020368576
      policy_loss: 0.0026918284129351377
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4446 s, 190 iter, 1900000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-01-37
  done: false
  episode_len_mean: 133.53
  episode_reward_max: 379.12581220345606
  episode_reward_mean: 333.87015614979725
  episode_reward_min: 296.32695018850484
  episodes_this_iter: 75
  episodes_total: 13753
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3051.698
    load_time_ms: 2.452
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 2.5815877414571537e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.22461512684822083
      kl: 0.30927756428718567
      policy_loss: 0.015222057700157166
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4563 s, 195 iter, 1950000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-03-34
  done: false
  episode_len_mean: 134.31
  episode_reward_max: 370.92457737470966
  episode_reward_mean: 335.10757157698094
  episode_reward_min: 296.49272734810927
  episodes_this_iter: 74
  episodes_total: 14126
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3069.811
    load_time_ms: 2.277
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 2.9042868747406867e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.2724052965641022
      kl: 0.01919480226933956
      policy_loss: 0.001779236481525004
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4677 s, 200 iter, 2000000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-05-28
  done: false
  episode_len_mean: 133.5
  episode_reward_max: 378.6078203620389
  episode_reward_mean: 335.3687551062781
  episode_reward_min: 297.78554265549786
  episodes_this_iter: 74
  episodes_total: 14499
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3071.563
    load_time_ms: 2.449
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 7.260717186851717e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.33203157782554626
      kl: 0.010699491947889328
      policy_loss: -0.0006904068868607283
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4792 s, 205 iter, 2050000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-07-24
  done: false
  episode_len_mean: 132.32
  episode_reward_max: 377.12799705816036
  episode_reward_mean: 336.62480742843474
  episode_reward_min: 293.838963769607
  episodes_this_iter: 75
  episodes_total: 14876
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3057.955
    load_time_ms: 2.383
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 7.260717186851717e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.2866377830505371
      kl: 0.014398044906556606
      policy_loss: 0.0006062519969418645
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 4907 s, 210 iter, 2100000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-09-19
  done: false
  episode_len_mean: 132.05
  episode_reward_max: 377.7530416560027
  episode_reward_mean: 337.8642224190366
  episode_reward_min: 292.40930572631675
  episodes_this_iter: 75
  episodes_total: 15255
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3041.526
    load_time_ms: 2.617
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 7.260717186851717e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.2623480260372162
      kl: 0.88394695520401
      policy_loss: 0.015223138965666294
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 5022 s, 215 iter, 2150000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-11-14
  done: false
  episode_len_mean: 133.52
  episode_reward_max: 375.7270528413464
  episode_reward_mean: 338.4431317452851
  episode_reward_min: 293.88321019988274
  episodes_this_iter: 75
  episodes_total: 15632
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3053.438
    load_time_ms: 2.682
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.6336613177378297e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.2198897898197174
      kl: 0.9827868938446045
      policy_loss: 0.023677941411733627
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 5137 s, 220 iter, 2200000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-13-10
  done: false
  episode_len_mean: 133.95
  episode_reward_max: 373.453943667963
  episode_reward_mean: 335.55524453410516
  episode_reward_min: 290.9198468391717
  episodes_this_iter: 76
  episodes_total: 16009
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3079.074
    load_time_ms: 2.53
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 2.4504918779991314e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.2728967070579529
      kl: 0.015211724676191807
      policy_loss: -0.0003564131329767406
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 5253 s, 225 iter, 2250000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-15-06
  done: false
  episode_len_mean: 132.0
  episode_reward_max: 378.87571778970624
  episode_reward_mean: 336.8029650095878
  episode_reward_min: 297.1805481742939
  episodes_this_iter: 75
  episodes_total: 16386
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3062.023
    load_time_ms: 2.367
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 2.4504918779991314e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.1557491272687912
      kl: 0.018040603026747704
      policy_loss: 0.0008071725023910403
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 5368 s, 230 iter, 2300000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-17-01
  done: false
  episode_len_mean: 132.83
  episode_reward_max: 376.0079025967342
  episode_reward_mean: 336.09442573225505
  episode_reward_min: 303.329654710816
  episodes_this_iter: 75
  episodes_total: 16764
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3053.698
    load_time_ms: 2.38
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 2.4504918779991314e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.1983441561460495
      kl: 0.4503346383571625
      policy_loss: 0.0035076458007097244
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 5484 s, 235 iter, 2350000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-18-57
  done: false
  episode_len_mean: 134.11
  episode_reward_max: 377.59590564972484
  episode_reward_mean: 337.31836896015227
  episode_reward_min: 300.8485046741618
  episodes_this_iter: 75
  episodes_total: 17141
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3054.978
    load_time_ms: 2.56
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 3.675737619783471e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.12725260853767395
      kl: 0.01436846237629652
      policy_loss: 0.0015510079683735967
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 5600 s, 240 iter, 2400000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-20-53
  done: false
  episode_len_mean: 133.35
  episode_reward_max: 375.65775034075926
  episode_reward_mean: 338.2878251784064
  episode_reward_min: 300.87224803569507
  episodes_this_iter: 74
  episodes_total: 17518
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3057.658
    load_time_ms: 2.419
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 3.675737619783471e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.1142442375421524
      kl: 0.013013147749006748
      policy_loss: 0.0007367464131675661
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=6120], 5714 s, 245 iter, 2450000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-22-48
  done: false
  episode_len_mean: 131.37
  episode_reward_max: 381.64288893959696
  episode_reward_mean: 339.51939884282785
  episode_reward_min: 295.70689529911033
  episodes_this_iter: 76
  episodes_total: 17893
  experiment_id: 63596cf8d9ad463ca16636d281b94ee1
  hostname: Gandalf
  info:
    grad_time_ms: 3061.917
    load_time_ms: 2.422
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 3.675737619783471e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.018014317378401756
      kl: 0.01143646240234375
      policy_loss: -0.0014840251533314586
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=6120], 5830 s, 250 iter, 2500000 ts, 340 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=6120], 5830 s, 250 iter, 2500000 ts, 340 rew



In [13]:
# Launch a training run. executeTraining() is defined elsewhere in this
# notebook (outside this excerpt). Judging by the output captured below, the
# call starts SUMO and a Ray Tune trial named
# PPO_MultiAgentIntersectionEnv-v0_0 whose results are logged under
# ray_results/IntersectionExample.
# NOTE(review): this is long-running -- the trial captured in the preceding
# output terminated after 250 iterations (~5830 s); confirm the stop
# criteria before re-running this cell.
executeTraining()

 Starting SUMO on port 43023
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.5/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-15_21-24-22fzs0dk1f -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



12.259954930914837
18.211412982384548


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-25-19
  done: false
  episode_len_mean: 469.15
  episode_reward_max: 233.25595572584197
  episode_reward_mean: 45.87050406939879
  episode_reward_min: -145.51127729836082
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 4174.203
    load_time_ms: 142.568
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4204384088516235
      kl: 0.000547407369595021
      policy_loss: -0.0002819827350322157
      total_loss: 95.99311065673828
      vf_explained_var: 0.010727101005613804
      vf_loss: 95.9932861328125
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4141510725021362
      kl: 0.0012733839685097337
      policy_loss: -0.002084715524688363
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 117 s, 5 iter, 50000 ts, 55.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-27-15
  done: false
  episode_len_mean: 412.18
  episode_reward_max: 312.4888142283766
  episode_reward_mean: 61.24008140214944
  episode_reward_min: -166.35666841719097
  episodes_this_iter: 24
  episodes_total: 139
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3380.001
    load_time_ms: 25.709
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4106189012527466
      kl: 0.003628503531217575
      policy_loss: -0.0017596714897081256
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 233 s, 10 iter, 100000 ts, 169 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-29-11
  done: false
  episode_len_mean: 345.49
  episode_reward_max: 383.30460544980144
  episode_reward_mean: 179.1155746675401
  episode_reward_min: -165.5313706812177
  episodes_this_iter: 33
  episodes_total: 276
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3214.233
    load_time_ms: 2.33
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.4065637588500977
      kl: 0.004592257551848888
      policy_loss: -0.0021631610579788685
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 349 s, 15 iter, 150000 ts, 214 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-31-07
  done: false
  episode_len_mean: 262.57
  episode_reward_max: 397.33573792183387
  episode_reward_mean: 215.94793239045129
  episode_reward_min: -162.18531667706924
  episodes_this_iter: 39
  episodes_total: 462
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3194.856
    load_time_ms: 2.471
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.4066945314407349
      kl: 0.0019726091995835304
      policy_loss: -0.001464539091102779
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 465 s, 20 iter, 200000 ts, 191 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-33-04
  done: false
  episode_len_mean: 208.78
  episode_reward_max: 362.98571425423796
  episode_reward_mean: 183.45240746260956
  episode_reward_min: -163.80531640830822
  episodes_this_iter: 49
  episodes_total: 688
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3205.554
    load_time_ms: 2.543
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3740535974502563
      kl: 0.004585606046020985
      policy_loss: -0.0013610401656478643
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 583 s, 25 iter, 250000 ts, 198 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-35-02
  done: false
  episode_len_mean: 170.17
  episode_reward_max: 369.6138063667119
  episode_reward_mean: 174.95716669423993
  episode_reward_min: -158.62287387540414
  episodes_this_iter: 59
  episodes_total: 959
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3216.464
    load_time_ms: 2.529
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.348367691040039
      kl: 0.0031386897899210453
      policy_loss: -0.0009150310070253909
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 701 s, 30 iter, 300000 ts, 214 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-37-00
  done: false
  episode_len_mean: 171.18
  episode_reward_max: 376.2170961852713
  episode_reward_mean: 196.88293171386718
  episode_reward_min: -162.23727896374118
  episodes_this_iter: 61
  episodes_total: 1241
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3209.68
    load_time_ms: 2.527
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 3.7252903539730653e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.307662844657898
      kl: 0.00494017731398344
      policy_loss: -0.001575456466525793
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 819 s, 35 iter, 350000 ts, 203 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-38-58
  done: false
  episode_len_mean: 160.74
  episode_reward_max: 385.5592148940938
  episode_reward_mean: 203.7610120669525
  episode_reward_min: -156.91443774117857
  episodes_this_iter: 62
  episodes_total: 1527
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3214.078
    load_time_ms: 2.52
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 2.3283064712331658e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.2431294918060303
      kl: 0.009159254841506481
      policy_loss: -0.0016852552071213722
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 936 s, 40 iter, 400000 ts, 178 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-40-56
  done: false
  episode_len_mean: 144.77
  episode_reward_max: 352.4683914571657
  episode_reward_mean: 197.39020245486634
  episode_reward_min: -161.51374366313752
  episodes_this_iter: 68
  episodes_total: 1856
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3199.582
    load_time_ms: 2.431
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 7.275957722603643e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.1672520637512207
      kl: 0.006882939487695694
      policy_loss: -0.0023514507338404655
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1055 s, 45 iter, 450000 ts, 229 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-42-54
  done: false
  episode_len_mean: 147.5
  episode_reward_max: 379.8340791764004
  episode_reward_mean: 215.95522253331342
  episode_reward_min: -164.16496033251576
  episodes_this_iter: 68
  episodes_total: 2182
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3207.832
    load_time_ms: 2.271
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.254664421081543
      kl: 0.0029235694091767073
      policy_loss: -0.00024477779516018927
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1172 s, 50 iter, 500000 ts, 204 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-44-52
  done: false
  episode_len_mean: 139.4
  episode_reward_max: 385.31844356534606
  episode_reward_mean: 206.71221664262794
  episode_reward_min: -161.46127845279278
  episodes_this_iter: 70
  episodes_total: 2529
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3204.138
    load_time_ms: 2.396
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 7.10542746348012e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.1998326778411865
      kl: 0.015029444359242916
      policy_loss: -0.0037296521477401257
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1289 s, 55 iter, 550000 ts, 190 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-46-50
  done: false
  episode_len_mean: 129.05
  episode_reward_max: 342.1911895198019
  episode_reward_mean: 200.81810108014528
  episode_reward_min: -163.66382933916472
  episodes_this_iter: 76
  episodes_total: 2905
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3152.278
    load_time_ms: 2.413
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 8.88178432935015e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.0569254159927368
      kl: 0.006413089111447334
      policy_loss: -0.0023864193353801966
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1407 s, 60 iter, 600000 ts, 215 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-48-48
  done: false
  episode_len_mean: 132.63
  episode_reward_max: 375.4507774257427
  episode_reward_mean: 209.52481528411906
  episode_reward_min: -158.64195460911833
  episodes_this_iter: 76
  episodes_total: 3288
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3128.44
    load_time_ms: 2.267
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 2.775557602921922e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.1289432048797607
      kl: 0.00472133606672287
      policy_loss: -0.000915242126211524
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1524 s, 65 iter, 650000 ts, 228 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-50-45
  done: false
  episode_len_mean: 127.26
  episode_reward_max: 378.95800062651597
  episode_reward_mean: 207.46107816122077
  episode_reward_min: -154.8534106965875
  episodes_this_iter: 79
  episodes_total: 3664
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3139.651
    load_time_ms: 2.525
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1253163814544678
      kl: 0.009610388427972794
      policy_loss: -0.002045031636953354
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1639 s, 70 iter, 700000 ts, 228 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-52-40
  done: false
  episode_len_mean: 124.74
  episode_reward_max: 370.45579253318914
  episode_reward_mean: 220.13412015185364
  episode_reward_min: -163.65240430679933
  episodes_this_iter: 80
  episodes_total: 4056
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3126.725
    load_time_ms: 2.593
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 1.0842021886413758e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0642799139022827
      kl: 0.0053406679071486
      policy_loss: -0.0014286214718595147
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1755 s, 75 iter, 750000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-54-36
  done: false
  episode_len_mean: 126.57
  episode_reward_max: 377.1803484300376
  episode_reward_mean: 208.6564141784406
  episode_reward_min: -159.87770859761756
  episodes_this_iter: 76
  episodes_total: 4452
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3092.81
    load_time_ms: 2.526
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 3.3881318395042993e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.1481841802597046
      kl: 0.005757330451160669
      policy_loss: -0.002867823699489236
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1870 s, 80 iter, 800000 ts, 220 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-56-32
  done: false
  episode_len_mean: 122.33
  episode_reward_max: 382.60947268902606
  episode_reward_mean: 222.19723111574123
  episode_reward_min: -157.715468014975
  episodes_this_iter: 82
  episodes_total: 4859
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3102.985
    load_time_ms: 2.672
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.0049971342086792
      kl: 0.007241829764097929
      policy_loss: -0.0019749230705201626
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 1985 s, 85 iter, 850000 ts, 214 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_21-58-27
  done: false
  episode_len_mean: 115.36
  episode_reward_max: 375.9868074926849
  episode_reward_mean: 184.09818268794686
  episode_reward_min: -154.3695441960785
  episodes_this_iter: 86
  episodes_total: 5275
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3110.058
    load_time_ms: 2.57
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 6.617444999031835e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.905032217502594
      kl: 0.010480481199920177
      policy_loss: -0.002210312057286501
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2101 s, 90 iter, 900000 ts, 256 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-00-23
  done: false
  episode_len_mean: 122.82
  episode_reward_max: 362.592164279666
  episode_reward_mean: 232.53037574656875
  episode_reward_min: -152.78973723389015
  episodes_this_iter: 83
  episodes_total: 5682
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3115.193
    load_time_ms: 2.359
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 4.1359031243948966e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.7032977342605591
      kl: 0.008009900338947773
      policy_loss: -0.0022651595063507557
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2217 s, 95 iter, 950000 ts, 220 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-02-19
  done: false
  episode_len_mean: 127.72
  episode_reward_max: 377.6030780171606
  episode_reward_mean: 258.24274214803614
  episode_reward_min: -157.3940369738412
  episodes_this_iter: 76
  episodes_total: 6085
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3114.04
    load_time_ms: 2.178
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.2924697263734052e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.8559353947639465
      kl: 0.003744718851521611
      policy_loss: -0.002797388471662998
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2332 s, 100 iter, 1000000 ts, 238 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-04-14
  done: false
  episode_len_mean: 120.26
  episode_reward_max: 362.3806523301541
  episode_reward_mean: 214.10843486015358
  episode_reward_min: -152.80017230162
  episodes_this_iter: 83
  episodes_total: 6490
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3107.794
    load_time_ms: 2.399
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 4.038967894916891e-29
      cur_lr: 4.999999873689376e-05
      entropy: 1.0168142318725586
      kl: 0.013128288090229034
      policy_loss: -0.004607161041349173
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2447 s, 105 iter, 1050000 ts, 194 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-06-10
  done: false
  episode_len_mean: 123.91
  episode_reward_max: 368.8270291311778
  episode_reward_mean: 226.44665744927028
  episode_reward_min: -166.2103613905276
  episodes_this_iter: 79
  episodes_total: 6892
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3131.995
    load_time_ms: 2.561
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 2.524354934323057e-30
      cur_lr: 4.999999873689376e-05
      entropy: 1.1083025932312012
      kl: 0.008873618207871914
      policy_loss: -0.0016362577443942428
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2564 s, 110 iter, 1100000 ts, 235 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-08-06
  done: false
  episode_len_mean: 120.56
  episode_reward_max: 372.88038294638386
  episode_reward_mean: 201.93747587888527
  episode_reward_min: -161.76748162375492
  episodes_this_iter: 84
  episodes_total: 7297
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3109.276
    load_time_ms: 2.445
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 7.888609169759553e-32
      cur_lr: 4.999999873689376e-05
      entropy: 1.0923850536346436
      kl: 0.007209344767034054
      policy_loss: -0.003081991570070386
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2679 s, 115 iter, 1150000 ts, 220 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-10-02
  done: false
  episode_len_mean: 124.11
  episode_reward_max: 369.00078306415065
  episode_reward_mean: 222.50633588773093
  episode_reward_min: -154.55141088531502
  episodes_this_iter: 80
  episodes_total: 7701
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3117.07
    load_time_ms: 2.452
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 4.930380731099721e-33
      cur_lr: 4.999999873689376e-05
      entropy: 1.1729494333267212
      kl: 0.00541686499491334
      policy_loss: -0.0033824683632701635
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2794 s, 120 iter, 1200000 ts, 220 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-11-57
  done: false
  episode_len_mean: 121.29
  episode_reward_max: 382.3651137286022
  episode_reward_mean: 221.88732688408632
  episode_reward_min: -163.2389055888959
  episodes_this_iter: 83
  episodes_total: 8111
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3116.738
    load_time_ms: 2.575
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.5407439784686627e-34
      cur_lr: 4.999999873689376e-05
      entropy: 0.9983541965484619
      kl: 0.003392260055989027
      policy_loss: -0.0010087403934448957
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 2909 s, 125 iter, 1250000 ts, 244 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-13-52
  done: false
  episode_len_mean: 117.53
  episode_reward_max: 359.42698901700703
  episode_reward_mean: 218.2184709399552
  episode_reward_min: -168.1318170170419
  episodes_this_iter: 86
  episodes_total: 8524
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3102.371
    load_time_ms: 2.552
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 4.814824932714571e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.7906954288482666
      kl: 0.009552023373544216
      policy_loss: -0.002424484584480524
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3024 s, 130 iter, 1300000 ts, 243 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-15-47
  done: false
  episode_len_mean: 121.67
  episode_reward_max: 385.8184128117503
  episode_reward_mean: 232.9734063696282
  episode_reward_min: -152.13975349686228
  episodes_this_iter: 84
  episodes_total: 8930
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3108.772
    load_time_ms: 2.313
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 3.009265582946607e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.8681617975234985
      kl: 0.006541876588016748
      policy_loss: -0.0012601007474586368
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3140 s, 135 iter, 1350000 ts, 245 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-17-44
  done: false
  episode_len_mean: 115.78
  episode_reward_max: 385.32837416119577
  episode_reward_mean: 205.71600122984987
  episode_reward_min: -164.24476021471418
  episodes_this_iter: 88
  episodes_total: 9342
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3095.048
    load_time_ms: 2.303
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 9.403954246058914e-39
      cur_lr: 4.999999873689376e-05
      entropy: 1.0287377834320068
      kl: 0.0029790711123496294
      policy_loss: -0.0008026716532185674
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3255 s, 140 iter, 1400000 ts, 217 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-19-39
  done: false
  episode_len_mean: 113.34
  episode_reward_max: 365.675897614366
  episode_reward_mean: 198.57541880066478
  episode_reward_min: -165.15043999574382
  episodes_this_iter: 87
  episodes_total: 9768
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3120.439
    load_time_ms: 2.284
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 2.93873307445879e-40
      cur_lr: 4.999999873689376e-05
      entropy: 0.9424148797988892
      kl: 0.005188794806599617
      policy_loss: -0.0006194081506691873
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3371 s, 145 iter, 1450000 ts, 243 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-21-35
  done: false
  episode_len_mean: 128.18
  episode_reward_max: 378.10453935092795
  episode_reward_mean: 280.6088918014087
  episode_reward_min: -149.13881979454908
  episodes_this_iter: 77
  episodes_total: 10182
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3129.872
    load_time_ms: 2.213
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 9.184110135184851e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.8144186735153198
      kl: 0.007091976702213287
      policy_loss: -0.0004275526152923703
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3487 s, 150 iter, 1500000 ts, 218 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-23-32
  done: false
  episode_len_mean: 115.36
  episode_reward_max: 380.12597397539804
  episode_reward_mean: 215.12150298669013
  episode_reward_min: -158.32335428717195
  episodes_this_iter: 85
  episodes_total: 10621
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3130.082
    load_time_ms: 2.398
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 5.74532370373175e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.7523866891860962
      kl: 0.006962417624890804
      policy_loss: -0.0009785895235836506
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3603 s, 155 iter, 1550000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-25-28
  done: false
  episode_len_mean: 114.83
  episode_reward_max: 377.2160150799464
  episode_reward_mean: 207.76109230303163
  episode_reward_min: -162.08445645549102
  episodes_this_iter: 87
  episodes_total: 11062
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3132.84
    load_time_ms: 2.437
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.8216880036222622e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.8840720057487488
      kl: 0.007279830984771252
      policy_loss: -0.002352328272536397
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3720 s, 160 iter, 1600000 ts, 245 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-27-24
  done: false
  episode_len_mean: 114.8
  episode_reward_max: 387.33921661411836
  episode_reward_mean: 223.21254378744692
  episode_reward_min: -161.56577718976882
  episodes_this_iter: 87
  episodes_total: 11491
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3110.182
    load_time_ms: 2.395
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 1.401298464324817e-45
      cur_lr: 4.999999873689376e-05
      entropy: 0.6730775237083435
      kl: 0.011213681660592556
      policy_loss: -0.0024419985711574554
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3835 s, 165 iter, 1650000 ts, 191 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-29-20
  done: false
  episode_len_mean: 116.99
  episode_reward_max: 371.4416396862061
  episode_reward_mean: 235.87054093987987
  episode_reward_min: -168.33678040314646
  episodes_this_iter: 85
  episodes_total: 11937
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3097.063
    load_time_ms: 2.438
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6898240447044373
      kl: 0.005727417767047882
      policy_loss: -0.0012862744042649865
      total_loss: 619

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 3950 s, 170 iter, 1700000 ts, 175 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-31-15
  done: false
  episode_len_mean: 113.62
  episode_reward_max: 380.73909695568983
  episode_reward_mean: 215.88074655954392
  episode_reward_min: -165.94332789525552
  episodes_this_iter: 87
  episodes_total: 12385
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3119.353
    load_time_ms: 2.406
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6466781497001648
      kl: 0.009691033512353897
      policy_loss: -6.155354640213773e-05
      total_loss: 71

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4065 s, 175 iter, 1750000 ts, 204 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-33-10
  done: false
  episode_len_mean: 114.41
  episode_reward_max: 379.45600166873993
  episode_reward_mean: 218.7124818848244
  episode_reward_min: -159.5579185374778
  episodes_this_iter: 87
  episodes_total: 12822
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3121.72
    load_time_ms: 2.474
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5967831611633301
      kl: 0.009233919903635979
      policy_loss: -0.0010529247811064124
      total_loss: 770.3

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4181 s, 180 iter, 1800000 ts, 243 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-35-08
  done: false
  episode_len_mean: 118.97
  episode_reward_max: 384.761535953338
  episode_reward_mean: 236.79740177186824
  episode_reward_min: -164.27021766445023
  episodes_this_iter: 85
  episodes_total: 13247
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3090.828
    load_time_ms: 2.425
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7082278728485107
      kl: 0.00863910187035799
      policy_loss: -0.00043470360105857253
      total_loss: 672.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4297 s, 185 iter, 1850000 ts, 236 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-37-03
  done: false
  episode_len_mean: 112.5
  episode_reward_max: 367.67554349331397
  episode_reward_mean: 218.9953286700333
  episode_reward_min: -164.42507714985595
  episodes_this_iter: 88
  episodes_total: 13695
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3102.724
    load_time_ms: 2.59
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5722836256027222
      kl: 0.012838783673942089
      policy_loss: -0.0015225443057715893
      total_loss: 787.3

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4412 s, 190 iter, 1900000 ts, 215 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-38-59
  done: false
  episode_len_mean: 110.23
  episode_reward_max: 370.79413771957064
  episode_reward_mean: 203.62163327624006
  episode_reward_min: -167.9993847886526
  episodes_this_iter: 91
  episodes_total: 14144
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3125.864
    load_time_ms: 2.521
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6213579177856445
      kl: 0.00842702854424715
      policy_loss: -0.0007935443427413702
      total_loss: 947.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4528 s, 195 iter, 1950000 ts, 217 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-40-55
  done: false
  episode_len_mean: 113.17
  episode_reward_max: 381.06179498358125
  episode_reward_mean: 236.98250790455077
  episode_reward_min: -163.08236878086257
  episodes_this_iter: 88
  episodes_total: 14595
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3112.302
    load_time_ms: 2.375
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.33799734711647034
      kl: 0.014494752511382103
      policy_loss: -0.0021310055162757635
      total_loss: 7

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4644 s, 200 iter, 2000000 ts, 197 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-42-51
  done: false
  episode_len_mean: 107.27
  episode_reward_max: 382.66331331426176
  episode_reward_mean: 186.36659103329242
  episode_reward_min: -168.744992506856
  episodes_this_iter: 92
  episodes_total: 15048
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3100.318
    load_time_ms: 2.468
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7807875871658325
      kl: 0.014839710667729378
      policy_loss: -0.0010986224515363574
      total_loss: 949.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4761 s, 205 iter, 2050000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-44-48
  done: false
  episode_len_mean: 109.71
  episode_reward_max: 372.00278746865723
  episode_reward_mean: 207.9003165241028
  episode_reward_min: -161.23725938160493
  episodes_this_iter: 91
  episodes_total: 15514
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3128.234
    load_time_ms: 2.392
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5464769005775452
      kl: 0.011116825975477695
      policy_loss: 0.00029914750484749675
      total_loss: 866

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4878 s, 210 iter, 2100000 ts, 182 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-46-45
  done: false
  episode_len_mean: 104.16
  episode_reward_max: 377.8793650869533
  episode_reward_mean: 175.667572458508
  episode_reward_min: -166.44915801816705
  episodes_this_iter: 98
  episodes_total: 15991
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3151.045
    load_time_ms: 2.514
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5386320352554321
      kl: 0.01470652874559164
      policy_loss: 0.0007835061405785382
      total_loss: 1076.08

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 4994 s, 215 iter, 2150000 ts, 213 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-48-41
  done: false
  episode_len_mean: 112.53
  episode_reward_max: 375.3321937077884
  episode_reward_mean: 223.22503385364624
  episode_reward_min: -168.140871401678
  episodes_this_iter: 89
  episodes_total: 16448
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3123.104
    load_time_ms: 2.647
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.599127471446991
      kl: 0.022411826997995377
      policy_loss: 0.0008763552759774029
      total_loss: 738.251

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 5109 s, 220 iter, 2200000 ts, 240 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-50-37
  done: false
  episode_len_mean: 113.67
  episode_reward_max: 377.67155770486755
  episode_reward_mean: 227.5897138420145
  episode_reward_min: -164.5775517609549
  episodes_this_iter: 89
  episodes_total: 16899
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3098.246
    load_time_ms: 2.556
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6432874798774719
      kl: 0.0322430394589901
      policy_loss: 0.0014807999832555652
      total_loss: 653.105

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 5225 s, 225 iter, 2250000 ts, 216 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-52-33
  done: false
  episode_len_mean: 106.77
  episode_reward_max: 389.6877103731459
  episode_reward_mean: 199.65357540358767
  episode_reward_min: -166.93978931208645
  episodes_this_iter: 94
  episodes_total: 17363
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3123.163
    load_time_ms: 2.39
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5223647356033325
      kl: 0.007589490152895451
      policy_loss: -0.0011304061627015471
      total_loss: 928.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 5341 s, 230 iter, 2300000 ts, 215 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-54-30
  done: false
  episode_len_mean: 107.07
  episode_reward_max: 387.04390283114094
  episode_reward_mean: 194.47776723951586
  episode_reward_min: -164.60555063527147
  episodes_this_iter: 91
  episodes_total: 17823
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3139.916
    load_time_ms: 2.327
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.4826791286468506
      kl: 0.027690110728144646
      policy_loss: -0.000799387285951525
      total_loss: 886

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 5458 s, 235 iter, 2350000 ts, 179 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-56-26
  done: false
  episode_len_mean: 103.37
  episode_reward_max: 392.33766690789196
  episode_reward_mean: 178.87855835259379
  episode_reward_min: -168.32256515109964
  episodes_this_iter: 97
  episodes_total: 18295
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3129.453
    load_time_ms: 2.315
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.29651811718940735
      kl: 0.014863050542771816
      policy_loss: -0.0001721356820780784
      total_loss: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 5574 s, 240 iter, 2400000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_22-58-23
  done: false
  episode_len_mean: 102.06
  episode_reward_max: 395.73790529901345
  episode_reward_mean: 174.33661404435682
  episode_reward_min: -162.7019110153013
  episodes_this_iter: 99
  episodes_total: 18759
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3137.246
    load_time_ms: 2.398
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.34160444140434265
      kl: 0.015673520043492317
      policy_loss: 0.00010449802357470617
      total_loss: 10

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7461], 5690 s, 245 iter, 2450000 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-00-20
  done: false
  episode_len_mean: 107.72
  episode_reward_max: 380.0780686264396
  episode_reward_mean: 215.46463808784833
  episode_reward_min: -167.963295128083
  episodes_this_iter: 92
  episodes_total: 19234
  experiment_id: 3b01ae105e764e23bbe0124f66bb6454
  hostname: Gandalf
  info:
    grad_time_ms: 3120.016
    load_time_ms: 2.484
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.188462495803833
      kl: 0.012412231415510178
      policy_loss: -9.230127761838958e-05
      total_loss: 818.14

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=7461], 5808 s, 250 iter, 2500000 ts, 219 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=7461], 5808 s, 250 iter, 2500000 ts, 219 rew



In [14]:
# Start a training run; its progress is streamed as cell output.
# NOTE(review): executeTraining is presumably defined in an earlier cell of
# this notebook -- its definition is not visible here, so confirm which
# configuration it picks up before re-running this cell.
executeTraining()

 Starting SUMO on port 55481
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-15_23-01-55in0lfhts -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



7.570610352052126
7.44379132756061


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-02-54
  done: false
  episode_len_mean: 498.65
  episode_reward_max: 170.47475611806004
  episode_reward_mean: 87.70432500911178
  episode_reward_min: -119.30654216352495
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 4084.608
    load_time_ms: 142.385
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4246633052825928
      kl: 0.001074183383025229
      policy_loss: -0.0021634234581142664
      total_loss: 22.524871826171875
      vf_explained_var: 0.17586717009544373
      vf_loss: 22.526817321777344
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4232540130615234
      kl: 0.0012878070119768381
      policy_loss: -0.0021028874907642603
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 116 s, 5 iter, 50000 ts, 142 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-04-48
  done: false
  episode_len_mean: 359.09
  episode_reward_max: 326.06584826141716
  episode_reward_mean: 154.8339446892749
  episode_reward_min: -151.84461367284308
  episodes_this_iter: 32
  episodes_total: 150
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3267.225
    load_time_ms: 25.741
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.3979257345199585
      kl: 0.0054590436629951
      policy_loss: -0.0032013137824833393
      total_los

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 232 s, 10 iter, 100000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-06-45
  done: false
  episode_len_mean: 252.28
  episode_reward_max: 350.59919410284596
  episode_reward_mean: 187.93934713895578
  episode_reward_min: -164.57936496086586
  episodes_this_iter: 40
  episodes_total: 330
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3114.317
    load_time_ms: 2.393
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.3838242292404175
      kl: 0.0072004892863333225
      policy_loss: -0.0033651250414550304
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 349 s, 15 iter, 150000 ts, 258 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-08-43
  done: false
  episode_len_mean: 196.41
  episode_reward_max: 337.84472487140107
  episode_reward_mean: 266.7705277022587
  episode_reward_min: -156.98562962493125
  episodes_this_iter: 52
  episodes_total: 573
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3103.689
    load_time_ms: 2.392
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 6.10351571594947e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.3879212141036987
      kl: 0.0012298186775296926
      policy_loss: -0.0009360687690787017
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 467 s, 20 iter, 200000 ts, 285 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-10-41
  done: false
  episode_len_mean: 188.2
  episode_reward_max: 359.52779806855096
  episode_reward_mean: 297.09407317090705
  episode_reward_min: -139.0427618385579
  episodes_this_iter: 54
  episodes_total: 831
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3098.885
    load_time_ms: 2.546
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3208365440368652
      kl: 0.006732753477990627
      policy_loss: -0.0015038966666907072
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 586 s, 25 iter, 250000 ts, 306 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-12-40
  done: false
  episode_len_mean: 159.47
  episode_reward_max: 367.2004272198293
  episode_reward_mean: 307.6972373346832
  episode_reward_min: -153.40265035856044
  episodes_this_iter: 62
  episodes_total: 1126
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3123.767
    load_time_ms: 2.408
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 9.536743306171047e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.2652729749679565
      kl: 0.014978492632508278
      policy_loss: -0.00375431589782238
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 704 s, 30 iter, 300000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-14-38
  done: false
  episode_len_mean: 150.15
  episode_reward_max: 375.9148730739635
  episode_reward_mean: 330.2889817521002
  episode_reward_min: 275.5174416250596
  episodes_this_iter: 68
  episodes_total: 1453
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3101.718
    load_time_ms: 2.279
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.222822666168213
      kl: 0.01318854559212923
      policy_loss: -0.0031147825066000223
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 822 s, 35 iter, 350000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-16-36
  done: false
  episode_len_mean: 136.6
  episode_reward_max: 364.3829293112878
  episode_reward_mean: 315.37517713502103
  episode_reward_min: -123.28555932878814
  episodes_this_iter: 74
  episodes_total: 1814
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3073.766
    load_time_ms: 2.402
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 1.4901161415892261e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.2428020238876343
      kl: 0.002564899157732725
      policy_loss: -0.002467632759362459
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 940 s, 40 iter, 400000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-18-34
  done: false
  episode_len_mean: 133.74
  episode_reward_max: 362.8055514288474
  episode_reward_mean: 322.0954365065569
  episode_reward_min: 287.3700251125703
  episodes_this_iter: 75
  episodes_total: 2187
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3043.506
    load_time_ms: 2.584
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 9.313225884932663e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.1641511917114258
      kl: 0.0104531766846776
      policy_loss: -0.0031385032925754786
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1060 s, 45 iter, 450000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-20-34
  done: false
  episode_len_mean: 133.56
  episode_reward_max: 377.4746166939901
  episode_reward_mean: 337.1574848570865
  episode_reward_min: 290.95198985576474
  episodes_this_iter: 74
  episodes_total: 2561
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3007.265
    load_time_ms: 2.645
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1425268650054932
      kl: 0.010391488671302795
      policy_loss: -0.0029361145570874214
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1178 s, 50 iter, 500000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-22-34
  done: false
  episode_len_mean: 131.99
  episode_reward_max: 376.4695831495266
  episode_reward_mean: 332.3682245245393
  episode_reward_min: 293.3401900427254
  episodes_this_iter: 76
  episodes_total: 2940
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3009.591
    load_time_ms: 2.464
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 2.9103830890414573e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.048168420791626
      kl: 0.025389522314071655
      policy_loss: -0.005792652256786823
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1295 s, 55 iter, 550000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-24-30
  done: false
  episode_len_mean: 134.73
  episode_reward_max: 376.53835803174303
  episode_reward_mean: 336.7062122723065
  episode_reward_min: 295.1940207841407
  episodes_this_iter: 75
  episodes_total: 3314
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3015.088
    load_time_ms: 2.41
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 3.6379788613018216e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.062902808189392
      kl: 0.01229006052017212
      policy_loss: -0.0032019743230193853
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1410 s, 60 iter, 600000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-26-25
  done: false
  episode_len_mean: 132.77
  episode_reward_max: 379.7853319984312
  episode_reward_mean: 335.4915126532312
  episode_reward_min: 158.56275317489542
  episodes_this_iter: 75
  episodes_total: 3689
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3007.858
    load_time_ms: 2.542
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 4.547473576627277e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.9730664491653442
      kl: 0.0037529037799686193
      policy_loss: -0.0019237742526456714
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1525 s, 65 iter, 650000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-28-20
  done: false
  episode_len_mean: 131.59
  episode_reward_max: 369.44363447660675
  episode_reward_mean: 337.9430840791714
  episode_reward_min: 298.6535277376039
  episodes_this_iter: 76
  episodes_total: 4068
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2988.652
    load_time_ms: 2.542
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 2.842170985392048e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.8839089870452881
      kl: 0.0076442742720246315
      policy_loss: -0.0025507521349936724
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1640 s, 70 iter, 700000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-30-16
  done: false
  episode_len_mean: 132.39
  episode_reward_max: 374.94079394934107
  episode_reward_mean: 336.51466421871936
  episode_reward_min: 290.4956103339767
  episodes_this_iter: 76
  episodes_total: 4445
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2995.427
    load_time_ms: 2.286
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 1.77635686587003e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.8972395062446594
      kl: 0.005020459648221731
      policy_loss: -0.00021761999232694507
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1755 s, 75 iter, 750000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-32-11
  done: false
  episode_len_mean: 133.86
  episode_reward_max: 378.9268893014564
  episode_reward_mean: 336.2041190232369
  episode_reward_min: 296.8928515890895
  episodes_this_iter: 75
  episodes_total: 4820
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2999.393
    load_time_ms: 2.259
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 1.1102230411687688e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.7973099946975708
      kl: 0.009418988600373268
      policy_loss: -0.0017170482315123081
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1869 s, 80 iter, 800000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-34-05
  done: false
  episode_len_mean: 133.66
  episode_reward_max: 392.128040702376
  episode_reward_mean: 340.42656266661055
  episode_reward_min: 148.62153603224965
  episodes_this_iter: 75
  episodes_total: 5194
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2975.797
    load_time_ms: 2.516
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 6.938894007304805e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.6921762228012085
      kl: 0.005871878005564213
      policy_loss: 0.0019601131789386272
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 1982 s, 85 iter, 850000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-35-58
  done: false
  episode_len_mean: 133.16
  episode_reward_max: 380.30849705724825
  episode_reward_mean: 338.9227224348641
  episode_reward_min: 293.75467613792955
  episodes_this_iter: 76
  episodes_total: 5570
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2965.574
    load_time_ms: 2.525
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 2.1684043772827515e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.6395883560180664
      kl: 0.009630017913877964
      policy_loss: -0.0009242318919859827
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2097 s, 90 iter, 900000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-37-53
  done: false
  episode_len_mean: 133.39
  episode_reward_max: 381.37463243371246
  episode_reward_mean: 340.0576445679007
  episode_reward_min: 290.7952445717048
  episodes_this_iter: 75
  episodes_total: 5943
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2988.049
    load_time_ms: 2.388
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 1.3552527358017197e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.554775059223175
      kl: 0.004975048825144768
      policy_loss: 9.391719504492357e-05
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2212 s, 95 iter, 950000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-39-48
  done: false
  episode_len_mean: 133.68
  episode_reward_max: 384.12704928100806
  episode_reward_mean: 340.7551172738297
  episode_reward_min: 167.52152969139854
  episodes_this_iter: 75
  episodes_total: 6317
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2990.684
    load_time_ms: 2.45
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.6940659197521496e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.4531172215938568
      kl: 0.009364457800984383
      policy_loss: 0.001630492857657373
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2325 s, 100 iter, 1000000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-41-42
  done: false
  episode_len_mean: 134.2
  episode_reward_max: 387.52324935262
  episode_reward_mean: 336.78300688596966
  episode_reward_min: 170.455628487151
  episodes_this_iter: 74
  episodes_total: 6690
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2972.02
    load_time_ms: 2.424
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.38919752836227417
      kl: 0.0084281787276268
      policy_loss: 0.00295302108861506
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2439 s, 105 iter, 1050000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-43-35
  done: false
  episode_len_mean: 134.45
  episode_reward_max: 386.29831142742205
  episode_reward_mean: 341.47573167643816
  episode_reward_min: 288.26632441331253
  episodes_this_iter: 75
  episodes_total: 7063
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2985.818
    load_time_ms: 2.487
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.3267817199230194
      kl: 0.015534990467131138
      policy_loss: 0.004494604654610157
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2553 s, 110 iter, 1100000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-45-30
  done: false
  episode_len_mean: 133.98
  episode_reward_max: 383.57391813641175
  episode_reward_mean: 331.0950576877764
  episode_reward_min: 165.41676954304097
  episodes_this_iter: 74
  episodes_total: 7436
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3025.719
    load_time_ms: 2.511
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.22544914484024048
      kl: 0.009759061969816685
      policy_loss: 0.0020414222963154316
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2668 s, 115 iter, 1150000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-47-25
  done: false
  episode_len_mean: 133.96
  episode_reward_max: 372.0240834815608
  episode_reward_mean: 337.1652545820469
  episode_reward_min: 288.87336172809864
  episodes_this_iter: 75
  episodes_total: 7810
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3024.713
    load_time_ms: 2.347
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.17576585710048676
      kl: 0.00852571427822113
      policy_loss: 0.004200345370918512
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2782 s, 120 iter, 1200000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-49-19
  done: false
  episode_len_mean: 134.27
  episode_reward_max: 382.2886263376061
  episode_reward_mean: 335.0852467706406
  episode_reward_min: 287.8973003169023
  episodes_this_iter: 75
  episodes_total: 8183
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2981.633
    load_time_ms: 2.289
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.323488999806367e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.15957985818386078
      kl: 0.014233443886041641
      policy_loss: 0.005291277542710304
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 2896 s, 125 iter, 1250000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-51-14
  done: false
  episode_len_mean: 134.68
  episode_reward_max: 391.1223440560881
  episode_reward_mean: 331.92464323412975
  episode_reward_min: 150.89173558126703
  episodes_this_iter: 75
  episodes_total: 8555
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2985.385
    load_time_ms: 2.403
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 1.323488999806367e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.13408628106117249
      kl: 0.015279765240848064
      policy_loss: -0.0038544272538274527
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3011 s, 130 iter, 1300000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-53-09
  done: false
  episode_len_mean: 134.49
  episode_reward_max: 385.8481843557323
  episode_reward_mean: 334.1415894602397
  episode_reward_min: 281.33871281621464
  episodes_this_iter: 75
  episodes_total: 8927
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3017.729
    load_time_ms: 2.483
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 1.985232957367678e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.13298331201076508
      kl: 0.02735053561627865
      policy_loss: 0.013257407583296299
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3126 s, 135 iter, 1350000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-55-04
  done: false
  episode_len_mean: 134.06
  episode_reward_max: 377.6528889465552
  episode_reward_mean: 334.5136962927408
  episode_reward_min: 153.69714798959876
  episodes_this_iter: 74
  episodes_total: 9299
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3003.593
    load_time_ms: 2.409
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 1.985232957367678e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.03702544420957565
      kl: 0.023928744718432426
      policy_loss: 0.009539034217596054
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3240 s, 140 iter, 1400000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-56-57
  done: false
  episode_len_mean: 135.15
  episode_reward_max: 380.85912055017457
  episode_reward_mean: 334.80446755436185
  episode_reward_min: 291.6275085338717
  episodes_this_iter: 74
  episodes_total: 9670
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2972.436
    load_time_ms: 2.518
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 2.23338769333622e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.05849035084247589
      kl: 0.10008352249860764
      policy_loss: 0.013592556118965149
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3355 s, 145 iter, 1450000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-15_23-58-53
  done: false
  episode_len_mean: 136.98
  episode_reward_max: 389.22164798646077
  episode_reward_mean: 330.8844533424649
  episode_reward_min: 153.026308978237
  episodes_this_iter: 72
  episodes_total: 10037
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2986.001
    load_time_ms: 2.559
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 5.02512324677882e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.05019626021385193
      kl: 0.21626067161560059
      policy_loss: 0.008036550134420395
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3468 s, 150 iter, 1500000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-00-47
  done: false
  episode_len_mean: 135.27
  episode_reward_max: 379.24782742380745
  episode_reward_mean: 336.8544057477346
  episode_reward_min: 139.50351529009265
  episodes_this_iter: 74
  episodes_total: 10406
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3002.364
    load_time_ms: 2.438
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 2.5439677143050236e-23
      cur_lr: 4.999999873689376e-05
      entropy: -0.011522195301949978
      kl: 40.1021614074707
      policy_loss: 0.013854935765266418
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3583 s, 155 iter, 1550000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-02-42
  done: false
  episode_len_mean: 136.33
  episode_reward_max: 394.44308918323037
  episode_reward_mean: 335.10885612269857
  episode_reward_min: 277.1448825473789
  episodes_this_iter: 74
  episodes_total: 10774
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2988.487
    load_time_ms: 2.316
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.2878835853555129e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.09401428699493408
      kl: 0.016851291060447693
      policy_loss: 0.004174085799604654
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3698 s, 160 iter, 1600000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-04-37
  done: false
  episode_len_mean: 139.45
  episode_reward_max: 394.26372896579636
  episode_reward_mean: 331.6454211198852
  episode_reward_min: 122.50483944526684
  episodes_this_iter: 72
  episodes_total: 11136
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2992.015
    load_time_ms: 2.515
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 2.897738540366447e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.14626801013946533
      kl: 0.5877218842506409
      policy_loss: 0.014412607997655869
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3813 s, 165 iter, 1650000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-06-33
  done: false
  episode_len_mean: 139.55
  episode_reward_max: 397.86973824080815
  episode_reward_mean: 310.4460747217599
  episode_reward_min: 78.19396647661927
  episodes_this_iter: 72
  episodes_total: 11491
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3024.095
    load_time_ms: 2.657
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 1.4669802796331986e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.15103432536125183
      kl: 0.025663848966360092
      policy_loss: 0.002358253113925457
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 3929 s, 170 iter, 1700000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-08-29
  done: false
  episode_len_mean: 139.56
  episode_reward_max: 397.0766924101424
  episode_reward_mean: 311.2709654404672
  episode_reward_min: 64.3975742103857
  episodes_this_iter: 72
  episodes_total: 11849
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3023.257
    load_time_ms: 2.478
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 4.951058052487036e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.21972545981407166
      kl: 0.045532193034887314
      policy_loss: 0.002481726696714759
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4045 s, 175 iter, 1750000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-10-25
  done: false
  episode_len_mean: 136.0
  episode_reward_max: 394.23420251276013
  episode_reward_mean: 322.6209675616651
  episode_reward_min: 108.69345202526381
  episodes_this_iter: 74
  episodes_total: 12214
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3005.891
    load_time_ms: 2.343
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 8.354909504317013e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.17100819945335388
      kl: 0.0338938869535923
      policy_loss: 0.0060699740424752235
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4160 s, 180 iter, 1800000 ts, 316 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-12-21
  done: false
  episode_len_mean: 137.01
  episode_reward_max: 402.8455540367331
  episode_reward_mean: 318.5470932888674
  episode_reward_min: 103.30186473709432
  episodes_this_iter: 72
  episodes_total: 12579
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2993.35
    load_time_ms: 2.272
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 1.2532369507133705e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.2327170968055725
      kl: 0.7959517240524292
      policy_loss: 0.01416824571788311
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4274 s, 185 iter, 1850000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-14-15
  done: false
  episode_len_mean: 133.08
  episode_reward_max: 391.7374042032769
  episode_reward_mean: 333.12803883819544
  episode_reward_min: 128.774816565633
  episodes_this_iter: 75
  episodes_total: 12952
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2999.994
    load_time_ms: 2.637
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 9.399271879692093e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.20565265417099
      kl: 0.01085569802671671
      policy_loss: -0.0015686347614973783
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4390 s, 190 iter, 1900000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-16-11
  done: false
  episode_len_mean: 132.54
  episode_reward_max: 395.61801792727823
  episode_reward_mean: 333.7670853654823
  episode_reward_min: 167.75672947679845
  episodes_this_iter: 76
  episodes_total: 13329
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3004.57
    load_time_ms: 2.679
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 1.1749089849615117e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.17626646161079407
      kl: 0.019128959625959396
      policy_loss: 0.0011265891371294856
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4505 s, 195 iter, 1950000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-18-06
  done: false
  episode_len_mean: 132.45
  episode_reward_max: 386.28324955077784
  episode_reward_mean: 339.1303547867259
  episode_reward_min: 166.6504379954003
  episodes_this_iter: 75
  episodes_total: 13706
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2984.172
    load_time_ms: 2.536
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 1.1749089849615117e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.09936365485191345
      kl: 0.02196331135928631
      policy_loss: 0.0009350453619845212
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4621 s, 200 iter, 2000000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-20-03
  done: false
  episode_len_mean: 133.44
  episode_reward_max: 386.6854274159
  episode_reward_mean: 340.8776057669205
  episode_reward_min: 167.08845062470425
  episodes_this_iter: 76
  episodes_total: 14082
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3001.209
    load_time_ms: 2.459
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 1.7623636289035613e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.16078102588653564
      kl: 0.07091172784566879
      policy_loss: 0.005409415811300278
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4737 s, 205 iter, 2050000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-21-59
  done: false
  episode_len_mean: 133.11
  episode_reward_max: 389.8791549220087
  episode_reward_mean: 337.730674334327
  episode_reward_min: 139.1617943093965
  episodes_this_iter: 74
  episodes_total: 14455
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3019.086
    load_time_ms: 2.441
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 8.921968471409823e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.23847855627536774
      kl: 15.747385025024414
      policy_loss: 0.0020489974413067102
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4852 s, 210 iter, 2100000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-23-54
  done: false
  episode_len_mean: 131.76
  episode_reward_max: 391.11484135582316
  episode_reward_mean: 341.93033890295396
  episode_reward_min: 140.42993976918967
  episodes_this_iter: 76
  episodes_total: 14835
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2992.84
    load_time_ms: 2.558
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 2.0074429464568884e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.23168091475963593
      kl: 0.01943860575556755
      policy_loss: -0.0001351255486952141
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 4966 s, 215 iter, 2150000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-25-48
  done: false
  episode_len_mean: 132.41
  episode_reward_max: 392.8868200284838
  episode_reward_mean: 335.05115672707893
  episode_reward_min: 137.74787767789866
  episodes_this_iter: 76
  episodes_total: 15213
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2979.123
    load_time_ms: 2.372
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.0037214732284442e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.23896446824073792
      kl: 3.630250930786133
      policy_loss: 0.0297969002276659
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 5080 s, 220 iter, 2200000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-27-42
  done: false
  episode_len_mean: 132.04
  episode_reward_max: 391.36181657868605
  episode_reward_mean: 339.6047951188136
  episode_reward_min: 139.66874969544665
  episodes_this_iter: 76
  episodes_total: 15593
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2976.089
    load_time_ms: 2.321
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 1.5055821290633096e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.1864660680294037
      kl: 0.016069000586867332
      policy_loss: -0.0013496953761205077
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 5196 s, 225 iter, 2250000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-29-38
  done: false
  episode_len_mean: 132.26
  episode_reward_max: 396.34111533458196
  episode_reward_mean: 344.01200283397554
  episode_reward_min: 160.14939623935112
  episodes_this_iter: 76
  episodes_total: 15971
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3000.269
    load_time_ms: 2.636
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 2.258373032036251e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.14721240103244781
      kl: 0.012276813387870789
      policy_loss: -0.0013770642690360546
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 5310 s, 230 iter, 2300000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-31-33
  done: false
  episode_len_mean: 134.66
  episode_reward_max: 390.577757421024
  episode_reward_mean: 344.56615113915416
  episode_reward_min: 293.62940296470833
  episodes_this_iter: 74
  episodes_total: 16345
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3034.007
    load_time_ms: 2.681
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 5.081338837405425e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.2217559814453125
      kl: 0.011322722770273685
      policy_loss: -0.0017108035972341895
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 5425 s, 235 iter, 2350000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-33-27
  done: false
  episode_len_mean: 134.59
  episode_reward_max: 396.89290736701
  episode_reward_mean: 345.30241429447807
  episode_reward_min: 150.50929002300256
  episodes_this_iter: 74
  episodes_total: 16718
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3005.296
    load_time_ms: 2.518
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 7.622009871695271e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.2368461638689041
      kl: 6.815205097198486
      policy_loss: -0.005711275152862072
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 5539 s, 240 iter, 2400000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-35-22
  done: false
  episode_len_mean: 137.92
  episode_reward_max: 393.84725847771773
  episode_reward_mean: 344.30323599919
  episode_reward_min: 153.836589030631
  episodes_this_iter: 72
  episodes_total: 17084
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 2995.643
    load_time_ms: 2.657
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 1.1433015130660333e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.23505260050296783
      kl: 0.15773719549179077
      policy_loss: 0.008639855310320854
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=7521], 5653 s, 245 iter, 2450000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-37-16
  done: false
  episode_len_mean: 137.12
  episode_reward_max: 395.91621094658973
  episode_reward_mean: 343.8763363077261
  episode_reward_min: -131.81601546986894
  episodes_this_iter: 73
  episodes_total: 17448
  experiment_id: 9b970809237441b9a50eed3b1415f24a
  hostname: Gandalf
  info:
    grad_time_ms: 3011.178
    load_time_ms: 2.626
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 3.8586414272192547e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.24488331377506256
      kl: 0.02620983123779297
      policy_loss: -0.007303346414119005
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=7521], 5767 s, 250 iter, 2500000 ts, 333 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=7521], 5767 s, 250 iter, 2500000 ts, 333 rew



In [15]:
# Launch another training run; executeTraining() is presumably defined in an
# earlier cell of this notebook (not visible here).
# NOTE(review): the log printed below shows a new experiment_id, a new worker
# pid, and episode/step counters starting again from zero, so this call appears
# to start a fresh PPO trial rather than resume the one that terminated above
# — confirm against the definition of executeTraining().
executeTraining()

 Starting SUMO on port 52327
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-16_00-38-49cd6pa1wc -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



8.301965510956174
10.710899513891299


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-39-26
  done: false
  episode_len_mean: 486.5
  episode_reward_max: 189.42097382279732
  episode_reward_mean: 57.57677621561136
  episode_reward_min: -160.5236817288316
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 4020.143
    load_time_ms: 141.281
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4184932708740234
      kl: 0.0012693508761003613
      policy_loss: -0.0016403376357629895
      total_loss: 68.26170349121094
      vf_explained_var: 0.01592213287949562
      vf_loss: 68.26306915283203
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4191821813583374
      kl: 0.0013002032646909356
      policy_loss: -0.0016276350943371654
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 115 s, 5 iter, 50000 ts, 115 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-41-20
  done: false
  episode_len_mean: 416.98
  episode_reward_max: 377.8943891226113
  episode_reward_mean: 116.38375656777349
  episode_reward_min: -164.2774211541708
  episodes_this_iter: 28
  episodes_total: 137
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3217.456
    load_time_ms: 25.681
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4185805320739746
      kl: 0.001790710142813623
      policy_loss: -0.0019309070194140077
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 230 s, 10 iter, 100000 ts, 155 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-43-15
  done: false
  episode_len_mean: 294.22
  episode_reward_max: 378.01737622420296
  episode_reward_mean: 143.7713342143519
  episode_reward_min: -153.4764137094124
  episodes_this_iter: 38
  episodes_total: 297
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3077.137
    load_time_ms: 2.478
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.4411182403564453
      kl: 0.000992991030216217
      policy_loss: -0.0004092831222806126
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 344 s, 15 iter, 150000 ts, 161 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-45-09
  done: false
  episode_len_mean: 259.23
  episode_reward_max: 379.11198064600427
  episode_reward_mean: 200.28541928364214
  episode_reward_min: -159.8750112790483
  episodes_this_iter: 39
  episodes_total: 492
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3111.399
    load_time_ms: 2.327
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 6.10351571594947e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.4343302249908447
      kl: 0.003333219327032566
      policy_loss: -0.0016504965024068952
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 459 s, 20 iter, 200000 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-47-05
  done: false
  episode_len_mean: 195.91
  episode_reward_max: 385.11030767870704
  episode_reward_mean: 188.39098403745743
  episode_reward_min: -164.5694038824888
  episodes_this_iter: 51
  episodes_total: 741
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3089.334
    load_time_ms: 2.36
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 1.9073486612342094e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.412397861480713
      kl: 0.0030604221392422915
      policy_loss: -0.0018619486363604665
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 575 s, 25 iter, 250000 ts, 187 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-49-01
  done: false
  episode_len_mean: 165.44
  episode_reward_max: 390.8918146537367
  episode_reward_mean: 196.01981257944786
  episode_reward_min: -167.42198604277203
  episodes_this_iter: 59
  episodes_total: 1038
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3059.122
    load_time_ms: 2.364
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 5.9604645663569045e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.4513643980026245
      kl: 0.00356932170689106
      policy_loss: -0.0016634032363072038
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 693 s, 30 iter, 300000 ts, 232 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-51-00
  done: false
  episode_len_mean: 157.15
  episode_reward_max: 376.988687544101
  episode_reward_mean: 230.44394207458913
  episode_reward_min: -155.6901010701804
  episodes_this_iter: 64
  episodes_total: 1356
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3079.885
    load_time_ms: 2.356
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.8626451769865326e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.438873291015625
      kl: 0.006658628117293119
      policy_loss: -0.0026058154180645943
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 810 s, 35 iter, 350000 ts, 295 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-52-57
  done: false
  episode_len_mean: 151.17
  episode_reward_max: 375.8701587205072
  episode_reward_mean: 271.3333553323311
  episode_reward_min: -155.7607928951257
  episodes_this_iter: 68
  episodes_total: 1684
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3077.915
    load_time_ms: 2.401
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 1.1641532356165829e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.4171193838119507
      kl: 0.004803942982107401
      policy_loss: -0.002885368186980486
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 927 s, 40 iter, 400000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-54-54
  done: false
  episode_len_mean: 150.92
  episode_reward_max: 374.4742321787901
  episode_reward_mean: 308.00284903140846
  episode_reward_min: -152.27497361226708
  episodes_this_iter: 67
  episodes_total: 2020
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3074.352
    load_time_ms: 2.34
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 3.6379788613018216e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.3835746049880981
      kl: 0.00710438285022974
      policy_loss: -0.0039032583590596914
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1043 s, 45 iter, 450000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-56-50
  done: false
  episode_len_mean: 148.36
  episode_reward_max: 389.54300492966496
  episode_reward_mean: 318.36511098333995
  episode_reward_min: 100.48786088508109
  episodes_this_iter: 67
  episodes_total: 2354
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3072.52
    load_time_ms: 2.457
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 1.1368683941568192e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.3363851308822632
      kl: 0.006053559482097626
      policy_loss: -0.0019811810925602913
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1160 s, 50 iter, 500000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_00-58-47
  done: false
  episode_len_mean: 145.12
  episode_reward_max: 378.52023324424835
  episode_reward_mean: 332.14997080906136
  episode_reward_min: -129.40977542117238
  episodes_this_iter: 70
  episodes_total: 2697
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3090.231
    load_time_ms: 2.425
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 7.10542746348012e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.3299390077590942
      kl: 0.0038850163109600544
      policy_loss: -0.0014989989576861262
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1277 s, 55 iter, 550000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-00-44
  done: false
  episode_len_mean: 142.12
  episode_reward_max: 380.80385781370853
  episode_reward_mean: 340.38247309193343
  episode_reward_min: 162.34061614122163
  episodes_this_iter: 72
  episodes_total: 3047
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3083.906
    load_time_ms: 2.262
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 4.440892164675075e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.268591046333313
      kl: 0.011310072615742683
      policy_loss: -0.004232876002788544
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1393 s, 60 iter, 600000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-02-41
  done: false
  episode_len_mean: 139.92
  episode_reward_max: 384.6322655274044
  episode_reward_mean: 348.0251302179841
  episode_reward_min: 312.27494010917616
  episodes_this_iter: 72
  episodes_total: 3403
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3052.525
    load_time_ms: 2.503
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 2.2204460823375376e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.2910977602005005
      kl: 0.007855868898332119
      policy_loss: -0.0032693345565348864
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1508 s, 65 iter, 650000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-04-36
  done: false
  episode_len_mean: 134.12
  episode_reward_max: 385.8078909283217
  episode_reward_mean: 344.78568764471106
  episode_reward_min: -137.56890759193294
  episodes_this_iter: 74
  episodes_total: 3770
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3057.129
    load_time_ms: 2.52
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 1.387778801460961e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.3063716888427734
      kl: 0.004929438699036837
      policy_loss: -0.0014448100700974464
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1624 s, 70 iter, 700000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-06-33
  done: false
  episode_len_mean: 133.85
  episode_reward_max: 387.97070325967377
  episode_reward_mean: 347.3179244134949
  episode_reward_min: -125.85319041077776
  episodes_this_iter: 74
  episodes_total: 4143
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3059.128
    load_time_ms: 2.463
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.318808913230896
      kl: 0.010543109849095345
      policy_loss: -0.0030381553806364536
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1740 s, 75 iter, 750000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-08-28
  done: false
  episode_len_mean: 133.01
  episode_reward_max: 390.1001295238472
  episode_reward_mean: 355.08550145257976
  episode_reward_min: -44.58332904752845
  episodes_this_iter: 75
  episodes_total: 4522
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3038.365
    load_time_ms: 2.497
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 1.0842021886413758e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.2990717887878418
      kl: 0.015284232795238495
      policy_loss: -0.0016160880913957953
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1854 s, 80 iter, 800000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-10-23
  done: false
  episode_len_mean: 132.98
  episode_reward_max: 391.6243147967273
  episode_reward_mean: 334.3618725467596
  episode_reward_min: -128.5164117650973
  episodes_this_iter: 75
  episodes_total: 4896
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3027.575
    load_time_ms: 2.467
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 6.776263679008599e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.3056743144989014
      kl: 0.0037757174577564
      policy_loss: -0.0019219156820327044
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 1969 s, 85 iter, 850000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-12-18
  done: false
  episode_len_mean: 132.13
  episode_reward_max: 392.4532875511711
  episode_reward_mean: 340.1463341158789
  episode_reward_min: -118.07255422173033
  episodes_this_iter: 77
  episodes_total: 5275
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3022.304
    load_time_ms: 2.366
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 8.470329598760748e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.2919474840164185
      kl: 0.002468009712174535
      policy_loss: -0.0013012116542086005
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2084 s, 90 iter, 900000 ts, 354 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-14-14
  done: false
  episode_len_mean: 134.93
  episode_reward_max: 390.0419781561569
  episode_reward_mean: 357.751592787322
  episode_reward_min: 126.85264008647584
  episodes_this_iter: 74
  episodes_total: 5647
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3000.125
    load_time_ms: 2.302
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.3064892292022705
      kl: 0.00878166500478983
      policy_loss: -0.0021654933225363493
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2199 s, 95 iter, 950000 ts, 355 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-16-09
  done: false
  episode_len_mean: 135.74
  episode_reward_max: 391.1330845404719
  episode_reward_mean: 357.2014128736337
  episode_reward_min: 328.72243035151445
  episodes_this_iter: 73
  episodes_total: 6017
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3017.664
    load_time_ms: 2.452
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.3062018156051636
      kl: 0.010494573973119259
      policy_loss: -0.004422461148351431
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2315 s, 100 iter, 1000000 ts, 351 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-18-04
  done: false
  episode_len_mean: 135.05
  episode_reward_max: 387.10256880077736
  episode_reward_mean: 344.60684041774715
  episode_reward_min: -107.43498963035047
  episodes_this_iter: 74
  episodes_total: 6387
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3020.609
    load_time_ms: 2.378
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 6.617444999031835e-25
      cur_lr: 4.999999873689376e-05
      entropy: 1.317020297050476
      kl: 0.004793744534254074
      policy_loss: -0.0009667183039709926
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2431 s, 105 iter, 1050000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-20-01
  done: false
  episode_len_mean: 136.34
  episode_reward_max: 389.60182103318624
  episode_reward_mean: 352.6139969269584
  episode_reward_min: 95.46510877522269
  episodes_this_iter: 74
  episodes_total: 6757
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3032.944
    load_time_ms: 2.342
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 4.1359031243948966e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.330894947052002
      kl: 0.005978010129183531
      policy_loss: -0.0016295156674459577
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2547 s, 110 iter, 1100000 ts, 348 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-21-57
  done: false
  episode_len_mean: 133.25
  episode_reward_max: 382.8372546657624
  episode_reward_mean: 345.07430448252387
  episode_reward_min: -139.59220384382326
  episodes_this_iter: 75
  episodes_total: 7130
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3029.567
    load_time_ms: 2.464
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 2.5849394527468104e-27
      cur_lr: 4.999999873689376e-05
      entropy: 1.3121317625045776
      kl: 0.005161413922905922
      policy_loss: -0.003857939736917615
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2662 s, 115 iter, 1150000 ts, 355 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-23-52
  done: false
  episode_len_mean: 135.16
  episode_reward_max: 388.681499276377
  episode_reward_mean: 355.3690861869095
  episode_reward_min: 142.20308441086308
  episodes_this_iter: 74
  episodes_total: 7500
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3020.576
    load_time_ms: 2.341
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 8.077935789833782e-29
      cur_lr: 4.999999873689376e-05
      entropy: 1.2763571739196777
      kl: 0.0034979020711034536
      policy_loss: -0.0005364996613934636
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2777 s, 120 iter, 1200000 ts, 354 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-25-47
  done: false
  episode_len_mean: 134.95
  episode_reward_max: 386.9951756886959
  episode_reward_mean: 352.4743992279823
  episode_reward_min: 41.66958148305184
  episodes_this_iter: 74
  episodes_total: 7870
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3024.803
    load_time_ms: 2.369
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 2.524354934323057e-30
      cur_lr: 4.999999873689376e-05
      entropy: 1.283003568649292
      kl: 0.004135431721806526
      policy_loss: -0.0011316403979435563
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 2892 s, 125 iter, 1250000 ts, 354 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-27-43
  done: false
  episode_len_mean: 135.36
  episode_reward_max: 395.7727769609461
  episode_reward_mean: 358.0253569415674
  episode_reward_min: 150.04522994289107
  episodes_this_iter: 74
  episodes_total: 8239
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3007.355
    load_time_ms: 2.594
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 7.888609169759553e-32
      cur_lr: 4.999999873689376e-05
      entropy: 1.234152913093567
      kl: 0.00453422823920846
      policy_loss: -0.002489705802872777
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3008 s, 130 iter, 1300000 ts, 359 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-29-39
  done: false
  episode_len_mean: 135.2
  episode_reward_max: 389.9018716410117
  episode_reward_mean: 360.7717226602168
  episode_reward_min: 335.4928030974947
  episodes_this_iter: 74
  episodes_total: 8607
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3029.811
    load_time_ms: 2.612
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 2.4651903655498604e-33
      cur_lr: 4.999999873689376e-05
      entropy: 1.215402603149414
      kl: 0.002148316940292716
      policy_loss: -0.0010990796145051718
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3124 s, 135 iter, 1350000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-31-34
  done: false
  episode_len_mean: 135.07
  episode_reward_max: 387.94882183152134
  episode_reward_mean: 356.3418700510467
  episode_reward_min: 28.49674964087123
  episodes_this_iter: 73
  episodes_total: 8978
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3040.003
    load_time_ms: 2.436
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 7.703719892343314e-35
      cur_lr: 4.999999873689376e-05
      entropy: 1.194858431816101
      kl: 0.00493203429505229
      policy_loss: -0.0014748432440683246
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3238 s, 140 iter, 1400000 ts, 360 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-33-30
  done: false
  episode_len_mean: 135.76
  episode_reward_max: 386.55655930025887
  episode_reward_mean: 358.6938164394006
  episode_reward_min: 132.84967397938937
  episodes_this_iter: 72
  episodes_total: 9345
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3045.948
    load_time_ms: 2.276
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 2.4074124663572855e-36
      cur_lr: 4.999999873689376e-05
      entropy: 1.1470612287521362
      kl: 0.006801423151046038
      policy_loss: -0.0017837855266407132
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3355 s, 145 iter, 1450000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-35-26
  done: false
  episode_len_mean: 135.89
  episode_reward_max: 400.3963543186514
  episode_reward_mean: 357.3277569597054
  episode_reward_min: 68.02705688479313
  episodes_this_iter: 74
  episodes_total: 9715
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3040.485
    load_time_ms: 2.263
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 1.5046327914733034e-37
      cur_lr: 4.999999873689376e-05
      entropy: 1.0979986190795898
      kl: 0.004645944572985172
      policy_loss: -0.0026521896943449974
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3470 s, 150 iter, 1500000 ts, 355 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-37-22
  done: false
  episode_len_mean: 136.87
  episode_reward_max: 393.93618166579216
  episode_reward_mean: 360.2097710326862
  episode_reward_min: 149.32887962908637
  episodes_this_iter: 72
  episodes_total: 10083
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3032.585
    load_time_ms: 2.172
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 4.701977123029457e-39
      cur_lr: 4.999999873689376e-05
      entropy: 1.054105281829834
      kl: 0.008315596729516983
      policy_loss: -0.004577790852636099
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3586 s, 155 iter, 1550000 ts, 356 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-39-18
  done: false
  episode_len_mean: 137.36
  episode_reward_max: 396.52814548472054
  episode_reward_mean: 363.0715003776961
  episode_reward_min: 336.3472745381745
  episodes_this_iter: 73
  episodes_total: 10449
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3027.17
    load_time_ms: 2.27
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.4693735437217167e-40
      cur_lr: 4.999999873689376e-05
      entropy: 1.009243369102478
      kl: 0.008022169582545757
      policy_loss: -0.0023158860858529806
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3700 s, 160 iter, 1600000 ts, 360 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-41-12
  done: false
  episode_len_mean: 135.34
  episode_reward_max: 393.25188480631545
  episode_reward_mean: 362.81405928515704
  episode_reward_min: 176.27405398076394
  episodes_this_iter: 72
  episodes_total: 10817
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3002.149
    load_time_ms: 2.347
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 4.5920550675924255e-42
      cur_lr: 4.999999873689376e-05
      entropy: 1.005225419998169
      kl: 0.0053192852064967155
      policy_loss: -0.0009419578127563
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3814 s, 165 iter, 1650000 ts, 360 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-43-06
  done: false
  episode_len_mean: 135.33
  episode_reward_max: 393.2542957996428
  episode_reward_mean: 358.582490956108
  episode_reward_min: 95.21496087112826
  episodes_this_iter: 73
  episodes_total: 11185
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3012.125
    load_time_ms: 2.367
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 2.872661851865875e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.9600600004196167
      kl: 0.003855790477246046
      policy_loss: 0.00015115528367459774
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 3930 s, 170 iter, 1700000 ts, 359 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-45-03
  done: false
  episode_len_mean: 135.33
  episode_reward_max: 394.14100616933734
  episode_reward_mean: 355.122431372275
  episode_reward_min: 109.29731810746767
  episodes_this_iter: 74
  episodes_total: 11554
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3037.248
    load_time_ms: 2.393
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 1.8216880036222622e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.954255223274231
      kl: 0.012983996421098709
      policy_loss: -0.0029609273187816143
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4045 s, 175 iter, 1750000 ts, 357 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-46-58
  done: false
  episode_len_mean: 133.83
  episode_reward_max: 385.4130584640792
  episode_reward_mean: 355.7206709140013
  episode_reward_min: 133.02583860196154
  episodes_this_iter: 76
  episodes_total: 11926
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3025.631
    load_time_ms: 2.331
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 1.401298464324817e-45
      cur_lr: 4.999999873689376e-05
      entropy: 0.8877899050712585
      kl: 0.0054756165482103825
      policy_loss: -0.001682804897427559
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4161 s, 180 iter, 1800000 ts, 355 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-48-54
  done: false
  episode_len_mean: 133.95
  episode_reward_max: 393.0343683995097
  episode_reward_mean: 363.5438410944651
  episode_reward_min: 338.13979659463854
  episodes_this_iter: 75
  episodes_total: 12299
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3004.081
    load_time_ms: 2.336
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8274035453796387
      kl: 0.0054678223095834255
      policy_loss: -0.00033483063452877104
      total_loss: 6.0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4277 s, 185 iter, 1850000 ts, 358 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-50-50
  done: false
  episode_len_mean: 132.42
  episode_reward_max: 393.653172146945
  episode_reward_mean: 353.8790547870032
  episode_reward_min: 125.69853572412853
  episodes_this_iter: 76
  episodes_total: 12677
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3012.838
    load_time_ms: 2.473
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8261608481407166
      kl: 0.010798024013638496
      policy_loss: -0.0014157063560560346
      total_loss: 15.601

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4392 s, 190 iter, 1900000 ts, 355 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-52-45
  done: false
  episode_len_mean: 131.59
  episode_reward_max: 393.7681129005668
  episode_reward_mean: 358.4899170348774
  episode_reward_min: 138.83449846563536
  episodes_this_iter: 76
  episodes_total: 13056
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3033.268
    load_time_ms: 2.693
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7429407238960266
      kl: 0.005547282751649618
      policy_loss: -0.001694719772785902
      total_loss: 2.0292

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4507 s, 195 iter, 1950000 ts, 364 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-54-41
  done: false
  episode_len_mean: 132.65
  episode_reward_max: 394.41986720713567
  episode_reward_mean: 359.77832425365307
  episode_reward_min: 142.44520658765038
  episodes_this_iter: 75
  episodes_total: 13432
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3020.616
    load_time_ms: 2.559
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7482737302780151
      kl: 0.006465108133852482
      policy_loss: -0.001492175622843206
      total_loss: 5.98

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4622 s, 200 iter, 2000000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-56-36
  done: false
  episode_len_mean: 133.24
  episode_reward_max: 393.8860515269826
  episode_reward_mean: 355.1608804434025
  episode_reward_min: 124.15469034875724
  episodes_this_iter: 75
  episodes_total: 13808
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 2996.819
    load_time_ms: 2.492
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7206569910049438
      kl: 0.006594658829271793
      policy_loss: -0.001277400879189372
      total_loss: 25.679

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4738 s, 205 iter, 2050000 ts, 357 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_01-58-33
  done: false
  episode_len_mean: 132.41
  episode_reward_max: 394.82436803404823
  episode_reward_mean: 360.0589627805997
  episode_reward_min: 126.46535240480591
  episodes_this_iter: 75
  episodes_total: 14184
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3015.951
    load_time_ms: 2.602
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6823819875717163
      kl: 0.004901039879769087
      policy_loss: -0.0002726220991462469
      total_loss: 1.27

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4855 s, 210 iter, 2100000 ts, 358 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-00-30
  done: false
  episode_len_mean: 133.16
  episode_reward_max: 390.3711743753823
  episode_reward_mean: 358.4072835461083
  episode_reward_min: 138.15539995511455
  episodes_this_iter: 76
  episodes_total: 14559
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3040.806
    load_time_ms: 2.366
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7558534741401672
      kl: 0.008862488903105259
      policy_loss: -0.002032587071880698
      total_loss: 27.012

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 4970 s, 215 iter, 2150000 ts, 364 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-02-25
  done: false
  episode_len_mean: 131.64
  episode_reward_max: 392.09431204138104
  episode_reward_mean: 362.3152262938215
  episode_reward_min: 337.68824207933676
  episodes_this_iter: 76
  episodes_total: 14935
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3035.398
    load_time_ms: 2.265
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.648544430732727
      kl: 0.009216583333909512
      policy_loss: -0.004214863292872906
      total_loss: 1.2216

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 5086 s, 220 iter, 2200000 ts, 360 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-04-22
  done: false
  episode_len_mean: 131.63
  episode_reward_max: 393.33022718884877
  episode_reward_mean: 359.79081785109884
  episode_reward_min: 150.6439457234158
  episodes_this_iter: 76
  episodes_total: 15315
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3028.798
    load_time_ms: 2.395
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6350480318069458
      kl: 0.00948873721063137
      policy_loss: -0.0016591346357017756
      total_loss: 26.35

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 5202 s, 225 iter, 2250000 ts, 361 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-06-17
  done: false
  episode_len_mean: 132.62
  episode_reward_max: 397.46989300362213
  episode_reward_mean: 360.56172894868513
  episode_reward_min: 110.33195484262728
  episodes_this_iter: 75
  episodes_total: 15696
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3020.158
    load_time_ms: 2.454
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6915534734725952
      kl: 0.00798104889690876
      policy_loss: -0.0019031575648114085
      total_loss: 19.0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 5317 s, 230 iter, 2300000 ts, 362 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-08-13
  done: false
  episode_len_mean: 134.61
  episode_reward_max: 396.857733492003
  episode_reward_mean: 364.34934163399936
  episode_reward_min: 340.7385719458333
  episodes_this_iter: 74
  episodes_total: 16070
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3011.102
    load_time_ms: 2.457
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6679245233535767
      kl: 0.006251384038478136
      policy_loss: -0.0012486675987020135
      total_loss: 1.2314

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 5433 s, 235 iter, 2350000 ts, 364 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-10-09
  done: false
  episode_len_mean: 134.87
  episode_reward_max: 391.1494182137428
  episode_reward_mean: 359.00128006829283
  episode_reward_min: 92.25233004172941
  episodes_this_iter: 74
  episodes_total: 16440
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3029.559
    load_time_ms: 2.519
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6543087363243103
      kl: 0.005851191934198141
      policy_loss: -0.0007284085731953382
      total_loss: 26.51

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 5548 s, 240 iter, 2400000 ts, 364 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-12-05
  done: false
  episode_len_mean: 136.03
  episode_reward_max: 395.782715410378
  episode_reward_mean: 364.2073017310555
  episode_reward_min: 336.7010155179608
  episodes_this_iter: 73
  episodes_total: 16809
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3030.711
    load_time_ms: 2.454
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6703609824180603
      kl: 0.005755465477705002
      policy_loss: -0.0010495060123503208
      total_loss: 0.86440

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=8788], 5664 s, 245 iter, 2450000 ts, 361 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-14-00
  done: false
  episode_len_mean: 136.07
  episode_reward_max: 392.22078949748266
  episode_reward_mean: 367.1118138383986
  episode_reward_min: 340.93321268984704
  episodes_this_iter: 72
  episodes_total: 17175
  experiment_id: f156154b3f66448dbb2a78f4a163419b
  hostname: Gandalf
  info:
    grad_time_ms: 3046.945
    load_time_ms: 2.308
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6248233318328857
      kl: 0.005697138607501984
      policy_loss: -0.00024427432799711823
      total_loss: 0.7

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=8788], 5778 s, 250 iter, 2500000 ts, 354 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=8788], 5778 s, 250 iter, 2500000 ts, 354 rew



In [16]:
# Start a training run (executeTraining is presumably defined in an earlier cell -- confirm).
# Long-running cell: the status output below reports about 116 s per 5 training iterations.
# Per-trial results go to the "Result logdir" printed in the status output.
executeTraining()

 Starting SUMO on port 56893
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-16_02-15-334agdw911 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



22.2290626595547
25.98884184658098


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-16-31
  done: false
  episode_len_mean: 460.55
  episode_reward_max: 166.96364155438937
  episode_reward_mean: 44.01884152999507
  episode_reward_min: -151.64511966705183
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 4331.902
    load_time_ms: 149.004
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4203226566314697
      kl: 0.000493006722535938
      policy_loss: -0.0008918464300222695
      total_loss: 80.62206268310547
      vf_explained_var: 0.04680429399013519
      vf_loss: 80.62285614013672
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4206947088241577
      kl: 0.000603377993684262
      policy_loss: -0.0005594997783191502
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 116 s, 5 iter, 50000 ts, 65.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-18-25
  done: false
  episode_len_mean: 441.46
  episode_reward_max: 321.2478104401037
  episode_reward_mean: 101.79668706607197
  episode_reward_min: -159.20335210300462
  episodes_this_iter: 22
  episodes_total: 134
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3387.014
    load_time_ms: 26.858
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4314429759979248
      kl: 0.011563713662326336
      policy_loss: -0.007818739861249924
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 232 s, 10 iter, 100000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-20-22
  done: false
  episode_len_mean: 245.02
  episode_reward_max: 381.3242796690889
  episode_reward_mean: 166.38846640823874
  episode_reward_min: -155.2345126388563
  episodes_this_iter: 43
  episodes_total: 316
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3199.324
    load_time_ms: 2.58
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.392999291419983
      kl: 0.005530933849513531
      policy_loss: -0.0019852377008646727
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 348 s, 15 iter, 150000 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-22-18
  done: false
  episode_len_mean: 179.3
  episode_reward_max: 376.312430695928
  episode_reward_mean: 186.22567224749397
  episode_reward_min: -157.8576848434007
  episodes_this_iter: 57
  episodes_total: 584
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3193.753
    load_time_ms: 2.614
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.3831045627593994
      kl: 0.004970438778400421
      policy_loss: -0.00139308487996459
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 465 s, 20 iter, 200000 ts, 185 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-24-15
  done: false
  episode_len_mean: 135.93
  episode_reward_max: 375.2265641576221
  episode_reward_mean: 171.4105949182431
  episode_reward_min: -168.33068538231112
  episodes_this_iter: 74
  episodes_total: 924
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3187.909
    load_time_ms: 2.376
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 7.629394644936838e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3449493646621704
      kl: 0.0024870363995432854
      policy_loss: -0.0011675701243802905
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 582 s, 25 iter, 250000 ts, 227 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-26-13
  done: false
  episode_len_mean: 119.95
  episode_reward_max: 375.3172416032402
  episode_reward_mean: 161.09162845312892
  episode_reward_min: -167.71486294562834
  episodes_this_iter: 85
  episodes_total: 1303
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3178.502
    load_time_ms: 2.622
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 9.536743306171047e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.3111051321029663
      kl: 0.011505058966577053
      policy_loss: -0.0037042987532913685
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 701 s, 30 iter, 300000 ts, 189 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-28-11
  done: false
  episode_len_mean: 120.93
  episode_reward_max: 365.1068159048486
  episode_reward_mean: 191.848878447436
  episode_reward_min: -152.59299752297864
  episodes_this_iter: 82
  episodes_total: 1723
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3187.57
    load_time_ms: 2.712
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.215768814086914
      kl: 0.0025996367912739515
      policy_loss: -0.0007942660013213754
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 819 s, 35 iter, 350000 ts, 224 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-30-11
  done: false
  episode_len_mean: 120.76
  episode_reward_max: 344.40008153019403
  episode_reward_mean: 211.97673999910612
  episode_reward_min: -152.7410061686615
  episodes_this_iter: 83
  episodes_total: 2134
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3205.302
    load_time_ms: 2.429
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 3.7252903539730653e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.2048345804214478
      kl: 0.0037982272915542126
      policy_loss: -0.0026363832876086235
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 938 s, 40 iter, 400000 ts, 247 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-32-10
  done: false
  episode_len_mean: 127.59
  episode_reward_max: 343.5143337726097
  episode_reward_mean: 277.51459973816424
  episode_reward_min: -140.2690265184269
  episodes_this_iter: 78
  episodes_total: 2530
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3196.195
    load_time_ms: 2.382
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 4.6566129424663316e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.1536900997161865
      kl: 0.0024447222240269184
      policy_loss: -0.0011496031656861305
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1055 s, 45 iter, 450000 ts, 293 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-34-07
  done: false
  episode_len_mean: 125.77
  episode_reward_max: 361.2366320674973
  episode_reward_mean: 296.4863185064108
  episode_reward_min: -145.48454857600166
  episodes_this_iter: 80
  episodes_total: 2925
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3180.912
    load_time_ms: 2.349
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 2.9103830890414573e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1432205438613892
      kl: 0.005410943180322647
      policy_loss: -0.0020650101359933615
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1173 s, 50 iter, 500000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-36-06
  done: false
  episode_len_mean: 123.7
  episode_reward_max: 366.5948739546775
  episode_reward_mean: 307.9050057924353
  episode_reward_min: -138.48602554104184
  episodes_this_iter: 80
  episodes_total: 3325
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3189.6
    load_time_ms: 2.335
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 1.8189894306509108e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.1477811336517334
      kl: 0.022708481177687645
      policy_loss: -0.0049191792495548725
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1291 s, 55 iter, 550000 ts, 295 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-38-03
  done: false
  episode_len_mean: 122.64
  episode_reward_max: 362.30832435041185
  episode_reward_mean: 311.58281658422436
  episode_reward_min: -117.33741256408436
  episodes_this_iter: 82
  episodes_total: 3732
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3160.138
    load_time_ms: 2.459
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.1673085689544678
      kl: 0.0033894304651767015
      policy_loss: -0.0025577605701982975
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1409 s, 60 iter, 600000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-40-02
  done: false
  episode_len_mean: 118.72
  episode_reward_max: 368.7600072619955
  episode_reward_mean: 301.67475632628555
  episode_reward_min: -140.90345114450417
  episodes_this_iter: 84
  episodes_total: 4145
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3081.521
    load_time_ms: 2.382
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 1.421085492696024e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.1060062646865845
      kl: 0.010164960287511349
      policy_loss: -0.002824759343639016
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1527 s, 65 iter, 650000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-42-00
  done: false
  episode_len_mean: 121.1
  episode_reward_max: 373.11509118810704
  episode_reward_mean: 320.15872593441685
  episode_reward_min: 270.6296420282347
  episodes_this_iter: 82
  episodes_total: 4561
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3068.838
    load_time_ms: 2.264
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 1.77635686587003e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.0785770416259766
      kl: 0.003978819120675325
      policy_loss: -0.002307295799255371
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1644 s, 70 iter, 700000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-43-56
  done: false
  episode_len_mean: 119.66
  episode_reward_max: 360.0521304909223
  episode_reward_mean: 316.69980490541917
  episode_reward_min: 277.94217727189783
  episodes_this_iter: 83
  episodes_total: 4978
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3047.77
    load_time_ms: 2.341
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 4.440892164675075e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.9950880408287048
      kl: 0.007549494504928589
      policy_loss: -0.0013677554670721292
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1759 s, 75 iter, 750000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-45-52
  done: false
  episode_len_mean: 120.32
  episode_reward_max: 375.8172237680431
  episode_reward_mean: 323.10782260421024
  episode_reward_min: -112.62365823415821
  episodes_this_iter: 83
  episodes_total: 5395
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3025.703
    load_time_ms: 2.253
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 5.551115205843844e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.9321505427360535
      kl: 0.006118918303400278
      policy_loss: -0.001771042705513537
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1875 s, 80 iter, 800000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-47-48
  done: false
  episode_len_mean: 121.38
  episode_reward_max: 385.11019375788635
  episode_reward_mean: 335.4485195380006
  episode_reward_min: 276.24607777584555
  episodes_this_iter: 82
  episodes_total: 5809
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3025.439
    load_time_ms: 2.256
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 6.938894007304805e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9410173296928406
      kl: 0.0040891836397349834
      policy_loss: -0.00016967751435004175
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 1990 s, 85 iter, 850000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-49-44
  done: false
  episode_len_mean: 122.26
  episode_reward_max: 381.81667766100185
  episode_reward_mean: 336.616157042473
  episode_reward_min: 273.5258730125435
  episodes_this_iter: 82
  episodes_total: 6219
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3010.335
    load_time_ms: 2.289
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 4.336808754565503e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.913484513759613
      kl: 0.0033771961461752653
      policy_loss: -0.002554189180955291
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2106 s, 90 iter, 900000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-51-40
  done: false
  episode_len_mean: 122.52
  episode_reward_max: 385.3865033099635
  episode_reward_mean: 340.42858975517674
  episode_reward_min: 286.3359760498166
  episodes_this_iter: 82
  episodes_total: 6628
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3036.104
    load_time_ms: 2.423
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 1.0842021886413758e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.8418228030204773
      kl: 0.007397530134767294
      policy_loss: -0.0016729850322008133
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2222 s, 95 iter, 950000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-53-35
  done: false
  episode_len_mean: 124.21
  episode_reward_max: 390.26092723999983
  episode_reward_mean: 339.6386381547561
  episode_reward_min: 159.7511097352904
  episodes_this_iter: 81
  episodes_total: 7033
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3054.38
    load_time_ms: 2.724
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.3552527358017197e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.8552688956260681
      kl: 0.009114949963986874
      policy_loss: -0.0001225556479766965
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2336 s, 100 iter, 1000000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-55-30
  done: false
  episode_len_mean: 123.95
  episode_reward_max: 394.69567797560626
  episode_reward_mean: 344.74260133402635
  episode_reward_min: 280.18631474443094
  episodes_this_iter: 81
  episodes_total: 7437
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3054.102
    load_time_ms: 2.738
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 1.6940659197521496e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.8354601860046387
      kl: 0.019627898931503296
      policy_loss: -0.0009150520199909806
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2451 s, 105 iter, 1050000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-57-25
  done: false
  episode_len_mean: 124.26
  episode_reward_max: 390.51105521514296
  episode_reward_mean: 341.36724898686634
  episode_reward_min: 280.8473103151298
  episodes_this_iter: 81
  episodes_total: 7839
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3039.08
    load_time_ms: 2.567
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 2.117582399690187e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.764877438545227
      kl: 0.011612774804234505
      policy_loss: -0.0010504706297069788
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2567 s, 110 iter, 1100000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_02-59-21
  done: false
  episode_len_mean: 124.35
  episode_reward_max: 392.38111498211475
  episode_reward_mean: 339.4965874732287
  episode_reward_min: 176.70547528935612
  episodes_this_iter: 80
  episodes_total: 8242
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3031.576
    load_time_ms: 2.342
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.6485117077827454
      kl: 0.014756502583622932
      policy_loss: -0.0022071488201618195
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2682 s, 115 iter, 1150000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-01-16
  done: false
  episode_len_mean: 123.81
  episode_reward_max: 396.841674658239
  episode_reward_mean: 347.99712724513944
  episode_reward_min: 169.88046327010125
  episodes_this_iter: 82
  episodes_total: 8646
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3046.314
    load_time_ms: 2.347
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.6060786247253418
      kl: 0.009617872536182404
      policy_loss: 0.0007287327898666263
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2797 s, 120 iter, 1200000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-03-12
  done: false
  episode_len_mean: 124.08
  episode_reward_max: 387.724506046308
  episode_reward_mean: 336.98900702683125
  episode_reward_min: 184.32085798095557
  episodes_this_iter: 81
  episodes_total: 9048
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3049.106
    load_time_ms: 2.481
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.5932019352912903
      kl: 0.019423073157668114
      policy_loss: 0.0022894784342497587
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 2912 s, 125 iter, 1250000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-05-07
  done: false
  episode_len_mean: 124.39
  episode_reward_max: 398.0226706920043
  episode_reward_mean: 336.99638524843556
  episode_reward_min: 159.37605434834967
  episodes_this_iter: 80
  episodes_total: 9450
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3041.661
    load_time_ms: 2.439
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.4266507923603058
      kl: 0.022353550419211388
      policy_loss: 0.005228148773312569
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3027 s, 130 iter, 1300000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-07-02
  done: false
  episode_len_mean: 124.26
  episode_reward_max: 391.02905747857375
  episode_reward_mean: 339.77656983906184
  episode_reward_min: 179.54374683541056
  episodes_this_iter: 80
  episodes_total: 9852
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3048.203
    load_time_ms: 2.549
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.4079068899154663
      kl: 0.021162662655115128
      policy_loss: 0.004042239394038916
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3141 s, 135 iter, 1350000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-08-56
  done: false
  episode_len_mean: 124.39
  episode_reward_max: 389.79215459253686
  episode_reward_mean: 345.33681140706307
  episode_reward_min: 156.12496451499686
  episodes_this_iter: 80
  episodes_total: 10255
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3041.293
    load_time_ms: 2.565
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.3851790428161621
      kl: 0.018427150323987007
      policy_loss: 0.0021517009008675814
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3255 s, 140 iter, 1400000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-10-51
  done: false
  episode_len_mean: 124.35
  episode_reward_max: 390.20524447770083
  episode_reward_mean: 337.8815648240551
  episode_reward_min: 184.2912173691762
  episodes_this_iter: 81
  episodes_total: 10658
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3034.015
    load_time_ms: 2.516
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.34432512521743774
      kl: 0.02275608293712139
      policy_loss: 0.005005366634577513
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3370 s, 145 iter, 1450000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-12-45
  done: false
  episode_len_mean: 123.21
  episode_reward_max: 387.6507799647928
  episode_reward_mean: 343.67870250832544
  episode_reward_min: 147.07554065686264
  episodes_this_iter: 81
  episodes_total: 11061
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3058.215
    load_time_ms: 2.46
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 3.970465914735356e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.3094979226589203
      kl: 0.06388890743255615
      policy_loss: 0.02368156611919403
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3484 s, 150 iter, 1500000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-14-40
  done: false
  episode_len_mean: 124.12
  episode_reward_max: 385.98150850858536
  episode_reward_mean: 341.17369758300714
  episode_reward_min: 280.74957038152615
  episodes_this_iter: 80
  episodes_total: 11464
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3051.714
    load_time_ms: 2.376
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 1.340032182128234e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.28758642077445984
      kl: 0.06127282232046127
      policy_loss: 0.036465395241975784
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3599 s, 155 iter, 1550000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-16-36
  done: false
  episode_len_mean: 124.42
  episode_reward_max: 391.64286522902154
  episode_reward_mean: 342.7362804754342
  episode_reward_min: 174.50095365765367
  episodes_this_iter: 80
  episodes_total: 11866
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3021.721
    load_time_ms: 2.354
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 4.52260898939172e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.3150254786014557
      kl: 0.026802102103829384
      policy_loss: 0.004091951064765453
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3715 s, 160 iter, 1600000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-18-31
  done: false
  episode_len_mean: 124.36
  episode_reward_max: 384.810918369088
  episode_reward_mean: 336.70164674243387
  episode_reward_min: 276.7767302871783
  episodes_this_iter: 81
  episodes_total: 12267
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3028.056
    load_time_ms: 2.325
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 1.0175870857220094e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.2814948856830597
      kl: 0.043241143226623535
      policy_loss: 0.024009624496102333
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3830 s, 165 iter, 1650000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-20-26
  done: false
  episode_len_mean: 123.65
  episode_reward_max: 392.8121058691682
  episode_reward_mean: 340.5209631143073
  episode_reward_min: 277.0057545433135
  episodes_this_iter: 81
  episodes_total: 12670
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3042.051
    load_time_ms: 2.416
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 2.289571968393698e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.24638159573078156
      kl: 0.04362749308347702
      policy_loss: 0.036356039345264435
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 3946 s, 170 iter, 1700000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-22-22
  done: false
  episode_len_mean: 124.14
  episode_reward_max: 387.7391462030042
  episode_reward_mean: 335.1112240647071
  episode_reward_min: 277.822208184743
  episodes_this_iter: 80
  episodes_total: 13074
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3063.73
    load_time_ms: 2.446
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 2.5757671707110257e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.24195174872875214
      kl: 0.021331211552023888
      policy_loss: 0.008832567371428013
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4060 s, 175 iter, 1750000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-24-18
  done: false
  episode_len_mean: 124.34
  episode_reward_max: 387.66502620613574
  episode_reward_mean: 339.4449260255842
  episode_reward_min: 277.6534861358946
  episodes_this_iter: 80
  episodes_total: 13476
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3054.681
    load_time_ms: 2.31
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 2.5757671707110257e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.25739696621894836
      kl: 7.569677829742432
      policy_loss: 0.1079024150967598
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4176 s, 180 iter, 1800000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-26-13
  done: false
  episode_len_mean: 124.44
  episode_reward_max: 390.4453561050132
  episode_reward_mean: 341.5144641689988
  episode_reward_min: 286.42518716333717
  episodes_this_iter: 80
  episodes_total: 13878
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3018.77
    load_time_ms: 2.366
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 3.863651387155263e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.2551613450050354
      kl: 0.023134563118219376
      policy_loss: 0.008348120376467705
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4292 s, 185 iter, 1850000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-28-11
  done: false
  episode_len_mean: 124.74
  episode_reward_max: 384.7531249467384
  episode_reward_mean: 340.7546569703079
  episode_reward_min: 283.12395676418305
  episodes_this_iter: 80
  episodes_total: 14279
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3040.877
    load_time_ms: 2.432
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 1.9318256935776314e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.24582557380199432
      kl: 45.72770309448242
      policy_loss: 0.1297994703054428
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4408 s, 190 iter, 1900000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-30-05
  done: false
  episode_len_mean: 124.58
  episode_reward_max: 384.38438354053847
  episode_reward_mean: 340.4199299372347
  episode_reward_min: -136.05134629699975
  episodes_this_iter: 80
  episodes_total: 14678
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3044.316
    load_time_ms: 2.457
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 2.897738540366447e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.2158750593662262
      kl: 0.9010249376296997
      policy_loss: 0.022394835948944092
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4523 s, 195 iter, 1950000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-32-01
  done: false
  episode_len_mean: 124.75
  episode_reward_max: 380.54564535362783
  episode_reward_mean: 339.1422651404542
  episode_reward_min: -135.24257550656748
  episodes_this_iter: 80
  episodes_total: 15078
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3059.686
    load_time_ms: 2.595
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 2.2004700660401123e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.3152045011520386
      kl: 0.9097902178764343
      policy_loss: 0.013911277987062931
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4637 s, 200 iter, 2000000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-33-55
  done: false
  episode_len_mean: 125.51
  episode_reward_max: 386.96164369636426
  episode_reward_mean: 341.91069308515966
  episode_reward_min: 280.4222203547767
  episodes_this_iter: 80
  episodes_total: 15477
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3055.051
    load_time_ms: 2.616
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 3.3007059068537355e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.27546513080596924
      kl: 0.012477029114961624
      policy_loss: 0.004257295746356249
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4751 s, 205 iter, 2050000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-35-49
  done: false
  episode_len_mean: 123.91
  episode_reward_max: 391.71040858536446
  episode_reward_mean: 335.40481364784705
  episode_reward_min: -139.60290876993443
  episodes_this_iter: 82
  episodes_total: 15876
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3013.104
    load_time_ms: 2.424
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 1.6503529534268677e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.2698765993118286
      kl: 1.5595901012420654
      policy_loss: -0.01083256583660841
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4865 s, 210 iter, 2100000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-37-43
  done: false
  episode_len_mean: 125.96
  episode_reward_max: 381.60012996007913
  episode_reward_mean: 335.4055619276802
  episode_reward_min: 279.44113006317167
  episodes_this_iter: 80
  episodes_total: 16273
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3017.438
    load_time_ms: 2.391
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 2.475529026243518e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.2491648942232132
      kl: 0.012114732526242733
      policy_loss: 0.003157030325382948
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 4979 s, 215 iter, 2150000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-39-38
  done: false
  episode_len_mean: 125.62
  episode_reward_max: 387.733454641368
  episode_reward_mean: 340.0335240874623
  episode_reward_min: 293.4524150045811
  episodes_this_iter: 79
  episodes_total: 16670
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3043.356
    load_time_ms: 2.685
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 2.475529026243518e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.1893543154001236
      kl: 0.02560945600271225
      policy_loss: 0.012230616062879562
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 5094 s, 220 iter, 2200000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-41-33
  done: false
  episode_len_mean: 125.42
  episode_reward_max: 390.5605278378967
  episode_reward_mean: 336.2641958904855
  episode_reward_min: 284.77465991767025
  episodes_this_iter: 80
  episodes_total: 17069
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3049.435
    load_time_ms: 2.627
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 5.56994141976407e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.27208152413368225
      kl: 0.012211604043841362
      policy_loss: 0.002715247916057706
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 5209 s, 225 iter, 2250000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-43-28
  done: false
  episode_len_mean: 125.09
  episode_reward_max: 383.4575566320909
  episode_reward_mean: 341.7695693405326
  episode_reward_min: 295.2448248885386
  episodes_this_iter: 80
  episodes_total: 17467
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3048.066
    load_time_ms: 2.422
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 5.56994141976407e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.24278919398784637
      kl: 0.018497183918952942
      policy_loss: 0.006643543019890785
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 5323 s, 230 iter, 2300000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-45-23
  done: false
  episode_len_mean: 125.67
  episode_reward_max: 377.7793556804678
  episode_reward_mean: 339.4314998413655
  episode_reward_min: 286.10126500545306
  episodes_this_iter: 79
  episodes_total: 17866
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3061.452
    load_time_ms: 2.832
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 8.354909504317013e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.20886291563510895
      kl: 0.020088858902454376
      policy_loss: 0.006620900705456734
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 5438 s, 235 iter, 2350000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-47-18
  done: false
  episode_len_mean: 124.42
  episode_reward_max: 379.94471928955875
  episode_reward_mean: 338.7519804387433
  episode_reward_min: 287.5788131837119
  episodes_this_iter: 80
  episodes_total: 18267
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3039.433
    load_time_ms: 2.888
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 1.2532369507133705e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.21289664506912231
      kl: 0.032457973808050156
      policy_loss: 0.01424045767635107
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 5552 s, 240 iter, 2400000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-49-13
  done: false
  episode_len_mean: 123.24
  episode_reward_max: 381.2080905753955
  episode_reward_mean: 340.92213879965135
  episode_reward_min: -142.35295149567924
  episodes_this_iter: 81
  episodes_total: 18670
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3027.06
    load_time_ms: 2.513
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 2.819781806245698e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.2299434244632721
      kl: 0.03942795842885971
      policy_loss: 0.01615658588707447
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11531], 5667 s, 245 iter, 2450000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-51-08
  done: false
  episode_len_mean: 121.39
  episode_reward_max: 387.315245180397
  episode_reward_mean: 329.8383652322882
  episode_reward_min: -142.50492716659346
  episodes_this_iter: 83
  episodes_total: 19080
  experiment_id: 20550ba960844b1bbc91f4a2edf3861d
  hostname: Gandalf
  info:
    grad_time_ms: 3035.909
    load_time_ms: 2.348
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 1.4275149554255716e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.17932887375354767
      kl: 15.574384689331055
      policy_loss: 0.035106856375932693
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=11531], 5783 s, 250 iter, 2500000 ts, 306 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=11531], 5783 s, 250 iter, 2500000 ts, 306 rew



In [17]:
# Launch a training run. executeTraining is defined in an earlier cell (not visible here).
# The captured output that follows shows a fresh Ray Tune trial of
# PPO_MultiAgentIntersectionEnv-v0_0 starting from iteration 0 with a new pid and
# experiment_id, i.e. it does not resume the trial that terminated just before this cell.
# NOTE(review): presumably a deliberate repeat run with changed settings - confirm against
# the cells that configure the experiment.
executeTraining()

 Starting SUMO on port 51629
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-16_03-52-41zi0ztudu -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



13.158170968597778
1.9637819609747709


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-53-41
  done: false
  episode_len_mean: 475.9
  episode_reward_max: 225.10785036774976
  episode_reward_mean: 78.3440425605223
  episode_reward_min: -148.14493811964707
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 4132.78
    load_time_ms: 143.57
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.417690396308899
      kl: 0.0010189954191446304
      policy_loss: -0.0011139920679852366
      total_loss: 33.6623649597168
      vf_explained_var: 0.12451547384262085
      vf_loss: 33.66328048706055
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4218324422836304
      kl: 0.0006168478867039084
      policy_loss: -0.0025531200226396322
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 118 s, 5 iter, 50000 ts, 107 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-55-36
  done: false
  episode_len_mean: 430.52
  episode_reward_max: 317.76570024224606
  episode_reward_mean: 140.63583570101505
  episode_reward_min: -165.63339430279675
  episodes_this_iter: 26
  episodes_total: 135
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3301.853
    load_time_ms: 26.025
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4186124801635742
      kl: 0.0048323627561330795
      policy_loss: -0.0027323239482939243
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 233 s, 10 iter, 100000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-57-33
  done: false
  episode_len_mean: 233.86
  episode_reward_max: 377.5018249463362
  episode_reward_mean: 183.90883122552256
  episode_reward_min: -162.66275106689613
  episodes_this_iter: 45
  episodes_total: 325
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3152.842
    load_time_ms: 2.497
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.383658766746521
      kl: 0.005593016743659973
      policy_loss: -0.0016321531729772687
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 351 s, 15 iter, 150000 ts, 202 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_03-59-31
  done: false
  episode_len_mean: 193.98
  episode_reward_max: 355.7191661915872
  episode_reward_mean: 206.30156260723
  episode_reward_min: -163.0612235084802
  episodes_this_iter: 51
  episodes_total: 579
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3169.548
    load_time_ms: 2.475
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 6.10351571594947e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.4084546566009521
      kl: 0.0038438064511865377
      policy_loss: -0.002242353977635503
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 469 s, 20 iter, 200000 ts, 156 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-01-29
  done: false
  episode_len_mean: 158.39
  episode_reward_max: 373.5304545222338
  episode_reward_mean: 160.21031494683282
  episode_reward_min: -158.98662141117126
  episodes_this_iter: 60
  episodes_total: 871
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3167.816
    load_time_ms: 2.367
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 1.9073486612342094e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3544855117797852
      kl: 0.007081876043230295
      policy_loss: -0.0029493607580661774
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 586 s, 25 iter, 250000 ts, 190 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-03-26
  done: false
  episode_len_mean: 150.14
  episode_reward_max: 388.2699498068521
  episode_reward_mean: 185.45853300888146
  episode_reward_min: -166.04898466411592
  episodes_this_iter: 68
  episodes_total: 1199
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3153.843
    load_time_ms: 2.336
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 5.9604645663569045e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.362876057624817
      kl: 0.007063917815685272
      policy_loss: -0.0016966363182291389
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 703 s, 30 iter, 300000 ts, 204 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-05-23
  done: false
  episode_len_mean: 140.61
  episode_reward_max: 367.1338552918699
  episode_reward_mean: 186.83078544371452
  episode_reward_min: -163.02365971399243
  episodes_this_iter: 73
  episodes_total: 1541
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3160.635
    load_time_ms: 2.69
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.8626451769865326e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.3821500539779663
      kl: 0.007926782593131065
      policy_loss: -0.003957992885261774
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 820 s, 35 iter, 350000 ts, 163 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-07-21
  done: false
  episode_len_mean: 124.04
  episode_reward_max: 385.77972457198615
  episode_reward_mean: 138.35395113683228
  episode_reward_min: -166.32233241445903
  episodes_this_iter: 80
  episodes_total: 1906
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3161.405
    load_time_ms: 2.665
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.3168468475341797
      kl: 0.008320018649101257
      policy_loss: -0.0025467765517532825
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 937 s, 40 iter, 400000 ts, 206 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-09-18
  done: false
  episode_len_mean: 133.26
  episode_reward_max: 376.18577737575544
  episode_reward_mean: 201.81629895872396
  episode_reward_min: -165.09078780168065
  episodes_this_iter: 75
  episodes_total: 2271
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3134.793
    load_time_ms: 2.332
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 1.8189894306509108e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.2792809009552002
      kl: 0.004154514987021685
      policy_loss: -0.002248158911243081
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1055 s, 45 iter, 450000 ts, 230 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-11-16
  done: false
  episode_len_mean: 126.9
  episode_reward_max: 367.1709168346989
  episode_reward_mean: 216.2334011822668
  episode_reward_min: -154.92009308167917
  episodes_this_iter: 79
  episodes_total: 2658
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3158.787
    load_time_ms: 2.362
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 1.1368683941568192e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.1999094486236572
      kl: 0.009107425808906555
      policy_loss: -0.002306243870407343
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1172 s, 50 iter, 500000 ts, 211 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-13-14
  done: false
  episode_len_mean: 124.12
  episode_reward_max: 365.48101076816926
  episode_reward_mean: 222.07707916987692
  episode_reward_min: -166.70924860922304
  episodes_this_iter: 80
  episodes_total: 3070
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3170.771
    load_time_ms: 2.415
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 3.55271373174006e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.219387173652649
      kl: 0.005861510988324881
      policy_loss: -0.003193443873897195
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1291 s, 55 iter, 550000 ts, 263 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-15-13
  done: false
  episode_len_mean: 117.2
  episode_reward_max: 376.1406060690043
  episode_reward_mean: 201.09025833467243
  episode_reward_min: -162.66400862084367
  episodes_this_iter: 87
  episodes_total: 3478
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3162.364
    load_time_ms: 2.385
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 1.1102230411687688e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.1952903270721436
      kl: 0.009265702217817307
      policy_loss: -0.00225698365829885
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1410 s, 60 iter, 600000 ts, 223 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-17-12
  done: false
  episode_len_mean: 124.41
  episode_reward_max: 371.09540067828317
  episode_reward_mean: 242.0664853367956
  episode_reward_min: -167.0856595186883
  episodes_this_iter: 80
  episodes_total: 3890
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3173.681
    load_time_ms: 2.287
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 3.4694470036524025e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1055865287780762
      kl: 0.008014829829335213
      policy_loss: -0.002935730153694749
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1528 s, 65 iter, 650000 ts, 222 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-19-10
  done: false
  episode_len_mean: 115.95
  episode_reward_max: 369.3386982719969
  episode_reward_mean: 222.5856545378159
  episode_reward_min: -163.78367094671398
  episodes_this_iter: 86
  episodes_total: 4320
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3122.153
    load_time_ms: 2.171
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 2.1684043772827515e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0599842071533203
      kl: 0.0035957968793809414
      policy_loss: -0.0012189248809590936
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1645 s, 70 iter, 700000 ts, 222 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-21-08
  done: false
  episode_len_mean: 117.63
  episode_reward_max: 378.4256137509258
  episode_reward_mean: 233.93270964215165
  episode_reward_min: -154.8636628841088
  episodes_this_iter: 86
  episodes_total: 4754
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3048.853
    load_time_ms: 2.224
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 1.3552527358017197e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.102706789970398
      kl: 0.005917003378272057
      policy_loss: -0.0013772746315225959
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1762 s, 75 iter, 750000 ts, 198 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-23-05
  done: false
  episode_len_mean: 112.34
  episode_reward_max: 383.66529894593856
  episode_reward_mean: 220.74275470927378
  episode_reward_min: -169.6898722564485
  episodes_this_iter: 89
  episodes_total: 5216
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3043.527
    load_time_ms: 2.419
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 4.235164799380374e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.0411468744277954
      kl: 0.004362975712865591
      policy_loss: -0.0015910530928522348
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1877 s, 80 iter, 800000 ts, 216 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-25-00
  done: false
  episode_len_mean: 107.86
  episode_reward_max: 386.4779172185584
  episode_reward_mean: 195.7248448701124
  episode_reward_min: -166.54573548053548
  episodes_this_iter: 94
  episodes_total: 5669
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3067.769
    load_time_ms: 2.383
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 1.323488999806367e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.034974455833435
      kl: 0.007372562773525715
      policy_loss: -0.002027334412559867
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 1992 s, 85 iter, 850000 ts, 207 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-26-55
  done: false
  episode_len_mean: 112.14
  episode_reward_max: 388.46958846427935
  episode_reward_mean: 217.2789608510558
  episode_reward_min: -166.04050556176162
  episodes_this_iter: 90
  episodes_total: 6126
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3060.614
    load_time_ms: 2.392
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 4.1359031243948966e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.0352524518966675
      kl: 0.007586432620882988
      policy_loss: -0.0005950817139819264
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2106 s, 90 iter, 900000 ts, 223 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-28-49
  done: false
  episode_len_mean: 108.15
  episode_reward_max: 377.9322673692011
  episode_reward_mean: 203.75909605342636
  episode_reward_min: -158.50388626830298
  episodes_this_iter: 92
  episodes_total: 6577
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3050.237
    load_time_ms: 2.609
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 2.5849394527468104e-27
      cur_lr: 4.999999873689376e-05
      entropy: 1.0469521284103394
      kl: 0.007164869457483292
      policy_loss: -0.0034718126989901066
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2221 s, 95 iter, 950000 ts, 226 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-30-44
  done: false
  episode_len_mean: 105.54
  episode_reward_max: 382.1669038882779
  episode_reward_mean: 196.74319108818082
  episode_reward_min: -168.8008578786583
  episodes_this_iter: 93
  episodes_total: 7032
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3037.519
    load_time_ms: 2.414
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 8.077935789833782e-29
      cur_lr: 4.999999873689376e-05
      entropy: 1.0337222814559937
      kl: 0.003487532027065754
      policy_loss: -0.0016524690436199307
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2336 s, 100 iter, 1000000 ts, 229 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-32-40
  done: false
  episode_len_mean: 111.56
  episode_reward_max: 381.42815603309015
  episode_reward_mean: 231.9183409835171
  episode_reward_min: -169.45348058455187
  episodes_this_iter: 90
  episodes_total: 7494
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3045.938
    load_time_ms: 2.19
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 5.048709868646114e-30
      cur_lr: 4.999999873689376e-05
      entropy: 1.0804797410964966
      kl: 0.005535449367016554
      policy_loss: -0.003856607712805271
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2452 s, 105 iter, 1050000 ts, 222 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-34-36
  done: false
  episode_len_mean: 112.27
  episode_reward_max: 376.06114586097374
  episode_reward_mean: 231.1356691294316
  episode_reward_min: -162.81655859221928
  episodes_this_iter: 89
  episodes_total: 7949
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3052.831
    load_time_ms: 2.245
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 1.5777218339519106e-31
      cur_lr: 4.999999873689376e-05
      entropy: 1.0339453220367432
      kl: 0.008899505250155926
      policy_loss: -0.0030964298639446497
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2568 s, 110 iter, 1100000 ts, 226 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-36-32
  done: false
  episode_len_mean: 111.78
  episode_reward_max: 377.77116490904666
  episode_reward_mean: 239.1993057652101
  episode_reward_min: -164.60443054688272
  episodes_this_iter: 87
  episodes_total: 8399
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3053.664
    load_time_ms: 2.259
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 4.930380731099721e-33
      cur_lr: 4.999999873689376e-05
      entropy: 1.1838058233261108
      kl: 0.006776520982384682
      policy_loss: -0.002256901701912284
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2682 s, 115 iter, 1150000 ts, 262 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-38-26
  done: false
  episode_len_mean: 111.38
  episode_reward_max: 379.89385042629385
  episode_reward_mean: 236.1126408201062
  episode_reward_min: -166.3046406680866
  episodes_this_iter: 91
  episodes_total: 8854
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3066.548
    load_time_ms: 2.372
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 1.5407439784686627e-34
      cur_lr: 4.999999873689376e-05
      entropy: 1.0684866905212402
      kl: 0.004001051187515259
      policy_loss: -0.001657519955188036
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2797 s, 120 iter, 1200000 ts, 211 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-40-22
  done: false
  episode_len_mean: 112.83
  episode_reward_max: 369.79486751786294
  episode_reward_mean: 230.93358178145942
  episode_reward_min: -164.86910826939763
  episodes_this_iter: 86
  episodes_total: 9296
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3059.651
    load_time_ms: 2.421
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 4.814824932714571e-36
      cur_lr: 4.999999873689376e-05
      entropy: 1.046539545059204
      kl: 0.009937016293406487
      policy_loss: -0.0016743313753977418
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 2912 s, 125 iter, 1250000 ts, 218 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-42-17
  done: false
  episode_len_mean: 111.97
  episode_reward_max: 369.3478805032225
  episode_reward_mean: 229.63218003506375
  episode_reward_min: -167.8294612090525
  episodes_this_iter: 91
  episodes_total: 9739
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3062.444
    load_time_ms: 2.383
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 1.5046327914733034e-37
      cur_lr: 4.999999873689376e-05
      entropy: 1.0030864477157593
      kl: 0.00572030246257782
      policy_loss: -0.002048561116680503
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3027 s, 130 iter, 1300000 ts, 256 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-44-13
  done: false
  episode_len_mean: 115.57
  episode_reward_max: 376.0263981220625
  episode_reward_mean: 255.1197782058597
  episode_reward_min: -168.21369768819972
  episodes_this_iter: 88
  episodes_total: 10180
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3054.09
    load_time_ms: 2.448
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 4.701977123029457e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.9964326024055481
      kl: 0.0077032409608364105
      policy_loss: -0.0022855959832668304
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3143 s, 135 iter, 1350000 ts, 237 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-46-08
  done: false
  episode_len_mean: 114.38
  episode_reward_max: 382.51535200243245
  episode_reward_mean: 249.98515845487228
  episode_reward_min: -165.30231410708194
  episodes_this_iter: 89
  episodes_total: 10627
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3058.833
    load_time_ms: 2.382
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 1.4693735437217167e-40
      cur_lr: 4.999999873689376e-05
      entropy: 0.9033982753753662
      kl: 0.0062821186147630215
      policy_loss: -0.0010322334710508585

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3259 s, 140 iter, 1400000 ts, 253 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-48-05
  done: false
  episode_len_mean: 112.81
  episode_reward_max: 368.80919491156305
  episode_reward_mean: 233.42299127727264
  episode_reward_min: -150.72035019722463
  episodes_this_iter: 88
  episodes_total: 11060
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3055.421
    load_time_ms: 2.269
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 4.5920550675924255e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.9442415237426758
      kl: 0.006002926733344793
      policy_loss: -0.001953203696757555
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3375 s, 145 iter, 1450000 ts, 246 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-50-00
  done: false
  episode_len_mean: 116.58
  episode_reward_max: 384.8521111834735
  episode_reward_mean: 265.058795153076
  episode_reward_min: -163.73229135634506
  episodes_this_iter: 85
  episodes_total: 11496
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3070.123
    load_time_ms: 2.213
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 2.872661851865875e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.8360914587974548
      kl: 0.007591902278363705
      policy_loss: -0.0019751531071960926
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3490 s, 150 iter, 1500000 ts, 235 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-51-56
  done: false
  episode_len_mean: 110.59
  episode_reward_max: 381.85850164277406
  episode_reward_mean: 236.58393859606625
  episode_reward_min: -166.1100884940914
  episodes_this_iter: 91
  episodes_total: 11940
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3062.003
    load_time_ms: 2.522
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 8.407790785948902e-45
      cur_lr: 4.999999873689376e-05
      entropy: 0.8163507580757141
      kl: 0.007629325147718191
      policy_loss: -0.002429796615615487
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3606 s, 155 iter, 1550000 ts, 214 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-53-52
  done: false
  episode_len_mean: 103.91
  episode_reward_max: 371.99821852852114
  episode_reward_mean: 196.96189594354487
  episode_reward_min: -165.80093529238167
  episodes_this_iter: 95
  episodes_total: 12387
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3053.371
    load_time_ms: 2.735
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7402319312095642
      kl: 0.009724529460072517
      policy_loss: -0.0045240395702421665
      total_loss: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3722 s, 160 iter, 1600000 ts, 253 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-55-48
  done: false
  episode_len_mean: 111.83
  episode_reward_max: 385.6459665780632
  episode_reward_mean: 245.85445766595672
  episode_reward_min: -164.69080064355273
  episodes_this_iter: 91
  episodes_total: 12833
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3060.048
    load_time_ms: 2.361
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9032518267631531
      kl: 0.0031239527743309736
      policy_loss: 0.0002863498229999095
      total_loss: 10

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3837 s, 165 iter, 1650000 ts, 249 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-57-43
  done: false
  episode_len_mean: 109.94
  episode_reward_max: 383.27345214166644
  episode_reward_mean: 236.12451058361552
  episode_reward_min: -164.02437137029807
  episodes_this_iter: 90
  episodes_total: 13278
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3031.668
    load_time_ms: 2.386
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8622972369194031
      kl: 0.007768928073346615
      policy_loss: -0.001561726676300168
      total_loss: 10

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 3953 s, 170 iter, 1700000 ts, 269 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_04-59-39
  done: false
  episode_len_mean: 114.94
  episode_reward_max: 396.14432803038676
  episode_reward_mean: 266.3085149213064
  episode_reward_min: -165.71224152105611
  episodes_this_iter: 88
  episodes_total: 13713
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3054.732
    load_time_ms: 2.416
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7213175296783447
      kl: 0.006162191275507212
      policy_loss: -0.00019719242118299007
      total_loss: 6

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4069 s, 175 iter, 1750000 ts, 251 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-01-35
  done: false
  episode_len_mean: 109.29
  episode_reward_max: 385.01489074802726
  episode_reward_mean: 226.55213083450704
  episode_reward_min: -170.24879285871248
  episodes_this_iter: 90
  episodes_total: 14150
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3079.648
    load_time_ms: 2.246
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8215528726577759
      kl: 0.004667029716074467
      policy_loss: -0.0005457306979224086
      total_loss: 9

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4184 s, 180 iter, 1800000 ts, 280 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-03-31
  done: false
  episode_len_mean: 113.8
  episode_reward_max: 374.54727109089544
  episode_reward_mean: 256.4623122242506
  episode_reward_min: -163.9815642400457
  episodes_this_iter: 88
  episodes_total: 14585
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3078.649
    load_time_ms: 2.342
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7524605989456177
      kl: 0.0065925768576562405
      policy_loss: -0.0008164599421434104
      total_loss: 839

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4300 s, 185 iter, 1850000 ts, 285 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-05-27
  done: false
  episode_len_mean: 113.52
  episode_reward_max: 373.7711273563406
  episode_reward_mean: 253.91475393066634
  episode_reward_min: -162.6960953165237
  episodes_this_iter: 88
  episodes_total: 15023
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3051.652
    load_time_ms: 2.591
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8038730621337891
      kl: 0.005831657908856869
      policy_loss: -0.0005911681219004095
      total_loss: 900

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4415 s, 190 iter, 1900000 ts, 263 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-07-23
  done: false
  episode_len_mean: 110.29
  episode_reward_max: 385.84825391454433
  episode_reward_mean: 242.71021729840436
  episode_reward_min: -165.69574154963433
  episodes_this_iter: 90
  episodes_total: 15469
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3050.024
    load_time_ms: 2.628
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6729345321655273
      kl: 0.005526633933186531
      policy_loss: -0.0009462104644626379
      total_loss: 9

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4531 s, 195 iter, 1950000 ts, 280 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-09-19
  done: false
  episode_len_mean: 114.44
  episode_reward_max: 374.45181132924705
  episode_reward_mean: 260.9469734178894
  episode_reward_min: -165.87145054484714
  episodes_this_iter: 87
  episodes_total: 15903
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3057.416
    load_time_ms: 2.289
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7816578149795532
      kl: 0.008425647392868996
      policy_loss: -0.00218013022094965
      total_loss: 734.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4647 s, 200 iter, 2000000 ts, 231 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-11-15
  done: false
  episode_len_mean: 112.6
  episode_reward_max: 376.7732196256099
  episode_reward_mean: 251.6414511507744
  episode_reward_min: -164.32886079162452
  episodes_this_iter: 89
  episodes_total: 16347
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3057.679
    load_time_ms: 2.148
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7369194030761719
      kl: 0.006466034334152937
      policy_loss: -0.0014421670930460095
      total_loss: 906.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4763 s, 205 iter, 2050000 ts, 265 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-13-11
  done: false
  episode_len_mean: 116.72
  episode_reward_max: 385.43332985944807
  episode_reward_mean: 271.3506241597613
  episode_reward_min: -161.7119973486465
  episodes_this_iter: 86
  episodes_total: 16785
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3065.254
    load_time_ms: 2.348
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7176838517189026
      kl: 0.005854419432580471
      policy_loss: -0.0004941324586980045
      total_loss: 672

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4879 s, 210 iter, 2100000 ts, 250 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-15-07
  done: false
  episode_len_mean: 112.43
  episode_reward_max: 377.01519512352866
  episode_reward_mean: 251.6246474608237
  episode_reward_min: -162.16317524266074
  episodes_this_iter: 88
  episodes_total: 17225
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3055.75
    load_time_ms: 2.557
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7668604254722595
      kl: 0.002801628317683935
      policy_loss: 0.0001541185047244653
      total_loss: 818.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 4993 s, 215 iter, 2150000 ts, 278 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-17-02
  done: false
  episode_len_mean: 117.53
  episode_reward_max: 373.9536682384071
  episode_reward_mean: 280.86973954010824
  episode_reward_min: -161.00157256048917
  episodes_this_iter: 85
  episodes_total: 17651
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3058.381
    load_time_ms: 2.463
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7532693147659302
      kl: 0.006343824788928032
      policy_loss: -0.0025811956729739904
      total_loss: 51

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 5108 s, 220 iter, 2200000 ts, 302 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-18-57
  done: false
  episode_len_mean: 106.84
  episode_reward_max: 371.7055098059209
  episode_reward_mean: 213.9726092809837
  episode_reward_min: -161.79025298410482
  episodes_this_iter: 95
  episodes_total: 18091
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3040.51
    load_time_ms: 2.457
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8481704592704773
      kl: 0.004037397913634777
      policy_loss: -0.0019183328840881586
      total_loss: 1314

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 5224 s, 225 iter, 2250000 ts, 268 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-20-54
  done: false
  episode_len_mean: 118.25
  episode_reward_max: 375.3031611913704
  episode_reward_mean: 290.60324702238535
  episode_reward_min: -153.84760294314657
  episodes_this_iter: 85
  episodes_total: 18523
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3047.047
    load_time_ms: 2.63
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7223972082138062
      kl: 0.0037699334789067507
      policy_loss: -0.0013375108828768134
      total_loss: 50

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 5341 s, 230 iter, 2300000 ts, 293 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-22-50
  done: false
  episode_len_mean: 111.99
  episode_reward_max: 371.7718408519683
  episode_reward_mean: 255.3485709355052
  episode_reward_min: -167.69399170979634
  episodes_this_iter: 90
  episodes_total: 18967
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3060.506
    load_time_ms: 2.602
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7880701422691345
      kl: 0.007537589408457279
      policy_loss: -0.002633859636262059
      total_loss: 688.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 5456 s, 235 iter, 2350000 ts, 273 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-24-46
  done: false
  episode_len_mean: 113.36
  episode_reward_max: 375.9209805678612
  episode_reward_mean: 266.71009873289256
  episode_reward_min: -164.24981996066606
  episodes_this_iter: 89
  episodes_total: 19404
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3054.311
    load_time_ms: 2.44
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6808080077171326
      kl: 0.003304489888250828
      policy_loss: -0.0008041919791139662
      total_loss: 660

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 5571 s, 240 iter, 2400000 ts, 265 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-26-41
  done: false
  episode_len_mean: 118.1
  episode_reward_max: 376.880462870923
  episode_reward_mean: 289.4736656412252
  episode_reward_min: -154.00950979327692
  episodes_this_iter: 84
  episodes_total: 19835
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3042.356
    load_time_ms: 2.467
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7054092288017273
      kl: 0.007299729622900486
      policy_loss: -0.001733990851789713
      total_loss: 369.55

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=11586], 5686 s, 245 iter, 2450000 ts, 255 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-28-37
  done: false
  episode_len_mean: 111.68
  episode_reward_max: 371.73069477292853
  episode_reward_mean: 254.38225396087762
  episode_reward_min: -157.21597439079392
  episodes_this_iter: 90
  episodes_total: 20276
  experiment_id: 6a538aa2882144288cee4b6caba79e19
  hostname: Gandalf
  info:
    grad_time_ms: 3024.776
    load_time_ms: 2.473
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6447128057479858
      kl: 0.0033414876088500023
      policy_loss: -0.0008293116115964949
      total_loss: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=11586], 5802 s, 250 iter, 2500000 ts, 298 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=11586], 5802 s, 250 iter, 2500000 ts, 298 rew



In [18]:
# Launch a training run. executeTraining is defined outside this part of the
# notebook (presumably in an earlier cell -- verify before Restart & Run All).
# Judging by the output recorded below, the call starts SUMO and runs a
# PPO_MultiAgentIntersectionEnv-v0 trial under a new experiment_id, printing a
# status/result block periodically while it trains.
executeTraining()

 Starting SUMO on port 46463
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-16_05-30-10a4o30o_v -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



20.90777836676881
3.996570103657498


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-30-48
  done: false
  episode_len_mean: 482.3
  episode_reward_max: 224.09629962626445
  episode_reward_mean: 69.05335200408665
  episode_reward_min: -140.18420703767222
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 4026.845
    load_time_ms: 141.323
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.422379493713379
      kl: 0.0013371440581977367
      policy_loss: -0.0016553581226617098
      total_loss: 56.40946578979492
      vf_explained_var: 0.08510105311870575
      vf_loss: 56.410850524902344
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4217745065689087
      kl: 0.0005206184578128159
      policy_loss: -0.0007711558137089014
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 115 s, 5 iter, 50000 ts, 108 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-32-41
  done: false
  episode_len_mean: 397.15
  episode_reward_max: 340.90716906926747
  episode_reward_mean: 105.51886156784344
  episode_reward_min: -159.55837165381828
  episodes_this_iter: 29
  episodes_total: 141
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3187.537
    load_time_ms: 25.585
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4230505228042603
      kl: 0.0038782779593020678
      policy_loss: -0.0011354110902175307
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 228 s, 10 iter, 100000 ts, 201 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-34-36
  done: false
  episode_len_mean: 297.44
  episode_reward_max: 366.4736399705221
  episode_reward_mean: 219.6957031327795
  episode_reward_min: -156.60150911419575
  episodes_this_iter: 36
  episodes_total: 299
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3025.99
    load_time_ms: 2.481
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.3735164403915405
      kl: 0.007029008585959673
      policy_loss: -0.0017180832801386714
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 344 s, 15 iter, 150000 ts, 286 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-36-31
  done: false
  episode_len_mean: 233.02
  episode_reward_max: 337.89910681757533
  episode_reward_mean: 287.36846838188825
  episode_reward_min: -154.29453724193758
  episodes_this_iter: 45
  episodes_total: 503
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3059.813
    load_time_ms: 2.32
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.2914894819259644
      kl: 0.009357372298836708
      policy_loss: -0.003302983706817031
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 461 s, 20 iter, 200000 ts, 286 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-38-29
  done: false
  episode_len_mean: 173.88
  episode_reward_max: 317.6843496888707
  episode_reward_mean: 282.77418725267694
  episode_reward_min: 247.92626365024887
  episodes_this_iter: 59
  episodes_total: 769
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3085.26
    load_time_ms: 2.345
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.2691349983215332
      kl: 0.0046928199008107185
      policy_loss: -0.0022713218349963427
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 576 s, 25 iter, 250000 ts, 291 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-40-23
  done: false
  episode_len_mean: 164.66
  episode_reward_max: 329.4706118500509
  episode_reward_mean: 291.3214147323086
  episode_reward_min: -139.48442733270036
  episodes_this_iter: 61
  episodes_total: 1069
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3091.95
    load_time_ms: 2.611
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 4.7683716530855236e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.2666574716567993
      kl: 0.0071014962159097195
      policy_loss: -0.0019364699255675077
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 691 s, 30 iter, 300000 ts, 303 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-42-20
  done: false
  episode_len_mean: 161.28
  episode_reward_max: 333.6443325433717
  episode_reward_mean: 305.92681289046175
  episode_reward_min: 269.56384685541974
  episodes_this_iter: 61
  episodes_total: 1377
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3071.048
    load_time_ms: 2.573
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.19838285446167
      kl: 0.019005557522177696
      policy_loss: -0.005776442587375641
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 808 s, 35 iter, 350000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-44-17
  done: false
  episode_len_mean: 155.19
  episode_reward_max: 328.1505376081835
  episode_reward_mean: 304.87147433376714
  episode_reward_min: 278.3818130995489
  episodes_this_iter: 65
  episodes_total: 1695
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3056.179
    load_time_ms: 2.437
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 7.450580707946131e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.139599084854126
      kl: 0.011457394808530807
      policy_loss: -0.0038389419205486774
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 925 s, 40 iter, 400000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-46-14
  done: false
  episode_len_mean: 145.93
  episode_reward_max: 336.00465230950806
  episode_reward_mean: 306.4727693760031
  episode_reward_min: 277.0861628759459
  episodes_this_iter: 70
  episodes_total: 2032
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3056.513
    load_time_ms: 2.532
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 9.313225884932663e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.123267650604248
      kl: 0.004797554574906826
      policy_loss: -0.0019512304570525885
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1041 s, 45 iter, 450000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-48-11
  done: false
  episode_len_mean: 146.52
  episode_reward_max: 344.89306372918594
  episode_reward_mean: 309.2827396150618
  episode_reward_min: 281.55462748450736
  episodes_this_iter: 68
  episodes_total: 2373
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3060.1
    load_time_ms: 2.607
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 2.3283064712331658e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.1779934167861938
      kl: 0.022088993340730667
      policy_loss: -0.004008053801953793
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1159 s, 50 iter, 500000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-50-08
  done: false
  episode_len_mean: 147.97
  episode_reward_max: 342.69229329124886
  episode_reward_mean: 314.7686669055041
  episode_reward_min: 284.53798733451436
  episodes_this_iter: 68
  episodes_total: 2712
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3082.173
    load_time_ms: 2.366
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 1.1641532356165829e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.0743342638015747
      kl: 0.006962932646274567
      policy_loss: -0.0021118242293596268
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1277 s, 55 iter, 550000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-52-06
  done: false
  episode_len_mean: 146.5
  episode_reward_max: 344.18914584140464
  episode_reward_mean: 314.5615060474314
  episode_reward_min: 288.6622956448444
  episodes_this_iter: 68
  episodes_total: 3053
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3064.435
    load_time_ms: 2.365
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 1.4551915445207286e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.034365177154541
      kl: 0.011636425741016865
      policy_loss: -0.0036390090826898813
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1393 s, 60 iter, 600000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-54-03
  done: false
  episode_len_mean: 144.83
  episode_reward_max: 344.6364878850856
  episode_reward_mean: 318.46344083388806
  episode_reward_min: 289.4278887163349
  episodes_this_iter: 70
  episodes_total: 3397
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3083.418
    load_time_ms: 2.374
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 3.6379788613018216e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.9315219521522522
      kl: 0.01570180244743824
      policy_loss: -0.0021787078585475683
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1510 s, 65 iter, 650000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-56-01
  done: false
  episode_len_mean: 147.02
  episode_reward_max: 348.8865512319995
  episode_reward_mean: 316.29566911970943
  episode_reward_min: 286.6966487718886
  episodes_this_iter: 69
  episodes_total: 3738
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3118.523
    load_time_ms: 2.378
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 6.821208501468431e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.978557288646698
      kl: 0.0034099130425602198
      policy_loss: -0.0005135151441209018
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1627 s, 70 iter, 700000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-57-58
  done: false
  episode_len_mean: 148.97
  episode_reward_max: 354.65275649679825
  episode_reward_mean: 322.5695747232938
  episode_reward_min: 293.2853257860173
  episodes_this_iter: 67
  episodes_total: 4075
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3093.026
    load_time_ms: 2.495
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 2.131627656708885e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.9572270512580872
      kl: 0.017895866185426712
      policy_loss: -0.0014275000430643559
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1745 s, 75 iter, 750000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_05-59-55
  done: false
  episode_len_mean: 148.3
  episode_reward_max: 349.94199578986473
  episode_reward_mean: 320.63731789819025
  episode_reward_min: 284.9857206032226
  episodes_this_iter: 66
  episodes_total: 4411
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3056.437
    load_time_ms: 2.454
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 1.5987214307459333e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.9287405610084534
      kl: 0.005760057829320431
      policy_loss: -0.0002486167068127543
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1862 s, 80 iter, 800000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-01-52
  done: false
  episode_len_mean: 147.02
  episode_reward_max: 349.688635516286
  episode_reward_mean: 318.53274494231545
  episode_reward_min: 286.11485714825926
  episodes_this_iter: 69
  episodes_total: 4753
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3055.263
    load_time_ms: 2.402
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 1.9984017884324166e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.8230554461479187
      kl: 0.007794863078743219
      policy_loss: -0.0012128385715186596
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 1979 s, 85 iter, 850000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-03-50
  done: false
  episode_len_mean: 147.37
  episode_reward_max: 351.259877486061
  episode_reward_mean: 320.3186794363401
  episode_reward_min: 283.340130040549
  episodes_this_iter: 68
  episodes_total: 5091
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3062.928
    load_time_ms: 2.411
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 2.4980022355405207e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.7790607810020447
      kl: 0.0160536952316761
      policy_loss: 0.0022362081799656153
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2096 s, 90 iter, 900000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-05-48
  done: false
  episode_len_mean: 147.44
  episode_reward_max: 353.5473793898524
  episode_reward_mean: 321.8303713875301
  episode_reward_min: 283.4620973688232
  episodes_this_iter: 67
  episodes_total: 5429
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3074.497
    load_time_ms: 2.598
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 1.5612513972128255e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.6811617612838745
      kl: 0.010755817405879498
      policy_loss: 0.0017151182983070612
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2213 s, 95 iter, 950000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-07-44
  done: false
  episode_len_mean: 147.57
  episode_reward_max: 370.1420301130043
  episode_reward_mean: 323.49585548843834
  episode_reward_min: 288.72602702519043
  episodes_this_iter: 68
  episodes_total: 5768
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3063.348
    load_time_ms: 2.564
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.5612513972128255e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.5984598994255066
      kl: 0.017376583069562912
      policy_loss: 0.005101227201521397
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2329 s, 100 iter, 1000000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-09-40
  done: false
  episode_len_mean: 146.72
  episode_reward_max: 356.4557994930526
  episode_reward_mean: 320.5627650250802
  episode_reward_min: 286.93461965443197
  episodes_this_iter: 68
  episodes_total: 6108
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3041.623
    load_time_ms: 2.422
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 7.806256986064127e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.5109758377075195
      kl: 0.02265365980565548
      policy_loss: 0.009584669023752213
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2443 s, 105 iter, 1050000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-11-35
  done: false
  episode_len_mean: 146.13
  episode_reward_max: 350.3158096615937
  episode_reward_mean: 316.9527987940955
  episode_reward_min: 287.92680357216227
  episodes_this_iter: 69
  episodes_total: 6450
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3031.749
    load_time_ms: 2.469
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 7.806256986064127e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.5363093018531799
      kl: 0.013424322940409184
      policy_loss: 0.0046213530004024506
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2560 s, 110 iter, 1100000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-13-31
  done: false
  episode_len_mean: 145.28
  episode_reward_max: 353.1305930677282
  episode_reward_mean: 319.60240343575634
  episode_reward_min: 281.1131515862299
  episodes_this_iter: 69
  episodes_total: 6792
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3007.15
    load_time_ms: 2.332
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 7.806256986064127e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.41199991106987
      kl: 0.014967021532356739
      policy_loss: 0.0029975196812301874
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2675 s, 115 iter, 1150000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-15-27
  done: false
  episode_len_mean: 146.17
  episode_reward_max: 355.329671615929
  episode_reward_mean: 323.14162732023146
  episode_reward_min: 290.7065813343359
  episodes_this_iter: 68
  episodes_total: 7134
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2986.69
    load_time_ms: 2.191
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 1.1709383669638601e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.4169359803199768
      kl: 0.016859637573361397
      policy_loss: 0.003640844952315092
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2789 s, 120 iter, 1200000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-17-21
  done: false
  episode_len_mean: 146.05
  episode_reward_max: 354.37104147390625
  episode_reward_mean: 319.4464650391714
  episode_reward_min: 287.9435919507015
  episodes_this_iter: 68
  episodes_total: 7476
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2989.518
    load_time_ms: 2.366
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.1709383669638601e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.4433545470237732
      kl: 0.024266963824629784
      policy_loss: 0.00452339556068182
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 2903 s, 125 iter, 1250000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-19-15
  done: false
  episode_len_mean: 145.1
  episode_reward_max: 361.0719422329883
  episode_reward_mean: 322.0031813671407
  episode_reward_min: 282.663096708413
  episodes_this_iter: 69
  episodes_total: 7820
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3013.803
    load_time_ms: 2.53
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 1.756406981759119e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.3912506103515625
      kl: 0.012929036282002926
      policy_loss: 0.0011821455555036664
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3018 s, 130 iter, 1300000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-21-10
  done: false
  episode_len_mean: 145.82
  episode_reward_max: 354.0455442339853
  episode_reward_mean: 322.84053358843084
  episode_reward_min: 291.50530877901184
  episodes_this_iter: 69
  episodes_total: 8164
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3016.862
    load_time_ms: 2.349
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 1.317305908403587e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.3858959674835205
      kl: 14.723283767700195
      policy_loss: 0.04006306454539299
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3134 s, 135 iter, 1350000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-23-06
  done: false
  episode_len_mean: 146.35
  episode_reward_max: 366.80371346487516
  episode_reward_mean: 325.20332548127055
  episode_reward_min: 295.11056953947224
  episodes_this_iter: 69
  episodes_total: 8507
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2995.881
    load_time_ms: 2.211
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 4.445907608883168e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.41985106468200684
      kl: 0.035717856138944626
      policy_loss: 0.003990287892520428
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3248 s, 140 iter, 1400000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-25-02
  done: false
  episode_len_mean: 146.45
  episode_reward_max: 356.35008064325933
  episode_reward_mean: 324.31655971063105
  episode_reward_min: 289.8657434976847
  episodes_this_iter: 69
  episodes_total: 8850
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2997.422
    load_time_ms: 2.314
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 1.0003289535047714e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.4342087507247925
      kl: 0.027892833575606346
      policy_loss: 0.005811518523842096
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3361 s, 145 iter, 1450000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-26-54
  done: false
  episode_len_mean: 147.4
  episode_reward_max: 373.2069138073994
  episode_reward_mean: 329.8233933035024
  episode_reward_min: 298.8271290847023
  episodes_this_iter: 68
  episodes_total: 9191
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2999.599
    load_time_ms: 2.277
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 1.500493885206494e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.38957443833351135
      kl: 51.39881134033203
      policy_loss: 0.03817041218280792
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3476 s, 150 iter, 1500000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-28-50
  done: false
  episode_len_mean: 145.82
  episode_reward_max: 360.7888646968456
  episode_reward_mean: 323.14403129591415
  episode_reward_min: 286.46830945057775
  episodes_this_iter: 68
  episodes_total: 9534
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2985.435
    load_time_ms: 2.276
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 2.2507397524749446e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.2806739807128906
      kl: 0.031986746937036514
      policy_loss: 0.006356645375490189
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3592 s, 155 iter, 1550000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-30-46
  done: false
  episode_len_mean: 147.34
  episode_reward_max: 361.81096343877715
  episode_reward_mean: 326.55565378025585
  episode_reward_min: 289.85557819484575
  episodes_this_iter: 68
  episodes_total: 9873
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3006.452
    load_time_ms: 2.239
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 5.064165146172146e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.27203062176704407
      kl: 0.08798684179782867
      policy_loss: 0.01525939255952835
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3706 s, 160 iter, 1600000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-32-41
  done: false
  episode_len_mean: 147.93
  episode_reward_max: 360.38479744491764
  episode_reward_mean: 326.93836802826024
  episode_reward_min: 293.1966801168247
  episodes_this_iter: 68
  episodes_total: 10213
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3000.033
    load_time_ms: 2.222
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 2.563733414998108e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.40105772018432617
      kl: 0.01765749230980873
      policy_loss: 0.0010151398601010442
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3822 s, 165 iter, 1650000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-34-36
  done: false
  episode_len_mean: 147.08
  episode_reward_max: 365.17876035661664
  episode_reward_mean: 330.16103982461624
  episode_reward_min: 286.3558687488977
  episodes_this_iter: 68
  episodes_total: 10552
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2974.529
    load_time_ms: 2.282
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 2.563733414998108e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.2695944607257843
      kl: 0.05871383845806122
      policy_loss: 0.009934885427355766
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 3936 s, 170 iter, 1700000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-36-30
  done: false
  episode_len_mean: 147.08
  episode_reward_max: 367.53932293577907
  episode_reward_mean: 329.89187010816
  episode_reward_min: 291.1853161619603
  episodes_this_iter: 67
  episodes_total: 10893
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3017.639
    load_time_ms: 2.339
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 8.652602492462656e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.32536500692367554
      kl: 5.937588691711426
      policy_loss: 0.013955576345324516
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4050 s, 175 iter, 1750000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-38-25
  done: false
  episode_len_mean: 147.32
  episode_reward_max: 369.50781725562376
  episode_reward_mean: 326.3682936350272
  episode_reward_min: 290.03127510524354
  episodes_this_iter: 68
  episodes_total: 11233
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3039.052
    load_time_ms: 2.517
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 1.9468355608040977e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.351024329662323
      kl: 0.024595022201538086
      policy_loss: 0.0015908980276435614
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4164 s, 180 iter, 1800000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-40-19
  done: false
  episode_len_mean: 147.06
  episode_reward_max: 363.31655198128374
  episode_reward_mean: 325.30404903740464
  episode_reward_min: 292.76499425975237
  episodes_this_iter: 68
  episodes_total: 11571
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2993.88
    load_time_ms: 2.592
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 1.9468355608040977e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.311768114566803
      kl: 0.013032515533268452
      policy_loss: 0.0012382030254229903
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4278 s, 185 iter, 1850000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-42-13
  done: false
  episode_len_mean: 145.81
  episode_reward_max: 358.77165813328656
  episode_reward_mean: 327.025453795547
  episode_reward_min: 288.60874411879684
  episodes_this_iter: 69
  episodes_total: 11915
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2975.416
    load_time_ms: 2.522
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 1.9468355608040977e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.27211618423461914
      kl: 8.54168701171875
      policy_loss: 0.027218660339713097
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4394 s, 190 iter, 1900000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-44-09
  done: false
  episode_len_mean: 145.84
  episode_reward_max: 366.97153939223284
  episode_reward_mean: 323.93416605901433
  episode_reward_min: 286.9683555541596
  episodes_this_iter: 69
  episodes_total: 12257
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3008.271
    load_time_ms: 2.41
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 4.380378794199358e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.28134703636169434
      kl: 0.01500647608190775
      policy_loss: 0.0024890138301998377
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4508 s, 195 iter, 1950000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-46-03
  done: false
  episode_len_mean: 144.9
  episode_reward_max: 361.06701358195056
  episode_reward_mean: 326.25324771365194
  episode_reward_min: 296.960495745647
  episodes_this_iter: 69
  episodes_total: 12601
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3014.359
    load_time_ms: 2.392
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 9.855850910520016e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.19163383543491364
      kl: 0.01896139606833458
      policy_loss: 0.0023698972072452307
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4623 s, 200 iter, 2000000 ts, 328 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-47-58
  done: false
  episode_len_mean: 144.63
  episode_reward_max: 362.66110698524125
  episode_reward_mean: 328.3073263077016
  episode_reward_min: 293.26806228389165
  episodes_this_iter: 70
  episodes_total: 12946
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2987.921
    load_time_ms: 2.299
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 9.855850910520016e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.15906712412834167
      kl: 0.023548820987343788
      policy_loss: 0.0027866861782968044
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4737 s, 205 iter, 2050000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-49-52
  done: false
  episode_len_mean: 145.69
  episode_reward_max: 367.5393768621048
  episode_reward_mean: 328.12576024864984
  episode_reward_min: 290.36651315693234
  episodes_this_iter: 68
  episodes_total: 13290
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2998.466
    load_time_ms: 2.113
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 9.855850910520016e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.15948912501335144
      kl: 0.02260385826230049
      policy_loss: 0.00386191438883543
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4852 s, 210 iter, 2100000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-51-48
  done: false
  episode_len_mean: 144.99
  episode_reward_max: 364.31802441820776
  episode_reward_mean: 325.0116423336169
  episode_reward_min: 292.6652030545059
  episodes_this_iter: 68
  episodes_total: 13635
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2998.316
    load_time_ms: 2.14
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 2.2175669842625957e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.08060428500175476
      kl: 0.010693652555346489
      policy_loss: -0.0021647335961461067
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 4967 s, 215 iter, 2150000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-53-44
  done: false
  episode_len_mean: 144.52
  episode_reward_max: 359.37432583256793
  episode_reward_mean: 323.33912642459694
  episode_reward_min: 291.3871979481185
  episodes_this_iter: 69
  episodes_total: 13982
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 2980.173
    load_time_ms: 2.259
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 4.989526858085319e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.10441452264785767
      kl: 0.014336705207824707
      policy_loss: 0.001739945961162448
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 5081 s, 220 iter, 2200000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-55-38
  done: false
  episode_len_mean: 144.38
  episode_reward_max: 359.7447248922098
  episode_reward_mean: 323.5138126988916
  episode_reward_min: 289.7272323716609
  episodes_this_iter: 68
  episodes_total: 14327
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3004.67
    load_time_ms: 2.288
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 3.7421448047508105e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.12939715385437012
      kl: 0.013227651827037334
      policy_loss: -0.0012357719242572784
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 5197 s, 225 iter, 2250000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-57-33
  done: false
  episode_len_mean: 145.37
  episode_reward_max: 359.55630022041896
  episode_reward_mean: 324.4226662882013
  episode_reward_min: 289.6423833933365
  episodes_this_iter: 69
  episodes_total: 14674
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3019.982
    load_time_ms: 2.435
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 3.7421448047508105e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.11483865231275558
      kl: 0.024012498557567596
      policy_loss: 0.0015898765996098518
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 5312 s, 230 iter, 2300000 ts, 326 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_06-59-29
  done: false
  episode_len_mean: 144.77
  episode_reward_max: 360.2174649336967
  episode_reward_mean: 325.3840945332094
  episode_reward_min: 287.8233718443698
  episodes_this_iter: 70
  episodes_total: 15020
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3010.518
    load_time_ms: 2.495
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 3.7421448047508105e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.0882515236735344
      kl: 0.015604238957166672
      policy_loss: 0.0041585648432374
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 5426 s, 235 iter, 2350000 ts, 326 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-01-23
  done: false
  episode_len_mean: 144.62
  episode_reward_max: 357.37270664796984
  episode_reward_mean: 325.76097749596215
  episode_reward_min: 293.2783893856366
  episodes_this_iter: 70
  episodes_total: 15366
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3018.513
    load_time_ms: 2.351
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 5.6132175459393946e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.040925033390522
      kl: 0.021731244400143623
      policy_loss: 0.0007871999405324459
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 5540 s, 240 iter, 2400000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-03-17
  done: false
  episode_len_mean: 145.78
  episode_reward_max: 355.6072520330849
  episode_reward_mean: 326.7228894609249
  episode_reward_min: 287.53284267104857
  episodes_this_iter: 69
  episodes_total: 15713
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3010.092
    load_time_ms: 2.258
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 4.2099129900479565e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.09963693469762802
      kl: 0.020767977461218834
      policy_loss: 0.0019173210021108389
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12934], 5654 s, 245 iter, 2450000 ts, 328 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-05-11
  done: false
  episode_len_mean: 145.86
  episode_reward_max: 364.09483621535475
  episode_reward_mean: 325.7070780303791
  episode_reward_min: 288.06083823301447
  episodes_this_iter: 68
  episodes_total: 16055
  experiment_id: 5c3ea7354895452d94534bdd12753dcb
  hostname: Gandalf
  info:
    grad_time_ms: 3002.975
    load_time_ms: 2.212
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 4.2099129900479565e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.04361474886536598
      kl: 0.015627054497599602
      policy_loss: 0.00012752757174894214
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=12934], 5768 s, 250 iter, 2500000 ts, 327 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=12934], 5768 s, 250 iter, 2500000 ts, 327 rew



In [19]:
# Launch a training run through executeTraining(), which is defined in an
# earlier cell of this notebook (outside this view).
# NOTE(review): the recorded output of this cell reports a different
# experiment_id and worker pid than the preceding run, and its step/episode
# counters start low again, so this appears to start a new run rather than
# resume the earlier one -- confirm this repeat run is intended.
executeTraining()

 Starting SUMO on port 59043
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-16_07-06-459ng3s251 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



10.957504619834332
17.644324126891608


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-07-43
  done: false
  episode_len_mean: 442.8181818181818
  episode_reward_max: 180.19796888057584
  episode_reward_mean: 42.19472211105914
  episode_reward_min: -153.75328963924363
  episodes_this_iter: 22
  episodes_total: 22
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 4137.981
    load_time_ms: 140.993
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.418033242225647
      kl: 0.000716587295755744
      policy_loss: -0.0011960151605308056
      total_loss: 99.93111419677734
      vf_explained_var: 0.04277541860938072
      vf_loss: 99.93217468261719
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4212207794189453
      kl: 0.000382322003133595
      policy_loss: -0.001716250786557

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 116 s, 5 iter, 50000 ts, 107 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-09-37
  done: false
  episode_len_mean: 445.61
  episode_reward_max: 329.09974058977184
  episode_reward_mean: 138.1547633246074
  episode_reward_min: -150.38365234295665
  episodes_this_iter: 25
  episodes_total: 133
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3337.813
    load_time_ms: 25.478
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.3656378984451294
      kl: 0.008572235703468323
      policy_loss: -0.0026705574709922075
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 230 s, 10 iter, 100000 ts, 202 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-11-31
  done: false
  episode_len_mean: 251.71
  episode_reward_max: 375.39376805737953
  episode_reward_mean: 204.75104735585526
  episode_reward_min: -153.8168149767231
  episodes_this_iter: 42
  episodes_total: 314
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3176.665
    load_time_ms: 2.412
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.3647626638412476
      kl: 0.00784820131957531
      policy_loss: -0.002342699794098735
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 347 s, 15 iter, 150000 ts, 228 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-13-29
  done: false
  episode_len_mean: 188.93
  episode_reward_max: 391.4630033054636
  episode_reward_mean: 217.73082203262885
  episode_reward_min: -160.71036002109724
  episodes_this_iter: 54
  episodes_total: 561
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3170.718
    load_time_ms: 2.688
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.4000263214111328
      kl: 0.004621367435902357
      policy_loss: -0.0016882885247468948
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 465 s, 20 iter, 200000 ts, 205 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-15-27
  done: false
  episode_len_mean: 184.62
  episode_reward_max: 361.03387746712156
  episode_reward_mean: 200.6936034697309
  episode_reward_min: -161.48163085226903
  episodes_this_iter: 52
  episodes_total: 835
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3146.192
    load_time_ms: 2.609
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.4141310453414917
      kl: 0.002740053692832589
      policy_loss: -0.0012147234519943595
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 583 s, 25 iter, 250000 ts, 230 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-17-25
  done: false
  episode_len_mean: 177.16
  episode_reward_max: 383.6260486940086
  episode_reward_mean: 230.439909322433
  episode_reward_min: -162.9552635191619
  episodes_this_iter: 59
  episodes_total: 1128
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3124.302
    load_time_ms: 2.465
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.3574265241622925
      kl: 0.0025174166075885296
      policy_loss: -0.001346086268313229
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 701 s, 30 iter, 300000 ts, 202 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-19-24
  done: false
  episode_len_mean: 170.07
  episode_reward_max: 375.6271377211971
  episode_reward_mean: 197.8501637216654
  episode_reward_min: -167.00898139898274
  episodes_this_iter: 59
  episodes_total: 1417
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3139.781
    load_time_ms: 2.694
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 3.7252903539730653e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.328795075416565
      kl: 0.009819517843425274
      policy_loss: -0.004774404224008322
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 817 s, 35 iter, 350000 ts, 201 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-21-21
  done: false
  episode_len_mean: 152.21
  episode_reward_max: 376.3098002238217
  episode_reward_mean: 223.60935164689775
  episode_reward_min: -161.2825433771852
  episodes_this_iter: 66
  episodes_total: 1730
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3144.573
    load_time_ms: 2.734
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 1.1641532356165829e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3461886644363403
      kl: 0.0027850617188960314
      policy_loss: -0.0009271439630538225
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 937 s, 40 iter, 400000 ts, 234 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-23-21
  done: false
  episode_len_mean: 135.4
  episode_reward_max: 365.6850884724705
  episode_reward_mean: 194.0589350307964
  episode_reward_min: -159.92992937881334
  episodes_this_iter: 74
  episodes_total: 2084
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3132.429
    load_time_ms: 2.669
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 3.6379788613018216e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.364557147026062
      kl: 0.0035879085771739483
      policy_loss: -0.0010974567849189043
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1054 s, 45 iter, 450000 ts, 250 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-25-18
  done: false
  episode_len_mean: 133.87
  episode_reward_max: 374.95120870401644
  episode_reward_mean: 226.2481523866765
  episode_reward_min: -150.96645989945824
  episodes_this_iter: 74
  episodes_total: 2442
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3146.086
    load_time_ms: 2.506
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 1.1368683941568192e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.288644790649414
      kl: 0.0029482266400009394
      policy_loss: -0.0007543688989244401
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1172 s, 50 iter, 500000 ts, 252 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-27-16
  done: false
  episode_len_mean: 131.21
  episode_reward_max: 390.1401277520701
  episode_reward_mean: 239.5075740718704
  episode_reward_min: -160.4262794249807
  episodes_this_iter: 77
  episodes_total: 2816
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3164.324
    load_time_ms: 2.376
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 3.55271373174006e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.2446123361587524
      kl: 0.004751934669911861
      policy_loss: -0.0013268085895106196
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1290 s, 55 iter, 550000 ts, 266 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-29-14
  done: false
  episode_len_mean: 131.94
  episode_reward_max: 365.5133593507118
  episode_reward_mean: 272.13847281089375
  episode_reward_min: -153.99566914192775
  episodes_this_iter: 77
  episodes_total: 3194
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3171.493
    load_time_ms: 2.548
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 1.1102230411687688e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.245880365371704
      kl: 0.0051233707927167416
      policy_loss: -0.002575690858066082
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1410 s, 60 iter, 600000 ts, 286 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-31-14
  done: false
  episode_len_mean: 124.6
  episode_reward_max: 370.56540903823264
  episode_reward_mean: 266.8686269750391
  episode_reward_min: -138.78807424694227
  episodes_this_iter: 81
  episodes_total: 3589
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3167.321
    load_time_ms: 2.544
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 6.938894007304805e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1903101205825806
      kl: 0.009892275556921959
      policy_loss: -0.0025838136207312346
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1528 s, 65 iter, 650000 ts, 288 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-33-13
  done: false
  episode_len_mean: 127.53
  episode_reward_max: 356.4278117083467
  episode_reward_mean: 309.4596465118213
  episode_reward_min: -135.3780120806261
  episodes_this_iter: 77
  episodes_total: 3982
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3170.733
    load_time_ms: 2.381
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 8.673617509131006e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.1135388612747192
      kl: 0.01093710120767355
      policy_loss: -0.0043191248551011086
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1647 s, 70 iter, 700000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-35-13
  done: false
  episode_len_mean: 124.1
  episode_reward_max: 364.5448014852707
  episode_reward_mean: 318.31613699071096
  episode_reward_min: 273.54076532491865
  episodes_this_iter: 80
  episodes_total: 4381
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3187.27
    load_time_ms: 2.519
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 4.336808754565503e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.034837007522583
      kl: 0.035024065524339676
      policy_loss: -0.0066793132573366165
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1766 s, 75 iter, 750000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-37-11
  done: false
  episode_len_mean: 128.16
  episode_reward_max: 377.2228500717146
  episode_reward_mean: 328.87614237433905
  episode_reward_min: 281.5607037504612
  episodes_this_iter: 80
  episodes_total: 4778
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3180.322
    load_time_ms: 2.544
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 5.421010943206879e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.0397577285766602
      kl: 0.012274938635528088
      policy_loss: -0.004011180717498064
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 1884 s, 80 iter, 800000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-39-09
  done: false
  episode_len_mean: 132.5
  episode_reward_max: 391.3419132577605
  episode_reward_mean: 343.1136480531035
  episode_reward_min: 134.57439252326435
  episodes_this_iter: 75
  episodes_total: 5160
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3165.689
    load_time_ms: 2.429
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 6.776263679008599e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.0505483150482178
      kl: 0.00791097991168499
      policy_loss: -0.0018611099803820252
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2002 s, 85 iter, 850000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-41-07
  done: false
  episode_len_mean: 127.05
  episode_reward_max: 393.3668096323722
  episode_reward_mean: 339.5332864391746
  episode_reward_min: -141.53459486234104
  episodes_this_iter: 79
  episodes_total: 5550
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3141.675
    load_time_ms: 2.317
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 2.117582399690187e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.029654860496521
      kl: 0.004944089334458113
      policy_loss: -0.002497523557394743
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2121 s, 90 iter, 900000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-43-06
  done: false
  episode_len_mean: 129.44
  episode_reward_max: 394.08162728017635
  episode_reward_mean: 343.3567097349634
  episode_reward_min: 163.3698255913343
  episodes_this_iter: 78
  episodes_total: 5939
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3123.224
    load_time_ms: 2.401
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 2.646977999612734e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.0492653846740723
      kl: 0.011467677541077137
      policy_loss: -0.003548212116584182
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2241 s, 95 iter, 950000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-45-06
  done: false
  episode_len_mean: 129.5
  episode_reward_max: 387.06279192063624
  episode_reward_mean: 344.1243378461454
  episode_reward_min: 137.73220212985768
  episodes_this_iter: 77
  episodes_total: 6324
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3147.507
    load_time_ms: 2.493
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.6543612497579586e-25
      cur_lr: 4.999999873689376e-05
      entropy: 1.0274419784545898
      kl: 0.014012116938829422
      policy_loss: -0.002470766194164753
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2359 s, 100 iter, 1000000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-47-04
  done: false
  episode_len_mean: 130.51
  episode_reward_max: 383.3639487768082
  episode_reward_mean: 336.4060697913196
  episode_reward_min: 124.15049003601092
  episodes_this_iter: 76
  episodes_total: 6708
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3157.805
    load_time_ms: 2.52
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 1.0339757810987241e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.0110950469970703
      kl: 0.004815020598471165
      policy_loss: -0.000995130161754787
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2476 s, 105 iter, 1050000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-49-02
  done: false
  episode_len_mean: 131.39
  episode_reward_max: 390.18791435632727
  episode_reward_mean: 339.14331153887287
  episode_reward_min: 131.0581164154412
  episodes_this_iter: 76
  episodes_total: 7090
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3079.091
    load_time_ms: 2.547
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 6.462348631867026e-28
      cur_lr: 4.999999873689376e-05
      entropy: 0.9920274019241333
      kl: 0.009824788197875023
      policy_loss: -0.002552399644628167
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2593 s, 110 iter, 1100000 ts, 350 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-50-59
  done: false
  episode_len_mean: 129.7
  episode_reward_max: 387.76267003241475
  episode_reward_mean: 341.76734574580803
  episode_reward_min: -139.10262017465828
  episodes_this_iter: 78
  episodes_total: 7474
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3014.143
    load_time_ms: 2.492
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 4.038967894916891e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.9467270970344543
      kl: 0.007103284355252981
      policy_loss: -0.002226894721388817
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2711 s, 115 iter, 1150000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-52-59
  done: false
  episode_len_mean: 130.03
  episode_reward_max: 384.05080454253016
  episode_reward_mean: 338.8986834948043
  episode_reward_min: 138.9948673706083
  episodes_this_iter: 76
  episodes_total: 7860
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3018.416
    load_time_ms: 2.559
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 2.524354934323057e-30
      cur_lr: 4.999999873689376e-05
      entropy: 0.911153256893158
      kl: 0.004650624468922615
      policy_loss: -0.0027817541267722845
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2828 s, 120 iter, 1200000 ts, 348 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-54-55
  done: false
  episode_len_mean: 129.76
  episode_reward_max: 388.41550413670456
  episode_reward_mean: 339.9918273719341
  episode_reward_min: 111.35139693824243
  episodes_this_iter: 78
  episodes_total: 8248
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2996.171
    load_time_ms: 2.446
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.5777218339519106e-31
      cur_lr: 4.999999873689376e-05
      entropy: 0.8639324903488159
      kl: 0.011699985712766647
      policy_loss: -0.0027910852804780006
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 2944 s, 125 iter, 1250000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-56-51
  done: false
  episode_len_mean: 128.37
  episode_reward_max: 385.43290609167065
  episode_reward_mean: 341.3600435058415
  episode_reward_min: 133.01540569126638
  episodes_this_iter: 77
  episodes_total: 8636
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2976.12
    load_time_ms: 2.233
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 9.860761462199441e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.8024216890335083
      kl: 0.004901099484413862
      policy_loss: -0.002574324142187834
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3058 s, 130 iter, 1300000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_07-58-45
  done: false
  episode_len_mean: 126.01
  episode_reward_max: 384.6645225807068
  episode_reward_mean: 334.80021127603163
  episode_reward_min: -143.0759808431987
  episodes_this_iter: 80
  episodes_total: 9030
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3001.59
    load_time_ms: 2.281
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 3.0814879569373254e-34
      cur_lr: 4.999999873689376e-05
      entropy: 0.8058927655220032
      kl: 0.013459243811666965
      policy_loss: -0.008134732022881508
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3172 s, 135 iter, 1350000 ts, 326 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-00-39
  done: false
  episode_len_mean: 123.78
  episode_reward_max: 389.37268677407474
  episode_reward_mean: 316.91037072758263
  episode_reward_min: -143.57428313260598
  episodes_this_iter: 81
  episodes_total: 9430
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2997.374
    load_time_ms: 2.551
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 7.703719892343314e-35
      cur_lr: 4.999999873689376e-05
      entropy: 0.7561962604522705
      kl: 0.0039027424063533545
      policy_loss: -0.000693627807777375
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3288 s, 140 iter, 1400000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-02-34
  done: false
  episode_len_mean: 125.97
  episode_reward_max: 393.03932112767654
  episode_reward_mean: 336.3340714855799
  episode_reward_min: -144.47767849406202
  episodes_this_iter: 79
  episodes_total: 9825
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2977.805
    load_time_ms: 2.443
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 2.4074124663572855e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.7052016258239746
      kl: 0.00711058359593153
      policy_loss: -0.0024639053735882044
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3402 s, 145 iter, 1450000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-04-30
  done: false
  episode_len_mean: 127.57
  episode_reward_max: 394.7723319806904
  episode_reward_mean: 347.95963823763117
  episode_reward_min: -146.4787480998635
  episodes_this_iter: 78
  episodes_total: 10220
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3017.45
    load_time_ms: 2.213
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 3.009265582946607e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.6625738143920898
      kl: 0.0066365995444357395
      policy_loss: -0.0031752390787005424
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3517 s, 150 iter, 1500000 ts, 350 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-06-25
  done: false
  episode_len_mean: 127.37
  episode_reward_max: 393.66485324686386
  episode_reward_mean: 349.45584188895145
  episode_reward_min: 152.79729206378062
  episodes_this_iter: 78
  episodes_total: 10611
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3032.171
    load_time_ms: 2.254
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 1.8807909893416293e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.6218243837356567
      kl: 0.007283533923327923
      policy_loss: -0.002172011649236083
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3631 s, 155 iter, 1550000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-08-20
  done: false
  episode_len_mean: 126.91
  episode_reward_max: 393.10416093343497
  episode_reward_mean: 350.8926842792926
  episode_reward_min: 172.11226973025487
  episodes_this_iter: 79
  episodes_total: 11006
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2995.831
    load_time_ms: 2.204
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 5.87746614891758e-40
      cur_lr: 4.999999873689376e-05
      entropy: 0.5801113247871399
      kl: 0.005814352538436651
      policy_loss: -0.0024498312268406153
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3746 s, 160 iter, 1600000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-10-14
  done: false
  episode_len_mean: 123.38
  episode_reward_max: 390.79374805437436
  episode_reward_mean: 332.67953130225237
  episode_reward_min: -144.10671233208592
  episodes_this_iter: 81
  episodes_total: 11406
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2997.126
    load_time_ms: 2.337
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 3.6733637943810755e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.5437612533569336
      kl: 0.006806640420109034
      policy_loss: 0.00029909625300206244


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3861 s, 165 iter, 1650000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-12-10
  done: false
  episode_len_mean: 124.73
  episode_reward_max: 383.5660583343389
  episode_reward_mean: 336.64474935070257
  episode_reward_min: -141.18815133189926
  episodes_this_iter: 79
  episodes_total: 11809
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3014.0
    load_time_ms: 2.411
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 1.1476634422820252e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.5471445322036743
      kl: 0.008857760578393936
      policy_loss: -0.0023909639567136765
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 3975 s, 170 iter, 1700000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-14-04
  done: false
  episode_len_mean: 124.08
  episode_reward_max: 390.5282518888007
  episode_reward_mean: 334.2516280812908
  episode_reward_min: -142.213992309946
  episodes_this_iter: 80
  episodes_total: 12211
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3005.363
    load_time_ms: 2.317
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 3.6433760072445244e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.5354505777359009
      kl: 0.006885610520839691
      policy_loss: -0.0018985912902280688
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4089 s, 175 iter, 1750000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-15-57
  done: false
  episode_len_mean: 125.45
  episode_reward_max: 390.46213722797063
  episode_reward_mean: 350.76878616773496
  episode_reward_min: 151.00032047235368
  episodes_this_iter: 80
  episodes_total: 12611
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2991.921
    load_time_ms: 2.208
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 1.401298464324817e-45
      cur_lr: 4.999999873689376e-05
      entropy: 0.4782046377658844
      kl: 0.0073088062927126884
      policy_loss: -0.0018560009775683284
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4205 s, 180 iter, 1800000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-17-53
  done: false
  episode_len_mean: 124.68
  episode_reward_max: 393.7472786137128
  episode_reward_mean: 348.6668068583136
  episode_reward_min: 162.73262175875954
  episodes_this_iter: 80
  episodes_total: 13010
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2993.915
    load_time_ms: 2.189
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.4481772482395172
      kl: 0.007399695925414562
      policy_loss: -0.002578203799203038
      total_loss: 14.85

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4319 s, 185 iter, 1850000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-19-48
  done: false
  episode_len_mean: 122.86
  episode_reward_max: 387.855525420779
  episode_reward_mean: 342.5606160305129
  episode_reward_min: -138.71921889261648
  episodes_this_iter: 81
  episodes_total: 13412
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2991.925
    load_time_ms: 2.251
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.35263505578041077
      kl: 0.008082191459834576
      policy_loss: -0.002686193212866783
      total_loss: 70.4

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4433 s, 190 iter, 1900000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-21-42
  done: false
  episode_len_mean: 121.97
  episode_reward_max: 386.1839669174759
  episode_reward_mean: 335.40887655417066
  episode_reward_min: -147.01602080193084
  episodes_this_iter: 82
  episodes_total: 13820
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2977.312
    load_time_ms: 2.246
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3413897454738617
      kl: 0.009929458610713482
      policy_loss: -0.00011072231427533552
      total_loss: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4547 s, 195 iter, 1950000 ts, 316 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-23-37
  done: false
  episode_len_mean: 121.1
  episode_reward_max: 393.6083243724706
  episode_reward_mean: 323.51553179372763
  episode_reward_min: -145.6830422707392
  episodes_this_iter: 82
  episodes_total: 14229
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2992.715
    load_time_ms: 2.34
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3443167507648468
      kl: 0.01621958240866661
      policy_loss: -0.0012528311926871538
      total_loss: 155.71

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4662 s, 200 iter, 2000000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-25-32
  done: false
  episode_len_mean: 120.74
  episode_reward_max: 384.73334892099786
  episode_reward_mean: 327.1730501037907
  episode_reward_min: -145.92500234500415
  episodes_this_iter: 84
  episodes_total: 14640
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3016.123
    load_time_ms: 2.356
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.28305020928382874
      kl: 0.007968487218022346
      policy_loss: -0.0014653656398877501
      total_loss: 2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4777 s, 205 iter, 2050000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-27-27
  done: false
  episode_len_mean: 121.19
  episode_reward_max: 392.29166950302516
  episode_reward_mean: 326.17320631846377
  episode_reward_min: -136.47453750543826
  episodes_this_iter: 82
  episodes_total: 15046
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3000.595
    load_time_ms: 2.263
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.31110700964927673
      kl: 0.00865462701767683
      policy_loss: 0.00031559934723190963
      total_loss: 2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 4891 s, 210 iter, 2100000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-29-22
  done: false
  episode_len_mean: 123.58
  episode_reward_max: 388.93389296507223
  episode_reward_mean: 349.0016105305769
  episode_reward_min: 164.4430852222617
  episodes_this_iter: 82
  episodes_total: 15454
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2998.484
    load_time_ms: 2.239
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.2365061640739441
      kl: 0.009143318980932236
      policy_loss: -0.000543696922250092
      total_loss: 16.79

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 5005 s, 215 iter, 2150000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-31-16
  done: false
  episode_len_mean: 123.72
  episode_reward_max: 387.6597851836352
  episode_reward_mean: 347.0974265356731
  episode_reward_min: 305.831916550253
  episodes_this_iter: 80
  episodes_total: 15858
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3016.237
    load_time_ms: 2.312
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.1748519390821457
      kl: 0.0057501294650137424
      policy_loss: -0.0021769567392766476
      total_loss: 3.622

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 5120 s, 220 iter, 2200000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-33-11
  done: false
  episode_len_mean: 123.41
  episode_reward_max: 383.628496587135
  episode_reward_mean: 345.7820651656251
  episode_reward_min: 304.89437677182053
  episodes_this_iter: 80
  episodes_total: 16264
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3018.242
    load_time_ms: 2.567
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.1387249082326889
      kl: 0.008734062314033508
      policy_loss: -0.0013141474919393659
      total_loss: 3.780

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 5235 s, 225 iter, 2250000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-35-06
  done: false
  episode_len_mean: 122.67
  episode_reward_max: 385.6764526311159
  episode_reward_mean: 344.47934368631866
  episode_reward_min: 307.48757537000455
  episodes_this_iter: 81
  episodes_total: 16670
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 2998.827
    load_time_ms: 2.543
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.08737970888614655
      kl: 0.009053905494511127
      policy_loss: -0.0017226869240403175
      total_loss: 2.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 5350 s, 230 iter, 2300000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-37-02
  done: false
  episode_len_mean: 121.93
  episode_reward_max: 385.9752718130717
  episode_reward_mean: 346.2893963120735
  episode_reward_min: 299.04794700966727
  episodes_this_iter: 82
  episodes_total: 17080
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3005.511
    load_time_ms: 2.434
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.016559751704335213
      kl: 0.007875403389334679
      policy_loss: -0.0017023057444021106
      total_loss: 2.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 5466 s, 235 iter, 2350000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-38-58
  done: false
  episode_len_mean: 122.68
  episode_reward_max: 386.8242485940164
  episode_reward_mean: 346.15518286358883
  episode_reward_min: 307.0859609917647
  episodes_this_iter: 82
  episodes_total: 17489
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3023.805
    load_time_ms: 2.369
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.007320705335587263
      kl: 0.010239715687930584
      policy_loss: -0.0024735366459935904
      total_loss: 2.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 5580 s, 240 iter, 2400000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-40-52
  done: false
  episode_len_mean: 121.46
  episode_reward_max: 384.7843400589058
  episode_reward_mean: 343.7299170377201
  episode_reward_min: 301.6585387362985
  episodes_this_iter: 83
  episodes_total: 17900
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3019.354
    load_time_ms: 2.215
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.028535932302474976
      kl: 0.008393562398850918
      policy_loss: -0.0032593633513897657
      total_loss: 2.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15639], 5695 s, 245 iter, 2450000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-42-48
  done: false
  episode_len_mean: 121.48
  episode_reward_max: 378.3485096863348
  episode_reward_mean: 344.95567852616756
  episode_reward_min: 298.95026460288256
  episodes_this_iter: 82
  episodes_total: 18311
  experiment_id: 66dfac4d4bef4417b3614ae78383d242
  hostname: Gandalf
  info:
    grad_time_ms: 3014.646
    load_time_ms: 2.328
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.00568760558962822
      kl: 0.012159575708210468
      policy_loss: -0.0026044112164527178
      total_loss: 2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=15639], 5811 s, 250 iter, 2500000 ts, 335 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=15639], 5811 s, 250 iter, 2500000 ts, 335 rew



In [20]:
# Launch a training run. `executeTraining` is defined in an earlier cell that is
# not visible from here; judging by the output recorded below, it starts SUMO and
# runs a Ray Tune PPO trial on MultiAgentIntersectionEnv-v0 -- TODO confirm
# against the function definition.
# NOTE(review): the recorded output of this cell is very long; consider clearing
# or collapsing it before sharing the notebook.
executeTraining()

 Starting SUMO on port 38227
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-16_08-44-23y6gvqxti -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



3.2951957799774645
20.073916229565203


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-45-22
  done: false
  episode_len_mean: 493.6
  episode_reward_max: 211.41740660329017
  episode_reward_mean: 102.45080646431802
  episode_reward_min: -141.06065739003904
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 4076.763
    load_time_ms: 140.286
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.426827311515808
      kl: 0.0019523317459970713
      policy_loss: -0.003468233859166503
      total_loss: 28.603544235229492
      vf_explained_var: 0.21481865644454956
      vf_loss: 28.60662078857422
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4209234714508057
      kl: 0.0005891452310606837
      policy_loss: -0.0007949161808937788
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 116 s, 5 iter, 50000 ts, 84.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-47-17
  done: false
  episode_len_mean: 439.66
  episode_reward_max: 284.50764431840986
  episode_reward_mean: 93.87761829795103
  episode_reward_min: -165.86209647971805
  episodes_this_iter: 23
  episodes_total: 132
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3278.766
    load_time_ms: 25.261
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.3859541416168213
      kl: 0.003478431608527899
      policy_loss: -0.002000587759539485
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 230 s, 10 iter, 100000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-49-11
  done: false
  episode_len_mean: 359.31
  episode_reward_max: 397.3207344507491
  episode_reward_mean: 162.46372780004089
  episode_reward_min: -157.81617602320767
  episodes_this_iter: 31
  episodes_total: 267
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3115.147
    load_time_ms: 2.266
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.3661308288574219
      kl: 0.0034161775838583708
      policy_loss: -0.002299199579283595
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 345 s, 15 iter, 150000 ts, 177 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-51-07
  done: false
  episode_len_mean: 259.21
  episode_reward_max: 377.72864516830833
  episode_reward_mean: 194.70308755031795
  episode_reward_min: -162.56412433861556
  episodes_this_iter: 41
  episodes_total: 457
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3096.177
    load_time_ms: 2.291
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.3738001585006714
      kl: 0.008099541999399662
      policy_loss: -0.003955863881856203
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 462 s, 20 iter, 200000 ts, 192 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-53-03
  done: false
  episode_len_mean: 205.17
  episode_reward_max: 381.33907194764316
  episode_reward_mean: 203.47664563907998
  episode_reward_min: -161.3369581650312
  episodes_this_iter: 49
  episodes_total: 689
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3104.145
    load_time_ms: 2.254
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3094303607940674
      kl: 0.006311662960797548
      policy_loss: -0.0017371723661199212
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 576 s, 25 iter, 250000 ts, 246 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-54-57
  done: false
  episode_len_mean: 185.37
  episode_reward_max: 376.9649094211501
  episode_reward_mean: 207.15156048528172
  episode_reward_min: -159.01923623539824
  episodes_this_iter: 56
  episodes_total: 941
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3105.28
    load_time_ms: 2.219
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.291749358177185
      kl: 0.0038525874260812998
      policy_loss: -0.0020408581476658583
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 690 s, 30 iter, 300000 ts, 296 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-56-51
  done: false
  episode_len_mean: 184.24
  episode_reward_max: 387.7108314772858
  episode_reward_mean: 315.47107036477666
  episode_reward_min: 265.946436379403
  episodes_this_iter: 54
  episodes_total: 1208
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3101.677
    load_time_ms: 2.24
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 7.450580707946131e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.2151027917861938
      kl: 0.003848257241770625
      policy_loss: -0.0014615933177992702
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 805 s, 35 iter, 350000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_08-58-46
  done: false
  episode_len_mean: 167.53
  episode_reward_max: 396.8533877726204
  episode_reward_mean: 342.9439940492845
  episode_reward_min: 309.67071645547963
  episodes_this_iter: 61
  episodes_total: 1498
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3137.332
    load_time_ms: 2.387
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 4.6566129424663316e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.2478700876235962
      kl: 0.007951716892421246
      policy_loss: -0.0011723788920789957
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 920 s, 40 iter, 400000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-00-42
  done: false
  episode_len_mean: 155.74
  episode_reward_max: 384.5498351293286
  episode_reward_mean: 342.25769039297177
  episode_reward_min: 307.01029338802965
  episodes_this_iter: 64
  episodes_total: 1814
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3121.966
    load_time_ms: 2.387
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 2.9103830890414573e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1850265264511108
      kl: 0.009887740947306156
      policy_loss: -0.0036444920115172863
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1035 s, 45 iter, 450000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-02-38
  done: false
  episode_len_mean: 146.43
  episode_reward_max: 386.68647436974595
  episode_reward_mean: 325.46076096454755
  episode_reward_min: -144.57260058314847
  episodes_this_iter: 69
  episodes_total: 2146
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3098.087
    load_time_ms: 2.337
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 1.8189894306509108e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.198235034942627
      kl: 0.007711934857070446
      policy_loss: -0.0015980215976014733
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1150 s, 50 iter, 500000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-04-33
  done: false
  episode_len_mean: 141.42
  episode_reward_max: 396.0701183606236
  episode_reward_mean: 345.5340544183553
  episode_reward_min: -124.40877450478955
  episodes_this_iter: 72
  episodes_total: 2493
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3112.127
    load_time_ms: 2.564
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.1813286542892456
      kl: 0.011085452511906624
      policy_loss: -0.0027601842302829027
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1264 s, 55 iter, 550000 ts, 352 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-06-27
  done: false
  episode_len_mean: 133.88
  episode_reward_max: 384.6046841184371
  episode_reward_mean: 341.2656578770151
  episode_reward_min: -135.10595488669028
  episodes_this_iter: 75
  episodes_total: 2860
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3100.468
    load_time_ms: 2.458
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 5.1159085783869865e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.2840818166732788
      kl: 0.007934868335723877
      policy_loss: -0.0030140490271151066
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1378 s, 60 iter, 600000 ts, 355 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-08-22
  done: false
  episode_len_mean: 137.01
  episode_reward_max: 393.7783196064504
  episode_reward_mean: 358.35596457650854
  episode_reward_min: 322.1550282490741
  episodes_this_iter: 74
  episodes_total: 3227
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3106.345
    load_time_ms: 2.325
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 3.1974428614918666e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.214700698852539
      kl: 0.02201688475906849
      policy_loss: -0.005219041369855404
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1494 s, 65 iter, 650000 ts, 354 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-10-17
  done: false
  episode_len_mean: 136.14
  episode_reward_max: 402.02835656269065
  episode_reward_mean: 356.1395057282006
  episode_reward_min: 174.85899284185558
  episodes_this_iter: 74
  episodes_total: 3594
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3106.32
    load_time_ms: 2.428
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 3.996803576864833e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.0981744527816772
      kl: 0.005823437590152025
      policy_loss: -0.0013571154559031129
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1608 s, 70 iter, 700000 ts, 356 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-12-12
  done: false
  episode_len_mean: 133.91
  episode_reward_max: 395.34633412943487
  episode_reward_mean: 357.49797235013256
  episode_reward_min: 323.9410868664535
  episodes_this_iter: 75
  episodes_total: 3966
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3081.996
    load_time_ms: 2.265
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 2.4980022355405207e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.0390790700912476
      kl: 0.013255498372018337
      policy_loss: -0.002565532922744751
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1724 s, 75 iter, 750000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-14-07
  done: false
  episode_len_mean: 136.52
  episode_reward_max: 396.2493205490417
  episode_reward_mean: 345.5523844255801
  episode_reward_min: -105.58384197506543
  episodes_this_iter: 73
  episodes_total: 4334
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3099.162
    load_time_ms: 2.278
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 1.4051255854072952e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.1544132232666016
      kl: 0.004605477210134268
      policy_loss: -0.0014317685272544622
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1838 s, 80 iter, 800000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-16-02
  done: false
  episode_len_mean: 130.41
  episode_reward_max: 394.5520389827833
  episode_reward_mean: 318.81569281215417
  episode_reward_min: -118.89177098459604
  episodes_this_iter: 77
  episodes_total: 4709
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3128.26
    load_time_ms: 2.361
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 8.782034908795595e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1168338060379028
      kl: 0.0031961046624928713
      policy_loss: -0.0017948973691090941
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 1952 s, 85 iter, 850000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-17-57
  done: false
  episode_len_mean: 135.63
  episode_reward_max: 387.8041726112304
  episode_reward_mean: 349.04504546606535
  episode_reward_min: -114.08480233971218
  episodes_this_iter: 74
  episodes_total: 5083
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3109.598
    load_time_ms: 2.399
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 2.7443859089986235e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0847922563552856
      kl: 0.008522018790245056
      policy_loss: -0.001629894133657217
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2068 s, 90 iter, 900000 ts, 351 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-19-52
  done: false
  episode_len_mean: 135.64
  episode_reward_max: 388.85727265193793
  episode_reward_mean: 352.8709610394843
  episode_reward_min: -107.26306748652274
  episodes_this_iter: 74
  episodes_total: 5457
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3099.472
    load_time_ms: 2.41
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 8.576205965620698e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.028810977935791
      kl: 0.006592649035155773
      policy_loss: -0.003980147652328014
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2183 s, 95 iter, 950000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-21-48
  done: false
  episode_len_mean: 135.52
  episode_reward_max: 387.681773261596
  episode_reward_mean: 325.9061715632485
  episode_reward_min: -109.92293885679938
  episodes_this_iter: 75
  episodes_total: 5829
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3121.608
    load_time_ms: 2.35
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 2.1440514914051746e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.2318352460861206
      kl: 0.021194709464907646
      policy_loss: -0.003791447263211012
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2298 s, 100 iter, 1000000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-23-42
  done: false
  episode_len_mean: 133.9
  episode_reward_max: 389.5644487932888
  episode_reward_mean: 319.09203688100826
  episode_reward_min: -112.57169998573166
  episodes_this_iter: 75
  episodes_total: 6198
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3098.363
    load_time_ms: 2.376
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 1.0720257457025873e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.126703143119812
      kl: 0.01746450364589691
      policy_loss: -0.0018902537412941456
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2412 s, 105 iter, 1050000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-25-38
  done: false
  episode_len_mean: 136.04
  episode_reward_max: 391.65399324901836
  episode_reward_mean: 342.987550515725
  episode_reward_min: -112.4064902608366
  episodes_this_iter: 73
  episodes_total: 6567
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3089.329
    load_time_ms: 2.252
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 1.340032182128234e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.0440260171890259
      kl: 0.01297188550233841
      policy_loss: -0.0006221350631676614
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2527 s, 110 iter, 1100000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-27-32
  done: false
  episode_len_mean: 135.31
  episode_reward_max: 391.8189667524669
  episode_reward_mean: 337.7039917682058
  episode_reward_min: -113.73916118753665
  episodes_this_iter: 73
  episodes_total: 6937
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3124.441
    load_time_ms: 2.459
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 6.70016091064117e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.9667849540710449
      kl: 0.007099726237356663
      policy_loss: -0.0017025923589244485
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2642 s, 115 iter, 1150000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-29-28
  done: false
  episode_len_mean: 134.44
  episode_reward_max: 383.43858992425595
  episode_reward_mean: 340.8376452309644
  episode_reward_min: -107.24189067193338
  episodes_this_iter: 74
  episodes_total: 7308
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3113.627
    load_time_ms: 2.475
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 4.187600569150732e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.9509546160697937
      kl: 0.015166537836194038
      policy_loss: -0.0013613759074360132
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2758 s, 120 iter, 1200000 ts, 348 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-31-23
  done: false
  episode_len_mean: 135.6
  episode_reward_max: 392.3581857592755
  episode_reward_mean: 337.0315618874577
  episode_reward_min: -109.90163864026627
  episodes_this_iter: 74
  episodes_total: 7678
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3102.951
    load_time_ms: 2.153
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 2.093800284575366e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.8455796837806702
      kl: 0.02236141636967659
      policy_loss: 0.005070326384156942
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2872 s, 125 iter, 1250000 ts, 357 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-33-18
  done: false
  episode_len_mean: 134.65
  episode_reward_max: 385.59771481890317
  episode_reward_mean: 337.0165761340906
  episode_reward_min: -108.60243210044771
  episodes_this_iter: 74
  episodes_total: 8045
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3101.977
    load_time_ms: 2.233
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 2.093800284575366e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.8534061908721924
      kl: 0.030724357813596725
      policy_loss: 0.002034540055319667
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 2987 s, 130 iter, 1300000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-35-13
  done: false
  episode_len_mean: 134.7
  episode_reward_max: 392.7305565280914
  episode_reward_mean: 332.6726213764833
  episode_reward_min: -111.33485980655381
  episodes_this_iter: 73
  episodes_total: 8415
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3090.155
    load_time_ms: 2.408
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 1.046900142287683e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.8487594723701477
      kl: 0.019861580803990364
      policy_loss: 0.005255057476460934
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3102 s, 135 iter, 1350000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-37-08
  done: false
  episode_len_mean: 133.61
  episode_reward_max: 387.8539729599094
  episode_reward_mean: 328.57136554105847
  episode_reward_min: -112.19197252889327
  episodes_this_iter: 76
  episodes_total: 8789
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3104.705
    load_time_ms: 2.444
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 2.3555258440002314e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.7705082297325134
      kl: 0.01620885729789734
      policy_loss: 0.003199583850800991
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3217 s, 140 iter, 1400000 ts, 355 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-39-03
  done: false
  episode_len_mean: 137.81
  episode_reward_max: 392.82242451099705
  episode_reward_mean: 356.35472729204776
  episode_reward_min: 137.9115282710565
  episodes_this_iter: 72
  episodes_total: 9156
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3100.423
    load_time_ms: 2.342
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 3.5332882729622814e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.7308639883995056
      kl: 0.01830761320888996
      policy_loss: 0.0020672916434705257
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3330 s, 145 iter, 1450000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-40-57
  done: false
  episode_len_mean: 136.53
  episode_reward_max: 386.78236224502325
  episode_reward_mean: 355.45838603069365
  episode_reward_min: 142.84432980864165
  episodes_this_iter: 73
  episodes_total: 9522
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3084.198
    load_time_ms: 2.379
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 3.5332882729622814e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.5619399547576904
      kl: 50.20535659790039
      policy_loss: 0.10418174415826797
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3445 s, 150 iter, 1500000 ts, 354 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-42-52
  done: false
  episode_len_mean: 135.96
  episode_reward_max: 389.65707976478325
  episode_reward_mean: 353.50146552301953
  episode_reward_min: 137.48681969709367
  episodes_this_iter: 74
  episodes_total: 9890
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3096.129
    load_time_ms: 2.419
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 1.1924850386438028e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.5161714553833008
      kl: 0.051667969673871994
      policy_loss: 0.00657215341925621
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3560 s, 155 iter, 1550000 ts, 361 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-44-47
  done: false
  episode_len_mean: 135.92
  episode_reward_max: 389.9836312430637
  episode_reward_mean: 357.2179468808358
  episode_reward_min: 173.48221144674665
  episodes_this_iter: 74
  episodes_total: 10258
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3118.695
    load_time_ms: 2.229
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 9.055432938645147e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.44626256823539734
      kl: 0.0247491467744112
      policy_loss: 0.0037834392860531807
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3674 s, 160 iter, 1600000 ts, 361 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-46-42
  done: false
  episode_len_mean: 135.41
  episode_reward_max: 385.8557051911836
  episode_reward_mean: 358.23832616253435
  episode_reward_min: 155.9536198148951
  episodes_this_iter: 74
  episodes_total: 10629
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3101.014
    load_time_ms: 2.245
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 2.037472785904088e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.44211554527282715
      kl: 4.838359355926514
      policy_loss: 0.041708964854478836
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3789 s, 165 iter, 1650000 ts, 357 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-48-37
  done: false
  episode_len_mean: 134.32
  episode_reward_max: 390.61149358577717
  episode_reward_mean: 354.1327861869569
  episode_reward_min: 151.98512312274096
  episodes_this_iter: 74
  episodes_total: 11000
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3081.313
    load_time_ms: 2.425
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 2.2921566869268728e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.45046165585517883
      kl: 0.01530271116644144
      policy_loss: 0.0005368702113628387
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 3904 s, 170 iter, 1700000 ts, 352 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-50-31
  done: false
  episode_len_mean: 132.55
  episode_reward_max: 390.8526622987797
  episode_reward_mean: 350.4439095499168
  episode_reward_min: -141.02313779313448
  episodes_this_iter: 75
  episodes_total: 11375
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3101.307
    load_time_ms: 2.409
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 5.1573529794589617e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.4564700424671173
      kl: 0.03387529030442238
      policy_loss: 0.005212555173784494
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4018 s, 175 iter, 1750000 ts, 361 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-52-26
  done: false
  episode_len_mean: 132.4
  episode_reward_max: 390.41615427389354
  episode_reward_mean: 357.65006012258243
  episode_reward_min: 163.28528113138998
  episodes_this_iter: 76
  episodes_total: 11753
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3115.981
    load_time_ms: 2.523
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 7.736028207010994e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.36301249265670776
      kl: 0.019563058391213417
      policy_loss: 0.0014786187093704939
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4132 s, 180 iter, 1800000 ts, 357 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-54-21
  done: false
  episode_len_mean: 132.33
  episode_reward_max: 392.9943876507995
  episode_reward_mean: 357.6346020208398
  episode_reward_min: 147.02742965237053
  episodes_this_iter: 75
  episodes_total: 12131
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3102.334
    load_time_ms: 2.56
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 1.740606693676272e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.27284687757492065
      kl: 0.0139356954023242
      policy_loss: 0.0011603519087657332
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4248 s, 185 iter, 1850000 ts, 357 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-56-17
  done: false
  episode_len_mean: 129.4
  episode_reward_max: 393.1368793271691
  episode_reward_mean: 353.13612920541755
  episode_reward_min: -141.98360608646007
  episodes_this_iter: 78
  episodes_total: 12515
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3096.792
    load_time_ms: 2.612
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 2.6109092200990665e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.273680716753006
      kl: 0.37531614303588867
      policy_loss: -0.0008694429416209459
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4362 s, 190 iter, 1900000 ts, 359 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_09-58-12
  done: false
  episode_len_mean: 129.3
  episode_reward_max: 393.2502350544629
  episode_reward_mean: 362.62669269132124
  episode_reward_min: 331.2605056184559
  episodes_this_iter: 78
  episodes_total: 12899
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3089.657
    load_time_ms: 2.437
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 5.874544924807558e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.253974586725235
      kl: 0.023890797048807144
      policy_loss: 0.00971535500138998
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4477 s, 195 iter, 1950000 ts, 362 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-00-07
  done: false
  episode_len_mean: 131.83
  episode_reward_max: 390.7933914349431
  episode_reward_mean: 360.26475892511
  episode_reward_min: 333.6438039875358
  episodes_this_iter: 76
  episodes_total: 13283
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3084.661
    load_time_ms: 2.456
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 1.3217729236260627e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.21184837818145752
      kl: 0.032590240240097046
      policy_loss: 0.021777762100100517
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4592 s, 200 iter, 2000000 ts, 363 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-02-02
  done: false
  episode_len_mean: 133.21
  episode_reward_max: 388.9992000414399
  episode_reward_mean: 361.7796870482351
  episode_reward_min: 334.07079857996683
  episodes_this_iter: 75
  episodes_total: 13661
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3151.598
    load_time_ms: 2.486
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 1.9826598398229755e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.1624060422182083
      kl: 0.019821321591734886
      policy_loss: 0.005999069660902023
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4715 s, 205 iter, 2050000 ts, 365 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-04-09
  done: false
  episode_len_mean: 130.53
  episode_reward_max: 392.9025340816697
  episode_reward_mean: 357.0028301012016
  episode_reward_min: -136.94833376217204
  episodes_this_iter: 77
  episodes_total: 14041
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3265.997
    load_time_ms: 2.297
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 2.973989759734463e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.17694756388664246
      kl: 1.0182899236679077
      policy_loss: 0.0033569352235645056
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 4853 s, 210 iter, 2100000 ts, 352 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-06-41
  done: false
  episode_len_mean: 132.02
  episode_reward_max: 386.7046114202657
  episode_reward_mean: 357.5604070195504
  episode_reward_min: 153.19299700487807
  episodes_this_iter: 75
  episodes_total: 14423
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 3710.001
    load_time_ms: 2.536
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 1.5055821290633096e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.21695956587791443
      kl: 0.02125265821814537
      policy_loss: 0.0034943241626024246
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 5140 s, 215 iter, 2150000 ts, 365 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-11-13
  done: false
  episode_len_mean: 130.21
  episode_reward_max: 389.6806840230451
  episode_reward_mean: 354.66346829910356
  episode_reward_min: -140.07249684900913
  episodes_this_iter: 77
  episodes_total: 14806
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 5250.46
    load_time_ms: 3.625
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.1291865160181255e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.15266592800617218
      kl: 0.09351732581853867
      policy_loss: 0.0013009299291297793
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 5286 s, 220 iter, 2200000 ts, 363 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-13-41
  done: false
  episode_len_mean: 131.83
  episode_reward_max: 394.86920955620593
  episode_reward_mean: 360.50875131169903
  episode_reward_min: 333.9427585480343
  episodes_this_iter: 76
  episodes_total: 15186
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 5333.524
    load_time_ms: 3.884
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 3.8110049358476355e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.1465553641319275
      kl: 0.01433502696454525
      policy_loss: 0.00449371850118041
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 5592 s, 225 iter, 2250000 ts, 358 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-19-00
  done: false
  episode_len_mean: 130.21
  episode_reward_max: 388.1149349254373
  episode_reward_mean: 356.09043805575413
  episode_reward_min: -138.05415536848056
  episodes_this_iter: 76
  episodes_total: 15569
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 6742.797
    load_time_ms: 3.413
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 8.57476037864297e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.10535537451505661
      kl: 0.035867977887392044
      policy_loss: 0.020176533609628677
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 5769 s, 230 iter, 2300000 ts, 359 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-21-48
  done: false
  episode_len_mean: 129.63
  episode_reward_max: 394.11483417068746
  episode_reward_mean: 353.9078461986288
  episode_reward_min: -140.9605134460723
  episodes_this_iter: 77
  episodes_total: 15954
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 6839.824
    load_time_ms: 3.219
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 1.9293207136096274e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.10009374469518661
      kl: 0.5887627005577087
      policy_loss: 0.006110407877713442
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 5913 s, 235 iter, 2350000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-24-12
  done: false
  episode_len_mean: 127.63
  episode_reward_max: 398.82484790846985
  episode_reward_mean: 344.1224800773756
  episode_reward_min: -148.90430775267575
  episodes_this_iter: 79
  episodes_total: 16342
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 4310.732
    load_time_ms: 2.84
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 9.767189440758234e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.1337096095085144
      kl: 3.307927370071411
      policy_loss: 0.005632770713418722
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 6095 s, 240 iter, 2400000 ts, 357 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-27-13
  done: false
  episode_len_mean: 128.33
  episode_reward_max: 389.02057549547266
  episode_reward_mean: 362.03374103464057
  episode_reward_min: 333.60280673391196
  episodes_this_iter: 78
  episodes_total: 16731
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 4792.924
    load_time_ms: 3.29
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 4.94463796124854e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.12173916399478912
      kl: 0.014698967337608337
      policy_loss: 0.0054800985381007195
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=15699], 6261 s, 245 iter, 2450000 ts, 362 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-30-12
  done: false
  episode_len_mean: 128.24
  episode_reward_max: 391.9262431436814
  episode_reward_mean: 356.01539623650046
  episode_reward_min: -142.20081201533162
  episodes_this_iter: 78
  episodes_total: 17121
  experiment_id: ee4541f6ec54417a81146033521df0f9
  hostname: Gandalf
  info:
    grad_time_ms: 5011.592
    load_time_ms: 3.437
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 3.708478160743675e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.155276820063591
      kl: 28.742816925048828
      policy_loss: 0.028615670278668404
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=15699], 6439 s, 250 iter, 2500000 ts, 353 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=15699], 6439 s, 250 iter, 2500000 ts, 353 rew



In [None]:
# Launch another training run.
# NOTE(review): executeTraining() is not defined in this part of the notebook --
# presumably an earlier cell defines it and must be run first (TODO confirm).
# Per the output captured below, this call starts SUMO and a new
# PPO_MultiAgentIntersectionEnv-v0_0 trial (new pid and experiment_id) that logs
# to the same ray_results/IntersectionExample directory as the previous run.
executeTraining()

 Starting SUMO on port 52631
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-16_10-32-29hed5wc05 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



16.434145357006784
14.367219755037919


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-33-07
  done: false
  episode_len_mean: 489.0
  episode_reward_max: 179.3880309181247
  episode_reward_mean: 86.1190011133148
  episode_reward_min: -151.7486308010638
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 4240.323
    load_time_ms: 150.942
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4182323217391968
      kl: 0.0007666427409276366
      policy_loss: -0.0021987531799823046
      total_loss: 25.61863136291504
      vf_explained_var: 0.1952669322490692
      vf_loss: 25.620677947998047
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.418394923210144
      kl: 0.0013116004411131144
      policy_loss: -0.0018925844924524426
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 120 s, 5 iter, 50000 ts, 101 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-35-05
  done: false
  episode_len_mean: 402.25
  episode_reward_max: 280.2998255191442
  episode_reward_mean: 111.64199685597043
  episode_reward_min: -158.02126673190355
  episodes_this_iter: 31
  episodes_total: 142
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3375.932
    load_time_ms: 27.201
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4206793308258057
      kl: 0.0014311131089925766
      policy_loss: -0.0011552408104762435
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 238 s, 10 iter, 100000 ts, 161 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-37-04
  done: false
  episode_len_mean: 271.36
  episode_reward_max: 356.6029616233184
  episode_reward_mean: 134.44522911801468
  episode_reward_min: -163.74955624133113
  episodes_this_iter: 41
  episodes_total: 319
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3205.89
    load_time_ms: 2.648
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.4184283018112183
      kl: 0.0057281265035271645
      policy_loss: -0.0021758894436061382
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 356 s, 15 iter, 150000 ts, 225 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-39-02
  done: false
  episode_len_mean: 205.76
  episode_reward_max: 356.50638074299076
  episode_reward_mean: 235.1119703893858
  episode_reward_min: -162.2332162662087
  episodes_this_iter: 50
  episodes_total: 557
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3201.931
    load_time_ms: 2.805
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 6.10351571594947e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.403420329093933
      kl: 0.00646847253665328
      policy_loss: -0.0023058955557644367
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 476 s, 20 iter, 200000 ts, 237 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-41-03
  done: false
  episode_len_mean: 182.31
  episode_reward_max: 377.8853526450906
  episode_reward_mean: 244.1134719660918
  episode_reward_min: -157.8089198270329
  episodes_this_iter: 55
  episodes_total: 820
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3197.337
    load_time_ms: 2.635
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 1.9073486612342094e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.4005272388458252
      kl: 0.0082851005718112
      policy_loss: -0.0034378559794276953
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 594 s, 25 iter, 250000 ts, 256 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-43-01
  done: false
  episode_len_mean: 169.41
  episode_reward_max: 352.20708652363885
  episode_reward_mean: 284.9775916021797
  episode_reward_min: -141.92482337410166
  episodes_this_iter: 58
  episodes_total: 1110
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3205.039
    load_time_ms: 2.481
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 5.9604645663569045e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.3764498233795166
      kl: 0.006444194354116917
      policy_loss: -0.0015404855366796255
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 713 s, 30 iter, 300000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-44-59
  done: false
  episode_len_mean: 156.47
  episode_reward_max: 351.430631415661
  episode_reward_mean: 312.5771981519858
  episode_reward_min: 279.096225199232
  episodes_this_iter: 64
  episodes_total: 1424
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3210.682
    load_time_ms: 2.612
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 7.450580707946131e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.314957618713379
      kl: 0.009836667217314243
      policy_loss: -0.002595038153231144
      total_los

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 829 s, 35 iter, 350000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-46-56
  done: false
  episode_len_mean: 151.0
  episode_reward_max: 370.7561129057823
  episode_reward_mean: 324.9705379721828
  episode_reward_min: 287.85112534766967
  episodes_this_iter: 66
  episodes_total: 1751
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3190.467
    load_time_ms: 2.536
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 4.6566129424663316e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.2764943838119507
      kl: 0.009800616651773453
      policy_loss: -0.0019548204727470875
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 945 s, 40 iter, 400000 ts, 326 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-48-52
  done: false
  episode_len_mean: 141.49
  episode_reward_max: 359.6224120422741
  episode_reward_mean: 324.5467151450176
  episode_reward_min: 289.76580675781986
  episodes_this_iter: 72
  episodes_total: 2094
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3138.641
    load_time_ms: 2.302
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 2.9103830890414573e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.202426552772522
      kl: 0.006498439237475395
      policy_loss: -0.0019121519289910793
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 1061 s, 45 iter, 450000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-50-48
  done: false
  episode_len_mean: 139.92
  episode_reward_max: 366.46097880827205
  episode_reward_mean: 329.7576528585229
  episode_reward_min: 287.98646653706663
  episodes_this_iter: 70
  episodes_total: 2454
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3093.157
    load_time_ms: 2.211
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 9.094947153254554e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.1413443088531494
      kl: 0.022095046937465668
      policy_loss: -0.004346325062215328
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=17258], 1178 s, 50 iter, 500000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-16_10-52-45
  done: false
  episode_len_mean: 136.42
  episode_reward_max: 372.22948262587283
  episode_reward_mean: 324.1759006789613
  episode_reward_min: -141.63079371102415
  episodes_this_iter: 73
  episodes_total: 2819
  experiment_id: 24c603b7188b4f0a80a63654cfc9e16b
  hostname: Gandalf
  info:
    grad_time_ms: 3110.017
    load_time_ms: 2.314
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.0990521907806396
      kl: 0.007127183489501476
      policy_loss: -0.0026886085979640484
      