# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout in steps. Kept in a single constant so that every cell
# that needs the horizon reads the same value.
HORIZON = 500

# Tag of the experiment (stored as "exp_tag" in flow_params).
experiment_name = "IntersectionExample"

# Scenario class, referenced by name. The names exported by flow.scenarios are
# printed for reference.
# NOTE(review): the list in the saved output contains 'IntersectionScenarioTW',
# not 'IntersectionTWScenario' -- confirm which spelling the package exposes.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
scenario_name = "IntersectionTWScenario"

# Environment class, referenced by name. The names exported by
# flow.multiagent_envs are printed for reference.
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, handed to NetParams as additional_params.
additionalNetParams = dict(edge_length=40, lanes=1, speed_limit=30)

# Network description. Arguments marked "changed" deviate from the default
# noted next to them.
net_params = NetParams(
    # changed; default: True -- disabled so that intersections are not skipped
    no_internal_links=False,
    inflows=None,                           # default: None
    osm_path=None,                          # default: None
    netfile=None,                           # default: None
    additional_params=additionalNetParams,  # changed; default: None
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Vehicle placement used when a rollout is initialised or reset. Arguments
# marked "changed" were flagged in the original notes as deliberate deviations
# from the default.
initial_config = InitialConfig(
    shuffle=True,                     # changed; default: False
    spacing="custom",                 # changed; default: "uniform"
    min_gap=10,                       # default: 0
    perturbation=29.99,               # changed; default: 0.0
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. Every argument is spelled out together with the default
# recorded in the original notes, so deviations are easy to spot later.
sumo_params = SumoParams(
    port=None,                # default: None
    sim_step=0.1,             # default: 0.1
    emission_path=None,       # default: None
    lateral_resolution=None,  # default: None
    no_step_log=True,         # default: True
    render=False,             # default: False
    save_render=False,        # default: False
    sight_radius=25,          # default: 25
    show_radius=False,        # default: False
    pxpm=2,                   # default: 2
    overtake_right=False,     # default: False
    seed=None,                # default: None
    restart_instance=False,   # default: False
    print_warnings=True,      # default: True
    teleport_time=-1,         # default: -1
    num_clients=1,            # default: 1
    sumo_binary=None,         # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, handed to EnvParams as additional_params:
#   max_accel / max_decel -- maximum acceleration / deceleration of the
#                            autonomous vehicles
#   target_velocity       -- NOTE(review): presumably the speed the reward aims
#                            for; confirm against the environment class
additionalEnvParams = dict(max_accel=3, max_decel=3, target_velocity=30)

env_params = EnvParams(
    additional_params=additionalEnvParams,  # changed; default: None
    horizon=HORIZON,                        # default: 500; tied to HORIZON
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicles Parameter

In [6]:
# Vehicle dynamics models and parameter containers.
# (SumoCarFollowingController and SumoLaneChangeController are the defaults
# and are therefore not imported here.)
from flow.controllers import ContinuousRouter, RLController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.core.params import (
    SumoCarFollowingParams,
    SumoLaneChangeParams,
    VehicleParams,
)
# NOTE(review): wildcard import; none of its names is used in the cells shown
# here -- consider importing only what is needed.
from random import *

# Empty container; vehicle types are registered in the following cell.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters for the RL vehicles (default: None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (default: None); defined but not passed to
# vehicles.add below.
lc_parameter = None

# Register the RL-controlled vehicles.
# NOTE(review): re-running this cell calls vehicles.add again on the same
# VehicleParams object, presumably registering the vehicles a second time --
# re-run the cell that creates `vehicles` first.
vehicles.add(
    # name of the vehicle type
    veh_id="rl",
    # accelerations come from the RL agent
    # (default: (SumoCarFollowingController, {}))
    acceleration_controller=(RLController, {}),
    # default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # default: None
    routing_controller=(ContinuousRouter, {}),
    # default: 0
    initial_speed=0,
    # default: 1
    num_vehicles=2,
    # carries speed_mode="aggressive" (see cf_parameter above)
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Bundle all parameter objects into flow_params. The dictionary keys must be
# spelled exactly as below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (the VehicleParams object built above)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Number of parallel rollout workers.
N_CPUS = 2
# Number of rollouts collected per training iteration.
N_ROLLOUTS = 20

# Reserve one CPU per worker plus one extra (presumably for the trainer/driver
# process). Kept as the last expression so the cell displays the connection
# info returned by ray.init.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-12_18-24-26_26953/logs.
Waiting for redis server at 127.0.0.1:43529 to respond...
Waiting for redis server at 127.0.0.1:34858 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8890/notebooks/ray_ui.ipynb?token=1667cf0b8490b75b968f56a120accd5075d84837f2ed6c11



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-03-12_18-24-26_26953/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-12_18-24-26_26953/sockets/raylet'],
 'redis_address': '192.168.2.102:43529',
 'webui_url': 'http://localhost:8890/notebooks/ray_ui.ipynb?token=1667cf0b8490b75b968f56a120accd5075d84837f2ed6c11'}

In [11]:
def executeTraining():
    """Train one PPO policy per agent on the Flow intersection environment.

    Builds an RLlib PPO configuration from the notebook-level settings
    (``N_CPUS``, ``N_ROLLOUTS``, ``HORIZON``, ``flow_params``), registers the
    Flow environment with RLlib, and runs the experiment through
    ``run_experiments``. The trial stops after 300 training iterations and is
    checkpointed after every iteration.

    Each of the agent ids ``'rl_0'`` and ``'rl_1'`` is mapped to its own,
    separately trained PPO policy graph.

    Returns:
        None.
    """
    # Local import so the function does not rely on an import in another cell.
    from copy import deepcopy

    # The algorithm or model to train. This may refer to the name of a
    # built-in algorithm (e.g. RLlib's DQN or PPO), or a user-defined
    # trainable function or class registered in the tune registry.
    alg_run = "PPO"

    agent_cls = get_agent_class(alg_run)
    # Deep copy: a shallow .copy() shares the nested "model" and "env_config"
    # dicts with the agent class, so the writes below would modify RLlib's
    # defaults for everything else running in this kernel.
    config = deepcopy(agent_cls._default_config)
    config["num_workers"] = N_CPUS  # number of parallel workers
    config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
    config["gamma"] = 0.999  # discount rate
    config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
    config["use_gae"] = True  # using generalized advantage estimation
    config["lambda"] = 0.97
    # Alternatives that were tried and left disabled:
    #config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
    #config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # default is 200, still too high?
    config["kl_target"] = 0.02  # target KL divergence
    config["num_sgd_iter"] = 10  # number of SGD iterations
    config["horizon"] = HORIZON  # rollout horizon

    # Save the flow params (as a JSON string) in the config for replay.
    flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                           indent=4)
    config['env_config']['flow_params'] = flow_json
    config['env_config']['run'] = alg_run

    # Call the utility function make_create_env to be able to
    # register the Flow env for this experiment.
    create_env, gym_name = make_create_env(params=flow_params, version=0)

    # Register as rllib env with Gym.
    register_env(gym_name, create_env)

    # A throw-away environment is needed only to read the observation and
    # action spaces. Terminate it afterwards so its simulator instance does
    # not stay open for the rest of the kernel session.
    test_env = create_env()
    try:
        obs_space = test_env.observation_space
        act_space = test_env.action_space
    finally:
        test_env.terminate()

    def gen_policy():
        """Return the policy-graph spec (class, obs space, action space, config)."""
        return (PPOPolicyGraph, obs_space, act_space, {})

    # One independent policy graph per agent id.
    policy_graphs = {'rl_0': gen_policy(), 'rl_1': gen_policy()}

    def policy_mapping_fn(agent_id):
        """Map each agent to the policy graph that carries its own id."""
        return agent_id

    config.update({
            'multiagent': {
                'policy_graphs': policy_graphs,
                'policy_mapping_fn': tune.function(policy_mapping_fn)
            }
        })

    run_experiments({
        flow_params["exp_tag"]: {
            "run": alg_run,  # RL algorithm to run
            "env": gym_name,  # environment name generated earlier
            "config": {  # configuration params (must match "run" value)
                **config
            },
            "checkpoint_freq": 1,  # number of iterations between checkpoints
            "max_failures": 999,  # how many times Tune may restart the trial after a failure
            "stop": {  # stopping conditions
                "training_iteration": 300,  # number of iterations to stop after
            },
        },
    })

In [12]:
# Launch the PPO training run defined above (stops after 300 training iterations).
executeTraining()

 Starting SUMO on port 51275
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-12_18-24-28tsnpscrn -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



24.311956666327163
25.838955093458935


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-25-24
  done: false
  episode_len_mean: 455.04761904761904
  episode_reward_max: 270.75795500964466
  episode_reward_mean: 47.97779331562042
  episode_reward_min: -168.11321703014733
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 4580.163
    load_time_ms: 146.605
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4170949459075928
      kl: 0.0007874090224504471
      policy_loss: -0.0008958953549154103
      total_loss: 113.08045196533203
      vf_explained_var: 0.0426790677011013
      vf_loss: 113.0811767578125
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4219223260879517
      kl: 0.0008876294014044106
      policy_loss: -0.00087617355

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 123 s, 5 iter, 50000 ts, 112 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-27-26
  done: false
  episode_len_mean: 428.62
  episode_reward_max: 372.9499641757883
  episode_reward_mean: 153.71626407892748
  episode_reward_min: -163.1948929522456
  episodes_this_iter: 26
  episodes_total: 137
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3735.603
    load_time_ms: 26.618
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.39472234249115
      kl: 0.009146330878138542
      policy_loss: -0.004239782225340605
      total_loss

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 245 s, 10 iter, 100000 ts, 230 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-29-29
  done: false
  episode_len_mean: 228.34
  episode_reward_max: 364.5445274538058
  episode_reward_mean: 222.60110423242813
  episode_reward_min: -157.92623549375963
  episodes_this_iter: 47
  episodes_total: 332
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3586.95
    load_time_ms: 2.758
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.3623945713043213
      kl: 0.003060264978557825
      policy_loss: -0.0006501601310446858
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 369 s, 15 iter, 150000 ts, 264 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-31-32
  done: false
  episode_len_mean: 180.96
  episode_reward_max: 328.55287311770616
  episode_reward_mean: 261.445587426441
  episode_reward_min: -137.545565993704
  episodes_this_iter: 54
  episodes_total: 590
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3585.917
    load_time_ms: 2.7
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 2.441406286379788e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.2938258647918701
      kl: 0.0042021735571324825
      policy_loss: -0.0012652555014938116
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 492 s, 20 iter, 200000 ts, 287 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-33-36
  done: false
  episode_len_mean: 179.01
  episode_reward_max: 362.62431138104523
  episode_reward_mean: 305.11013125775406
  episode_reward_min: 238.46284224521008
  episodes_this_iter: 54
  episodes_total: 872
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3558.43
    load_time_ms: 2.516
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 7.629394644936838e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3108536005020142
      kl: 0.006764150690287352
      policy_loss: -0.00124320515897125
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 615 s, 25 iter, 250000 ts, 308 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-35-40
  done: false
  episode_len_mean: 158.87
  episode_reward_max: 386.93932098996277
  episode_reward_mean: 303.9716755123127
  episode_reward_min: -140.34224461376888
  episodes_this_iter: 64
  episodes_total: 1176
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3525.045
    load_time_ms: 2.446
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 4.7683716530855236e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.2358640432357788
      kl: 0.008450791239738464
      policy_loss: -0.00261439080350101
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 740 s, 30 iter, 300000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-37-44
  done: false
  episode_len_mean: 147.44
  episode_reward_max: 364.4702120367285
  episode_reward_mean: 302.55347175666975
  episode_reward_min: -137.75644830126222
  episodes_this_iter: 68
  episodes_total: 1503
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3494.909
    load_time_ms: 2.339
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.4901161415892261e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.1422691345214844
      kl: 0.015691783279180527
      policy_loss: -0.0029458433855324984
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 862 s, 35 iter, 350000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-39-46
  done: false
  episode_len_mean: 140.75
  episode_reward_max: 348.56018732487365
  episode_reward_mean: 307.92612374462595
  episode_reward_min: 268.98261304507065
  episodes_this_iter: 70
  episodes_total: 1857
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3476.086
    load_time_ms: 2.415
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 1.8626451769865326e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.140379786491394
      kl: 0.002814641920849681
      policy_loss: -0.0015350955072790384
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 982 s, 40 iter, 400000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-41-46
  done: false
  episode_len_mean: 140.09
  episode_reward_max: 361.81554479468355
  episode_reward_mean: 308.28576749752875
  episode_reward_min: -141.8635447840793
  episodes_this_iter: 72
  episodes_total: 2211
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3467.368
    load_time_ms: 2.775
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 4.6566129424663316e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.1117733716964722
      kl: 0.007867691107094288
      policy_loss: -0.002065768465399742
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1104 s, 45 iter, 450000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-43-48
  done: false
  episode_len_mean: 139.38
  episode_reward_max: 363.56167248575423
  episode_reward_mean: 308.50902066152355
  episode_reward_min: -142.91620448262918
  episodes_this_iter: 72
  episodes_total: 2571
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3469.096
    load_time_ms: 2.711
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1413899660110474
      kl: 0.004564174450933933
      policy_loss: -0.0013750896323472261
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1225 s, 50 iter, 500000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-45-49
  done: false
  episode_len_mean: 141.24
  episode_reward_max: 372.55113639008255
  episode_reward_mean: 325.6642860199086
  episode_reward_min: 270.99178414362433
  episodes_this_iter: 71
  episodes_total: 2930
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3478.965
    load_time_ms: 2.419
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 1.091393360234949e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1153839826583862
      kl: 0.0008528514299541712
      policy_loss: -6.536241562571377e-05
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1345 s, 55 iter, 550000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-47-50
  done: false
  episode_len_mean: 142.63
  episode_reward_max: 373.20812420545775
  episode_reward_mean: 333.03312751687184
  episode_reward_min: 295.8539655764412
  episodes_this_iter: 70
  episodes_total: 3284
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3501.828
    load_time_ms: 2.369
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 6.821208501468431e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.0990612506866455
      kl: 0.010153359733521938
      policy_loss: -0.0006594693986698985
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1467 s, 60 iter, 600000 ts, 328 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-49-52
  done: false
  episode_len_mean: 138.8
  episode_reward_max: 383.43223618249397
  episode_reward_mean: 316.1445002350072
  episode_reward_min: -146.5755767598482
  episodes_this_iter: 72
  episodes_total: 3637
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3492.567
    load_time_ms: 2.48
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 3.410604250734216e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.01437246799469
      kl: 0.006043625064194202
      policy_loss: -0.00026821575011126697
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1589 s, 65 iter, 650000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-51-55
  done: false
  episode_len_mean: 141.31
  episode_reward_max: 379.64047344509254
  episode_reward_mean: 331.8758251378355
  episode_reward_min: -136.0807360703946
  episodes_this_iter: 71
  episodes_total: 3994
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3488.687
    load_time_ms: 2.619
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 6.394885722983733e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.9885849356651306
      kl: 0.004082833416759968
      policy_loss: -0.0016148159047588706
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1711 s, 70 iter, 700000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-53-57
  done: false
  episode_len_mean: 139.3
  episode_reward_max: 387.41535564298545
  episode_reward_mean: 323.67106204598724
  episode_reward_min: -144.55831170594496
  episodes_this_iter: 72
  episodes_total: 4348
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3496.588
    load_time_ms: 2.524
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 3.996803576864833e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.0074902772903442
      kl: 0.010504554025828838
      policy_loss: -0.001456155558116734
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1832 s, 75 iter, 750000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-55-58
  done: false
  episode_len_mean: 139.76
  episode_reward_max: 371.8532803535156
  episode_reward_mean: 333.3699496999001
  episode_reward_min: 292.4194347632114
  episodes_this_iter: 72
  episodes_total: 4705
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3489.778
    load_time_ms: 2.423
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 4.9960044710810415e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.9361996054649353
      kl: 0.010829820297658443
      policy_loss: -0.0014460172969847918
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 1953 s, 80 iter, 800000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_18-58-00
  done: false
  episode_len_mean: 140.06
  episode_reward_max: 384.8465289699716
  episode_reward_mean: 333.99368823334
  episode_reward_min: 290.72766555240037
  episodes_this_iter: 71
  episodes_total: 5063
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3486.874
    load_time_ms: 2.312
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 6.245005588851302e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.9041703939437866
      kl: 0.01842276193201542
      policy_loss: -0.002664031460881233
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2076 s, 85 iter, 850000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-00-02
  done: false
  episode_len_mean: 140.91
  episode_reward_max: 394.6330392222015
  episode_reward_mean: 339.6548672486096
  episode_reward_min: -140.82881575551002
  episodes_this_iter: 72
  episodes_total: 5421
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3476.688
    load_time_ms: 2.377
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 7.806256986064127e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9714370965957642
      kl: 0.008225328288972378
      policy_loss: -0.002068995963782072
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2198 s, 90 iter, 900000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-02-05
  done: false
  episode_len_mean: 139.71
  episode_reward_max: 389.3510555921715
  episode_reward_mean: 335.10430400135834
  episode_reward_min: -141.42387029935588
  episodes_this_iter: 72
  episodes_total: 5775
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3484.84
    load_time_ms: 2.526
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 4.8789106162900796e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.9015485048294067
      kl: 0.008054100908339024
      policy_loss: 0.0005123392911627889
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2319 s, 95 iter, 950000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-04-06
  done: false
  episode_len_mean: 139.89
  episode_reward_max: 381.3997618894395
  episode_reward_mean: 339.90784595322714
  episode_reward_min: -147.96681585552187
  episodes_this_iter: 72
  episodes_total: 6132
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3485.024
    load_time_ms: 2.475
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 3.0493191351812997e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.8165479302406311
      kl: 0.006135013420134783
      policy_loss: -0.0011951596243306994
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2440 s, 100 iter, 1000000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-06-08
  done: false
  episode_len_mean: 137.18
  episode_reward_max: 385.0567932031121
  episode_reward_mean: 323.35381671844993
  episode_reward_min: -146.81921465151643
  episodes_this_iter: 73
  episodes_total: 6494
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3457.829
    load_time_ms: 2.37
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 9.529122297441562e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.7907843589782715
      kl: 0.007234597112983465
      policy_loss: 0.0006942546460777521
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2561 s, 105 iter, 1050000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-08-09
  done: false
  episode_len_mean: 138.99
  episode_reward_max: 382.657157955587
  episode_reward_mean: 331.9035864346004
  episode_reward_min: -147.88847520557215
  episodes_this_iter: 71
  episodes_total: 6850
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3458.976
    load_time_ms: 2.383
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 1.1911402871801952e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.7776305675506592
      kl: 0.01057923398911953
      policy_loss: -0.0010403713677078485
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2682 s, 110 iter, 1100000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-10-10
  done: false
  episode_len_mean: 140.55
  episode_reward_max: 379.77190488532733
  episode_reward_mean: 342.0470149967761
  episode_reward_min: -140.3724497794242
  episodes_this_iter: 71
  episodes_total: 7205
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3458.578
    load_time_ms: 2.569
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 2.977850717950488e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.7533653974533081
      kl: 0.014355323277413845
      policy_loss: 0.0011375368339940906
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2803 s, 115 iter, 1150000 ts, 348 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-12-11
  done: false
  episode_len_mean: 139.52
  episode_reward_max: 390.62459034883744
  episode_reward_mean: 340.7988962941067
  episode_reward_min: -147.5211649381288
  episodes_this_iter: 71
  episodes_total: 7562
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3443.197
    load_time_ms: 2.627
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 1.488925358975244e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.6886699199676514
      kl: 0.012946657836437225
      policy_loss: 0.0017280710162594914
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 2924 s, 120 iter, 1200000 ts, 326 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-14-13
  done: false
  episode_len_mean: 137.66
  episode_reward_max: 387.8747677004052
  episode_reward_mean: 329.80600950776966
  episode_reward_min: -140.47900126963972
  episodes_this_iter: 73
  episodes_total: 7924
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3439.956
    load_time_ms: 2.717
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 7.44462679487622e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6526458263397217
      kl: 0.01798083446919918
      policy_loss: -7.74563814047724e-05
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3045 s, 125 iter, 1250000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-16-13
  done: false
  episode_len_mean: 138.78
  episode_reward_max: 388.1022225659448
  episode_reward_mean: 338.5421771380076
  episode_reward_min: -142.62580716434306
  episodes_this_iter: 72
  episodes_total: 8283
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3445.856
    load_time_ms: 2.726
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 7.44462679487622e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6395606994628906
      kl: 0.028287282213568687
      policy_loss: 0.006278378888964653
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3165 s, 130 iter, 1300000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-18-13
  done: false
  episode_len_mean: 136.77
  episode_reward_max: 391.4982262548285
  episode_reward_mean: 323.37178381557715
  episode_reward_min: -147.65404870885942
  episodes_this_iter: 73
  episodes_total: 8646
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3457.011
    load_time_ms: 2.482
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 1.11669384666811e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.619472086429596
      kl: 0.045966774225234985
      policy_loss: 0.0083584850654006
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3284 s, 135 iter, 1350000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-20-12
  done: false
  episode_len_mean: 138.88
  episode_reward_max: 383.4535523717762
  episode_reward_mean: 336.0583939225794
  episode_reward_min: -142.91464477302506
  episodes_this_iter: 72
  episodes_total: 9006
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3453.139
    load_time_ms: 2.346
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 8.375201138301463e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.628268301486969
      kl: 0.03379618376493454
      policy_loss: 0.005840796045958996
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3405 s, 140 iter, 1400000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-22-14
  done: false
  episode_len_mean: 139.86
  episode_reward_max: 381.07588029475147
  episode_reward_mean: 331.5052622183437
  episode_reward_min: -152.1650559933131
  episodes_this_iter: 71
  episodes_total: 9366
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3446.665
    load_time_ms: 2.303
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 1.256280811694705e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.7071694731712341
      kl: 0.028057826682925224
      policy_loss: 0.0016307460609823465
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3525 s, 145 iter, 1450000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-24-14
  done: false
  episode_len_mean: 139.7
  episode_reward_max: 379.6468565346074
  episode_reward_mean: 340.6277829161762
  episode_reward_min: -134.66969563155425
  episodes_this_iter: 73
  episodes_total: 9726
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3453.624
    load_time_ms: 2.271
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 1.884420675200185e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.6021058559417725
      kl: 0.01713060401380062
      policy_loss: -0.0005490693147294223
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3647 s, 150 iter, 1500000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-26-16
  done: false
  episode_len_mean: 137.48
  episode_reward_max: 381.02441309854817
  episode_reward_mean: 324.17055581023874
  episode_reward_min: -145.79065682318273
  episodes_this_iter: 73
  episodes_total: 10086
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3457.266
    load_time_ms: 2.322
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 2.826630618369825e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.6133416891098022
      kl: 0.035041242837905884
      policy_loss: 0.003511802526190877
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3769 s, 155 iter, 1550000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-28-20
  done: false
  episode_len_mean: 139.62
  episode_reward_max: 377.5250704960752
  episode_reward_mean: 337.4131032417852
  episode_reward_min: -131.10867280613792
  episodes_this_iter: 72
  episodes_total: 10445
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3443.967
    load_time_ms: 2.492
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 6.359919285762559e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.594995379447937
      kl: 0.03267813101410866
      policy_loss: 0.0038452493026852608
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 3892 s, 160 iter, 1600000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-30-22
  done: false
  episode_len_mean: 138.26
  episode_reward_max: 383.3178898729347
  episode_reward_mean: 336.52817389650204
  episode_reward_min: -139.21192817633192
  episodes_this_iter: 72
  episodes_total: 10812
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3443.564
    load_time_ms: 2.527
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 6.359919285762559e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.6410830020904541
      kl: 0.013225521892309189
      policy_loss: 0.0025976765900850296
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4013 s, 165 iter, 1650000 ts, 316 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-32-23
  done: false
  episode_len_mean: 138.52
  episode_reward_max: 381.25865116656615
  episode_reward_mean: 338.4990133903224
  episode_reward_min: -139.6843103021385
  episodes_this_iter: 72
  episodes_total: 11175
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3456.936
    load_time_ms: 2.447
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 6.359919285762559e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.6529852747917175
      kl: 0.032576143741607666
      policy_loss: 0.013047812506556511
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4134 s, 170 iter, 1700000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-34-25
  done: false
  episode_len_mean: 136.41
  episode_reward_max: 396.90849668381037
  episode_reward_mean: 325.88889400026613
  episode_reward_min: -142.99531991483786
  episodes_this_iter: 73
  episodes_total: 11538
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3457.361
    load_time_ms: 2.428
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 9.539880309150423e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.6268277764320374
      kl: 0.014796825125813484
      policy_loss: -0.0008905577706173062
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4256 s, 175 iter, 1750000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-36-26
  done: false
  episode_len_mean: 139.26
  episode_reward_max: 376.3364304265923
  episode_reward_mean: 340.64967953337344
  episode_reward_min: 306.19821196226883
  episodes_this_iter: 71
  episodes_total: 11898
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3462.531
    load_time_ms: 2.479
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 9.539880309150423e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.59669029712677
      kl: 0.019236471503973007
      policy_loss: 0.010730828158557415
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4377 s, 180 iter, 1800000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-38-28
  done: false
  episode_len_mean: 138.51
  episode_reward_max: 382.0125304986791
  episode_reward_mean: 342.0635900131338
  episode_reward_min: 153.46839487051005
  episodes_this_iter: 72
  episodes_total: 12261
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3467.812
    load_time_ms: 2.37
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 1.4309824802460613e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.5774171352386475
      kl: 0.007998370565474033
      policy_loss: 0.0023954894859343767
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4498 s, 185 iter, 1850000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-40-29
  done: false
  episode_len_mean: 139.51
  episode_reward_max: 377.7912963269034
  episode_reward_mean: 341.37816843958194
  episode_reward_min: 125.51015852410382
  episodes_this_iter: 73
  episodes_total: 12622
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3465.89
    load_time_ms: 2.43
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 7.154912401230306e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.5608018040657043
      kl: 0.023993292823433876
      policy_loss: 0.012287325225770473
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4619 s, 190 iter, 1900000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-42-31
  done: false
  episode_len_mean: 139.31
  episode_reward_max: 378.00511978285226
  episode_reward_mean: 345.462853253269
  episode_reward_min: 315.0219381060393
  episodes_this_iter: 72
  episodes_total: 12982
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3461.109
    load_time_ms: 2.537
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 1.0732363474249576e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.6242058277130127
      kl: 0.05204008147120476
      policy_loss: 0.020462391898036003
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4740 s, 195 iter, 1950000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-44-32
  done: false
  episode_len_mean: 137.86
  episode_reward_max: 381.81966521042193
  episode_reward_mean: 333.8973085111009
  episode_reward_min: -141.32547189427297
  episodes_this_iter: 73
  episodes_total: 13344
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3455.19
    load_time_ms: 2.456
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 2.4147821169720392e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.5621924996376038
      kl: 0.6895112991333008
      policy_loss: 0.029108673334121704
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4860 s, 200 iter, 2000000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-46-32
  done: false
  episode_len_mean: 137.67
  episode_reward_max: 375.9146339388805
  episode_reward_mean: 332.77494269814895
  episode_reward_min: -143.880565369447
  episodes_this_iter: 73
  episodes_total: 13707
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3473.874
    load_time_ms: 2.512
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 1.2224833559980908e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.5686721801757812
      kl: 1.280364990234375
      policy_loss: 0.02936415560543537
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 4981 s, 205 iter, 2050000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-48-33
  done: false
  episode_len_mean: 136.94
  episode_reward_max: 371.84981943198295
  episode_reward_mean: 333.49214334795715
  episode_reward_min: -142.93184750443973
  episodes_this_iter: 74
  episodes_total: 14071
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3473.023
    load_time_ms: 2.607
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 4.1258823835671693e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.6186375021934509
      kl: 5.407833576202393
      policy_loss: 0.061571426689624786
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5101 s, 210 iter, 2100000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-50-33
  done: false
  episode_len_mean: 136.53
  episode_reward_max: 369.9708800299037
  episode_reward_mean: 329.4878800846947
  episode_reward_min: -141.14453421317788
  episodes_this_iter: 74
  episodes_total: 14438
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3450.864
    load_time_ms: 2.558
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 6.188822565608795e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.6429862976074219
      kl: 0.030040349811315536
      policy_loss: -0.0024939340073615313
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5222 s, 215 iter, 2150000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-52-34
  done: false
  episode_len_mean: 137.77
  episode_reward_max: 375.08811329599524
  episode_reward_mean: 333.87333465450627
  episode_reward_min: -141.2411962525181
  episodes_this_iter: 72
  episodes_total: 14802
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3450.076
    load_time_ms: 2.392
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.3924853549410176e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.6472498774528503
      kl: 0.019371837377548218
      policy_loss: 1.3418160961009562e-05
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5342 s, 220 iter, 2200000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-54-35
  done: false
  episode_len_mean: 135.02
  episode_reward_max: 369.17075441169317
  episode_reward_mean: 326.68431106308543
  episode_reward_min: -144.83941529538092
  episodes_this_iter: 74
  episodes_total: 15170
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3457.546
    load_time_ms: 2.301
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 2.0887273760792532e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.6331738829612732
      kl: 0.0229233019053936
      policy_loss: -0.0032077806536108255
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5463 s, 225 iter, 2250000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-56-36
  done: false
  episode_len_mean: 137.0
  episode_reward_max: 374.62241007017667
  episode_reward_mean: 341.47502832054016
  episode_reward_min: 306.3794776958108
  episodes_this_iter: 72
  episodes_total: 15536
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3453.64
    load_time_ms: 2.461
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 3.133092376783426e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.5858588814735413
      kl: 0.012018849141895771
      policy_loss: 0.0031791762448847294
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5584 s, 230 iter, 2300000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_19-58-38
  done: false
  episode_len_mean: 136.43
  episode_reward_max: 380.26015204360857
  episode_reward_mean: 338.76130674208997
  episode_reward_min: -137.40087188018
  episodes_this_iter: 73
  episodes_total: 15903
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3436.753
    load_time_ms: 2.57
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 4.6996359398460466e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.6069772839546204
      kl: 63.66011428833008
      policy_loss: 0.06950894743204117
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5705 s, 235 iter, 2350000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-00-39
  done: false
  episode_len_mean: 137.84
  episode_reward_max: 376.5997785379526
  episode_reward_mean: 331.18737282546607
  episode_reward_min: -149.55093114153976
  episodes_this_iter: 73
  episodes_total: 16268
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3446.632
    load_time_ms: 2.372
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 1.0574183389008502e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.5919790267944336
      kl: 0.023644132539629936
      policy_loss: -0.0023803003132343292
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5825 s, 240 iter, 2400000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-02-39
  done: false
  episode_len_mean: 139.23
  episode_reward_max: 381.5591587411595
  episode_reward_mean: 340.76211172056486
  episode_reward_min: -147.90159595676755
  episodes_this_iter: 72
  episodes_total: 16630
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3470.387
    load_time_ms: 2.474
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 2.3791918077875705e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.6445095539093018
      kl: 0.018121829256415367
      policy_loss: -0.00385602586902678
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 5945 s, 245 iter, 2450000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-04-39
  done: false
  episode_len_mean: 137.17
  episode_reward_max: 381.1674466394236
  episode_reward_mean: 332.9284929740234
  episode_reward_min: -143.24916542005772
  episodes_this_iter: 73
  episodes_total: 16993
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3466.452
    load_time_ms: 2.621
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 1.7843936942819645e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.5328306555747986
      kl: 0.026594238355755806
      policy_loss: 0.0004820797767024487
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6066 s, 250 iter, 2500000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-06-41
  done: false
  episode_len_mean: 135.66
  episode_reward_max: 376.9080540530552
  episode_reward_mean: 319.43680222665887
  episode_reward_min: -142.9980233870569
  episodes_this_iter: 74
  episodes_total: 17357
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3466.241
    load_time_ms: 2.522
    num_steps_sampled: 2510000
    num_steps_trained: 2510000
    rl_0:
      cur_kl_coeff: 2.6765907029816602e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.6465146541595459
      kl: 0.03306989371776581
      policy_loss: 0.0007790653617121279
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6187 s, 255 iter, 2550000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-08-42
  done: false
  episode_len_mean: 137.75
  episode_reward_max: 377.920536045362
  episode_reward_mean: 336.11072612600947
  episode_reward_min: -144.29565756168446
  episodes_this_iter: 73
  episodes_total: 17718
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3459.052
    load_time_ms: 2.553
    num_steps_sampled: 2560000
    num_steps_trained: 2560000
    rl_0:
      cur_kl_coeff: 6.022328516253239e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.6456432938575745
      kl: 0.037002213299274445
      policy_loss: -0.001559225027449429
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6309 s, 260 iter, 2600000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-10-43
  done: false
  episode_len_mean: 137.99
  episode_reward_max: 375.47185790191185
  episode_reward_mean: 333.0555346812262
  episode_reward_min: -142.56783677755925
  episodes_this_iter: 72
  episodes_total: 18080
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3444.982
    load_time_ms: 2.471
    num_steps_sampled: 2610000
    num_steps_trained: 2610000
    rl_0:
      cur_kl_coeff: 6.022328516253239e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.6634663939476013
      kl: 0.03067704662680626
      policy_loss: 0.0042949882335960865
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6430 s, 265 iter, 2650000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-12-45
  done: false
  episode_len_mean: 136.2
  episode_reward_max: 379.24516180604326
  episode_reward_mean: 311.6899042154615
  episode_reward_min: -151.4318985280815
  episodes_this_iter: 74
  episodes_total: 18441
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3452.435
    load_time_ms: 2.374
    num_steps_sampled: 2660000
    num_steps_trained: 2660000
    rl_0:
      cur_kl_coeff: 9.033492128145004e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.7340459227561951
      kl: 0.024506868794560432
      policy_loss: 6.171612039906904e-05
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6551 s, 270 iter, 2700000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-14-46
  done: false
  episode_len_mean: 138.49
  episode_reward_max: 381.3894701818373
  episode_reward_mean: 331.53263204117786
  episode_reward_min: -137.29601237068283
  episodes_this_iter: 72
  episodes_total: 18804
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3446.807
    load_time_ms: 2.345
    num_steps_sampled: 2710000
    num_steps_trained: 2710000
    rl_0:
      cur_kl_coeff: 2.03253553496217e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7209014296531677
      kl: 0.03155090659856796
      policy_loss: 0.002750687301158905
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6671 s, 275 iter, 2750000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-16-47
  done: false
  episode_len_mean: 137.24
  episode_reward_max: 381.9012627704684
  episode_reward_mean: 332.4899516440526
  episode_reward_min: -136.90022656888996
  episodes_this_iter: 73
  episodes_total: 19167
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3457.035
    load_time_ms: 2.37
    num_steps_sampled: 2760000
    num_steps_trained: 2760000
    rl_0:
      cur_kl_coeff: 4.573206052264133e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.5978144407272339
      kl: 0.0460376963019371
      policy_loss: 0.0010047330288216472
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6792 s, 280 iter, 2800000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-18-48
  done: false
  episode_len_mean: 138.77
  episode_reward_max: 385.9884798918771
  episode_reward_mean: 339.49902299640866
  episode_reward_min: -141.9700022070712
  episodes_this_iter: 72
  episodes_total: 19528
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3458.255
    load_time_ms: 2.366
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 1.0289713229853388e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.6252782344818115
      kl: 0.018818004056811333
      policy_loss: 0.0010682898573577404
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 6912 s, 285 iter, 2850000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-20-48
  done: false
  episode_len_mean: 138.86
  episode_reward_max: 373.127547335813
  episode_reward_mean: 327.88344526922674
  episode_reward_min: -144.0894721050268
  episodes_this_iter: 72
  episodes_total: 19889
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3448.285
    load_time_ms: 2.227
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 2.315185063126706e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.6161824464797974
      kl: 0.016290538012981415
      policy_loss: -0.0015007787151262164
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 7033 s, 290 iter, 2900000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-22-49
  done: false
  episode_len_mean: 140.16
  episode_reward_max: 378.77503093862805
  episode_reward_mean: 339.0726618930933
  episode_reward_min: 19.960065262902333
  episodes_this_iter: 72
  episodes_total: 20247
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3452.083
    load_time_ms: 2.311
    num_steps_sampled: 2910000
    num_steps_trained: 2910000
    rl_0:
      cur_kl_coeff: 2.315185063126706e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.6279006600379944
      kl: 0.0268265288323164
      policy_loss: 0.0007481768843717873
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=26994], 7154 s, 295 iter, 2950000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-24-51
  done: false
  episode_len_mean: 140.36
  episode_reward_max: 375.1510592504099
  episode_reward_mean: 337.5222488564645
  episode_reward_min: 163.3328903890441
  episodes_this_iter: 72
  episodes_total: 20608
  experiment_id: 3c083513fdcf4257a3e2216ed5864a2f
  hostname: Gandalf
  info:
    grad_time_ms: 3457.743
    load_time_ms: 2.373
    num_steps_sampled: 2960000
    num_steps_trained: 2960000
    rl_0:
      cur_kl_coeff: 7.813751552606587e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.6391147375106812
      kl: 61.53817367553711
      policy_loss: 0.10306043922901154
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=26994], 7275 s, 300 iter, 3000000 ts, 342 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=26994], 7275 s, 300 iter, 3000000 ts, 342 rew



In [13]:
# Launch another training run. executeTraining is defined in an earlier cell
# (not visible from this cell), so its exact configuration should be checked there.
# Judging by the recorded output, this call starts a fresh trial rather than
# resuming the previous one: the previous trial is reported TERMINATED at 300 iter,
# and the new one logs a new experiment_id, a new result directory, restarts its
# iteration/timestep counters, and begins again with cur_kl_coeff = 0.2.
# NOTE(review): in both runs cur_kl_coeff decays towards zero (~1e-18 and below),
# and the previous run logged kl = 61.5 shortly before it ended -- consider
# reviewing the PPO KL settings (kl_coeff / kl_target) used by executeTraining.
executeTraining()

 Starting SUMO on port 41009
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-12_20-26-29hvez1r1v -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



11.81848450957205
2.4475537966176044


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-27-29
  done: false
  episode_len_mean: 461.76190476190476
  episode_reward_max: 169.89625099605405
  episode_reward_mean: 13.840764374215118
  episode_reward_min: -153.2152350975988
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 4553.048
    load_time_ms: 144.467
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4170629978179932
      kl: 0.001030373852699995
      policy_loss: -0.0011537749087437987
      total_loss: 128.91114807128906
      vf_explained_var: 0.014319413341581821
      vf_loss: 128.91207885742188
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.418864369392395
      kl: 0.0012330821482464671
      policy_loss: -0.0009915238

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 121 s, 5 iter, 50000 ts, 64.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-29-28
  done: false
  episode_len_mean: 454.39
  episode_reward_max: 300.00309345178084
  episode_reward_mean: 87.37004122309301
  episode_reward_min: -153.96501866118967
  episodes_this_iter: 22
  episodes_total: 130
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3697.481
    load_time_ms: 26.016
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4201107025146484
      kl: 0.003641005139797926
      policy_loss: -0.001972984755411744
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 241 s, 10 iter, 100000 ts, 174 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-31-28
  done: false
  episode_len_mean: 279.28
  episode_reward_max: 346.6498795018241
  episode_reward_mean: 172.42275693171666
  episode_reward_min: -165.1169443983627
  episodes_this_iter: 37
  episodes_total: 289
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3525.054
    load_time_ms: 2.514
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.3900965452194214
      kl: 0.010811024345457554
      policy_loss: -0.0030879033729434013
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 361 s, 15 iter, 150000 ts, 198 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-33-29
  done: false
  episode_len_mean: 205.9
  episode_reward_max: 360.68686827768323
  episode_reward_mean: 170.26197882431094
  episode_reward_min: -171.31536932046305
  episodes_this_iter: 52
  episodes_total: 515
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3512.974
    load_time_ms: 2.578
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 2.441406286379788e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.3803333044052124
      kl: 0.008138412609696388
      policy_loss: -0.0029154657386243343
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 483 s, 20 iter, 200000 ts, 244 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-35-31
  done: false
  episode_len_mean: 182.8
  episode_reward_max: 347.5768993593099
  episode_reward_mean: 239.0973446362692
  episode_reward_min: -154.69094001041583
  episodes_this_iter: 55
  episodes_total: 785
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3508.124
    load_time_ms: 2.443
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 7.629394644936838e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3492969274520874
      kl: 0.010270983912050724
      policy_loss: -0.0036120449658483267
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 603 s, 25 iter, 250000 ts, 277 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-37-32
  done: false
  episode_len_mean: 166.29
  episode_reward_max: 347.2243661221646
  episode_reward_mean: 263.56356985598194
  episode_reward_min: -152.661786759854
  episodes_this_iter: 62
  episodes_total: 1076
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3507.635
    load_time_ms: 2.348
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 4.7683716530855236e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.3195106983184814
      kl: 0.011298777535557747
      policy_loss: -0.003760394873097539
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 725 s, 30 iter, 300000 ts, 269 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-39-34
  done: false
  episode_len_mean: 145.22
  episode_reward_max: 320.8719525813339
  episode_reward_mean: 276.5300721459105
  episode_reward_min: -137.03312561075631
  episodes_this_iter: 69
  episodes_total: 1406
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3520.725
    load_time_ms: 2.331
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 2.3841858265427618e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.2779539823532104
      kl: 0.005065020173788071
      policy_loss: -0.0013382723554968834
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 847 s, 35 iter, 350000 ts, 279 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-41-36
  done: false
  episode_len_mean: 133.99
  episode_reward_max: 344.1315908518354
  episode_reward_mean: 276.74338612586047
  episode_reward_min: -147.78290562260239
  episodes_this_iter: 75
  episodes_total: 1773
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3531.74
    load_time_ms: 2.395
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 2.9802322831784522e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.2597811222076416
      kl: 0.003775552613660693
      policy_loss: -0.0012988318921998143
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 968 s, 40 iter, 400000 ts, 274 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-43-37
  done: false
  episode_len_mean: 134.02
  episode_reward_max: 370.3648349955104
  episode_reward_mean: 264.46568788051337
  episode_reward_min: -150.81267529718818
  episodes_this_iter: 76
  episodes_total: 2142
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3517.77
    load_time_ms: 2.514
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 9.313225884932663e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.2508790493011475
      kl: 0.003552905283868313
      policy_loss: -0.0011594581883400679
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1089 s, 45 iter, 450000 ts, 296 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-45-40
  done: false
  episode_len_mean: 139.07
  episode_reward_max: 361.59471708387446
  episode_reward_mean: 310.7544275028415
  episode_reward_min: -137.44385209740668
  episodes_this_iter: 71
  episodes_total: 2507
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3479.688
    load_time_ms: 2.513
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.2019652128219604
      kl: 0.013653770089149475
      policy_loss: -0.0022904847282916307
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1212 s, 50 iter, 500000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-47-41
  done: false
  episode_len_mean: 135.75
  episode_reward_max: 379.5904693010692
  episode_reward_mean: 311.1526304501095
  episode_reward_min: -136.27934906110076
  episodes_this_iter: 74
  episodes_total: 2872
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3416.947
    load_time_ms: 2.34
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 3.6379788613018216e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.2132856845855713
      kl: 0.00853410828858614
      policy_loss: -0.001448153518140316
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1333 s, 55 iter, 550000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-49-43
  done: false
  episode_len_mean: 134.6
  episode_reward_max: 379.640158223993
  episode_reward_mean: 331.825634448827
  episode_reward_min: -129.43709795465338
  episodes_this_iter: 74
  episodes_total: 3243
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3359.045
    load_time_ms: 2.341
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.1802339553833008
      kl: 0.007219797465950251
      policy_loss: -0.0012344216229394078
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1454 s, 60 iter, 600000 ts, 328 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-51-44
  done: false
  episode_len_mean: 130.17
  episode_reward_max: 370.6110980640322
  episode_reward_mean: 326.64304131908483
  episode_reward_min: 282.8540412829425
  episodes_this_iter: 76
  episodes_total: 3620
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3335.289
    load_time_ms: 2.404
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 1.421085492696024e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.113900065422058
      kl: 0.02473442256450653
      policy_loss: -0.004293106496334076
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1575 s, 65 iter, 650000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-53-45
  done: false
  episode_len_mean: 132.48
  episode_reward_max: 390.5256642404868
  episode_reward_mean: 333.0756229584458
  episode_reward_min: -139.6006907105658
  episodes_this_iter: 75
  episodes_total: 4002
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3334.832
    load_time_ms: 2.28
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 3.55271373174006e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.183534026145935
      kl: 0.006225959397852421
      policy_loss: -0.0019843264017254114
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1694 s, 70 iter, 700000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-55-45
  done: false
  episode_len_mean: 133.76
  episode_reward_max: 382.8281803574085
  episode_reward_mean: 339.1360638800572
  episode_reward_min: 292.7193057978245
  episodes_this_iter: 75
  episodes_total: 4376
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3333.575
    load_time_ms: 2.255
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 2.2204460823375376e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.1727302074432373
      kl: 0.00947000365704298
      policy_loss: -0.003196545410901308
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1814 s, 75 iter, 750000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-57-45
  done: false
  episode_len_mean: 132.19
  episode_reward_max: 389.3137331557703
  episode_reward_mean: 343.7688641726524
  episode_reward_min: 180.74821488255492
  episodes_this_iter: 75
  episodes_total: 4756
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3338.168
    load_time_ms: 2.34
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 1.387778801460961e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.1290606260299683
      kl: 0.033981502056121826
      policy_loss: -0.0054750279523432255
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 1934 s, 80 iter, 800000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_20-59-44
  done: false
  episode_len_mean: 132.84
  episode_reward_max: 396.2279705936164
  episode_reward_mean: 334.79757440676616
  episode_reward_min: -138.77470533893708
  episodes_this_iter: 75
  episodes_total: 5133
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3332.583
    load_time_ms: 2.324
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.0961964130401611
      kl: 0.006184523459523916
      policy_loss: -0.0019035279983654618
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2053 s, 85 iter, 850000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-01-44
  done: false
  episode_len_mean: 134.64
  episode_reward_max: 383.9084945017073
  episode_reward_mean: 341.48926724422256
  episode_reward_min: 185.07720852745211
  episodes_this_iter: 74
  episodes_total: 5509
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3339.087
    load_time_ms: 2.274
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 1.0842021886413758e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.1222460269927979
      kl: 0.010018899105489254
      policy_loss: -0.0022616307251155376
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2173 s, 90 iter, 900000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-03-45
  done: false
  episode_len_mean: 134.0
  episode_reward_max: 392.40180910571314
  episode_reward_mean: 338.4574306881498
  episode_reward_min: -141.35436881174542
  episodes_this_iter: 75
  episodes_total: 5882
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3342.497
    load_time_ms: 2.354
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 2.7105054716034394e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.0986391305923462
      kl: 0.0036198801826685667
      policy_loss: -0.001340882503427565
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2293 s, 95 iter, 950000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-05-44
  done: false
  episode_len_mean: 135.11
  episode_reward_max: 397.2006863861124
  episode_reward_mean: 335.3433253590402
  episode_reward_min: -144.6755652299475
  episodes_this_iter: 74
  episodes_total: 6253
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3337.308
    load_time_ms: 2.431
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.6940659197521496e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.098475694656372
      kl: 0.009386496618390083
      policy_loss: -0.0032508510630577803
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2412 s, 100 iter, 1000000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-07-44
  done: false
  episode_len_mean: 134.35
  episode_reward_max: 389.08620384837707
  episode_reward_mean: 342.4743116357801
  episode_reward_min: -137.7517456145262
  episodes_this_iter: 74
  episodes_total: 6625
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3342.668
    load_time_ms: 2.404
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 2.117582399690187e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.080439805984497
      kl: 0.012927353382110596
      policy_loss: -0.0028428833466023207
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2531 s, 105 iter, 1050000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-09-43
  done: false
  episode_len_mean: 134.44
  episode_reward_max: 394.8908982338689
  episode_reward_mean: 341.3796064149002
  episode_reward_min: 145.55744743821248
  episodes_this_iter: 74
  episodes_total: 6999
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3347.387
    load_time_ms: 2.423
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.0640153884887695
      kl: 0.019579550251364708
      policy_loss: -0.004084429237991571
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2652 s, 110 iter, 1100000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-11-44
  done: false
  episode_len_mean: 132.57
  episode_reward_max: 393.6930243005587
  episode_reward_mean: 325.55615424487587
  episode_reward_min: -138.88758568993816
  episodes_this_iter: 76
  episodes_total: 7374
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3353.895
    load_time_ms: 2.332
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 6.617444999031835e-25
      cur_lr: 4.999999873689376e-05
      entropy: 1.0365673303604126
      kl: 0.010087460279464722
      policy_loss: -0.0030289352871477604
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2772 s, 115 iter, 1150000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-13-44
  done: false
  episode_len_mean: 135.84
  episode_reward_max: 395.27489031963813
  episode_reward_mean: 344.8457745307429
  episode_reward_min: 145.85949354888788
  episodes_this_iter: 74
  episodes_total: 7745
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3343.643
    load_time_ms: 2.347
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 8.271806248789793e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.0905307531356812
      kl: 0.004940362181514502
      policy_loss: -0.0010091847507283092
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 2891 s, 120 iter, 1200000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-15-45
  done: false
  episode_len_mean: 135.05
  episode_reward_max: 392.505931508772
  episode_reward_mean: 342.7578727413441
  episode_reward_min: -137.97797318439132
  episodes_this_iter: 75
  episodes_total: 8115
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3335.57
    load_time_ms: 2.383
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.0339757810987241e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.0457159280776978
      kl: 0.003818731987848878
      policy_loss: -0.001365778036415577
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3012 s, 125 iter, 1250000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-17-44
  done: false
  episode_len_mean: 131.87
  episode_reward_max: 385.28634402578297
  episode_reward_mean: 327.24086420321294
  episode_reward_min: -139.7064064226339
  episodes_this_iter: 77
  episodes_total: 8489
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3341.942
    load_time_ms: 2.243
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 6.462348631867026e-28
      cur_lr: 4.999999873689376e-05
      entropy: 0.9798980355262756
      kl: 0.005580026190727949
      policy_loss: 0.00039962647133506835
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3131 s, 130 iter, 1300000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-19-44
  done: false
  episode_len_mean: 133.17
  episode_reward_max: 391.800617996008
  episode_reward_mean: 346.2468520900566
  episode_reward_min: 153.81113678692373
  episodes_this_iter: 75
  episodes_total: 8861
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3346.432
    load_time_ms: 2.362
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 2.0194839474584456e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.93979811668396
      kl: 0.011892231181263924
      policy_loss: -0.0007794076227582991
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3250 s, 135 iter, 1350000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-21-43
  done: false
  episode_len_mean: 132.77
  episode_reward_max: 400.8920522731492
  episode_reward_mean: 340.2051733294782
  episode_reward_min: -142.94308233580819
  episodes_this_iter: 76
  episodes_total: 9238
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3353.995
    load_time_ms: 2.575
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 1.0097419737292228e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.9349255561828613
      kl: 0.005253343842923641
      policy_loss: -0.0007369641098193824
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3368 s, 140 iter, 1400000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-23-42
  done: false
  episode_len_mean: 132.23
  episode_reward_max: 395.9736007744968
  episode_reward_mean: 338.0327996018556
  episode_reward_min: -141.91664852515487
  episodes_this_iter: 75
  episodes_total: 9613
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3349.578
    load_time_ms: 2.592
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 1.2621774671615285e-30
      cur_lr: 4.999999873689376e-05
      entropy: 0.9340021014213562
      kl: 0.01987895369529724
      policy_loss: 0.00038869419950060546
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3487 s, 145 iter, 1450000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-25-41
  done: false
  episode_len_mean: 133.4
  episode_reward_max: 392.8232346652269
  episode_reward_mean: 348.6504107844402
  episode_reward_min: 313.0750319909894
  episodes_this_iter: 75
  episodes_total: 9988
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3341.919
    load_time_ms: 2.375
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 1.5777218339519106e-31
      cur_lr: 4.999999873689376e-05
      entropy: 0.9257799983024597
      kl: 0.005342991556972265
      policy_loss: 0.0016245072474703193
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3607 s, 150 iter, 1500000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-27-41
  done: false
  episode_len_mean: 132.87
  episode_reward_max: 387.117576340353
  episode_reward_mean: 347.2516276500243
  episode_reward_min: 298.91154506645785
  episodes_this_iter: 75
  episodes_total: 10365
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3343.935
    load_time_ms: 2.197
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 1.479113815253733e-32
      cur_lr: 4.999999873689376e-05
      entropy: 0.8621240258216858
      kl: 0.005790297407656908
      policy_loss: -0.0016342350281774998
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3727 s, 155 iter, 1550000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-29-41
  done: false
  episode_len_mean: 132.38
  episode_reward_max: 387.06713626659985
  episode_reward_mean: 330.9805026419212
  episode_reward_min: -144.24740339073117
  episodes_this_iter: 74
  episodes_total: 10745
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3340.583
    load_time_ms: 2.507
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 3.697784538134333e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.8643702268600464
      kl: 0.04321402311325073
      policy_loss: 0.004258853383362293
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3847 s, 160 iter, 1600000 ts, 350 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-31-42
  done: false
  episode_len_mean: 132.11
  episode_reward_max: 391.3515766194951
  episode_reward_mean: 343.4169154939284
  episode_reward_min: -135.763158099286
  episodes_this_iter: 76
  episodes_total: 11122
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3336.103
    load_time_ms: 2.719
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 1.3866697987310998e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.8497591018676758
      kl: 0.018284741789102554
      policy_loss: 0.00013644658611156046
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 3966 s, 165 iter, 1650000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-33-40
  done: false
  episode_len_mean: 131.33
  episode_reward_max: 392.65308035201485
  episode_reward_mean: 332.9114827871485
  episode_reward_min: -140.7312337328722
  episodes_this_iter: 77
  episodes_total: 11503
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3336.538
    load_time_ms: 2.744
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 6.933348993655499e-34
      cur_lr: 4.999999873689376e-05
      entropy: 0.8467200994491577
      kl: 0.008325730450451374
      policy_loss: 0.005390787962824106
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4085 s, 170 iter, 1700000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-35-40
  done: false
  episode_len_mean: 133.4
  episode_reward_max: 387.38583604276175
  episode_reward_mean: 345.5783880661758
  episode_reward_min: 167.49397670825567
  episodes_this_iter: 76
  episodes_total: 11881
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3355.48
    load_time_ms: 2.494
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 1.7333372484138748e-34
      cur_lr: 4.999999873689376e-05
      entropy: 0.8220484852790833
      kl: 0.009235783480107784
      policy_loss: -0.0014954956714063883
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4205 s, 175 iter, 1750000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-37-40
  done: false
  episode_len_mean: 131.2
  episode_reward_max: 395.34109470414194
  episode_reward_mean: 335.4537195695528
  episode_reward_min: -141.65570965113375
  episodes_this_iter: 77
  episodes_total: 12261
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3366.425
    load_time_ms: 2.282
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 4.333343121034687e-35
      cur_lr: 4.999999873689376e-05
      entropy: 0.8039397597312927
      kl: 0.014087808318436146
      policy_loss: -0.0013463430805131793
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4323 s, 180 iter, 1800000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-39-39
  done: false
  episode_len_mean: 131.2
  episode_reward_max: 397.71436113062117
  episode_reward_mean: 342.6913586594014
  episode_reward_min: -149.15818213674763
  episodes_this_iter: 76
  episodes_total: 12640
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3351.353
    load_time_ms: 2.34
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 2.1666715605173435e-35
      cur_lr: 4.999999873689376e-05
      entropy: 0.7735947966575623
      kl: 0.013840925879776478
      policy_loss: 0.002304959809407592
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4443 s, 185 iter, 1850000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-41-39
  done: false
  episode_len_mean: 129.41
  episode_reward_max: 395.7385588841508
  episode_reward_mean: 330.6503957854501
  episode_reward_min: -145.03402173241156
  episodes_this_iter: 77
  episodes_total: 13024
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3346.106
    load_time_ms: 2.43
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 1.0833357802586717e-35
      cur_lr: 4.999999873689376e-05
      entropy: 0.760174572467804
      kl: 0.01020718552172184
      policy_loss: -9.553189011057839e-05
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4564 s, 190 iter, 1900000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-43-40
  done: false
  episode_len_mean: 129.12
  episode_reward_max: 386.4207248362898
  episode_reward_mean: 326.31811228020985
  episode_reward_min: -149.06249142652598
  episodes_this_iter: 77
  episodes_total: 13410
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3359.119
    load_time_ms: 2.461
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 1.3541697253233397e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.7975161075592041
      kl: 0.010739309713244438
      policy_loss: 0.00024562247563153505
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4684 s, 195 iter, 1950000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-45-42
  done: false
  episode_len_mean: 129.32
  episode_reward_max: 388.36649189724375
  episode_reward_mean: 336.4896067307912
  episode_reward_min: -143.8794164378018
  episodes_this_iter: 77
  episodes_total: 13795
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3363.392
    load_time_ms: 2.328
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 6.770848626616698e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.7222000956535339
      kl: 0.011931763030588627
      policy_loss: 0.001446559908799827
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4806 s, 200 iter, 2000000 ts, 351 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-47-42
  done: false
  episode_len_mean: 129.75
  episode_reward_max: 394.55418016873983
  episode_reward_mean: 339.9830666282438
  episode_reward_min: -141.13976990353785
  episodes_this_iter: 78
  episodes_total: 14180
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3353.661
    load_time_ms: 2.34
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 8.463560783270873e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.705659806728363
      kl: 0.007876145653426647
      policy_loss: 0.001650176360271871
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 4926 s, 205 iter, 2050000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-49-43
  done: false
  episode_len_mean: 129.14
  episode_reward_max: 398.7870922717104
  episode_reward_mean: 336.7849889901452
  episode_reward_min: -147.20868184438277
  episodes_this_iter: 77
  episodes_total: 14565
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3344.883
    load_time_ms: 2.448
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 2.1158901958177182e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.6998788118362427
      kl: 0.021546658128499985
      policy_loss: 0.0028444554191082716
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5046 s, 210 iter, 2100000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-51-43
  done: false
  episode_len_mean: 128.09
  episode_reward_max: 391.8860292609523
  episode_reward_mean: 331.749229809618
  episode_reward_min: -152.94830192655112
  episodes_this_iter: 78
  episodes_total: 14954
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3334.764
    load_time_ms: 2.455
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 2.1158901958177182e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.6897011399269104
      kl: 0.009897519834339619
      policy_loss: 0.00019857697770930827
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5166 s, 215 iter, 2150000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-53-43
  done: false
  episode_len_mean: 129.83
  episode_reward_max: 387.5818662371147
  episode_reward_mean: 342.81990790035917
  episode_reward_min: -139.27834909132523
  episodes_this_iter: 77
  episodes_total: 15342
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3331.567
    load_time_ms: 2.344
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.0579451679737823e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.6655486822128296
      kl: 0.010854857042431831
      policy_loss: 0.003919526003301144
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5286 s, 220 iter, 2200000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-55-44
  done: false
  episode_len_mean: 126.9
  episode_reward_max: 391.0378984180723
  episode_reward_mean: 330.8429140707587
  episode_reward_min: -141.01286545627073
  episodes_this_iter: 79
  episodes_total: 15732
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3336.673
    load_time_ms: 2.45
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 1.0579451679737823e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.6510285139083862
      kl: 0.012533323839306831
      policy_loss: 0.00035633528023026884
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5405 s, 225 iter, 2250000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-57-43
  done: false
  episode_len_mean: 128.11
  episode_reward_max: 393.7585947729448
  episode_reward_mean: 341.500515369494
  episode_reward_min: -144.97396609971008
  episodes_this_iter: 78
  episodes_total: 16120
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3330.029
    load_time_ms: 2.52
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 1.0579451679737823e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.6232656240463257
      kl: 0.012479634024202824
      policy_loss: 0.0007189449388533831
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5526 s, 230 iter, 2300000 ts, 352 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_21-59-44
  done: false
  episode_len_mean: 129.62
  episode_reward_max: 388.32318486785147
  episode_reward_mean: 348.30700217722153
  episode_reward_min: 305.90008065767535
  episodes_this_iter: 77
  episodes_total: 16510
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3328.465
    load_time_ms: 2.306
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 2.6448625696098398e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.6016077399253845
      kl: 0.01586994342505932
      policy_loss: 0.00520050385966897
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5646 s, 235 iter, 2350000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-01-44
  done: false
  episode_len_mean: 128.1
  episode_reward_max: 386.6169953271625
  episode_reward_mean: 344.7559587328467
  episode_reward_min: -159.58084586877686
  episodes_this_iter: 78
  episodes_total: 16899
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3342.092
    load_time_ms: 2.331
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 3.9672931537655275e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.6097465753555298
      kl: 0.02130117639899254
      policy_loss: 0.008746602572500706
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5764 s, 240 iter, 2400000 ts, 353 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-03-42
  done: false
  episode_len_mean: 128.77
  episode_reward_max: 392.34770664763755
  episode_reward_mean: 350.9892017257529
  episode_reward_min: 304.07213975687574
  episodes_this_iter: 78
  episodes_total: 17288
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3341.377
    load_time_ms: 2.573
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 1.4877347574997648e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.5976132154464722
      kl: 0.006792416330426931
      policy_loss: 0.0003876638365909457
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 5884 s, 245 iter, 2450000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-05-42
  done: false
  episode_len_mean: 128.88
  episode_reward_max: 390.2045992469694
  episode_reward_mean: 349.15534157656765
  episode_reward_min: 300.1475993374795
  episodes_this_iter: 78
  episodes_total: 17680
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3334.874
    load_time_ms: 2.433
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 9.298315960027324e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.587598443031311
      kl: 0.008742956444621086
      policy_loss: 0.0026733833365142345
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6002 s, 250 iter, 2500000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-07-41
  done: false
  episode_len_mean: 124.81
  episode_reward_max: 385.2272865419103
  episode_reward_mean: 330.574366014291
  episode_reward_min: -146.06304968266016
  episodes_this_iter: 79
  episodes_total: 18073
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3335.659
    load_time_ms: 2.37
    num_steps_sampled: 2510000
    num_steps_trained: 2510000
    rl_0:
      cur_kl_coeff: 3.4868509687794423e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.601987898349762
      kl: 0.0072951242327690125
      policy_loss: 0.0024268589913845062
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6122 s, 255 iter, 2550000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-09-41
  done: false
  episode_len_mean: 122.66
  episode_reward_max: 396.18485742158214
  episode_reward_mean: 325.57505597895533
  episode_reward_min: -148.85595269476497
  episodes_this_iter: 82
  episodes_total: 18476
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3345.602
    load_time_ms: 2.588
    num_steps_sampled: 2560000
    num_steps_trained: 2560000
    rl_0:
      cur_kl_coeff: 4.358038224050181e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.5805127024650574
      kl: 0.020123081281781197
      policy_loss: 0.003902246244251728
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6241 s, 260 iter, 2600000 ts, 353 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-11-40
  done: false
  episode_len_mean: 126.61
  episode_reward_max: 393.43424100407464
  episode_reward_mean: 349.5567588226057
  episode_reward_min: 304.9197288004268
  episodes_this_iter: 78
  episodes_total: 18875
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3351.931
    load_time_ms: 2.582
    num_steps_sampled: 2610000
    num_steps_trained: 2610000
    rl_0:
      cur_kl_coeff: 2.1790191120250905e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.5872986912727356
      kl: 0.027859654277563095
      policy_loss: 0.007518335711210966
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6360 s, 265 iter, 2650000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-13-40
  done: false
  episode_len_mean: 124.44
  episode_reward_max: 392.09911174169963
  episode_reward_mean: 337.91362143387187
  episode_reward_min: -156.7689456248458
  episodes_this_iter: 80
  episodes_total: 19272
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3350.745
    load_time_ms: 2.634
    num_steps_sampled: 2660000
    num_steps_trained: 2660000
    rl_0:
      cur_kl_coeff: 2.1790191120250905e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.5932800769805908
      kl: 0.031481511890888214
      policy_loss: 0.000753304804675281
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6479 s, 270 iter, 2700000 ts, 326 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-15-40
  done: false
  episode_len_mean: 121.49
  episode_reward_max: 391.2577038747155
  episode_reward_mean: 313.43334212683993
  episode_reward_min: -145.78056174315273
  episodes_this_iter: 82
  episodes_total: 19680
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3360.622
    load_time_ms: 2.606
    num_steps_sampled: 2710000
    num_steps_trained: 2710000
    rl_0:
      cur_kl_coeff: 1.6339140094027367e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.6216683983802795
      kl: 0.01562749594449997
      policy_loss: 0.002938747638836503
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6600 s, 275 iter, 2750000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-17-39
  done: false
  episode_len_mean: 126.43
  episode_reward_max: 383.7049519748697
  episode_reward_mean: 340.2125197363221
  episode_reward_min: -147.4274434849445
  episodes_this_iter: 80
  episodes_total: 20082
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3358.486
    load_time_ms: 2.507
    num_steps_sampled: 2760000
    num_steps_trained: 2760000
    rl_0:
      cur_kl_coeff: 1.6339140094027367e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.6552204489707947
      kl: 0.010220578871667385
      policy_loss: 0.004059264436364174
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6718 s, 280 iter, 2800000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-19-38
  done: false
  episode_len_mean: 123.89
  episode_reward_max: 387.458014170307
  episode_reward_mean: 329.3818440343797
  episode_reward_min: -146.8843706377666
  episodes_this_iter: 80
  episodes_total: 20482
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3358.057
    load_time_ms: 2.43
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 1.226136156284215e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.6299318075180054
      kl: 0.015244579873979092
      policy_loss: 0.0029278965666890144
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6837 s, 285 iter, 2850000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-21-38
  done: false
  episode_len_mean: 125.46
  episode_reward_max: 387.78641606658084
  episode_reward_mean: 344.6654184729412
  episode_reward_min: -141.6596187034028
  episodes_this_iter: 79
  episodes_total: 20881
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3360.72
    load_time_ms: 2.408
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 1.83850358519416e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.6410022377967834
      kl: 0.010110048577189445
      policy_loss: 0.002494267886504531
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 6957 s, 290 iter, 2900000 ts, 351 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-23-38
  done: false
  episode_len_mean: 125.1
  episode_reward_max: 388.44506232638366
  episode_reward_mean: 347.3813167435468
  episode_reward_min: -134.39383357585027
  episodes_this_iter: 81
  episodes_total: 21282
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3347.528
    load_time_ms: 2.479
    num_steps_sampled: 2910000
    num_steps_trained: 2910000
    rl_0:
      cur_kl_coeff: 9.1925179259708e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.6347320079803467
      kl: 0.03716764599084854
      policy_loss: 0.00031013289117254317
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28815], 7077 s, 295 iter, 2950000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-25-38
  done: false
  episode_len_mean: 125.82
  episode_reward_max: 389.3992650926119
  episode_reward_mean: 349.6721077764297
  episode_reward_min: -140.9449402879667
  episodes_this_iter: 79
  episodes_total: 21680
  experiment_id: 9d4e53469e9e4300bdec46d00feb8803
  hostname: Gandalf
  info:
    grad_time_ms: 3351.27
    load_time_ms: 2.492
    num_steps_sampled: 2960000
    num_steps_trained: 2960000
    rl_0:
      cur_kl_coeff: 1.37887768889562e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.7171923518180847
      kl: 0.008917853236198425
      policy_loss: -3.608163387980312e-05
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=28815], 7196 s, 300 iter, 3000000 ts, 337 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=28815], 7196 s, 300 iter, 3000000 ts, 337 rew



In [14]:
# Start a training run via executeTraining(), which is defined in an earlier
# cell (outside this excerpt).
# NOTE(review): the recorded output below shows a new experiment_id and
# iteration/timestep counters restarting from zero, so this call appears to
# launch a fresh PPO trial rather than resume the previous one — confirm that
# re-running from scratch is intended.
executeTraining()

 Starting SUMO on port 57437
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-12_22-27-15ke_66_iv -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



10.01081822073232
26.104054807852993


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-28-15
  done: false
  episode_len_mean: 434.3333333333333
  episode_reward_max: 193.38667648657946
  episode_reward_mean: 30.09197565530791
  episode_reward_min: -156.0618412190695
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 4525.795
    load_time_ms: 140.201
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4199107885360718
      kl: 0.0014287193771451712
      policy_loss: -0.002639362821355462
      total_loss: 114.3130111694336
      vf_explained_var: 0.018698977306485176
      vf_loss: 114.31536865234375
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4217541217803955
      kl: 0.001456847763620317
      policy_loss: -0.0017349777044

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 122 s, 5 iter, 50000 ts, 98.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-30-15
  done: false
  episode_len_mean: 410.99
  episode_reward_max: 357.71019220620303
  episode_reward_mean: 120.9981200309501
  episode_reward_min: -159.92121765714262
  episodes_this_iter: 29
  episodes_total: 141
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3711.202
    load_time_ms: 25.29
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4009897708892822
      kl: 0.00981423445045948
      policy_loss: -0.003668017452582717
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 241 s, 10 iter, 100000 ts, 199 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-32-15
  done: false
  episode_len_mean: 265.33
  episode_reward_max: 356.78928076891947
  episode_reward_mean: 209.11282151594085
  episode_reward_min: -162.9732527461369
  episodes_this_iter: 38
  episodes_total: 318
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3552.636
    load_time_ms: 2.286
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.3909927606582642
      kl: 0.0024774454068392515
      policy_loss: -0.0010783475590869784
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 362 s, 15 iter, 150000 ts, 254 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-34-16
  done: false
  episode_len_mean: 202.86
  episode_reward_max: 355.090135857098
  episode_reward_mean: 257.6096356810126
  episode_reward_min: -153.95445297513493
  episodes_this_iter: 50
  episodes_total: 549
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3550.15
    load_time_ms: 2.286
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.2992191314697266
      kl: 0.009030371904373169
      policy_loss: -0.0022215950302779675
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 482 s, 20 iter, 200000 ts, 265 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-36-16
  done: false
  episode_len_mean: 191.44
  episode_reward_max: 372.6444064686805
  episode_reward_mean: 268.03328842591435
  episode_reward_min: -152.06392430246055
  episodes_this_iter: 54
  episodes_total: 797
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3553.618
    load_time_ms: 2.308
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 7.629394644936838e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.2574416399002075
      kl: 0.0015147384256124496
      policy_loss: -0.0009245041292160749
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 604 s, 25 iter, 250000 ts, 269 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-38-18
  done: false
  episode_len_mean: 169.97
  episode_reward_max: 388.40764728471237
  episode_reward_mean: 262.3854477021838
  episode_reward_min: -146.74437470430593
  episodes_this_iter: 61
  episodes_total: 1080
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3558.168
    load_time_ms: 2.478
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 9.536743306171047e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.1929044723510742
      kl: 0.006473401561379433
      policy_loss: -0.0011260807514190674
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 725 s, 30 iter, 300000 ts, 306 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-40-20
  done: false
  episode_len_mean: 165.59
  episode_reward_max: 388.6618873221912
  episode_reward_mean: 316.4089228253245
  episode_reward_min: -149.31353696361134
  episodes_this_iter: 60
  episodes_total: 1382
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3569.569
    load_time_ms: 2.706
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.1805917024612427
      kl: 0.007784461136907339
      policy_loss: -0.002901526167988777
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 847 s, 35 iter, 350000 ts, 299 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-42-22
  done: false
  episode_len_mean: 158.79
  episode_reward_max: 384.8454756657383
  episode_reward_mean: 289.1194398228434
  episode_reward_min: -146.769302343224
  episodes_this_iter: 62
  episodes_total: 1692
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3568.889
    load_time_ms: 2.466
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 1.4901161415892261e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.1074515581130981
      kl: 0.003656932618469
      policy_loss: -0.0011648153886198997
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 968 s, 40 iter, 400000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-44-23
  done: false
  episode_len_mean: 155.04
  episode_reward_max: 372.3281359315154
  episode_reward_mean: 309.6609294663271
  episode_reward_min: -141.442612542228
  episodes_this_iter: 65
  episodes_total: 2009
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3551.288
    load_time_ms: 2.435
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 4.6566129424663316e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.0704413652420044
      kl: 0.011548283509910107
      policy_loss: -0.004609977826476097
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1090 s, 45 iter, 450000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-46-25
  done: false
  episode_len_mean: 151.16
  episode_reward_max: 354.39308071715334
  episode_reward_mean: 312.988274151158
  episode_reward_min: 260.9199055741253
  episodes_this_iter: 66
  episodes_total: 2342
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3549.741
    load_time_ms: 2.649
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 1.1641532356165829e-11
      cur_lr: 4.999999873689376e-05
      entropy: 0.9862478375434875
      kl: 0.020659374073147774
      policy_loss: -0.006939905695617199
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1213 s, 50 iter, 500000 ts, 309 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-48-29
  done: false
  episode_len_mean: 151.65
  episode_reward_max: 380.9846884725032
  episode_reward_mean: 321.21455945672744
  episode_reward_min: -139.41270402379348
  episodes_this_iter: 66
  episodes_total: 2671
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3571.672
    load_time_ms: 2.525
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 2.9103830890414573e-12
      cur_lr: 4.999999873689376e-05
      entropy: 0.9743366837501526
      kl: 0.005489574279636145
      policy_loss: -0.0006392990471795201
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1335 s, 55 iter, 550000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-50-31
  done: false
  episode_len_mean: 152.33
  episode_reward_max: 369.54035236244084
  episode_reward_mean: 316.20044388091645
  episode_reward_min: -139.06692043306813
  episodes_this_iter: 67
  episodes_total: 3001
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3579.084
    load_time_ms: 2.393
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 9.094947153254554e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.0009500980377197
      kl: 0.009356583468616009
      policy_loss: -0.0011099281255155802
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1458 s, 60 iter, 600000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-52-35
  done: false
  episode_len_mean: 151.53
  episode_reward_max: 379.0618728046674
  episode_reward_mean: 323.4469958474365
  episode_reward_min: -136.75574409417845
  episodes_this_iter: 66
  episodes_total: 3331
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3567.791
    load_time_ms: 2.649
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.9335383772850037
      kl: 0.008042641915380955
      policy_loss: -0.004186269361525774
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1578 s, 65 iter, 650000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-54-35
  done: false
  episode_len_mean: 151.48
  episode_reward_max: 369.86567695529027
  episode_reward_mean: 318.695667756817
  episode_reward_min: 260.67358544139626
  episodes_this_iter: 66
  episodes_total: 3661
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3566.841
    load_time_ms: 2.682
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 2.842170985392048e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.8895984292030334
      kl: 0.0032755755819380283
      policy_loss: -0.00027956441044807434
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1701 s, 70 iter, 700000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-56-38
  done: false
  episode_len_mean: 150.06
  episode_reward_max: 387.84833177815773
  episode_reward_mean: 325.463599747509
  episode_reward_min: 270.0157365417993
  episodes_this_iter: 66
  episodes_total: 3995
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3545.721
    load_time_ms: 2.291
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 1.77635686587003e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.8046126961708069
      kl: 0.005591989029198885
      policy_loss: -0.0017361292848363519
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1824 s, 75 iter, 750000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_22-58-41
  done: false
  episode_len_mean: 147.28
  episode_reward_max: 369.5956831666937
  episode_reward_mean: 321.14632985041413
  episode_reward_min: -139.06146772631485
  episodes_this_iter: 67
  episodes_total: 4333
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3545.406
    load_time_ms: 2.297
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 1.1102230411687688e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.7393594980239868
      kl: 0.003982303664088249
      policy_loss: -0.0016111246077343822
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 1945 s, 80 iter, 800000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-00-42
  done: false
  episode_len_mean: 147.35
  episode_reward_max: 363.27335835096096
  episode_reward_mean: 320.6294212971664
  episode_reward_min: 266.3961739299459
  episodes_this_iter: 68
  episodes_total: 4673
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3554.23
    load_time_ms: 2.472
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 3.4694470036524025e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.6874974370002747
      kl: 0.0030395332723855972
      policy_loss: -0.0002082651189994067
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 2066 s, 85 iter, 850000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-02-43
  done: false
  episode_len_mean: 146.99
  episode_reward_max: 375.5152511233352
  episode_reward_mean: 320.8590751220825
  episode_reward_min: 272.304909834075
  episodes_this_iter: 68
  episodes_total: 5012
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3593.42
    load_time_ms: 2.498
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 1.0842021886413758e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.6461614966392517
      kl: 0.00514202518388629
      policy_loss: -0.00029282690957188606
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 2194 s, 90 iter, 900000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-04-55
  done: false
  episode_len_mean: 148.07
  episode_reward_max: 379.0372155834026
  episode_reward_mean: 318.9687846513501
  episode_reward_min: 267.3961369116291
  episodes_this_iter: 66
  episodes_total: 5352
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3641.635
    load_time_ms: 2.467
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 3.3881318395042993e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.6676440834999084
      kl: 0.009567598812282085
      policy_loss: -0.0006604394293390214
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 2351 s, 95 iter, 950000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-07-37
  done: false
  episode_len_mean: 148.22
  episode_reward_max: 367.8667138135767
  episode_reward_mean: 321.9510931979106
  episode_reward_min: 271.82214620806053
  episodes_this_iter: 68
  episodes_total: 5691
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 4013.241
    load_time_ms: 2.98
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.6389012336730957
      kl: 0.005781302694231272
      policy_loss: -0.0025728256441652775
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 2521 s, 100 iter, 1000000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-10-49
  done: false
  episode_len_mean: 150.16
  episode_reward_max: 372.340241847169
  episode_reward_mean: 321.8186486990907
  episode_reward_min: 268.26026528262804
  episodes_this_iter: 67
  episodes_total: 6026
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 4753.62
    load_time_ms: 3.332
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 3.3087224995159173e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6709665656089783
      kl: 0.007772249169647694
      policy_loss: -0.0011326662497594953
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 2742 s, 105 iter, 1050000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-15-07
  done: false
  episode_len_mean: 149.05
  episode_reward_max: 376.36012194516127
  episode_reward_mean: 324.81667495305027
  episode_reward_min: 271.35644366822584
  episodes_this_iter: 66
  episodes_total: 6361
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 6114.211
    load_time_ms: 3.355
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 2.0679515621974483e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.5255663990974426
      kl: 0.009290327318012714
      policy_loss: -0.00039161695167422295
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 2982 s, 110 iter, 1100000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-18-05
  done: false
  episode_len_mean: 149.28
  episode_reward_max: 379.72374291062334
  episode_reward_mean: 324.19879601323333
  episode_reward_min: -141.69084550674376
  episodes_this_iter: 66
  episodes_total: 6695
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 5556.412
    load_time_ms: 3.194
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 1.2924697263734052e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.5341377854347229
      kl: 0.009199365973472595
      policy_loss: -0.0023281804751604795
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3112 s, 115 iter, 1150000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-20-11
  done: false
  episode_len_mean: 149.97
  episode_reward_max: 385.16939335463985
  episode_reward_mean: 324.54956457739536
  episode_reward_min: 274.91778931601766
  episodes_this_iter: 67
  episodes_total: 7030
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3711.453
    load_time_ms: 2.819
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 8.077935789833782e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.5591099858283997
      kl: 0.008533105254173279
      policy_loss: -0.0035087461583316326
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3232 s, 120 iter, 1200000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-22-12
  done: false
  episode_len_mean: 144.14
  episode_reward_max: 367.9892584441907
  episode_reward_mean: 315.1164998136095
  episode_reward_min: -143.9058858129498
  episodes_this_iter: 68
  episodes_total: 7370
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3409.312
    load_time_ms: 2.669
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.0097419737292228e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.45754942297935486
      kl: 0.008252675645053387
      policy_loss: -0.0021954786498099566
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3351 s, 125 iter, 1250000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-24-11
  done: false
  episode_len_mean: 146.51
  episode_reward_max: 378.94086229160825
  episode_reward_mean: 325.1436420034117
  episode_reward_min: 272.56966814686126
  episodes_this_iter: 68
  episodes_total: 7716
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3399.215
    load_time_ms: 2.595
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 6.3108873358076425e-31
      cur_lr: 4.999999873689376e-05
      entropy: 0.5077887773513794
      kl: 0.008196057751774788
      policy_loss: -0.002152504865080118
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3471 s, 130 iter, 1300000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-26-11
  done: false
  episode_len_mean: 145.09
  episode_reward_max: 372.8433057243226
  episode_reward_mean: 317.22004614481824
  episode_reward_min: -144.51073600836995
  episodes_this_iter: 69
  episodes_total: 8060
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3400.794
    load_time_ms: 2.609
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 3.9443045848797766e-32
      cur_lr: 4.999999873689376e-05
      entropy: 0.4637676775455475
      kl: 0.007164819166064262
      policy_loss: -0.0019484544172883034
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3593 s, 135 iter, 1350000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-28-13
  done: false
  episode_len_mean: 144.95
  episode_reward_max: 374.88843691675584
  episode_reward_mean: 324.73731938841473
  episode_reward_min: 271.865596916027
  episodes_this_iter: 69
  episodes_total: 8405
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3418.296
    load_time_ms: 2.483
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 2.4651903655498604e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.41435113549232483
      kl: 0.00956579390913248
      policy_loss: -0.0009749330347403884
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3714 s, 140 iter, 1400000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-30-15
  done: false
  episode_len_mean: 141.91
  episode_reward_max: 379.06995465446283
  episode_reward_mean: 301.77442432950085
  episode_reward_min: -149.23454294064408
  episodes_this_iter: 69
  episodes_total: 8754
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3416.091
    load_time_ms: 2.518
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 3.0814879569373254e-34
      cur_lr: 4.999999873689376e-05
      entropy: 0.4371732175350189
      kl: 0.02365165576338768
      policy_loss: -0.0017215420957654715
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3835 s, 145 iter, 1450000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-32-16
  done: false
  episode_len_mean: 145.15
  episode_reward_max: 372.1856077277478
  episode_reward_mean: 322.5343251447635
  episode_reward_min: -142.19541784294032
  episodes_this_iter: 68
  episodes_total: 9098
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3399.676
    load_time_ms: 2.525
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 1.9259299730858284e-35
      cur_lr: 4.999999873689376e-05
      entropy: 0.4108743369579315
      kl: 0.009121033363044262
      policy_loss: -0.0013543253298848867
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 3957 s, 150 iter, 1500000 ts, 328 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-34-18
  done: false
  episode_len_mean: 145.87
  episode_reward_max: 363.6171490088851
  episode_reward_mean: 321.9885958670832
  episode_reward_min: -144.01872070267387
  episodes_this_iter: 69
  episodes_total: 9442
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3396.934
    load_time_ms: 2.457
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 4.814824932714571e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.47398248314857483
      kl: 0.010052191093564034
      policy_loss: -0.0021124123595654964
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 4077 s, 155 iter, 1550000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-36-18
  done: false
  episode_len_mean: 145.51
  episode_reward_max: 373.96439805777754
  episode_reward_mean: 322.51604610495576
  episode_reward_min: -143.75032380387512
  episodes_this_iter: 69
  episodes_total: 9782
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3404.574
    load_time_ms: 2.372
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.2037062331786428e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.43738603591918945
      kl: 0.016162673011422157
      policy_loss: -0.0030196206644177437


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 4197 s, 160 iter, 1600000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-38-18
  done: false
  episode_len_mean: 141.84
  episode_reward_max: 383.8061449454951
  episode_reward_mean: 300.9012412683595
  episode_reward_min: -145.07107043445993
  episodes_this_iter: 71
  episodes_total: 10129
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3415.758
    load_time_ms: 2.29
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 6.018531165893214e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.37447574734687805
      kl: 0.022039692848920822
      policy_loss: 0.004625852685421705
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 4318 s, 165 iter, 1650000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-40-20
  done: false
  episode_len_mean: 144.82
  episode_reward_max: 375.56542478321074
  episode_reward_mean: 307.79465520180025
  episode_reward_min: -147.28814747829932
  episodes_this_iter: 68
  episodes_total: 10472
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3431.614
    load_time_ms: 2.534
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 3.009265582946607e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.46493223309516907
      kl: 0.01266991626471281
      policy_loss: -4.666441964218393e-05
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 4439 s, 170 iter, 1700000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-42-22
  done: false
  episode_len_mean: 144.94
  episode_reward_max: 372.15410344446457
  episode_reward_mean: 311.51215648093256
  episode_reward_min: -140.81146496046333
  episodes_this_iter: 68
  episodes_total: 10817
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3425.188
    load_time_ms: 2.573
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 7.523163957366517e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.5032498240470886
      kl: 0.014364988543093204
      policy_loss: -0.0035605805460363626
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 4559 s, 175 iter, 1750000 ts, 301 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-44-21
  done: false
  episode_len_mean: 145.57
  episode_reward_max: 377.0952281723059
  episode_reward_mean: 319.69889743375455
  episode_reward_min: -144.03771052334255
  episodes_this_iter: 69
  episodes_total: 11165
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3405.683
    load_time_ms: 2.405
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 7.523163957366517e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.4278367757797241
      kl: 0.013231410644948483
      policy_loss: -0.0006661447696387768
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 4690 s, 180 iter, 1800000 ts, 303 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-46-39
  done: false
  episode_len_mean: 143.78
  episode_reward_max: 382.45780954871685
  episode_reward_mean: 307.09904969742746
  episode_reward_min: -147.24500671458506
  episodes_this_iter: 68
  episodes_total: 11518
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3659.928
    load_time_ms: 2.563
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 9.403954246058914e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.48756492137908936
      kl: 0.009219285100698471
      policy_loss: 0.0003501809842418879
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 4883 s, 185 iter, 1850000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-49-56
  done: false
  episode_len_mean: 145.24
  episode_reward_max: 383.40176298899195
  episode_reward_mean: 317.88342911253955
  episode_reward_min: -144.29265602261813
  episodes_this_iter: 68
  episodes_total: 11863
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 5364.119
    load_time_ms: 3.158
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 5.87746614891758e-40
      cur_lr: 4.999999873689376e-05
      entropy: 0.46016037464141846
      kl: 0.011029542423784733
      policy_loss: 0.00013159989612177014
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5111 s, 190 iter, 1900000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-53-41
  done: false
  episode_len_mean: 142.94
  episode_reward_max: 379.1110075345041
  episode_reward_mean: 305.70831608126804
  episode_reward_min: -146.83840422763075
  episodes_this_iter: 71
  episodes_total: 12213
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 6245.165
    load_time_ms: 3.523
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 7.346867718608583e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.44816893339157104
      kl: 0.005966172553598881
      policy_loss: -0.0013781500747427344
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5237 s, 195 iter, 1950000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-55-41
  done: false
  episode_len_mean: 146.2
  episode_reward_max: 380.75735617382435
  episode_reward_mean: 321.2344096867824
  episode_reward_min: -147.44379300813858
  episodes_this_iter: 68
  episodes_total: 12558
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 4602.303
    load_time_ms: 2.885
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 3.6733637943810755e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.3793303370475769
      kl: 0.017509805038571358
      policy_loss: 0.0029350686818361282
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5357 s, 200 iter, 2000000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-57-40
  done: false
  episode_len_mean: 145.9
  episode_reward_max: 378.81864873701664
  episode_reward_mean: 318.6994606625965
  episode_reward_min: -148.77899435117305
  episodes_this_iter: 69
  episodes_total: 12903
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3469.273
    load_time_ms: 2.327
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 3.6733637943810755e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.3240642547607422
      kl: 0.01913038082420826
      policy_loss: 0.000533609592821449
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5476 s, 205 iter, 2050000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_23-59-39
  done: false
  episode_len_mean: 145.22
  episode_reward_max: 383.9955144706294
  episode_reward_mean: 313.8752356832107
  episode_reward_min: -145.65898630425065
  episodes_this_iter: 70
  episodes_total: 13247
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3403.655
    load_time_ms: 2.491
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 9.184110135184851e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.3255529999732971
      kl: 0.016941143199801445
      policy_loss: 0.0010306460317224264
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5595 s, 210 iter, 2100000 ts, 301 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-01-39
  done: false
  episode_len_mean: 143.61
  episode_reward_max: 390.3509368441308
  episode_reward_mean: 307.3505152377272
  episode_reward_min: -148.6099820428171
  episodes_this_iter: 70
  episodes_total: 13594
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3388.337
    load_time_ms: 2.466
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 1.3774763904312952e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.2777676582336426
      kl: 0.018008917570114136
      policy_loss: 0.0019725391175597906
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5714 s, 215 iter, 2150000 ts, 294 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-03-37
  done: false
  episode_len_mean: 145.3
  episode_reward_max: 385.5938096555961
  episode_reward_mean: 313.33498969067006
  episode_reward_min: -147.08969198736324
  episodes_this_iter: 66
  episodes_total: 13942
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3381.478
    load_time_ms: 2.347
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.3774763904312952e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.3018955588340759
      kl: 0.016074996441602707
      policy_loss: 4.7009438276290894e-05
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5832 s, 220 iter, 2200000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-05-37
  done: false
  episode_len_mean: 145.58
  episode_reward_max: 374.2274971048572
  episode_reward_mean: 318.02502091190314
  episode_reward_min: -147.71662169187738
  episodes_this_iter: 68
  episodes_total: 14287
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3386.255
    load_time_ms: 2.407
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 2.0663547154933753e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.21166908740997314
      kl: 0.011219839565455914
      policy_loss: -0.000269637064775452
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 5953 s, 225 iter, 2250000 ts, 302 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-07-37
  done: false
  episode_len_mean: 142.21
  episode_reward_max: 379.4296886591688
  episode_reward_mean: 309.7521951828074
  episode_reward_min: -146.85741761085063
  episodes_this_iter: 71
  episodes_total: 14636
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3394.276
    load_time_ms: 2.387
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 3.0993919433936304e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.11569253355264664
      kl: 0.031399376690387726
      policy_loss: 0.0029775267466902733
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6072 s, 230 iter, 2300000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-09-36
  done: false
  episode_len_mean: 145.3
  episode_reward_max: 383.5321508744473
  episode_reward_mean: 320.3296085291854
  episode_reward_min: -149.38647714755493
  episodes_this_iter: 70
  episodes_total: 14982
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3407.446
    load_time_ms: 2.423
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 3.0993919433936304e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.24762172996997833
      kl: 0.01612943224608898
      policy_loss: -0.0032473409082740545
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6192 s, 235 iter, 2350000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-11-37
  done: false
  episode_len_mean: 146.62
  episode_reward_max: 383.0648253470955
  episode_reward_mean: 304.7550155381334
  episode_reward_min: -145.01513142955432
  episodes_this_iter: 68
  episodes_total: 15326
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3396.689
    load_time_ms: 2.625
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 3.0993919433936304e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.30212244391441345
      kl: 0.011073024943470955
      policy_loss: -0.0024246545508503914
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6311 s, 240 iter, 2400000 ts, 302 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-13-36
  done: false
  episode_len_mean: 150.44
  episode_reward_max: 395.07162645850667
  episode_reward_mean: 323.2004778889399
  episode_reward_min: -140.77162889535123
  episodes_this_iter: 66
  episodes_total: 15666
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3375.675
    load_time_ms: 2.573
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 3.0993919433936304e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.3593129515647888
      kl: 0.020367035642266273
      policy_loss: 0.0008204871555790305
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6431 s, 245 iter, 2450000 ts, 310 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-15-36
  done: false
  episode_len_mean: 147.78
  episode_reward_max: 383.38141059520814
  episode_reward_mean: 314.3909424104352
  episode_reward_min: -145.7462700899025
  episodes_this_iter: 68
  episodes_total: 16009
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3369.686
    load_time_ms: 2.319
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 1.5496959716968152e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.2859726548194885
      kl: 0.01411434356123209
      policy_loss: -0.0003553068090695888
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6550 s, 250 iter, 2500000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-17-36
  done: false
  episode_len_mean: 148.86
  episode_reward_max: 382.2548786713366
  episode_reward_mean: 325.59100860648306
  episode_reward_min: -140.0732181131544
  episodes_this_iter: 68
  episodes_total: 16351
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3385.205
    load_time_ms: 2.323
    num_steps_sampled: 2510000
    num_steps_trained: 2510000
    rl_0:
      cur_kl_coeff: 1.5496959716968152e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.21756550669670105
      kl: 0.015566138550639153
      policy_loss: 0.0007341363816522062
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6669 s, 255 iter, 2550000 ts, 282 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-19-35
  done: false
  episode_len_mean: 139.44
  episode_reward_max: 384.6903978979201
  episode_reward_mean: 279.07908061514337
  episode_reward_min: -154.2397425134521
  episodes_this_iter: 70
  episodes_total: 16699
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3393.185
    load_time_ms: 2.528
    num_steps_sampled: 2560000
    num_steps_trained: 2560000
    rl_0:
      cur_kl_coeff: 7.749180507716238e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.2534034848213196
      kl: 0.05032123625278473
      policy_loss: 0.0030876470264047384
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6789 s, 260 iter, 2600000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-21-35
  done: false
  episode_len_mean: 147.0
  episode_reward_max: 391.4400080617259
  episode_reward_mean: 319.39405038305915
  episode_reward_min: -144.93733940511876
  episodes_this_iter: 69
  episodes_total: 17041
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3389.405
    load_time_ms: 2.521
    num_steps_sampled: 2610000
    num_steps_trained: 2610000
    rl_0:
      cur_kl_coeff: 5.811184731555016e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.2339942455291748
      kl: 0.013701927848160267
      policy_loss: 0.0007055558380670846
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 6908 s, 265 iter, 2650000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-23-34
  done: false
  episode_len_mean: 147.89
  episode_reward_max: 384.067600103218
  episode_reward_mean: 318.4099448530393
  episode_reward_min: -140.4147326137454
  episodes_this_iter: 67
  episodes_total: 17384
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3386.457
    load_time_ms: 2.424
    num_steps_sampled: 2660000
    num_steps_trained: 2660000
    rl_0:
      cur_kl_coeff: 5.811184731555016e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.2762117385864258
      kl: 0.009279219433665276
      policy_loss: 0.0009174023871310055
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 7026 s, 270 iter, 2700000 ts, 306 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-25-32
  done: false
  episode_len_mean: 141.92
  episode_reward_max: 378.17216204272535
  episode_reward_mean: 294.75032063818054
  episode_reward_min: -146.06090470733488
  episodes_this_iter: 70
  episodes_total: 17730
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3356.525
    load_time_ms: 2.462
    num_steps_sampled: 2710000
    num_steps_trained: 2710000
    rl_0:
      cur_kl_coeff: 1.4531465075048353e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.1848425418138504
      kl: 0.007646944839507341
      policy_loss: -0.0001646792225074023


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 7144 s, 275 iter, 2750000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-27-30
  done: false
  episode_len_mean: 145.42
  episode_reward_max: 373.52163275919463
  episode_reward_mean: 311.391881851177
  episode_reward_min: -155.08046854932059
  episodes_this_iter: 69
  episodes_total: 18073
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3350.83
    load_time_ms: 2.4
    num_steps_sampled: 2760000
    num_steps_trained: 2760000
    rl_0:
      cur_kl_coeff: 1.821688003622262e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.24552612006664276
      kl: 0.020762456580996513
      policy_loss: -0.0011014441261067986
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 7262 s, 280 iter, 2800000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-29-29
  done: false
  episode_len_mean: 148.81
  episode_reward_max: 380.8634916149974
  episode_reward_mean: 329.3819372833942
  episode_reward_min: -142.3108123095656
  episodes_this_iter: 67
  episodes_total: 18415
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3361.368
    load_time_ms: 2.387
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 1.821688003622262e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.21782101690769196
      kl: 0.02030303329229355
      policy_loss: -0.002026123460382223
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 7381 s, 285 iter, 2850000 ts, 303 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-31-28
  done: false
  episode_len_mean: 144.88
  episode_reward_max: 383.2025180796089
  episode_reward_mean: 308.29415726683106
  episode_reward_min: -145.61582635756113
  episodes_this_iter: 68
  episodes_total: 18757
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3353.045
    load_time_ms: 2.371
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 9.10844001811131e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.1930840164422989
      kl: 0.015467746183276176
      policy_loss: 0.0011313441209495068
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 7500 s, 290 iter, 2900000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-33-28
  done: false
  episode_len_mean: 149.65
  episode_reward_max: 378.1354095237104
  episode_reward_mean: 339.1078264186592
  episode_reward_min: 295.2858835914802
  episodes_this_iter: 67
  episodes_total: 19097
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3356.883
    load_time_ms: 2.266
    num_steps_sampled: 2910000
    num_steps_trained: 2910000
    rl_0:
      cur_kl_coeff: 9.10844001811131e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.09081870317459106
      kl: 0.024350423365831375
      policy_loss: -0.0022661809343844652
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=28878], 7619 s, 295 iter, 2950000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-35-27
  done: false
  episode_len_mean: 148.44
  episode_reward_max: 387.0590326055178
  episode_reward_mean: 330.20938329202016
  episode_reward_min: -146.7462331781263
  episodes_this_iter: 68
  episodes_total: 19436
  experiment_id: e3ae719ff1ee406e8a8b95dc8378f8e3
  hostname: Gandalf
  info:
    grad_time_ms: 3356.634
    load_time_ms: 2.331
    num_steps_sampled: 2960000
    num_steps_trained: 2960000
    rl_0:
      cur_kl_coeff: 4.484155085839415e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.13722443580627441
      kl: 0.029280683025717735
      policy_loss: -0.0013283913722261786
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=28878], 7737 s, 300 iter, 3000000 ts, 321 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=28878], 7737 s, 300 iter, 3000000 ts, 321 rew



In [15]:
# Kick off another training run. Per the output captured below, this starts
# SUMO and a Ray Tune trial named PPO_MultiAgentIntersectionEnv-v0_0 whose
# results are written under ray_results/IntersectionExample.
# NOTE(review): executeTraining is defined in an earlier cell (not visible
# here). The run appears to start from scratch rather than resume the previous
# one: the log shows a new experiment_id, and episodes_total / cur_kl_coeff
# begin again from their initial values -- confirm against the definition.
executeTraining()

 Starting SUMO on port 55707
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-13_00-37-03ula1uqx_ -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



23.724963860171645
26.27183767579561


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-37-42
  done: false
  episode_len_mean: 499.0
  episode_reward_max: 173.63690576553265
  episode_reward_mean: 90.72541522801056
  episode_reward_min: -113.69505547852305
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 4585.186
    load_time_ms: 151.821
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4192588329315186
      kl: 0.0019781726878136396
      policy_loss: -0.002234462648630142
      total_loss: 26.256221771240234
      vf_explained_var: 0.2183620035648346
      vf_loss: 26.258060455322266
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4185649156570435
      kl: 0.0010123230749741197
      policy_loss: -0.001407453790307045
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 118 s, 5 iter, 50000 ts, 110 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-39-39
  done: false
  episode_len_mean: 401.04
  episode_reward_max: 377.2050247439947
  episode_reward_mean: 139.96828155257873
  episode_reward_min: -164.2127675261156
  episodes_this_iter: 30
  episodes_total: 141
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3695.495
    load_time_ms: 27.361
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4230947494506836
      kl: 0.00855925865471363
      policy_loss: -0.0036022854037582874
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 237 s, 10 iter, 100000 ts, 157 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-41-38
  done: false
  episode_len_mean: 206.82
  episode_reward_max: 383.3729331036498
  episode_reward_mean: 151.57002810282427
  episode_reward_min: -160.878312516247
  episodes_this_iter: 52
  episodes_total: 352
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3521.379
    load_time_ms: 2.554
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.3818913698196411
      kl: 0.0037152159493416548
      policy_loss: -0.0005438837688416243
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 358 s, 15 iter, 150000 ts, 200 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-43-38
  done: false
  episode_len_mean: 171.82
  episode_reward_max: 345.0613505984624
  episode_reward_mean: 192.6402482266456
  episode_reward_min: -165.713989512538
  episodes_this_iter: 56
  episodes_total: 617
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3520.374
    load_time_ms: 2.452
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.3698514699935913
      kl: 0.003635683096945286
      policy_loss: -0.0017256689025089145
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 478 s, 20 iter, 200000 ts, 200 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-45-40
  done: false
  episode_len_mean: 150.55
  episode_reward_max: 328.4555274877941
  episode_reward_mean: 181.21958287727804
  episode_reward_min: -163.17400602882907
  episodes_this_iter: 67
  episodes_total: 919
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3526.376
    load_time_ms: 2.347
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3423404693603516
      kl: 0.003712354926392436
      policy_loss: -0.00042300939094275236
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 600 s, 25 iter, 250000 ts, 252 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-47-42
  done: false
  episode_len_mean: 154.34
  episode_reward_max: 347.3699786774061
  episode_reward_mean: 269.7117321051888
  episode_reward_min: -150.00871065761234
  episodes_this_iter: 66
  episodes_total: 1237
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3537.768
    load_time_ms: 2.523
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.3150473833084106
      kl: 0.0031533055007457733
      policy_loss: -0.0018238188931718469
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 722 s, 30 iter, 300000 ts, 257 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-49-42
  done: false
  episode_len_mean: 150.77
  episode_reward_max: 332.06233831238643
  episode_reward_mean: 268.818776984569
  episode_reward_min: -160.03657365669832
  episodes_this_iter: 64
  episodes_total: 1569
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3544.818
    load_time_ms: 2.462
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.4901161415892261e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.361337661743164
      kl: 0.0027358410879969597
      policy_loss: -0.001819673809222877
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 840 s, 35 iter, 350000 ts, 257 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-51-42
  done: false
  episode_len_mean: 147.94
  episode_reward_max: 320.2599943573169
  episode_reward_mean: 270.9522943161471
  episode_reward_min: -133.1182320924336
  episodes_this_iter: 68
  episodes_total: 1907
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3544.547
    load_time_ms: 2.355
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 4.6566129424663316e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.301256775856018
      kl: 0.014849737286567688
      policy_loss: -0.003299075411632657
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 961 s, 40 iter, 400000 ts, 278 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-53-43
  done: false
  episode_len_mean: 132.53
  episode_reward_max: 306.05180213476217
  episode_reward_mean: 275.41057085854953
  episode_reward_min: 239.35746389405182
  episodes_this_iter: 76
  episodes_total: 2269
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3536.605
    load_time_ms: 2.299
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 2.9103830890414573e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.2885316610336304
      kl: 0.0014925514115020633
      policy_loss: -0.0006460082367993891
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1081 s, 45 iter, 450000 ts, 286 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-55-44
  done: false
  episode_len_mean: 134.86
  episode_reward_max: 334.8614053758125
  episode_reward_mean: 283.7378636829282
  episode_reward_min: -138.47316319790508
  episodes_this_iter: 73
  episodes_total: 2647
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3535.395
    load_time_ms: 2.23
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 3.6379788613018216e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.2968602180480957
      kl: 0.005028032697737217
      policy_loss: -0.0022235624492168427
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1204 s, 50 iter, 500000 ts, 304 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-57-47
  done: false
  episode_len_mean: 155.69
  episode_reward_max: 373.1739527463477
  episode_reward_mean: 308.0522585937239
  episode_reward_min: 236.88576657611236
  episodes_this_iter: 61
  episodes_total: 2993
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3526.908
    load_time_ms: 2.323
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 4.547473576627277e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.3619288206100464
      kl: 0.006544334813952446
      policy_loss: -0.002685016253963113
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1325 s, 55 iter, 550000 ts, 316 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_00-59-48
  done: false
  episode_len_mean: 145.23
  episode_reward_max: 386.19927275693783
  episode_reward_mean: 323.4097224766318
  episode_reward_min: 178.9090818861742
  episodes_this_iter: 70
  episodes_total: 3338
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3534.716
    load_time_ms: 2.334
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 5.684341970784096e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.2751903533935547
      kl: 0.0038458097260445356
      policy_loss: -0.0009114897693507373
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1446 s, 60 iter, 600000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-01-49
  done: false
  episode_len_mean: 148.72
  episode_reward_max: 392.8415538543396
  episode_reward_mean: 327.3296249099975
  episode_reward_min: 135.13485091213465
  episodes_this_iter: 67
  episodes_total: 3673
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3547.566
    load_time_ms: 2.518
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 2.842170985392048e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.294232964515686
      kl: 0.00834241509437561
      policy_loss: -0.002027513226494193
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1567 s, 65 iter, 650000 ts, 328 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-03-50
  done: false
  episode_len_mean: 146.28
  episode_reward_max: 390.16828604653256
  episode_reward_mean: 326.31258982712495
  episode_reward_min: 116.17718449779773
  episodes_this_iter: 68
  episodes_total: 4018
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3534.466
    load_time_ms: 2.51
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 7.10542746348012e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.2182759046554565
      kl: 0.012943808920681477
      policy_loss: -0.00202954956330359
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1689 s, 70 iter, 700000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-05-52
  done: false
  episode_len_mean: 146.89
  episode_reward_max: 400.00149348542453
  episode_reward_mean: 345.51008127564455
  episode_reward_min: 161.42720603981007
  episodes_this_iter: 68
  episodes_total: 4360
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3526.946
    load_time_ms: 2.421
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 3.55271373174006e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.1665312051773071
      kl: 0.013854963704943657
      policy_loss: -0.002004321664571762
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1810 s, 75 iter, 750000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-07-55
  done: false
  episode_len_mean: 143.21
  episode_reward_max: 391.3892017862086
  episode_reward_mean: 334.8394064939203
  episode_reward_min: 155.2260548238644
  episodes_this_iter: 70
  episodes_total: 4707
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3526.754
    load_time_ms: 2.367
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 8.88178432935015e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.0336188077926636
      kl: 0.014675111509859562
      policy_loss: -0.004177050665020943
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 1933 s, 80 iter, 800000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-09-57
  done: false
  episode_len_mean: 142.04
  episode_reward_max: 392.61028223527506
  episode_reward_mean: 331.93579748150955
  episode_reward_min: 127.72513319829096
  episodes_this_iter: 70
  episodes_total: 5058
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3524.802
    load_time_ms: 2.164
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 2.2204460823375376e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.9396986365318298
      kl: 0.031147202476859093
      policy_loss: -0.005862601101398468
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2056 s, 85 iter, 850000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-12-01
  done: false
  episode_len_mean: 141.51
  episode_reward_max: 397.2790045850764
  episode_reward_mean: 336.4900718479538
  episode_reward_min: 143.6353057246988
  episodes_this_iter: 71
  episodes_total: 5409
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3537.682
    load_time_ms: 2.314
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 2.775557602921922e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.8684919476509094
      kl: 0.04459197446703911
      policy_loss: -0.011998716741800308
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2177 s, 90 iter, 900000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-14-02
  done: false
  episode_len_mean: 140.85
  episode_reward_max: 391.06886154100687
  episode_reward_mean: 339.4917624107939
  episode_reward_min: 290.1497084676622
  episodes_this_iter: 71
  episodes_total: 5763
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3538.331
    load_time_ms: 2.435
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 2.602084541880963e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.8322566151618958
      kl: 0.010470325127243996
      policy_loss: -0.003074007574468851
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2298 s, 95 iter, 950000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-16-03
  done: false
  episode_len_mean: 140.98
  episode_reward_max: 400.3799398642418
  episode_reward_mean: 340.0474841168608
  episode_reward_min: 120.31608269220328
  episodes_this_iter: 71
  episodes_total: 6118
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3525.961
    load_time_ms: 2.386
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 9.757821232580159e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.7438872456550598
      kl: 0.0036347194109112024
      policy_loss: -0.00030547648202627897
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2422 s, 100 iter, 1000000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-18-07
  done: false
  episode_len_mean: 141.12
  episode_reward_max: 400.7287601120659
  episode_reward_mean: 340.6831852709904
  episode_reward_min: 145.3445950952364
  episodes_this_iter: 71
  episodes_total: 6471
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3538.137
    load_time_ms: 2.333
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 3.0493191351812997e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.6787290573120117
      kl: 0.006132159382104874
      policy_loss: -0.0005574962706305087
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2542 s, 105 iter, 1050000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-20-07
  done: false
  episode_len_mean: 140.63
  episode_reward_max: 393.06590394641853
  episode_reward_mean: 337.58897102363204
  episode_reward_min: 176.63590515486152
  episodes_this_iter: 72
  episodes_total: 6827
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3537.536
    load_time_ms: 2.332
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 3.8116489189766247e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.6249133944511414
      kl: 0.004076664336025715
      policy_loss: -0.0001145038622780703
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2662 s, 110 iter, 1100000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-22-08
  done: false
  episode_len_mean: 140.4
  episode_reward_max: 393.1622278559026
  episode_reward_mean: 343.1456059574463
  episode_reward_min: 179.5575111966226
  episodes_this_iter: 72
  episodes_total: 7182
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3532.009
    load_time_ms: 2.275
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 2.3822805743603904e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.5234730839729309
      kl: 0.007571314461529255
      policy_loss: -0.0005474656936712563
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2785 s, 115 iter, 1150000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-24-11
  done: false
  episode_len_mean: 140.27
  episode_reward_max: 389.93856813387964
  episode_reward_mean: 342.45530372413805
  episode_reward_min: 142.52989572156054
  episodes_this_iter: 70
  episodes_total: 7538
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3534.874
    load_time_ms: 2.132
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 2.977850717950488e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.40280860662460327
      kl: 0.012116915546357632
      policy_loss: -0.0016661996487528086
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 2906 s, 120 iter, 1200000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-26-12
  done: false
  episode_len_mean: 139.96
  episode_reward_max: 394.7394150681795
  episode_reward_mean: 343.78890915740647
  episode_reward_min: 163.34829795412338
  episodes_this_iter: 71
  episodes_total: 7895
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3531.215
    load_time_ms: 2.159
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.488925358975244e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.30879491567611694
      kl: 0.016198191791772842
      policy_loss: -0.0006336361402645707
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3028 s, 125 iter, 1250000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-28-15
  done: false
  episode_len_mean: 140.01
  episode_reward_max: 390.83369941386127
  episode_reward_mean: 336.17440323101937
  episode_reward_min: 154.5994590609523
  episodes_this_iter: 72
  episodes_total: 8253
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3527.145
    load_time_ms: 2.244
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 3.72231339743811e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.26294586062431335
      kl: 0.010310523211956024
      policy_loss: 0.0010733772069215775
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3151 s, 130 iter, 1300000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-30-17
  done: false
  episode_len_mean: 139.31
  episode_reward_max: 392.8397271165477
  episode_reward_mean: 340.8140839114908
  episode_reward_min: 287.6273495850488
  episodes_this_iter: 71
  episodes_total: 8611
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3537.905
    load_time_ms: 2.489
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 1.861156698719055e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.1622876226902008
      kl: 0.010298379696905613
      policy_loss: 0.0008355717291124165
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3270 s, 135 iter, 1350000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-32-17
  done: false
  episode_len_mean: 139.13
  episode_reward_max: 390.1771930873396
  episode_reward_mean: 335.53403526562414
  episode_reward_min: 148.0321021671927
  episodes_this_iter: 72
  episodes_total: 8971
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3556.187
    load_time_ms: 2.535
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 1.861156698719055e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.17000044882297516
      kl: 0.011028360575437546
      policy_loss: -0.002131796907633543
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3391 s, 140 iter, 1400000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-34-18
  done: false
  episode_len_mean: 139.3
  episode_reward_max: 400.0111242993473
  episode_reward_mean: 341.70135712101495
  episode_reward_min: 285.96172101084863
  episodes_this_iter: 72
  episodes_total: 9330
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3545.278
    load_time_ms: 2.303
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 9.305783493595275e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.1119910478591919
      kl: 0.016886791214346886
      policy_loss: 0.0024980122689157724
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3514 s, 145 iter, 1450000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-36-22
  done: false
  episode_len_mean: 141.65
  episode_reward_max: 393.11503471625963
  episode_reward_mean: 344.5493885736751
  episode_reward_min: 161.0760493203511
  episodes_this_iter: 71
  episodes_total: 9683
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3532.529
    load_time_ms: 2.314
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 4.6528917467976375e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.16579948365688324
      kl: 0.010783662088215351
      policy_loss: 0.000610661692917347
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3638 s, 150 iter, 1500000 ts, 338 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-38-26
  done: false
  episode_len_mean: 142.13
  episode_reward_max: 393.8051063586582
  episode_reward_mean: 344.94660637056995
  episode_reward_min: 170.3415153978276
  episodes_this_iter: 70
  episodes_total: 10037
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3541.124
    load_time_ms: 2.377
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 1.1632229366994094e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.1649552583694458
      kl: 0.011100255884230137
      policy_loss: 0.0009645152022130787
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3759 s, 155 iter, 1550000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-40-26
  done: false
  episode_len_mean: 138.11
  episode_reward_max: 394.8056092891454
  episode_reward_mean: 339.3083211436491
  episode_reward_min: 285.94124071621223
  episodes_this_iter: 72
  episodes_total: 10396
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3542.31
    load_time_ms: 2.402
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.1632229366994094e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.03907633572816849
      kl: 0.008926237002015114
      policy_loss: 0.0009875642135739326
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3879 s, 160 iter, 1600000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-42-27
  done: false
  episode_len_mean: 137.44
  episode_reward_max: 389.41754529001156
  episode_reward_mean: 340.3810411626434
  episode_reward_min: 165.75913072542534
  episodes_this_iter: 72
  episodes_total: 10758
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3526.041
    load_time_ms: 2.409
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 5.816114683497047e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.051765233278274536
      kl: 0.025827230885624886
      policy_loss: -0.0001878374459920451


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 3998 s, 165 iter, 1650000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-44-25
  done: false
  episode_len_mean: 136.83
  episode_reward_max: 386.8267291457429
  episode_reward_mean: 338.79945168462194
  episode_reward_min: 282.13956526309386
  episodes_this_iter: 72
  episodes_total: 11123
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3476.994
    load_time_ms: 2.362
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 2.9080573417485235e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.11723905801773071
      kl: 0.0135801387950778
      policy_loss: 0.0016366064082831144
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4117 s, 170 iter, 1700000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-46-25
  done: false
  episode_len_mean: 138.85
  episode_reward_max: 396.52515235054847
  episode_reward_mean: 341.0087481669383
  episode_reward_min: 282.1287956256013
  episodes_this_iter: 72
  episodes_total: 11487
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3437.526
    load_time_ms: 2.387
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 2.9080573417485235e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.045692842453718185
      kl: 0.02618465945124626
      policy_loss: 0.0027844328433275223
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4237 s, 175 iter, 1750000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-48-25
  done: false
  episode_len_mean: 139.78
  episode_reward_max: 393.6142731130912
  episode_reward_mean: 334.78140826812233
  episode_reward_min: 276.7635776752928
  episodes_this_iter: 72
  episodes_total: 11846
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3440.855
    load_time_ms: 2.261
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 6.543125889298018e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.037909362465143204
      kl: 0.017943119630217552
      policy_loss: 0.0015808528987690806
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4356 s, 180 iter, 1800000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-50-25
  done: false
  episode_len_mean: 142.07
  episode_reward_max: 389.45951891439415
  episode_reward_mean: 335.94874922647466
  episode_reward_min: 145.90172319805276
  episodes_this_iter: 70
  episodes_total: 12198
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3443.05
    load_time_ms: 2.234
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 3.271562944649009e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.03079693764448166
      kl: 0.013090034015476704
      policy_loss: -0.001301516080275178
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4475 s, 185 iter, 1850000 ts, 337 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-52-24
  done: false
  episode_len_mean: 143.53
  episode_reward_max: 391.21173461380596
  episode_reward_mean: 346.86186417502336
  episode_reward_min: 282.13310601513945
  episodes_this_iter: 69
  episodes_total: 12549
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3448.701
    load_time_ms: 2.406
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 3.271562944649009e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.04556972533464432
      kl: 0.028687939047813416
      policy_loss: 0.005206322763115168
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4595 s, 190 iter, 1900000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-54-24
  done: false
  episode_len_mean: 144.46
  episode_reward_max: 390.0514466981111
  episode_reward_mean: 335.5539379536318
  episode_reward_min: 160.62641683128453
  episodes_this_iter: 69
  episodes_total: 12895
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3452.891
    load_time_ms: 2.441
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 1.2268367301706103e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.034150734543800354
      kl: 0.014419584535062313
      policy_loss: 0.0007477427716366947
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4714 s, 195 iter, 1950000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-56-24
  done: false
  episode_len_mean: 144.19
  episode_reward_max: 400.00731437430153
  episode_reward_mean: 341.24625131877906
  episode_reward_min: 160.45093980619305
  episodes_this_iter: 69
  episodes_total: 13240
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3468.41
    load_time_ms: 2.463
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 1.8402545656251808e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.06744179874658585
      kl: 0.03479030728340149
      policy_loss: 0.0061420598067343235
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4834 s, 200 iter, 2000000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_01-58-23
  done: false
  episode_len_mean: 147.96
  episode_reward_max: 389.76128888230437
  episode_reward_mean: 336.6317948424405
  episode_reward_min: 151.07753640075424
  episodes_this_iter: 67
  episodes_total: 13583
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3473.113
    load_time_ms: 2.399
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 4.140574120807618e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.08402867615222931
      kl: 0.019996920600533485
      policy_loss: 0.0021334749180823565
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 4953 s, 205 iter, 2050000 ts, 333 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-00-23
  done: false
  episode_len_mean: 146.2
  episode_reward_max: 397.5007801490985
  episode_reward_mean: 334.60695313090145
  episode_reward_min: 139.570821715563
  episodes_this_iter: 69
  episodes_total: 13925
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3458.644
    load_time_ms: 2.43
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 3.1054293387512495e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.020551303401589394
      kl: 0.010620465502142906
      policy_loss: -0.0014176600379869342
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5071 s, 210 iter, 2100000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-02-21
  done: false
  episode_len_mean: 145.52
  episode_reward_max: 390.3911353271787
  episode_reward_mean: 345.4269939482651
  episode_reward_min: 174.8088691217602
  episodes_this_iter: 68
  episodes_total: 14267
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3448.329
    load_time_ms: 2.469
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 3.1054293387512495e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.02378195896744728
      kl: 0.015022325329482555
      policy_loss: 0.0018678578780964017
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5189 s, 215 iter, 2150000 ts, 339 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-04-20
  done: false
  episode_len_mean: 144.99
  episode_reward_max: 399.47948060931685
  episode_reward_mean: 343.25072275658954
  episode_reward_min: 143.59101263044454
  episodes_this_iter: 69
  episodes_total: 14611
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3432.591
    load_time_ms: 2.459
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 3.1054293387512495e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.04982908070087433
      kl: 0.03344464674592018
      policy_loss: -0.001818689750507474
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5308 s, 220 iter, 2200000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-06-19
  done: false
  episode_len_mean: 143.52
  episode_reward_max: 395.0948513963224
  episode_reward_mean: 343.58384034668217
  episode_reward_min: 176.5245529228746
  episodes_this_iter: 70
  episodes_total: 14960
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3427.678
    load_time_ms: 2.337
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 3.1054293387512495e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.08242199569940567
      kl: 0.009389897808432579
      policy_loss: 0.00046687645954079926
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5426 s, 225 iter, 2250000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-08-17
  done: false
  episode_len_mean: 142.41
  episode_reward_max: 388.7487906615843
  episode_reward_mean: 339.593130957816
  episode_reward_min: 176.00746374978854
  episodes_this_iter: 70
  episodes_total: 15310
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3448.473
    load_time_ms: 2.317
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 1.5527146693756248e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.1476239264011383
      kl: 0.009602718986570835
      policy_loss: 0.0001956088817678392
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5545 s, 230 iter, 2300000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-10-16
  done: false
  episode_len_mean: 142.82
  episode_reward_max: 389.4559242260562
  episode_reward_mean: 350.632845313064
  episode_reward_min: 287.3097216438151
  episodes_this_iter: 70
  episodes_total: 15662
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3458.082
    load_time_ms: 2.523
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 7.763573346878124e-28
      cur_lr: 4.999999873689376e-05
      entropy: -0.1858922690153122
      kl: 0.01806202158331871
      policy_loss: 0.0005321119679138064
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5665 s, 235 iter, 2350000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-12-16
  done: false
  episode_len_mean: 141.23
  episode_reward_max: 385.02585293382776
  episode_reward_mean: 337.8184130423623
  episode_reward_min: 145.90038184475844
  episodes_this_iter: 71
  episodes_total: 16017
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3452.868
    load_time_ms: 2.506
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 7.763573346878124e-28
      cur_lr: 4.999999873689376e-05
      entropy: -0.22228297591209412
      kl: 0.05276045575737953
      policy_loss: 0.00196007895283401
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5784 s, 240 iter, 2400000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-14-15
  done: false
  episode_len_mean: 136.67
  episode_reward_max: 392.84089197642044
  episode_reward_mean: 312.0022708072487
  episode_reward_min: -146.5560321224757
  episodes_this_iter: 73
  episodes_total: 16381
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3444.733
    load_time_ms: 2.426
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 5.895464152763768e-27
      cur_lr: 4.999999873689376e-05
      entropy: -0.02379644848406315
      kl: 0.053185608237981796
      policy_loss: 0.014256353490054607
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 5903 s, 245 iter, 2450000 ts, 324 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-16-16
  done: false
  episode_len_mean: 138.26
  episode_reward_max: 395.76683409404035
  episode_reward_mean: 332.3504822648684
  episode_reward_min: -142.86288164043197
  episodes_this_iter: 72
  episodes_total: 16747
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3423.324
    load_time_ms: 2.472
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 8.843196229145652e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.0026102967094630003
      kl: 2.9832072257995605
      policy_loss: 0.08184702694416046
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6024 s, 250 iter, 2500000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-18-16
  done: false
  episode_len_mean: 136.88
  episode_reward_max: 393.44824258888815
  episode_reward_mean: 317.9993206034655
  episode_reward_min: -143.2930553969431
  episodes_this_iter: 73
  episodes_total: 17114
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3426.642
    load_time_ms: 2.365
    num_steps_sampled: 2510000
    num_steps_trained: 2510000
    rl_0:
      cur_kl_coeff: 1.3264794728904466e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.04080599546432495
      kl: 0.012670832686126232
      policy_loss: -0.001553158275783062
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6144 s, 255 iter, 2550000 ts, 287 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-20-16
  done: false
  episode_len_mean: 139.42
  episode_reward_max: 391.0909927336684
  episode_reward_mean: 334.6675096268428
  episode_reward_min: -140.281614231067
  episodes_this_iter: 71
  episodes_total: 17483
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3448.297
    load_time_ms: 2.258
    num_steps_sampled: 2560000
    num_steps_trained: 2560000
    rl_0:
      cur_kl_coeff: 1.3264794728904466e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.04141244664788246
      kl: 0.016888583078980446
      policy_loss: 0.0019344419706612825
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6263 s, 260 iter, 2600000 ts, 323 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-22-15
  done: false
  episode_len_mean: 133.18
  episode_reward_max: 393.37485071829707
  episode_reward_mean: 304.0186082897001
  episode_reward_min: -143.3207569646118
  episodes_this_iter: 75
  episodes_total: 17852
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3449.462
    load_time_ms: 2.328
    num_steps_sampled: 2610000
    num_steps_trained: 2610000
    rl_0:
      cur_kl_coeff: 1.989719517484461e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.008198603987693787
      kl: 0.01765357330441475
      policy_loss: 0.0029725423082709312
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6381 s, 265 iter, 2650000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-24-14
  done: false
  episode_len_mean: 132.45
  episode_reward_max: 399.2741615983841
  episode_reward_mean: 304.6646866358614
  episode_reward_min: -142.08618866276265
  episodes_this_iter: 77
  episodes_total: 18219
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3437.125
    load_time_ms: 2.257
    num_steps_sampled: 2660000
    num_steps_trained: 2660000
    rl_0:
      cur_kl_coeff: 1.989719517484461e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.001657794346101582
      kl: 6.750422477722168
      policy_loss: 0.033887434750795364
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6502 s, 270 iter, 2700000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-26-15
  done: false
  episode_len_mean: 139.14
  episode_reward_max: 393.3178812483065
  episode_reward_mean: 344.77466807796395
  episode_reward_min: -132.9996738063623
  episodes_this_iter: 71
  episodes_total: 18588
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3432.633
    load_time_ms: 2.229
    num_steps_sampled: 2710000
    num_steps_trained: 2710000
    rl_0:
      cur_kl_coeff: 6.715301715210304e-26
      cur_lr: 4.999999873689376e-05
      entropy: -0.034414224326610565
      kl: 0.013602793216705322
      policy_loss: 0.0005817346391268075
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6620 s, 275 iter, 2750000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-28-13
  done: false
  episode_len_mean: 135.32
  episode_reward_max: 396.6723248845374
  episode_reward_mean: 325.74816379157426
  episode_reward_min: -145.199251090717
  episodes_this_iter: 74
  episodes_total: 18957
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3432.38
    load_time_ms: 2.502
    num_steps_sampled: 2760000
    num_steps_trained: 2760000
    rl_0:
      cur_kl_coeff: 1.5109430091818348e-25
      cur_lr: 4.999999873689376e-05
      entropy: -0.08497025072574615
      kl: 0.029889650642871857
      policy_loss: 0.006217748858034611
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6739 s, 280 iter, 2800000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-30-13
  done: false
  episode_len_mean: 135.64
  episode_reward_max: 389.52936021771296
  episode_reward_mean: 326.5245024742306
  episode_reward_min: -140.97606204255564
  episodes_this_iter: 73
  episodes_total: 19327
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3433.83
    load_time_ms: 2.502
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 2.2664148835513015e-25
      cur_lr: 4.999999873689376e-05
      entropy: -0.13349999487400055
      kl: 0.01630588062107563
      policy_loss: 0.000559977546799928
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6858 s, 285 iter, 2850000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-32-11
  done: false
  episode_len_mean: 135.05
  episode_reward_max: 391.397481235236
  episode_reward_mean: 315.4610864638973
  episode_reward_min: -140.22677436511563
  episodes_this_iter: 74
  episodes_total: 19698
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3433.439
    load_time_ms: 2.274
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 2.2664148835513015e-25
      cur_lr: 4.999999873689376e-05
      entropy: -0.06562212109565735
      kl: 0.020898666232824326
      policy_loss: 0.00047433949657715857
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 6977 s, 290 iter, 2900000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-34-11
  done: false
  episode_len_mean: 134.22
  episode_reward_max: 392.0805302185082
  episode_reward_mean: 313.9766169283164
  episode_reward_min: -144.6326965702824
  episodes_this_iter: 73
  episodes_total: 20068
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3440.547
    load_time_ms: 2.24
    num_steps_sampled: 2910000
    num_steps_trained: 2910000
    rl_0:
      cur_kl_coeff: 5.099432070505989e-25
      cur_lr: 4.999999873689376e-05
      entropy: -0.0857827439904213
      kl: 0.015346994623541832
      policy_loss: 0.0004922738298773766
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=30948], 7097 s, 295 iter, 2950000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-36-11
  done: false
  episode_len_mean: 135.14
  episode_reward_max: 385.6248790175055
  episode_reward_mean: 322.69625420970283
  episode_reward_min: -142.18210398450074
  episodes_this_iter: 73
  episodes_total: 20437
  experiment_id: deab26f2a5e54fe0ae1da2fc0714f80d
  hostname: Gandalf
  info:
    grad_time_ms: 3444.656
    load_time_ms: 2.304
    num_steps_sampled: 2960000
    num_steps_trained: 2960000
    rl_0:
      cur_kl_coeff: 1.1473720556264762e-24
      cur_lr: 4.999999873689376e-05
      entropy: -0.13401784002780914
      kl: 0.01770164631307125
      policy_loss: 0.002717869356274605
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=30948], 7216 s, 300 iter, 3000000 ts, 323 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=30948], 7216 s, 300 iter, 3000000 ts, 323 rew



In [16]:
# Launch a training run. executeTraining() is defined outside this section of
# the notebook, so its exact configuration is not visible here.
# The captured output of this cell shows SUMO being started and a Ray Tune
# trial named PPO_MultiAgentIntersectionEnv-v0_0 reporting results under
# ~/ray_results/IntersectionExample.
# NOTE(review): the output recorded just before this cell already shows a trial
# with the same name terminating after 300 iterations; presumably this call
# repeats training with changed settings — confirm, and consider clearing the
# long log output before sharing the notebook.
executeTraining()

 Starting SUMO on port 42781
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-13_02-37-488i2grp5y -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



18.377068983595844
28.059751325730154


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-38-47
  done: false
  episode_len_mean: 500.0
  episode_reward_max: 235.037198121984
  episode_reward_mean: 121.41440899553632
  episode_reward_min: 43.69189411821678
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 4623.289
    load_time_ms: 150.645
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4203523397445679
      kl: 0.0021946991328150034
      policy_loss: -0.003515524324029684
      total_loss: 15.115537643432617
      vf_explained_var: 0.30585241317749023
      vf_loss: 15.11861801147461
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4170756340026855
      kl: 0.0026930130552500486
      policy_loss: -0.003957070875912905
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 124 s, 5 iter, 50000 ts, 98.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-40-49
  done: false
  episode_len_mean: 407.51
  episode_reward_max: 376.4254978318797
  episode_reward_mean: 103.20165400945702
  episode_reward_min: -158.69016618129467
  episodes_this_iter: 26
  episodes_total: 139
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3802.33
    load_time_ms: 27.298
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.4116982221603394
      kl: 0.003458708059042692
      policy_loss: -0.002234934363514185
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 246 s, 10 iter, 100000 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-42-51
  done: false
  episode_len_mean: 247.8
  episode_reward_max: 381.2699348859451
  episode_reward_mean: 209.93973449875003
  episode_reward_min: -151.67586958049446
  episodes_this_iter: 43
  episodes_total: 320
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3619.266
    load_time_ms: 2.624
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.4221855401992798
      kl: 0.003315133508294821
      policy_loss: -0.002263287315145135
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 368 s, 15 iter, 150000 ts, 204 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-44-53
  done: false
  episode_len_mean: 189.71
  episode_reward_max: 321.67724635370155
  episode_reward_mean: 196.57431127591914
  episode_reward_min: -159.55832174698716
  episodes_this_iter: 55
  episodes_total: 571
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3601.014
    load_time_ms: 2.53
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 6.10351571594947e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.3919146060943604
      kl: 0.01304236426949501
      policy_loss: -0.004800320602953434
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 490 s, 20 iter, 200000 ts, 235 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-46-56
  done: false
  episode_len_mean: 168.05
  episode_reward_max: 307.64362867220535
  episode_reward_mean: 219.51487854163767
  episode_reward_min: -163.05253046289891
  episodes_this_iter: 59
  episodes_total: 863
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3590.315
    load_time_ms: 2.453
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3912131786346436
      kl: 0.010015608742833138
      policy_loss: -0.004148554522544146
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 611 s, 25 iter, 250000 ts, 266 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-48-59
  done: false
  episode_len_mean: 157.89
  episode_reward_max: 314.91226687973165
  episode_reward_mean: 261.2165125243428
  episode_reward_min: -143.45081728577597
  episodes_this_iter: 65
  episodes_total: 1170
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3595.924
    load_time_ms: 2.533
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 2.3841858265427618e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.320024013519287
      kl: 0.005154613871127367
      policy_loss: -0.0030773591715842485
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 734 s, 30 iter, 300000 ts, 254 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-51-01
  done: false
  episode_len_mean: 146.54
  episode_reward_max: 320.8335240487185
  episode_reward_mean: 264.95957145385967
  episode_reward_min: -159.50423506202264
  episodes_this_iter: 69
  episodes_total: 1507
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3579.311
    load_time_ms: 2.474
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 1.4901161415892261e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.2851911783218384
      kl: 0.011869917623698711
      policy_loss: -0.00258029717952013
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 857 s, 35 iter, 350000 ts, 259 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-53-03
  done: false
  episode_len_mean: 142.7
  episode_reward_max: 326.374868485298
  episode_reward_mean: 247.44163147649553
  episode_reward_min: -146.28042880559465
  episodes_this_iter: 71
  episodes_total: 1853
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3490.55
    load_time_ms: 2.454
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 9.313225884932663e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3064583539962769
      kl: 0.005540088750422001
      policy_loss: -0.0038409645203500986
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 979 s, 40 iter, 400000 ts, 262 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-55-05
  done: false
  episode_len_mean: 135.38
  episode_reward_max: 318.85343281805984
  episode_reward_mean: 278.25391264344324
  episode_reward_min: -141.27905498549458
  episodes_this_iter: 74
  episodes_total: 2217
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3439.666
    load_time_ms: 2.312
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 1.1641532356165829e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.2701609134674072
      kl: 0.01123407855629921
      policy_loss: -0.0029764105565845966
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1100 s, 45 iter, 450000 ts, 279 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-57-07
  done: false
  episode_len_mean: 131.08
  episode_reward_max: 312.94709852910535
  episode_reward_mean: 271.4025576017823
  episode_reward_min: -147.46959018348613
  episodes_this_iter: 77
  episodes_total: 2596
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3439.335
    load_time_ms: 2.272
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 1.4551915445207286e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.2183688879013062
      kl: 0.01208181120455265
      policy_loss: -0.0027227664832025766
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1218 s, 50 iter, 500000 ts, 257 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_02-59-05
  done: false
  episode_len_mean: 127.42
  episode_reward_max: 322.7813559077144
  episode_reward_mean: 269.5786636771745
  episode_reward_min: -151.90783421410023
  episodes_this_iter: 78
  episodes_total: 2984
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3445.102
    load_time_ms: 2.486
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 9.094947153254554e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.2060059309005737
      kl: 0.009521462954580784
      policy_loss: -0.0011275234865024686
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1336 s, 55 iter, 550000 ts, 289 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-01-03
  done: false
  episode_len_mean: 127.74
  episode_reward_max: 320.4087402953801
  episode_reward_mean: 288.9733869804481
  episode_reward_min: 250.31226804869206
  episodes_this_iter: 78
  episodes_total: 3367
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3453.026
    load_time_ms: 2.592
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 4.547473576627277e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.099251627922058
      kl: 0.008313002064824104
      policy_loss: -0.0020632047671824694
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1458 s, 60 iter, 600000 ts, 291 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-03-04
  done: false
  episode_len_mean: 126.98
  episode_reward_max: 325.0515285093859
  episode_reward_mean: 298.4569860633735
  episode_reward_min: 265.8798397624348
  episodes_this_iter: 79
  episodes_total: 3764
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3449.346
    load_time_ms: 2.606
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 5.684341970784096e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.1344491243362427
      kl: 0.01796548254787922
      policy_loss: -0.005732162855565548
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1576 s, 65 iter, 650000 ts, 303 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-05-04
  done: false
  episode_len_mean: 126.93
  episode_reward_max: 338.267225104383
  episode_reward_mean: 295.21661162541847
  episode_reward_min: -143.9972246547484
  episodes_this_iter: 79
  episodes_total: 4159
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3448.546
    load_time_ms: 2.425
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 1.421085492696024e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.132333755493164
      kl: 0.0052376678213477135
      policy_loss: -0.0014874703483656049
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1695 s, 70 iter, 700000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-07-03
  done: false
  episode_len_mean: 127.24
  episode_reward_max: 343.9912754554195
  episode_reward_mean: 312.6474192680115
  episode_reward_min: 278.99364808341835
  episodes_this_iter: 79
  episodes_total: 4553
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3439.873
    load_time_ms: 2.407
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 8.88178432935015e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.1005748510360718
      kl: 0.012661498039960861
      policy_loss: -0.003025819780305028
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1816 s, 75 iter, 750000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-09-04
  done: false
  episode_len_mean: 127.31
  episode_reward_max: 356.62337719888103
  episode_reward_mean: 321.60523509278084
  episode_reward_min: 285.9451295199305
  episodes_this_iter: 79
  episodes_total: 4948
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3441.036
    load_time_ms: 2.46
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 1.6653341068038163e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.1053493022918701
      kl: 0.010522248223423958
      policy_loss: -0.002419860800728202
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 1936 s, 80 iter, 800000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-11-05
  done: false
  episode_len_mean: 127.03
  episode_reward_max: 357.696347134426
  episode_reward_mean: 318.2790257262094
  episode_reward_min: -142.4516452058494
  episodes_this_iter: 79
  episodes_total: 5339
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3448.665
    load_time_ms: 2.493
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 4.163335267009541e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.0510081052780151
      kl: 0.004822979681193829
      policy_loss: -0.0017180144786834717
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2056 s, 85 iter, 850000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-13-05
  done: false
  episode_len_mean: 128.43
  episode_reward_max: 362.5126706943275
  episode_reward_mean: 326.8807204582648
  episode_reward_min: -132.01625475919388
  episodes_this_iter: 77
  episodes_total: 5728
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3444.049
    load_time_ms: 2.38
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 5.204169083761926e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.0357811450958252
      kl: 0.006636190693825483
      policy_loss: -0.0009838108671829104
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2178 s, 90 iter, 900000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-15-06
  done: false
  episode_len_mean: 128.07
  episode_reward_max: 369.9312070691471
  episode_reward_mean: 319.59465773244244
  episode_reward_min: -139.9499066380072
  episodes_this_iter: 78
  episodes_total: 6120
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3450.152
    load_time_ms: 2.312
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 6.505211354702407e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.9805789589881897
      kl: 0.006396142765879631
      policy_loss: -0.00016430631512776017
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2298 s, 95 iter, 950000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-17-07
  done: false
  episode_len_mean: 131.38
  episode_reward_max: 368.9681815780798
  episode_reward_mean: 340.8071351304957
  episode_reward_min: 303.20360685976294
  episodes_this_iter: 76
  episodes_total: 6504
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3450.195
    load_time_ms: 2.416
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.6263028386756018e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0486140251159668
      kl: 0.027312811464071274
      policy_loss: -0.004249959252774715
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2418 s, 100 iter, 1000000 ts, 341 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-19-07
  done: false
  episode_len_mean: 131.31
  episode_reward_max: 377.84246838374264
  episode_reward_mean: 342.59111798433725
  episode_reward_min: 309.8591124851318
  episodes_this_iter: 76
  episodes_total: 6885
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3444.038
    load_time_ms: 2.43
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 4.0657570966890046e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.027443528175354
      kl: 0.0131662767380476
      policy_loss: -0.0026766005903482437
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2537 s, 105 iter, 1050000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-21-06
  done: false
  episode_len_mean: 132.23
  episode_reward_max: 383.9167207729782
  episode_reward_mean: 342.2143241977304
  episode_reward_min: 293.4263286886843
  episodes_this_iter: 75
  episodes_total: 7262
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3453.93
    load_time_ms: 2.324
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 5.082196370861256e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.9905438423156738
      kl: 0.004155443049967289
      policy_loss: -0.0015943573089316487
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2657 s, 110 iter, 1100000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-23-07
  done: false
  episode_len_mean: 132.23
  episode_reward_max: 375.9843759834571
  episode_reward_mean: 344.2291364945836
  episode_reward_min: 311.4292146489139
  episodes_this_iter: 76
  episodes_total: 7641
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3465.212
    load_time_ms: 2.364
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 3.176372731788285e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.9702678322792053
      kl: 0.007613399066030979
      policy_loss: -0.0006421896978281438
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2777 s, 115 iter, 1150000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-25-06
  done: false
  episode_len_mean: 132.3
  episode_reward_max: 385.73474687378103
  episode_reward_mean: 344.15025540275366
  episode_reward_min: 301.32209358034487
  episodes_this_iter: 75
  episodes_total: 8019
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3470.873
    load_time_ms: 2.394
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 3.970465914735356e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.8992575407028198
      kl: 0.006692875642329454
      policy_loss: -0.00022171720047481358
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 2896 s, 120 iter, 1200000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-27-06
  done: false
  episode_len_mean: 131.53
  episode_reward_max: 377.54473836572134
  episode_reward_mean: 345.42898514877436
  episode_reward_min: 306.8381943522283
  episodes_this_iter: 76
  episodes_total: 8399
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3459.92
    load_time_ms: 2.388
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 2.4815411967095975e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.920447826385498
      kl: 0.008623862639069557
      policy_loss: -0.0011750569101423025
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3016 s, 125 iter, 1250000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-29-06
  done: false
  episode_len_mean: 130.81
  episode_reward_max: 387.61769939696376
  episode_reward_mean: 344.66635955727133
  episode_reward_min: 305.5729192052963
  episodes_this_iter: 76
  episodes_total: 8779
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3455.203
    load_time_ms: 2.561
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 6.203852991773994e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.8941448926925659
      kl: 0.01133637223392725
      policy_loss: -2.7038347980123945e-05
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3136 s, 130 iter, 1300000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-31-07
  done: false
  episode_len_mean: 132.1
  episode_reward_max: 387.39223123187577
  episode_reward_mean: 349.69222768285516
  episode_reward_min: 305.26275999945756
  episodes_this_iter: 76
  episodes_total: 9158
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3463.699
    load_time_ms: 2.419
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 9.305783493595275e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.9315576553344727
      kl: 0.008908257819712162
      policy_loss: 0.004641484934836626
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3257 s, 135 iter, 1350000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-33-08
  done: false
  episode_len_mean: 132.3
  episode_reward_max: 387.9717776478529
  episode_reward_mean: 347.96868875877743
  episode_reward_min: 308.0830788611989
  episodes_this_iter: 76
  episodes_total: 9536
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3454.484
    load_time_ms: 2.346
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 4.6528917467976375e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.9508801102638245
      kl: 0.008512569591403008
      policy_loss: 0.0017483641859143972
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3376 s, 140 iter, 1400000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-35-08
  done: false
  episode_len_mean: 132.4
  episode_reward_max: 388.5620712640287
  episode_reward_mean: 344.2177986652558
  episode_reward_min: 304.1323600872123
  episodes_this_iter: 76
  episodes_total: 9913
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3441.537
    load_time_ms: 2.465
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 1.1632229366994094e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.8963692784309387
      kl: 0.0163351409137249
      policy_loss: 0.008954779244959354
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3498 s, 145 iter, 1450000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-37-08
  done: false
  episode_len_mean: 130.89
  episode_reward_max: 377.2727208287415
  episode_reward_mean: 345.82537606271416
  episode_reward_min: 298.6044333096735
  episodes_this_iter: 76
  episodes_total: 10292
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3439.216
    load_time_ms: 2.394
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 5.816114683497047e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.8424370884895325
      kl: 0.015251445583999157
      policy_loss: 0.011981312185525894
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3617 s, 150 iter, 1500000 ts, 351 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-39-09
  done: false
  episode_len_mean: 132.63
  episode_reward_max: 387.1959102553236
  episode_reward_mean: 346.34991140863326
  episode_reward_min: 296.12010695037236
  episodes_this_iter: 76
  episodes_total: 10670
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3444.375
    load_time_ms: 2.36
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 5.816114683497047e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.9200627207756042
      kl: 26.226612091064453
      policy_loss: 0.097920723259449
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3738 s, 155 iter, 1550000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-41-09
  done: false
  episode_len_mean: 131.41
  episode_reward_max: 386.7728259628824
  episode_reward_mean: 344.28294257801195
  episode_reward_min: 108.27555569180839
  episodes_this_iter: 76
  episodes_total: 11051
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3450.03
    load_time_ms: 2.546
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.3086251778596036e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.9522430300712585
      kl: 0.009368949569761753
      policy_loss: -0.0005096605163998902
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3857 s, 160 iter, 1600000 ts, 354 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-43-09
  done: false
  episode_len_mean: 131.52
  episode_reward_max: 388.4137537045778
  episode_reward_mean: 346.46258727430364
  episode_reward_min: 188.39757364874345
  episodes_this_iter: 76
  episodes_total: 11431
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3448.384
    load_time_ms: 2.532
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 2.4536734603412207e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.9433892965316772
      kl: 0.007043847814202309
      policy_loss: 0.0002311546413693577
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 3977 s, 165 iter, 1650000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-45-10
  done: false
  episode_len_mean: 131.86
  episode_reward_max: 386.5574687088241
  episode_reward_mean: 345.66867307891073
  episode_reward_min: -125.80950223094965
  episodes_this_iter: 75
  episodes_total: 11812
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3465.506
    load_time_ms: 2.46
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 2.7603814632517823e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.8378547430038452
      kl: 13.704381942749023
      policy_loss: -0.0017011528834700584
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4098 s, 170 iter, 1700000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-47-11
  done: false
  episode_len_mean: 133.19
  episode_reward_max: 396.98178389765735
  episode_reward_mean: 348.44768946995856
  episode_reward_min: 305.5678160438572
  episodes_this_iter: 75
  episodes_total: 12187
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3467.018
    load_time_ms: 2.537
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 4.140574120807618e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.7952131628990173
      kl: 19.23039436340332
      policy_loss: 0.05126025155186653
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4217 s, 175 iter, 1750000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-49-10
  done: false
  episode_len_mean: 129.53
  episode_reward_max: 387.07969364854677
  episode_reward_mean: 344.83584218668796
  episode_reward_min: -138.48108993539014
  episodes_this_iter: 77
  episodes_total: 12570
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3456.3
    load_time_ms: 2.434
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 1.0480823705321851e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.8695446848869324
      kl: 0.009229766204953194
      policy_loss: -0.0009910978842526674
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4337 s, 180 iter, 1800000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-51-10
  done: false
  episode_len_mean: 123.86
  episode_reward_max: 391.92059242622895
  episode_reward_mean: 293.3225179701572
  episode_reward_min: -144.0963508443785
  episodes_this_iter: 80
  episodes_total: 12963
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3467.39
    load_time_ms: 2.375
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 2.652958945780893e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.9793645143508911
      kl: 0.01885485276579857
      policy_loss: 0.0003914184926543385
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4456 s, 185 iter, 1850000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-53-10
  done: false
  episode_len_mean: 129.75
  episode_reward_max: 394.12621413576613
  episode_reward_mean: 340.0604168852818
  episode_reward_min: -135.49246154706668
  episodes_this_iter: 76
  episodes_total: 13357
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3481.66
    load_time_ms: 2.434
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 3.979439034968922e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.986717700958252
      kl: 0.02203344739973545
      policy_loss: -0.001404772512614727
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4577 s, 190 iter, 1900000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-55-10
  done: false
  episode_len_mean: 128.7
  episode_reward_max: 387.22385710759846
  episode_reward_mean: 346.37052116623477
  episode_reward_min: 173.061929243747
  episodes_this_iter: 78
  episodes_total: 13748
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3471.136
    load_time_ms: 2.445
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 5.969157011709428e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.8972458243370056
      kl: 0.015383309684693813
      policy_loss: 0.004289217293262482
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4697 s, 195 iter, 1950000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-57-11
  done: false
  episode_len_mean: 128.0
  episode_reward_max: 392.4007523622439
  episode_reward_mean: 346.9981736166071
  episode_reward_min: 181.85698950696394
  episodes_this_iter: 78
  episodes_total: 14139
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3469.54
    load_time_ms: 2.335
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 5.969157011709428e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.8574106097221375
      kl: 0.011374200694262981
      policy_loss: -0.0005432915640994906
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4818 s, 200 iter, 2000000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_03-59-13
  done: false
  episode_len_mean: 126.62
  episode_reward_max: 383.8080236279144
  episode_reward_mean: 339.69773874081227
  episode_reward_min: -137.33764910746194
  episodes_this_iter: 79
  episodes_total: 14531
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3485.502
    load_time_ms: 2.559
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 5.969157011709428e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.7910320162773132
      kl: 24.039499282836914
      policy_loss: 0.010286676697432995
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 4939 s, 205 iter, 2050000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-01-14
  done: false
  episode_len_mean: 128.34
  episode_reward_max: 384.7515344707447
  episode_reward_mean: 347.5758982727338
  episode_reward_min: 311.81070045259605
  episodes_this_iter: 78
  episodes_total: 14919
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3466.593
    load_time_ms: 2.54
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 6.715301715210304e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.7427310943603516
      kl: 0.026411190629005432
      policy_loss: 0.011146586388349533
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5059 s, 210 iter, 2100000 ts, 346 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-03-14
  done: false
  episode_len_mean: 128.88
  episode_reward_max: 391.46785757847545
  episode_reward_mean: 346.42989416005616
  episode_reward_min: 305.1637388331738
  episodes_this_iter: 78
  episodes_total: 15307
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3443.448
    load_time_ms: 2.242
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 6.715301715210304e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.7413998246192932
      kl: 0.04496907442808151
      policy_loss: 0.01960047148168087
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5180 s, 215 iter, 2150000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-05-15
  done: false
  episode_len_mean: 129.2
  episode_reward_max: 386.79666908229103
  episode_reward_mean: 348.93933868059406
  episode_reward_min: 312.7935408431057
  episodes_this_iter: 78
  episodes_total: 15695
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3441.97
    load_time_ms: 2.424
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.0072955038005784e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.7206759452819824
      kl: 0.012137454003095627
      policy_loss: 0.0009794732322916389
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5300 s, 220 iter, 2200000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-07-16
  done: false
  episode_len_mean: 127.17
  episode_reward_max: 386.01850566476935
  episode_reward_mean: 348.1852261435895
  episode_reward_min: 300.4273719138902
  episodes_this_iter: 79
  episodes_total: 16089
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3447.299
    load_time_ms: 2.471
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 2.2664148835513015e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.7243832349777222
      kl: 0.013514836318790913
      policy_loss: 0.0011279431637376547
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5420 s, 225 iter, 2250000 ts, 345 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-09-16
  done: false
  episode_len_mean: 126.73
  episode_reward_max: 386.8064096820461
  episode_reward_mean: 346.4858907630754
  episode_reward_min: 304.24037768868436
  episodes_this_iter: 79
  episodes_total: 16484
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3449.711
    load_time_ms: 2.315
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 1.1332074417756508e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6732915043830872
      kl: 0.026806483045220375
      policy_loss: 0.0062156678177416325
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5542 s, 230 iter, 2300000 ts, 344 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-11-18
  done: false
  episode_len_mean: 126.96
  episode_reward_max: 385.4534686079951
  episode_reward_mean: 347.174267597695
  episode_reward_min: 303.9529779663064
  episodes_this_iter: 79
  episodes_total: 16879
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3458.219
    load_time_ms: 2.314
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 8.499056121466172e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.6681089997291565
      kl: 0.019113222137093544
      policy_loss: 0.009085066616535187
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5662 s, 235 iter, 2350000 ts, 347 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-13-18
  done: false
  episode_len_mean: 126.47
  episode_reward_max: 382.38972822619246
  episode_reward_mean: 343.5560432059918
  episode_reward_min: 304.5071051105608
  episodes_this_iter: 80
  episodes_total: 17274
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3452.068
    load_time_ms: 2.474
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 1.2748580176264973e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6737399697303772
      kl: 0.012381804175674915
      policy_loss: 0.0031733359210193157
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5782 s, 240 iter, 2400000 ts, 342 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-15-18
  done: false
  episode_len_mean: 126.09
  episode_reward_max: 393.6767247702778
  episode_reward_mean: 345.86851048295955
  episode_reward_min: 298.5582862047025
  episodes_this_iter: 79
  episodes_total: 17669
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3451.769
    load_time_ms: 2.54
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 1.2748580176264973e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6341903805732727
      kl: 0.023250600323081017
      policy_loss: 0.006169195752590895
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 5902 s, 245 iter, 2450000 ts, 340 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-17-18
  done: false
  episode_len_mean: 125.61
  episode_reward_max: 389.4834707619583
  episode_reward_mean: 338.6430609773191
  episode_reward_min: 297.43731762474073
  episodes_this_iter: 80
  episodes_total: 18067
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3462.529
    load_time_ms: 2.689
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 2.8684301390661906e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.5869292616844177
      kl: 0.022735798731446266
      policy_loss: 0.005449679214507341
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6022 s, 250 iter, 2500000 ts, 348 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-19-18
  done: false
  episode_len_mean: 126.8
  episode_reward_max: 387.7368176036647
  episode_reward_mean: 346.8370319244075
  episode_reward_min: 305.04190881959175
  episodes_this_iter: 78
  episodes_total: 18462
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3473.636
    load_time_ms: 2.6
    num_steps_sampled: 2510000
    num_steps_trained: 2510000
    rl_0:
      cur_kl_coeff: 2.8684301390661906e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6332025527954102
      kl: 0.02646343596279621
      policy_loss: 0.009074813686311245
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6141 s, 255 iter, 2550000 ts, 349 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-21-19
  done: false
  episode_len_mean: 126.66
  episode_reward_max: 386.4321136691754
  episode_reward_mean: 349.1773233409141
  episode_reward_min: 311.5561864305465
  episodes_this_iter: 79
  episodes_total: 18859
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3477.388
    load_time_ms: 2.394
    num_steps_sampled: 2560000
    num_steps_trained: 2560000
    rl_0:
      cur_kl_coeff: 3.226984676821442e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6071852445602417
      kl: 0.014279857277870178
      policy_loss: 0.0008980226120911539
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6261 s, 260 iter, 2600000 ts, 343 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-23-18
  done: false
  episode_len_mean: 125.61
  episode_reward_max: 375.18937758797597
  episode_reward_mean: 332.89000596299826
  episode_reward_min: 289.1782289577573
  episodes_this_iter: 79
  episodes_total: 19254
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3468.266
    load_time_ms: 2.397
    num_steps_sampled: 2610000
    num_steps_trained: 2610000
    rl_0:
      cur_kl_coeff: 4.840478124567811e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.36157089471817017
      kl: 0.023734556511044502
      policy_loss: 0.0067442930303514
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6380 s, 265 iter, 2650000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-25-17
  done: false
  episode_len_mean: 125.21
  episode_reward_max: 369.18764885909496
  episode_reward_mean: 327.27325044548803
  episode_reward_min: 296.17476609073555
  episodes_this_iter: 80
  episodes_total: 19654
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3455.192
    load_time_ms: 2.257
    num_steps_sampled: 2660000
    num_steps_trained: 2660000
    rl_0:
      cur_kl_coeff: 7.260717186851717e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.35196033120155334
      kl: 0.021882666274905205
      policy_loss: 0.010284759104251862
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6500 s, 270 iter, 2700000 ts, 334 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-27-18
  done: false
  episode_len_mean: 126.06
  episode_reward_max: 365.6199921888314
  episode_reward_mean: 331.6897687910573
  episode_reward_min: 290.1433890352989
  episodes_this_iter: 79
  episodes_total: 20052
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3460.64
    load_time_ms: 2.2
    num_steps_sampled: 2710000
    num_steps_trained: 2710000
    rl_0:
      cur_kl_coeff: 7.260717186851717e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.3576371669769287
      kl: 0.026860976591706276
      policy_loss: 0.005293285008519888
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6620 s, 275 iter, 2750000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-29-18
  done: false
  episode_len_mean: 125.76
  episode_reward_max: 373.5037233862367
  episode_reward_mean: 330.3810146972061
  episode_reward_min: 289.0150163476341
  episodes_this_iter: 80
  episodes_total: 20451
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3459.058
    load_time_ms: 2.286
    num_steps_sampled: 2760000
    num_steps_trained: 2760000
    rl_0:
      cur_kl_coeff: 3.6303585934258584e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.33057501912117004
      kl: 0.015191569924354553
      policy_loss: 0.0027308480348438025
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6740 s, 280 iter, 2800000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-31-18
  done: false
  episode_len_mean: 125.74
  episode_reward_max: 366.04408579404793
  episode_reward_mean: 331.34151314524416
  episode_reward_min: 299.18559536989983
  episodes_this_iter: 79
  episodes_total: 20849
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3457.8
    load_time_ms: 2.478
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 5.445537397100722e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.33681726455688477
      kl: 0.012011951766908169
      policy_loss: 0.0018463238375261426
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6860 s, 285 iter, 2850000 ts, 330 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-33-19
  done: false
  episode_len_mean: 126.87
  episode_reward_max: 371.3359735389686
  episode_reward_mean: 332.1260714844162
  episode_reward_min: 296.1194867628927
  episodes_this_iter: 79
  episodes_total: 21246
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3465.388
    load_time_ms: 2.384
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 5.445537397100722e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.35673797130584717
      kl: 0.024102259427309036
      policy_loss: 0.009557120501995087
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 6980 s, 290 iter, 2900000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-35-19
  done: false
  episode_len_mean: 127.37
  episode_reward_max: 371.88325987485416
  episode_reward_mean: 337.4829661938252
  episode_reward_min: 302.99334657645835
  episodes_this_iter: 79
  episodes_total: 21640
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3450.279
    load_time_ms: 2.246
    num_steps_sampled: 2910000
    num_steps_trained: 2910000
    rl_0:
      cur_kl_coeff: 8.1683065886891485e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.34254810214042664
      kl: 0.014994997531175613
      policy_loss: 0.005912939086556435
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3789], 7100 s, 295 iter, 2950000 ts, 336 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-37-18
  done: false
  episode_len_mean: 127.31
  episode_reward_max: 379.8059656112418
  episode_reward_mean: 339.81511294091865
  episode_reward_min: 291.78810216290236
  episodes_this_iter: 79
  episodes_total: 22033
  experiment_id: de3c47973cd9444396776c50d009e997
  hostname: Gandalf
  info:
    grad_time_ms: 3439.019
    load_time_ms: 2.456
    num_steps_sampled: 2960000
    num_steps_trained: 2960000
    rl_0:
      cur_kl_coeff: 1.2252459389995657e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.36555248498916626
      kl: 0.020335543900728226
      policy_loss: 0.007112202700227499
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=3789], 7219 s, 300 iter, 3000000 ts, 340 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=3789], 7219 s, 300 iter, 3000000 ts, 340 rew



In [17]:
# Launch another training run. executeTraining() is defined in an earlier cell
# (not visible in this part of the notebook).
# Judging by the logged output below, this call starts a new PPO trial from
# scratch (new experiment_id, new worker pid, step counters begin again at
# 10000) instead of continuing the trial that terminated at 300 iterations.
# NOTE(review): confirm that restarting without a checkpoint restore is intended.
executeTraining()

 Starting SUMO on port 57389
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.1/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-13_04-38-5556_jtq95 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING



8.428299975431056
25.44103369554646


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-39-56
  done: false
  episode_len_mean: 459.55
  episode_reward_max: 156.6480706130009
  episode_reward_mean: 54.223883915774216
  episode_reward_min: -163.98291414907604
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 4548.829
    load_time_ms: 150.365
    num_steps_sampled: 10000
    num_steps_trained: 10000
    rl_0:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.4194523096084595
      kl: 0.0005114871310070157
      policy_loss: -0.0006777705857530236
      total_loss: 69.36656188964844
      vf_explained_var: 0.02261347882449627
      vf_loss: 69.36713409423828
    rl_1:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 1.420228362083435
      kl: 0.0003566278319340199
      policy_loss: -0.001174206379801035
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 122 s, 5 iter, 50000 ts, 103 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-41-56
  done: false
  episode_len_mean: 439.06
  episode_reward_max: 326.95824978464543
  episode_reward_mean: 129.9293865269946
  episode_reward_min: -165.0442810664234
  episodes_this_iter: 27
  episodes_total: 134
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3716.356
    load_time_ms: 27.212
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.0062500000931322575
      cur_lr: 4.999999873689376e-05
      entropy: 1.40365731716156
      kl: 0.0033728349953889847
      policy_loss: -0.0021078006830066442
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 243 s, 10 iter, 100000 ts, 150 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-43-57
  done: false
  episode_len_mean: 305.75
  episode_reward_max: 392.70472427486925
  episode_reward_mean: 178.0509180126735
  episode_reward_min: -156.4303541368106
  episodes_this_iter: 35
  episodes_total: 295
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3550.227
    load_time_ms: 2.465
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531250291038305
      cur_lr: 4.999999873689376e-05
      entropy: 1.4182851314544678
      kl: 0.004295974969863892
      policy_loss: -0.0019421875476837158
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 364 s, 15 iter, 150000 ts, 189 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-45-59
  done: false
  episode_len_mean: 227.37
  episode_reward_max: 335.2177677699818
  episode_reward_mean: 191.56408329509188
  episode_reward_min: -159.72808664976012
  episodes_this_iter: 45
  episodes_total: 517
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3554.974
    load_time_ms: 2.469
    num_steps_sampled: 160000
    num_steps_trained: 160000
    rl_0:
      cur_kl_coeff: 6.10351571594947e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.4124301671981812
      kl: 0.001992434961721301
      policy_loss: -0.0016749019268900156
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 486 s, 20 iter, 200000 ts, 254 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-48-00
  done: false
  episode_len_mean: 193.73
  episode_reward_max: 332.6800722095515
  episode_reward_mean: 261.5187999893227
  episode_reward_min: -145.83438831811165
  episodes_this_iter: 53
  episodes_total: 767
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3563.39
    load_time_ms: 2.505
    num_steps_sampled: 210000
    num_steps_trained: 210000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3875067234039307
      kl: 0.007356491405516863
      policy_loss: -0.0035132383927702904
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 607 s, 25 iter, 250000 ts, 268 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-50-02
  done: false
  episode_len_mean: 156.05
  episode_reward_max: 307.67853851878954
  episode_reward_mean: 265.40403867284175
  episode_reward_min: -136.503778381539
  episodes_this_iter: 64
  episodes_total: 1066
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3561.102
    load_time_ms: 2.465
    num_steps_sampled: 260000
    num_steps_trained: 260000
    rl_0:
      cur_kl_coeff: 9.536743306171047e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.3551299571990967
      kl: 0.004899714607745409
      policy_loss: -0.001113133504986763
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 729 s, 30 iter, 300000 ts, 283 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-52-04
  done: false
  episode_len_mean: 153.77
  episode_reward_max: 336.36639447233523
  episode_reward_mean: 274.89562070821546
  episode_reward_min: -145.78044415223417
  episodes_this_iter: 66
  episodes_total: 1392
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3554.556
    load_time_ms: 2.386
    num_steps_sampled: 310000
    num_steps_trained: 310000
    rl_0:
      cur_kl_coeff: 5.9604645663569045e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.3464906215667725
      kl: 0.010916638188064098
      policy_loss: -0.0014362889342010021
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 852 s, 35 iter, 350000 ts, 287 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-54-08
  done: false
  episode_len_mean: 146.25
  episode_reward_max: 345.01263090974624
  episode_reward_mean: 291.2294004120884
  episode_reward_min: -158.98893890843095
  episodes_this_iter: 68
  episodes_total: 1733
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3548.299
    load_time_ms: 2.335
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 2.9802322831784522e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.3422774076461792
      kl: 0.0020629812497645617
      policy_loss: -0.0009043482132256031
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 975 s, 40 iter, 400000 ts, 299 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-56-11
  done: false
  episode_len_mean: 134.16
  episode_reward_max: 348.1710535731597
  episode_reward_mean: 302.47896547839264
  episode_reward_min: -143.33662467797453
  episodes_this_iter: 74
  episodes_total: 2092
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3553.468
    load_time_ms: 2.691
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 1.8626451769865326e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.2807129621505737
      kl: 0.015301895327866077
      policy_loss: -0.0034819352440536022
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1098 s, 45 iter, 450000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_04-58-14
  done: false
  episode_len_mean: 131.32
  episode_reward_max: 339.8437789775338
  episode_reward_mean: 305.15481358020975
  episode_reward_min: -140.75562332833147
  episodes_this_iter: 76
  episodes_total: 2467
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3550.795
    load_time_ms: 2.684
    num_steps_sampled: 460000
    num_steps_trained: 460000
    rl_0:
      cur_kl_coeff: 2.3283064712331658e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.2378982305526733
      kl: 0.003996490966528654
      policy_loss: -0.0009592035203240812
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1220 s, 50 iter, 500000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-00-16
  done: false
  episode_len_mean: 129.15
  episode_reward_max: 343.930755103443
  episode_reward_mean: 308.75130659054776
  episode_reward_min: -140.51979898892984
  episodes_this_iter: 78
  episodes_total: 2852
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3541.487
    load_time_ms: 2.385
    num_steps_sampled: 510000
    num_steps_trained: 510000
    rl_0:
      cur_kl_coeff: 1.4551915445207286e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1992383003234863
      kl: 0.008192685432732105
      policy_loss: -0.0013685973826795816
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1341 s, 55 iter, 550000 ts, 304 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-02-18
  done: false
  episode_len_mean: 125.07
  episode_reward_max: 339.9434491167959
  episode_reward_mean: 305.1430434292895
  episode_reward_min: 273.40236202538017
  episodes_this_iter: 80
  episodes_total: 3249
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3556.819
    load_time_ms: 2.353
    num_steps_sampled: 560000
    num_steps_trained: 560000
    rl_0:
      cur_kl_coeff: 1.8189894306509108e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.1419734954833984
      kl: 0.015510935336351395
      policy_loss: -0.0025474762078374624
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1464 s, 60 iter, 600000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-04-21
  done: false
  episode_len_mean: 129.03
  episode_reward_max: 350.21958749385504
  episode_reward_mean: 307.40863561238604
  episode_reward_min: -140.06138841312992
  episodes_this_iter: 77
  episodes_total: 3642
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3561.406
    load_time_ms: 2.446
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.2095896005630493
      kl: 0.002502276562154293
      policy_loss: -0.000710981956217438
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1587 s, 65 iter, 650000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-06-24
  done: false
  episode_len_mean: 127.55
  episode_reward_max: 346.0105817452702
  episode_reward_mean: 313.20117450293264
  episode_reward_min: 282.6555730810468
  episodes_this_iter: 78
  episodes_total: 4034
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3567.054
    load_time_ms: 2.608
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 2.842170985392048e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.1009560823440552
      kl: 0.00822480022907257
      policy_loss: -0.0010455080773681402
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1709 s, 70 iter, 700000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-08-26
  done: false
  episode_len_mean: 126.78
  episode_reward_max: 344.69691561324095
  episode_reward_mean: 312.69997836736803
  episode_reward_min: 283.3924003968074
  episodes_this_iter: 78
  episodes_total: 4426
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3570.505
    load_time_ms: 2.607
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 3.55271373174006e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.0605961084365845
      kl: 0.005748196039348841
      policy_loss: 8.538240945199504e-05
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1831 s, 75 iter, 750000 ts, 309 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-10-29
  done: false
  episode_len_mean: 125.58
  episode_reward_max: 338.22893937516426
  episode_reward_mean: 306.6721524919908
  episode_reward_min: 276.5121492406375
  episodes_this_iter: 80
  episodes_total: 4823
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3571.866
    load_time_ms: 2.395
    num_steps_sampled: 760000
    num_steps_trained: 760000
    rl_0:
      cur_kl_coeff: 2.2204460823375376e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.0349295139312744
      kl: 0.030090445652604103
      policy_loss: -0.005107867531478405
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 1954 s, 80 iter, 800000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-12-32
  done: false
  episode_len_mean: 125.56
  episode_reward_max: 339.99393937683993
  episode_reward_mean: 308.0750006538284
  episode_reward_min: 277.4764825914535
  episodes_this_iter: 79
  episodes_total: 5218
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3560.26
    load_time_ms: 2.338
    num_steps_sampled: 810000
    num_steps_trained: 810000
    rl_0:
      cur_kl_coeff: 1.1102230411687688e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.9937925934791565
      kl: 0.028265228495001793
      policy_loss: -0.005136137828230858
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2076 s, 85 iter, 850000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-14-34
  done: false
  episode_len_mean: 125.74
  episode_reward_max: 343.2748342536986
  episode_reward_mean: 308.04092486979516
  episode_reward_min: 280.4722732785217
  episodes_this_iter: 79
  episodes_total: 5615
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3565.583
    load_time_ms: 2.343
    num_steps_sampled: 860000
    num_steps_trained: 860000
    rl_0:
      cur_kl_coeff: 5.551115205843844e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.9735603332519531
      kl: 0.009974760003387928
      policy_loss: -0.0011120608542114496
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2200 s, 90 iter, 900000 ts, 308 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-16-38
  done: false
  episode_len_mean: 125.75
  episode_reward_max: 342.1028120104354
  episode_reward_mean: 308.5424483680872
  episode_reward_min: 274.47050740446093
  episodes_this_iter: 79
  episodes_total: 6014
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3582.977
    load_time_ms: 2.355
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 3.4694470036524025e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9447755813598633
      kl: 0.02541428990662098
      policy_loss: -0.00120465736836195
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2322 s, 95 iter, 950000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-18-41
  done: false
  episode_len_mean: 125.2
  episode_reward_max: 343.1662424328626
  episode_reward_mean: 305.752218513175
  episode_reward_min: 276.9659303863013
  episodes_this_iter: 80
  episodes_total: 6412
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3575.95
    load_time_ms: 2.452
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9241488575935364
      kl: 0.018721701577305794
      policy_loss: 0.00015034020179882646
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2444 s, 100 iter, 1000000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-20-43
  done: false
  episode_len_mean: 124.54
  episode_reward_max: 341.93272103215395
  episode_reward_mean: 306.37680654925043
  episode_reward_min: 272.59810655439315
  episodes_this_iter: 80
  episodes_total: 6811
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3559.928
    load_time_ms: 2.456
    num_steps_sampled: 1010000
    num_steps_trained: 1010000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.8144547343254089
      kl: 0.024888310581445694
      policy_loss: -0.001107072806917131
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2568 s, 105 iter, 1050000 ts, 306 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-22-46
  done: false
  episode_len_mean: 124.85
  episode_reward_max: 339.41444171638403
  episode_reward_mean: 304.2130333225369
  episode_reward_min: 268.70120542821076
  episodes_this_iter: 80
  episodes_total: 7211
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3517.182
    load_time_ms: 2.54
    num_steps_sampled: 1060000
    num_steps_trained: 1060000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7705225348472595
      kl: 0.019281981512904167
      policy_loss: 0.0010926711838692427
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2689 s, 110 iter, 1100000 ts, 309 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-24-48
  done: false
  episode_len_mean: 124.93
  episode_reward_max: 338.5482867442416
  episode_reward_mean: 307.49409355239663
  episode_reward_min: 276.5364130984376
  episodes_this_iter: 80
  episodes_total: 7610
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3450.206
    load_time_ms: 2.499
    num_steps_sampled: 1110000
    num_steps_trained: 1110000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7410731911659241
      kl: 0.01537575013935566
      policy_loss: 0.003400749061256647
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2811 s, 115 iter, 1150000 ts, 303 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-26-51
  done: false
  episode_len_mean: 124.82
  episode_reward_max: 334.2438806716472
  episode_reward_mean: 306.85154913505886
  episode_reward_min: 279.04974795720346
  episodes_this_iter: 80
  episodes_total: 8010
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3421.932
    load_time_ms: 2.467
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.643645167350769
      kl: 0.03041701205074787
      policy_loss: 0.013388321734964848
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 2935 s, 120 iter, 1200000 ts, 309 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-28-53
  done: false
  episode_len_mean: 125.43
  episode_reward_max: 336.62481610394843
  episode_reward_mean: 306.75715282896334
  episode_reward_min: 273.17005959381464
  episodes_this_iter: 79
  episodes_total: 8409
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3425.315
    load_time_ms: 2.508
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.6002882122993469
      kl: 0.031118812039494514
      policy_loss: 0.013270237483084202
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3054 s, 125 iter, 1250000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-30-54
  done: false
  episode_len_mean: 124.78
  episode_reward_max: 342.9473533358055
  episode_reward_mean: 306.18936239082666
  episode_reward_min: 273.1351733872288
  episodes_this_iter: 80
  episodes_total: 8809
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3422.895
    load_time_ms: 2.537
    num_steps_sampled: 1260000
    num_steps_trained: 1260000
    rl_0:
      cur_kl_coeff: 2.602084541880963e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.5474839210510254
      kl: 0.06892186403274536
      policy_loss: 0.045536115765571594
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3173 s, 130 iter, 1300000 ts, 302 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-32-52
  done: false
  episode_len_mean: 125.06
  episode_reward_max: 345.9189380088862
  episode_reward_mean: 306.9550794964829
  episode_reward_min: 267.0970154793025
  episodes_this_iter: 79
  episodes_total: 9209
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3422.089
    load_time_ms: 2.418
    num_steps_sampled: 1310000
    num_steps_trained: 1310000
    rl_0:
      cur_kl_coeff: 1.9759582939187093e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.5111344456672668
      kl: 66.27987670898438
      policy_loss: 0.3428885340690613
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3293 s, 135 iter, 1350000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-34-53
  done: false
  episode_len_mean: 124.25
  episode_reward_max: 339.15318779022215
  episode_reward_mean: 304.3274611016965
  episode_reward_min: 270.0755010927089
  episodes_this_iter: 80
  episodes_total: 9610
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3431.543
    load_time_ms: 2.356
    num_steps_sampled: 1360000
    num_steps_trained: 1360000
    rl_0:
      cur_kl_coeff: 1.0003289535047714e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.5430816411972046
      kl: 0.11189007014036179
      policy_loss: 0.06240376457571983
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3412 s, 140 iter, 1400000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-36-52
  done: false
  episode_len_mean: 125.68
  episode_reward_max: 343.7475555460939
  episode_reward_mean: 308.6267706929708
  episode_reward_min: 275.984861211555
  episodes_this_iter: 80
  episodes_total: 10010
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3435.381
    load_time_ms: 2.348
    num_steps_sampled: 1410000
    num_steps_trained: 1410000
    rl_0:
      cur_kl_coeff: 2.2507397524749446e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.6454499959945679
      kl: 0.03928331285715103
      policy_loss: 0.023287761956453323
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3531 s, 145 iter, 1450000 ts, 308 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-38-52
  done: false
  episode_len_mean: 124.86
  episode_reward_max: 341.0977552962079
  episode_reward_mean: 306.2790285180954
  episode_reward_min: 277.199086670376
  episodes_this_iter: 80
  episodes_total: 10409
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3421.623
    load_time_ms: 2.303
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 7.596247719258219e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.6577038764953613
      kl: 925.1273803710938
      policy_loss: 0.21061846613883972
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3650 s, 150 iter, 1500000 ts, 309 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-40-51
  done: false
  episode_len_mean: 125.54
  episode_reward_max: 361.0635421489158
  episode_reward_mean: 310.30510472742384
  episode_reward_min: 277.7896690799306
  episodes_this_iter: 80
  episodes_total: 10810
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3420.711
    load_time_ms: 2.42
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 1.709156051161732e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.6577786207199097
      kl: 45.616825103759766
      policy_loss: 0.2146947830915451
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3771 s, 155 iter, 1550000 ts, 308 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-42-52
  done: false
  episode_len_mean: 124.41
  episode_reward_max: 342.04696967665427
  episode_reward_mean: 306.9204223130024
  episode_reward_min: 273.28622328919664
  episodes_this_iter: 80
  episodes_total: 11212
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3420.471
    load_time_ms: 2.512
    num_steps_sampled: 1560000
    num_steps_trained: 1560000
    rl_0:
      cur_kl_coeff: 1.2978901621111616e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.6707040071487427
      kl: 67.03335571289062
      policy_loss: 0.18095763027668
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 3889 s, 160 iter, 1600000 ts, 308 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-44-50
  done: false
  episode_len_mean: 124.33
  episode_reward_max: 355.9912651908025
  episode_reward_mean: 307.9653226692262
  episode_reward_min: 273.9652175582305
  episodes_this_iter: 80
  episodes_total: 11614
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3414.851
    load_time_ms: 2.521
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 9.855850910520016e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.6716737747192383
      kl: 0.026923183351755142
      policy_loss: 0.01321965642273426
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4008 s, 165 iter, 1650000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-46-50
  done: false
  episode_len_mean: 123.87
  episode_reward_max: 343.93740634809535
  episode_reward_mean: 303.4375077033933
  episode_reward_min: 269.09048408988065
  episodes_this_iter: 81
  episodes_total: 12016
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3405.568
    load_time_ms: 2.313
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 1.4783777636329445e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.6753020286560059
      kl: 0.020448315888643265
      policy_loss: 0.012878959067165852
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4128 s, 170 iter, 1700000 ts, 303 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-48-49
  done: false
  episode_len_mean: 125.67
  episode_reward_max: 345.6067033997003
  episode_reward_mean: 310.02037574618373
  episode_reward_min: 272.1382515501505
  episodes_this_iter: 79
  episodes_total: 12417
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3409.456
    load_time_ms: 2.369
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 2.2175669842625957e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7473835945129395
      kl: 0.016245922073721886
      policy_loss: 0.008972708135843277
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4247 s, 175 iter, 1750000 ts, 308 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-50-49
  done: false
  episode_len_mean: 124.13
  episode_reward_max: 338.5801767966152
  episode_reward_mean: 306.8767903264743
  episode_reward_min: 272.2086336298788
  episodes_this_iter: 80
  episodes_total: 12819
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3418.836
    load_time_ms: 2.434
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 2.2175669842625957e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.6558217406272888
      kl: 0.0699630081653595
      policy_loss: 0.032229144126176834
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4365 s, 180 iter, 1800000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-52-47
  done: false
  episode_len_mean: 125.07
  episode_reward_max: 341.38840092232584
  episode_reward_mean: 310.26872085742326
  episode_reward_min: 272.93190914470097
  episodes_this_iter: 79
  episodes_total: 13219
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3437.197
    load_time_ms: 2.372
    num_steps_sampled: 1810000
    num_steps_trained: 1810000
    rl_0:
      cur_kl_coeff: 3.326351238723546e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7644453048706055
      kl: 0.031158065423369408
      policy_loss: 0.018600650131702423
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4484 s, 185 iter, 1850000 ts, 309 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-54-46
  done: false
  episode_len_mean: 125.61
  episode_reward_max: 353.6418208511283
  episode_reward_mean: 310.70598196640594
  episode_reward_min: 273.5521447958009
  episodes_this_iter: 80
  episodes_total: 13620
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3438.23
    load_time_ms: 2.521
    num_steps_sampled: 1860000
    num_steps_trained: 1860000
    rl_0:
      cur_kl_coeff: 7.484289609501621e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7785724401473999
      kl: 0.0329458974301815
      policy_loss: 0.021260419860482216
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4603 s, 190 iter, 1900000 ts, 309 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-56-46
  done: false
  episode_len_mean: 125.45
  episode_reward_max: 345.64504424168257
  episode_reward_mean: 310.9552343349848
  episode_reward_min: 277.170535913491
  episodes_this_iter: 79
  episodes_total: 14020
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3437.114
    load_time_ms: 2.653
    num_steps_sampled: 1910000
    num_steps_trained: 1910000
    rl_0:
      cur_kl_coeff: 7.484289609501621e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7987464666366577
      kl: 0.014464554376900196
      policy_loss: 0.009777795523405075
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4723 s, 195 iter, 1950000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_05-58-46
  done: false
  episode_len_mean: 125.66
  episode_reward_max: 356.65991416766263
  episode_reward_mean: 310.8268715968609
  episode_reward_min: 276.5233608972274
  episodes_this_iter: 80
  episodes_total: 14420
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3431.479
    load_time_ms: 2.74
    num_steps_sampled: 1960000
    num_steps_trained: 1960000
    rl_0:
      cur_kl_coeff: 1.1226435091878789e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.824211597442627
      kl: 0.013325154781341553
      policy_loss: 0.010160865262150764
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4844 s, 200 iter, 2000000 ts, 307 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-00-46
  done: false
  episode_len_mean: 125.52
  episode_reward_max: 367.8242364149368
  episode_reward_mean: 312.3215862181055
  episode_reward_min: 276.57078374817934
  episodes_this_iter: 80
  episodes_total: 14820
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3423.705
    load_time_ms: 2.616
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 1.1226435091878789e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.8098200559616089
      kl: 0.017515093088150024
      policy_loss: 0.008952544070780277
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 4963 s, 205 iter, 2050000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-02-47
  done: false
  episode_len_mean: 124.96
  episode_reward_max: 345.62390109901634
  episode_reward_mean: 311.149373792381
  episode_reward_min: 270.16552048849564
  episodes_this_iter: 81
  episodes_total: 15220
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3429.288
    load_time_ms: 2.339
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 1.1226435091878789e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.7760841250419617
      kl: 0.04994034022092819
      policy_loss: 0.02675274759531021
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5082 s, 210 iter, 2100000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-04-46
  done: false
  episode_len_mean: 124.75
  episode_reward_max: 353.2856409653092
  episode_reward_mean: 313.0159151074711
  episode_reward_min: 282.19327047607675
  episodes_this_iter: 79
  episodes_total: 15618
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3433.89
    load_time_ms: 2.189
    num_steps_sampled: 2110000
    num_steps_trained: 2110000
    rl_0:
      cur_kl_coeff: 1.6839651960191826e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.7618141770362854
      kl: 0.013873936608433723
      policy_loss: 0.002357305260375142
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5201 s, 215 iter, 2150000 ts, 310 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-06-45
  done: false
  episode_len_mean: 125.55
  episode_reward_max: 357.3755010147511
  episode_reward_mean: 313.06330491629245
  episode_reward_min: 279.15332625027594
  episodes_this_iter: 79
  episodes_total: 16018
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3433.256
    load_time_ms: 2.504
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 1.6839651960191826e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.7723425030708313
      kl: 0.0838126465678215
      policy_loss: 0.0469927042722702
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5321 s, 220 iter, 2200000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-08-45
  done: false
  episode_len_mean: 125.53
  episode_reward_max: 353.20631417671876
  episode_reward_mean: 316.98052179718593
  episode_reward_min: 275.5954558124322
  episodes_this_iter: 80
  episodes_total: 16416
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3426.976
    load_time_ms: 2.668
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 9.472303041761776e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7436261773109436
      kl: 0.02748674526810646
      policy_loss: 0.01627272740006447
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5441 s, 225 iter, 2250000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-10-45
  done: false
  episode_len_mean: 126.2
  episode_reward_max: 354.1303243550112
  episode_reward_mean: 315.3368790892704
  episode_reward_min: 278.0786180237908
  episodes_this_iter: 79
  episodes_total: 16814
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3418.96
    load_time_ms: 2.545
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 9.472303041761776e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.8406381011009216
      kl: 0.01604658178985119
      policy_loss: 0.009874124079942703
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5561 s, 230 iter, 2300000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-12-45
  done: false
  episode_len_mean: 125.54
  episode_reward_max: 358.8782981943862
  episode_reward_mean: 313.5749324626119
  episode_reward_min: 158.278319626189
  episodes_this_iter: 80
  episodes_total: 17212
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3407.24
    load_time_ms: 2.538
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 1.0656339905542461e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.7743223309516907
      kl: 0.025852616876363754
      policy_loss: 0.011693950742483139
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5681 s, 235 iter, 2350000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-14-46
  done: false
  episode_len_mean: 125.37
  episode_reward_max: 364.70296142246946
  episode_reward_mean: 317.7779658832005
  episode_reward_min: 279.85661426897883
  episodes_this_iter: 81
  episodes_total: 17610
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3422.655
    load_time_ms: 2.454
    num_steps_sampled: 2360000
    num_steps_trained: 2360000
    rl_0:
      cur_kl_coeff: 2.397677054729458e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.7515231370925903
      kl: 0.008531343191862106
      policy_loss: 0.003824894316494465
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5802 s, 240 iter, 2400000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-16-47
  done: false
  episode_len_mean: 126.28
  episode_reward_max: 361.02586784656484
  episode_reward_mean: 319.9372504164719
  episode_reward_min: 273.85758399279024
  episodes_this_iter: 80
  episodes_total: 18006
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3421.633
    load_time_ms: 2.333
    num_steps_sampled: 2410000
    num_steps_trained: 2410000
    rl_0:
      cur_kl_coeff: 1.198838527364729e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.775229275226593
      kl: 0.0202178917825222
      policy_loss: 0.01275753416121006
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 5923 s, 245 iter, 2450000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-18-49
  done: false
  episode_len_mean: 125.74
  episode_reward_max: 369.57466428669255
  episode_reward_mean: 316.09550654518847
  episode_reward_min: 170.35485469974708
  episodes_this_iter: 80
  episodes_total: 18403
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3421.038
    load_time_ms: 2.259
    num_steps_sampled: 2460000
    num_steps_trained: 2460000
    rl_0:
      cur_kl_coeff: 2.9970963184118224e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7409107089042664
      kl: 0.04398801922798157
      policy_loss: 0.012431535869836807
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6045 s, 250 iter, 2500000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-20-50
  done: false
  episode_len_mean: 126.45
  episode_reward_max: 355.2229566093938
  episode_reward_mean: 318.88284305207196
  episode_reward_min: 157.07974681059585
  episodes_this_iter: 79
  episodes_total: 18798
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3434.442
    load_time_ms: 2.345
    num_steps_sampled: 2510000
    num_steps_trained: 2510000
    rl_0:
      cur_kl_coeff: 4.495643969397965e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7951931357383728
      kl: 0.009316991083323956
      policy_loss: -0.0003411871730349958
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6165 s, 255 iter, 2550000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-22-51
  done: false
  episode_len_mean: 127.16
  episode_reward_max: 362.5205209601811
  episode_reward_mean: 316.7970483535166
  episode_reward_min: -139.53144702170394
  episodes_this_iter: 78
  episodes_total: 19193
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3422.78
    load_time_ms: 2.383
    num_steps_sampled: 2560000
    num_steps_trained: 2560000
    rl_0:
      cur_kl_coeff: 2.5287992457424108e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.9146826267242432
      kl: 0.011097044683992863
      policy_loss: -0.00011238073784625158
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6287 s, 260 iter, 2600000 ts, 314 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-24-52
  done: false
  episode_len_mean: 127.32
  episode_reward_max: 365.2133874721221
  episode_reward_mean: 316.1206434686476
  episode_reward_min: 111.35659024800071
  episodes_this_iter: 78
  episodes_total: 19587
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3412.685
    load_time_ms: 2.262
    num_steps_sampled: 2610000
    num_steps_trained: 2610000
    rl_0:
      cur_kl_coeff: 3.793199207426795e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.8545384407043457
      kl: 0.013089112937450409
      policy_loss: 0.0007203368586488068
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6407 s, 265 iter, 2650000 ts, 316 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-26-53
  done: false
  episode_len_mean: 125.48
  episode_reward_max: 360.3616873185495
  episode_reward_mean: 317.3813498646152
  episode_reward_min: -139.55221583780528
  episodes_this_iter: 80
  episodes_total: 19985
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3410.504
    load_time_ms: 2.322
    num_steps_sampled: 2660000
    num_steps_trained: 2660000
    rl_0:
      cur_kl_coeff: 4.267349955388092e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7671862840652466
      kl: 77.59795379638672
      policy_loss: 0.02197156846523285
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6527 s, 270 iter, 2700000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-28-54
  done: false
  episode_len_mean: 126.18
  episode_reward_max: 361.8522912727142
  episode_reward_mean: 323.4651030909856
  episode_reward_min: 282.5226524496032
  episodes_this_iter: 80
  episodes_total: 20383
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3415.069
    load_time_ms: 2.505
    num_steps_sampled: 2710000
    num_steps_trained: 2710000
    rl_0:
      cur_kl_coeff: 6.401025271895316e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7903820872306824
      kl: 0.05211315304040909
      policy_loss: 0.02032223343849182
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6647 s, 275 iter, 2750000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-30-54
  done: false
  episode_len_mean: 126.38
  episode_reward_max: 368.147897655609
  episode_reward_mean: 322.74808885261484
  episode_reward_min: 285.5098542033581
  episodes_this_iter: 79
  episodes_total: 20779
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3419.003
    load_time_ms: 2.522
    num_steps_sampled: 2760000
    num_steps_trained: 2760000
    rl_0:
      cur_kl_coeff: 2.400383799334386e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.8548616170883179
      kl: 0.11505574733018875
      policy_loss: 0.015875142067670822
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6768 s, 280 iter, 2800000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-32-55
  done: false
  episode_len_mean: 125.95
  episode_reward_max: 367.5496675198047
  episode_reward_mean: 320.92874817071805
  episode_reward_min: 281.27392909041913
  episodes_this_iter: 78
  episodes_total: 21176
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3421.79
    load_time_ms: 2.544
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 6.075970618562437e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.8460533022880554
      kl: 32.81127166748047
      policy_loss: 0.06402231007814407
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 6889 s, 285 iter, 2850000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-34-56
  done: false
  episode_len_mean: 126.25
  episode_reward_max: 372.49596035847446
  episode_reward_mean: 311.923776218956
  episode_reward_min: -140.35273580806327
  episodes_this_iter: 79
  episodes_total: 21572
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3416.312
    load_time_ms: 2.416
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 2.0506402023494352e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.8817878365516663
      kl: 30.256187438964844
      policy_loss: 0.045571185648441315
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 7010 s, 290 iter, 2900000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-36-58
  done: false
  episode_len_mean: 127.32
  episode_reward_max: 372.5288542774193
  episode_reward_mean: 324.030401276091
  episode_reward_min: 286.4712460997036
  episodes_this_iter: 79
  episodes_total: 21968
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3402.47
    load_time_ms: 2.241
    num_steps_sampled: 2910000
    num_steps_trained: 2910000
    rl_0:
      cur_kl_coeff: 2.306971074676062e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.8988209962844849
      kl: 0.014435973018407822
      policy_loss: 0.0023898521903902292
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=3852], 7131 s, 295 iter, 2950000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-13_06-38-59
  done: false
  episode_len_mean: 126.17
  episode_reward_max: 377.42600858272016
  episode_reward_mean: 319.2806483676348
  episode_reward_min: 159.3449023834841
  episodes_this_iter: 79
  episodes_total: 22364
  experiment_id: 69ac22f84045446ca531c2e60e3bb351
  hostname: Gandalf
  info:
    grad_time_ms: 3415.306
    load_time_ms: 2.268
    num_steps_sampled: 2960000
    num_steps_trained: 2960000
    rl_0:
      cur_kl_coeff: 2.306971074676062e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.8429166078567505
      kl: 0.009739730507135391
      policy_loss: 0.00043980739428661764
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=3852], 7252 s, 300 iter, 3000000 ts, 315 rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	TERMINATED [pid=3852], 7252 s, 300 iter, 3000000 ts, 315 rew

