# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# --- Flow packages inspected by this cell ----------------------------------
# scenario classes
import flow.scenarios as scenarios
# multi-agent environment classes
import flow.multiagent_envs as flowenvs

# --- Experiment-wide constants ---------------------------------------------
# Length of one rollout; kept in a single variable so that every later cell
# uses the same value.
HORIZON = 103

# Name of the experiment.
experiment_name = "IntersectionExample"

# Name of the scenario class.
# NOTE(review): the `__all__` list printed by this cell shows
# 'IntersectionScenarioTW', not 'IntersectionTWScenario' -- confirm that this
# name is the one the installed Flow package actually resolves.
scenario_name = "IntersectionTWScenario"

# Name of the environment class.
env_name = "MultiAgentIntersectionEnv"

# --- List what the installed Flow package exports --------------------------
print("Available scenarios:")
print(scenarios.__all__)
print("\nAvailable environments:")
print(flowenvs.__all__)

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, forwarded through
# NetParams(additional_params=...).
additionalNetParams = {"edge_length": 40, "lanes": 1, "speed_limit": 30}

net_params = NetParams(
    # default: True -- switched off so that junctions are not skipped
    no_internal_links=False,
    # default: None
    inflows=None,
    # default: None
    osm_path=None,
    # default: None
    netfile=None,
    # default: None -- overridden with the settings defined above
    additional_params=additionalNetParams,
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Start-of-rollout placement settings. Each comment gives the library default
# as noted by the notebook author; "(changed)" marks values the author flagged
# as deliberately different from that default.
initial_config = InitialConfig(
    shuffle=True,                     # default: False (changed)
    spacing="custom",                 # default: "uniform" (changed)
    min_gap=10,                       # default: 0
    perturbation=29.99,               # default: 0.0 (changed)
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. Every argument is spelled out with the value the
# notebook author documented as the library default, so the cell doubles as a
# reference of the available knobs.
sumo_params = SumoParams(
    port=None,                # default: None
    sim_step=0.1,             # default: 0.1
    emission_path=None,       # default: None
    lateral_resolution=None,  # default: None
    no_step_log=True,         # default: True
    render=False,             # default: False
    save_render=False,        # default: False
    sight_radius=25,          # default: 25
    show_radius=False,        # default: False
    pxpm=2,                   # default: 2
    overtake_right=False,     # default: False
    seed=None,                # default: None
    restart_instance=False,   # default: False
    print_warnings=True,      # default: True
    teleport_time=-1,         # default: -1
    num_clients=1,            # default: 1
    sumo_binary=None,         # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, forwarded through
# EnvParams(additional_params=...).
additionalEnvParams = {
    "max_accel": 3,          # maximum acceleration of autonomous vehicles
    "max_decel": 3,          # maximum deceleration of autonomous vehicles
    "target_velocity": 30,
}

env_params = EnvParams(
    # default: None -- overridden with the settings defined above
    additional_params=additionalEnvParams,
    # default: 500 -- tied to the notebook-wide rollout length
    horizon=HORIZON,
    # default: 0
    warmup_steps=0,
    # default: 1
    sims_per_step=1,
    # default: False
    evaluate=False,
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
from random import *

# Container for the vehicle definitions; vehicle types are registered on it
# with vehicles.add(...) and it is later passed on as flow_params["veh"].
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters (default: None); only the speed mode is set here.
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (default: None); left unset.
lc_parameter = None

# Register the RL-controlled vehicles.
# The keyword arguments speed_mode, lane_change_mode,
# sumo_car_following_params and sumo_lc_params were commented out in the
# original call and stay disabled; the speed mode is supplied through
# car_following_params instead.
vehicles.add(
    # name of the vehicle
    veh_id="rl",
    # acceleration controller, default: (SumoCarFollowingController, {})
    acceleration_controller=(RLController, {}),
    # lane change controller, default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # routing controller, default: None
    routing_controller=(ContinuousRouter, {}),
    # initial speed, default: 0
    initial_speed=0,
    # number of vehicles, default: 1
    num_vehicles=2,
    # car-following parameters, default: None
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Bundle everything defined so far into flow_params.
# The dictionary keys must be exactly the ones used below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Parallelism settings for training.
N_CPUS = 2        # number of parallel workers
N_ROLLOUTS = 20   # number of rollouts per training iteration

# Start Ray with one CPU per worker plus one extra.
# NOTE(review): the extra CPU is presumably for the trainer/driver process --
# confirm against the Ray resource accounting for the chosen algorithm.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-11_15-39-55_18336/logs.
Waiting for redis server at 127.0.0.1:43604 to respond...
Waiting for redis server at 127.0.0.1:54079 to respond...
Starting the Plasma object store with 6.5546633210000005 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=30f0302ce2f7f65d3a1489cfbc57b40dfb05c4335bfcd93a



{'node_ip_address': '172.16.123.117',
 'object_store_addresses': ['/tmp/ray/session_2019-03-11_15-39-55_18336/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-11_15-39-55_18336/sockets/raylet'],
 'redis_address': '172.16.123.117:43604',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=30f0302ce2f7f65d3a1489cfbc57b40dfb05c4335bfcd93a'}

In [11]:
import copy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or to a user-defined trainable function
# or class registered in the Tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults. A shallow `.copy()` would share the nested "model"
# and "env_config" dictionaries with the agent class, so the in-place updates
# below would silently modify the class-level default configuration (and leak
# into any other config built from it, or into a re-run of this cell).
config = copy.deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97
# Disabled experiments, kept for reference:
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # 200 default, still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize flow_params to a JSON string and
# store it, together with the algorithm name, in the env_config section.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment.
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping.
# A throw-away environment instance is created only to read the observation
# and action spaces (the cell output shows that this starts SUMO).
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space


def gen_policy():
    """Return a policy spec tuple: (PPOPolicyGraph, obs_space, act_space, {})."""
    return (PPOPolicyGraph, obs_space, act_space, {})


# One separate PPO policy graph per policy id.
policy_graphs = {policy_id: gen_policy() for policy_id in ('rl_0', 'rl_1')}


def policy_mapping_fn(agent_id):
    """Map an agent to its policy: the agent id itself is used as policy id."""
    return agent_id


# Install the multi-agent section, replacing any existing 'multiagent' entry.
config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
}

 Starting SUMO on port 58479


23.849015537022204
25.000549626928986


In [None]:
# Specification of the single experiment to run.
experiment_spec = {
    # RL algorithm to run
    "run": alg_run,
    # environment name generated earlier
    "env": gym_name,
    # configuration params (must match "run" value); shallow copy of config
    "config": dict(config),
    # number of iterations between checkpoints
    "checkpoint_freq": 1,
    "max_failures": 999,
    # stopping conditions
    "stop": {
        # number of iterations to stop after
        "training_iteration": 1000,
    },
}

# Launch training; the experiment is keyed by its exp_tag.
trials = run_experiments({flow_params["exp_tag"]: experiment_spec})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-11_15-39-58ssrs95mm -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-40-41
  done: false
  episode_len_mean: 103.0
  episode_reward_max: 20.473525119472047
  episode_reward_mean: 10.040975470023351
  episode_reward_min: 4.446865024276959
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 2139.944
    load_time_ms: 150.256
    num_steps_sampled: 2200
    num_steps_trained: 

  custom_metrics: {}
  date: 2019-03-11_15-41-10
  done: false
  episode_len_mean: 103.0
  episode_reward_max: 33.8636255653574
  episode_reward_mean: 10.943074297584197
  episode_reward_min: 2.5088519950107915
  episodes_this_iter: 22
  episodes_total: 106
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1142.806
    load_time_ms: 32.316
    num_steps_sampled: 11000
    num_steps_trained: 11000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4244626760482788
      kl: 0.0010611612815409899
      policy_loss: -0.003908729180693626
      total_loss: 3.588604211807251
      vf_explained_var: 0.33883801102638245
      vf_loss: 3.5924999713897705
    rl_1:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4246826171875
      kl: 0.0010549579747021198
      policy_loss: -0.005975959822535515
      total_loss: 3.2014319896698
      vf_explained_va

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 70 s, 9 iter, 19800 ts, 14.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-41-50
  done: false
  episode_len_mean: 103.0
  episode_reward_max: 47.49019103099962
  episode_reward_mean: 15.807334070455195
  episode_reward_min: 2.4599815605381488
  episodes_this_iter: 20
  episodes_total: 212
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1094.976
    load_time_ms: 17.513
    num_steps_sampled: 22000
    num_steps_trained: 22000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.4063326120376587
      kl: 0.0051465872675180435
      policy_loss: -0.007830937393009663
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 129 s, 14 iter, 30800 ts, 12.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-42-49
  done: false
  episode_len_mean: 102.44
  episode_reward_max: 55.49541427057067
  episode_reward_mean: 9.63875273597274
  episode_reward_min: -164.71235867701722
  episodes_this_iter: 22
  episodes_total: 320
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1274.816
    load_time_ms: 2.974
    num_steps_sampled: 33000
    num_steps_trained: 33000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.4215896129608154
      kl: 0.001090708072297275
      policy_loss: -0.0007822535699233413
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 167 s, 19 iter, 41800 ts, 17.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-43-24
  done: false
  episode_len_mean: 102.74
  episode_reward_max: 62.18640583172605
  episode_reward_mean: 16.172823571421098
  episode_reward_min: -160.54216454788255
  episodes_this_iter: 21
  episodes_total: 427
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1246.459
    load_time_ms: 2.864
    num_steps_sampled: 44000
    num_steps_trained: 44000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.4268198013305664
      kl: 0.001855549169704318
      policy_loss: -0.002835640450939536
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 208 s, 24 iter, 52800 ts, 20.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-44-06
  done: false
  episode_len_mean: 102.85
  episode_reward_max: 58.366397868965166
  episode_reward_mean: 21.36546524901523
  episode_reward_min: -156.88082968425294
  episodes_this_iter: 22
  episodes_total: 534
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1024.348
    load_time_ms: 2.811
    num_steps_sampled: 55000
    num_steps_trained: 55000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.4077293872833252
      kl: 0.0007547828136011958
      policy_loss: 0.0004799317684955895
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 241 s, 29 iter, 63800 ts, 29.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-44-43
  done: false
  episode_len_mean: 102.92
  episode_reward_max: 78.7976745187454
  episode_reward_mean: 29.169465157580053
  episode_reward_min: -150.2171392094341
  episodes_this_iter: 20
  episodes_total: 640
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 980.119
    load_time_ms: 3.097
    num_steps_sampled: 66000
    num_steps_trained: 66000
    rl_0:
      cur_kl_coeff: 3.7252903539730653e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.385297179222107
      kl: 0.002854041289538145
      policy_loss: -0.0051377080380916595
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 280 s, 34 iter, 74800 ts, 20.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-45-18
  done: false
  episode_len_mean: 101.8
  episode_reward_max: 76.45347433644453
  episode_reward_mean: 23.27306213825827
  episode_reward_min: -161.1169182775248
  episodes_this_iter: 21
  episodes_total: 749
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 846.744
    load_time_ms: 3.015
    num_steps_sampled: 77000
    num_steps_trained: 77000
    rl_0:
      cur_kl_coeff: 2.3283064712331658e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3771058320999146
      kl: 0.0021300730295479298
      policy_loss: -0.000330275041051209
      total_lo

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 313 s, 39 iter, 85800 ts, 44.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-45-55
  done: false
  episode_len_mean: 102.79
  episode_reward_max: 98.44716095493446
  episode_reward_mean: 47.74255968073939
  episode_reward_min: -152.69682392539534
  episodes_this_iter: 21
  episodes_total: 856
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 875.814
    load_time_ms: 3.104
    num_steps_sampled: 88000
    num_steps_trained: 88000
    rl_0:
      cur_kl_coeff: 7.275957722603643e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.3805408477783203
      kl: 0.0015909959329292178
      policy_loss: 0.0008897623629309237
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 355 s, 44 iter, 96800 ts, 48.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-46-34
  done: false
  episode_len_mean: 102.04
  episode_reward_max: 116.53629364089227
  episode_reward_mean: 50.20086316862097
  episode_reward_min: -160.67461602609353
  episodes_this_iter: 22
  episodes_total: 964
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 897.093
    load_time_ms: 2.93
    num_steps_sampled: 99000
    num_steps_trained: 99000
    rl_0:
      cur_kl_coeff: 4.547473576627277e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.3786206245422363
      kl: 0.0009402100113220513
      policy_loss: -0.006504802033305168
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 393 s, 49 iter, 107800 ts, 68.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-47-13
  done: false
  episode_len_mean: 102.92
  episode_reward_max: 108.93552095474004
  episode_reward_mean: 71.5013607335509
  episode_reward_min: -127.25869950293482
  episodes_this_iter: 22
  episodes_total: 1071
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 958.565
    load_time_ms: 2.809
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 1.421085492696024e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.380571961402893
      kl: 0.0011150395730510354
      policy_loss: -0.0006647999980486929
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 429 s, 54 iter, 118800 ts, 67.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-47-48
  done: false
  episode_len_mean: 101.93
  episode_reward_max: 117.52134028654802
  episode_reward_mean: 69.21033588026425
  episode_reward_min: -160.8371282295389
  episodes_this_iter: 22
  episodes_total: 1179
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 946.168
    load_time_ms: 2.739
    num_steps_sampled: 121000
    num_steps_trained: 121000
    rl_0:
      cur_kl_coeff: 4.440892164675075e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.4010310173034668
      kl: 0.001192743657156825
      policy_loss: -0.006892018485814333
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 462 s, 59 iter, 129800 ts, 60.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-48-22
  done: false
  episode_len_mean: 101.34
  episode_reward_max: 116.75436156954243
  episode_reward_mean: 63.6466404756071
  episode_reward_min: -162.73696315732786
  episodes_this_iter: 21
  episodes_total: 1287
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 790.566
    load_time_ms: 2.328
    num_steps_sampled: 132000
    num_steps_trained: 132000
    rl_0:
      cur_kl_coeff: 1.387778801460961e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.404231071472168
      kl: 0.0006642249645665288
      policy_loss: -0.0009590970003046095
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 495 s, 64 iter, 140800 ts, 73 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-48-55
  done: false
  episode_len_mean: 102.61
  episode_reward_max: 118.38508296868442
  episode_reward_mean: 78.44263543600829
  episode_reward_min: -150.73009011305865
  episodes_this_iter: 22
  episodes_total: 1395
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 796.108
    load_time_ms: 2.159
    num_steps_sampled: 143000
    num_steps_trained: 143000
    rl_0:
      cur_kl_coeff: 4.336808754565503e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.375795602798462
      kl: 0.004631166812032461
      policy_loss: -0.0035513078328222036
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 532 s, 69 iter, 151800 ts, 72.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-49-33
  done: false
  episode_len_mean: 101.8
  episode_reward_max: 119.46783972086463
  episode_reward_mean: 70.92109371800555
  episode_reward_min: -163.1478785198502
  episodes_this_iter: 22
  episodes_total: 1503
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 860.861
    load_time_ms: 2.812
    num_steps_sampled: 154000
    num_steps_trained: 154000
    rl_0:
      cur_kl_coeff: 1.3552527358017197e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.3600672483444214
      kl: 0.009381260722875595
      policy_loss: -0.0068247150629758835
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 571 s, 74 iter, 162800 ts, 69.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-50-11
  done: false
  episode_len_mean: 101.38
  episode_reward_max: 133.78899390907068
  episode_reward_mean: 75.51180438813883
  episode_reward_min: -153.1166431676664
  episodes_this_iter: 22
  episodes_total: 1611
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 944.117
    load_time_ms: 3.252
    num_steps_sampled: 165000
    num_steps_trained: 165000
    rl_0:
      cur_kl_coeff: 4.235164799380374e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.3202283382415771
      kl: 0.0034622319508343935
      policy_loss: -0.0008284243522211909
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 606 s, 79 iter, 173800 ts, 84.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-50-50
  done: false
  episode_len_mean: 101.34
  episode_reward_max: 140.11516037014593
  episode_reward_mean: 87.23879091836177
  episode_reward_min: -163.27029613688
  episodes_this_iter: 21
  episodes_total: 1720
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 944.245
    load_time_ms: 2.934
    num_steps_sampled: 176000
    num_steps_trained: 176000
    rl_0:
      cur_kl_coeff: 1.323488999806367e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.3549443483352661
      kl: 0.0061983815394341946
      policy_loss: -0.008554980158805847
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 643 s, 84 iter, 184800 ts, 93.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-51-24
  done: false
  episode_len_mean: 100.61
  episode_reward_max: 145.68272465285912
  episode_reward_mean: 88.03267394749984
  episode_reward_min: -166.57108657434938
  episodes_this_iter: 22
  episodes_total: 1830
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 888.675
    load_time_ms: 2.902
    num_steps_sampled: 187000
    num_steps_trained: 187000
    rl_0:
      cur_kl_coeff: 4.1359031243948966e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.3478929996490479
      kl: 0.003478762460872531
      policy_loss: -0.002440081909298897
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 675 s, 89 iter, 195800 ts, 91.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-51-57
  done: false
  episode_len_mean: 100.7
  episode_reward_max: 161.14466040639635
  episode_reward_mean: 100.28680227789039
  episode_reward_min: -164.56762076491836
  episodes_this_iter: 22
  episodes_total: 1939
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 823.408
    load_time_ms: 2.832
    num_steps_sampled: 198000
    num_steps_trained: 198000
    rl_0:
      cur_kl_coeff: 1.2924697263734052e-27
      cur_lr: 4.999999873689376e-05
      entropy: 1.285119891166687
      kl: 0.00522854458540678
      policy_loss: -0.005101319868117571
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 705 s, 94 iter, 206800 ts, 115 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-52-27
  done: false
  episode_len_mean: 101.03
  episode_reward_max: 166.03095336953754
  episode_reward_mean: 111.80079027557747
  episode_reward_min: -150.47072813322245
  episodes_this_iter: 22
  episodes_total: 2047
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 755.279
    load_time_ms: 2.825
    num_steps_sampled: 209000
    num_steps_trained: 209000
    rl_0:
      cur_kl_coeff: 4.038967894916891e-29
      cur_lr: 4.999999873689376e-05
      entropy: 1.2436350584030151
      kl: 0.005122196860611439
      policy_loss: -0.000374405033653602
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 735 s, 99 iter, 217800 ts, 89.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-52-56
  done: false
  episode_len_mean: 97.59
  episode_reward_max: 175.29799474668172
  episode_reward_mean: 87.44991649826554
  episode_reward_min: -149.6321780526883
  episodes_this_iter: 23
  episodes_total: 2160
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 740.765
    load_time_ms: 2.697
    num_steps_sampled: 220000
    num_steps_trained: 220000
    rl_0:
      cur_kl_coeff: 1.2621774671615285e-30
      cur_lr: 4.999999873689376e-05
      entropy: 1.2920277118682861
      kl: 0.003311971202492714
      policy_loss: -0.0010961368680000305
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 765 s, 104 iter, 228800 ts, 109 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-53-27
  done: false
  episode_len_mean: 97.84
  episode_reward_max: 196.50060415955866
  episode_reward_mean: 108.3820174176103
  episode_reward_min: -160.33116593322453
  episodes_this_iter: 23
  episodes_total: 2272
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 757.915
    load_time_ms: 2.376
    num_steps_sampled: 231000
    num_steps_trained: 231000
    rl_0:
      cur_kl_coeff: 3.9443045848797766e-32
      cur_lr: 4.999999873689376e-05
      entropy: 1.2978965044021606
      kl: 0.004736135248094797
      policy_loss: -0.001290210522711277
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 795 s, 109 iter, 239800 ts, 112 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-53-57
  done: false
  episode_len_mean: 99.54
  episode_reward_max: 201.25246147981449
  episode_reward_mean: 119.91908739673114
  episode_reward_min: -161.33600366622062
  episodes_this_iter: 21
  episodes_total: 2383
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 775.973
    load_time_ms: 2.497
    num_steps_sampled: 242000
    num_steps_trained: 242000
    rl_0:
      cur_kl_coeff: 1.2325951827749302e-33
      cur_lr: 4.999999873689376e-05
      entropy: 1.3864858150482178
      kl: 0.002138093113899231
      policy_loss: -0.0038701549638062716
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 828 s, 114 iter, 250800 ts, 140 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-54-30
  done: false
  episode_len_mean: 99.51
  episode_reward_max: 207.6613034779724
  episode_reward_mean: 132.950257219581
  episode_reward_min: -162.5188357660861
  episodes_this_iter: 23
  episodes_total: 2494
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 791.811
    load_time_ms: 2.539
    num_steps_sampled: 253000
    num_steps_trained: 253000
    rl_0:
      cur_kl_coeff: 3.851859946171657e-35
      cur_lr: 4.999999873689376e-05
      entropy: 1.2781850099563599
      kl: 0.0031837248243391514
      policy_loss: -0.009462312795221806
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 858 s, 119 iter, 261800 ts, 131 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-55-00
  done: false
  episode_len_mean: 99.5
  episode_reward_max: 211.2676513815722
  episode_reward_mean: 131.8890007458254
  episode_reward_min: -153.7058817029452
  episodes_this_iter: 22
  episodes_total: 2604
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 779.051
    load_time_ms: 2.213
    num_steps_sampled: 264000
    num_steps_trained: 264000
    rl_0:
      cur_kl_coeff: 1.2037062331786428e-36
      cur_lr: 4.999999873689376e-05
      entropy: 1.344802737236023
      kl: 0.003838219679892063
      policy_loss: -0.005248131696134806
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 892 s, 124 iter, 272800 ts, 117 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-55-36
  done: false
  episode_len_mean: 97.1
  episode_reward_max: 210.1846355543833
  episode_reward_mean: 125.05024131706915
  episode_reward_min: -156.7601679535291
  episodes_this_iter: 21
  episodes_total: 2717
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 817.084
    load_time_ms: 2.343
    num_steps_sampled: 275000
    num_steps_trained: 275000
    rl_0:
      cur_kl_coeff: 7.523163957366517e-38
      cur_lr: 4.999999873689376e-05
      entropy: 1.2889117002487183
      kl: 0.0026530460454523563
      policy_loss: -0.002741782460361719
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 922 s, 129 iter, 283800 ts, 139 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-56-07
  done: false
  episode_len_mean: 97.65
  episode_reward_max: 219.6439429773641
  episode_reward_mean: 130.5641963416714
  episode_reward_min: -158.09936106365836
  episodes_this_iter: 22
  episodes_total: 2829
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 811.514
    load_time_ms: 2.512
    num_steps_sampled: 286000
    num_steps_trained: 286000
    rl_0:
      cur_kl_coeff: 2.3509892621639607e-39
      cur_lr: 4.999999873689376e-05
      entropy: 1.3315669298171997
      kl: 0.0023690923117101192
      policy_loss: -0.0010575783671811223
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 963 s, 134 iter, 294800 ts, 118 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-56-48
  done: false
  episode_len_mean: 95.14
  episode_reward_max: 212.23138767719786
  episode_reward_mean: 109.64198654170697
  episode_reward_min: -166.57277856069885
  episodes_this_iter: 23
  episodes_total: 2944
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 894.168
    load_time_ms: 2.609
    num_steps_sampled: 297000
    num_steps_trained: 297000
    rl_0:
      cur_kl_coeff: 7.346867718608583e-41
      cur_lr: 4.999999873689376e-05
      entropy: 1.2651565074920654
      kl: 0.006889798678457737
      policy_loss: -0.00711700227111578
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1011 s, 139 iter, 305800 ts, 118 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-57-38
  done: false
  episode_len_mean: 97.81
  episode_reward_max: 207.7967520386459
  episode_reward_mean: 120.69336981047927
  episode_reward_min: -165.97566408025665
  episodes_this_iter: 22
  episodes_total: 3057
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1120.795
    load_time_ms: 3.226
    num_steps_sampled: 308000
    num_steps_trained: 308000
    rl_0:
      cur_kl_coeff: 2.2953268845640504e-42
      cur_lr: 4.999999873689376e-05
      entropy: 1.2376960515975952
      kl: 0.0048045930452644825
      policy_loss: -0.004942729137837887
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1058 s, 144 iter, 316800 ts, 133 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-58-29
  done: false
  episode_len_mean: 98.55
  episode_reward_max: 207.94430922421756
  episode_reward_mean: 127.00192606750801
  episode_reward_min: -164.02226937970917
  episodes_this_iter: 23
  episodes_total: 3169
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1311.656
    load_time_ms: 3.478
    num_steps_sampled: 319000
    num_steps_trained: 319000
    rl_0:
      cur_kl_coeff: 7.146622168056567e-44
      cur_lr: 4.999999873689376e-05
      entropy: 1.269714593887329
      kl: 0.0050214785151183605
      policy_loss: -0.005280486773699522
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1115 s, 149 iter, 327800 ts, 129 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_15-59-27
  done: false
  episode_len_mean: 99.65
  episode_reward_max: 210.4519148490745
  episode_reward_mean: 144.3603625682406
  episode_reward_min: -164.4469266107427
  episodes_this_iter: 22
  episodes_total: 3279
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1561.439
    load_time_ms: 3.374
    num_steps_sampled: 330000
    num_steps_trained: 330000
    rl_0:
      cur_kl_coeff: 2.802596928649634e-45
      cur_lr: 4.999999873689376e-05
      entropy: 1.2936639785766602
      kl: 0.0023489699233323336
      policy_loss: -0.001282232697121799
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1163 s, 154 iter, 338800 ts, 141 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-00-12
  done: false
  episode_len_mean: 99.51
  episode_reward_max: 209.4267740934472
  episode_reward_mean: 135.9027411915414
  episode_reward_min: -139.07372512495544
  episodes_this_iter: 23
  episodes_total: 3390
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1341.615
    load_time_ms: 3.029
    num_steps_sampled: 341000
    num_steps_trained: 341000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.33184015750885
      kl: 0.0021953117102384567
      policy_loss: -0.002281771507114172
      total_loss: 213.446289

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1202 s, 159 iter, 349800 ts, 129 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-00-48
  done: false
  episode_len_mean: 97.24
  episode_reward_max: 221.72225488413036
  episode_reward_mean: 133.58994017714903
  episode_reward_min: -154.56841582599412
  episodes_this_iter: 21
  episodes_total: 3502
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 944.06
    load_time_ms: 2.814
    num_steps_sampled: 352000
    num_steps_trained: 352000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.2761958837509155
      kl: 0.0012208414264023304
      policy_loss: -0.004537135828286409
      total_loss: 215.9759

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1232 s, 164 iter, 360800 ts, 146 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-01-18
  done: false
  episode_len_mean: 96.96
  episode_reward_max: 220.99627139156175
  episode_reward_mean: 133.097143995007
  episode_reward_min: -166.43711882833736
  episodes_this_iter: 24
  episodes_total: 3616
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 846.124
    load_time_ms: 2.56
    num_steps_sampled: 363000
    num_steps_trained: 363000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.348610758781433
      kl: 0.005988576449453831
      policy_loss: -0.0068203299306333065
      total_loss: 253.6280975

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1265 s, 169 iter, 371800 ts, 135 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-01-52
  done: false
  episode_len_mean: 96.44
  episode_reward_max: 227.62228286646135
  episode_reward_mean: 134.16096690411473
  episode_reward_min: -168.60545036844218
  episodes_this_iter: 22
  episodes_total: 3729
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 825.896
    load_time_ms: 2.269
    num_steps_sampled: 374000
    num_steps_trained: 374000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.1072168350219727
      kl: 0.007354432251304388
      policy_loss: -0.003852916182950139
      total_loss: 252.1246

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1303 s, 174 iter, 382800 ts, 131 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-02-32
  done: false
  episode_len_mean: 94.67
  episode_reward_max: 241.58410756747188
  episode_reward_mean: 127.49338044307751
  episode_reward_min: -169.84269680195897
  episodes_this_iter: 23
  episodes_total: 3844
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 911.447
    load_time_ms: 2.859
    num_steps_sampled: 385000
    num_steps_trained: 385000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.1699331998825073
      kl: 0.01102614775300026
      policy_loss: -0.011073424480855465
      total_loss: 406.83612

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1339 s, 179 iter, 393800 ts, 96.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-03-06
  done: false
  episode_len_mean: 92.87
  episode_reward_max: 243.98924253690728
  episode_reward_mean: 109.89882069743864
  episode_reward_min: -162.77544957774248
  episodes_this_iter: 23
  episodes_total: 3965
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 891.325
    load_time_ms: 3.053
    num_steps_sampled: 396000
    num_steps_trained: 396000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.1574740409851074
      kl: 0.0109963808208704
      policy_loss: -0.006750437431037426
      total_loss: 376.05706

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1373 s, 184 iter, 404800 ts, 116 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-03-39
  done: false
  episode_len_mean: 91.73
  episode_reward_max: 241.0317016475445
  episode_reward_mean: 112.51137015865332
  episode_reward_min: -161.99126618904197
  episodes_this_iter: 23
  episodes_total: 4084
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 806.73
    load_time_ms: 2.853
    num_steps_sampled: 407000
    num_steps_trained: 407000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.1315335035324097
      kl: 0.004592530895024538
      policy_loss: -0.0004633581847883761
      total_loss: 344.41229

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1403 s, 189 iter, 415800 ts, 114 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-04-10
  done: false
  episode_len_mean: 96.77
  episode_reward_max: 228.63638868841863
  episode_reward_mean: 132.9572132927694
  episode_reward_min: -157.7792714805761
  episodes_this_iter: 22
  episodes_total: 4199
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 794.732
    load_time_ms: 2.65
    num_steps_sampled: 418000
    num_steps_trained: 418000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.0956873893737793
      kl: 0.0033663157373666763
      policy_loss: -0.003078538691624999
      total_loss: 182.003707

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1436 s, 194 iter, 426800 ts, 125 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-04-45
  done: false
  episode_len_mean: 93.98
  episode_reward_max: 225.61770540418388
  episode_reward_mean: 119.45427405512294
  episode_reward_min: -158.45297829865203
  episodes_this_iter: 25
  episodes_total: 4317
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 816.942
    load_time_ms: 2.476
    num_steps_sampled: 429000
    num_steps_trained: 429000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.0769513845443726
      kl: 0.010908316820859909
      policy_loss: -0.007147990632802248
      total_loss: 555.7936

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1475 s, 199 iter, 437800 ts, 127 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-05-23
  done: false
  episode_len_mean: 95.1
  episode_reward_max: 232.48050606005233
  episode_reward_mean: 122.34757533057174
  episode_reward_min: -157.1581162571676
  episodes_this_iter: 25
  episodes_total: 4433
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 850.915
    load_time_ms: 2.794
    num_steps_sampled: 440000
    num_steps_trained: 440000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.1095833778381348
      kl: 0.005511419847607613
      policy_loss: -0.007606375031173229
      total_loss: 508.182678

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1509 s, 204 iter, 448800 ts, 133 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-05-59
  done: false
  episode_len_mean: 97.15
  episode_reward_max: 238.25098715908064
  episode_reward_mean: 139.98206969014385
  episode_reward_min: -167.00011666723907
  episodes_this_iter: 23
  episodes_total: 4547
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 861.663
    load_time_ms: 3.178
    num_steps_sampled: 451000
    num_steps_trained: 451000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.2467050552368164
      kl: 0.004487333819270134
      policy_loss: -0.002787298057228327
      total_loss: 351.8761

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1544 s, 209 iter, 459800 ts, 143 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-06-32
  done: false
  episode_len_mean: 95.87
  episode_reward_max: 237.30580011975164
  episode_reward_mean: 142.58935413275867
  episode_reward_min: -161.29704716027328
  episodes_this_iter: 23
  episodes_total: 4661
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 832.595
    load_time_ms: 2.996
    num_steps_sampled: 462000
    num_steps_trained: 462000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.0582051277160645
      kl: 0.011929720640182495
      policy_loss: -0.00443922309204936
      total_loss: 339.85482

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1576 s, 214 iter, 470800 ts, 136 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-07-07
  done: false
  episode_len_mean: 94.53
  episode_reward_max: 240.3556101764815
  episode_reward_mean: 131.69272279374377
  episode_reward_min: -160.7887186962298
  episodes_this_iter: 24
  episodes_total: 4777
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 814.197
    load_time_ms: 2.552
    num_steps_sampled: 473000
    num_steps_trained: 473000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.1140434741973877
      kl: 0.009725179523229599
      policy_loss: -0.012288333848118782
      total_loss: 288.647460

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1617 s, 219 iter, 481800 ts, 131 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-07-48
  done: false
  episode_len_mean: 96.56
  episode_reward_max: 238.8936840764414
  episode_reward_mean: 140.20934354237082
  episode_reward_min: -155.07037064152078
  episodes_this_iter: 23
  episodes_total: 4891
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 989.933
    load_time_ms: 3.296
    num_steps_sampled: 484000
    num_steps_trained: 484000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.0203344821929932
      kl: 0.0032200529240071774
      policy_loss: -0.00994054228067398
      total_loss: 254.16630

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1682 s, 224 iter, 492800 ts, 163 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-09-00
  done: false
  episode_len_mean: 99.29
  episode_reward_max: 240.096585187164
  episode_reward_mean: 170.78956437857664
  episode_reward_min: -149.6073012379351
  episodes_this_iter: 22
  episodes_total: 5002
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1405.085
    load_time_ms: 4.246
    num_steps_sampled: 495000
    num_steps_trained: 495000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.989587664604187
      kl: 0.006926714442670345
      policy_loss: -0.005555721931159496
      total_loss: 162.0602264

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1732 s, 229 iter, 503800 ts, 152 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-09-41
  done: false
  episode_len_mean: 96.36
  episode_reward_max: 239.9165804027806
  episode_reward_mean: 151.96962701103521
  episode_reward_min: -158.6449424838581
  episodes_this_iter: 23
  episodes_total: 5115
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1320.865
    load_time_ms: 3.603
    num_steps_sampled: 506000
    num_steps_trained: 506000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.0126181840896606
      kl: 0.004654018208384514
      policy_loss: -0.004724584519863129
      total_loss: 239.96560

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1768 s, 234 iter, 514800 ts, 145 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-10-18
  done: false
  episode_len_mean: 95.56
  episode_reward_max: 241.96730236038806
  episode_reward_mean: 145.46201411244084
  episode_reward_min: -160.60369761109695
  episodes_this_iter: 22
  episodes_total: 5230
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 937.19
    load_time_ms: 2.996
    num_steps_sampled: 517000
    num_steps_trained: 517000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8708034157752991
      kl: 0.0028611302841454744
      policy_loss: -0.0014976206002756953
      total_loss: 247.928

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1805 s, 239 iter, 525800 ts, 155 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-10-55
  done: false
  episode_len_mean: 94.87
  episode_reward_max: 242.19443925693798
  episode_reward_mean: 145.39811284366635
  episode_reward_min: -166.43000662314245
  episodes_this_iter: 24
  episodes_total: 5346
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 912.221
    load_time_ms: 3.168
    num_steps_sampled: 528000
    num_steps_trained: 528000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8650066256523132
      kl: 0.005077164154499769
      policy_loss: -0.0023566510062664747
      total_loss: 266.142

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1836 s, 244 iter, 536800 ts, 114 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-11-26
  done: false
  episode_len_mean: 88.26
  episode_reward_max: 242.91792255786706
  episode_reward_mean: 101.80121691836003
  episode_reward_min: -168.26902011365763
  episodes_this_iter: 25
  episodes_total: 5468
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 871.433
    load_time_ms: 2.992
    num_steps_sampled: 539000
    num_steps_trained: 539000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6978750228881836
      kl: 0.009727905504405499
      policy_loss: -0.007102080155164003
      total_loss: 437.4754

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1871 s, 249 iter, 547800 ts, 155 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-12-02
  done: false
  episode_len_mean: 97.09
  episode_reward_max: 240.32440388864978
  episode_reward_mean: 162.43495610557375
  episode_reward_min: -158.2452360537232
  episodes_this_iter: 23
  episodes_total: 5581
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 849.749
    load_time_ms: 2.806
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7346407175064087
      kl: 0.005985027179121971
      policy_loss: -0.0023802071809768677
      total_loss: 442.0264

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1906 s, 254 iter, 558800 ts, 166 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-12-40
  done: false
  episode_len_mean: 95.51
  episode_reward_max: 242.27019239833066
  episode_reward_mean: 153.77326255799255
  episode_reward_min: -162.59114545019173
  episodes_this_iter: 24
  episodes_total: 5696
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 925.809
    load_time_ms: 2.769
    num_steps_sampled: 561000
    num_steps_trained: 561000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8736523985862732
      kl: 0.009384742937982082
      policy_loss: -0.009632669389247894
      total_loss: 627.2400

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 1952 s, 259 iter, 569800 ts, 141 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-13-26
  done: false
  episode_len_mean: 93.13
  episode_reward_max: 242.9359332764787
  episode_reward_mean: 141.33594086020014
  episode_reward_min: -165.0921293940879
  episodes_this_iter: 24
  episodes_total: 5815
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 986.35
    load_time_ms: 2.776
    num_steps_sampled: 572000
    num_steps_trained: 572000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8172053098678589
      kl: 0.006821326911449432
      policy_loss: -0.00455797603353858
      total_loss: 209.29394531

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2004 s, 264 iter, 580800 ts, 140 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-14-20
  done: false
  episode_len_mean: 92.25
  episode_reward_max: 240.40790184668037
  episode_reward_mean: 126.77948686736903
  episode_reward_min: -161.30249704038164
  episodes_this_iter: 24
  episodes_total: 5932
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1220.139
    load_time_ms: 3.184
    num_steps_sampled: 583000
    num_steps_trained: 583000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9271187782287598
      kl: 0.008971292525529861
      policy_loss: -0.009464721195399761
      total_loss: 352.618

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2054 s, 269 iter, 591800 ts, 132 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-15-06
  done: false
  episode_len_mean: 93.7
  episode_reward_max: 240.73216508234822
  episode_reward_mean: 137.7026385656263
  episode_reward_min: -163.50023133931617
  episodes_this_iter: 24
  episodes_total: 6049
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1320.109
    load_time_ms: 3.41
    num_steps_sampled: 594000
    num_steps_trained: 594000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.883965015411377
      kl: 0.004609987139701843
      policy_loss: -0.0007709663477726281
      total_loss: 240.734497

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2089 s, 274 iter, 602800 ts, 165 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-15-42
  done: false
  episode_len_mean: 97.59
  episode_reward_max: 242.32340813255155
  episode_reward_mean: 168.6753523939946
  episode_reward_min: -165.3178224103353
  episodes_this_iter: 23
  episodes_total: 6162
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1037.945
    load_time_ms: 3.066
    num_steps_sampled: 605000
    num_steps_trained: 605000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.0060453414916992
      kl: 0.0043619112111628056
      policy_loss: -0.000280582724371925
      total_loss: 111.6141

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2127 s, 279 iter, 613800 ts, 152 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-16-21
  done: false
  episode_len_mean: 96.12
  episode_reward_max: 241.04115589475168
  episode_reward_mean: 160.5424514373566
  episode_reward_min: -166.9033773441019
  episodes_this_iter: 24
  episodes_total: 6278
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 892.834
    load_time_ms: 2.959
    num_steps_sampled: 616000
    num_steps_trained: 616000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8623655438423157
      kl: 0.005027846898883581
      policy_loss: -0.009568349458277225
      total_loss: 188.164047

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2167 s, 284 iter, 624800 ts, 162 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-16-59
  done: false
  episode_len_mean: 96.02
  episode_reward_max: 240.7784945780714
  episode_reward_mean: 155.84864193327297
  episode_reward_min: -144.00750629555708
  episodes_this_iter: 23
  episodes_total: 6392
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 929.176
    load_time_ms: 3.249
    num_steps_sampled: 627000
    num_steps_trained: 627000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.911747395992279
      kl: 0.010778870433568954
      policy_loss: -0.005213033873587847
      total_loss: 168.923065

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2203 s, 289 iter, 635800 ts, 140 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-17-36
  done: false
  episode_len_mean: 94.38
  episode_reward_max: 241.56952446750591
  episode_reward_mean: 147.43675680350105
  episode_reward_min: -160.1255205498794
  episodes_this_iter: 24
  episodes_total: 6509
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 913.012
    load_time_ms: 3.12
    num_steps_sampled: 638000
    num_steps_trained: 638000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 1.0154719352722168
      kl: 0.0041975597850978374
      policy_loss: -0.0069076488725841045
      total_loss: 134.9812

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2249 s, 294 iter, 646800 ts, 167 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-18-24
  done: false
  episode_len_mean: 97.42
  episode_reward_max: 240.2476557391337
  episode_reward_mean: 168.65528857621484
  episode_reward_min: -162.84056122736183
  episodes_this_iter: 22
  episodes_total: 6621
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 935.523
    load_time_ms: 2.689
    num_steps_sampled: 649000
    num_steps_trained: 649000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8801180124282837
      kl: 0.003779039718210697
      policy_loss: 0.001234059571288526
      total_loss: 108.466697

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2303 s, 299 iter, 657800 ts, 184 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-19-26
  done: false
  episode_len_mean: 99.45
  episode_reward_max: 240.46119583839584
  episode_reward_mean: 187.22468723397685
  episode_reward_min: -146.23380305212953
  episodes_this_iter: 21
  episodes_total: 6732
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1324.776
    load_time_ms: 3.246
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5772624015808105
      kl: 0.013520817272365093
      policy_loss: -0.00824824534356594
      total_loss: 107.5913

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2367 s, 304 iter, 668800 ts, 149 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-20-41
  done: false
  episode_len_mean: 93.86
  episode_reward_max: 241.0688495800505
  episode_reward_mean: 143.59680336950817
  episode_reward_min: -159.38073035624393
  episodes_this_iter: 24
  episodes_total: 6851
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 2013.164
    load_time_ms: 4.838
    num_steps_sampled: 671000
    num_steps_trained: 671000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7654991149902344
      kl: 0.005762824323028326
      policy_loss: -0.00902276299893856
      total_loss: 273.07858

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2459 s, 309 iter, 679800 ts, 155 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-21-53
  done: false
  episode_len_mean: 95.77
  episode_reward_max: 240.71794314336165
  episode_reward_mean: 162.58800414084598
  episode_reward_min: -163.83137908324233
  episodes_this_iter: 22
  episodes_total: 6966
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1972.027
    load_time_ms: 4.665
    num_steps_sampled: 682000
    num_steps_trained: 682000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.580166220664978
      kl: 0.0038735498674213886
      policy_loss: 0.00043294273200444877
      total_loss: 193.24

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2499 s, 314 iter, 690800 ts, 159 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-22-36
  done: false
  episode_len_mean: 96.0
  episode_reward_max: 239.73674611981642
  episode_reward_mean: 160.03995306693224
  episode_reward_min: -152.6246685307146
  episodes_this_iter: 23
  episodes_total: 7081
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1331.974
    load_time_ms: 3.213
    num_steps_sampled: 693000
    num_steps_trained: 693000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.9019175171852112
      kl: 0.01235540397465229
      policy_loss: -0.005907953716814518
      total_loss: 273.882324

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2539 s, 319 iter, 701800 ts, 153 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-23-17
  done: false
  episode_len_mean: 93.51
  episode_reward_max: 241.17722532774127
  episode_reward_mean: 143.7188468792846
  episode_reward_min: -154.8064936518001
  episodes_this_iter: 23
  episodes_total: 7197
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1022.259
    load_time_ms: 2.931
    num_steps_sampled: 704000
    num_steps_trained: 704000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5012513399124146
      kl: 0.013862086459994316
      policy_loss: -0.00021300881053321064
      total_loss: 223.129

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2582 s, 324 iter, 712800 ts, 147 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-23-58
  done: false
  episode_len_mean: 93.67
  episode_reward_max: 240.9923372411699
  episode_reward_mean: 146.81290616058698
  episode_reward_min: -164.84876534932383
  episodes_this_iter: 25
  episodes_total: 7316
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 938.197
    load_time_ms: 3.179
    num_steps_sampled: 715000
    num_steps_trained: 715000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.568491518497467
      kl: 0.004542650189250708
      policy_loss: 0.0007492058211937547
      total_loss: 192.331069

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2619 s, 329 iter, 723800 ts, 148 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-24-35
  done: false
  episode_len_mean: 95.8
  episode_reward_max: 240.9713639610447
  episode_reward_mean: 157.19545723790034
  episode_reward_min: -156.4631541951889
  episodes_this_iter: 23
  episodes_total: 7430
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 853.389
    load_time_ms: 2.998
    num_steps_sampled: 726000
    num_steps_trained: 726000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.46073412895202637
      kl: 0.009043190628290176
      policy_loss: -0.004172187764197588
      total_loss: 154.853225

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2664 s, 334 iter, 734800 ts, 180 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-25-28
  done: false
  episode_len_mean: 97.74
  episode_reward_max: 241.1160347752291
  episode_reward_mean: 171.98641084188247
  episode_reward_min: -164.35995566609583
  episodes_this_iter: 22
  episodes_total: 7542
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1151.106
    load_time_ms: 3.395
    num_steps_sampled: 737000
    num_steps_trained: 737000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8123093843460083
      kl: 0.005905658472329378
      policy_loss: -0.001435263198800385
      total_loss: 437.0161

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2725 s, 339 iter, 745800 ts, 144 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-26-22
  done: false
  episode_len_mean: 93.03
  episode_reward_max: 241.7068698337267
  episode_reward_mean: 141.67812262525865
  episode_reward_min: -164.68494560966036
  episodes_this_iter: 23
  episodes_total: 7659
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1414.44
    load_time_ms: 3.689
    num_steps_sampled: 748000
    num_steps_trained: 748000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.36198890209198
      kl: 0.00799498800188303
      policy_loss: 0.00034627580316737294
      total_loss: 103.3044281

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2767 s, 344 iter, 756800 ts, 153 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-27-04
  done: false
  episode_len_mean: 94.44
  episode_reward_max: 241.98101122425308
  episode_reward_mean: 152.87951443393638
  episode_reward_min: -166.33196117948322
  episodes_this_iter: 24
  episodes_total: 7776
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1128.294
    load_time_ms: 3.099
    num_steps_sampled: 759000
    num_steps_trained: 759000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5643333196640015
      kl: 0.006624795030802488
      policy_loss: -0.004190121311694384
      total_loss: 344.219

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2806 s, 349 iter, 767800 ts, 156 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-27-44
  done: false
  episode_len_mean: 96.27
  episode_reward_max: 241.83780821041452
  episode_reward_mean: 161.0041403641717
  episode_reward_min: -151.48555810982862
  episodes_this_iter: 23
  episodes_total: 7890
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 945.806
    load_time_ms: 2.899
    num_steps_sampled: 770000
    num_steps_trained: 770000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6000915169715881
      kl: 0.008015675470232964
      policy_loss: -0.001696350984275341
      total_loss: 251.88027

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2843 s, 354 iter, 778800 ts, 173 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-28-21
  done: false
  episode_len_mean: 98.73
  episode_reward_max: 241.7167340920024
  episode_reward_mean: 183.91781713020686
  episode_reward_min: -160.93255915554957
  episodes_this_iter: 22
  episodes_total: 8002
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 892.126
    load_time_ms: 2.739
    num_steps_sampled: 781000
    num_steps_trained: 781000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6337037086486816
      kl: 0.02203279361128807
      policy_loss: 0.01112437155097723
      total_loss: 118.82781982

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2878 s, 359 iter, 789800 ts, 161 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-28-56
  done: false
  episode_len_mean: 94.57
  episode_reward_max: 241.94087295249108
  episode_reward_mean: 152.45088691445181
  episode_reward_min: -157.59641870142383
  episodes_this_iter: 23
  episodes_total: 8118
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 869.957
    load_time_ms: 2.746
    num_steps_sampled: 792000
    num_steps_trained: 792000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.653792679309845
      kl: 0.00552340829744935
      policy_loss: -0.0032445918768644333
      total_loss: 461.81329

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2916 s, 364 iter, 800800 ts, 154 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-29-35
  done: false
  episode_len_mean: 92.46
  episode_reward_max: 240.49967907361255
  episode_reward_mean: 142.20211749075258
  episode_reward_min: -166.00544206606466
  episodes_this_iter: 22
  episodes_total: 8235
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 939.778
    load_time_ms: 2.991
    num_steps_sampled: 803000
    num_steps_trained: 803000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7715054154396057
      kl: 0.00704301567748189
      policy_loss: -0.005325383972376585
      total_loss: 457.77120

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2948 s, 369 iter, 811800 ts, 161 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-30-08
  done: false
  episode_len_mean: 96.34
  episode_reward_max: 241.03463946435463
  episode_reward_mean: 161.8811057193254
  episode_reward_min: -161.95031944653113
  episodes_this_iter: 23
  episodes_total: 8350
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 851.044
    load_time_ms: 2.861
    num_steps_sampled: 814000
    num_steps_trained: 814000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5621099472045898
      kl: 0.005078576970845461
      policy_loss: -0.001006695325486362
      total_loss: 265.71609

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 2983 s, 374 iter, 822800 ts, 153 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-30-41
  done: false
  episode_len_mean: 94.3
  episode_reward_max: 241.2454066366317
  episode_reward_mean: 155.03603398349512
  episode_reward_min: -154.91464728419368
  episodes_this_iter: 22
  episodes_total: 8465
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 763.388
    load_time_ms: 2.558
    num_steps_sampled: 825000
    num_steps_trained: 825000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.45730090141296387
      kl: 0.005331714637577534
      policy_loss: 0.0006628803675994277
      total_loss: 97.392433

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3018 s, 379 iter, 833800 ts, 131 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-31-16
  done: false
  episode_len_mean: 93.77
  episode_reward_max: 239.90086354523925
  episode_reward_mean: 145.01348910002895
  episode_reward_min: -164.2198158648497
  episodes_this_iter: 23
  episodes_total: 8584
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 829.649
    load_time_ms: 2.754
    num_steps_sampled: 836000
    num_steps_trained: 836000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6828339099884033
      kl: 0.012333767488598824
      policy_loss: -0.004979840479791164
      total_loss: 150.46333

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3050 s, 384 iter, 844800 ts, 151 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-31-51
  done: false
  episode_len_mean: 94.8
  episode_reward_max: 240.97782908225253
  episode_reward_mean: 152.25516404319836
  episode_reward_min: -164.06719252464598
  episodes_this_iter: 22
  episodes_total: 8700
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 863.671
    load_time_ms: 3.066
    num_steps_sampled: 847000
    num_steps_trained: 847000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.39777541160583496
      kl: 0.020826326683163643
      policy_loss: -0.004733850713819265
      total_loss: 110.1316

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3087 s, 389 iter, 855800 ts, 155 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-32-26
  done: false
  episode_len_mean: 93.41
  episode_reward_max: 238.94589505878812
  episode_reward_mean: 144.3893497447586
  episode_reward_min: -156.67892486142387
  episodes_this_iter: 24
  episodes_total: 8817
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 851.265
    load_time_ms: 2.795
    num_steps_sampled: 858000
    num_steps_trained: 858000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.631178081035614
      kl: 0.015225443989038467
      policy_loss: -0.0007051141583360732
      total_loss: 323.08288

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3123 s, 394 iter, 866800 ts, 125 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-33-05
  done: false
  episode_len_mean: 91.74
  episode_reward_max: 240.19705239231513
  episode_reward_mean: 132.06457794625007
  episode_reward_min: -167.3688269562991
  episodes_this_iter: 23
  episodes_total: 8936
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 891.973
    load_time_ms: 2.773
    num_steps_sampled: 869000
    num_steps_trained: 869000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.4236995577812195
      kl: 0.0174117274582386
      policy_loss: 0.0061396388337016106
      total_loss: 95.37989044

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3167 s, 399 iter, 877800 ts, 130 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-33-48
  done: false
  episode_len_mean: 89.39
  episode_reward_max: 241.48477796531748
  episode_reward_mean: 111.8696304760306
  episode_reward_min: -165.56951864422348
  episodes_this_iter: 26
  episodes_total: 9058
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1133.965
    load_time_ms: 3.455
    num_steps_sampled: 880000
    num_steps_trained: 880000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.46357831358909607
      kl: 0.024036461487412453
      policy_loss: -0.0002579549909569323
      total_loss: 579.73

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3208 s, 404 iter, 888800 ts, 126 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-34-37
  done: false
  episode_len_mean: 92.33
  episode_reward_max: 236.18984714643446
  episode_reward_mean: 136.66983521867908
  episode_reward_min: -160.07673134706494
  episodes_this_iter: 23
  episodes_total: 9178
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1290.668
    load_time_ms: 4.109
    num_steps_sampled: 891000
    num_steps_trained: 891000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5055211186408997
      kl: 0.006657027639448643
      policy_loss: -6.234821285033831e-06
      total_loss: 326.49

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3260 s, 409 iter, 899800 ts, 125 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-35-22
  done: false
  episode_len_mean: 91.49
  episode_reward_max: 239.00696989224892
  episode_reward_mean: 125.3952536291121
  episode_reward_min: -155.9125395968146
  episodes_this_iter: 23
  episodes_total: 9297
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1185.497
    load_time_ms: 3.568
    num_steps_sampled: 902000
    num_steps_trained: 902000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.81154865026474
      kl: 0.009831276722252369
      policy_loss: 0.0026027506683021784
      total_loss: 108.4790725

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18379], 3329 s, 414 iter, 910800 ts, 144 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-11_16-36-33
  done: false
  episode_len_mean: 93.86
  episode_reward_max: 241.04244136447002
  episode_reward_mean: 143.94228401746375
  episode_reward_min: -158.56069366239578
  episodes_this_iter: 23
  episodes_total: 9415
  experiment_id: 4ce6132bd9ab404c9dd2064472fe6431
  hostname: Gandalf
  info:
    grad_time_ms: 1592.982
    load_time_ms: 3.77
    num_steps_sampled: 913000
    num_steps_trained: 913000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.673224151134491
      kl: 0.00439930334687233
      policy_loss: -0.003641101298853755
      total_loss: 539.217834