# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout. Defined once here so that the environment parameters
# and the trainer configuration further down use the same value.
HORIZON = 500

# Tag of the experiment (stored as "exp_tag" in flow_params)
experiment_name = "IntersectionExample"

# Scenario class, referenced by name
# NOTE(review): the list printed below contains 'IntersectionScenarioTW', not
# 'IntersectionTWScenario' -- confirm the class is really exported under the
# name used here.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
scenario_name = "IntersectionTWScenario"

# Environment class, referenced by name
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentTeamSpiritIntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, handed to NetParams as additional_params
additionalNetParams = {
    "edge_length": 40,
    "lanes": 1,
    "speed_limit": 30,
}

# Every argument is written out explicitly. The "default" noted above each
# argument is the library default as recorded by the notebook author.
net_params = NetParams(
    # default: True -- set to False so that intersections are not skipped
    no_internal_links=False,
    # default: None
    inflows=None,
    # default: None
    osm_path=None,
    # default: None
    netfile=None,
    # default: None -- replaced by the scenario settings defined above
    additional_params=additionalNetParams,
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial placement of the vehicles. The "default" noted above each argument
# is the library default as recorded by the notebook author.
initial_config = InitialConfig(
    # default: False
    shuffle=True,
    # default: "uniform"
    spacing="custom",
    # default: 0
    min_gap=10,
    # default: 0.0
    # NOTE(review): 39.99 sits just below the "edge_length" of 40 used in the
    # net parameters -- presumably intentional; confirm.
    perturbation=39.99,
    # default: 0
    x0=0,
    # default: 0
    bunching=0,
    # default: float("inf")
    lanes_distribution=float("inf"),
    # default: "all"
    edges_distribution="all",
    # default: None
    additional_params=None,
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. Every argument is written out; according to the notebook
# author's notes each value below equals the library default.
sumo_params = SumoParams(
    port=None,
    sim_step=0.1,
    emission_path=None,
    lateral_resolution=None,
    no_step_log=True,
    # no rendering during training
    render=False,
    save_render=False,
    sight_radius=25,
    show_radius=False,
    pxpm=2,
    overtake_right=False,
    # NOTE(review): no fixed seed is set, so runs are not reproducible
    seed=None,
    restart_instance=False,
    print_warnings=True,
    teleport_time=-1,
    num_clients=1,
    sumo_binary=None,
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, handed to EnvParams as additional_params
additionalEnvParams = {
    # maximum acceleration of autonomous vehicles
    "max_accel": 3,
    # maximum deceleration of autonomous vehicles
    "max_decel": 3,
    "target_velocity": 30,
}

# The "default" noted above each argument is the library default as recorded
# by the notebook author.
env_params = EnvParams(
    # default: None -- replaced by the settings defined above
    additional_params=additionalEnvParams,
    # default: 500 -- taken from the notebook-wide HORIZON constant
    horizon=HORIZON,
    # default: 0
    warmup_steps=0,
    # default: 1
    sims_per_step=1,
    # default: False
    evaluate=False,
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# Controllers and parameter classes used to describe the vehicles.
# The two commented-out imports are the SUMO-driven controllers that the
# notebook author recorded as the defaults; they are not used in this notebook.
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
# NOTE(review): SumoLaneChangeParams is not used in the cells visible here.
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import -- it puts every public name of `random`
# (e.g. `seed`, `shuffle`, `choice`) into the notebook namespace and nothing in
# the visible cells uses them. Prefer an explicit import, or drop it after
# confirming no later cell depends on these names.
from random import *

# Empty vehicle container; vehicle types are registered on it afterwards with
# vehicles.add(...).
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters for the RL vehicles (author's note: default is None)
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")

# Lane-change parameters (author's note: default is None). This name is not
# passed to vehicles.add(...) below.
lc_parameter = None

# Register the RL-controlled vehicle type. The "default" noted above each
# argument is the library default as recorded by the notebook author.
# Older keyword forms (speed_mode=, lane_change_mode=,
# sumo_car_following_params=, sumo_lc_params=) had been left commented out in
# this call and are intentionally not passed.
vehicles.add(
    # name of the vehicle type
    veh_id="rl",
    # acceleration controller, default: (SumoCarFollowingController, {})
    acceleration_controller=(RLController, {}),
    # lane change controller, default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # routing controller, default: None
    routing_controller=(ContinuousRouter, {}),
    # initial speed, default: 0
    initial_speed=0,
    # number of vehicles, default: 1
    # NOTE(review): the multi-agent cell lists exactly two agent ids
    # ('rl_0', 'rl_1') -- keep both in sync when changing this value.
    num_vehicles=2,
    # car-following parameters defined above
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Bundle everything defined so far into the flow_params dictionary.
# The keys must be spelled exactly as below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": "traci",
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (the flow.core.params.VehicleParams instance built above)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Number of parallel rollout workers (used as config["num_workers"])
N_CPUS = 2
# Rollouts per training iteration (train batch size is HORIZON * N_ROLLOUTS)
N_ROLLOUTS = 20

# Start Ray with one CPU more than there are rollout workers; the Tune status
# output further down shows the trial requesting 3/3 CPUs.
# Left as the last expression so the cell displays the returned address info.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-02-23_01-09-09_12703/logs.
Waiting for redis server at 127.0.0.1:21994 to respond...
Waiting for redis server at 127.0.0.1:49613 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=e101b9a8d3f9ba6e6f712de31e0b9c203bc003a504a165d1



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-02-23_01-09-09_12703/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-02-23_01-09-09_12703/sockets/raylet'],
 'redis_address': '192.168.2.102:21994',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=e101b9a8d3f9ba6e6f712de31e0b9c203bc003a504a165d1'}

In [11]:
from copy import deepcopy

# Algorithm to train. This may be the name of a built-in algorithm
# (e.g. RLlib's DQN or PPO) or a user-defined trainable function or class
# registered in the Tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep copy on purpose: the nested "model" and "env_config" dictionaries are
# modified below. With a shallow .copy() those nested dictionaries would still
# be the ones owned by the agent class, so the edits would leak into its
# shared default configuration.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97
# Two further settings were left disabled by the notebook author:
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # default is 200 -- still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params with the trial configuration so the run can be replayed
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)  # string version of flow_params
config['env_config']['flow_params'] = flow_json  # adding the flow_params to config dict
config['env_config']['run'] = alg_run

# make_create_env returns the environment factory together with the name
# under which the environment is registered for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register the factory under that name so RLlib can create the environment
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping.
# One environment is created here only to read its observation and action
# spaces.
# NOTE(review): this instance is not shut down in this cell, and the cell
# output shows a SUMO process being started for it -- consider terminating it
# once the spaces have been read.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space


def gen_policy():
    """Return a fresh policy spec: PPO policy graph, the two spaces, and an empty dict."""
    return (PPOPolicyGraph, obs_space, act_space, {})


# One independent PPO policy per agent id
policy_graphs = {agent_id: gen_policy() for agent_id in ('rl_0', 'rl_1')}


def policy_mapping_fn(agent_id):
    """Map each agent to the policy that carries the same name as the agent id."""
    return agent_id


config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
}

 Starting SUMO on port 48217


1.6038076979708429
11.582291891152966


In [13]:
# Launch the training run; the single experiment is keyed by the experiment tag.
trials = run_experiments(
    {
        flow_params["exp_tag"]: {
            # RL algorithm to run
            "run": alg_run,
            # environment name generated earlier
            "env": gym_name,
            # configuration params (must match the "run" value); shallow copy
            "config": dict(config),
            # number of iterations between checkpoints
            "checkpoint_freq": 1,
            # NOTE(review): very high value -- presumably so that the trial
            # keeps being restarted after failures; confirm this is intended.
            "max_failures": 999,
            # stopping conditions: number of iterations to stop after
            "stop": {"training_iteration": 1000},
        },
    }
)

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.2/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0_2019-02-23_01-09-11i4dg0a94 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-10-07
  done: false
  episode_len_mean: 400.1666666666667
  episode_reward_max: 145.38326111257012
  episode_reward_mean: -8.759566947312722
  episode_reward_min: -181.94378557141593
  episodes_this_iter: 24
  episodes_total: 24
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 4278.545
    load_time_ms: 143.977
    num

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-11-41
  done: false
  episode_len_mean: 398.7
  episode_reward_max: 207.50015774012948
  episode_reward_mean: 25.34166855334402
  episode_reward_min: -176.02688496905031
  episodes_this_iter: 26
  episodes_total: 124
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3472.252
    load_time_ms: 30.896
    num_steps_sampled: 50000
    num_steps_trained: 50000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4442048072814941
      kl: 0.005586014594882727
      policy_loss: -0.00443550618365407
      total_loss: 151.82093811035156
      vf_explained_var: 0.2782227396965027
      vf_loss: 151.82530212402344
    rl_1:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.414603352546692
      kl: 0.0022687746677547693
      policy_loss: -0.002418189542368

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 214 s, 9 iter, 90000 ts, 48.7 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-13-39
  done: false
  episode_len_mean: 333.85
  episode_reward_max: 208.21025410316182
  episode_reward_mean: 38.5822240009209
  episode_reward_min: -179.29151748662593
  episodes_this_iter: 33
  episodes_total: 269
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3369.236
    load_time_ms: 16.579
    num_steps_sampled: 100000
    num_steps_trained: 100000
    rl_0:
      cur_kl_coeff: 0.0003906250058207661
      cur_lr: 4.999999873689376e-05
      entropy: 1.439549207687378
      kl: 0.004999891854822636
      policy_loss: -0.00337318773

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 333 s, 14 iter, 140000 ts, 7.81 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-15-38
  done: false
  episode_len_mean: 305.45
  episode_reward_max: 203.06118103919619
  episode_reward_mean: 25.038074221456988
  episode_reward_min: -183.48026746832272
  episodes_this_iter: 28
  episodes_total: 431
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3271.985
    load_time_ms: 2.419
    num_steps_sampled: 150000
    num_steps_trained: 150000
    rl_0:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.4099640846252441
      kl: 0.0023611080832779408
      policy_loss: -0.001374

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 450 s, 19 iter, 190000 ts, 61 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-17-35
  done: false
  episode_len_mean: 254.06
  episode_reward_max: 211.2370153478997
  episode_reward_mean: 49.256519552869385
  episode_reward_min: -177.89501972044874
  episodes_this_iter: 42
  episodes_total: 608
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3263.241
    load_time_ms: 2.497
    num_steps_sampled: 200000
    num_steps_trained: 200000
    rl_0:
      cur_kl_coeff: 3.814697322468419e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.4443562030792236
      kl: 0.003174273297190666
      policy_loss: -0.0019141527

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 569 s, 24 iter, 240000 ts, 105 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-19-34
  done: false
  episode_len_mean: 218.07
  episode_reward_max: 217.5742707864381
  episode_reward_mean: 122.62047781001533
  episode_reward_min: -151.10609299527448
  episodes_this_iter: 47
  episodes_total: 821
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3245.769
    load_time_ms: 2.489
    num_steps_sampled: 250000
    num_steps_trained: 250000
    rl_0:
      cur_kl_coeff: 1.1920929132713809e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.4461814165115356
      kl: 0.0037622659001499414
      policy_loss: -0.0012750

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 688 s, 29 iter, 290000 ts, 93.2 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-21-33
  done: false
  episode_len_mean: 213.85
  episode_reward_max: 211.55395653818383
  episode_reward_mean: 97.6093677358442
  episode_reward_min: -180.81216930273956
  episodes_this_iter: 45
  episodes_total: 1067
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3237.93
    load_time_ms: 2.431
    num_steps_sampled: 300000
    num_steps_trained: 300000
    rl_0:
      cur_kl_coeff: 3.7252903539730653e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.4159560203552246
      kl: 0.0033431334886699915
      policy_loss: -0.0013365

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 806 s, 34 iter, 340000 ts, 110 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-23-32
  done: false
  episode_len_mean: 195.09
  episode_reward_max: 194.55015353671996
  episode_reward_mean: 109.94031918802045
  episode_reward_min: -184.68931254095526
  episodes_this_iter: 51
  episodes_total: 1332
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3237.829
    load_time_ms: 2.446
    num_steps_sampled: 350000
    num_steps_trained: 350000
    rl_0:
      cur_kl_coeff: 2.3283064712331658e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3986058235168457
      kl: 0.00550841772928834
      policy_loss: -0.0022649

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 925 s, 39 iter, 390000 ts, 120 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-25-31
  done: false
  episode_len_mean: 159.98
  episode_reward_max: 179.8337410508864
  episode_reward_mean: 117.55896690797056
  episode_reward_min: -174.54621595194922
  episodes_this_iter: 63
  episodes_total: 1632
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3245.286
    load_time_ms: 2.518
    num_steps_sampled: 400000
    num_steps_trained: 400000
    rl_0:
      cur_kl_coeff: 7.275957722603643e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.389387607574463
      kl: 0.0025914099533110857
      policy_loss: -0.00175589

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1045 s, 44 iter, 440000 ts, 115 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-27-32
  done: false
  episode_len_mean: 155.42
  episode_reward_max: 173.55049460435433
  episode_reward_mean: 123.19799347863733
  episode_reward_min: -187.41309548272062
  episodes_this_iter: 66
  episodes_total: 1963
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3252.765
    load_time_ms: 2.461
    num_steps_sampled: 450000
    num_steps_trained: 450000
    rl_0:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.3674184083938599
      kl: 0.009635932743549347
      policy_loss: -0.00388

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1165 s, 49 iter, 490000 ts, 116 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-29-32
  done: false
  episode_len_mean: 134.32
  episode_reward_max: 181.1152484889443
  episode_reward_mean: 122.14974922748746
  episode_reward_min: -156.5322551073875
  episodes_this_iter: 73
  episodes_total: 2333
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3263.117
    load_time_ms: 2.639
    num_steps_sampled: 500000
    num_steps_trained: 500000
    rl_0:
      cur_kl_coeff: 7.10542746348012e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.3622477054595947
      kl: 0.010939909145236015
      policy_loss: -0.004622458

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1284 s, 54 iter, 540000 ts, 133 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-31-31
  done: false
  episode_len_mean: 147.85
  episode_reward_max: 191.44310828612262
  episode_reward_mean: 134.43034807142067
  episode_reward_min: -144.0544710148374
  episodes_this_iter: 65
  episodes_total: 2698
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3201.341
    load_time_ms: 2.689
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 8.88178432935015e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.3112443685531616
      kl: 0.0014718365855515003
      policy_loss: -0.0010469

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1403 s, 59 iter, 590000 ts, 128 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-33-30
  done: false
  episode_len_mean: 131.68
  episode_reward_max: 189.4759598350896
  episode_reward_mean: 143.47007093884974
  episode_reward_min: -136.75809857942883
  episodes_this_iter: 78
  episodes_total: 3073
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3107.212
    load_time_ms: 2.521
    num_steps_sampled: 600000
    num_steps_trained: 600000
    rl_0:
      cur_kl_coeff: 2.775557602921922e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.2876793146133423
      kl: 0.0028764568269252777
      policy_loss: -0.001647

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1523 s, 64 iter, 640000 ts, 139 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-35-30
  done: false
  episode_len_mean: 118.19
  episode_reward_max: 191.822472369337
  episode_reward_mean: 143.20073902492052
  episode_reward_min: -164.10395937539107
  episodes_this_iter: 84
  episodes_total: 3462
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3082.539
    load_time_ms: 2.429
    num_steps_sampled: 650000
    num_steps_trained: 650000
    rl_0:
      cur_kl_coeff: 1.7347235018262012e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.252562165260315
      kl: 0.0034226595889776945
      policy_loss: -0.0025734

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1641 s, 69 iter, 690000 ts, 142 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-37-28
  done: false
  episode_len_mean: 123.84
  episode_reward_max: 208.73417199828862
  episode_reward_mean: 127.89556796136645
  episode_reward_min: -158.9596100878773
  episodes_this_iter: 82
  episodes_total: 3854
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3085.589
    load_time_ms: 2.406
    num_steps_sampled: 700000
    num_steps_trained: 700000
    rl_0:
      cur_kl_coeff: 5.421010943206879e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.2577980756759644
      kl: 0.006087045185267925
      policy_loss: -0.0039249

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1759 s, 74 iter, 740000 ts, 153 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-39-27
  done: false
  episode_len_mean: 119.92
  episode_reward_max: 217.52069531815314
  episode_reward_mean: 134.46815324786587
  episode_reward_min: -163.61940762245172
  episodes_this_iter: 81
  episodes_total: 4244
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3108.061
    load_time_ms: 2.292
    num_steps_sampled: 750000
    num_steps_trained: 750000
    rl_0:
      cur_kl_coeff: 1.6940659197521496e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.2373552322387695
      kl: 0.0034458530135452747
      policy_loss: -0.0015

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1877 s, 79 iter, 790000 ts, 138 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-41-24
  done: false
  episode_len_mean: 124.21
  episode_reward_max: 224.32505130914427
  episode_reward_mean: 129.07016972406242
  episode_reward_min: -159.56576211225627
  episodes_this_iter: 86
  episodes_total: 4629
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3127.547
    load_time_ms: 2.217
    num_steps_sampled: 800000
    num_steps_trained: 800000
    rl_0:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.179699182510376
      kl: 0.0060830083675682545
      policy_loss: -0.00309

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 1992 s, 84 iter, 840000 ts, 149 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-43-20
  done: false
  episode_len_mean: 138.55
  episode_reward_max: 225.68668800325057
  episode_reward_mean: 130.31766738361995
  episode_reward_min: -152.52569258659145
  episodes_this_iter: 63
  episodes_total: 5012
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3100.275
    load_time_ms: 2.214
    num_steps_sampled: 850000
    num_steps_trained: 850000
    rl_0:
      cur_kl_coeff: 3.3087224995159173e-25
      cur_lr: 4.999999873689376e-05
      entropy: 1.337809443473816
      kl: 0.0036949904169887304
      policy_loss: -0.00352

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2109 s, 89 iter, 890000 ts, 149 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-45-16
  done: false
  episode_len_mean: 113.05
  episode_reward_max: 222.43331099681157
  episode_reward_mean: 127.42525754557718
  episode_reward_min: -157.0576347849017
  episodes_this_iter: 87
  episodes_total: 5449
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3097.287
    load_time_ms: 2.252
    num_steps_sampled: 900000
    num_steps_trained: 900000
    rl_0:
      cur_kl_coeff: 1.0339757810987241e-26
      cur_lr: 4.999999873689376e-05
      entropy: 1.2228657007217407
      kl: 0.004121205769479275
      policy_loss: -0.002474

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2224 s, 94 iter, 940000 ts, 145 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-47-12
  done: false
  episode_len_mean: 123.75
  episode_reward_max: 223.2915462625158
  episode_reward_mean: 120.89213223980916
  episode_reward_min: -176.18981390587572
  episodes_this_iter: 81
  episodes_total: 5884
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3112.025
    load_time_ms: 2.368
    num_steps_sampled: 950000
    num_steps_trained: 950000
    rl_0:
      cur_kl_coeff: 3.231174315933513e-28
      cur_lr: 4.999999873689376e-05
      entropy: 1.2397916316986084
      kl: 0.003809463232755661
      policy_loss: -0.0025931

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2340 s, 99 iter, 990000 ts, 158 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-49-08
  done: false
  episode_len_mean: 125.31
  episode_reward_max: 225.16599318271759
  episode_reward_mean: 131.23995421274606
  episode_reward_min: -177.70848331484996
  episodes_this_iter: 76
  episodes_total: 6305
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3097.098
    load_time_ms: 2.512
    num_steps_sampled: 1000000
    num_steps_trained: 1000000
    rl_0:
      cur_kl_coeff: 1.0097419737292228e-29
      cur_lr: 4.999999873689376e-05
      entropy: 1.2767598628997803
      kl: 0.0037948954850435257
      policy_loss: -0.00

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2456 s, 104 iter, 1040000 ts, 145 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-51-05
  done: false
  episode_len_mean: 121.36
  episode_reward_max: 236.89928188003597
  episode_reward_mean: 155.42839941394195
  episode_reward_min: -158.15846639205154
  episodes_this_iter: 81
  episodes_total: 6750
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3063.788
    load_time_ms: 2.599
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
    rl_0:
      cur_kl_coeff: 3.1554436679038213e-31
      cur_lr: 4.999999873689376e-05
      entropy: 1.181519627571106
      kl: 0.004522817209362984
      policy_loss: -0.00

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2572 s, 109 iter, 1090000 ts, 146 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-53-00
  done: false
  episode_len_mean: 113.74
  episode_reward_max: 232.444994820026
  episode_reward_mean: 131.03933011309766
  episode_reward_min: -177.07182487101448
  episodes_this_iter: 90
  episodes_total: 7179
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3078.668
    load_time_ms: 2.465
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
    rl_0:
      cur_kl_coeff: 9.860761462199441e-33
      cur_lr: 4.999999873689376e-05
      entropy: 1.1314996480941772
      kl: 0.007831055670976639
      policy_loss: -0.0041

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2688 s, 114 iter, 1140000 ts, 152 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-54-57
  done: false
  episode_len_mean: 113.63
  episode_reward_max: 223.3397722816799
  episode_reward_mean: 155.44709304844105
  episode_reward_min: -161.4787808711474
  episodes_this_iter: 90
  episodes_total: 7633
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.919
    load_time_ms: 2.335
    num_steps_sampled: 1150000
    num_steps_trained: 1150000
    rl_0:
      cur_kl_coeff: 3.0814879569373254e-34
      cur_lr: 4.999999873689376e-05
      entropy: 1.0968682765960693
      kl: 0.005371363367885351
      policy_loss: -0.000

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2805 s, 119 iter, 1190000 ts, 139 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-56-54
  done: false
  episode_len_mean: 108.59
  episode_reward_max: 225.93123945728286
  episode_reward_mean: 142.0891383587442
  episode_reward_min: -167.0611426875007
  episodes_this_iter: 91
  episodes_total: 8093
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3064.089
    load_time_ms: 2.351
    num_steps_sampled: 1200000
    num_steps_trained: 1200000
    rl_0:
      cur_kl_coeff: 1.9259299730858284e-35
      cur_lr: 4.999999873689376e-05
      entropy: 1.0169060230255127
      kl: 0.003673017490655184
      policy_loss: -0.001

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 2921 s, 124 iter, 1240000 ts, 147 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_01-58-51
  done: false
  episode_len_mean: 105.95
  episode_reward_max: 236.85043159439456
  episode_reward_mean: 156.54841857532077
  episode_reward_min: -176.90786633352593
  episodes_this_iter: 95
  episodes_total: 8557
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.393
    load_time_ms: 2.431
    num_steps_sampled: 1250000
    num_steps_trained: 1250000
    rl_0:
      cur_kl_coeff: 6.018531165893214e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.9865960478782654
      kl: 0.0055294097401201725
      policy_loss: -0.0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3038 s, 129 iter, 1290000 ts, 136 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-00-48
  done: false
  episode_len_mean: 110.61
  episode_reward_max: 228.5704776179612
  episode_reward_mean: 157.72890353371656
  episode_reward_min: -166.32556663539856
  episodes_this_iter: 89
  episodes_total: 9003
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3077.816
    load_time_ms: 2.491
    num_steps_sampled: 1300000
    num_steps_trained: 1300000
    rl_0:
      cur_kl_coeff: 1.8807909893416293e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.996436595916748
      kl: 0.0074156830087304115
      policy_loss: -0.00

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3154 s, 134 iter, 1340000 ts, 140 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-02-44
  done: false
  episode_len_mean: 100.25
  episode_reward_max: 229.6346492756676
  episode_reward_mean: 152.32006027069215
  episode_reward_min: -177.90363424014404
  episodes_this_iter: 100
  episodes_total: 9468
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3076.038
    load_time_ms: 2.408
    num_steps_sampled: 1350000
    num_steps_trained: 1350000
    rl_0:
      cur_kl_coeff: 1.1754946310819804e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.8555963039398193
      kl: 0.005210178438574076
      policy_loss: -0.0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3271 s, 139 iter, 1390000 ts, 158 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-04-41
  done: false
  episode_len_mean: 121.14
  episode_reward_max: 227.43457469010016
  episode_reward_mean: 159.00365848592804
  episode_reward_min: -181.13053975411174
  episodes_this_iter: 83
  episodes_total: 9917
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3080.544
    load_time_ms: 2.39
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
    rl_0:
      cur_kl_coeff: 3.6733637943810755e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.9126669764518738
      kl: 0.006881444249302149
      policy_loss: -0.00

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3387 s, 144 iter, 1440000 ts, 172 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-06-37
  done: false
  episode_len_mean: 117.82
  episode_reward_max: 219.4969100466332
  episode_reward_mean: 161.98792179131934
  episode_reward_min: -160.27454807622584
  episodes_this_iter: 86
  episodes_total: 10372
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3099.975
    load_time_ms: 2.439
    num_steps_sampled: 1450000
    num_steps_trained: 1450000
    rl_0:
      cur_kl_coeff: 4.5920550675924255e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.8977375626564026
      kl: 0.004366109613329172
      policy_loss: -0.0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3502 s, 149 iter, 1490000 ts, 166 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-08-33
  done: false
  episode_len_mean: 111.52
  episode_reward_max: 227.1992927148417
  episode_reward_mean: 175.44398714310336
  episode_reward_min: 7.791556151704921
  episodes_this_iter: 88
  episodes_total: 10819
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3099.808
    load_time_ms: 2.627
    num_steps_sampled: 1500000
    num_steps_trained: 1500000
    rl_0:
      cur_kl_coeff: 1.4293244336113134e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.8554191589355469
      kl: 0.0041921259835362434
      policy_loss: -5.21

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3617 s, 154 iter, 1540000 ts, 156 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-10-29
  done: false
  episode_len_mean: 107.58
  episode_reward_max: 223.19953640415997
  episode_reward_mean: 141.90114377105957
  episode_reward_min: -183.23030372769406
  episodes_this_iter: 88
  episodes_total: 11243
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3077.523
    load_time_ms: 2.76
    num_steps_sampled: 1550000
    num_steps_trained: 1550000
    rl_0:
      cur_kl_coeff: 4.203895392974451e-45
      cur_lr: 4.999999873689376e-05
      entropy: 0.8091483116149902
      kl: 0.004182177130132914
      policy_loss: -0.00

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3733 s, 159 iter, 1590000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-12-25
  done: false
  episode_len_mean: 104.78
  episode_reward_max: 227.08115143455672
  episode_reward_mean: 153.2063760083013
  episode_reward_min: -176.28161168202112
  episodes_this_iter: 94
  episodes_total: 11704
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.66
    load_time_ms: 2.487
    num_steps_sampled: 1600000
    num_steps_trained: 1600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7821781635284424
      kl: 0.005941102746874094
      policy_loss: -0.0008529264014214277
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3850 s, 164 iter, 1640000 ts, 153 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-14-22
  done: false
  episode_len_mean: 118.66
  episode_reward_max: 224.30346473821547
  episode_reward_mean: 154.09536992288628
  episode_reward_min: -157.57326757416533
  episodes_this_iter: 83
  episodes_total: 12149
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3067.584
    load_time_ms: 2.331
    num_steps_sampled: 1650000
    num_steps_trained: 1650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.8808738589286804
      kl: 0.005687179043889046
      policy_loss: -0.0020379829220473766

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 3966 s, 169 iter, 1690000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-16-18
  done: false
  episode_len_mean: 113.82
  episode_reward_max: 227.64759219287436
  episode_reward_mean: 162.35436439090128
  episode_reward_min: -153.12728336640322
  episodes_this_iter: 90
  episodes_total: 12590
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3074.069
    load_time_ms: 2.338
    num_steps_sampled: 1700000
    num_steps_trained: 1700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7351281642913818
      kl: 0.006349725183099508
      policy_loss: -0.001037177979014814


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4083 s, 174 iter, 1740000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-18-15
  done: false
  episode_len_mean: 105.66
  episode_reward_max: 228.60950939626844
  episode_reward_mean: 164.7929736221681
  episode_reward_min: -178.17169519025185
  episodes_this_iter: 94
  episodes_total: 13033
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3089.081
    load_time_ms: 2.271
    num_steps_sampled: 1750000
    num_steps_trained: 1750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7096218466758728
      kl: 0.007241190876811743
      policy_loss: -0.003063447307795286
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4199 s, 179 iter, 1790000 ts, 165 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-20-11
  done: false
  episode_len_mean: 107.45
  episode_reward_max: 228.36994537485714
  episode_reward_mean: 168.96800330738535
  episode_reward_min: -171.5136075063936
  episodes_this_iter: 93
  episodes_total: 13482
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3083.367
    load_time_ms: 2.414
    num_steps_sampled: 1800000
    num_steps_trained: 1800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6881727576255798
      kl: 0.005061628762632608
      policy_loss: -0.0023392944131046534


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4315 s, 184 iter, 1840000 ts, 158 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-22-08
  done: false
  episode_len_mean: 110.84
  episode_reward_max: 228.99776751621368
  episode_reward_mean: 173.58995367505136
  episode_reward_min: -139.59837191468736
  episodes_this_iter: 90
  episodes_total: 13922
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3076.91
    load_time_ms: 2.746
    num_steps_sampled: 1850000
    num_steps_trained: 1850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6355474591255188
      kl: 0.004769606050103903
      policy_loss: -0.0004532778402790427


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4430 s, 189 iter, 1890000 ts, 164 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-24-02
  done: false
  episode_len_mean: 124.4
  episode_reward_max: 229.09984871691694
  episode_reward_mean: 168.01887294015637
  episode_reward_min: -140.14410010206265
  episodes_this_iter: 82
  episodes_total: 14324
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3080.014
    load_time_ms: 2.588
    num_steps_sampled: 1900000
    num_steps_trained: 1900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6849588751792908
      kl: 0.0034413153771311045
      policy_loss: -0.0003960382891818881

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4544 s, 194 iter, 1940000 ts, 165 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-25-57
  done: false
  episode_len_mean: 115.46
  episode_reward_max: 234.98740932821593
  episode_reward_mean: 165.38747746781533
  episode_reward_min: -157.6376053692116
  episodes_this_iter: 88
  episodes_total: 14739
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.713
    load_time_ms: 2.341
    num_steps_sampled: 1950000
    num_steps_trained: 1950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.693749725818634
      kl: 0.00607816968113184
      policy_loss: -0.0012331173056736588
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4661 s, 199 iter, 1990000 ts, 169 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-27-54
  done: false
  episode_len_mean: 124.11
  episode_reward_max: 231.61661827129223
  episode_reward_mean: 157.3630852020635
  episode_reward_min: -160.2300530123373
  episodes_this_iter: 82
  episodes_total: 15164
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.988
    load_time_ms: 2.359
    num_steps_sampled: 2000000
    num_steps_trained: 2000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.7174768447875977
      kl: 0.009942000731825829
      policy_loss: -0.003713516751304269
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4777 s, 204 iter, 2040000 ts, 151 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-29-50
  done: false
  episode_len_mean: 115.03
  episode_reward_max: 235.9155893954381
  episode_reward_mean: 147.37533629394994
  episode_reward_min: -166.32596975121868
  episodes_this_iter: 86
  episodes_total: 15608
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3069.129
    load_time_ms: 2.38
    num_steps_sampled: 2050000
    num_steps_trained: 2050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.612434983253479
      kl: 0.008001779206097126
      policy_loss: -0.0027051554061472416
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 4891 s, 209 iter, 2090000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-31-45
  done: false
  episode_len_mean: 102.99
  episode_reward_max: 228.77344269513773
  episode_reward_mean: 161.12545014244742
  episode_reward_min: -177.00657951579103
  episodes_this_iter: 97
  episodes_total: 16042
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3052.768
    load_time_ms: 2.378
    num_steps_sampled: 2100000
    num_steps_trained: 2100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.48471757769584656
      kl: 0.008586045354604721
      policy_loss: -0.002014152240008115

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5008 s, 214 iter, 2140000 ts, 169 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-33-42
  done: false
  episode_len_mean: 121.34
  episode_reward_max: 236.38078526972276
  episode_reward_mean: 154.73098345226916
  episode_reward_min: -174.70288157032581
  episodes_this_iter: 81
  episodes_total: 16467
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3079.965
    load_time_ms: 2.343
    num_steps_sampled: 2150000
    num_steps_trained: 2150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.6257174015045166
      kl: 0.007375848013907671
      policy_loss: -0.0023161827120929956

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5123 s, 219 iter, 2190000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-35-37
  done: false
  episode_len_mean: 118.54
  episode_reward_max: 229.71576709623355
  episode_reward_mean: 148.78569893088127
  episode_reward_min: -169.4957335604069
  episodes_this_iter: 82
  episodes_total: 16888
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3088.016
    load_time_ms: 2.39
    num_steps_sampled: 2200000
    num_steps_trained: 2200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.4903582036495209
      kl: 0.006501093972474337
      policy_loss: -0.0019121429650112987
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5238 s, 224 iter, 2240000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-37-32
  done: false
  episode_len_mean: 111.77
  episode_reward_max: 229.43323592330242
  episode_reward_mean: 173.42845174600458
  episode_reward_min: -140.64790303611676
  episodes_this_iter: 92
  episodes_total: 17318
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3073.956
    load_time_ms: 2.55
    num_steps_sampled: 2250000
    num_steps_trained: 2250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.4424573481082916
      kl: 0.005191334057599306
      policy_loss: 0.0003187592956237495
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5354 s, 229 iter, 2290000 ts, 138 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-39-28
  done: false
  episode_len_mean: 113.04
  episode_reward_max: 233.08694316047988
  episode_reward_mean: 166.51454674835267
  episode_reward_min: -171.8235336922509
  episodes_this_iter: 85
  episodes_total: 17739
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3065.769
    load_time_ms: 2.48
    num_steps_sampled: 2300000
    num_steps_trained: 2300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.5202969312667847
      kl: 0.008920727297663689
      policy_loss: -0.001041543553583324
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5469 s, 234 iter, 2340000 ts, 152 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-41-24
  done: false
  episode_len_mean: 109.48
  episode_reward_max: 226.22500665649312
  episode_reward_mean: 152.83938941774983
  episode_reward_min: -157.19888296272603
  episodes_this_iter: 92
  episodes_total: 18168
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3086.76
    load_time_ms: 2.386
    num_steps_sampled: 2350000
    num_steps_trained: 2350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3590467572212219
      kl: 0.008252626284956932
      policy_loss: -0.0014883601106703281


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5585 s, 239 iter, 2390000 ts, 151 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-43-20
  done: false
  episode_len_mean: 108.22
  episode_reward_max: 229.6616162631921
  episode_reward_mean: 155.5620385984992
  episode_reward_min: -171.3495702804011
  episodes_this_iter: 93
  episodes_total: 18629
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.955
    load_time_ms: 2.664
    num_steps_sampled: 2400000
    num_steps_trained: 2400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.36375415325164795
      kl: 0.011417745612561703
      policy_loss: -0.001511441427282989
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5700 s, 244 iter, 2440000 ts, 152 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-45-15
  done: false
  episode_len_mean: 107.01
  episode_reward_max: 234.1337557052357
  episode_reward_mean: 162.85347042003133
  episode_reward_min: -162.58280965401315
  episodes_this_iter: 94
  episodes_total: 19051
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3056.59
    load_time_ms: 2.547
    num_steps_sampled: 2450000
    num_steps_trained: 2450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.2749499976634979
      kl: 0.011253505013883114
      policy_loss: 0.0024569241795688868
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5816 s, 249 iter, 2490000 ts, 154 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-47-11
  done: false
  episode_len_mean: 108.88
  episode_reward_max: 232.62208010201587
  episode_reward_mean: 149.42837481397046
  episode_reward_min: -154.2662974257722
  episodes_this_iter: 93
  episodes_total: 19487
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.847
    load_time_ms: 2.366
    num_steps_sampled: 2500000
    num_steps_trained: 2500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.302489697933197
      kl: 0.006307567935436964
      policy_loss: 0.0005947670433670282
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 5932 s, 254 iter, 2540000 ts, 150 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-49-07
  done: false
  episode_len_mean: 107.47
  episode_reward_max: 231.9458673127874
  episode_reward_mean: 165.92976083606584
  episode_reward_min: -174.79251239560475
  episodes_this_iter: 94
  episodes_total: 19925
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3081.314
    load_time_ms: 2.436
    num_steps_sampled: 2550000
    num_steps_trained: 2550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.23599612712860107
      kl: 0.013768617995083332
      policy_loss: -0.000376858573872596


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6047 s, 259 iter, 2590000 ts, 164 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-51-02
  done: false
  episode_len_mean: 103.28
  episode_reward_max: 226.1086526841128
  episode_reward_mean: 162.52084492224182
  episode_reward_min: -174.19980701099743
  episodes_this_iter: 96
  episodes_total: 20362
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3083.733
    load_time_ms: 2.371
    num_steps_sampled: 2600000
    num_steps_trained: 2600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.20435741543769836
      kl: 0.010281910188496113
      policy_loss: -0.003557579591870308


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6163 s, 264 iter, 2640000 ts, 147 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-52-59
  done: false
  episode_len_mean: 110.79
  episode_reward_max: 230.73812458500186
  episode_reward_mean: 174.78063977159
  episode_reward_min: -156.49392860839052
  episodes_this_iter: 89
  episodes_total: 20803
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3078.866
    load_time_ms: 2.416
    num_steps_sampled: 2650000
    num_steps_trained: 2650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.2017238736152649
      kl: 0.012346266768872738
      policy_loss: -0.00203256425447762
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6278 s, 269 iter, 2690000 ts, 159 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-54-54
  done: false
  episode_len_mean: 119.43
  episode_reward_max: 224.15195040036974
  episode_reward_mean: 157.1805645915207
  episode_reward_min: -150.91526537520758
  episodes_this_iter: 82
  episodes_total: 21243
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.386
    load_time_ms: 2.405
    num_steps_sampled: 2700000
    num_steps_trained: 2700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3586026132106781
      kl: 0.008330504409968853
      policy_loss: -0.00040508806705474854

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6395 s, 274 iter, 2740000 ts, 164 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-56-51
  done: false
  episode_len_mean: 125.64
  episode_reward_max: 227.10876550801927
  episode_reward_mean: 153.3560107422728
  episode_reward_min: -159.71162734204108
  episodes_this_iter: 84
  episodes_total: 21679
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3080.532
    load_time_ms: 2.514
    num_steps_sampled: 2750000
    num_steps_trained: 2750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.2696608901023865
      kl: 0.013555040583014488
      policy_loss: 0.004768196493387222
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6511 s, 279 iter, 2790000 ts, 163 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_02-58-48
  done: false
  episode_len_mean: 110.49
  episode_reward_max: 225.9263386255663
  episode_reward_mean: 170.50776807496638
  episode_reward_min: -148.78940676696035
  episodes_this_iter: 91
  episodes_total: 22123
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3082.459
    load_time_ms: 2.518
    num_steps_sampled: 2800000
    num_steps_trained: 2800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.11398468166589737
      kl: 0.011386080645024776
      policy_loss: -0.0002335839235456660

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6626 s, 284 iter, 2840000 ts, 170 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-00-42
  done: false
  episode_len_mean: 111.1
  episode_reward_max: 229.44754949215172
  episode_reward_mean: 172.0477096712288
  episode_reward_min: -180.1455398953716
  episodes_this_iter: 90
  episodes_total: 22559
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3077.85
    load_time_ms: 2.405
    num_steps_sampled: 2850000
    num_steps_trained: 2850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.03398991376161575
      kl: 0.013903767801821232
      policy_loss: 0.0002156073896912858
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6739 s, 289 iter, 2890000 ts, 173 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-02-36
  done: false
  episode_len_mean: 114.54
  episode_reward_max: 235.95862052874838
  episode_reward_mean: 153.25602031066515
  episode_reward_min: -146.38606588354236
  episodes_this_iter: 88
  episodes_total: 23003
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3058.164
    load_time_ms: 2.468
    num_steps_sampled: 2900000
    num_steps_trained: 2900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.1659010797739029
      kl: 0.019958706572651863
      policy_loss: -0.000927191402297467


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6855 s, 294 iter, 2940000 ts, 160 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-04-32
  done: false
  episode_len_mean: 114.77
  episode_reward_max: 233.73918128023735
  episode_reward_mean: 174.11907997767932
  episode_reward_min: -129.29137027130292
  episodes_this_iter: 86
  episodes_total: 23424
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3083.627
    load_time_ms: 2.507
    num_steps_sampled: 2950000
    num_steps_trained: 2950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.12192881107330322
      kl: 0.00905864778906107
      policy_loss: 0.00047898359480313957

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 6970 s, 299 iter, 2990000 ts, 169 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-06-28
  done: false
  episode_len_mean: 103.5
  episode_reward_max: 229.2233664600054
  episode_reward_mean: 147.43828781608784
  episode_reward_min: -169.3172626505655
  episodes_this_iter: 98
  episodes_total: 23879
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3083.826
    load_time_ms: 2.479
    num_steps_sampled: 3000000
    num_steps_trained: 3000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.12684021890163422
      kl: 0.03426942601799965
      policy_loss: 0.00797396618872881
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7086 s, 304 iter, 3040000 ts, 160 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-08-24
  done: false
  episode_len_mean: 123.85
  episode_reward_max: 231.5076020800158
  episode_reward_mean: 146.98788343122408
  episode_reward_min: -170.90960658520106
  episodes_this_iter: 79
  episodes_total: 24321
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3072.0
    load_time_ms: 2.36
    num_steps_sampled: 3050000
    num_steps_trained: 3050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.31754377484321594
      kl: 0.024864496663212776
      policy_loss: 0.0030907681211829185
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7201 s, 309 iter, 3090000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-10-19
  done: false
  episode_len_mean: 106.88
  episode_reward_max: 231.05007383850244
  episode_reward_mean: 154.403032802713
  episode_reward_min: -160.5225018111231
  episodes_this_iter: 93
  episodes_total: 24771
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3086.804
    load_time_ms: 2.466
    num_steps_sampled: 3100000
    num_steps_trained: 3100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.02276615984737873
      kl: 0.00949100311845541
      policy_loss: 0.0006539073656313121
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7316 s, 314 iter, 3140000 ts, 159 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-12-14
  done: false
  episode_len_mean: 115.81
  episode_reward_max: 229.73851288710551
  episode_reward_mean: 152.1340922571445
  episode_reward_min: -164.861141860234
  episodes_this_iter: 88
  episodes_total: 25206
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3083.215
    load_time_ms: 2.476
    num_steps_sampled: 3150000
    num_steps_trained: 3150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.046557072550058365
      kl: 0.016954582184553146
      policy_loss: 0.003231598297134042
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7432 s, 319 iter, 3190000 ts, 153 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-14-10
  done: false
  episode_len_mean: 118.38
  episode_reward_max: 229.94406244998655
  episode_reward_mean: 159.65347088648195
  episode_reward_min: -137.13375631744182
  episodes_this_iter: 83
  episodes_total: 25648
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3073.526
    load_time_ms: 2.388
    num_steps_sampled: 3200000
    num_steps_trained: 3200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.1145700067281723
      kl: 0.009413211606442928
      policy_loss: 0.0006006956100463867


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7547 s, 324 iter, 3240000 ts, 146 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-16-06
  done: false
  episode_len_mean: 110.62
  episode_reward_max: 234.05130259052484
  episode_reward_mean: 151.77843474071398
  episode_reward_min: -168.0323871323223
  episodes_this_iter: 91
  episodes_total: 26085
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3057.055
    load_time_ms: 2.463
    num_steps_sampled: 3250000
    num_steps_trained: 3250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.00014308496611192822
      kl: 0.012229465879499912
      policy_loss: -0.002913097850978

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7662 s, 329 iter, 3290000 ts, 158 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-18-01
  done: false
  episode_len_mean: 127.44
  episode_reward_max: 225.41876854187004
  episode_reward_mean: 167.61278720636955
  episode_reward_min: -163.41627314131864
  episodes_this_iter: 78
  episodes_total: 26510
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3066.101
    load_time_ms: 2.412
    num_steps_sampled: 3300000
    num_steps_trained: 3300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.29116931557655334
      kl: 0.024764621630311012
      policy_loss: 0.003887081751599908


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7777 s, 334 iter, 3340000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-19-56
  done: false
  episode_len_mean: 116.76
  episode_reward_max: 226.51590122159908
  episode_reward_mean: 159.7803158649139
  episode_reward_min: -158.53005437234805
  episodes_this_iter: 83
  episodes_total: 26948
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.465
    load_time_ms: 2.4
    num_steps_sampled: 3350000
    num_steps_trained: 3350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.13162362575531006
      kl: 0.04606804624199867
      policy_loss: 0.006964384112507105
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 7892 s, 339 iter, 3390000 ts, 150 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-21-51
  done: false
  episode_len_mean: 107.25
  episode_reward_max: 224.40708975152913
  episode_reward_mean: 162.35173790827363
  episode_reward_min: -165.35825702685347
  episodes_this_iter: 90
  episodes_total: 27418
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3058.441
    load_time_ms: 2.524
    num_steps_sampled: 3400000
    num_steps_trained: 3400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.004967924207448959
      kl: 0.012197884730994701
      policy_loss: 0.000715395319275558

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8006 s, 344 iter, 3440000 ts, 154 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-23-46
  done: false
  episode_len_mean: 113.54
  episode_reward_max: 226.78111448181082
  episode_reward_mean: 152.13071142910354
  episode_reward_min: -167.48421174513896
  episodes_this_iter: 88
  episodes_total: 27867
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3083.014
    load_time_ms: 2.454
    num_steps_sampled: 3450000
    num_steps_trained: 3450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.028514891862869263
      kl: 0.011781105771660805
      policy_loss: -0.00113492831587791

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8123 s, 349 iter, 3490000 ts, 162 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-25-42
  done: false
  episode_len_mean: 102.71
  episode_reward_max: 226.0637469935358
  episode_reward_mean: 170.0180491805494
  episode_reward_min: -169.22684213078767
  episodes_this_iter: 97
  episodes_total: 28337
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3094.125
    load_time_ms: 2.375
    num_steps_sampled: 3500000
    num_steps_trained: 3500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.1993454247713089
      kl: 0.022578727453947067
      policy_loss: 0.007571580354124308
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8238 s, 354 iter, 3540000 ts, 157 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-27-38
  done: false
  episode_len_mean: 106.58
  episode_reward_max: 224.0554662352818
  episode_reward_mean: 174.4300194849589
  episode_reward_min: -131.89567032619817
  episodes_this_iter: 92
  episodes_total: 28799
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3095.529
    load_time_ms: 2.439
    num_steps_sampled: 3550000
    num_steps_trained: 3550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.18101242184638977
      kl: 0.018540898337960243
      policy_loss: 0.0010983593529090285


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8354 s, 359 iter, 3590000 ts, 150 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-29-34
  done: false
  episode_len_mean: 104.9
  episode_reward_max: 227.11033468800466
  episode_reward_mean: 155.10770844666126
  episode_reward_min: -177.97285017944444
  episodes_this_iter: 97
  episodes_total: 29276
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3077.135
    load_time_ms: 2.338
    num_steps_sampled: 3600000
    num_steps_trained: 3600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2062491625547409
      kl: 0.03380803391337395
      policy_loss: -0.0038078518118709326


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8470 s, 364 iter, 3640000 ts, 153 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-31-30
  done: false
  episode_len_mean: 103.57
  episode_reward_max: 227.33186820841934
  episode_reward_mean: 151.79603974573465
  episode_reward_min: -156.73327748048106
  episodes_this_iter: 96
  episodes_total: 29749
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3066.008
    load_time_ms: 2.287
    num_steps_sampled: 3650000
    num_steps_trained: 3650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.06692029535770416
      kl: 0.018660198897123337
      policy_loss: 0.001531563117168843

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8585 s, 369 iter, 3690000 ts, 147 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-33-25
  done: false
  episode_len_mean: 103.48
  episode_reward_max: 229.04724377161696
  episode_reward_mean: 163.89616392962154
  episode_reward_min: -174.56728873235357
  episodes_this_iter: 97
  episodes_total: 30225
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3092.247
    load_time_ms: 2.38
    num_steps_sampled: 3700000
    num_steps_trained: 3700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.19993628561496735
      kl: 0.020395932719111443
      policy_loss: 0.0034737323876470327

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8701 s, 374 iter, 3740000 ts, 159 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-35-22
  done: false
  episode_len_mean: 109.01
  episode_reward_max: 227.2243258120593
  episode_reward_mean: 152.78026414454214
  episode_reward_min: -161.81073444928035
  episodes_this_iter: 91
  episodes_total: 30705
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.742
    load_time_ms: 2.391
    num_steps_sampled: 3750000
    num_steps_trained: 3750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.040287189185619354
      kl: 0.024845348671078682
      policy_loss: 0.003512629307806492

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8818 s, 379 iter, 3790000 ts, 158 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-37-20
  done: false
  episode_len_mean: 101.74
  episode_reward_max: 226.9298434792756
  episode_reward_mean: 176.4280763848468
  episode_reward_min: -146.23234810431794
  episodes_this_iter: 99
  episodes_total: 31184
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3082.745
    load_time_ms: 2.542
    num_steps_sampled: 3800000
    num_steps_trained: 3800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.15873506665229797
      kl: 0.014595357701182365
      policy_loss: 0.0029477004427462816


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 8936 s, 384 iter, 3840000 ts, 141 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-39-17
  done: false
  episode_len_mean: 107.48
  episode_reward_max: 223.88911872193026
  episode_reward_mean: 160.2015079081882
  episode_reward_min: -167.3784356643535
  episodes_this_iter: 94
  episodes_total: 31656
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3101.163
    load_time_ms: 2.562
    num_steps_sampled: 3850000
    num_steps_trained: 3850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.00926315225660801
      kl: 0.02689841389656067
      policy_loss: 0.002705354243516922
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9053 s, 389 iter, 3890000 ts, 174 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-41-14
  done: false
  episode_len_mean: 107.95
  episode_reward_max: 227.79192102502586
  episode_reward_mean: 160.2075794765736
  episode_reward_min: -167.7297304677841
  episodes_this_iter: 92
  episodes_total: 32135
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3077.428
    load_time_ms: 2.289
    num_steps_sampled: 3900000
    num_steps_trained: 3900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.021905366331338882
      kl: 0.01448651123791933
      policy_loss: 0.0019924186635762453
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9169 s, 394 iter, 3940000 ts, 162 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-43-11
  done: false
  episode_len_mean: 101.28
  episode_reward_max: 231.37688984935596
  episode_reward_mean: 158.49651864218526
  episode_reward_min: -170.8171500478234
  episodes_this_iter: 99
  episodes_total: 32612
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.621
    load_time_ms: 2.266
    num_steps_sampled: 3950000
    num_steps_trained: 3950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.10204244405031204
      kl: 0.012269273400306702
      policy_loss: -0.000366297492291778

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9285 s, 399 iter, 3990000 ts, 168 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-45-08
  done: false
  episode_len_mean: 103.21
  episode_reward_max: 231.44534259600994
  episode_reward_mean: 149.71599234764534
  episode_reward_min: -174.51071123389426
  episodes_this_iter: 96
  episodes_total: 33087
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3088.231
    load_time_ms: 2.354
    num_steps_sampled: 4000000
    num_steps_trained: 4000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.01360798068344593
      kl: 0.021491946652531624
      policy_loss: 0.003945641219615936

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9402 s, 404 iter, 4040000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-47-04
  done: false
  episode_len_mean: 100.58
  episode_reward_max: 230.18819150583948
  episode_reward_mean: 163.04312018864442
  episode_reward_min: -166.75752076971483
  episodes_this_iter: 99
  episodes_total: 33572
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.367
    load_time_ms: 2.463
    num_steps_sampled: 4050000
    num_steps_trained: 4050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.1060997024178505
      kl: 0.04746558889746666
      policy_loss: 0.00839760061353445
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9517 s, 409 iter, 4090000 ts, 155 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-49-00
  done: false
  episode_len_mean: 108.66
  episode_reward_max: 228.77488205076216
  episode_reward_mean: 151.1727746049699
  episode_reward_min: -162.79674456354434
  episodes_this_iter: 93
  episodes_total: 34052
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3061.969
    load_time_ms: 2.547
    num_steps_sampled: 4100000
    num_steps_trained: 4100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.035754550248384476
      kl: 0.013288099318742752
      policy_loss: -0.00048536961548961

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9632 s, 414 iter, 4140000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-50-54
  done: false
  episode_len_mean: 107.98
  episode_reward_max: 229.6346326990391
  episode_reward_mean: 156.47911835898807
  episode_reward_min: -161.55202881965585
  episodes_this_iter: 91
  episodes_total: 34528
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3079.711
    load_time_ms: 2.514
    num_steps_sampled: 4150000
    num_steps_trained: 4150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.06753761321306229
      kl: 0.011326272040605545
      policy_loss: 0.0010197876254096627


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9747 s, 419 iter, 4190000 ts, 166 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-52-50
  done: false
  episode_len_mean: 102.79
  episode_reward_max: 222.6770308900435
  episode_reward_mean: 147.4679668983391
  episode_reward_min: -177.0248548126028
  episodes_this_iter: 97
  episodes_total: 35010
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3068.38
    load_time_ms: 2.475
    num_steps_sampled: 4200000
    num_steps_trained: 4200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.02643340639770031
      kl: 0.018159084022045135
      policy_loss: 0.0026386012323200703
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9862 s, 424 iter, 4240000 ts, 160 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-54-46
  done: false
  episode_len_mean: 112.42
  episode_reward_max: 228.88836386499347
  episode_reward_mean: 160.3895686585506
  episode_reward_min: -175.3318334606679
  episodes_this_iter: 90
  episodes_total: 35481
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3053.85
    load_time_ms: 2.414
    num_steps_sampled: 4250000
    num_steps_trained: 4250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.044243618845939636
      kl: 0.019383667036890984
      policy_loss: 0.003014196874573827
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 9979 s, 429 iter, 4290000 ts, 144 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-56-42
  done: false
  episode_len_mean: 106.15
  episode_reward_max: 223.45884475221777
  episode_reward_mean: 163.82491670665235
  episode_reward_min: -140.91615520449398
  episodes_this_iter: 93
  episodes_total: 35967
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3086.523
    load_time_ms: 2.436
    num_steps_sampled: 4300000
    num_steps_trained: 4300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.09671155363321304
      kl: 0.012485865503549576
      policy_loss: -1.2632554899028037e

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10095 s, 434 iter, 4340000 ts, 178 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_03-58-38
  done: false
  episode_len_mean: 103.31
  episode_reward_max: 229.52593687071737
  episode_reward_mean: 159.6417990765418
  episode_reward_min: -178.95124258744448
  episodes_this_iter: 100
  episodes_total: 36444
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3117.939
    load_time_ms: 2.683
    num_steps_sampled: 4350000
    num_steps_trained: 4350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.19637012481689453
      kl: 0.01203113328665495
      policy_loss: 0.000669372209813445

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10209 s, 439 iter, 4390000 ts, 157 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-00-33
  done: false
  episode_len_mean: 102.46
  episode_reward_max: 223.632955202946
  episode_reward_mean: 159.56283763825542
  episode_reward_min: -152.18796210383908
  episodes_this_iter: 97
  episodes_total: 36915
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3097.032
    load_time_ms: 2.552
    num_steps_sampled: 4400000
    num_steps_trained: 4400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.11237693578004837
      kl: 0.018518120050430298
      policy_loss: 0.005468409508466721


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10324 s, 444 iter, 4440000 ts, 168 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-02-28
  done: false
  episode_len_mean: 105.47
  episode_reward_max: 226.74270912095872
  episode_reward_mean: 129.68452184233556
  episode_reward_min: -177.9157604015956
  episodes_this_iter: 96
  episodes_total: 37390
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3065.616
    load_time_ms: 2.303
    num_steps_sampled: 4450000
    num_steps_trained: 4450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.13653095066547394
      kl: 0.011672616004943848
      policy_loss: -0.00067292811581864

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10440 s, 449 iter, 4490000 ts, 135 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-04-25
  done: false
  episode_len_mean: 105.9
  episode_reward_max: 230.57354525974034
  episode_reward_mean: 174.91246382005767
  episode_reward_min: -143.3618176300403
  episodes_this_iter: 94
  episodes_total: 37864
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3076.699
    load_time_ms: 2.376
    num_steps_sampled: 4500000
    num_steps_trained: 4500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2550087571144104
      kl: 0.020882098004221916
      policy_loss: 0.0009259572252631187


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10555 s, 454 iter, 4540000 ts, 165 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-06-20
  done: false
  episode_len_mean: 106.24
  episode_reward_max: 232.95649909189999
  episode_reward_mean: 165.45743046771378
  episode_reward_min: -144.19517752213318
  episodes_this_iter: 95
  episodes_total: 38322
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3074.396
    load_time_ms: 2.421
    num_steps_sampled: 4550000
    num_steps_trained: 4550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2328217625617981
      kl: 0.05961109325289726
      policy_loss: 0.014378736726939678


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10670 s, 459 iter, 4590000 ts, 163 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-08-16
  done: false
  episode_len_mean: 108.68
  episode_reward_max: 227.45962802226072
  episode_reward_mean: 155.24972262184468
  episode_reward_min: -168.5481160667927
  episodes_this_iter: 91
  episodes_total: 38781
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3065.908
    load_time_ms: 2.395
    num_steps_sampled: 4600000
    num_steps_trained: 4600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2143830806016922
      kl: 0.022589804604649544
      policy_loss: 0.0048968917690217495

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10787 s, 464 iter, 4640000 ts, 170 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-10-12
  done: false
  episode_len_mean: 106.06
  episode_reward_max: 227.47844628494934
  episode_reward_mean: 156.67109204269067
  episode_reward_min: -145.77183867347733
  episodes_this_iter: 93
  episodes_total: 39251
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3089.745
    load_time_ms: 2.402
    num_steps_sampled: 4650000
    num_steps_trained: 4650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.19045723974704742
      kl: 0.0331038162112236
      policy_loss: 0.005382821895182133


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 10902 s, 469 iter, 4690000 ts, 155 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-12-07
  done: false
  episode_len_mean: 109.71
  episode_reward_max: 233.86485462815355
  episode_reward_mean: 157.4531558087169
  episode_reward_min: -148.09858743245886
  episodes_this_iter: 94
  episodes_total: 39710
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3091.32
    load_time_ms: 2.393
    num_steps_sampled: 4700000
    num_steps_trained: 4700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.1760568916797638
      kl: 0.04090459644794464
      policy_loss: 0.017738567665219307
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11017 s, 474 iter, 4740000 ts, 160 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-14-03
  done: false
  episode_len_mean: 114.12
  episode_reward_max: 230.2738769275731
  episode_reward_mean: 152.4378266298252
  episode_reward_min: -170.65935641101711
  episodes_this_iter: 88
  episodes_total: 40164
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3090.388
    load_time_ms: 2.216
    num_steps_sampled: 4750000
    num_steps_trained: 4750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.029792724177241325
      kl: 0.012114601209759712
      policy_loss: 0.002382128499448299

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11132 s, 479 iter, 4790000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-15-58
  done: false
  episode_len_mean: 104.86
  episode_reward_max: 225.13340481814993
  episode_reward_mean: 142.48796698702256
  episode_reward_min: -171.31908178052606
  episodes_this_iter: 97
  episodes_total: 40629
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3079.233
    load_time_ms: 2.166
    num_steps_sampled: 4800000
    num_steps_trained: 4800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2961336672306061
      kl: 0.015535027720034122
      policy_loss: 0.003088978584855795

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11247 s, 484 iter, 4840000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-17-53
  done: false
  episode_len_mean: 117.63
  episode_reward_max: 234.32844451913914
  episode_reward_mean: 159.34760329623225
  episode_reward_min: -162.66061619503301
  episodes_this_iter: 89
  episodes_total: 41078
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3072.348
    load_time_ms: 2.28
    num_steps_sampled: 4850000
    num_steps_trained: 4850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2214435487985611
      kl: 0.018881412222981453
      policy_loss: -0.000691966968588531

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11363 s, 489 iter, 4890000 ts, 171 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-19-49
  done: false
  episode_len_mean: 110.12
  episode_reward_max: 232.2018289017942
  episode_reward_mean: 150.64489502587463
  episode_reward_min: -167.6062953510615
  episodes_this_iter: 90
  episodes_total: 41524
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3075.295
    load_time_ms: 2.322
    num_steps_sampled: 4900000
    num_steps_trained: 4900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.22935928404331207
      kl: 0.02158486284315586
      policy_loss: 0.0013757589040324092


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11479 s, 494 iter, 4940000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-21-46
  done: false
  episode_len_mean: 109.05
  episode_reward_max: 231.01153922281492
  episode_reward_mean: 149.5087470581695
  episode_reward_min: -175.84398276110454
  episodes_this_iter: 91
  episodes_total: 41962
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3064.845
    load_time_ms: 2.287
    num_steps_sampled: 4950000
    num_steps_trained: 4950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.22994934022426605
      kl: 0.12046761065721512
      policy_loss: 0.0242648683488369
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11596 s, 499 iter, 4990000 ts, 155 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-23-42
  done: false
  episode_len_mean: 109.36
  episode_reward_max: 235.802522107093
  episode_reward_mean: 164.38353001752205
  episode_reward_min: -143.716348924685
  episodes_this_iter: 91
  episodes_total: 42398
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3090.107
    load_time_ms: 2.462
    num_steps_sampled: 5000000
    num_steps_trained: 5000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.35206514596939087
      kl: 0.02855575643479824
      policy_loss: 0.003945839591324329
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11712 s, 504 iter, 5040000 ts, 166 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-25-38
  done: false
  episode_len_mean: 111.73
  episode_reward_max: 234.16058433242287
  episode_reward_mean: 158.18542690720278
  episode_reward_min: -161.3231695461467
  episodes_this_iter: 89
  episodes_total: 42832
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3107.488
    load_time_ms: 2.518
    num_steps_sampled: 5050000
    num_steps_trained: 5050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.20997215807437897
      kl: 0.018473923206329346
      policy_loss: 0.002070087241008877

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11826 s, 509 iter, 5090000 ts, 153 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-27-33
  done: false
  episode_len_mean: 108.74
  episode_reward_max: 236.5130149705748
  episode_reward_mean: 161.5526555286623
  episode_reward_min: -165.9254057116443
  episodes_this_iter: 92
  episodes_total: 43278
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3087.424
    load_time_ms: 2.48
    num_steps_sampled: 5100000
    num_steps_trained: 5100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.379798948764801
      kl: 0.018088718876242638
      policy_loss: 0.0021107459906488657
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 11941 s, 514 iter, 5140000 ts, 156 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-29-29
  done: false
  episode_len_mean: 111.76
  episode_reward_max: 230.76132666256828
  episode_reward_mean: 164.68799983977374
  episode_reward_min: -125.734686325882
  episodes_this_iter: 90
  episodes_total: 43730
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3061.958
    load_time_ms: 2.533
    num_steps_sampled: 5150000
    num_steps_trained: 5150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.31258898973464966
      kl: 0.022760625928640366
      policy_loss: 0.004036230035126209


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12057 s, 519 iter, 5190000 ts, 168 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-31-24
  done: false
  episode_len_mean: 107.38
  episode_reward_max: 233.56617580903352
  episode_reward_mean: 172.06132766002142
  episode_reward_min: -151.97776842903764
  episodes_this_iter: 93
  episodes_total: 44171
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3089.916
    load_time_ms: 2.33
    num_steps_sampled: 5200000
    num_steps_trained: 5200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.42875486612319946
      kl: 0.052696797996759415
      policy_loss: 0.010856415145099163

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12173 s, 524 iter, 5240000 ts, 160 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-33-20
  done: false
  episode_len_mean: 105.39
  episode_reward_max: 227.00082786444236
  episode_reward_mean: 149.03688736140774
  episode_reward_min: -175.33486060255706
  episodes_this_iter: 96
  episodes_total: 44624
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3091.217
    load_time_ms: 2.369
    num_steps_sampled: 5250000
    num_steps_trained: 5250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4567863345146179
      kl: 0.015776213258504868
      policy_loss: -0.00087954057380557

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12288 s, 529 iter, 5290000 ts, 163 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-35-16
  done: false
  episode_len_mean: 110.11
  episode_reward_max: 231.46287263926556
  episode_reward_mean: 162.11038198867263
  episode_reward_min: -168.73755447806826
  episodes_this_iter: 91
  episodes_total: 45060
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3062.293
    load_time_ms: 2.345
    num_steps_sampled: 5300000
    num_steps_trained: 5300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3592437505722046
      kl: 0.028829995542764664
      policy_loss: 0.009239424020051956

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12403 s, 534 iter, 5340000 ts, 173 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-37-11
  done: false
  episode_len_mean: 112.79
  episode_reward_max: 233.56048508350767
  episode_reward_mean: 161.20988589121222
  episode_reward_min: -173.83373095221893
  episodes_this_iter: 89
  episodes_total: 45500
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.973
    load_time_ms: 2.278
    num_steps_sampled: 5350000
    num_steps_trained: 5350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3342002034187317
      kl: 0.047270648181438446
      policy_loss: 0.006747989449650049

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12518 s, 539 iter, 5390000 ts, 174 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-39-06
  done: false
  episode_len_mean: 116.92
  episode_reward_max: 230.60000382539698
  episode_reward_mean: 172.63889924149555
  episode_reward_min: -170.4515036248887
  episodes_this_iter: 84
  episodes_total: 45947
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3074.514
    load_time_ms: 2.54
    num_steps_sampled: 5400000
    num_steps_trained: 5400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.18564370274543762
      kl: 0.0186283178627491
      policy_loss: 0.003838991979137063
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12634 s, 544 iter, 5440000 ts, 169 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-41-02
  done: false
  episode_len_mean: 108.25
  episode_reward_max: 228.440474168232
  episode_reward_mean: 156.05004707354107
  episode_reward_min: -155.30921259634266
  episodes_this_iter: 92
  episodes_total: 46396
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.216
    load_time_ms: 2.658
    num_steps_sampled: 5450000
    num_steps_trained: 5450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3757028877735138
      kl: 55.62832260131836
      policy_loss: 0.08379770815372467
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12749 s, 549 iter, 5490000 ts, 109 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-42-57
  done: false
  episode_len_mean: 112.85
  episode_reward_max: 227.63083521227145
  episode_reward_mean: 112.46244851558889
  episode_reward_min: -173.31551579119574
  episodes_this_iter: 89
  episodes_total: 46822
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3073.766
    load_time_ms: 2.405
    num_steps_sampled: 5500000
    num_steps_trained: 5500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.34280985593795776
      kl: 0.008447380736470222
      policy_loss: -0.0003250545123592

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12863 s, 554 iter, 5540000 ts, 132 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-44-52
  done: false
  episode_len_mean: 120.09
  episode_reward_max: 233.8766286624379
  episode_reward_mean: 111.65495966612092
  episode_reward_min: -174.79762506383986
  episodes_this_iter: 83
  episodes_total: 47251
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3053.101
    load_time_ms: 2.168
    num_steps_sampled: 5550000
    num_steps_trained: 5550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.22328154742717743
      kl: 0.016987768933176994
      policy_loss: 0.002370402682572603

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 12978 s, 559 iter, 5590000 ts, 108 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-46-47
  done: false
  episode_len_mean: 108.9
  episode_reward_max: 232.94127340954876
  episode_reward_mean: 83.711278706137
  episode_reward_min: -172.95284647373694
  episodes_this_iter: 95
  episodes_total: 47699
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.521
    load_time_ms: 2.332
    num_steps_sampled: 5600000
    num_steps_trained: 5600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.42682257294654846
      kl: 0.039202138781547546
      policy_loss: 0.006221523508429527
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13094 s, 564 iter, 5640000 ts, 127 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-48-44
  done: false
  episode_len_mean: 108.34
  episode_reward_max: 235.3953002573247
  episode_reward_mean: 117.33117129158386
  episode_reward_min: -167.39605279529079
  episodes_this_iter: 93
  episodes_total: 48139
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3072.437
    load_time_ms: 2.465
    num_steps_sampled: 5650000
    num_steps_trained: 5650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5340510606765747
      kl: 0.02336244285106659
      policy_loss: 0.00384919298812747
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13211 s, 569 iter, 5690000 ts, 123 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-50-41
  done: false
  episode_len_mean: 120.83
  episode_reward_max: 224.8123159231975
  episode_reward_mean: 131.81912988259762
  episode_reward_min: -177.133352537744
  episodes_this_iter: 81
  episodes_total: 48577
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.935
    load_time_ms: 2.543
    num_steps_sampled: 5700000
    num_steps_trained: 5700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2849991023540497
      kl: 0.0181417278945446
      policy_loss: 0.0003548514796420932
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13326 s, 574 iter, 5740000 ts, 143 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-52-36
  done: false
  episode_len_mean: 115.78
  episode_reward_max: 224.68931583629816
  episode_reward_mean: 141.55977903612362
  episode_reward_min: -168.43974116603985
  episodes_this_iter: 87
  episodes_total: 48995
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.504
    load_time_ms: 2.528
    num_steps_sampled: 5750000
    num_steps_trained: 5750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.31110310554504395
      kl: 0.024517517536878586
      policy_loss: 0.00435414211824536

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13442 s, 579 iter, 5790000 ts, 132 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-54-32
  done: false
  episode_len_mean: 113.02
  episode_reward_max: 235.72415679863911
  episode_reward_mean: 120.03265547437482
  episode_reward_min: -171.40026471920623
  episodes_this_iter: 90
  episodes_total: 49426
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.194
    load_time_ms: 2.633
    num_steps_sampled: 5800000
    num_steps_trained: 5800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.42176172137260437
      kl: 0.022374499589204788
      policy_loss: 0.00365990702994167

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13558 s, 584 iter, 5840000 ts, 129 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-56-29
  done: false
  episode_len_mean: 117.16
  episode_reward_max: 235.6975536520177
  episode_reward_mean: 120.82492323383345
  episode_reward_min: -173.34814915311193
  episodes_this_iter: 86
  episodes_total: 49863
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3068.311
    load_time_ms: 2.576
    num_steps_sampled: 5850000
    num_steps_trained: 5850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3788833022117615
      kl: 0.02025512605905533
      policy_loss: 0.003451559692621231
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13674 s, 589 iter, 5890000 ts, 120 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_04-58-24
  done: false
  episode_len_mean: 117.19
  episode_reward_max: 233.2989359684513
  episode_reward_mean: 138.3216186430367
  episode_reward_min: -163.4307216622137
  episodes_this_iter: 86
  episodes_total: 50305
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3088.156
    load_time_ms: 2.318
    num_steps_sampled: 5900000
    num_steps_trained: 5900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3701843321323395
      kl: 7.037219047546387
      policy_loss: 0.05876084044575691
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13788 s, 594 iter, 5940000 ts, 145 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-00-19
  done: false
  episode_len_mean: 119.81
  episode_reward_max: 229.86928404643277
  episode_reward_mean: 127.16477898319336
  episode_reward_min: -155.0382981906651
  episodes_this_iter: 86
  episodes_total: 50736
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.855
    load_time_ms: 2.32
    num_steps_sampled: 5950000
    num_steps_trained: 5950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3455013930797577
      kl: 38.615482330322266
      policy_loss: 0.11033327132463455
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 13903 s, 599 iter, 5990000 ts, 144 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-02-14
  done: false
  episode_len_mean: 121.98
  episode_reward_max: 234.70666815138674
  episode_reward_mean: 152.12878092788551
  episode_reward_min: -159.8819615487842
  episodes_this_iter: 83
  episodes_total: 51169
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3060.642
    load_time_ms: 2.281
    num_steps_sampled: 6000000
    num_steps_trained: 6000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3624298870563507
      kl: 0.012687752023339272
      policy_loss: 0.0020263323094695807

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14019 s, 604 iter, 6040000 ts, 142 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-04-10
  done: false
  episode_len_mean: 115.18
  episode_reward_max: 227.44621978242574
  episode_reward_mean: 134.05863322632266
  episode_reward_min: -174.3825915438128
  episodes_this_iter: 86
  episodes_total: 51608
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3089.068
    load_time_ms: 2.333
    num_steps_sampled: 6050000
    num_steps_trained: 6050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3912418782711029
      kl: 0.01984558440744877
      policy_loss: 0.001806346233934164
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14135 s, 609 iter, 6090000 ts, 143 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-06-06
  done: false
  episode_len_mean: 111.05
  episode_reward_max: 237.22932098769914
  episode_reward_mean: 143.020800094762
  episode_reward_min: -147.8087689064676
  episodes_this_iter: 91
  episodes_total: 52055
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.679
    load_time_ms: 2.521
    num_steps_sampled: 6100000
    num_steps_trained: 6100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.39457860589027405
      kl: 0.03037112206220627
      policy_loss: 0.002464729594066739
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14251 s, 614 iter, 6140000 ts, 140 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-08-02
  done: false
  episode_len_mean: 114.75
  episode_reward_max: 225.29910616320146
  episode_reward_mean: 151.6843214192917
  episode_reward_min: -159.43601323518175
  episodes_this_iter: 86
  episodes_total: 52496
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3064.196
    load_time_ms: 2.592
    num_steps_sampled: 6150000
    num_steps_trained: 6150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4292255938053131
      kl: 0.025940876454114914
      policy_loss: 0.0010419663740321994

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14366 s, 619 iter, 6190000 ts, 133 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-09-58
  done: false
  episode_len_mean: 125.74
  episode_reward_max: 231.53050273046156
  episode_reward_mean: 138.58780922767278
  episode_reward_min: -164.1942564634606
  episodes_this_iter: 77
  episodes_total: 52927
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.84
    load_time_ms: 2.399
    num_steps_sampled: 6200000
    num_steps_trained: 6200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.1578211486339569
      kl: 0.017712682485580444
      policy_loss: 0.002816808642819524
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14480 s, 624 iter, 6240000 ts, 147 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-11-52
  done: false
  episode_len_mean: 118.37
  episode_reward_max: 230.21623239090516
  episode_reward_mean: 143.38492099130184
  episode_reward_min: -166.56999379401705
  episodes_this_iter: 85
  episodes_total: 53359
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3062.146
    load_time_ms: 2.266
    num_steps_sampled: 6250000
    num_steps_trained: 6250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.28608381748199463
      kl: 0.34593167901039124
      policy_loss: 0.00790761411190033


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14596 s, 629 iter, 6290000 ts, 99 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-13-49
  done: false
  episode_len_mean: 105.28
  episode_reward_max: 233.68946881438816
  episode_reward_mean: 110.4725247424436
  episode_reward_min: -161.97349644918498
  episodes_this_iter: 95
  episodes_total: 53813
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.232
    load_time_ms: 2.339
    num_steps_sampled: 6300000
    num_steps_trained: 6300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.40182361006736755
      kl: 0.04283663630485535
      policy_loss: 0.015275247395038605
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14711 s, 634 iter, 6340000 ts, 88.7 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-15-43
  done: false
  episode_len_mean: 102.2
  episode_reward_max: 217.92503339850225
  episode_reward_mean: 96.3316164198176
  episode_reward_min: -146.30372446529924
  episodes_this_iter: 98
  episodes_total: 54276
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3075.9
    load_time_ms: 2.453
    num_steps_sampled: 6350000
    num_steps_trained: 6350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.416736364364624
      kl: 0.016228755936026573
      policy_loss: 0.0033665220253169537
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14826 s, 639 iter, 6390000 ts, 134 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-17-39
  done: false
  episode_len_mean: 109.01
  episode_reward_max: 231.857780621273
  episode_reward_mean: 116.1311698193399
  episode_reward_min: -182.83158036595148
  episodes_this_iter: 92
  episodes_total: 54736
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3064.383
    load_time_ms: 2.405
    num_steps_sampled: 6400000
    num_steps_trained: 6400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.337588369846344
      kl: 0.02083588019013405
      policy_loss: 0.0006264203693717718
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 14942 s, 644 iter, 6440000 ts, 123 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-19-35
  done: false
  episode_len_mean: 108.39
  episode_reward_max: 225.54353280943985
  episode_reward_mean: 121.2690478172204
  episode_reward_min: -152.10814498264082
  episodes_this_iter: 92
  episodes_total: 55189
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3071.114
    load_time_ms: 2.209
    num_steps_sampled: 6450000
    num_steps_trained: 6450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.33922380208969116
      kl: 0.025464827194809914
      policy_loss: 0.006460254080593586

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15059 s, 649 iter, 6490000 ts, 83.1 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-21-33
  done: false
  episode_len_mean: 107.77
  episode_reward_max: 223.91291923944283
  episode_reward_mean: 119.14793920826651
  episode_reward_min: -158.5297419371241
  episodes_this_iter: 92
  episodes_total: 55657
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3078.506
    load_time_ms: 2.228
    num_steps_sampled: 6500000
    num_steps_trained: 6500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4205421507358551
      kl: 0.027101345360279083
      policy_loss: 0.004068026784807444

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15175 s, 654 iter, 6540000 ts, 99.5 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-23-28
  done: false
  episode_len_mean: 109.55
  episode_reward_max: 220.57742836861956
  episode_reward_mean: 107.12161288009304
  episode_reward_min: -153.69680437710656
  episodes_this_iter: 92
  episodes_total: 56100
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3073.215
    load_time_ms: 2.316
    num_steps_sampled: 6550000
    num_steps_trained: 6550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3471585214138031
      kl: 0.015033717267215252
      policy_loss: -0.0007921855431050

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15291 s, 659 iter, 6590000 ts, 121 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-25-26
  done: false
  episode_len_mean: 115.44
  episode_reward_max: 213.22290929032977
  episode_reward_mean: 110.26964727191371
  episode_reward_min: -172.9834858526171
  episodes_this_iter: 85
  episodes_total: 56546
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3076.654
    load_time_ms: 2.445
    num_steps_sampled: 6600000
    num_steps_trained: 6600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3000338077545166
      kl: 0.01569190062582493
      policy_loss: 0.004567583557218313
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15408 s, 664 iter, 6640000 ts, 114 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-27-23
  done: false
  episode_len_mean: 110.72
  episode_reward_max: 214.90438124103858
  episode_reward_mean: 98.92347378152061
  episode_reward_min: -170.39383964354298
  episodes_this_iter: 89
  episodes_total: 57001
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3090.568
    load_time_ms: 2.616
    num_steps_sampled: 6650000
    num_steps_trained: 6650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.3152346611022949
      kl: 0.026232484728097916
      policy_loss: 0.01023201085627079
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15523 s, 669 iter, 6690000 ts, 85.6 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-29-18
  done: false
  episode_len_mean: 111.23
  episode_reward_max: 221.68922189988245
  episode_reward_mean: 110.39988238476391
  episode_reward_min: -147.8815255185603
  episodes_this_iter: 89
  episodes_total: 57449
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3082.217
    load_time_ms: 2.539
    num_steps_sampled: 6700000
    num_steps_trained: 6700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.38987261056900024
      kl: 0.0398196242749691
      policy_loss: 0.003705316223204136


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15639 s, 674 iter, 6740000 ts, 118 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-31-13
  done: false
  episode_len_mean: 113.96
  episode_reward_max: 214.05351573819237
  episode_reward_mean: 109.95860950457505
  episode_reward_min: -174.84316063691463
  episodes_this_iter: 88
  episodes_total: 57908
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3072.242
    load_time_ms: 2.34
    num_steps_sampled: 6750000
    num_steps_trained: 6750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4424079358577728
      kl: 0.02178829535841942
      policy_loss: 0.005041111260652542
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15755 s, 679 iter, 6790000 ts, 119 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-33-09
  done: false
  episode_len_mean: 108.03
  episode_reward_max: 227.07777847428443
  episode_reward_mean: 128.32020159147112
  episode_reward_min: -158.6226336998447
  episodes_this_iter: 94
  episodes_total: 58359
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3077.627
    load_time_ms: 2.276
    num_steps_sampled: 6800000
    num_steps_trained: 6800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5388298630714417
      kl: 0.034248556941747665
      policy_loss: 0.008359664119780064


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15869 s, 684 iter, 6840000 ts, 120 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-35-03
  done: false
  episode_len_mean: 121.05
  episode_reward_max: 226.44361870167123
  episode_reward_mean: 120.09159661814492
  episode_reward_min: -156.00688894281586
  episodes_this_iter: 83
  episodes_total: 58792
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3075.941
    load_time_ms: 2.347
    num_steps_sampled: 6850000
    num_steps_trained: 6850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.30427461862564087
      kl: 0.020124753937125206
      policy_loss: 0.00484105432406067

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 15984 s, 689 iter, 6890000 ts, 107 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-36-59
  done: false
  episode_len_mean: 113.67
  episode_reward_max: 229.6224490514468
  episode_reward_mean: 141.18527803071584
  episode_reward_min: -165.6845618919705
  episodes_this_iter: 87
  episodes_total: 59238
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3058.243
    load_time_ms: 2.423
    num_steps_sampled: 6900000
    num_steps_trained: 6900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.43851953744888306
      kl: 0.014848356135189533
      policy_loss: 0.0023703104816377163

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16098 s, 694 iter, 6940000 ts, 128 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-38-54
  done: false
  episode_len_mean: 110.18
  episode_reward_max: 234.91575133725152
  episode_reward_mean: 141.1207092873061
  episode_reward_min: -142.0469197346021
  episodes_this_iter: 89
  episodes_total: 59682
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3070.92
    load_time_ms: 2.348
    num_steps_sampled: 6950000
    num_steps_trained: 6950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4956282675266266
      kl: 0.030391763895750046
      policy_loss: 0.010949000716209412
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16213 s, 699 iter, 6990000 ts, 117 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-40-49
  done: false
  episode_len_mean: 107.72
  episode_reward_max: 213.8269965775181
  episode_reward_mean: 93.63251496888584
  episode_reward_min: -177.44308801642742
  episodes_this_iter: 93
  episodes_total: 60136
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3065.546
    load_time_ms: 2.349
    num_steps_sampled: 7000000
    num_steps_trained: 7000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5352062582969666
      kl: 0.022485284134745598
      policy_loss: 0.006504531018435955
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16328 s, 704 iter, 7040000 ts, 106 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-42-44
  done: false
  episode_len_mean: 109.56
  episode_reward_max: 225.49892087177747
  episode_reward_mean: 105.48541559912013
  episode_reward_min: -173.08968321555352
  episodes_this_iter: 91
  episodes_total: 60584
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3044.518
    load_time_ms: 2.31
    num_steps_sampled: 7050000
    num_steps_trained: 7050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4802808165550232
      kl: 0.025918444618582726
      policy_loss: 0.009251749143004417


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16444 s, 709 iter, 7090000 ts, 115 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-44-40
  done: false
  episode_len_mean: 110.73
  episode_reward_max: 233.35221663752404
  episode_reward_mean: 97.63122555582777
  episode_reward_min: -174.37002642277287
  episodes_this_iter: 88
  episodes_total: 61038
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3061.903
    load_time_ms: 2.252
    num_steps_sampled: 7100000
    num_steps_trained: 7100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4750915467739105
      kl: 0.03238270431756973
      policy_loss: 0.008368229493498802
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16559 s, 714 iter, 7140000 ts, 119 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-46-36
  done: false
  episode_len_mean: 109.34
  episode_reward_max: 228.54518978901243
  episode_reward_mean: 121.8232520025646
  episode_reward_min: -175.46458763996208
  episodes_this_iter: 92
  episodes_total: 61491
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3076.943
    load_time_ms: 2.288
    num_steps_sampled: 7150000
    num_steps_trained: 7150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.530061662197113
      kl: 3.5119738578796387
      policy_loss: 0.06716065853834152
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16676 s, 719 iter, 7190000 ts, 113 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-48-33
  done: false
  episode_len_mean: 104.08
  episode_reward_max: 225.3140253172462
  episode_reward_mean: 98.46238614043354
  episode_reward_min: -177.57815795315096
  episodes_this_iter: 96
  episodes_total: 61947
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3080.633
    load_time_ms: 2.342
    num_steps_sampled: 7200000
    num_steps_trained: 7200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6164475679397583
      kl: 0.029515167698264122
      policy_loss: 0.005543801002204418
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16793 s, 724 iter, 7240000 ts, 103 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-50-30
  done: false
  episode_len_mean: 105.55
  episode_reward_max: 209.59305126881597
  episode_reward_mean: 115.44976238680691
  episode_reward_min: -142.20960322842828
  episodes_this_iter: 94
  episodes_total: 62401
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3072.575
    load_time_ms: 2.416
    num_steps_sampled: 7250000
    num_steps_trained: 7250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5733689665794373
      kl: 0.016369963064789772
      policy_loss: 0.000969385378994047

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 16908 s, 729 iter, 7290000 ts, 113 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-52-24
  done: false
  episode_len_mean: 112.07
  episode_reward_max: 225.2660865132682
  episode_reward_mean: 114.20522062038907
  episode_reward_min: -176.59903819779592
  episodes_this_iter: 88
  episodes_total: 62858
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3074.724
    load_time_ms: 2.441
    num_steps_sampled: 7300000
    num_steps_trained: 7300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.44456803798675537
      kl: 0.029357796534895897
      policy_loss: 0.010744304396212101

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17023 s, 734 iter, 7340000 ts, 90.1 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-54-20
  done: false
  episode_len_mean: 113.77
  episode_reward_max: 227.7963238166531
  episode_reward_mean: 94.75241112572138
  episode_reward_min: -165.5203724194706
  episodes_this_iter: 90
  episodes_total: 63323
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3066.449
    load_time_ms: 2.475
    num_steps_sampled: 7350000
    num_steps_trained: 7350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.425812304019928
      kl: 68.48876190185547
      policy_loss: 0.11312665790319443
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17139 s, 739 iter, 7390000 ts, 75.9 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-56-16
  done: false
  episode_len_mean: 107.69
  episode_reward_max: 212.058834596938
  episode_reward_mean: 88.84439781913439
  episode_reward_min: -160.86649496155457
  episodes_this_iter: 93
  episodes_total: 63772
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3053.782
    load_time_ms: 2.412
    num_steps_sampled: 7400000
    num_steps_trained: 7400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5074535608291626
      kl: 0.026500921696424484
      policy_loss: 0.009508232586085796
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17254 s, 744 iter, 7440000 ts, 63.8 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_05-58-12
  done: false
  episode_len_mean: 115.18
  episode_reward_max: 221.83041974892106
  episode_reward_mean: 94.92890287684585
  episode_reward_min: -160.67662808395994
  episodes_this_iter: 87
  episodes_total: 64203
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3087.47
    load_time_ms: 2.464
    num_steps_sampled: 7450000
    num_steps_trained: 7450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5872854590415955
      kl: 0.023804927244782448
      policy_loss: 0.007550565060228109


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17371 s, 749 iter, 7490000 ts, 89.9 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-00-09
  done: false
  episode_len_mean: 113.05
  episode_reward_max: 219.92240702851572
  episode_reward_mean: 96.47200933115718
  episode_reward_min: -147.4707939626848
  episodes_this_iter: 88
  episodes_total: 64657
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3093.498
    load_time_ms: 2.579
    num_steps_sampled: 7500000
    num_steps_trained: 7500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5902290940284729
      kl: 0.02392340824007988
      policy_loss: 0.003992673009634018
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17487 s, 754 iter, 7540000 ts, 105 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-02-05
  done: false
  episode_len_mean: 112.37
  episode_reward_max: 221.33353180458593
  episode_reward_mean: 107.20715464703298
  episode_reward_min: -174.131619459027
  episodes_this_iter: 89
  episodes_total: 65111
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3072.394
    load_time_ms: 2.403
    num_steps_sampled: 7550000
    num_steps_trained: 7550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5304161906242371
      kl: 0.019066492095589638
      policy_loss: 0.006872063037008047
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17604 s, 759 iter, 7590000 ts, 112 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-04-01
  done: false
  episode_len_mean: 107.93
  episode_reward_max: 215.2151331201895
  episode_reward_mean: 104.19139543495247
  episode_reward_min: -175.53937796063622
  episodes_this_iter: 94
  episodes_total: 65579
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3073.971
    load_time_ms: 2.277
    num_steps_sampled: 7600000
    num_steps_trained: 7600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5416255593299866
      kl: 0.01896764524281025
      policy_loss: 0.002273926045745611
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17719 s, 764 iter, 7640000 ts, 118 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-05-58
  done: false
  episode_len_mean: 113.11
  episode_reward_max: 220.26587485150577
  episode_reward_mean: 113.09610016428225
  episode_reward_min: -163.12281276071127
  episodes_this_iter: 90
  episodes_total: 66031
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3074.693
    load_time_ms: 2.379
    num_steps_sampled: 7650000
    num_steps_trained: 7650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5892435908317566
      kl: 0.02160297892987728
      policy_loss: 0.0045520709827542305

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17835 s, 769 iter, 7690000 ts, 103 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-07-54
  done: false
  episode_len_mean: 106.73
  episode_reward_max: 217.94660075294448
  episode_reward_mean: 124.34678702845336
  episode_reward_min: -180.01076663702273
  episodes_this_iter: 93
  episodes_total: 66483
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3079.096
    load_time_ms: 2.546
    num_steps_sampled: 7700000
    num_steps_trained: 7700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6357531547546387
      kl: 0.02779804728925228
      policy_loss: 0.003410428063943982


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 17950 s, 774 iter, 7740000 ts, 115 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-09-49
  done: false
  episode_len_mean: 109.27
  episode_reward_max: 223.59652842865796
  episode_reward_mean: 108.41302388427619
  episode_reward_min: -152.65587700038216
  episodes_this_iter: 91
  episodes_total: 66944
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3079.639
    load_time_ms: 2.451
    num_steps_sampled: 7750000
    num_steps_trained: 7750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5779311060905457
      kl: 0.014431798830628395
      policy_loss: 0.00307506718672812


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18066 s, 779 iter, 7790000 ts, 120 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-11-45
  done: false
  episode_len_mean: 109.06
  episode_reward_max: 220.36830923012752
  episode_reward_mean: 119.34566185777553
  episode_reward_min: -143.16826608175288
  episodes_this_iter: 92
  episodes_total: 67397
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3061.262
    load_time_ms: 2.449
    num_steps_sampled: 7800000
    num_steps_trained: 7800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5502479672431946
      kl: 0.0319368802011013
      policy_loss: 0.00920466985553503
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18181 s, 784 iter, 7840000 ts, 116 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-13-41
  done: false
  episode_len_mean: 110.83
  episode_reward_max: 224.15211404982318
  episode_reward_mean: 123.97353260177397
  episode_reward_min: -176.89001878082684
  episodes_this_iter: 89
  episodes_total: 67851
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3082.227
    load_time_ms: 2.382
    num_steps_sampled: 7850000
    num_steps_trained: 7850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5801493525505066
      kl: 0.03227816894650459
      policy_loss: 0.01201231312006712
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18297 s, 789 iter, 7890000 ts, 119 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-15-37
  done: false
  episode_len_mean: 115.16
  episode_reward_max: 232.98293510637185
  episode_reward_mean: 121.08743212571973
  episode_reward_min: -160.17231747194649
  episodes_this_iter: 86
  episodes_total: 68305
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3087.487
    load_time_ms: 2.413
    num_steps_sampled: 7900000
    num_steps_trained: 7900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4400714039802551
      kl: 0.050260163843631744
      policy_loss: 0.014061837457120419

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18412 s, 794 iter, 7940000 ts, 124 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-17-32
  done: false
  episode_len_mean: 105.58
  episode_reward_max: 217.5003773088789
  episode_reward_mean: 129.00126246859028
  episode_reward_min: -163.36495022499034
  episodes_this_iter: 93
  episodes_total: 68772
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3090.536
    load_time_ms: 2.529
    num_steps_sampled: 7950000
    num_steps_trained: 7950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5287463068962097
      kl: 0.017573708668351173
      policy_loss: -0.000158035414642654

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18526 s, 799 iter, 7990000 ts, 122 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-19-27
  done: false
  episode_len_mean: 109.24
  episode_reward_max: 218.42122197893778
  episode_reward_mean: 123.51043972857873
  episode_reward_min: -144.7730692373723
  episodes_this_iter: 91
  episodes_total: 69227
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3091.006
    load_time_ms: 2.406
    num_steps_sampled: 8000000
    num_steps_trained: 8000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5418898463249207
      kl: 4.884096145629883
      policy_loss: 0.09382513165473938
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18643 s, 804 iter, 8040000 ts, 147 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-21-23
  done: false
  episode_len_mean: 113.02
  episode_reward_max: 233.51400826694334
  episode_reward_mean: 153.61355839417283
  episode_reward_min: -168.59369784224066
  episodes_this_iter: 88
  episodes_total: 69674
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3079.566
    load_time_ms: 2.368
    num_steps_sampled: 8050000
    num_steps_trained: 8050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5849526524543762
      kl: 0.024737855419516563
      policy_loss: 0.005483543034642935

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18758 s, 809 iter, 8090000 ts, 164 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-23-18
  done: false
  episode_len_mean: 112.36
  episode_reward_max: 233.86380807439863
  episode_reward_mean: 147.81074537107628
  episode_reward_min: -168.5184688431174
  episodes_this_iter: 90
  episodes_total: 70122
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3093.886
    load_time_ms: 2.446
    num_steps_sampled: 8100000
    num_steps_trained: 8100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5798709392547607
      kl: 0.012402686290442944
      policy_loss: 0.0028600513469427824

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18873 s, 814 iter, 8140000 ts, 151 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-25-14
  done: false
  episode_len_mean: 113.99
  episode_reward_max: 236.57733913502238
  episode_reward_mean: 158.6076268515692
  episode_reward_min: -163.90670353217916
  episodes_this_iter: 87
  episodes_total: 70571
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3078.342
    load_time_ms: 2.478
    num_steps_sampled: 8150000
    num_steps_trained: 8150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5955860614776611
      kl: 0.025368981063365936
      policy_loss: 0.010597308166325092


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 18989 s, 819 iter, 8190000 ts, 151 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-27-10
  done: false
  episode_len_mean: 111.86
  episode_reward_max: 234.43673498129505
  episode_reward_mean: 152.2507325178409
  episode_reward_min: -172.5047635563438
  episodes_this_iter: 89
  episodes_total: 71017
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3095.381
    load_time_ms: 2.32
    num_steps_sampled: 8200000
    num_steps_trained: 8200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6452869176864624
      kl: 0.03465662896633148
      policy_loss: 0.009456517174839973
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19104 s, 824 iter, 8240000 ts, 171 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-29-04
  done: false
  episode_len_mean: 113.69
  episode_reward_max: 240.24770309313362
  episode_reward_mean: 163.42501767853625
  episode_reward_min: -166.10030760625196
  episodes_this_iter: 89
  episodes_total: 71464
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3115.332
    load_time_ms: 2.394
    num_steps_sampled: 8250000
    num_steps_trained: 8250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6026398539543152
      kl: 0.014311606995761395
      policy_loss: 0.000608402013313025

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19218 s, 829 iter, 8290000 ts, 160 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-31-00
  done: false
  episode_len_mean: 108.66
  episode_reward_max: 228.94812139368256
  episode_reward_mean: 149.53215006469435
  episode_reward_min: -177.44548274890568
  episodes_this_iter: 91
  episodes_total: 71917
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3085.135
    load_time_ms: 2.329
    num_steps_sampled: 8300000
    num_steps_trained: 8300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6975575685501099
      kl: 0.027244996279478073
      policy_loss: 0.005558915436267853

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19334 s, 834 iter, 8340000 ts, 154 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-32-56
  done: false
  episode_len_mean: 112.97
  episode_reward_max: 235.07172629351226
  episode_reward_mean: 158.1627721319545
  episode_reward_min: -172.36511609652496
  episodes_this_iter: 88
  episodes_total: 72367
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3080.001
    load_time_ms: 2.269
    num_steps_sampled: 8350000
    num_steps_trained: 8350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6677677631378174
      kl: 0.0230824276804924
      policy_loss: 0.002645964501425624
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19448 s, 839 iter, 8390000 ts, 131 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-34-50
  done: false
  episode_len_mean: 109.2
  episode_reward_max: 240.3928134530083
  episode_reward_mean: 164.87360064569097
  episode_reward_min: -159.71785766223402
  episodes_this_iter: 88
  episodes_total: 72826
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3064.209
    load_time_ms: 2.292
    num_steps_sampled: 8400000
    num_steps_trained: 8400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6384711265563965
      kl: 0.01820308528840542
      policy_loss: 0.002532648155465722
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19563 s, 844 iter, 8440000 ts, 159 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-36-45
  done: false
  episode_len_mean: 109.22
  episode_reward_max: 226.99682605688562
  episode_reward_mean: 164.40606965246207
  episode_reward_min: -141.89118842447925
  episodes_this_iter: 92
  episodes_total: 73277
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3046.834
    load_time_ms: 2.517
    num_steps_sampled: 8450000
    num_steps_trained: 8450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7294890284538269
      kl: 2.791905164718628
      policy_loss: 0.050956323742866516
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19679 s, 849 iter, 8490000 ts, 158 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-38-41
  done: false
  episode_len_mean: 107.22
  episode_reward_max: 221.8607879822298
  episode_reward_mean: 165.20995970779222
  episode_reward_min: -169.74122903523426
  episodes_this_iter: 93
  episodes_total: 73736
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3072.18
    load_time_ms: 2.618
    num_steps_sampled: 8500000
    num_steps_trained: 8500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.8054736852645874
      kl: 0.031455423682928085
      policy_loss: 0.004274454899132252
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19794 s, 854 iter, 8540000 ts, 172 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-40-36
  done: false
  episode_len_mean: 112.86
  episode_reward_max: 224.2190066399166
  episode_reward_mean: 143.77148616923313
  episode_reward_min: -167.41166453456606
  episodes_this_iter: 89
  episodes_total: 74181
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3081.747
    load_time_ms: 2.455
    num_steps_sampled: 8550000
    num_steps_trained: 8550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6864653825759888
      kl: 0.026791797950863838
      policy_loss: 0.004233032930642366


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 19909 s, 859 iter, 8590000 ts, 147 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-42-32
  done: false
  episode_len_mean: 112.15
  episode_reward_max: 226.5392933839428
  episode_reward_mean: 147.57837915548967
  episode_reward_min: -175.8136577130712
  episodes_this_iter: 89
  episodes_total: 74617
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3085.592
    load_time_ms: 2.337
    num_steps_sampled: 8600000
    num_steps_trained: 8600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6276543736457825
      kl: 0.3054029643535614
      policy_loss: 0.00708939041942358
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20024 s, 864 iter, 8640000 ts, 169 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-44-27
  done: false
  episode_len_mean: 111.66
  episode_reward_max: 227.2063563166683
  episode_reward_mean: 156.20239784157576
  episode_reward_min: -168.7024212185807
  episodes_this_iter: 91
  episodes_total: 75056
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3075.495
    load_time_ms: 2.247
    num_steps_sampled: 8650000
    num_steps_trained: 8650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.693490207195282
      kl: 0.029323067516088486
      policy_loss: 0.006084212101995945
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20140 s, 869 iter, 8690000 ts, 160 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-46-23
  done: false
  episode_len_mean: 106.59
  episode_reward_max: 227.05083612182315
  episode_reward_mean: 159.45866177472658
  episode_reward_min: -172.23841645439742
  episodes_this_iter: 93
  episodes_total: 75508
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3069.756
    load_time_ms: 2.266
    num_steps_sampled: 8700000
    num_steps_trained: 8700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7683469653129578
      kl: 0.030477026477456093
      policy_loss: 0.004847901873290539

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20255 s, 874 iter, 8740000 ts, 148 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-48-19
  done: false
  episode_len_mean: 115.52
  episode_reward_max: 231.34740133631436
  episode_reward_mean: 171.38967563704244
  episode_reward_min: -152.9211918893459
  episodes_this_iter: 86
  episodes_total: 75948
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3091.08
    load_time_ms: 2.435
    num_steps_sampled: 8750000
    num_steps_trained: 8750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7078158259391785
      kl: 0.04120590537786484
      policy_loss: 0.010144759900867939
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20370 s, 879 iter, 8790000 ts, 141 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-50-14
  done: false
  episode_len_mean: 117.22
  episode_reward_max: 228.06941069382628
  episode_reward_mean: 164.26200276042334
  episode_reward_min: -167.1459896185914
  episodes_this_iter: 85
  episodes_total: 76389
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3089.573
    load_time_ms: 2.594
    num_steps_sampled: 8800000
    num_steps_trained: 8800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6778767704963684
      kl: 0.022025741636753082
      policy_loss: 0.0027265488170087337

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20486 s, 884 iter, 8840000 ts, 163 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-52-10
  done: false
  episode_len_mean: 109.73
  episode_reward_max: 234.82879920437705
  episode_reward_mean: 148.05469066851637
  episode_reward_min: -173.13546739824227
  episodes_this_iter: 91
  episodes_total: 76828
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3101.121
    load_time_ms: 2.586
    num_steps_sampled: 8850000
    num_steps_trained: 8850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7503592371940613
      kl: 0.04106482118368149
      policy_loss: 0.005832836963236332


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20602 s, 889 iter, 8890000 ts, 155 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-54-06
  done: false
  episode_len_mean: 114.68
  episode_reward_max: 233.68066865800938
  episode_reward_mean: 160.5864896614208
  episode_reward_min: -169.43467462509335
  episodes_this_iter: 86
  episodes_total: 77260
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.331
    load_time_ms: 2.478
    num_steps_sampled: 8900000
    num_steps_trained: 8900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.634554386138916
      kl: 0.032260023057460785
      policy_loss: 0.009989476762712002
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20718 s, 894 iter, 8940000 ts, 166 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-56-02
  done: false
  episode_len_mean: 115.76
  episode_reward_max: 233.44404000368343
  episode_reward_mean: 162.82643505543945
  episode_reward_min: -143.9878940180437
  episodes_this_iter: 87
  episodes_total: 77692
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3076.44
    load_time_ms: 2.311
    num_steps_sampled: 8950000
    num_steps_trained: 8950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6886143088340759
      kl: 0.02630298025906086
      policy_loss: 0.009687920100986958
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20833 s, 899 iter, 8990000 ts, 170 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-57-57
  done: false
  episode_len_mean: 109.97
  episode_reward_max: 231.6352931816615
  episode_reward_mean: 144.8225337800984
  episode_reward_min: -168.6076367330535
  episodes_this_iter: 90
  episodes_total: 78125
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3079.127
    load_time_ms: 2.206
    num_steps_sampled: 9000000
    num_steps_trained: 9000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6961425542831421
      kl: 0.03315814211964607
      policy_loss: 0.009483273141086102
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 20947 s, 904 iter, 9040000 ts, 162 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_06-59-52
  done: false
  episode_len_mean: 113.65
  episode_reward_max: 227.92560363711345
  episode_reward_mean: 166.67073661350202
  episode_reward_min: -163.02654840455466
  episodes_this_iter: 87
  episodes_total: 78556
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3050.187
    load_time_ms: 2.226
    num_steps_sampled: 9050000
    num_steps_trained: 9050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6931370496749878
      kl: 0.057595014572143555
      policy_loss: 0.006194198504090309

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21062 s, 909 iter, 9090000 ts, 162 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-01-47
  done: false
  episode_len_mean: 117.83
  episode_reward_max: 231.16121618414383
  episode_reward_mean: 161.58505915939358
  episode_reward_min: -166.35480503362533
  episodes_this_iter: 86
  episodes_total: 78988
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3078.535
    load_time_ms: 2.333
    num_steps_sampled: 9100000
    num_steps_trained: 9100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6528252363204956
      kl: 0.018004706129431725
      policy_loss: -0.00056763965403661

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21178 s, 914 iter, 9140000 ts, 155 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-03-43
  done: false
  episode_len_mean: 117.59
  episode_reward_max: 222.79819101115248
  episode_reward_mean: 152.28053685815573
  episode_reward_min: -161.8690547235443
  episodes_this_iter: 86
  episodes_total: 79419
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3104.653
    load_time_ms: 2.357
    num_steps_sampled: 9150000
    num_steps_trained: 9150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6320477724075317
      kl: 0.02416580729186535
      policy_loss: 0.0026460401713848114


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21293 s, 919 iter, 9190000 ts, 159 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-05-39
  done: false
  episode_len_mean: 111.04
  episode_reward_max: 227.17686932794905
  episode_reward_mean: 153.91684387748606
  episode_reward_min: -174.82023897736406
  episodes_this_iter: 89
  episodes_total: 79859
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3099.941
    load_time_ms: 2.438
    num_steps_sampled: 9200000
    num_steps_trained: 9200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7337092757225037
      kl: 0.0393659807741642
      policy_loss: 0.0043258462101221085


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21408 s, 924 iter, 9240000 ts, 167 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-07-34
  done: false
  episode_len_mean: 113.92
  episode_reward_max: 230.29773991428354
  episode_reward_mean: 161.040016596301
  episode_reward_min: -140.77632821832967
  episodes_this_iter: 88
  episodes_total: 80309
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3086.243
    load_time_ms: 2.277
    num_steps_sampled: 9250000
    num_steps_trained: 9250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7005984783172607
      kl: 0.027053289115428925
      policy_loss: 0.006127328146249056
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21523 s, 929 iter, 9290000 ts, 153 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-09-29
  done: false
  episode_len_mean: 116.89
  episode_reward_max: 230.3259317218972
  episode_reward_mean: 161.58853990462566
  episode_reward_min: -153.19357538763242
  episodes_this_iter: 85
  episodes_total: 80759
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3059.808
    load_time_ms: 2.17
    num_steps_sampled: 9300000
    num_steps_trained: 9300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6054829955101013
      kl: 0.026481442153453827
      policy_loss: 0.005315123125910759
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21637 s, 934 iter, 9340000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-11-24
  done: false
  episode_len_mean: 117.79
  episode_reward_max: 235.46432119097884
  episode_reward_mean: 155.6694121142213
  episode_reward_min: -170.26589783238717
  episodes_this_iter: 86
  episodes_total: 81185
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3088.896
    load_time_ms: 2.337
    num_steps_sampled: 9350000
    num_steps_trained: 9350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.655349612236023
      kl: 0.02243495173752308
      policy_loss: 0.0022427700459957123
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21752 s, 939 iter, 9390000 ts, 152 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-13-18
  done: false
  episode_len_mean: 118.48
  episode_reward_max: 225.02615136968308
  episode_reward_mean: 165.2528988657971
  episode_reward_min: -138.83819738995692
  episodes_this_iter: 85
  episodes_total: 81615
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3109.165
    load_time_ms: 2.468
    num_steps_sampled: 9400000
    num_steps_trained: 9400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6328341364860535
      kl: 0.48623526096343994
      policy_loss: 0.027649087831377983
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21866 s, 944 iter, 9440000 ts, 150 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-15-13
  done: false
  episode_len_mean: 110.56
  episode_reward_max: 236.71056972065654
  episode_reward_mean: 146.66328359151245
  episode_reward_min: -150.06804364026732
  episodes_this_iter: 90
  episodes_total: 82050
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3074.167
    load_time_ms: 2.413
    num_steps_sampled: 9450000
    num_steps_trained: 9450000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7228837609291077
      kl: 0.05046767741441727
      policy_loss: 0.012880063615739346


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 21980 s, 949 iter, 9490000 ts, 171 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-17-07
  done: false
  episode_len_mean: 117.24
  episode_reward_max: 234.5907288224933
  episode_reward_mean: 156.19543174567667
  episode_reward_min: -159.03644301286738
  episodes_this_iter: 88
  episodes_total: 82475
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3075.228
    load_time_ms: 2.558
    num_steps_sampled: 9500000
    num_steps_trained: 9500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6740977168083191
      kl: 0.014465169981122017
      policy_loss: 0.003213347867131233


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22095 s, 954 iter, 9540000 ts, 148 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-19-02
  done: false
  episode_len_mean: 120.02
  episode_reward_max: 235.30379912525729
  episode_reward_mean: 161.973061143262
  episode_reward_min: -150.23831202363928
  episodes_this_iter: 86
  episodes_total: 82899
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3100.351
    load_time_ms: 2.614
    num_steps_sampled: 9550000
    num_steps_trained: 9550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6738044619560242
      kl: 0.046835921704769135
      policy_loss: 0.0037025879137218
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22209 s, 959 iter, 9590000 ts, 162 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-20-57
  done: false
  episode_len_mean: 119.07
  episode_reward_max: 228.21689113376462
  episode_reward_mean: 172.07695583767156
  episode_reward_min: -131.00266499195837
  episodes_this_iter: 83
  episodes_total: 83324
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3075.35
    load_time_ms: 2.442
    num_steps_sampled: 9600000
    num_steps_trained: 9600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5631063580513
      kl: 0.021061688661575317
      policy_loss: 0.003909769933670759
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22323 s, 964 iter, 9640000 ts, 149 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-22-51
  done: false
  episode_len_mean: 110.13
  episode_reward_max: 233.55402283125042
  episode_reward_mean: 158.8719150323513
  episode_reward_min: -167.91624340215446
  episodes_this_iter: 89
  episodes_total: 83760
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3051.478
    load_time_ms: 2.424
    num_steps_sampled: 9650000
    num_steps_trained: 9650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6931700706481934
      kl: 0.025412920862436295
      policy_loss: 0.0002811193990055471

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22438 s, 969 iter, 9690000 ts, 168 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-24-45
  done: false
  episode_len_mean: 116.92
  episode_reward_max: 232.64403420541527
  episode_reward_mean: 157.29885193768115
  episode_reward_min: -148.82544752754063
  episodes_this_iter: 84
  episodes_total: 84199
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3057.227
    load_time_ms: 2.282
    num_steps_sampled: 9700000
    num_steps_trained: 9700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6060718297958374
      kl: 7.413862228393555
      policy_loss: 0.07099992781877518
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22552 s, 974 iter, 9740000 ts, 162 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-26-40
  done: false
  episode_len_mean: 111.88
  episode_reward_max: 229.06389648916527
  episode_reward_mean: 159.7009152178952
  episode_reward_min: -176.88223698218934
  episodes_this_iter: 89
  episodes_total: 84639
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3084.533
    load_time_ms: 2.287
    num_steps_sampled: 9750000
    num_steps_trained: 9750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6445721983909607
      kl: 0.0190420001745224
      policy_loss: 0.005384942051023245
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22667 s, 979 iter, 9790000 ts, 161 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-28-35
  done: false
  episode_len_mean: 111.69
  episode_reward_max: 230.3789336521863
  episode_reward_mean: 158.02653691627953
  episode_reward_min: -163.72144639998803
  episodes_this_iter: 90
  episodes_total: 85078
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3099.294
    load_time_ms: 2.531
    num_steps_sampled: 9800000
    num_steps_trained: 9800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7018633484840393
      kl: 0.026223478838801384
      policy_loss: 0.003589326748624444


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22782 s, 984 iter, 9840000 ts, 147 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-30-31
  done: false
  episode_len_mean: 116.95
  episode_reward_max: 232.69858266714823
  episode_reward_mean: 162.65559225303076
  episode_reward_min: -168.64136532521718
  episodes_this_iter: 85
  episodes_total: 85511
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3086.345
    load_time_ms: 2.555
    num_steps_sampled: 9850000
    num_steps_trained: 9850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6592262983322144
      kl: 0.016444748267531395
      policy_loss: 0.001776738441549241

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 22897 s, 989 iter, 9890000 ts, 159 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-32-26
  done: false
  episode_len_mean: 116.24
  episode_reward_max: 234.02537217439811
  episode_reward_mean: 150.42853140848217
  episode_reward_min: -169.8375626692768
  episodes_this_iter: 85
  episodes_total: 85950
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3073.85
    load_time_ms: 2.526
    num_steps_sampled: 9900000
    num_steps_trained: 9900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5643344521522522
      kl: 0.021824868395924568
      policy_loss: 0.00533814774826169
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 23012 s, 994 iter, 9940000 ts, 156 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-34-21
  done: false
  episode_len_mean: 111.0
  episode_reward_max: 234.4029011180867
  episode_reward_mean: 146.53507325442615
  episode_reward_min: -174.15894386203524
  episodes_this_iter: 89
  episodes_total: 86382
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3052.295
    load_time_ms: 2.499
    num_steps_sampled: 9950000
    num_steps_trained: 9950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6908711791038513
      kl: 0.019373439252376556
      policy_loss: 0.0013647997984662652


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:	RUNNING [pid=12746], 23128 s, 999 iter, 9990000 ts, 168 rew

Result for PPO_MultiAgentTeamSpiritIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-23_07-36-17
  done: true
  episode_len_mean: 110.81
  episode_reward_max: 237.48101045400855
  episode_reward_mean: 157.08994961003972
  episode_reward_min: -178.83785417867946
  episodes_this_iter: 89
  episodes_total: 86829
  experiment_id: 1e6021831b8d437c9eb317160a4e5781
  hostname: Gandalf
  info:
    grad_time_ms: 3109.352
    load_time_ms: 2.3
    num_steps_sampled: 10000000
    num_steps_trained: 10000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7738745212554932
      kl: 0.02327972836792469
      policy_loss: 0.0015150641556829214
