# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout, in environment steps. Defined once so that every cell
# in the notebook uses the same value.
# Translated author note: 103 is the maximum horizon if the rollout should
# abort before leaving(?); the default was 500.
HORIZON = 500

# Name of the experiment.
experiment_name = "IntersectionExample"

# Name of the scenario class.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the list printed by this cell contains 'IntersectionScenarioTW',
# not 'IntersectionTWScenario' -- confirm this name is actually exported by
# flow.scenarios.
scenario_name = "IntersectionTWScenario"

# Name of the environment class.
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv', 'MultiAgentIntersectionEnv_baseline_1', 'MultiAgentIntersectionEnv_baseline_2', 'MultiAgentIntersectionEnv_baseline_3', 'MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, passed to NetParams as additional_params.
additionalNetParams = {
            "edge_length": 40,
            "lanes": 1,
            "speed_limit": 30
        }

# Network parameters. Every argument is spelled out; the trailing comments give
# the library default as noted by the author, and "!!" marks a value that was
# deliberately changed from that default.
net_params = NetParams( no_internal_links=False,                  #default: True   !! so that intersections are not skipped
                        inflows=None,                             #default: None
                        osm_path=None,                            #default: None
                        netfile=None,                             #default: None
                        additional_params=additionalNetParams     #default: None   !!
                      )

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial vehicle placement settings. Every argument is spelled out; the
# trailing comments give the library default as noted by the author, and "!!"
# marks a value that was deliberately changed from that default.
initial_config = InitialConfig( shuffle=True,                            #default: False         !!
                                spacing="custom",                        #default: "uniform"     !!
                                min_gap=10,                              #default: 0
                                perturbation=29.99,                      #default: 0.0            !!
                                x0=0,                                    #default: 0
                                bunching=0,                              #default: 0
                                lanes_distribution=float("inf"),         #default: float("inf")
                                edges_distribution="all",                #default: "all"
                                additional_params=None )                 #default: None

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# SUMO simulator settings. Every argument is spelled out with the library
# default noted by the author in the trailing comment; all values here match
# those noted defaults. Rendering is off (render=False).
sumo_params = SumoParams( port = None,                  #default: None
                          sim_step=0.1,                 #default: 0.1
                          emission_path=None,           #default: None
                          lateral_resolution=None,      #default: None
                          no_step_log=True,             #default: True
                          render=False,                 #default: False
                          save_render=False,            #default: False
                          sight_radius=25,              #default: 25
                          show_radius=False,            #default: False
                          pxpm=2,                       #default: 2
                          overtake_right=False,         #default: False
                          seed=None,                    #default: None
                          restart_instance=False,       #default: False
                          print_warnings=True,          #default: True
                          teleport_time=-1,             #default: -1
                          num_clients=1,                #default: 1
                          sumo_binary=None )            #default: None

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, passed to EnvParams as additional_params.
additionalEnvParams = {
        # maximum acceleration of autonomous vehicles
        "max_accel": 3,
        # maximum deceleration of autonomous vehicles
        "max_decel": 3,
        # target velocity handed to the environment
        "target_velocity": 30
    }

# Environment parameters. The rollout horizon reuses the notebook-wide HORIZON
# constant; trailing comments give the library default as noted by the author,
# and "!!" marks a value that was deliberately set.
env_params = EnvParams( additional_params=additionalEnvParams, #default: None    !!
                        horizon=HORIZON,                       #default: 500     !!
                        warmup_steps=0,                        #default: 0
                        sims_per_step=1,                       #default: 1
                        evaluate=False )                       #default: False

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
from random import *

# Vehicle container; the RL-controlled vehicles are added to it in the next cell.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters for the RL vehicles (library default: None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (library default: None); kept at None and not passed on.
lc_parameter = None

# Register two RL-controlled vehicles named "rl", starting at speed 0.
# Defaults noted by the author for the overridden arguments:
#   acceleration_controller -> (SumoCarFollowingController, {})
#   lane_change_controller  -> (SumoLaneChangeController, {})
#   routing_controller      -> None
# The former speed_mode / lane_change_mode / sumo_car_following_params /
# sumo_lc_params keyword arguments are intentionally not passed.
vehicles.add(
    veh_id="rl",
    acceleration_controller=(RLController, {}),
    lane_change_controller=(StaticLaneChanger, {}),
    routing_controller=(ContinuousRouter, {}),
    initial_speed=0,
    num_vehicles=2,
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Bundle every parameter object into the flow_params dictionary.
# The keys must be spelled exactly as below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# number of parallel rollout workers
N_CPUS = 2
# number of rollouts per training iteration
N_ROLLOUTS = 20

# Start Ray with one CPU per worker plus one extra (presumably for the
# driver/trainer process -- confirm).
# ignore_reinit_error=True keeps this cell re-runnable: executing it again in a
# live kernel no longer raises because Ray is already initialised.
ray.init(redirect_output=True, num_cpus=N_CPUS + 1, ignore_reinit_error=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-27_18-49-37_3993/logs.
Waiting for redis server at 127.0.0.1:18844 to respond...
Waiting for redis server at 127.0.0.1:41057 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=0909cc23ad45c470113c14675027e7445175df12c7b32da8



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-03-27_18-49-37_3993/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-27_18-49-37_3993/sockets/raylet'],
 'redis_address': '192.168.2.102:18844',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=0909cc23ad45c470113c14675027e7445175df12c7b32da8'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or a user-defined trainable function or
# class registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep copy on purpose: the nested "model" and "env_config" dicts are mutated
# below, and a shallow .copy() would write those changes straight into the
# class-level default config shared by every agent created in this kernel.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [100, 50, 25]})  # hidden layer sizes (author notes the default as 64, 32)
config["use_gae"] = True  # use generalized advantage estimation
config["lambda"] = 0.97
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # 200 by default, still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialise flow_params to a JSON string and
# store it, together with the algorithm name, in the env_config section.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Build the environment factory and its Gym name for this experiment.
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register the factory with RLlib under the Gym name.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping, step 1: build one environment instance only to
# read its observation and action spaces.
# NOTE(review): this instance starts its own SUMO process (see the
# "Starting SUMO on port ..." output) and is not closed in this cell.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space

def gen_policy():
    """Return the policy spec tuple: (policy class, obs space, action space, config overrides)."""
    policy_spec = (PPOPolicyGraph, obs_space, act_space, {})
    return policy_spec

# Policy registry: a single PPO policy graph, 'rl_0', shared by all agents.
policy_graphs = {'rl_0': gen_policy()}
    
def policy_mapping_fn(agent_id):
    """Map any agent id to the one shared policy, 'rl_0'."""
    shared_policy_id = 'rl_0'
    return shared_policy_id

# Attach the multi-agent section to the trainer config: the policy registry,
# the agent-to-policy mapping, and the list of policies that are trained.
config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
    'policies_to_train': ['rl_0'],
}

 Starting SUMO on port 44139


20.553154398181952
18.504479839823905


In [13]:
# Trial specification handed to Tune.
experiment_spec = {
    "run": alg_run,             # RL algorithm to run
    "env": gym_name,            # environment name generated earlier
    "config": dict(config),     # configuration params (must match "run" value)
    "checkpoint_freq": 1,       # number of iterations between checkpoints
    "max_failures": 999,
    "stop": {                   # stopping conditions
        "training_iteration": 1000,  # number of iterations to stop after
    },
}

# Launch training; the experiment is keyed by the experiment tag.
trials = run_experiments({flow_params["exp_tag"]: experiment_spec})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 2.5/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0_2019-03-27_18-49-39vs4d3yjp -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 2.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_18-50-34
  done: false
  episode_len_mean: 436.27272727272725
  episode_reward_max: 426.82213129389606
  episode_reward_mean: 106.97591505808391
  episode_reward_min: -115.22056416182052
  episodes_this_iter: 22
  episodes_total: 22
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 117 s, 5 iter, 50000 ts, 402 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_18-52-28
  done: false
  episode_len_mean: 318.89
  episode_reward_max: 704.4184270418435
  episode_reward_mean: 467.9996323150662
  episode_reward_min: -124.29876912235675
  episodes_this_iter: 36
  episodes_total: 166
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3991.273
    load_time_ms: 9.38
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.006250001490116119
      cur_lr: 4.999999873689376e-05
      entropy: 1.4083789587020874
      kl: 0.0065506016835570335
      poli

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_18-54-24
  done: false
  episode_len_mean: 195.31
  episode_reward_max: 722.2168823420557
  episode_reward_mean: 429.1218085063419
  episode_reward_min: -133.64956861302542
  episodes_this_iter: 52
  episodes_total: 397
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3896.224
    load_time_ms: 1.58
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00039062509313225746
      cur_lr: 4.999999873689376e-05
      entropy: 1.41211998462677
      kl: 0.004875446669757366
      policy_loss: -0.00132059957832098
      total_loss: 1263.8843994140625
      vf_explained_var: 0.06937707215547562
      vf_loss: 1263.8858642578125
    sample_time_ms: 19023.591
    update_time_ms: 5.947
  iterations_since_restore: 11
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 372 s, 16 iter, 160000 ts, 465 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_18-56-47
  done: false
  episode_len_mean: 167.29
  episode_reward_max: 717.0755032543003
  episode_reward_mean: 474.66708026984185
  episode_reward_min: -112.35038403251906
  episodes_this_iter: 60
  episodes_total: 752
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3972.516
    load_time_ms: 1.748
    num_steps_sampled: 170000
    num_steps_trained: 170000
    rl_0:
      cur_kl_coeff: 6.103517080191523e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.3909573554992676
      kl: 0.004349222406744957
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_18-58-55
  done: false
  episode_len_mean: 133.63
  episode_reward_max: 707.2048169168594
  episode_reward_mean: 395.8934197377254
  episode_reward_min: -133.8188883367481
  episodes_this_iter: 77
  episodes_total: 1101
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4162.892
    load_time_ms: 1.773
    num_steps_sampled: 220000
    num_steps_trained: 220000
    rl_0:
      cur_kl_coeff: 1.907349087559851e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.379974603652954
      kl: 0.004213887732475996
      policy_loss: -0.0016978237545117736
      total_loss: 2292.095458984375
      vf_explained_var: 0.10830944031476974
      vf_loss: 2292.096923828125
    sample_time_ms: 20580.121
    update_time_ms: 5.369
  iterations_since_restore: 22
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 646 s, 27 iter, 270000 ts, 492 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-01-20
  done: false
  episode_len_mean: 138.03
  episode_reward_max: 727.707713371789
  episode_reward_mean: 491.9277383391769
  episode_reward_min: -123.90021808561823
  episodes_this_iter: 71
  episodes_total: 1543
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4052.083
    load_time_ms: 1.605
    num_steps_sampled: 280000
    num_steps_trained: 280000
    rl_0:
      cur_kl_coeff: 2.980232949312267e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.3333371877670288
      kl: 0.006693415343761444
     

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-03-24
  done: false
  episode_len_mean: 126.29
  episode_reward_max: 730.781161650334
  episode_reward_mean: 400.30971741689876
  episode_reward_min: -124.73604874002572
  episodes_this_iter: 78
  episodes_total: 1927
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4135.479
    load_time_ms: 1.617
    num_steps_sampled: 330000
    num_steps_trained: 330000
    rl_0:
      cur_kl_coeff: 9.313227966600834e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3414212465286255
      kl: 0.005076657515019178
      policy_loss: -0.0013022655621170998
      total_loss: 2648.21484375
      vf_explained_var: 0.12282726168632507
      vf_loss: 2648.216064453125
    sample_time_ms: 20384.186
    update_time_ms: 5.912
  iterations_since_restore: 33
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 910 s, 38 iter, 380000 ts, 392 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-05-43
  done: false
  episode_len_mean: 132.04
  episode_reward_max: 737.6240069446736
  episode_reward_mean: 490.20152329211464
  episode_reward_min: -126.80648404056944
  episodes_this_iter: 73
  episodes_total: 2389
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4007.964
    load_time_ms: 1.611
    num_steps_sampled: 390000
    num_steps_trained: 390000
    rl_0:
      cur_kl_coeff: 2.9103837395627608e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.303122639656067
      kl: 0.02174977771937847
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-07-41
  done: false
  episode_len_mean: 132.96
  episode_reward_max: 722.0946339316317
  episode_reward_mean: 482.1821206965112
  episode_reward_min: -101.58232899073377
  episodes_this_iter: 77
  episodes_total: 2774
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3896.905
    load_time_ms: 1.629
    num_steps_sampled: 440000
    num_steps_trained: 440000
    rl_0:
      cur_kl_coeff: 1.8189898372267255e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.3018484115600586
      kl: 0.007025898899883032
      policy_loss: -0.002009426709264517
      total_loss: 2645.56982421875
      vf_explained_var: 0.27791959047317505
      vf_loss: 2645.57177734375
    sample_time_ms: 19424.379
    update_time_ms: 5.405
  iterations_since_restore: 44
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 1175 s, 49 iter, 490000 ts, 512 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-10-09
  done: false
  episode_len_mean: 137.52
  episode_reward_max: 730.6449464202245
  episode_reward_mean: 523.9062620174074
  episode_reward_min: -118.61261929475377
  episodes_this_iter: 74
  episodes_total: 3222
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4043.741
    load_time_ms: 1.628
    num_steps_sampled: 500000
    num_steps_trained: 500000
    rl_0:
      cur_kl_coeff: 5.684343241333517e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.288338303565979
      kl: 0.014501417055726051
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-12-22
  done: false
  episode_len_mean: 131.03
  episode_reward_max: 730.8351528587153
  episode_reward_mean: 465.28660533257323
  episode_reward_min: -117.08008895655836
  episodes_this_iter: 77
  episodes_total: 3596
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4206.337
    load_time_ms: 1.572
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 1.4210858103333793e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.2815877199172974
      kl: 0.003790856571868062
      policy_loss: -0.0017686894861981273
      total_loss: 2672.393798828125
      vf_explained_var: 0.2620638906955719
      vf_loss: 2672.395751953125
    sample_time_ms: 21442.259
    update_time_ms: 5.429
  iterations_since_restore: 55
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 1482 s, 60 iter, 600000 ts, 526 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-15-22
  done: false
  episode_len_mean: 138.5
  episode_reward_max: 718.1260118151561
  episode_reward_mean: 559.8879390559836
  episode_reward_min: -126.34626970956595
  episodes_this_iter: 73
  episodes_total: 4042
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4544.268
    load_time_ms: 1.509
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 2.220446578645905e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.246590256690979
      kl: 0.007956204004585743
     

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-19-18
  done: false
  episode_len_mean: 138.8
  episode_reward_max: 728.9603722521238
  episode_reward_mean: 574.6670751516037
  episode_reward_min: -120.35729629392601
  episodes_this_iter: 72
  episodes_total: 4414
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 6959.386
    load_time_ms: 1.823
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 1.3877791116536907e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.2100332975387573
      kl: 0.022487882524728775
      policy_loss: -0.002044690540060401
      total_loss: 1558.114013671875
      vf_explained_var: 0.7018845677375793
      vf_loss: 1558.115966796875
    sample_time_ms: 31497.125
    update_time_ms: 9.146
  iterations_since_restore: 66
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 2005 s, 71 iter, 710000 ts, 555 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-24-03
  done: false
  episode_len_mean: 138.37
  episode_reward_max: 741.2946019373073
  episode_reward_mean: 565.615636263831
  episode_reward_min: -127.91589213921151
  episodes_this_iter: 72
  episodes_total: 4864
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 9241.756
    load_time_ms: 2.336
    num_steps_sampled: 720000
    num_steps_trained: 720000
    rl_0:
      cur_kl_coeff: 1.7347238895671134e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1567919254302979
      kl: 0.007869076915085316
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-27-24
  done: false
  episode_len_mean: 130.42
  episode_reward_max: 760.5484417199244
  episode_reward_mean: 499.7252428757641
  episode_reward_min: -130.75422284496221
  episodes_this_iter: 78
  episodes_total: 5237
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 9007.892
    load_time_ms: 2.313
    num_steps_sampled: 770000
    num_steps_trained: 770000
    rl_0:
      cur_kl_coeff: 2.1684048619588917e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0947487354278564
      kl: 0.008725745603442192
      policy_loss: -0.002129650441929698
      total_loss: 2255.991943359375
      vf_explained_var: 0.5526216626167297
      vf_loss: 2255.994384765625
    sample_time_ms: 35981.875
    update_time_ms: 13.32
  iterations_since_restore: 77
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 2400 s, 82 iter, 820000 ts, 548 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-30-36
  done: false
  episode_len_mean: 140.72
  episode_reward_max: 733.5608019301513
  episode_reward_mean: 584.399075373658
  episode_reward_min: -97.33927973245109
  episodes_this_iter: 72
  episodes_total: 5678
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 6737.482
    load_time_ms: 1.849
    num_steps_sampled: 830000
    num_steps_trained: 830000
    rl_0:
      cur_kl_coeff: 1.0842024309794459e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.1293915510177612
      kl: 0.01673899032175541
     

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-32-30
  done: false
  episode_len_mean: 134.47
  episode_reward_max: 747.4109617931597
  episode_reward_mean: 538.8142706466095
  episode_reward_min: -121.59634382463258
  episodes_this_iter: 76
  episodes_total: 6057
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3924.117
    load_time_ms: 1.517
    num_steps_sampled: 880000
    num_steps_trained: 880000
    rl_0:
      cur_kl_coeff: 1.0842024309794459e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.1064794063568115
      kl: 0.011164417490363121
      policy_loss: 0.0017999721458181739
      total_loss: 1954.5364990234375
      vf_explained_var: 0.6644344925880432
      vf_loss: 1954.534912109375
    sample_time_ms: 19882.664
    update_time_ms: 5.61
  iterations_since_restore: 88
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 2651 s, 93 iter, 930000 ts, 499 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-34-47
  done: false
  episode_len_mean: 132.13
  episode_reward_max: 751.1650618889585
  episode_reward_mean: 533.1952726155291
  episode_reward_min: -133.3943069678915
  episodes_this_iter: 76
  episodes_total: 6514
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3771.349
    load_time_ms: 1.503
    num_steps_sampled: 940000
    num_steps_trained: 940000
    rl_0:
      cur_kl_coeff: 1.0842024309794459e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0529531240463257
      kl: 0.016595153138041496
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-36-41
  done: false
  episode_len_mean: 138.23
  episode_reward_max: 768.6911228862381
  episode_reward_mean: 578.4868234630108
  episode_reward_min: -125.99355961598447
  episodes_this_iter: 73
  episodes_total: 6887
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3772.158
    load_time_ms: 1.484
    num_steps_sampled: 990000
    num_steps_trained: 990000
    rl_0:
      cur_kl_coeff: 1.0842024309794459e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0158644914627075
      kl: 0.028880229219794273
      policy_loss: 0.006119837984442711
      total_loss: 1707.9130859375
      vf_explained_var: 0.7412115931510925
      vf_loss: 1707.9068603515625
    sample_time_ms: 19059.68
    update_time_ms: 5.162
  iterations_since_restore: 99
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 2902 s, 104 iter, 1040000 ts, 539 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-38-59
  done: false
  episode_len_mean: 133.71
  episode_reward_max: 776.6917620065043
  episode_reward_mean: 539.0676558299028
  episode_reward_min: -119.79532038039476
  episodes_this_iter: 74
  episodes_total: 7339
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3780.183
    load_time_ms: 1.537
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
    rl_0:
      cur_kl_coeff: 3.6591827198794896e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0987006425857544
      kl: 0.04419689252972603

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-40-52
  done: false
  episode_len_mean: 136.28
  episode_reward_max: 763.340550842181
  episode_reward_mean: 551.5988683037972
  episode_reward_min: -136.1593303615421
  episodes_this_iter: 75
  episodes_total: 7710
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3774.959
    load_time_ms: 1.59
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
    rl_0:
      cur_kl_coeff: 1.8524607713018193e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.125540852546692
      kl: 0.03111746348440647
      policy_loss: 0.005952584557235241
      total_loss: 1852.3953857421875
      vf_explained_var: 0.7522364258766174
      vf_loss: 1852.389404296875
    sample_time_ms: 18955.526
    update_time_ms: 5.055
  iterations_since_restore: 110
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 3153 s, 115 iter, 1150000 ts, 568 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-43-09
  done: false
  episode_len_mean: 136.55
  episode_reward_max: 754.1690013784515
  episode_reward_mean: 563.1634850019227
  episode_reward_min: -119.2619011794802
  episodes_this_iter: 74
  episodes_total: 8151
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3768.661
    load_time_ms: 1.572
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 1.4067125520919935e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.0946146249771118
      kl: 0.04084090143442154


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-45-03
  done: false
  episode_len_mean: 139.81
  episode_reward_max: 760.629024516409
  episode_reward_mean: 582.5703464051331
  episode_reward_min: -132.7584319204157
  episodes_this_iter: 73
  episodes_total: 8520
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3755.641
    load_time_ms: 1.573
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 1.0682224492580213e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.1123510599136353
      kl: 0.10721945017576218
      policy_loss: 0.01199457235634327
      total_loss: 1714.436279296875
      vf_explained_var: 0.7822834849357605
      vf_loss: 1714.42431640625
    sample_time_ms: 18970.897
    update_time_ms: 4.926
  iterations_since_restore: 121
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 3403 s, 126 iter, 1260000 ts, 515 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-47-21
  done: false
  episode_len_mean: 134.13
  episode_reward_max: 751.8812726978697
  episode_reward_mean: 539.304897375011
  episode_reward_min: -124.29921277935074
  episodes_this_iter: 77
  episodes_total: 8977
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3796.996
    load_time_ms: 1.735
    num_steps_sampled: 1270000
    num_steps_trained: 1270000
    rl_0:
      cur_kl_coeff: 1.216772225502561e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.0380679368972778
      kl: 0.8790704607963562
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-49-16
  done: false
  episode_len_mean: 133.33
  episode_reward_max: 748.8454686963456
  episode_reward_mean: 553.651839813217
  episode_reward_min: -101.98493158499012
  episodes_this_iter: 77
  episodes_total: 9345
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3817.815
    load_time_ms: 1.693
    num_steps_sampled: 1320000
    num_steps_trained: 1320000
    rl_0:
      cur_kl_coeff: 9.239862081497087e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.9887579679489136
      kl: 0.0930606797337532
      policy_loss: 0.01044643484055996
      total_loss: 2108.437255859375
      vf_explained_var: 0.7414127588272095
      vf_loss: 2108.4267578125
    sample_time_ms: 19069.823
    update_time_ms: 5.821
  iterations_since_restore: 132
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 3658 s, 137 iter, 1370000 ts, 593 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-51-36
  done: false
  episode_len_mean: 139.17
  episode_reward_max: 785.4261996444305
  episode_reward_mean: 623.209203841083
  episode_reward_min: -132.88557905351524
  episodes_this_iter: 72
  episodes_total: 9799
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3845.103
    load_time_ms: 1.582
    num_steps_sampled: 1380000
    num_steps_trained: 1380000
    rl_0:
      cur_kl_coeff: 1.0524782475119891e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.8618301749229431
      kl: 0.043288372457027435

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-53-31
  done: false
  episode_len_mean: 117.12
  episode_reward_max: 768.6611778969608
  episode_reward_mean: 443.30492835483363
  episode_reward_min: -124.04064226203032
  episodes_this_iter: 87
  episodes_total: 10201
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3775.999
    load_time_ms: 1.446
    num_steps_sampled: 1430000
    num_steps_trained: 1430000
    rl_0:
      cur_kl_coeff: 7.992255945596383e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.8420457243919373
      kl: 0.049966175109148026
      policy_loss: 0.006923365406692028
      total_loss: 4219.8427734375
      vf_explained_var: 0.5359997153282166
      vf_loss: 4219.8359375
    sample_time_ms: 19244.524
    update_time_ms: 5.353
  iterations_since_restore: 143
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 3911 s, 148 iter, 1480000 ts, 488 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-55-48
  done: false
  episode_len_mean: 134.82
  episode_reward_max: 747.878688386417
  episode_reward_mean: 584.8470163159822
  episode_reward_min: -106.55626848731458
  episodes_this_iter: 75
  episodes_total: 10670
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3776.579
    load_time_ms: 1.49
    num_steps_sampled: 1490000
    num_steps_trained: 1490000
    rl_0:
      cur_kl_coeff: 9.103681350430826e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.897191047668457
      kl: 13.25737476348877
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_19-57-51
  done: false
  episode_len_mean: 132.01
  episode_reward_max: 768.573076792758
  episode_reward_mean: 548.0780451063561
  episode_reward_min: -125.2817828308051
  episodes_this_iter: 75
  episodes_total: 11048
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4109.439
    load_time_ms: 1.52
    num_steps_sampled: 1540000
    num_steps_trained: 1540000
    rl_0:
      cur_kl_coeff: 6.913105504713357e-12
      cur_lr: 4.999999873689376e-05
      entropy: 0.9508528113365173
      kl: 0.3583918511867523
      policy_loss: 0.012717743404209614
      total_loss: 2268.363037109375
      vf_explained_var: 0.7289877533912659
      vf_loss: 2268.35009765625
    sample_time_ms: 19495.999
    update_time_ms: 5.209
  iterations_since_restore: 154
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 4274 s, 159 iter, 1590000 ts, 572 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-02-53
  done: false
  episode_len_mean: 134.09
  episode_reward_max: 773.2406797740654
  episode_reward_mean: 578.9477881279338
  episode_reward_min: -126.19984144810566
  episodes_this_iter: 77
  episodes_total: 11508
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 8113.005
    load_time_ms: 1.905
    num_steps_sampled: 1600000
    num_steps_trained: 1600000
    rl_0:
      cur_kl_coeff: 7.874461499524088e-11
      cur_lr: 4.999999873689376e-05
      entropy: 0.9332566857337952
      kl: 1.1521176099777222


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-12-24
  done: false
  episode_len_mean: 132.63
  episode_reward_max: 758.7920614263263
  episode_reward_mean: 564.67172221742
  episode_reward_min: -135.56021601878155
  episodes_this_iter: 77
  episodes_total: 11881
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 15970.559
    load_time_ms: 3.107
    num_steps_sampled: 1650000
    num_steps_trained: 1650000
    rl_0:
      cur_kl_coeff: 5.979668427080753e-10
      cur_lr: 4.999999873689376e-05
      entropy: 0.9324943423271179
      kl: 0.3908366560935974
      policy_loss: 0.02443879470229149
      total_loss: 2370.571533203125
      vf_explained_var: 0.7217196226119995
      vf_loss: 2370.546875
    sample_time_ms: 67229.027
    update_time_ms: 18.862
  iterations_since_restore: 165
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 5072 s, 170 iter, 1700000 ts, 567 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-15-12
  done: false
  episode_len_mean: 135.5
  episode_reward_max: 770.3999337646893
  episode_reward_mean: 568.218652109039
  episode_reward_min: -134.84273093812095
  episodes_this_iter: 75
  episodes_total: 12338
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 11099.386
    load_time_ms: 2.488
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 6.811216035629286e-09
      cur_lr: 4.999999873689376e-05
      entropy: 0.980352520942688
      kl: 1.7845650911331177
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-17-06
  done: false
  episode_len_mean: 136.76
  episode_reward_max: 760.1995563909635
  episode_reward_mean: 586.7289733450048
  episode_reward_min: -135.19247038513447
  episodes_this_iter: 74
  episodes_total: 12715
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3776.258
    load_time_ms: 1.424
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 5.172267236730477e-08
      cur_lr: 4.999999873689376e-05
      entropy: 0.9458523988723755
      kl: 0.47760480642318726
      policy_loss: 0.018076341599225998
      total_loss: 1794.797119140625
      vf_explained_var: 0.7882454991340637
      vf_loss: 1794.77880859375
    sample_time_ms: 19424.041
    update_time_ms: 5.019
  iterations_since_restore: 176
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 5322 s, 181 iter, 1810000 ts, 636 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-19-22
  done: false
  episode_len_mean: 139.66
  episode_reward_max: 757.2530047669812
  episode_reward_mean: 608.7334073450457
  episode_reward_min: -104.61561572039068
  episodes_this_iter: 73
  episodes_total: 13158
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3794.27
    load_time_ms: 1.548
    num_steps_sampled: 1820000
    num_steps_trained: 1820000
    rl_0:
      cur_kl_coeff: 5.891533874091692e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.0177414417266846
      kl: 0.8798685073852539
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-21-14
  done: false
  episode_len_mean: 131.46
  episode_reward_max: 774.1287259845501
  episode_reward_mean: 530.9410051551996
  episode_reward_min: -143.41217164886447
  episodes_this_iter: 78
  episodes_total: 13537
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3805.048
    load_time_ms: 1.472
    num_steps_sampled: 1870000
    num_steps_trained: 1870000
    rl_0:
      cur_kl_coeff: 4.473884928302141e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.0169146060943604
      kl: 0.8649888634681702
      policy_loss: 0.03600487485527992
      total_loss: 2571.307373046875
      vf_explained_var: 0.7065931558609009
      vf_loss: 2571.271240234375
    sample_time_ms: 18612.877
    update_time_ms: 5.212
  iterations_since_restore: 187
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 5569 s, 192 iter, 1920000 ts, 559 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-23-30
  done: false
  episode_len_mean: 134.4
  episode_reward_max: 758.8344150659202
  episode_reward_mean: 553.6423859918273
  episode_reward_min: -125.5028000133745
  episodes_this_iter: 75
  episodes_total: 13984
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3790.387
    load_time_ms: 1.505
    num_steps_sampled: 1930000
    num_steps_trained: 1930000
    rl_0:
      cur_kl_coeff: 5.096033419249579e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.069740891456604
      kl: 67.8360366821289
     

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-25-23
  done: false
  episode_len_mean: 132.74
  episode_reward_max: 761.07434858368
  episode_reward_mean: 553.0918265985642
  episode_reward_min: -115.70655369299406
  episodes_this_iter: 78
  episodes_total: 14358
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3786.224
    load_time_ms: 1.649
    num_steps_sampled: 1980000
    num_steps_trained: 1980000
    rl_0:
      cur_kl_coeff: 0.0003869801003020257
      cur_lr: 4.999999873689376e-05
      entropy: 1.0541095733642578
      kl: 0.11030906438827515
      policy_loss: 0.01583588309586048
      total_loss: 2828.56005859375
      vf_explained_var: 0.6813880801200867
      vf_loss: 2828.54443359375
    sample_time_ms: 18840.782
    update_time_ms: 5.801
  iterations_since_restore: 198
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 5818 s, 203 iter, 2030000 ts, 595 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-27-38
  done: false
  episode_len_mean: 134.22
  episode_reward_max: 761.5434648458738
  episode_reward_mean: 550.6146457330932
  episode_reward_min: -122.49225243189608
  episodes_this_iter: 77
  episodes_total: 14795
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3780.429
    load_time_ms: 1.539
    num_steps_sampled: 2040000
    num_steps_trained: 2040000
    rl_0:
      cur_kl_coeff: 0.004407945554703474
      cur_lr: 4.999999873689376e-05
      entropy: 1.1045349836349487
      kl: 0.14915287494659424


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-29-29
  done: false
  episode_len_mean: 138.26
  episode_reward_max: 742.0736737752845
  episode_reward_mean: 589.2199576045313
  episode_reward_min: -129.66778610521294
  episodes_this_iter: 73
  episodes_total: 15162
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3771.672
    load_time_ms: 1.504
    num_steps_sampled: 2090000
    num_steps_trained: 2090000
    rl_0:
      cur_kl_coeff: 0.033472832292318344
      cur_lr: 4.999999873689376e-05
      entropy: 1.0406819581985474
      kl: 0.04220255836844444
      policy_loss: 0.009572976268827915
      total_loss: 1526.288818359375
      vf_explained_var: 0.8283238410949707
      vf_loss: 1526.27783203125
    sample_time_ms: 18483.629
    update_time_ms: 5.636
  iterations_since_restore: 209
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 6062 s, 214 iter, 2140000 ts, 597 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-31-43
  done: false
  episode_len_mean: 137.25
  episode_reward_max: 757.4075702333246
  episode_reward_mean: 581.0423680389189
  episode_reward_min: -133.182757708373
  episodes_this_iter: 75
  episodes_total: 15599
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3782.799
    load_time_ms: 1.551
    num_steps_sampled: 2150000
    num_steps_trained: 2150000
    rl_0:
      cur_kl_coeff: 0.11297080665826797
      cur_lr: 4.999999873689376e-05
      entropy: 1.033029556274414
      kl: 0.035302046686410904
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-33-35
  done: false
  episode_len_mean: 135.88
  episode_reward_max: 774.886748259922
  episode_reward_mean: 572.7145645641335
  episode_reward_min: -125.36454687399709
  episodes_this_iter: 75
  episodes_total: 15973
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3793.475
    load_time_ms: 1.459
    num_steps_sampled: 2200000
    num_steps_trained: 2200000
    rl_0:
      cur_kl_coeff: 0.16945624351501465
      cur_lr: 4.999999873689376e-05
      entropy: 1.0537598133087158
      kl: 0.022926388308405876
      policy_loss: 0.0037106466479599476
      total_loss: 1619.012451171875
      vf_explained_var: 0.8339851498603821
      vf_loss: 1619.0047607421875
    sample_time_ms: 18532.404
    update_time_ms: 5.157
  iterations_since_restore: 220
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 6308 s, 225 iter, 2250000 ts, 608 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-35-50
  done: false
  episode_len_mean: 136.11
  episode_reward_max: 770.2346714122207
  episode_reward_mean: 585.1713003776557
  episode_reward_min: -109.26999318758669
  episodes_this_iter: 75
  episodes_total: 16412
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3795.095
    load_time_ms: 1.383
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 0.2541843354701996
      cur_lr: 4.999999873689376e-05
      entropy: 1.016162633895874
      kl: 0.017409710213541985
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-37-44
  done: false
  episode_len_mean: 135.03
  episode_reward_max: 756.3993811924424
  episode_reward_mean: 560.8134050334148
  episode_reward_min: -130.23788143721515
  episodes_this_iter: 76
  episodes_total: 16791
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3791.58
    load_time_ms: 1.405
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 0.19063825905323029
      cur_lr: 4.999999873689376e-05
      entropy: 1.0647261142730713
      kl: 0.05495554208755493
      policy_loss: 0.0071721612475812435
      total_loss: 731.0362548828125
      vf_explained_var: 0.9387306571006775
      vf_loss: 731.0185546875
    sample_time_ms: 18778.778
    update_time_ms: 5.952
  iterations_since_restore: 231
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 6558 s, 236 iter, 2360000 ts, 570 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-40-00
  done: false
  episode_len_mean: 135.09
  episode_reward_max: 731.7059248355031
  episode_reward_mean: 569.0712970623209
  episode_reward_min: -112.32436957138695
  episodes_this_iter: 75
  episodes_total: 17235
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3812.648
    load_time_ms: 1.475
    num_steps_sampled: 2370000
    num_steps_trained: 2370000
    rl_0:
      cur_kl_coeff: 0.28595736622810364
      cur_lr: 4.999999873689376e-05
      entropy: 1.0473238229751587
      kl: 0.008472190238535404


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-41-53
  done: false
  episode_len_mean: 137.59
  episode_reward_max: 773.6917682123226
  episode_reward_mean: 591.8889027759126
  episode_reward_min: -119.41659764885102
  episodes_this_iter: 74
  episodes_total: 17607
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3810.253
    load_time_ms: 1.561
    num_steps_sampled: 2420000
    num_steps_trained: 2420000
    rl_0:
      cur_kl_coeff: 0.14297868311405182
      cur_lr: 4.999999873689376e-05
      entropy: 1.0589070320129395
      kl: 0.023301368579268456
      policy_loss: 0.0024969601072371006
      total_loss: 465.9463195800781
      vf_explained_var: 0.9628045558929443
      vf_loss: 465.94049072265625
    sample_time_ms: 18846.71
    update_time_ms: 5.441
  iterations_since_restore: 242
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 6807 s, 247 iter, 2470000 ts, 602 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-44-09
  done: false
  episode_len_mean: 142.73
  episode_reward_max: 761.6242429322525
  episode_reward_mean: 626.0547931690678
  episode_reward_min: -107.02477184522088
  episodes_this_iter: 72
  episodes_total: 18043
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3799.689
    load_time_ms: 1.616
    num_steps_sampled: 2480000
    num_steps_trained: 2480000
    rl_0:
      cur_kl_coeff: 0.14297868311405182
      cur_lr: 4.999999873689376e-05
      entropy: 1.1097464561462402
      kl: 0.010946185328066349


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-46-02
  done: false
  episode_len_mean: 134.26
  episode_reward_max: 772.4701697379726
  episode_reward_mean: 568.7443906546702
  episode_reward_min: -126.05322080720423
  episodes_this_iter: 75
  episodes_total: 18415
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3796.994
    load_time_ms: 1.546
    num_steps_sampled: 2530000
    num_steps_trained: 2530000
    rl_0:
      cur_kl_coeff: 0.21446803212165833
      cur_lr: 4.999999873689376e-05
      entropy: 1.09429931640625
      kl: 0.009949927218258381
      policy_loss: 0.0010127363493666053
      total_loss: 297.9016418457031
      vf_explained_var: 0.9764953851699829
      vf_loss: 297.89849853515625
    sample_time_ms: 18721.896
    update_time_ms: 5.376
  iterations_since_restore: 253
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 7054 s, 258 iter, 2580000 ts, 601 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-48-16
  done: false
  episode_len_mean: 138.52
  episode_reward_max: 748.1446856893674
  episode_reward_mean: 597.3514319873238
  episode_reward_min: -98.34547423118454
  episodes_this_iter: 74
  episodes_total: 18861
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3787.325
    load_time_ms: 1.716
    num_steps_sampled: 2590000
    num_steps_trained: 2590000
    rl_0:
      cur_kl_coeff: 0.10723401606082916
      cur_lr: 4.999999873689376e-05
      entropy: 1.121764063835144
      kl: 0.015085839666426182
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-50-08
  done: false
  episode_len_mean: 135.57
  episode_reward_max: 778.5585856661902
  episode_reward_mean: 578.6595660859439
  episode_reward_min: -132.23397125431873
  episodes_this_iter: 76
  episodes_total: 19229
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3795.829
    load_time_ms: 1.666
    num_steps_sampled: 2640000
    num_steps_trained: 2640000
    rl_0:
      cur_kl_coeff: 0.05361700803041458
      cur_lr: 4.999999873689376e-05
      entropy: 1.1457128524780273
      kl: 0.037569914013147354
      policy_loss: -0.0005893359775654972
      total_loss: 1069.295166015625
      vf_explained_var: 0.9082379341125488
      vf_loss: 1069.293701171875
    sample_time_ms: 18498.729
    update_time_ms: 5.513
  iterations_since_restore: 264
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 7301 s, 269 iter, 2690000 ts, 613 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-52-24
  done: false
  episode_len_mean: 137.76
  episode_reward_max: 757.8000896330212
  episode_reward_mean: 592.7127089236224
  episode_reward_min: -123.7941924745911
  episodes_this_iter: 73
  episodes_total: 19666
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3798.817
    load_time_ms: 1.556
    num_steps_sampled: 2700000
    num_steps_trained: 2700000
    rl_0:
      cur_kl_coeff: 0.05361700803041458
      cur_lr: 4.999999873689376e-05
      entropy: 1.1322269439697266
      kl: 0.015658153221011162
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-54-15
  done: false
  episode_len_mean: 141.62
  episode_reward_max: 766.7991016189413
  episode_reward_mean: 624.8798567148639
  episode_reward_min: -112.19426688098957
  episodes_this_iter: 72
  episodes_total: 20029
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3798.929
    load_time_ms: 1.548
    num_steps_sampled: 2750000
    num_steps_trained: 2750000
    rl_0:
      cur_kl_coeff: 0.02680850401520729
      cur_lr: 4.999999873689376e-05
      entropy: 1.16073739528656
      kl: 0.020292771980166435
      policy_loss: 0.0014390479773283005
      total_loss: 306.4714050292969
      vf_explained_var: 0.9780684113502502
      vf_loss: 306.4694519042969
    sample_time_ms: 18564.752
    update_time_ms: 5.161
  iterations_since_restore: 275
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 7544 s, 280 iter, 2800000 ts, 603 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-56-28
  done: false
  episode_len_mean: 139.78
  episode_reward_max: 760.8383710722406
  episode_reward_mean: 624.6260869749785
  episode_reward_min: -106.71762854572327
  episodes_this_iter: 72
  episodes_total: 20468
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3798.385
    load_time_ms: 1.724
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 0.013404252007603645
      cur_lr: 4.999999873689376e-05
      entropy: 1.1562960147857666
      kl: 0.01845688745379448


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_20-58-20
  done: false
  episode_len_mean: 134.38
  episode_reward_max: 755.5307948477734
  episode_reward_mean: 591.4630962635255
  episode_reward_min: -119.85570462618675
  episodes_this_iter: 75
  episodes_total: 20838
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3787.659
    load_time_ms: 1.767
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 0.020106378942728043
      cur_lr: 4.999999873689376e-05
      entropy: 1.093211054801941
      kl: 0.06912800669670105
      policy_loss: 0.004240372218191624
      total_loss: 597.6254272460938
      vf_explained_var: 0.9549345374107361
      vf_loss: 597.6197509765625
    sample_time_ms: 18384.58
    update_time_ms: 5.326
  iterations_since_restore: 286
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 7790 s, 291 iter, 2910000 ts, 537 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-00-33
  done: false
  episode_len_mean: 136.39
  episode_reward_max: 777.507505039257
  episode_reward_mean: 603.287986577545
  episode_reward_min: -126.82820955248032
  episodes_this_iter: 73
  episodes_total: 21291
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3802.702
    load_time_ms: 1.638
    num_steps_sampled: 2920000
    num_steps_trained: 2920000
    rl_0:
      cur_kl_coeff: 0.015079782344400883
      cur_lr: 4.999999873689376e-05
      entropy: 1.1635072231292725
      kl: 0.07682270556688309
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-02-26
  done: false
  episode_len_mean: 137.92
  episode_reward_max: 782.2854368546147
  episode_reward_mean: 622.4816383884998
  episode_reward_min: -117.316912098533
  episodes_this_iter: 74
  episodes_total: 21666
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3816.0
    load_time_ms: 1.529
    num_steps_sampled: 2970000
    num_steps_trained: 2970000
    rl_0:
      cur_kl_coeff: 0.0056549194268882275
      cur_lr: 4.999999873689376e-05
      entropy: 1.1869841814041138
      kl: 0.021419163793325424
      policy_loss: -0.0025040151085704565
      total_loss: 676.2749633789062
      vf_explained_var: 0.9490929841995239
      vf_loss: 676.27734375
    sample_time_ms: 18589.415
    update_time_ms: 5.378
  iterations_since_restore: 297
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 8038 s, 302 iter, 3020000 ts, 607 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-04-42
  done: false
  episode_len_mean: 134.88
  episode_reward_max: 766.0710436140788
  episode_reward_mean: 605.0722398908342
  episode_reward_min: -123.92636243423591
  episodes_this_iter: 74
  episodes_total: 22121
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3805.064
    load_time_ms: 1.458
    num_steps_sampled: 3030000
    num_steps_trained: 3030000
    rl_0:
      cur_kl_coeff: 0.0028274597134441137
      cur_lr: 4.999999873689376e-05
      entropy: 1.0952187776565552
      kl: 0.02176935598254203

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-06-34
  done: false
  episode_len_mean: 137.65
  episode_reward_max: 762.088944663777
  episode_reward_mean: 630.3194966689997
  episode_reward_min: -124.1856510096329
  episodes_this_iter: 74
  episodes_total: 22485
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3820.77
    load_time_ms: 1.444
    num_steps_sampled: 3080000
    num_steps_trained: 3080000
    rl_0:
      cur_kl_coeff: 0.0028274597134441137
      cur_lr: 4.999999873689376e-05
      entropy: 1.0321688652038574
      kl: 0.014226949773728848
      policy_loss: 0.0004694515373557806
      total_loss: 143.54913330078125
      vf_explained_var: 0.9884412884712219
      vf_loss: 143.5486297607422
    sample_time_ms: 18602.543
    update_time_ms: 5.382
  iterations_since_restore: 308
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 8299 s, 313 iter, 3130000 ts, 598 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-09-08
  done: false
  episode_len_mean: 130.93
  episode_reward_max: 767.79558180049
  episode_reward_mean: 574.7804001542585
  episode_reward_min: -124.36090933494685
  episodes_this_iter: 78
  episodes_total: 22940
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4074.852
    load_time_ms: 1.441
    num_steps_sampled: 3140000
    num_steps_trained: 3140000
    rl_0:
      cur_kl_coeff: 0.004241189453750849
      cur_lr: 4.999999873689376e-05
      entropy: 1.0035427808761597
      kl: 0.01603560894727707
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-11-21
  done: false
  episode_len_mean: 135.58
  episode_reward_max: 782.5070699054409
  episode_reward_mean: 618.9500588936996
  episode_reward_min: -104.27085919680728
  episodes_this_iter: 75
  episodes_total: 23318
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4141.108
    load_time_ms: 1.48
    num_steps_sampled: 3190000
    num_steps_trained: 3190000
    rl_0:
      cur_kl_coeff: 0.004241189453750849
      cur_lr: 4.999999873689376e-05
      entropy: 0.987153172492981
      kl: 0.021816512569785118
      policy_loss: -0.0014705187641084194
      total_loss: 1042.279296875
      vf_explained_var: 0.9104366898536682
      vf_loss: 1042.2806396484375
    sample_time_ms: 22022.862
    update_time_ms: 5.831
  iterations_since_restore: 319
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 8582 s, 324 iter, 3240000 ts, 609 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-13-52
  done: false
  episode_len_mean: 139.17
  episode_reward_max: 777.7516330995848
  episode_reward_mean: 641.1004122714822
  episode_reward_min: -102.85599892575458
  episodes_this_iter: 71
  episodes_total: 23757
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4177.901
    load_time_ms: 1.662
    num_steps_sampled: 3250000
    num_steps_trained: 3250000
    rl_0:
      cur_kl_coeff: 0.004241189453750849
      cur_lr: 4.999999873689376e-05
      entropy: 1.029346227645874
      kl: 0.029091626405715942


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-16-11
  done: false
  episode_len_mean: 138.09
  episode_reward_max: 779.7272842112712
  episode_reward_mean: 627.2626343100873
  episode_reward_min: -131.67878729364503
  episodes_this_iter: 73
  episodes_total: 24126
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4285.245
    load_time_ms: 1.752
    num_steps_sampled: 3300000
    num_steps_trained: 3300000
    rl_0:
      cur_kl_coeff: 0.004241189453750849
      cur_lr: 4.999999873689376e-05
      entropy: 1.0375399589538574
      kl: 0.015220802277326584
      policy_loss: -0.00031496048904955387
      total_loss: 439.4825134277344
      vf_explained_var: 0.9665846228599548
      vf_loss: 439.4827575683594
    sample_time_ms: 22354.581
    update_time_ms: 5.434
  iterations_since_restore: 330
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 8905 s, 335 iter, 3350000 ts, 679 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-19-18
  done: false
  episode_len_mean: 132.54
  episode_reward_max: 770.9852425692465
  episode_reward_mean: 598.4587116253509
  episode_reward_min: -136.82207502906093
  episodes_this_iter: 77
  episodes_total: 24568
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 5378.327
    load_time_ms: 1.773
    num_steps_sampled: 3360000
    num_steps_trained: 3360000
    rl_0:
      cur_kl_coeff: 0.0021205947268754244
      cur_lr: 4.999999873689376e-05
      entropy: 0.986768364906311
      kl: 0.020663805305957794

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-21-25
  done: false
  episode_len_mean: 136.35
  episode_reward_max: 775.834518066874
  episode_reward_mean: 621.974687014163
  episode_reward_min: -123.030868540906
  episodes_this_iter: 75
  episodes_total: 24941
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 5204.092
    load_time_ms: 1.77
    num_steps_sampled: 3410000
    num_steps_trained: 3410000
    rl_0:
      cur_kl_coeff: 0.0010602973634377122
      cur_lr: 4.999999873689376e-05
      entropy: 1.0723974704742432
      kl: 0.01002692710608244
      policy_loss: -0.001505858963355422
      total_loss: 193.5396728515625
      vf_explained_var: 0.9846165180206299
      vf_loss: 193.5411834716797
    sample_time_ms: 23317.304
    update_time_ms: 7.677
  iterations_since_restore: 341
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 9176 s, 346 iter, 3460000 ts, 624 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-23-42
  done: false
  episode_len_mean: 133.86
  episode_reward_max: 765.8208463878287
  episode_reward_mean: 601.6401235337455
  episode_reward_min: -113.39676911576849
  episodes_this_iter: 77
  episodes_total: 25391
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4129.719
    load_time_ms: 1.666
    num_steps_sampled: 3470000
    num_steps_trained: 3470000
    rl_0:
      cur_kl_coeff: 0.0010602973634377122
      cur_lr: 4.999999873689376e-05
      entropy: 1.0118016004562378
      kl: 0.01681279577314853

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-25-35
  done: false
  episode_len_mean: 140.35
  episode_reward_max: 772.9584651039836
  episode_reward_mean: 665.5844391565145
  episode_reward_min: -101.71679853218929
  episodes_this_iter: 72
  episodes_total: 25766
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3812.376
    load_time_ms: 1.601
    num_steps_sampled: 3520000
    num_steps_trained: 3520000
    rl_0:
      cur_kl_coeff: 0.0010602973634377122
      cur_lr: 4.999999873689376e-05
      entropy: 1.0645664930343628
      kl: 0.01439011748880148
      policy_loss: -0.004128917120397091
      total_loss: 352.20196533203125
      vf_explained_var: 0.9713241457939148
      vf_loss: 352.2060546875
    sample_time_ms: 18916.936
    update_time_ms: 5.15
  iterations_since_restore: 352
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 9425 s, 357 iter, 3570000 ts, 602 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-27-52
  done: false
  episode_len_mean: 136.67
  episode_reward_max: 767.1460136732879
  episode_reward_mean: 639.5330252875393
  episode_reward_min: -122.17020502465986
  episodes_this_iter: 75
  episodes_total: 26220
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3797.172
    load_time_ms: 1.464
    num_steps_sampled: 3580000
    num_steps_trained: 3580000
    rl_0:
      cur_kl_coeff: 0.00026507434085942805
      cur_lr: 4.999999873689376e-05
      entropy: 1.0982593297958374
      kl: 0.0143008325248956

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-29-45
  done: false
  episode_len_mean: 135.97
  episode_reward_max: 783.1180017696271
  episode_reward_mean: 633.4047550344654
  episode_reward_min: -130.3163788646087
  episodes_this_iter: 74
  episodes_total: 26594
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3799.944
    load_time_ms: 1.575
    num_steps_sampled: 3630000
    num_steps_trained: 3630000
    rl_0:
      cur_kl_coeff: 0.00026507434085942805
      cur_lr: 4.999999873689376e-05
      entropy: 1.1295233964920044
      kl: 0.013654467649757862
      policy_loss: -0.0018053791718557477
      total_loss: 482.5008544921875
      vf_explained_var: 0.9641844630241394
      vf_loss: 482.5025939941406
    sample_time_ms: 18804.731
    update_time_ms: 5.336
  iterations_since_restore: 363
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 9673 s, 368 iter, 3680000 ts, 647 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-32-00
  done: false
  episode_len_mean: 138.41
  episode_reward_max: 762.6566884891989
  episode_reward_mean: 643.9835006956376
  episode_reward_min: -115.06301587154337
  episodes_this_iter: 73
  episodes_total: 27036
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3800.309
    load_time_ms: 1.576
    num_steps_sampled: 3690000
    num_steps_trained: 3690000
    rl_0:
      cur_kl_coeff: 6.626858521485701e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.0875595808029175
      kl: 0.01307732053101062

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-33-53
  done: false
  episode_len_mean: 134.44
  episode_reward_max: 776.9749421565574
  episode_reward_mean: 617.7313066078359
  episode_reward_min: -84.15676998640551
  episodes_this_iter: 76
  episodes_total: 27407
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3798.769
    load_time_ms: 1.503
    num_steps_sampled: 3740000
    num_steps_trained: 3740000
    rl_0:
      cur_kl_coeff: 6.626858521485701e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.1072083711624146
      kl: 0.006363990250974894
      policy_loss: -0.0008919439860619605
      total_loss: 375.1651916503906
      vf_explained_var: 0.9731642603874207
      vf_loss: 375.1661071777344
    sample_time_ms: 18702.696
    update_time_ms: 5.755
  iterations_since_restore: 374
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 9919 s, 379 iter, 3790000 ts, 651 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-36-06
  done: false
  episode_len_mean: 135.86
  episode_reward_max: 780.798487982168
  episode_reward_mean: 623.3177693459171
  episode_reward_min: -140.14664988289314
  episodes_this_iter: 75
  episodes_total: 27853
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3807.927
    load_time_ms: 1.65
    num_steps_sampled: 3800000
    num_steps_trained: 3800000
    rl_0:
      cur_kl_coeff: 8.283573151857127e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.0831634998321533
      kl: 0.011021208949387074


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-37-59
  done: false
  episode_len_mean: 132.33
  episode_reward_max: 773.1960279312812
  episode_reward_mean: 604.7693056543862
  episode_reward_min: -117.24742090277446
  episodes_this_iter: 76
  episodes_total: 28225
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3799.376
    load_time_ms: 1.708
    num_steps_sampled: 3850000
    num_steps_trained: 3850000
    rl_0:
      cur_kl_coeff: 2.0708932879642816e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.1016881465911865
      kl: 0.02670258656144142
      policy_loss: -0.009520978666841984
      total_loss: 788.393310546875
      vf_explained_var: 0.9511154294013977
      vf_loss: 788.40283203125
    sample_time_ms: 18455.528
    update_time_ms: 5.173
  iterations_since_restore: 385
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 10166 s, 390 iter, 3900000 ts, 633 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-40-14
  done: false
  episode_len_mean: 143.74
  episode_reward_max: 781.9192320903286
  episode_reward_mean: 676.1463956948614
  episode_reward_min: -131.9299699807026
  episodes_this_iter: 70
  episodes_total: 28666
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3806.557
    load_time_ms: 1.701
    num_steps_sampled: 3910000
    num_steps_trained: 3910000
    rl_0:
      cur_kl_coeff: 1.0354466439821408e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.1740566492080688
      kl: 0.0096023781225085

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-42-06
  done: false
  episode_len_mean: 134.99
  episode_reward_max: 766.6957875928339
  episode_reward_mean: 605.1566973450775
  episode_reward_min: -118.14377367885962
  episodes_this_iter: 75
  episodes_total: 29028
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3816.322
    load_time_ms: 1.538
    num_steps_sampled: 3960000
    num_steps_trained: 3960000
    rl_0:
      cur_kl_coeff: 5.177233219910704e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.1280730962753296
      kl: 0.011448662728071213
      policy_loss: -0.0008997748955152929
      total_loss: 564.5302734375
      vf_explained_var: 0.9596307277679443
      vf_loss: 564.5311279296875
    sample_time_ms: 18587.867
    update_time_ms: 5.119
  iterations_since_restore: 396
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 10413 s, 401 iter, 4010000 ts, 629 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-44-21
  done: false
  episode_len_mean: 141.86
  episode_reward_max: 786.9852453646043
  episode_reward_mean: 668.655898896388
  episode_reward_min: -85.95594846578867
  episodes_this_iter: 71
  episodes_total: 29462
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3809.087
    load_time_ms: 1.547
    num_steps_sampled: 4020000
    num_steps_trained: 4020000
    rl_0:
      cur_kl_coeff: 1.1648774034256348e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.1223300695419312
      kl: 0.00981731899082660

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-46-12
  done: false
  episode_len_mean: 137.91
  episode_reward_max: 780.7514411697582
  episode_reward_mean: 643.0829151341359
  episode_reward_min: -120.26602228786906
  episodes_this_iter: 75
  episodes_total: 29829
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3823.28
    load_time_ms: 1.611
    num_steps_sampled: 4070000
    num_steps_trained: 4070000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.0940741300582886
      kl: 0.01199251413345337
      policy_loss: -0.0025340793654322624
      total_loss: 316.9797668457031
      vf_explained_var: 0.9775983095169067
      vf_loss: 316.9822692871094
    sample_time_ms: 18415.006
    update_time_ms: 5.222
  iterations_since_restore: 407
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 10657 s, 412 iter, 4120000 ts, 645 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-48-25
  done: false
  episode_len_mean: 141.1
  episode_reward_max: 776.3053640688486
  episode_reward_mean: 658.8197331765384
  episode_reward_min: -135.43273821043482
  episodes_this_iter: 71
  episodes_total: 30258
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3783.617
    load_time_ms: 1.591
    num_steps_sampled: 4130000
    num_steps_trained: 4130000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.1479778289794922
      kl: 0.02441413514316082

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-50-18
  done: false
  episode_len_mean: 146.13
  episode_reward_max: 774.4939704896412
  episode_reward_mean: 673.313990383091
  episode_reward_min: -61.48706375332074
  episodes_this_iter: 69
  episodes_total: 30617
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3784.984
    load_time_ms: 1.536
    num_steps_sampled: 4180000
    num_steps_trained: 4180000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.1129204034805298
      kl: 0.02203778363764286
      policy_loss: -0.0036876308731734753
      total_loss: 429.533447265625
      vf_explained_var: 0.9626940488815308
      vf_loss: 429.537109375
    sample_time_ms: 18529.586
    update_time_ms: 5.319
  iterations_since_restore: 418
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 10901 s, 423 iter, 4230000 ts, 655 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-52-30
  done: false
  episode_len_mean: 140.75
  episode_reward_max: 776.0199562561239
  episode_reward_mean: 645.9675280852792
  episode_reward_min: -129.22956940895727
  episodes_this_iter: 72
  episodes_total: 31042
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3792.879
    load_time_ms: 1.461
    num_steps_sampled: 4240000
    num_steps_trained: 4240000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.141244888305664
      kl: 0.02537393197417259

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-54-23
  done: false
  episode_len_mean: 142.84
  episode_reward_max: 770.5988581954359
  episode_reward_mean: 668.6613519656082
  episode_reward_min: -127.6764137291072
  episodes_this_iter: 71
  episodes_total: 31397
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3804.106
    load_time_ms: 1.484
    num_steps_sampled: 4290000
    num_steps_trained: 4290000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.1119369268417358
      kl: 0.019708123058080673
      policy_loss: -0.002536034444347024
      total_loss: 236.64913940429688
      vf_explained_var: 0.9817714095115662
      vf_loss: 236.65167236328125
    sample_time_ms: 18427.741
    update_time_ms: 5.31
  iterations_since_restore: 429
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 11149 s, 434 iter, 4340000 ts, 690 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-56-38
  done: false
  episode_len_mean: 141.11
  episode_reward_max: 789.6126296492452
  episode_reward_mean: 663.9637910327631
  episode_reward_min: -117.79896421935271
  episodes_this_iter: 72
  episodes_total: 31819
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3808.78
    load_time_ms: 1.66
    num_steps_sampled: 4350000
    num_steps_trained: 4350000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.058124303817749
      kl: 0.014925059862434864


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_21-58-29
  done: false
  episode_len_mean: 145.23
  episode_reward_max: 779.3172945417784
  episode_reward_mean: 689.1439593950354
  episode_reward_min: -38.62920384999279
  episodes_this_iter: 69
  episodes_total: 32169
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3805.593
    load_time_ms: 1.668
    num_steps_sampled: 4400000
    num_steps_trained: 4400000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.0190519094467163
      kl: 0.011705317534506321
      policy_loss: -0.0016568127321079373
      total_loss: 276.1044616699219
      vf_explained_var: 0.9773784279823303
      vf_loss: 276.1061096191406
    sample_time_ms: 18492.851
    update_time_ms: 5.676
  iterations_since_restore: 440
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 11392 s, 445 iter, 4450000 ts, 690 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-00-42
  done: false
  episode_len_mean: 144.88
  episode_reward_max: 781.379234524302
  episode_reward_mean: 692.5754904762074
  episode_reward_min: -106.22817397225191
  episodes_this_iter: 69
  episodes_total: 32584
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3798.075
    load_time_ms: 1.538
    num_steps_sampled: 4460000
    num_steps_trained: 4460000
    rl_0:
      cur_kl_coeff: 5.824387017128174e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.0373623371124268
      kl: 0.02372201345860958

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-02-34
  done: false
  episode_len_mean: 143.53
  episode_reward_max: 776.501359779505
  episode_reward_mean: 690.2473170782063
  episode_reward_min: -110.31110648559945
  episodes_this_iter: 71
  episodes_total: 32940
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3778.462
    load_time_ms: 1.47
    num_steps_sampled: 4510000
    num_steps_trained: 4510000
    rl_0:
      cur_kl_coeff: 2.912193508564087e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.058855652809143
      kl: 0.02967449277639389
      policy_loss: -0.0022228260058909655
      total_loss: 283.73883056640625
      vf_explained_var: 0.9773898720741272
      vf_loss: 283.74102783203125
    sample_time_ms: 18421.58
    update_time_ms: 5.605
  iterations_since_restore: 451
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 11636 s, 456 iter, 4560000 ts, 709 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-04-45
  done: false
  episode_len_mean: 142.03
  episode_reward_max: 795.8264209205907
  episode_reward_mean: 711.2965988740174
  episode_reward_min: -44.13899269838177
  episodes_this_iter: 71
  episodes_total: 33364
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3751.403
    load_time_ms: 1.431
    num_steps_sampled: 4570000
    num_steps_trained: 4570000
    rl_0:
      cur_kl_coeff: 2.912193508564087e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.0114214420318604
      kl: 0.02944816648960113

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-06-36
  done: false
  episode_len_mean: 138.15
  episode_reward_max: 784.0548669146876
  episode_reward_mean: 688.8630590264655
  episode_reward_min: -92.2580740078231
  episodes_this_iter: 72
  episodes_total: 33724
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3751.719
    load_time_ms: 1.544
    num_steps_sampled: 4620000
    num_steps_trained: 4620000
    rl_0:
      cur_kl_coeff: 2.912193508564087e-07
      cur_lr: 4.999999873689376e-05
      entropy: 0.9917690753936768
      kl: 0.014256834983825684
      policy_loss: -0.0006927636568434536
      total_loss: 134.0164794921875
      vf_explained_var: 0.9896336793899536
      vf_loss: 134.0171661376953
    sample_time_ms: 18236.739
    update_time_ms: 5.006
  iterations_since_restore: 462
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 11881 s, 467 iter, 4670000 ts, 654 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-08-51
  done: false
  episode_len_mean: 129.93
  episode_reward_max: 784.0583047332127
  episode_reward_mean: 618.8569894362474
  episode_reward_min: -132.64578251172213
  episodes_this_iter: 78
  episodes_total: 34169
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3767.699
    load_time_ms: 1.494
    num_steps_sampled: 4680000
    num_steps_trained: 4680000
    rl_0:
      cur_kl_coeff: 4.3682899786290363e-07
      cur_lr: 4.999999873689376e-05
      entropy: 0.9507472515106201
      kl: 0.021054411306977

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-10-42
  done: false
  episode_len_mean: 133.64
  episode_reward_max: 783.5880515289344
  episode_reward_mean: 666.1947784140601
  episode_reward_min: -111.6369371351017
  episodes_this_iter: 74
  episodes_total: 34538
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3784.264
    load_time_ms: 1.478
    num_steps_sampled: 4730000
    num_steps_trained: 4730000
    rl_0:
      cur_kl_coeff: 4.3682899786290363e-07
      cur_lr: 4.999999873689376e-05
      entropy: 0.8138097524642944
      kl: 0.015192979015409946
      policy_loss: 0.00048705420340411365
      total_loss: 339.8606262207031
      vf_explained_var: 0.9735676646232605
      vf_loss: 339.86016845703125
    sample_time_ms: 18528.396
    update_time_ms: 5.165
  iterations_since_restore: 473
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_re

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 12123 s, 478 iter, 4780000 ts, 687 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-12-53
  done: false
  episode_len_mean: 139.6
  episode_reward_max: 792.9285767851642
  episode_reward_mean: 712.9881219350922
  episode_reward_min: -100.64622680165014
  episodes_this_iter: 72
  episodes_total: 34977
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3746.146
    load_time_ms: 1.426
    num_steps_sampled: 4790000
    num_steps_trained: 4790000
    rl_0:
      cur_kl_coeff: 4.3682899786290363e-07
      cur_lr: 4.999999873689376e-05
      entropy: 0.844096839427948
      kl: 1.351245641708374
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-14-44
  done: false
  episode_len_mean: 138.34
  episode_reward_max: 792.9809219022686
  episode_reward_mean: 705.0239138141774
  episode_reward_min: -120.96199075687171
  episodes_this_iter: 73
  episodes_total: 35345
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3759.821
    load_time_ms: 1.553
    num_steps_sampled: 4840000
    num_steps_trained: 4840000
    rl_0:
      cur_kl_coeff: 9.828653446675162e-07
      cur_lr: 4.999999873689376e-05
      entropy: 0.8929060101509094
      kl: 1.2398892641067505
      policy_loss: 0.0024306834675371647
      total_loss: 105.36154174804688
      vf_explained_var: 0.9937601089477539
      vf_loss: 105.35912322998047
    sample_time_ms: 18128.799
    update_time_ms: 5.391
  iterations_since_restore: 484
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 12367 s, 489 iter, 4890000 ts, 725 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-16-58
  done: false
  episode_len_mean: 138.92
  episode_reward_max: 800.8261914198714
  episode_reward_mean: 718.7050660764797
  episode_reward_min: -45.61714232965116
  episodes_this_iter: 72
  episodes_total: 35782
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3753.086
    load_time_ms: 1.565
    num_steps_sampled: 4900000
    num_steps_trained: 4900000
    rl_0:
      cur_kl_coeff: 3.317170239824918e-06
      cur_lr: 4.999999873689376e-05
      entropy: 0.978504478931427
      kl: 0.024076983332633972

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-18-51
  done: false
  episode_len_mean: 138.64
  episode_reward_max: 785.3563181599415
  episode_reward_mean: 719.286595986997
  episode_reward_min: -116.69503332414278
  episodes_this_iter: 72
  episodes_total: 36144
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3765.421
    load_time_ms: 1.614
    num_steps_sampled: 4950000
    num_steps_trained: 4950000
    rl_0:
      cur_kl_coeff: 4.975755018676864e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.0415146350860596
      kl: 0.03174060583114624
      policy_loss: -0.0011978039983659983
      total_loss: 118.91586303710938
      vf_explained_var: 0.9929873943328857
      vf_loss: 118.91707611083984
    sample_time_ms: 18644.44
    update_time_ms: 5.375
  iterations_since_restore: 495
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 12613 s, 500 iter, 5000000 ts, 696 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-21-05
  done: false
  episode_len_mean: 135.86
  episode_reward_max: 790.5919857563097
  episode_reward_mean: 704.7157918421519
  episode_reward_min: -85.98716777599253
  episodes_this_iter: 74
  episodes_total: 36587
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3781.091
    load_time_ms: 1.491
    num_steps_sampled: 5010000
    num_steps_trained: 5010000
    rl_0:
      cur_kl_coeff: 7.463632755388971e-06
      cur_lr: 4.999999873689376e-05
      entropy: 0.9589176774024963
      kl: 0.01505229715257883

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-22-55
  done: false
  episode_len_mean: 136.08
  episode_reward_max: 790.761498896612
  episode_reward_mean: 711.7813021346045
  episode_reward_min: -119.42445647999432
  episodes_this_iter: 75
  episodes_total: 36958
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3737.769
    load_time_ms: 1.43
    num_steps_sampled: 5060000
    num_steps_trained: 5060000
    rl_0:
      cur_kl_coeff: 7.463632755388971e-06
      cur_lr: 4.999999873689376e-05
      entropy: 0.9453606009483337
      kl: 2.940443277359009
      policy_loss: -0.0008568331249989569
      total_loss: 182.17202758789062
      vf_explained_var: 0.9878937005996704
      vf_loss: 182.1728515625
    sample_time_ms: 18408.824
    update_time_ms: 5.159
  iterations_since_restore: 506
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 12858 s, 511 iter, 5110000 ts, 692 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-25-13
  done: false
  episode_len_mean: 132.32
  episode_reward_max: 790.4686906476651
  episode_reward_mean: 683.937139043976
  episode_reward_min: -116.7132152530639
  episodes_this_iter: 77
  episodes_total: 37412
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3764.299
    load_time_ms: 1.448
    num_steps_sampled: 5120000
    num_steps_trained: 5120000
    rl_0:
      cur_kl_coeff: 1.1195449587830808e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.9589323401451111
      kl: 0.02226311340928077

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-27-42
  done: false
  episode_len_mean: 132.18
  episode_reward_max: 784.9730475383099
  episode_reward_mean: 698.4087193857882
  episode_reward_min: -130.1079696479084
  episodes_this_iter: 77
  episodes_total: 37791
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 4581.462
    load_time_ms: 1.568
    num_steps_sampled: 5170000
    num_steps_trained: 5170000
    rl_0:
      cur_kl_coeff: 2.518975998100359e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.8526040315628052
      kl: 0.02178562618792057
      policy_loss: -0.0007526524714194238
      total_loss: 377.3722839355469
      vf_explained_var: 0.9703081846237183
      vf_loss: 377.3730163574219
    sample_time_ms: 21749.276
    update_time_ms: 6.149
  iterations_since_restore: 517
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 13322 s, 522 iter, 5220000 ts, 695 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-32-59
  done: false
  episode_len_mean: 131.59
  episode_reward_max: 787.8554665140366
  episode_reward_mean: 688.5768912004186
  episode_reward_min: -97.47504217397264
  episodes_this_iter: 76
  episodes_total: 38245
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 8534.914
    load_time_ms: 2.447
    num_steps_sampled: 5230000
    num_steps_trained: 5230000
    rl_0:
      cur_kl_coeff: 2.518975998100359e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.8869775533676147
      kl: 0.01552028395235538

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-36-52
  done: false
  episode_len_mean: 131.43
  episode_reward_max: 785.9180548274524
  episode_reward_mean: 696.2930901660977
  episode_reward_min: -99.24408519219836
  episodes_this_iter: 76
  episodes_total: 38630
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 11247.664
    load_time_ms: 2.99
    num_steps_sampled: 5280000
    num_steps_trained: 5280000
    rl_0:
      cur_kl_coeff: 3.778464088100009e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.8891235589981079
      kl: 0.012130885384976864
      policy_loss: -0.0021781004033982754
      total_loss: 261.4269104003906
      vf_explained_var: 0.9775881767272949
      vf_loss: 261.4290771484375
    sample_time_ms: 40797.681
    update_time_ms: 11.135
  iterations_since_restore: 528
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 13878 s, 533 iter, 5330000 ts, 648 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-43-51
  done: false
  episode_len_mean: 127.03
  episode_reward_max: 789.5662951408443
  episode_reward_mean: 651.594699278648
  episode_reward_min: -112.42782069549226
  episodes_this_iter: 78
  episodes_total: 39091
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 13347.624
    load_time_ms: 3.68
    num_steps_sampled: 5340000
    num_steps_trained: 5340000
    rl_0:
      cur_kl_coeff: 3.778464088100009e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.8264707922935486
      kl: 0.02150389738380909

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-47-01
  done: false
  episode_len_mean: 132.05
  episode_reward_max: 793.6379172223052
  episode_reward_mean: 707.5284490037219
  episode_reward_min: -121.0942283459747
  episodes_this_iter: 75
  episodes_total: 39472
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 8819.953
    load_time_ms: 3.092
    num_steps_sampled: 5390000
    num_steps_trained: 5390000
    rl_0:
      cur_kl_coeff: 3.778464088100009e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.7412936687469482
      kl: 1.8084098100662231
      policy_loss: -0.0036812610924243927
      total_loss: 60.26507568359375
      vf_explained_var: 0.9949896335601807
      vf_loss: 60.26869201660156
    sample_time_ms: 41289.709
    update_time_ms: 13.446
  iterations_since_restore: 539
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 14301 s, 544 iter, 5440000 ts, 699 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-49-16
  done: false
  episode_len_mean: 131.49
  episode_reward_max: 783.6545689347188
  episode_reward_mean: 710.1521769383603
  episode_reward_min: -100.59216469803363
  episodes_this_iter: 76
  episodes_total: 39928
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3814.877
    load_time_ms: 1.658
    num_steps_sampled: 5450000
    num_steps_trained: 5450000
    rl_0:
      cur_kl_coeff: 5.6676974054425955e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.7808560132980347
      kl: 0.031844064593315

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-51-10
  done: false
  episode_len_mean: 131.36
  episode_reward_max: 782.4373911830153
  episode_reward_mean: 681.7688151589974
  episode_reward_min: -128.37888404701366
  episodes_this_iter: 77
  episodes_total: 40310
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3804.203
    load_time_ms: 1.481
    num_steps_sampled: 5500000
    num_steps_trained: 5500000
    rl_0:
      cur_kl_coeff: 5.6676974054425955e-05
      cur_lr: 4.999999873689376e-05
      entropy: 0.8035807609558105
      kl: 0.024516429752111435
      policy_loss: -0.004442167468369007
      total_loss: 667.3524169921875
      vf_explained_var: 0.9537349343299866
      vf_loss: 667.3567504882812
    sample_time_ms: 18710.536
    update_time_ms: 5.402
  iterations_since_restore: 550
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 14549 s, 555 iter, 5550000 ts, 717 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-53-24
  done: false
  episode_len_mean: 127.08
  episode_reward_max: 793.3473127172456
  episode_reward_mean: 671.3069654387022
  episode_reward_min: -112.09670258950177
  episodes_this_iter: 80
  episodes_total: 40768
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3790.314
    load_time_ms: 1.513
    num_steps_sampled: 5560000
    num_steps_trained: 5560000
    rl_0:
      cur_kl_coeff: 0.00012752317707054317
      cur_lr: 4.999999873689376e-05
      entropy: 0.7173680663108826
      kl: 0.025792898610234

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-55-18
  done: false
  episode_len_mean: 131.02
  episode_reward_max: 794.3012365348105
  episode_reward_mean: 714.0121584112329
  episode_reward_min: -83.57646649615523
  episodes_this_iter: 76
  episodes_total: 41157
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3766.949
    load_time_ms: 1.548
    num_steps_sampled: 5610000
    num_steps_trained: 5610000
    rl_0:
      cur_kl_coeff: 0.00019128475105389953
      cur_lr: 4.999999873689376e-05
      entropy: 0.6422457695007324
      kl: 1.4761987924575806
      policy_loss: -0.00213471963070333
      total_loss: 73.05372619628906
      vf_explained_var: 0.9951651096343994
      vf_loss: 73.05558013916016
    sample_time_ms: 18769.751
    update_time_ms: 5.383
  iterations_since_restore: 561
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 14798 s, 566 iter, 5660000 ts, 731 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-57-34
  done: false
  episode_len_mean: 129.8
  episode_reward_max: 791.3443475964679
  episode_reward_mean: 730.4982627179415
  episode_reward_min: 680.6175604510863
  episodes_this_iter: 78
  episodes_total: 41615
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3756.424
    load_time_ms: 1.388
    num_steps_sampled: 5670000
    num_steps_trained: 5670000
    rl_0:
      cur_kl_coeff: 0.0006455860566347837
      cur_lr: 4.999999873689376e-05
      entropy: 0.6001368761062622
      kl: 0.020231610164046288


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_22-59-27
  done: false
  episode_len_mean: 126.64
  episode_reward_max: 805.3716036489168
  episode_reward_mean: 700.4118194751526
  episode_reward_min: -129.95330189553385
  episodes_this_iter: 79
  episodes_total: 42003
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3762.146
    load_time_ms: 1.507
    num_steps_sampled: 5720000
    num_steps_trained: 5720000
    rl_0:
      cur_kl_coeff: 0.0006455860566347837
      cur_lr: 4.999999873689376e-05
      entropy: 0.6717346906661987
      kl: 0.02519562467932701
      policy_loss: -0.006400298327207565
      total_loss: 533.4564208984375
      vf_explained_var: 0.9639305472373962
      vf_loss: 533.4627685546875
    sample_time_ms: 18870.442
    update_time_ms: 5.21
  iterations_since_restore: 572
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 15044 s, 577 iter, 5770000 ts, 715 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-01-40
  done: false
  episode_len_mean: 124.83
  episode_reward_max: 791.3107523040768
  episode_reward_mean: 686.684992292661
  episode_reward_min: -52.823567275134025
  episodes_this_iter: 81
  episodes_total: 42474
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3760.377
    load_time_ms: 1.5
    num_steps_sampled: 5780000
    num_steps_trained: 5780000
    rl_0:
      cur_kl_coeff: 0.0021788531448692083
      cur_lr: 4.999999873689376e-05
      entropy: 0.676324725151062
      kl: 0.02298686094582081
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-03-31
  done: false
  episode_len_mean: 128.1
  episode_reward_max: 802.1267617927481
  episode_reward_mean: 727.4657780416304
  episode_reward_min: -58.45575454319439
  episodes_this_iter: 79
  episodes_total: 42867
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3764.384
    load_time_ms: 1.49
    num_steps_sampled: 5830000
    num_steps_trained: 5830000
    rl_0:
      cur_kl_coeff: 0.003268279368057847
      cur_lr: 4.999999873689376e-05
      entropy: 0.6380210518836975
      kl: 0.02250230312347412
      policy_loss: -0.0063351355493068695
      total_loss: 186.39002990722656
      vf_explained_var: 0.9842793345451355
      vf_loss: 186.39630126953125
    sample_time_ms: 18380.352
    update_time_ms: 5.168
  iterations_since_restore: 583
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 15290 s, 588 iter, 5880000 ts, 707 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-05-46
  done: false
  episode_len_mean: 127.95
  episode_reward_max: 807.2231195823899
  episode_reward_mean: 720.3376047388157
  episode_reward_min: -93.2121242981404
  episodes_this_iter: 78
  episodes_total: 43340
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3769.267
    load_time_ms: 1.539
    num_steps_sampled: 5890000
    num_steps_trained: 5890000
    rl_0:
      cur_kl_coeff: 0.004902419168502092
      cur_lr: 4.999999873689376e-05
      entropy: 0.6450379490852356
      kl: 0.3020269572734833
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-07-38
  done: false
  episode_len_mean: 126.89
  episode_reward_max: 793.252836994696
  episode_reward_mean: 686.8781203713963
  episode_reward_min: -83.72668461934119
  episodes_this_iter: 78
  episodes_total: 43736
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3760.911
    load_time_ms: 1.708
    num_steps_sampled: 5940000
    num_steps_trained: 5940000
    rl_0:
      cur_kl_coeff: 0.007353628519922495
      cur_lr: 4.999999873689376e-05
      entropy: 0.7188136577606201
      kl: 0.018016166985034943
      policy_loss: -0.0035285658668726683
      total_loss: 455.2890625
      vf_explained_var: 0.962726891040802
      vf_loss: 455.29248046875
    sample_time_ms: 18617.161
    update_time_ms: 5.443
  iterations_since_restore: 594
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 15537 s, 599 iter, 5990000 ts, 713 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-09-53
  done: false
  episode_len_mean: 128.1
  episode_reward_max: 789.2186656668312
  episode_reward_mean: 716.8677387104359
  episode_reward_min: -52.028926516307536
  episodes_this_iter: 77
  episodes_total: 44205
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3743.039
    load_time_ms: 1.597
    num_steps_sampled: 6000000
    num_steps_trained: 6000000
    rl_0:
      cur_kl_coeff: 0.011030443012714386
      cur_lr: 4.999999873689376e-05
      entropy: 0.6205211281776428
      kl: 0.023229578509926796

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-11-46
  done: false
  episode_len_mean: 129.42
  episode_reward_max: 795.5693086607808
  episode_reward_mean: 722.7969300073833
  episode_reward_min: -53.98882299951674
  episodes_this_iter: 77
  episodes_total: 44596
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3743.101
    load_time_ms: 1.424
    num_steps_sampled: 6050000
    num_steps_trained: 6050000
    rl_0:
      cur_kl_coeff: 0.011030443012714386
      cur_lr: 4.999999873689376e-05
      entropy: 0.6589773893356323
      kl: 0.02135213278234005
      policy_loss: -0.0032080472446978092
      total_loss: 121.53509521484375
      vf_explained_var: 0.9894582033157349
      vf_loss: 121.5380630493164
    sample_time_ms: 18711.296
    update_time_ms: 5.438
  iterations_since_restore: 605
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 15783 s, 610 iter, 6100000 ts, 692 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-14-00
  done: false
  episode_len_mean: 130.92
  episode_reward_max: 787.6008291369702
  episode_reward_mean: 722.4169855475568
  episode_reward_min: -18.946680030055177
  episodes_this_iter: 78
  episodes_total: 45065
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3745.593
    load_time_ms: 1.526
    num_steps_sampled: 6110000
    num_steps_trained: 6110000
    rl_0:
      cur_kl_coeff: 0.011030443012714386
      cur_lr: 4.999999873689376e-05
      entropy: 0.6340611577033997
      kl: 0.0369570329785347


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-15-51
  done: false
  episode_len_mean: 124.48
  episode_reward_max: 781.2655118080922
  episode_reward_mean: 665.3804004259956
  episode_reward_min: -103.32579584484002
  episodes_this_iter: 79
  episodes_total: 45457
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3736.419
    load_time_ms: 1.547
    num_steps_sampled: 6160000
    num_steps_trained: 6160000
    rl_0:
      cur_kl_coeff: 0.02481849491596222
      cur_lr: 4.999999873689376e-05
      entropy: 0.7076490521430969
      kl: 0.016278287395834923
      policy_loss: -0.0012389554176479578
      total_loss: 974.8639526367188
      vf_explained_var: 0.9230816960334778
      vf_loss: 974.8648071289062
    sample_time_ms: 18484.752
    update_time_ms: 5.288
  iterations_since_restore: 616
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 16028 s, 621 iter, 6210000 ts, 691 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-18-06
  done: false
  episode_len_mean: 130.28
  episode_reward_max: 797.1885472179727
  episode_reward_mean: 715.985135023147
  episode_reward_min: -38.48658045054984
  episodes_this_iter: 77
  episodes_total: 45929
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3757.218
    load_time_ms: 1.679
    num_steps_sampled: 6220000
    num_steps_trained: 6220000
    rl_0:
      cur_kl_coeff: 0.03722774237394333
      cur_lr: 4.999999873689376e-05
      entropy: 0.6855751276016235
      kl: 0.04887540638446808
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-19-58
  done: false
  episode_len_mean: 130.24
  episode_reward_max: 786.6689277974465
  episode_reward_mean: 706.5770026243918
  episode_reward_min: -106.62644063288711
  episodes_this_iter: 78
  episodes_total: 46318
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3778.408
    load_time_ms: 1.628
    num_steps_sampled: 6270000
    num_steps_trained: 6270000
    rl_0:
      cur_kl_coeff: 0.05584161728620529
      cur_lr: 4.999999873689376e-05
      entropy: 0.6970230340957642
      kl: 0.025771016255021095
      policy_loss: -0.004147632978856564
      total_loss: 287.2734680175781
      vf_explained_var: 0.9746991395950317
      vf_loss: 287.27618408203125
    sample_time_ms: 18580.181
    update_time_ms: 5.665
  iterations_since_restore: 627
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 16273 s, 632 iter, 6320000 ts, 685 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-22-11
  done: false
  episode_len_mean: 129.0
  episode_reward_max: 798.7513877715639
  episode_reward_mean: 654.5842258633338
  episode_reward_min: -102.56916008965945
  episodes_this_iter: 79
  episodes_total: 46790
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3760.33
    load_time_ms: 1.558
    num_steps_sampled: 6330000
    num_steps_trained: 6330000
    rl_0:
      cur_kl_coeff: 0.05584161728620529
      cur_lr: 4.999999873689376e-05
      entropy: 0.786137580871582
      kl: 0.014422202482819557
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-24-01
  done: false
  episode_len_mean: 130.09
  episode_reward_max: 796.3617143617231
  episode_reward_mean: 715.7109227898738
  episode_reward_min: -53.89117230981006
  episodes_this_iter: 78
  episodes_total: 47179
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3738.244
    load_time_ms: 1.538
    num_steps_sampled: 6380000
    num_steps_trained: 6380000
    rl_0:
      cur_kl_coeff: 0.05584161728620529
      cur_lr: 4.999999873689376e-05
      entropy: 0.5869515538215637
      kl: 0.01093091070652008
      policy_loss: -0.0014970445772632957
      total_loss: 86.09154510498047
      vf_explained_var: 0.9937268495559692
      vf_loss: 86.09242248535156
    sample_time_ms: 18286.718
    update_time_ms: 5.516
  iterations_since_restore: 638
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 16518 s, 643 iter, 6430000 ts, 715 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-26-17
  done: false
  episode_len_mean: 129.97
  episode_reward_max: 802.6705165802207
  episode_reward_mean: 725.4478766612523
  episode_reward_min: -25.26886386285068
  episodes_this_iter: 78
  episodes_total: 47641
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3731.5
    load_time_ms: 1.56
    num_steps_sampled: 6440000
    num_steps_trained: 6440000
    rl_0:
      cur_kl_coeff: 0.05584161728620529
      cur_lr: 4.999999873689376e-05
      entropy: 0.5092790722846985
      kl: 0.024443387985229492
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-28-10
  done: false
  episode_len_mean: 128.4
  episode_reward_max: 774.7471491690231
  episode_reward_mean: 720.2276533496056
  episode_reward_min: 130.02651057432632
  episodes_this_iter: 78
  episodes_total: 48028
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3745.775
    load_time_ms: 1.622
    num_steps_sampled: 6490000
    num_steps_trained: 6490000
    rl_0:
      cur_kl_coeff: 0.08376242220401764
      cur_lr: 4.999999873689376e-05
      entropy: 0.48630571365356445
      kl: 0.02634219266474247
      policy_loss: -0.0030615723226219416
      total_loss: 138.26976013183594
      vf_explained_var: 0.9889239072799683
      vf_loss: 138.27061462402344
    sample_time_ms: 18794.289
    update_time_ms: 5.126
  iterations_since_restore: 649
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 16766 s, 654 iter, 6540000 ts, 688 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-30-25
  done: false
  episode_len_mean: 132.1
  episode_reward_max: 789.89355019259
  episode_reward_mean: 713.1166252726694
  episode_reward_min: -60.0152045096259
  episodes_this_iter: 76
  episodes_total: 48493
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3782.908
    load_time_ms: 1.703
    num_steps_sampled: 6550000
    num_steps_trained: 6550000
    rl_0:
      cur_kl_coeff: 0.04188121110200882
      cur_lr: 4.999999873689376e-05
      entropy: 0.7088059782981873
      kl: 0.016527194529771805
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-32-16
  done: false
  episode_len_mean: 131.89
  episode_reward_max: 791.9093190658343
  episode_reward_mean: 712.1762330832648
  episode_reward_min: -80.60150249975807
  episodes_this_iter: 77
  episodes_total: 48880
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3757.306
    load_time_ms: 1.58
    num_steps_sampled: 6600000
    num_steps_trained: 6600000
    rl_0:
      cur_kl_coeff: 0.06282181292772293
      cur_lr: 4.999999873689376e-05
      entropy: 0.7075968980789185
      kl: 0.02690179832279682
      policy_loss: -0.004555739928036928
      total_loss: 98.0064468383789
      vf_explained_var: 0.9920980930328369
      vf_loss: 98.00931549072266
    sample_time_ms: 18494.308
    update_time_ms: 5.178
  iterations_since_restore: 660
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 17010 s, 665 iter, 6650000 ts, 723 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-34-29
  done: false
  episode_len_mean: 129.33
  episode_reward_max: 793.3175622626162
  episode_reward_mean: 698.8729405302558
  episode_reward_min: -81.23677320113168
  episodes_this_iter: 78
  episodes_total: 49350
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3723.17
    load_time_ms: 1.388
    num_steps_sampled: 6660000
    num_steps_trained: 6660000
    rl_0:
      cur_kl_coeff: 0.047116365283727646
      cur_lr: 4.999999873689376e-05
      entropy: 0.7692645192146301
      kl: 0.019112300127744675


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-36-22
  done: false
  episode_len_mean: 129.48
  episode_reward_max: 786.7598388336224
  episode_reward_mean: 718.3532840234657
  episode_reward_min: -58.48999569502166
  episodes_this_iter: 77
  episodes_total: 49739
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.065
    load_time_ms: 1.35
    num_steps_sampled: 6710000
    num_steps_trained: 6710000
    rl_0:
      cur_kl_coeff: 0.047116365283727646
      cur_lr: 4.999999873689376e-05
      entropy: 0.6690681576728821
      kl: 0.0379757285118103
      policy_loss: -0.0004298457643017173
      total_loss: 226.9357452392578
      vf_explained_var: 0.981141209602356
      vf_loss: 226.9344024658203
    sample_time_ms: 18667.721
    update_time_ms: 5.101
  iterations_since_restore: 671
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 17257 s, 676 iter, 6760000 ts, 680 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-38-37
  done: false
  episode_len_mean: 129.33
  episode_reward_max: 805.1812246777423
  episode_reward_mean: 712.4495509184395
  episode_reward_min: -106.03390049632648
  episodes_this_iter: 79
  episodes_total: 50213
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.931
    load_time_ms: 1.602
    num_steps_sampled: 6770000
    num_steps_trained: 6770000
    rl_0:
      cur_kl_coeff: 0.047116365283727646
      cur_lr: 4.999999873689376e-05
      entropy: 0.592819333076477
      kl: 0.025553831830620766

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-40-29
  done: false
  episode_len_mean: 130.58
  episode_reward_max: 791.5746676539068
  episode_reward_mean: 704.1601992641412
  episode_reward_min: -48.77045296922435
  episodes_this_iter: 76
  episodes_total: 50600
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3717.326
    load_time_ms: 1.591
    num_steps_sampled: 6820000
    num_steps_trained: 6820000
    rl_0:
      cur_kl_coeff: 0.047116365283727646
      cur_lr: 4.999999873689376e-05
      entropy: 0.7116148471832275
      kl: 0.016279544681310654
      policy_loss: -0.005269872024655342
      total_loss: 358.11669921875
      vf_explained_var: 0.9748079180717468
      vf_loss: 358.1211853027344
    sample_time_ms: 18683.735
    update_time_ms: 5.336
  iterations_since_restore: 682
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 17503 s, 687 iter, 6870000 ts, 720 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-42-43
  done: false
  episode_len_mean: 132.14
  episode_reward_max: 799.4438764322348
  episode_reward_mean: 733.0192123274475
  episode_reward_min: 201.72832610478866
  episodes_this_iter: 77
  episodes_total: 51065
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3721.07
    load_time_ms: 1.371
    num_steps_sampled: 6880000
    num_steps_trained: 6880000
    rl_0:
      cur_kl_coeff: 0.07067453861236572
      cur_lr: 4.999999873689376e-05
      entropy: 0.6334137320518494
      kl: 0.009075450710952282
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-44-33
  done: false
  episode_len_mean: 130.07
  episode_reward_max: 785.690207893896
  episode_reward_mean: 715.28268091157
  episode_reward_min: -70.214627497317
  episodes_this_iter: 77
  episodes_total: 51448
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3726.599
    load_time_ms: 1.384
    num_steps_sampled: 6930000
    num_steps_trained: 6930000
    rl_0:
      cur_kl_coeff: 0.026502951979637146
      cur_lr: 4.999999873689376e-05
      entropy: 0.6202524900436401
      kl: 0.04345310479402542
      policy_loss: -0.003295746399089694
      total_loss: 67.98738098144531
      vf_explained_var: 0.9943271279335022
      vf_loss: 67.98953247070312
    sample_time_ms: 18310.548
    update_time_ms: 5.072
  iterations_since_restore: 693
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 17745 s, 698 iter, 6980000 ts, 719 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-46-45
  done: false
  episode_len_mean: 129.02
  episode_reward_max: 803.0505474961086
  episode_reward_mean: 706.7706177473706
  episode_reward_min: -90.2156831515108
  episodes_this_iter: 78
  episodes_total: 51915
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3735.95
    load_time_ms: 1.436
    num_steps_sampled: 6990000
    num_steps_trained: 6990000
    rl_0:
      cur_kl_coeff: 0.03975442796945572
      cur_lr: 4.999999873689376e-05
      entropy: 0.7529179453849792
      kl: 0.014962990768253803
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-48-37
  done: false
  episode_len_mean: 130.17
  episode_reward_max: 801.1523974430646
  episode_reward_mean: 719.0597289272092
  episode_reward_min: -87.92691182275674
  episodes_this_iter: 77
  episodes_total: 52307
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.887
    load_time_ms: 1.473
    num_steps_sampled: 7040000
    num_steps_trained: 7040000
    rl_0:
      cur_kl_coeff: 0.01987721398472786
      cur_lr: 4.999999873689376e-05
      entropy: 0.7103136777877808
      kl: 0.012959180399775505
      policy_loss: -8.297128806589171e-05
      total_loss: 181.66714477539062
      vf_explained_var: 0.9845048785209656
      vf_loss: 181.66696166992188
    sample_time_ms: 18477.452
    update_time_ms: 6.188
  iterations_since_restore: 704
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 17991 s, 709 iter, 7090000 ts, 687 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-50-51
  done: false
  episode_len_mean: 130.69
  episode_reward_max: 795.9860917079251
  episode_reward_mean: 691.5183325167299
  episode_reward_min: -80.46009319082458
  episodes_this_iter: 78
  episodes_total: 52774
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3715.428
    load_time_ms: 1.453
    num_steps_sampled: 7100000
    num_steps_trained: 7100000
    rl_0:
      cur_kl_coeff: 0.01987721398472786
      cur_lr: 4.999999873689376e-05
      entropy: 0.6783220767974854
      kl: 0.04016254469752312
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-52-44
  done: false
  episode_len_mean: 128.65
  episode_reward_max: 789.745569808801
  episode_reward_mean: 692.636695932696
  episode_reward_min: -111.85433072012715
  episodes_this_iter: 78
  episodes_total: 53161
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3714.551
    load_time_ms: 1.442
    num_steps_sampled: 7150000
    num_steps_trained: 7150000
    rl_0:
      cur_kl_coeff: 0.014907913282513618
      cur_lr: 4.999999873689376e-05
      entropy: 0.6939635872840881
      kl: 0.019903631880879402
      policy_loss: -0.002963592763990164
      total_loss: 499.29547119140625
      vf_explained_var: 0.9648776650428772
      vf_loss: 499.2980651855469
    sample_time_ms: 18648.969
    update_time_ms: 5.095
  iterations_since_restore: 715
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 18237 s, 720 iter, 7200000 ts, 696 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-54-58
  done: false
  episode_len_mean: 131.84
  episode_reward_max: 796.8041997924508
  episode_reward_mean: 715.189083829231
  episode_reward_min: -6.809252556871144
  episodes_this_iter: 75
  episodes_total: 53623
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3741.337
    load_time_ms: 1.475
    num_steps_sampled: 7210000
    num_steps_trained: 7210000
    rl_0:
      cur_kl_coeff: 0.014907913282513618
      cur_lr: 4.999999873689376e-05
      entropy: 0.7990543842315674
      kl: 0.05126065015792847
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-56-51
  done: false
  episode_len_mean: 129.61
  episode_reward_max: 791.9589359359471
  episode_reward_mean: 709.3487140693195
  episode_reward_min: -47.26186414930234
  episodes_this_iter: 78
  episodes_total: 54015
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3759.662
    load_time_ms: 1.557
    num_steps_sampled: 7260000
    num_steps_trained: 7260000
    rl_0:
      cur_kl_coeff: 0.022361868992447853
      cur_lr: 4.999999873689376e-05
      entropy: 0.6871851682662964
      kl: 0.017353605479002
      policy_loss: -0.0031909297686070204
      total_loss: 498.1952209472656
      vf_explained_var: 0.9621246457099915
      vf_loss: 498.1980285644531
    sample_time_ms: 18635.097
    update_time_ms: 4.888
  iterations_since_restore: 726
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 18484 s, 731 iter, 7310000 ts, 725 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-27_23-59-05
  done: false
  episode_len_mean: 128.79
  episode_reward_max: 802.5183963743959
  episode_reward_mean: 704.4102492980164
  episode_reward_min: -90.08516638008636
  episodes_this_iter: 79
  episodes_total: 54481
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3742.042
    load_time_ms: 1.361
    num_steps_sampled: 7320000
    num_steps_trained: 7320000
    rl_0:
      cur_kl_coeff: 0.033542804419994354
      cur_lr: 4.999999873689376e-05
      entropy: 0.6447903513908386
      kl: 0.014696585945785046

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-00-56
  done: false
  episode_len_mean: 130.77
  episode_reward_max: 784.9813550587122
  episode_reward_mean: 693.3252191077834
  episode_reward_min: -65.43885199418287
  episodes_this_iter: 77
  episodes_total: 54867
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3768.107
    load_time_ms: 1.442
    num_steps_sampled: 7370000
    num_steps_trained: 7370000
    rl_0:
      cur_kl_coeff: 0.016771402209997177
      cur_lr: 4.999999873689376e-05
      entropy: 0.7384443283081055
      kl: 0.03359314799308777
      policy_loss: -0.006955228745937347
      total_loss: 578.545166015625
      vf_explained_var: 0.9580619931221008
      vf_loss: 578.5515747070312
    sample_time_ms: 18470.334
    update_time_ms: 5.91
  iterations_since_restore: 737
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 18728 s, 742 iter, 7420000 ts, 676 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-03-10
  done: false
  episode_len_mean: 133.69
  episode_reward_max: 790.3332662018726
  episode_reward_mean: 680.4319539559085
  episode_reward_min: -68.09060047715812
  episodes_this_iter: 76
  episodes_total: 55325
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3747.085
    load_time_ms: 1.491
    num_steps_sampled: 7430000
    num_steps_trained: 7430000
    rl_0:
      cur_kl_coeff: 0.016771402209997177
      cur_lr: 4.999999873689376e-05
      entropy: 0.7527135014533997
      kl: 0.03183087334036827


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-05-00
  done: false
  episode_len_mean: 137.32
  episode_reward_max: 797.0247066338216
  episode_reward_mean: 675.2993212751492
  episode_reward_min: -70.2079463025195
  episodes_this_iter: 73
  episodes_total: 55705
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3727.877
    load_time_ms: 1.466
    num_steps_sampled: 7480000
    num_steps_trained: 7480000
    rl_0:
      cur_kl_coeff: 0.025157099589705467
      cur_lr: 4.999999873689376e-05
      entropy: 0.7931416630744934
      kl: 0.019182462245225906
      policy_loss: -0.0021738046780228615
      total_loss: 333.4220886230469
      vf_explained_var: 0.9777225852012634
      vf_loss: 333.4237365722656
    sample_time_ms: 18407.576
    update_time_ms: 5.414
  iterations_since_restore: 748
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 18969 s, 753 iter, 7530000 ts, 704 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-07-12
  done: false
  episode_len_mean: 133.34
  episode_reward_max: 795.0725695655857
  episode_reward_mean: 681.9508787194205
  episode_reward_min: -73.67442559375522
  episodes_this_iter: 77
  episodes_total: 56169
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3726.01
    load_time_ms: 1.46
    num_steps_sampled: 7540000
    num_steps_trained: 7540000
    rl_0:
      cur_kl_coeff: 0.025157099589705467
      cur_lr: 4.999999873689376e-05
      entropy: 0.6843897700309753
      kl: 0.02265242300927639
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-09-03
  done: false
  episode_len_mean: 134.93
  episode_reward_max: 798.5205965927174
  episode_reward_mean: 675.9640007770894
  episode_reward_min: -87.12583326575924
  episodes_this_iter: 74
  episodes_total: 56553
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3745.595
    load_time_ms: 1.46
    num_steps_sampled: 7590000
    num_steps_trained: 7590000
    rl_0:
      cur_kl_coeff: 0.025157099589705467
      cur_lr: 4.999999873689376e-05
      entropy: 0.8035790920257568
      kl: 0.017747661098837852
      policy_loss: -0.002389201894402504
      total_loss: 522.8125
      vf_explained_var: 0.9623939394950867
      vf_loss: 522.8143920898438
    sample_time_ms: 18331.035
    update_time_ms: 5.118
  iterations_since_restore: 759
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 19214 s, 764 iter, 7640000 ts, 692 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-11-16
  done: false
  episode_len_mean: 129.59
  episode_reward_max: 793.9401433048681
  episode_reward_mean: 683.561626912786
  episode_reward_min: -101.02456099360016
  episodes_this_iter: 79
  episodes_total: 57019
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3769.146
    load_time_ms: 1.327
    num_steps_sampled: 7650000
    num_steps_trained: 7650000
    rl_0:
      cur_kl_coeff: 0.037735652178525925
      cur_lr: 4.999999873689376e-05
      entropy: 0.5471988320350647
      kl: 0.025862714275717735

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-13-09
  done: false
  episode_len_mean: 126.67
  episode_reward_max: 786.3912854198587
  episode_reward_mean: 689.6042028899443
  episode_reward_min: -70.95430461600046
  episodes_this_iter: 79
  episodes_total: 57407
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3729.483
    load_time_ms: 1.418
    num_steps_sampled: 7700000
    num_steps_trained: 7700000
    rl_0:
      cur_kl_coeff: 0.037735652178525925
      cur_lr: 4.999999873689376e-05
      entropy: 0.6618503332138062
      kl: 0.013014288619160652
      policy_loss: -0.0018241634825244546
      total_loss: 631.9862060546875
      vf_explained_var: 0.9556517004966736
      vf_loss: 631.9874877929688
    sample_time_ms: 18553.837
    update_time_ms: 5.168
  iterations_since_restore: 770
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 19460 s, 775 iter, 7750000 ts, 717 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-15-24
  done: false
  episode_len_mean: 129.93
  episode_reward_max: 795.0202324165849
  episode_reward_mean: 692.9946712798619
  episode_reward_min: -72.81507962077062
  episodes_this_iter: 78
  episodes_total: 57877
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3735.282
    load_time_ms: 1.523
    num_steps_sampled: 7760000
    num_steps_trained: 7760000
    rl_0:
      cur_kl_coeff: 0.028301740065217018
      cur_lr: 4.999999873689376e-05
      entropy: 0.727612316608429
      kl: 0.01247217133641243
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-17-17
  done: false
  episode_len_mean: 129.67
  episode_reward_max: 790.354176191486
  episode_reward_mean: 730.201751139692
  episode_reward_min: 161.34208558153813
  episodes_this_iter: 78
  episodes_total: 58267
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3758.443
    load_time_ms: 1.561
    num_steps_sampled: 7810000
    num_steps_trained: 7810000
    rl_0:
      cur_kl_coeff: 0.021226301789283752
      cur_lr: 4.999999873689376e-05
      entropy: 0.61777263879776
      kl: 0.016973547637462616
      policy_loss: -0.0008459427626803517
      total_loss: 101.79071044921875
      vf_explained_var: 0.9923291206359863
      vf_loss: 101.79119110107422
    sample_time_ms: 18767.004
    update_time_ms: 5.59
  iterations_since_restore: 781
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 19708 s, 786 iter, 7860000 ts, 710 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-19-32
  done: false
  episode_len_mean: 125.2
  episode_reward_max: 787.8157592565784
  episode_reward_mean: 703.443691092217
  episode_reward_min: -88.10762853297541
  episodes_this_iter: 81
  episodes_total: 58743
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3752.939
    load_time_ms: 1.532
    num_steps_sampled: 7870000
    num_steps_trained: 7870000
    rl_0:
      cur_kl_coeff: 0.03183945640921593
      cur_lr: 4.999999873689376e-05
      entropy: 0.592430830001831
      kl: 0.01626514457166195
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-21-22
  done: false
  episode_len_mean: 128.94
  episode_reward_max: 794.6886121184531
  episode_reward_mean: 727.5478338664354
  episode_reward_min: 24.353447753245973
  episodes_this_iter: 78
  episodes_total: 59136
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3719.347
    load_time_ms: 1.497
    num_steps_sampled: 7920000
    num_steps_trained: 7920000
    rl_0:
      cur_kl_coeff: 0.04775918275117874
      cur_lr: 4.999999873689376e-05
      entropy: 0.6869862675666809
      kl: 0.009973695501685143
      policy_loss: -0.0014204081380739808
      total_loss: 48.54036331176758
      vf_explained_var: 0.9960260391235352
      vf_loss: 48.54130935668945
    sample_time_ms: 18367.249
    update_time_ms: 5.941
  iterations_since_restore: 792
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 19952 s, 797 iter, 7970000 ts, 718 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-23-36
  done: false
  episode_len_mean: 127.76
  episode_reward_max: 786.8933591228515
  episode_reward_mean: 710.4116362020213
  episode_reward_min: -60.03830102534016
  episodes_this_iter: 79
  episodes_total: 59604
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.128
    load_time_ms: 1.425
    num_steps_sampled: 7980000
    num_steps_trained: 7980000
    rl_0:
      cur_kl_coeff: 0.011939795687794685
      cur_lr: 4.999999873689376e-05
      entropy: 0.6150192022323608
      kl: 0.02480791136622429


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-25-30
  done: false
  episode_len_mean: 128.92
  episode_reward_max: 787.6335039940426
  episode_reward_mean: 700.6131418937919
  episode_reward_min: -57.18591167404114
  episodes_this_iter: 78
  episodes_total: 59999
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3703.748
    load_time_ms: 1.441
    num_steps_sampled: 8030000
    num_steps_trained: 8030000
    rl_0:
      cur_kl_coeff: 0.011939795687794685
      cur_lr: 4.999999873689376e-05
      entropy: 0.6661993265151978
      kl: 0.0153057174757123
      policy_loss: -0.0017344012157991529
      total_loss: 360.5059509277344
      vf_explained_var: 0.974254310131073
      vf_loss: 360.5074768066406
    sample_time_ms: 18774.561
    update_time_ms: 4.936
  iterations_since_restore: 803
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 20199 s, 808 iter, 8080000 ts, 717 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-27-44
  done: false
  episode_len_mean: 126.14
  episode_reward_max: 797.4298286054795
  episode_reward_mean: 708.2120760000126
  episode_reward_min: -111.007628587648
  episodes_this_iter: 81
  episodes_total: 60474
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3682.928
    load_time_ms: 1.656
    num_steps_sampled: 8090000
    num_steps_trained: 8090000
    rl_0:
      cur_kl_coeff: 0.026864541694521904
      cur_lr: 4.999999873689376e-05
      entropy: 0.6207329034805298
      kl: 0.03271730616688728
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-29-35
  done: false
  episode_len_mean: 127.66
  episode_reward_max: 795.28405018717
  episode_reward_mean: 682.5532261091972
  episode_reward_min: -86.52531412520084
  episodes_this_iter: 79
  episodes_total: 60866
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3680.662
    load_time_ms: 1.764
    num_steps_sampled: 8140000
    num_steps_trained: 8140000
    rl_0:
      cur_kl_coeff: 0.04029681161046028
      cur_lr: 4.999999873689376e-05
      entropy: 0.8501510620117188
      kl: 0.01959763653576374
      policy_loss: -0.003990530036389828
      total_loss: 729.5357666015625
      vf_explained_var: 0.9542869329452515
      vf_loss: 729.5389404296875
    sample_time_ms: 18565.277
    update_time_ms: 5.647
  iterations_since_restore: 814
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 20446 s, 819 iter, 8190000 ts, 692 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-31-51
  done: false
  episode_len_mean: 130.02
  episode_reward_max: 795.586675675915
  episode_reward_mean: 700.6889892713932
  episode_reward_min: -102.37850675527984
  episodes_this_iter: 78
  episodes_total: 61336
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3719.732
    load_time_ms: 1.552
    num_steps_sampled: 8200000
    num_steps_trained: 8200000
    rl_0:
      cur_kl_coeff: 0.04029681161046028
      cur_lr: 4.999999873689376e-05
      entropy: 0.5670694708824158
      kl: 0.032184913754463196


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-33-44
  done: false
  episode_len_mean: 129.48
  episode_reward_max: 788.891071304809
  episode_reward_mean: 698.7957711654794
  episode_reward_min: -102.14761858302407
  episodes_this_iter: 78
  episodes_total: 61731
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3712.207
    load_time_ms: 1.548
    num_steps_sampled: 8250000
    num_steps_trained: 8250000
    rl_0:
      cur_kl_coeff: 0.04029681161046028
      cur_lr: 4.999999873689376e-05
      entropy: 0.6788041591644287
      kl: 0.023733196780085564
      policy_loss: -0.003396922955289483
      total_loss: 332.18414306640625
      vf_explained_var: 0.9748202562332153
      vf_loss: 332.18658447265625
    sample_time_ms: 18808.338
    update_time_ms: 5.481
  iterations_since_restore: 825
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 20691 s, 830 iter, 8300000 ts, 696 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-35-56
  done: false
  episode_len_mean: 130.46
  episode_reward_max: 787.172102520805
  episode_reward_mean: 696.458049883675
  episode_reward_min: -99.60310183246575
  episodes_this_iter: 76
  episodes_total: 62199
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3724.913
    load_time_ms: 1.599
    num_steps_sampled: 8310000
    num_steps_trained: 8310000
    rl_0:
      cur_kl_coeff: 0.04029681161046028
      cur_lr: 4.999999873689376e-05
      entropy: 0.6837502717971802
      kl: 0.015731560066342354
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-37-45
  done: false
  episode_len_mean: 129.79
  episode_reward_max: 797.5141536332383
  episode_reward_mean: 684.0978988258
  episode_reward_min: -97.23643593790042
  episodes_this_iter: 78
  episodes_total: 62594
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3729.317
    load_time_ms: 1.514
    num_steps_sampled: 8360000
    num_steps_trained: 8360000
    rl_0:
      cur_kl_coeff: 0.06044521555304527
      cur_lr: 4.999999873689376e-05
      entropy: 0.5602695941925049
      kl: 0.02168823964893818
      policy_loss: -0.0036273610312491655
      total_loss: 281.2252197265625
      vf_explained_var: 0.9801933169364929
      vf_loss: 281.2275085449219
    sample_time_ms: 18061.44
    update_time_ms: 5.479
  iterations_since_restore: 836
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 20932 s, 841 iter, 8410000 ts, 684 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-39-58
  done: false
  episode_len_mean: 127.71
  episode_reward_max: 797.2251157511294
  episode_reward_mean: 714.9392277966748
  episode_reward_min: -80.68397039824517
  episodes_this_iter: 78
  episodes_total: 63052
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3710.853
    load_time_ms: 1.614
    num_steps_sampled: 8420000
    num_steps_trained: 8420000
    rl_0:
      cur_kl_coeff: 0.09066783636808395
      cur_lr: 4.999999873689376e-05
      entropy: 0.5156921148300171
      kl: 0.01919718272984028
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-41-48
  done: false
  episode_len_mean: 127.93
  episode_reward_max: 795.8839076504399
  episode_reward_mean: 695.3024413041638
  episode_reward_min: -124.5522064806413
  episodes_this_iter: 79
  episodes_total: 63437
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3723.96
    load_time_ms: 1.828
    num_steps_sampled: 8470000
    num_steps_trained: 8470000
    rl_0:
      cur_kl_coeff: 0.09066783636808395
      cur_lr: 4.999999873689376e-05
      entropy: 0.6639829874038696
      kl: 0.013404090888798237
      policy_loss: -0.0041008638218045235
      total_loss: 506.8338928222656
      vf_explained_var: 0.9583975076675415
      vf_loss: 506.83673095703125
    sample_time_ms: 18316.533
    update_time_ms: 5.291
  iterations_since_restore: 847
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 21175 s, 852 iter, 8520000 ts, 706 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-44-01
  done: false
  episode_len_mean: 129.19
  episode_reward_max: 783.0280152731979
  episode_reward_mean: 691.74740662343
  episode_reward_min: -65.82124630322733
  episodes_this_iter: 78
  episodes_total: 63905
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3726.33
    load_time_ms: 1.599
    num_steps_sampled: 8530000
    num_steps_trained: 8530000
    rl_0:
      cur_kl_coeff: 0.09066783636808395
      cur_lr: 4.999999873689376e-05
      entropy: 0.7762320041656494
      kl: 0.013416177593171597
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-45-51
  done: false
  episode_len_mean: 131.56
  episode_reward_max: 797.5395131957513
  episode_reward_mean: 709.7541723440046
  episode_reward_min: -47.865763673724075
  episodes_this_iter: 77
  episodes_total: 64292
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3715.898
    load_time_ms: 1.525
    num_steps_sampled: 8580000
    num_steps_trained: 8580000
    rl_0:
      cur_kl_coeff: 0.09066783636808395
      cur_lr: 4.999999873689376e-05
      entropy: 0.7040625214576721
      kl: 0.018975380808115005
      policy_loss: 0.00012385730224195868
      total_loss: 300.7856140136719
      vf_explained_var: 0.9795718193054199
      vf_loss: 300.78375244140625
    sample_time_ms: 18166.977
    update_time_ms: 5.442
  iterations_since_restore: 858
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 21416 s, 863 iter, 8630000 ts, 686 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-48-02
  done: false
  episode_len_mean: 129.25
  episode_reward_max: 793.8433661954888
  episode_reward_mean: 667.6204343998553
  episode_reward_min: -87.96392773439787
  episodes_this_iter: 78
  episodes_total: 64749
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3735.31
    load_time_ms: 1.621
    num_steps_sampled: 8640000
    num_steps_trained: 8640000
    rl_0:
      cur_kl_coeff: 0.04533391818404198
      cur_lr: 4.999999873689376e-05
      entropy: 0.8927038311958313
      kl: 0.020464487373828888
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-49-50
  done: false
  episode_len_mean: 133.34
  episode_reward_max: 785.9746131018286
  episode_reward_mean: 698.3652853275981
  episode_reward_min: -99.99878731477177
  episodes_this_iter: 76
  episodes_total: 65132
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3740.269
    load_time_ms: 1.577
    num_steps_sampled: 8690000
    num_steps_trained: 8690000
    rl_0:
      cur_kl_coeff: 0.04533391818404198
      cur_lr: 4.999999873689376e-05
      entropy: 0.778560221195221
      kl: 0.034747641533613205
      policy_loss: -0.0018670497229322791
      total_loss: 103.12152099609375
      vf_explained_var: 0.9928595423698425
      vf_loss: 103.12181854248047
    sample_time_ms: 17907.18
    update_time_ms: 5.473
  iterations_since_restore: 869
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 21656 s, 874 iter, 8740000 ts, 688 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-52-04
  done: false
  episode_len_mean: 138.88
  episode_reward_max: 808.0596593218576
  episode_reward_mean: 698.8426337830351
  episode_reward_min: -92.56027757343138
  episodes_this_iter: 76
  episodes_total: 65575
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3741.49
    load_time_ms: 1.827
    num_steps_sampled: 8750000
    num_steps_trained: 8750000
    rl_0:
      cur_kl_coeff: 0.04533391818404198
      cur_lr: 4.999999873689376e-05
      entropy: 0.8844804763793945
      kl: 0.022818826138973236
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-53-54
  done: false
  episode_len_mean: 133.14
  episode_reward_max: 793.8518555989419
  episode_reward_mean: 681.6102419200464
  episode_reward_min: -81.8674255957832
  episodes_this_iter: 78
  episodes_total: 65934
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3748.123
    load_time_ms: 1.908
    num_steps_sampled: 8800000
    num_steps_trained: 8800000
    rl_0:
      cur_kl_coeff: 0.04533391818404198
      cur_lr: 4.999999873689376e-05
      entropy: 0.7322846055030823
      kl: 0.012695902027189732
      policy_loss: -0.0013410414103418589
      total_loss: 135.1444549560547
      vf_explained_var: 0.9922064542770386
      vf_loss: 135.1452178955078
    sample_time_ms: 18399.83
    update_time_ms: 5.578
  iterations_since_restore: 880
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 21898 s, 885 iter, 8850000 ts, 676 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-56-06
  done: false
  episode_len_mean: 144.86
  episode_reward_max: 784.4240527959715
  episode_reward_mean: 658.3089058639365
  episode_reward_min: -97.93170667140528
  episodes_this_iter: 72
  episodes_total: 66377
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3724.391
    load_time_ms: 1.758
    num_steps_sampled: 8860000
    num_steps_trained: 8860000
    rl_0:
      cur_kl_coeff: 0.04533391818404198
      cur_lr: 4.999999873689376e-05
      entropy: 1.1313018798828125
      kl: 0.015142596326768398


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_00-57-57
  done: false
  episode_len_mean: 133.51
  episode_reward_max: 804.0992517024358
  episode_reward_mean: 680.9258540086016
  episode_reward_min: -109.40059324041431
  episodes_this_iter: 75
  episodes_total: 66750
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3747.338
    load_time_ms: 1.601
    num_steps_sampled: 8910000
    num_steps_trained: 8910000
    rl_0:
      cur_kl_coeff: 0.06800085306167603
      cur_lr: 4.999999873689376e-05
      entropy: 0.9879719614982605
      kl: 0.024619344621896744
      policy_loss: -0.008418739773333073
      total_loss: 603.467529296875
      vf_explained_var: 0.9609273076057434
      vf_loss: 603.4743041992188
    sample_time_ms: 18270.874
    update_time_ms: 5.163
  iterations_since_restore: 891
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 22140 s, 896 iter, 8960000 ts, 652 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-00-08
  done: false
  episode_len_mean: 135.26
  episode_reward_max: 792.0616913624132
  episode_reward_mean: 652.7222241189586
  episode_reward_min: -87.69043922959524
  episodes_this_iter: 75
  episodes_total: 67193
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3745.907
    load_time_ms: 1.517
    num_steps_sampled: 8970000
    num_steps_trained: 8970000
    rl_0:
      cur_kl_coeff: 0.06800085306167603
      cur_lr: 4.999999873689376e-05
      entropy: 1.1299188137054443
      kl: 0.021989474073052406


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-01-59
  done: false
  episode_len_mean: 131.12
  episode_reward_max: 790.641569236774
  episode_reward_mean: 704.8500864692668
  episode_reward_min: -14.535837937992653
  episodes_this_iter: 77
  episodes_total: 67555
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3722.676
    load_time_ms: 1.541
    num_steps_sampled: 9020000
    num_steps_trained: 9020000
    rl_0:
      cur_kl_coeff: 0.06800085306167603
      cur_lr: 4.999999873689376e-05
      entropy: 0.7623259425163269
      kl: 0.009874780662357807
      policy_loss: 0.000771290622651577
      total_loss: 153.10421752929688
      vf_explained_var: 0.9884875416755676
      vf_loss: 153.10275268554688
    sample_time_ms: 18200.492
    update_time_ms: 5.04
  iterations_since_restore: 902
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 22383 s, 907 iter, 9070000 ts, 626 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-04-12
  done: false
  episode_len_mean: 153.99
  episode_reward_max: 801.737574863036
  episode_reward_mean: 657.0446298688079
  episode_reward_min: -84.69127116435477
  episodes_this_iter: 69
  episodes_total: 67986
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3735.565
    load_time_ms: 1.459
    num_steps_sampled: 9080000
    num_steps_trained: 9080000
    rl_0:
      cur_kl_coeff: 0.03400042653083801
      cur_lr: 4.999999873689376e-05
      entropy: 1.3180971145629883
      kl: 0.022065524011850357
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-06-02
  done: false
  episode_len_mean: 141.18
  episode_reward_max: 794.578558508578
  episode_reward_mean: 685.5070202696307
  episode_reward_min: -65.15566088341478
  episodes_this_iter: 76
  episodes_total: 68354
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3755.403
    load_time_ms: 1.442
    num_steps_sampled: 9130000
    num_steps_trained: 9130000
    rl_0:
      cur_kl_coeff: 0.03400042653083801
      cur_lr: 4.999999873689376e-05
      entropy: 0.8837272524833679
      kl: 0.018345903605222702
      policy_loss: -0.003781203180551529
      total_loss: 251.3907470703125
      vf_explained_var: 0.9810182452201843
      vf_loss: 251.3938751220703
    sample_time_ms: 18272.776
    update_time_ms: 5.657
  iterations_since_restore: 913
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 22625 s, 918 iter, 9180000 ts, 692 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-08-13
  done: false
  episode_len_mean: 146.45
  episode_reward_max: 803.225949683783
  episode_reward_mean: 658.8548971105979
  episode_reward_min: -65.66791204053419
  episodes_this_iter: 72
  episodes_total: 68788
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3735.846
    load_time_ms: 1.706
    num_steps_sampled: 9190000
    num_steps_trained: 9190000
    rl_0:
      cur_kl_coeff: 0.017000213265419006
      cur_lr: 4.999999873689376e-05
      entropy: 1.412072777748108
      kl: 0.02358015440404415
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-10-03
  done: false
  episode_len_mean: 132.41
  episode_reward_max: 776.387254132979
  episode_reward_mean: 672.1570523245736
  episode_reward_min: -115.4094067947104
  episodes_this_iter: 76
  episodes_total: 69154
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3740.101
    load_time_ms: 1.705
    num_steps_sampled: 9240000
    num_steps_trained: 9240000
    rl_0:
      cur_kl_coeff: 0.017000213265419006
      cur_lr: 4.999999873689376e-05
      entropy: 0.9615128636360168
      kl: 0.029923085123300552
      policy_loss: 0.00011340251512592658
      total_loss: 495.4586486816406
      vf_explained_var: 0.9664349555969238
      vf_loss: 495.4579772949219
    sample_time_ms: 18076.617
    update_time_ms: 5.622
  iterations_since_restore: 924
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 22868 s, 929 iter, 9290000 ts, 681 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-12-17
  done: false
  episode_len_mean: 148.76
  episode_reward_max: 791.0172361470866
  episode_reward_mean: 657.0181408225352
  episode_reward_min: -99.43640388492078
  episodes_this_iter: 70
  episodes_total: 69596
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3767.046
    load_time_ms: 1.571
    num_steps_sampled: 9300000
    num_steps_trained: 9300000
    rl_0:
      cur_kl_coeff: 0.017000213265419006
      cur_lr: 4.999999873689376e-05
      entropy: 1.373822569847107
      kl: 0.016399163752794266


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-14-10
  done: false
  episode_len_mean: 132.27
  episode_reward_max: 783.1699072665944
  episode_reward_mean: 672.8359615629332
  episode_reward_min: -92.63155077214267
  episodes_this_iter: 76
  episodes_total: 69970
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3751.238
    load_time_ms: 1.425
    num_steps_sampled: 9350000
    num_steps_trained: 9350000
    rl_0:
      cur_kl_coeff: 0.017000213265419006
      cur_lr: 4.999999873689376e-05
      entropy: 0.8814517259597778
      kl: 0.07263956218957901
      policy_loss: -0.008582877926528454
      total_loss: 528.4722290039062
      vf_explained_var: 0.962590217590332
      vf_loss: 528.4796752929688
    sample_time_ms: 18683.851
    update_time_ms: 5.548
  iterations_since_restore: 935
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 23112 s, 940 iter, 9400000 ts, 659 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-16-23
  done: false
  episode_len_mean: 145.37
  episode_reward_max: 794.3529834363503
  episode_reward_mean: 648.6989413704449
  episode_reward_min: -87.41204637351989
  episodes_this_iter: 70
  episodes_total: 70408
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3740.023
    load_time_ms: 1.49
    num_steps_sampled: 9410000
    num_steps_trained: 9410000
    rl_0:
      cur_kl_coeff: 0.025500327348709106
      cur_lr: 4.999999873689376e-05
      entropy: 1.425061583518982
      kl: 0.02152116969227791
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-18-12
  done: false
  episode_len_mean: 135.9
  episode_reward_max: 799.666315919888
  episode_reward_mean: 683.2352136926714
  episode_reward_min: -114.26533262227049
  episodes_this_iter: 76
  episodes_total: 70783
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3723.762
    load_time_ms: 1.651
    num_steps_sampled: 9460000
    num_steps_trained: 9460000
    rl_0:
      cur_kl_coeff: 0.025500327348709106
      cur_lr: 4.999999873689376e-05
      entropy: 0.8510206937789917
      kl: 0.01530765276402235
      policy_loss: 2.3232629246194847e-05
      total_loss: 260.8758850097656
      vf_explained_var: 0.9791104197502136
      vf_loss: 260.87548828125
    sample_time_ms: 18217.123
    update_time_ms: 5.183
  iterations_since_restore: 946
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 23354 s, 951 iter, 9510000 ts, 643 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-20-24
  done: false
  episode_len_mean: 149.61
  episode_reward_max: 798.3543606375727
  episode_reward_mean: 667.1319927629297
  episode_reward_min: -115.43347092970443
  episodes_this_iter: 70
  episodes_total: 71214
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3738.916
    load_time_ms: 1.669
    num_steps_sampled: 9520000
    num_steps_trained: 9520000
    rl_0:
      cur_kl_coeff: 0.025500327348709106
      cur_lr: 4.999999873689376e-05
      entropy: 1.097300410270691
      kl: 0.017430324107408524

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-22-16
  done: false
  episode_len_mean: 140.78
  episode_reward_max: 793.551141617108
  episode_reward_mean: 630.432917402238
  episode_reward_min: -114.29442957847544
  episodes_this_iter: 71
  episodes_total: 71585
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3730.661
    load_time_ms: 1.635
    num_steps_sampled: 9570000
    num_steps_trained: 9570000
    rl_0:
      cur_kl_coeff: 0.025500327348709106
      cur_lr: 4.999999873689376e-05
      entropy: 1.429462194442749
      kl: 0.023056482896208763
      policy_loss: -0.002641575876623392
      total_loss: 698.9857177734375
      vf_explained_var: 0.9587247967720032
      vf_loss: 698.98779296875
    sample_time_ms: 18439.248
    update_time_ms: 5.267
  iterations_since_restore: 957
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 23598 s, 962 iter, 9620000 ts, 696 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-24-29
  done: false
  episode_len_mean: 130.99
  episode_reward_max: 790.7316153046431
  episode_reward_mean: 686.1762487829545
  episode_reward_min: -109.78990914346446
  episodes_this_iter: 79
  episodes_total: 72042
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.236
    load_time_ms: 1.565
    num_steps_sampled: 9630000
    num_steps_trained: 9630000
    rl_0:
      cur_kl_coeff: 0.025500327348709106
      cur_lr: 4.999999873689376e-05
      entropy: 0.8938746452331543
      kl: 0.01488398481160402

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-26-19
  done: false
  episode_len_mean: 128.7
  episode_reward_max: 796.3288890399725
  episode_reward_mean: 702.8355225700777
  episode_reward_min: -67.76080441527091
  episodes_this_iter: 78
  episodes_total: 72423
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3705.943
    load_time_ms: 1.534
    num_steps_sampled: 9680000
    num_steps_trained: 9680000
    rl_0:
      cur_kl_coeff: 0.01912524551153183
      cur_lr: 4.999999873689376e-05
      entropy: 0.80035799741745
      kl: 0.019420718774199486
      policy_loss: -0.002995844930410385
      total_loss: 376.93280029296875
      vf_explained_var: 0.9694415330886841
      vf_loss: 376.9354248046875
    sample_time_ms: 18186.968
    update_time_ms: 5.329
  iterations_since_restore: 968
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 23838 s, 973 iter, 9730000 ts, 678 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-28-28
  done: false
  episode_len_mean: 130.84
  episode_reward_max: 787.0906212930091
  episode_reward_mean: 732.1127557270004
  episode_reward_min: 528.9903260126307
  episodes_this_iter: 77
  episodes_total: 72886
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3710.44
    load_time_ms: 1.532
    num_steps_sampled: 9740000
    num_steps_trained: 9740000
    rl_0:
      cur_kl_coeff: 0.01912524551153183
      cur_lr: 4.999999873689376e-05
      entropy: 0.8469604253768921
      kl: 0.024673985317349434
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-30-17
  done: false
  episode_len_mean: 132.29
  episode_reward_max: 788.7568782821708
  episode_reward_mean: 698.9765775681019
  episode_reward_min: -133.8693519431127
  episodes_this_iter: 78
  episodes_total: 73270
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3713.785
    load_time_ms: 1.567
    num_steps_sampled: 9790000
    num_steps_trained: 9790000
    rl_0:
      cur_kl_coeff: 0.01912524551153183
      cur_lr: 4.999999873689376e-05
      entropy: 0.8439419865608215
      kl: 0.01727464608848095
      policy_loss: -0.0034659577067941427
      total_loss: 177.73648071289062
      vf_explained_var: 0.9876263737678528
      vf_loss: 177.73963928222656
    sample_time_ms: 17916.586
    update_time_ms: 5.287
  iterations_since_restore: 979
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 24077 s, 984 iter, 9840000 ts, 710 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-32-29
  done: false
  episode_len_mean: 132.36
  episode_reward_max: 794.6099575894347
  episode_reward_mean: 717.421872994941
  episode_reward_min: -60.74083524538912
  episodes_this_iter: 78
  episodes_total: 73733
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.193
    load_time_ms: 1.52
    num_steps_sampled: 9850000
    num_steps_trained: 9850000
    rl_0:
      cur_kl_coeff: 0.01912524551153183
      cur_lr: 4.999999873689376e-05
      entropy: 0.7819920778274536
      kl: 0.03739117830991745
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-34-20
  done: false
  episode_len_mean: 127.92
  episode_reward_max: 787.8460850504433
  episode_reward_mean: 697.5877244614986
  episode_reward_min: -89.7640772517754
  episodes_this_iter: 79
  episodes_total: 74119
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3714.687
    load_time_ms: 1.53
    num_steps_sampled: 9900000
    num_steps_trained: 9900000
    rl_0:
      cur_kl_coeff: 0.01912524551153183
      cur_lr: 4.999999873689376e-05
      entropy: 0.8064212203025818
      kl: 0.013590800575911999
      policy_loss: -0.0010026206728070974
      total_loss: 346.4015197753906
      vf_explained_var: 0.9702374339103699
      vf_loss: 346.4022521972656
    sample_time_ms: 18346.264
    update_time_ms: 6.031
  iterations_since_restore: 990
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 4068
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=4068], 24322 s, 995 iter, 9950000 ts, 703 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-28_01-36-34
  done: false
  episode_len_mean: 136.16
  episode_reward_max: 791.6800608931242
  episode_reward_mean: 717.5784267215184
  episode_reward_min: 155.99720865512734
  episodes_this_iter: 78
  episodes_total: 74583
  experiment_id: f959ca97c66b498388a02f90a5450ca0
  hostname: Gandalf
  info:
    grad_time_ms: 3756.84
    load_time_ms: 1.548
    num_steps_sampled: 9960000
    num_steps_trained: 9960000
    rl_0:
      cur_kl_coeff: 0.028687868267297745
      cur_lr: 4.999999873689376e-05
      entropy: 0.7913187742233276
      kl: 0.02261200360953808
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	TERMINATED [pid=4068], 24433 s, 1000 iter, 10000000 ts, 698 rew

