# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout, defined once so that every later cell uses the same value.
# Original author note (translated): 103 is the maximum horizon if the rollout
# should stop before the vehicles leave; the default was 500.
HORIZON = 500

# Tag under which the experiment is stored.
experiment_name = "IntersectionExample"

# Scenario class, selected by name.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the recorded output of this cell lists 'IntersectionScenarioTW'
# in `scenarios.__all__`, while the name used here is 'IntersectionTWScenario'.
# Confirm which spelling the module actually exports.
scenario_name = "IntersectionTWScenario"

# Environment class, selected by name.
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv', 'MultiAgentIntersectionEnv_baseline_1', 'MultiAgentIntersectionEnv_baseline_2', 'MultiAgentIntersectionEnv_baseline_3', 'MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings passed through `additional_params`.
additionalNetParams = dict(edge_length=40, lanes=1, speed_limit=30)

# Per the original annotations, only two arguments deviate from their defaults:
#   no_internal_links (default True)  -> False, so that intersections are not skipped
#   additional_params (default None)  -> the dictionary above
# The remaining arguments are spelled out with their default value (None).
net_params = NetParams(
    additional_params=additionalNetParams,
    no_internal_links=False,
    inflows=None,
    netfile=None,
    osm_path=None,
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial placement of the vehicles at reset.
initial_config = InitialConfig(
    # Values the original cell flagged ("!!") as deliberately changed from the defaults
    # (annotated defaults: shuffle=False, spacing="uniform", perturbation=0.0).
    shuffle=True,
    spacing="custom",
    perturbation=29.99,
    # Annotated default is 0.
    min_gap=10,
    # Everything below matches the defaults annotated in the original cell.
    x0=0,
    bunching=0,
    lanes_distribution=float("inf"),
    edges_distribution="all",
    additional_params=None,
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. Every value equals the default annotated in the original
# cell; they are listed explicitly (alphabetically) so the setup is visible.
# NOTE(review): `seed=None` means no fixed simulator seed is configured here, so
# runs are presumably not reproducible — confirm whether that is intended.
sumo_params = SumoParams(
    emission_path=None,
    lateral_resolution=None,
    no_step_log=True,
    num_clients=1,
    overtake_right=False,
    port=None,
    print_warnings=True,
    pxpm=2,
    render=False,
    restart_instance=False,
    save_render=False,
    seed=None,
    show_radius=False,
    sight_radius=25,
    sim_step=0.1,
    sumo_binary=None,
    teleport_time=-1,
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings passed through `additional_params`.
additionalEnvParams = dict(
    max_accel=3,                  # maximum acceleration of autonomous vehicles
    max_decel=3,                  # maximum deceleration of autonomous vehicles
    target_velocity=30,           # desired velocity for all vehicles in the network, in m/s
    ap_teamspirit_0=-1,           # initial teamspirit (first entry)
    ap_teamspirit_1=-1,           # initial teamspirit (second entry)
    ap_teamspirit_shuffle=False,  # whether the teamspirit is shuffled
)

# `additional_params` and `horizon` are the two arguments the original cell
# flagged ("!!"); the other three carry their annotated defaults.
env_params = EnvParams(
    horizon=HORIZON,
    additional_params=additionalEnvParams,
    warmup_steps=0,
    sims_per_step=1,
    evaluate=False,
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
from random import *  # NOTE(review): wildcard import; no name from `random` is used in the visible cells — consider removing it or importing explicitly

# Empty vehicle container; vehicle types are registered on it with `vehicles.add(...)`.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters for the RL vehicles (annotated default: None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (annotated default: None); not passed to `vehicles.add` below.
lc_parameter = None

# Register the RL-controlled vehicle type.
# The original cell also carried commented-out keyword arguments (speed_mode,
# lane_change_mode, sumo_car_following_params, sumo_lc_params); they were
# inactive and are omitted here.
vehicles.add(
    veh_id="rl",                                     # name of the vehicle type
    num_vehicles=2,                                  # annotated default: 1
    initial_speed=0,                                 # annotated default: 0
    acceleration_controller=(RLController, {}),      # annotated default: (SumoCarFollowingController, {})
    lane_change_controller=(StaticLaneChanger, {}),  # annotated default: (SumoLaneChangeController, {})
    routing_controller=(ContinuousRouter, {}),       # annotated default: None
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Collect every component defined in the cells above into one dictionary.
# The key names must stay exactly as written.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": "traci",
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# number of parallel workers
N_CPUS = 2
# number of rollouts per training iteration
N_ROLLOUTS = 20

# Start Ray with one CPU more than the number of workers (presumably for the
# trainer/driver process — confirm).
# `ignore_reinit_error=True` keeps this cell re-runnable: without it, executing
# the cell a second time in a live kernel raises because Ray is already running.
ray.init(redirect_output=True, num_cpus=N_CPUS + 1, ignore_reinit_error=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-04-09_13-11-00_30240/logs.
Waiting for redis server at 127.0.0.1:24259 to respond...
Waiting for redis server at 127.0.0.1:62896 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=1c71038f05007dc71caeb68d84db6ade856d9270dd1ec1a0



{'node_ip_address': '172.16.123.117',
 'object_store_addresses': ['/tmp/ray/session_2019-04-09_13-11-00_30240/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-04-09_13-11-00_30240/sockets/raylet'],
 'redis_address': '172.16.123.117:24259',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=1c71038f05007dc71caeb68d84db6ade856d9270dd1ec1a0'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may be the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or a user-defined trainable function
# or class registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the class-level defaults. A shallow `.copy()` would share the nested
# "model" and "env_config" dictionaries with `agent_cls._default_config`, so the
# in-place edits below would silently modify the agent class's own defaults
# (and any other config later derived from them in this kernel).
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate (original note: default 0.999)
config["model"].update({"fcnet_hiddens": [100, 50, 25]})  # hidden layer sizes (original note: default 64, 32)
config["use_gae"] = True  # use generalized advantage estimation
config["lambda"] = 0.97
# Disabled by the author (second note translated: "200 default, still too high?"):
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"]
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize them to a JSON string and store
# that string, together with the algorithm name, in the env config.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping: instantiate one environment only to read the
# observation and action spaces needed for the policy specification.
# NOTE(review): the recorded output shows this starts a SUMO instance; it is
# never explicitly closed in the visible cells — consider terminating it.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space

def gen_policy():
    """Return the specification tuple for one PPO policy.

    Returns:
        tuple: ``(PPOPolicyGraph, obs_space, act_space, {})``, built from the
        module-level ``obs_space`` and ``act_space``; the last element is an
        empty dict (presumably per-policy config overrides — confirm).
    """
    extra_config = {}
    return (PPOPolicyGraph, obs_space, act_space, extra_config)

# Exactly one policy graph is created, registered under the id 'rl_0'.
policy_graphs = dict(rl_0=gen_policy())
    
def policy_mapping_fn(agent_id):
    """Map any agent to the single shared policy.

    Args:
        agent_id: identifier of the agent; it is not inspected.

    Returns:
        str: always ``'rl_0'``.
    """
    shared_policy_id = 'rl_0'
    return shared_policy_id

# Multi-agent section of the trainer config: the available policy graphs, the
# agent-to-policy mapping (wrapped with `tune.function`), and the policies to train.
config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
    'policies_to_train': ['rl_0'],
}

 Starting SUMO on port 52973


New Teamspirit:
0.976419485470094
-0.4442784097254695


In [None]:
# Launch training. The experiment is keyed by its tag from `flow_params`.
trials = run_experiments({
    flow_params["exp_tag"]: dict(
        run=alg_run,                         # RL algorithm to run
        env=gym_name,                        # environment name generated earlier
        config=dict(config),                 # shallow copy of the config (must match "run")
        checkpoint_freq=1,                   # number of iterations between checkpoints
        max_failures=999,
        stop=dict(training_iteration=1000),  # stop after this many iterations
    ),
})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0_2019-04-09_13-11-02cdaml41i -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-11-57
  done: false
  episode_len_mean: 485.95
  episode_reward_max: 123.0041521056342
  episode_reward_mean: -8.499962313751194
  episode_reward_min: -200.90302216972958
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6407.958
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 135 s, 5 iter, 50000 ts, -50.7 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-14-11
  done: false
  episode_len_mean: 297.34
  episode_reward_max: 157.88026843530776
  episode_reward_mean: -50.07914276241572
  episode_reward_min: -212.38164665177027
  episodes_this_iter: 41
  episodes_total: 167
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4877.544
    load_time_ms: 10.471
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.012500002980232239
      cur_lr: 4.999999873689376e-05
      entropy: 1.380710482597351
      kl: 0.008414514362812042
     

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-16-21
  done: false
  episode_len_mean: 161.61
  episode_reward_max: 100.78826665440077
  episode_reward_mean: -52.440004880118934
  episode_reward_min: -208.23114062178718
  episodes_this_iter: 64
  episodes_total: 447
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4445.614
    load_time_ms: 1.73
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00039062509313225746
      cur_lr: 4.999999873689376e-05
      entropy: 1.3647818565368652
      kl: 0.00386752188205719
      policy_loss: -0.0005341600044630468
      total_loss: 734.5462646484375
      vf_explained_var: 0.3651774525642395
      vf_loss: 734.5467529296875
    sample_time_ms: 21887.172
    update_time_ms: 5.91
  iterations_since_restore: 11
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 420 s, 16 iter, 160000 ts, -74.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-18-56
  done: false
  episode_len_mean: 131.66
  episode_reward_max: 70.42767800017589
  episode_reward_mean: -75.5627476129271
  episode_reward_min: -205.24244449241382
  episodes_this_iter: 76
  episodes_total: 862
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4299.872
    load_time_ms: 1.774
    num_steps_sampled: 170000
    num_steps_trained: 170000
    rl_0:
      cur_kl_coeff: 6.103517080191523e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.3468657732009888
      kl: 0.004225405398756266
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-21-21
  done: false
  episode_len_mean: 117.63
  episode_reward_max: 62.73081598320505
  episode_reward_mean: -71.7889069665056
  episode_reward_min: -206.84527056337703
  episodes_this_iter: 81
  episodes_total: 1259
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4631.72
    load_time_ms: 1.819
    num_steps_sampled: 220000
    num_steps_trained: 220000
    rl_0:
      cur_kl_coeff: 1.907349087559851e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.329553484916687
      kl: 0.0080961799249053
      policy_loss: -0.0028062909841537476
      total_loss: 991.7139282226562
      vf_explained_var: 0.45929795503616333
      vf_loss: 991.7167358398438
    sample_time_ms: 22530.555
    update_time_ms: 6.415
  iterations_since_restore: 22
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 729 s, 27 iter, 270000 ts, -69.9 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-24-05
  done: false
  episode_len_mean: 110.98
  episode_reward_max: 52.018286963408
  episode_reward_mean: -63.471653389786134
  episode_reward_min: -203.09993689942326
  episodes_this_iter: 90
  episodes_total: 1806
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5012.303
    load_time_ms: 1.784
    num_steps_sampled: 280000
    num_steps_trained: 280000
    rl_0:
      cur_kl_coeff: 2.980232949312267e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.2882546186447144
      kl: 0.00465485779568553
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-26-16
  done: false
  episode_len_mean: 103.78
  episode_reward_max: 60.768172368314126
  episode_reward_mean: -67.23992727357248
  episode_reward_min: -202.27829937848736
  episodes_this_iter: 96
  episodes_total: 2277
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4503.441
    load_time_ms: 1.766
    num_steps_sampled: 330000
    num_steps_trained: 330000
    rl_0:
      cur_kl_coeff: 9.313227966600834e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.2451201677322388
      kl: 0.0038572573103010654
      policy_loss: -0.0020013332832604647
      total_loss: 1148.1849365234375
      vf_explained_var: 0.523297905921936
      vf_loss: 1148.18701171875
    sample_time_ms: 22277.217
    update_time_ms: 5.962
  iterations_since_restore: 33
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 1027 s, 38 iter, 380000 ts, -57.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-29-04
  done: false
  episode_len_mean: 106.39
  episode_reward_max: 73.30477179951754
  episode_reward_mean: -53.15155035871268
  episode_reward_min: -203.53502362976653
  episodes_this_iter: 94
  episodes_total: 2856
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4732.046
    load_time_ms: 1.752
    num_steps_sampled: 390000
    num_steps_trained: 390000
    rl_0:
      cur_kl_coeff: 1.4551918697813804e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.196708083152771
      kl: 0.004398655146360397

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-31-13
  done: false
  episode_len_mean: 107.98
  episode_reward_max: 78.88430261738985
  episode_reward_mean: -40.688788269471154
  episode_reward_min: -205.90763808229485
  episodes_this_iter: 93
  episodes_total: 3326
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4696.059
    load_time_ms: 1.772
    num_steps_sampled: 440000
    num_steps_trained: 440000
    rl_0:
      cur_kl_coeff: 4.5474745930668137e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.1462059020996094
      kl: 0.004547902848571539
      policy_loss: -0.0016135374316945672
      total_loss: 1210.069091796875
      vf_explained_var: 0.4250567555427551
      vf_loss: 1210.0706787109375
    sample_time_ms: 22085.148
    update_time_ms: 6.299
  iterations_since_restore: 44
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 1316 s, 49 iter, 490000 ts, -52.1 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-33-54
  done: false
  episode_len_mean: 103.3
  episode_reward_max: 63.46602818085465
  episode_reward_mean: -59.91273278871914
  episode_reward_min: -204.18160275508856
  episodes_this_iter: 98
  episodes_total: 3910
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4342.332
    load_time_ms: 1.821
    num_steps_sampled: 500000
    num_steps_trained: 500000
    rl_0:
      cur_kl_coeff: 7.105429051666896e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.0888195037841797
      kl: 0.006299504078924656


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-36-07
  done: false
  episode_len_mean: 103.97
  episode_reward_max: 49.343749531203066
  episode_reward_mean: -43.081170708716535
  episode_reward_min: -203.6717731693624
  episodes_this_iter: 96
  episodes_total: 4406
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4333.217
    load_time_ms: 1.74
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 2.220446578645905e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.054858684539795
      kl: 0.005582212004810572
      policy_loss: -0.0005996286636218429
      total_loss: 1320.48681640625
      vf_explained_var: 0.4547688066959381
      vf_loss: 1320.4874267578125
    sample_time_ms: 22498.957
    update_time_ms: 5.466
  iterations_since_restore: 55
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 1614 s, 60 iter, 600000 ts, -45.7 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-38-53
  done: false
  episode_len_mean: 99.15841584158416
  episode_reward_max: 45.143505880240504
  episode_reward_mean: -54.198587135743416
  episode_reward_min: -203.49129793454688
  episodes_this_iter: 101
  episodes_total: 4996
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4491.573
    load_time_ms: 1.595
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 3.4694477791342267e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9955657720565796
      kl: 0.004

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-41-03
  done: false
  episode_len_mean: 94.97115384615384
  episode_reward_max: 31.830785021328516
  episode_reward_mean: -68.80393622429686
  episode_reward_min: -203.68874930578795
  episodes_this_iter: 104
  episodes_total: 5515
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4541.04
    load_time_ms: 1.583
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 1.0842024309794459e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.9410213828086853
      kl: 0.0038620501291006804
      policy_loss: -0.0009526694193482399
      total_loss: 1386.31005859375
      vf_explained_var: 0.47105103731155396
      vf_loss: 1386.3111572265625
    sample_time_ms: 22194.964
    update_time_ms: 6.238
  iterations_since_restore: 66
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 3030

  custom_metrics: {}
  date: 2019-04-09_13-43-06
  done: false
  episode_len_mean: 94.65094339622641
  episode_reward_max: 30.22451712628606
  episode_reward_mean: -65.2384583380912
  episode_reward_min: -202.51230409914288
  episodes_this_iter: 106
  episodes_total: 6035
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4285.764
    load_time_ms: 1.613
    num_steps_sampled: 710000
    num_steps_trained: 710000
    rl_0:
      cur_kl_coeff: 3.3881325968107683e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.9223096966743469
      kl: 0.005640849936753511
      policy_loss: -0.0015313720796257257
      total_loss: 1391.4886474609375
      vf_explained_var: 0.42455512285232544
      vf_loss: 1391.490234375
    sample_time_ms: 21025.348
    update_time_ms: 5.675
  iterations_since_restore: 71
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -32.61922916904559
  time_since_restore: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 2020 s, 76 iter, 760000 ts, -77.5 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-45-38
  done: false
  episode_len_mean: 93.4392523364486
  episode_reward_max: 20.174919777410935
  episode_reward_mean: -68.14959590190334
  episode_reward_min: -202.37220308616924
  episodes_this_iter: 107
  episodes_total: 6673
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4247.27
    load_time_ms: 1.667
    num_steps_sampled: 770000
    num_steps_trained: 770000
    rl_0:
      cur_kl_coeff: 5.2939571825168255e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.8554137945175171
      kl: 0.005289

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-47-49
  done: false
  episode_len_mean: 97.7843137254902
  episode_reward_max: 23.786416796915425
  episode_reward_mean: -58.363244948499194
  episode_reward_min: -203.70146304790006
  episodes_this_iter: 102
  episodes_total: 7202
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4463.251
    load_time_ms: 1.792
    num_steps_sampled: 820000
    num_steps_trained: 820000
    rl_0:
      cur_kl_coeff: 1.654361619536508e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.8472520709037781
      kl: 0.0048398603685200214
      policy_loss: -0.0009178169420920312
      total_loss: 1396.052978515625
      vf_explained_var: 0.37273576855659485
      vf_loss: 1396.053955078125
    sample_time_ms: 21145.761
    update_time_ms: 7.027
  iterations_since_restore: 82
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 3030

  custom_metrics: {}
  date: 2019-04-09_13-50-07
  done: false
  episode_len_mean: 92.70370370370371
  episode_reward_max: 26.391072902676996
  episode_reward_mean: -72.28958742809962
  episode_reward_min: -202.8307724919642
  episodes_this_iter: 108
  episodes_total: 7723
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4590.027
    load_time_ms: 1.693
    num_steps_sampled: 870000
    num_steps_trained: 870000
    rl_0:
      cur_kl_coeff: 5.1698800610515874e-27
      cur_lr: 4.999999873689376e-05
      entropy: 0.8248167634010315
      kl: 0.004779214505106211
      policy_loss: -0.0021821013651788235
      total_loss: 1322.5924072265625
      vf_explained_var: 0.4541299343109131
      vf_loss: 1322.5946044921875
    sample_time_ms: 22250.036
    update_time_ms: 7.906
  iterations_since_restore: 87
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -36.144793714049804
  time_since_rest

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 2439 s, 92 iter, 920000 ts, -59 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-52-37
  done: false
  episode_len_mean: 94.93396226415095
  episode_reward_max: 20.871463203140422
  episode_reward_mean: -66.91782162817799
  episode_reward_min: -201.6255191237148
  episodes_this_iter: 106
  episodes_total: 8347
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4383.603
    load_time_ms: 1.56
    num_steps_sampled: 930000
    num_steps_trained: 930000
    rl_0:
      cur_kl_coeff: 8.077937595393105e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.8112401962280273
      kl: 0.005716491

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-54-43
  done: false
  episode_len_mean: 90.35454545454546
  episode_reward_max: 15.335283187907388
  episode_reward_mean: -77.65931072168493
  episode_reward_min: -202.18043071138104
  episodes_this_iter: 110
  episodes_total: 8882
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4297.831
    load_time_ms: 1.669
    num_steps_sampled: 980000
    num_steps_trained: 980000
    rl_0:
      cur_kl_coeff: 2.5243554985603454e-30
      cur_lr: 4.999999873689376e-05
      entropy: 0.7727394104003906
      kl: 0.005413609091192484
      policy_loss: -0.001954768318682909
      total_loss: 1461.2982177734375
      vf_explained_var: 0.41504189372062683
      vf_loss: 1461.300537109375
    sample_time_ms: 20295.61
    update_time_ms: 6.647
  iterations_since_restore: 98
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302

  custom_metrics: {}
  date: 2019-04-09_13-56-56
  done: false
  episode_len_mean: 94.84761904761905
  episode_reward_max: 17.29877167097174
  episode_reward_mean: -67.0170028654003
  episode_reward_min: -201.17333283759478
  episodes_this_iter: 105
  episodes_total: 9430
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4639.564
    load_time_ms: 1.644
    num_steps_sampled: 1030000
    num_steps_trained: 1030000
    rl_0:
      cur_kl_coeff: 7.888610933001079e-32
      cur_lr: 4.999999873689376e-05
      entropy: 0.7307636141777039
      kl: 0.004998000804334879
      policy_loss: -0.0006135260919108987
      total_loss: 1418.1708984375
      vf_explained_var: 0.44998258352279663
      vf_loss: 1418.171630859375
    sample_time_ms: 21186.633
    update_time_ms: 6.871
  iterations_since_restore: 103
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -33.50850143270015
  time_since_restore

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 2858 s, 108 iter, 1080000 ts, -98.5 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_13-59-36
  done: false
  episode_len_mean: 91.85321100917432
  episode_reward_max: 12.887953478583832
  episode_reward_mean: -75.56262462262679
  episode_reward_min: -201.09537817012693
  episodes_this_iter: 109
  episodes_total: 10086
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4849.177
    load_time_ms: 1.674
    num_steps_sampled: 1090000
    num_steps_trained: 1090000
    rl_0:
      cur_kl_coeff: 1.2325954582814187e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.6948843002319336
      kl: 0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 2980 s, 113 iter, 1130000 ts, -85.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-01-38
  done: false
  episode_len_mean: 89.54954954954955
  episode_reward_max: 10.073178500607742
  episode_reward_mean: -79.03862179997014
  episode_reward_min: -200.9305255220726
  episodes_this_iter: 111
  episodes_total: 10636
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4291.066
    load_time_ms: 1.784
    num_steps_sampled: 1140000
    num_steps_trained: 1140000
    rl_0:
      cur_kl_coeff: 3.8518608071294333e-35
      cur_lr: 4.999999873689376e-05
      entropy: 0.6684631109237671
      kl: 0.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 3103 s, 118 iter, 1180000 ts, -76.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-03-42
  done: false
  episode_len_mean: 90.74774774774775
  episode_reward_max: 12.25806354058453
  episode_reward_mean: -79.86433540043275
  episode_reward_min: -200.96411593953837
  episodes_this_iter: 111
  episodes_total: 11182
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4179.566
    load_time_ms: 1.774
    num_steps_sampled: 1190000
    num_steps_trained: 1190000
    rl_0:
      cur_kl_coeff: 1.2037065022279479e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.6238279938697815
      kl: 0.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 3223 s, 123 iter, 1230000 ts, -69.2 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-05-42
  done: false
  episode_len_mean: 89.19469026548673
  episode_reward_max: 8.905845322412418
  episode_reward_mean: -82.83973063948339
  episode_reward_min: -200.57095371952303
  episodes_this_iter: 113
  episodes_total: 11735
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4172.982
    load_time_ms: 1.75
    num_steps_sampled: 1240000
    num_steps_trained: 1240000
    rl_0:
      cur_kl_coeff: 3.761582819462337e-38
      cur_lr: 4.999999873689376e-05
      entropy: 0.5763477087020874
      kl: 0.00

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-07-43
  done: false
  episode_len_mean: 99.43
  episode_reward_max: 7.2591075337985735
  episode_reward_mean: -57.04698642384553
  episode_reward_min: -200.7021090440061
  episodes_this_iter: 100
  episodes_total: 12291
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4227.969
    load_time_ms: 1.636
    num_steps_sampled: 1290000
    num_steps_trained: 1290000
    rl_0:
      cur_kl_coeff: 1.1754946310819804e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.5695739388465881
      kl: 0.007411550264805555
      policy_loss: -0.0025158654898405075
      total_loss: 1437.813720703125
      vf_explained_var: 0.41832923889160156
      vf_loss: 1437.816162109375
    sample_time_ms: 19802.261
    update_time_ms: 5.67
  iterations_since_restore: 129
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy

  custom_metrics: {}
  date: 2019-04-09_14-09-45
  done: false
  episode_len_mean: 89.75
  episode_reward_max: 7.461549153822463
  episode_reward_mean: -83.86948904324979
  episode_reward_min: -200.6686016920333
  episodes_this_iter: 112
  episodes_total: 12852
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4319.073
    load_time_ms: 1.711
    num_steps_sampled: 1340000
    num_steps_trained: 1340000
    rl_0:
      cur_kl_coeff: 3.6733637943810755e-41
      cur_lr: 4.999999873689376e-05
      entropy: 0.5185015797615051
      kl: 0.008375751785933971
      policy_loss: -0.0020104916766285896
      total_loss: 1385.0006103515625
      vf_explained_var: 0.49867844581604004
      vf_loss: 1385.0025634765625
    sample_time_ms: 19911.089
    update_time_ms: 5.69
  iterations_since_restore: 134
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -41.93474452162489
  time_since_restore: 3489.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 3617 s, 139 iter, 1390000 ts, -86.7 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-12-17
  done: false
  episode_len_mean: 86.94017094017094
  episode_reward_max: 6.9403164476752295
  episode_reward_mean: -88.98763519562172
  episode_reward_min: -200.317571291991
  episodes_this_iter: 117
  episodes_total: 13528
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4322.889
    load_time_ms: 1.775
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
    rl_0:
      cur_kl_coeff: 5.74532370373175e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.48776695132255554
      kl: 0.00

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 3739 s, 144 iter, 1440000 ts, -87.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-14-21
  done: false
  episode_len_mean: 94.19626168224299
  episode_reward_max: 7.744306652348826
  episode_reward_mean: -68.74162317977482
  episode_reward_min: -200.22193907564665
  episodes_this_iter: 107
  episodes_total: 14088
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4217.123
    load_time_ms: 1.58
    num_steps_sampled: 1450000
    num_steps_trained: 1450000
    rl_0:
      cur_kl_coeff: 1.8216880036222622e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.45043256878852844
      kl: 0.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 3871 s, 149 iter, 1490000 ts, -80.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-16-32
  done: false
  episode_len_mean: 83.13333333333334
  episode_reward_max: 7.904371470771968
  episode_reward_mean: -98.1322138507479
  episode_reward_min: -200.18534041383816
  episodes_this_iter: 120
  episodes_total: 14662
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4329.632
    load_time_ms: 1.567
    num_steps_sampled: 1500000
    num_steps_trained: 1500000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.40174999833106995
      kl: 0.006670753005892038


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-18-34
  done: false
  episode_len_mean: 89.23214285714286
  episode_reward_max: 7.228048956325394
  episode_reward_mean: -82.66931904659292
  episode_reward_min: -200.21733781737663
  episodes_this_iter: 112
  episodes_total: 15219
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4327.182
    load_time_ms: 1.686
    num_steps_sampled: 1550000
    num_steps_trained: 1550000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3707539141178131
      kl: 0.00557762011885643
      policy_loss: -0.0013121970696374774
      total_loss: 1370.0377197265625
      vf_explained_var: 0.5012130737304688
      vf_loss: 1370.038818359375
    sample_time_ms: 20918.954
    update_time_ms: 5.926
  iterations_since_restore: 155
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 4138 s, 160 iter, 1600000 ts, -91.5 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-21-03
  done: false
  episode_len_mean: 87.56521739130434
  episode_reward_max: 7.174711101959087
  episode_reward_mean: -86.8148458888353
  episode_reward_min: -200.1712515667041
  episodes_this_iter: 115
  episodes_total: 15915
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4187.136
    load_time_ms: 1.7
    num_steps_sampled: 1610000
    num_steps_trained: 1610000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.32506662607192993
      kl: 0.007866689004004002
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-23-05
  done: false
  episode_len_mean: 88.77876106194691
  episode_reward_max: 6.945826890179542
  episode_reward_mean: -82.77134986352402
  episode_reward_min: -200.0887059290851
  episodes_this_iter: 113
  episodes_total: 16477
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4200.729
    load_time_ms: 1.905
    num_steps_sampled: 1660000
    num_steps_trained: 1660000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.2909829318523407
      kl: 0.0062798988074064255
      policy_loss: 0.0006760916439816356
      total_loss: 1435.3177490234375
      vf_explained_var: 0.45720604062080383
      vf_loss: 1435.317138671875
    sample_time_ms: 20393.378
    update_time_ms: 5.522
  iterations_since_restore: 166
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 4412 s, 171 iter, 1710000 ts, -78.7 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-25-34
  done: false
  episode_len_mean: 89.39285714285714
  episode_reward_max: 6.878592501297089
  episode_reward_mean: -81.55004605525052
  episode_reward_min: -200.14301396270503
  episodes_this_iter: 112
  episodes_total: 17167
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4190.608
    load_time_ms: 1.794
    num_steps_sampled: 1720000
    num_steps_trained: 1720000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.23189303278923035
      kl: 0.007510917261242866

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-27-34
  done: false
  episode_len_mean: 94.38679245283019
  episode_reward_max: 6.904307403277452
  episode_reward_mean: -69.8241610369107
  episode_reward_min: -200.0522264923896
  episodes_this_iter: 106
  episodes_total: 17725
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4167.819
    load_time_ms: 1.671
    num_steps_sampled: 1770000
    num_steps_trained: 1770000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.19548174738883972
      kl: 0.006936319172382355
      policy_loss: -0.0007537542260251939
      total_loss: 1481.4215087890625
      vf_explained_var: 0.4162760376930237
      vf_loss: 1481.4222412109375
    sample_time_ms: 20285.391
    update_time_ms: 5.792
  iterations_since_restore: 177
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 4677 s, 182 iter, 1820000 ts, -86.1 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-29-58
  done: false
  episode_len_mean: 88.90178571428571
  episode_reward_max: 6.619117984382555
  episode_reward_mean: -84.06867785097913
  episode_reward_min: -200.04522383452797
  episodes_this_iter: 112
  episodes_total: 18400
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4126.559
    load_time_ms: 1.782
    num_steps_sampled: 1830000
    num_steps_trained: 1830000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.13374407589435577
      kl: 0.006814158987253904

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-31-58
  done: false
  episode_len_mean: 91.44954128440367
  episode_reward_max: 6.976991924026711
  episode_reward_mean: -75.10092060886734
  episode_reward_min: -200.0417481872576
  episodes_this_iter: 109
  episodes_total: 18967
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4035.521
    load_time_ms: 1.728
    num_steps_sampled: 1880000
    num_steps_trained: 1880000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.09081617742776871
      kl: 0.005979857407510281
      policy_loss: -0.0005822418606840074
      total_loss: 1442.644775390625
      vf_explained_var: 0.4461990296840668
      vf_loss: 1442.6453857421875
    sample_time_ms: 19706.774
    update_time_ms: 5.617
  iterations_since_restore: 188
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 4948 s, 193 iter, 1930000 ts, -78 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-34-30
  done: false
  episode_len_mean: 86.36206896551724
  episode_reward_max: 6.210101750636425
  episode_reward_mean: -87.99357450246876
  episode_reward_min: -200.0435492851741
  episodes_this_iter: 116
  episodes_total: 19657
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4145.903
    load_time_ms: 1.569
    num_steps_sampled: 1940000
    num_steps_trained: 1940000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.03135902062058449
      kl: 0.008072548545897007
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-36-25
  done: false
  episode_len_mean: 93.48598130841121
  episode_reward_max: 5.214538615239096
  episode_reward_mean: -69.5748762224599
  episode_reward_min: -200.0752615824191
  episodes_this_iter: 107
  episodes_total: 20217
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4099.455
    load_time_ms: 1.492
    num_steps_sampled: 1990000
    num_steps_trained: 1990000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.0019206299912184477
      kl: 0.0077630518935620785
      policy_loss: -0.0022786695044487715
      total_loss: 1424.817138671875
      vf_explained_var: 0.46424269676208496
      vf_loss: 1424.8194580078125
    sample_time_ms: 20223.799
    update_time_ms: 5.746
  iterations_since_restore: 199
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_re

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 5204 s, 204 iter, 2040000 ts, -73.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-38-46
  done: false
  episode_len_mean: 87.12173913043478
  episode_reward_max: 5.9582460973534115
  episode_reward_mean: -86.72139616638238
  episode_reward_min: -200.0711306133868
  episodes_this_iter: 115
  episodes_total: 20888
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4039.393
    load_time_ms: 1.525
    num_steps_sampled: 2050000
    num_steps_trained: 2050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.03578464314341545
      kl: 0.00905251409858465

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-40-43
  done: false
  episode_len_mean: 84.42372881355932
  episode_reward_max: 5.821803249031643
  episode_reward_mean: -95.04817166815454
  episode_reward_min: -200.0512149441444
  episodes_this_iter: 118
  episodes_total: 21470
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4033.393
    load_time_ms: 1.614
    num_steps_sampled: 2100000
    num_steps_trained: 2100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.07784973829984665
      kl: 0.00777012575417757
      policy_loss: 0.0008450091117992997
      total_loss: 1367.437744140625
      vf_explained_var: 0.5035943388938904
      vf_loss: 1367.4368896484375
    sample_time_ms: 19364.004
    update_time_ms: 5.534
  iterations_since_restore: 210
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 5463 s, 215 iter, 2150000 ts, -94.5 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-43-06
  done: false
  episode_len_mean: 89.46846846846847
  episode_reward_max: 5.6681052059049275
  episode_reward_mean: -82.9801182900369
  episode_reward_min: -200.08012831164876
  episodes_this_iter: 111
  episodes_total: 22158
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4053.25
    load_time_ms: 1.694
    num_steps_sampled: 2160000
    num_steps_trained: 2160000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.11558765918016434
      kl: 0.006582813337445259

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-45-08
  done: false
  episode_len_mean: 88.50877192982456
  episode_reward_max: 5.533612325501016
  episode_reward_mean: -86.01668412782999
  episode_reward_min: -200.077107765989
  episodes_this_iter: 114
  episodes_total: 22724
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4217.043
    load_time_ms: 1.699
    num_steps_sampled: 2210000
    num_steps_trained: 2210000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.15455980598926544
      kl: 0.008721902035176754
      policy_loss: 0.0010790704982355237
      total_loss: 1394.146240234375
      vf_explained_var: 0.49569693207740784
      vf_loss: 1394.145263671875
    sample_time_ms: 19780.032
    update_time_ms: 5.608
  iterations_since_restore: 221
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 5728 s, 226 iter, 2260000 ts, -95.2 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-47-31
  done: false
  episode_len_mean: 86.27350427350427
  episode_reward_max: 4.905248070039956
  episode_reward_mean: -90.89388252001282
  episode_reward_min: -200.04891351758556
  episodes_this_iter: 117
  episodes_total: 23417
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4214.795
    load_time_ms: 1.756
    num_steps_sampled: 2270000
    num_steps_trained: 2270000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.22256726026535034
      kl: 0.00943562481552362

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-49-28
  done: false
  episode_len_mean: 86.44827586206897
  episode_reward_max: 5.330489900008209
  episode_reward_mean: -89.39831038991798
  episode_reward_min: -200.07603677915785
  episodes_this_iter: 116
  episodes_total: 23997
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4046.205
    load_time_ms: 1.784
    num_steps_sampled: 2320000
    num_steps_trained: 2320000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.2681059241294861
      kl: 0.008702438324689865
      policy_loss: -2.1770596504211426e-05
      total_loss: 1431.613037109375
      vf_explained_var: 0.468283087015152
      vf_loss: 1431.613037109375
    sample_time_ms: 19380.063
    update_time_ms: 6.182
  iterations_since_restore: 232
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 5986 s, 237 iter, 2370000 ts, -77.1 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-51-49
  done: false
  episode_len_mean: 88.52631578947368
  episode_reward_max: 6.4671211083668565
  episode_reward_mean: -82.557792182833
  episode_reward_min: -200.07296162953287
  episodes_this_iter: 114
  episodes_total: 24670
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4038.743
    load_time_ms: 1.653
    num_steps_sampled: 2380000
    num_steps_trained: 2380000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.31078633666038513
      kl: 0.010312343016266823

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-53-47
  done: false
  episode_len_mean: 91.89908256880734
  episode_reward_max: 4.555499147281958
  episode_reward_mean: -75.10823702822726
  episode_reward_min: -200.0763767258681
  episodes_this_iter: 109
  episodes_total: 25222
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4035.529
    load_time_ms: 1.507
    num_steps_sampled: 2430000
    num_steps_trained: 2430000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.33997493982315063
      kl: 0.010150950402021408
      policy_loss: 0.0011968502076342702
      total_loss: 1354.381591796875
      vf_explained_var: 0.46435225009918213
      vf_loss: 1354.38037109375
    sample_time_ms: 19431.018
    update_time_ms: 5.736
  iterations_since_restore: 243
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 6243 s, 248 iter, 2480000 ts, -75 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-56-05
  done: false
  episode_len_mean: 84.13675213675214
  episode_reward_max: 5.489963897191119
  episode_reward_mean: -97.69441161277958
  episode_reward_min: -200.07840039051177
  episodes_this_iter: 117
  episodes_total: 25903
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4031.563
    load_time_ms: 1.637
    num_steps_sampled: 2490000
    num_steps_trained: 2490000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.4160456955432892
      kl: 0.009183070622384548
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_14-58-02
  done: false
  episode_len_mean: 81.4297520661157
  episode_reward_max: 5.2916242617648885
  episode_reward_mean: -104.32658353560396
  episode_reward_min: -200.11852236275882
  episodes_this_iter: 121
  episodes_total: 26463
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4030.801
    load_time_ms: 1.707
    num_steps_sampled: 2540000
    num_steps_trained: 2540000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.43008989095687866
      kl: 0.009311242029070854
      policy_loss: 0.002032569842413068
      total_loss: 1185.11572265625
      vf_explained_var: 0.5850363969802856
      vf_loss: 1185.113525390625
    sample_time_ms: 19117.681
    update_time_ms: 5.8
  iterations_since_restore: 254
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 6504 s, 259 iter, 2590000 ts, -96.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-00-30
  done: false
  episode_len_mean: 86.94736842105263
  episode_reward_max: 4.6443965304029975
  episode_reward_mean: -87.82002078312262
  episode_reward_min: -200.05742009800258
  episodes_this_iter: 114
  episodes_total: 27162
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4101.856
    load_time_ms: 1.658
    num_steps_sampled: 2600000
    num_steps_trained: 2600000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.46574530005455017
      kl: 0.0120301926508545

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-02-41
  done: false
  episode_len_mean: 85.6068376068376
  episode_reward_max: 4.633385064695101
  episode_reward_mean: -92.23141000399157
  episode_reward_min: -200.0921713075607
  episodes_this_iter: 117
  episodes_total: 27739
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4241.051
    load_time_ms: 1.708
    num_steps_sampled: 2650000
    num_steps_trained: 2650000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5245605707168579
      kl: 0.01070231944322586
      policy_loss: 0.003106202930212021
      total_loss: 1421.2568359375
      vf_explained_var: 0.5031846761703491
      vf_loss: 1421.2537841796875
    sample_time_ms: 21137.187
    update_time_ms: 5.33
  iterations_since_restore: 265
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 6780 s, 270 iter, 2700000 ts, -75.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-05-05
  done: false
  episode_len_mean: 87.90265486725664
  episode_reward_max: 4.682939517057223
  episode_reward_mean: -84.77561733766122
  episode_reward_min: -200.06287857651748
  episodes_this_iter: 113
  episodes_total: 28389
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4346.126
    load_time_ms: 1.719
    num_steps_sampled: 2710000
    num_steps_trained: 2710000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5513591170310974
      kl: 0.014414802193641663

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-07-11
  done: false
  episode_len_mean: 87.15652173913044
  episode_reward_max: 5.20458314715636
  episode_reward_mean: -88.78538620535487
  episode_reward_min: -200.06161673194367
  episodes_this_iter: 115
  episodes_total: 28965
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4264.468
    load_time_ms: 1.763
    num_steps_sampled: 2760000
    num_steps_trained: 2760000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.5909231305122375
      kl: 0.014242422766983509
      policy_loss: 0.0053284442983567715
      total_loss: 1384.491943359375
      vf_explained_var: 0.498484343290329
      vf_loss: 1384.48681640625
    sample_time_ms: 20322.278
    update_time_ms: 5.605
  iterations_since_restore: 276
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 7055 s, 281 iter, 2810000 ts, -87.5 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-09-40
  done: false
  episode_len_mean: 82.13223140495867
  episode_reward_max: 4.838791652164101
  episode_reward_mean: -102.687209125852
  episode_reward_min: -200.0921480916897
  episodes_this_iter: 121
  episodes_total: 29659
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4229.759
    load_time_ms: 1.674
    num_steps_sampled: 2820000
    num_steps_trained: 2820000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.6574206948280334
      kl: 0.013818548060953617
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-11-39
  done: false
  episode_len_mean: 94.54716981132076
  episode_reward_max: 5.625324730635166
  episode_reward_mean: -66.26944753603544
  episode_reward_min: -200.0362991937982
  episodes_this_iter: 106
  episodes_total: 30223
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4154.239
    load_time_ms: 1.598
    num_steps_sampled: 2870000
    num_steps_trained: 2870000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.666886568069458
      kl: 0.012319546192884445
      policy_loss: 0.0028966243844479322
      total_loss: 1500.602783203125
      vf_explained_var: 0.3956129252910614
      vf_loss: 1500.5999755859375
    sample_time_ms: 20070.233
    update_time_ms: 5.338
  iterations_since_restore: 287
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 7313 s, 292 iter, 2920000 ts, -102 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-13-59
  done: false
  episode_len_mean: 87.59130434782608
  episode_reward_max: 5.886707114752454
  episode_reward_mean: -83.44435134952131
  episode_reward_min: -200.04312962222593
  episodes_this_iter: 115
  episodes_total: 30937
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4087.733
    load_time_ms: 1.617
    num_steps_sampled: 2930000
    num_steps_trained: 2930000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7270228862762451
      kl: 0.014946365728974342


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-15-58
  done: false
  episode_len_mean: 78.84251968503936
  episode_reward_max: 5.162073403115292
  episode_reward_mean: -110.34103160185995
  episode_reward_min: -200.09870494450962
  episodes_this_iter: 127
  episodes_total: 31514
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4057.577
    load_time_ms: 1.572
    num_steps_sampled: 2980000
    num_steps_trained: 2980000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.7687071561813354
      kl: 0.018840797245502472
      policy_loss: 0.0025479416362941265
      total_loss: 1252.6103515625
      vf_explained_var: 0.5882577896118164
      vf_loss: 1252.6077880859375
    sample_time_ms: 19497.897
    update_time_ms: 5.263
  iterations_since_restore: 298
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 7574 s, 303 iter, 3030000 ts, -91.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-18-25
  done: false
  episode_len_mean: 87.67543859649123
  episode_reward_max: 5.510964084478556
  episode_reward_mean: -86.11531998653088
  episode_reward_min: -200.02964811168658
  episodes_this_iter: 114
  episodes_total: 32209
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4257.35
    load_time_ms: 1.508
    num_steps_sampled: 3040000
    num_steps_trained: 3040000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.795604407787323
      kl: 0.024749035015702248
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-20-33
  done: false
  episode_len_mean: 86.18260869565218
  episode_reward_max: 5.491491601808175
  episode_reward_mean: -88.91154699599831
  episode_reward_min: -200.04238813177406
  episodes_this_iter: 115
  episodes_total: 32768
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4458.428
    load_time_ms: 1.653
    num_steps_sampled: 3090000
    num_steps_trained: 3090000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.8308760523796082
      kl: 0.01651383750140667
      policy_loss: 0.0050352164544165134
      total_loss: 1434.8697509765625
      vf_explained_var: 0.48018255829811096
      vf_loss: 1434.86474609375
    sample_time_ms: 20600.06
    update_time_ms: 7.247
  iterations_since_restore: 309
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 7852 s, 314 iter, 3140000 ts, -83 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-23-00
  done: false
  episode_len_mean: 85.35897435897436
  episode_reward_max: 5.525135084762274
  episode_reward_mean: -90.38933146859094
  episode_reward_min: -200.0397835479509
  episodes_this_iter: 117
  episodes_total: 33444
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4206.817
    load_time_ms: 1.582
    num_steps_sampled: 3150000
    num_steps_trained: 3150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.8422842025756836
      kl: 0.018078036606311798
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-24-59
  done: false
  episode_len_mean: 85.14406779661017
  episode_reward_max: 5.711001939767647
  episode_reward_mean: -93.30862677026312
  episode_reward_min: -200.06096330092637
  episodes_this_iter: 118
  episodes_total: 34020
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4139.671
    load_time_ms: 1.679
    num_steps_sampled: 3200000
    num_steps_trained: 3200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.8794776797294617
      kl: 0.02077561803162098
      policy_loss: 0.004772683139890432
      total_loss: 1377.6339111328125
      vf_explained_var: 0.49608319997787476
      vf_loss: 1377.629150390625
    sample_time_ms: 20016.444
    update_time_ms: 5.829
  iterations_since_restore: 320
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 8113 s, 325 iter, 3250000 ts, -86.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-27-21
  done: false
  episode_len_mean: 87.75221238938053
  episode_reward_max: 5.637324012926691
  episode_reward_mean: -86.64087349906973
  episode_reward_min: -200.0702512648655
  episodes_this_iter: 113
  episodes_total: 34695
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4062.58
    load_time_ms: 1.729
    num_steps_sampled: 3260000
    num_steps_trained: 3260000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.9279522895812988
      kl: 0.0189974308013916
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-29-46
  done: false
  episode_len_mean: 92.73394495412845
  episode_reward_max: 5.243351450275514
  episode_reward_mean: -71.6349862328752
  episode_reward_min: -200.04465428608594
  episodes_this_iter: 109
  episodes_total: 35267
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 4593.627
    load_time_ms: 1.615
    num_steps_sampled: 3310000
    num_steps_trained: 3310000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -0.9665234088897705
      kl: 0.023295925930142403
      policy_loss: 0.00744620943441987
      total_loss: 1434.031494140625
      vf_explained_var: 0.4330146014690399
      vf_loss: 1434.02392578125
    sample_time_ms: 21714.313
    update_time_ms: 6.025
  iterations_since_restore: 331
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 8466 s, 336 iter, 3360000 ts, -99.1 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-33-33
  done: false
  episode_len_mean: 84.94871794871794
  episode_reward_max: 5.41686783420063
  episode_reward_mean: -91.92206741608773
  episode_reward_min: -200.05450663145237
  episodes_this_iter: 117
  episodes_total: 35958
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6616.907
    load_time_ms: 2.05
    num_steps_sampled: 3370000
    num_steps_trained: 3370000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.019765019416809
      kl: 0.02714330144226551
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-37-02
  done: false
  episode_len_mean: 91.11009174311927
  episode_reward_max: 5.652881454261587
  episode_reward_mean: -75.39824202423794
  episode_reward_min: -200.03647013124305
  episodes_this_iter: 109
  episodes_total: 36529
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 7945.341
    load_time_ms: 2.087
    num_steps_sampled: 3420000
    num_steps_trained: 3420000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.03675377368927
      kl: 0.03073975443840027
      policy_loss: 0.009816956706345081
      total_loss: 1491.6883544921875
      vf_explained_var: 0.44374871253967285
      vf_loss: 1491.678466796875
    sample_time_ms: 32499.71
    update_time_ms: 9.217
  iterations_since_restore: 342
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 8933 s, 347 iter, 3470000 ts, -78 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-41-15
  done: false
  episode_len_mean: 90.52252252252252
  episode_reward_max: 5.72750480498573
  episode_reward_mean: -79.40617338448702
  episode_reward_min: -200.05650117778035
  episodes_this_iter: 111
  episodes_total: 37211
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 7641.745
    load_time_ms: 1.77
    num_steps_sampled: 3480000
    num_steps_trained: 3480000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.0826936960220337
      kl: 0.024386350065469742
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-44-52
  done: false
  episode_len_mean: 89.33628318584071
  episode_reward_max: 5.647998010007748
  episode_reward_mean: -81.55508267572432
  episode_reward_min: -200.0598210282542
  episodes_this_iter: 113
  episodes_total: 37768
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 7029.447
    load_time_ms: 1.791
    num_steps_sampled: 3530000
    num_steps_trained: 3530000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.109866976737976
      kl: 0.039971496909856796
      policy_loss: 0.017460601404309273
      total_loss: 1469.034912109375
      vf_explained_var: 0.46588853001594543
      vf_loss: 1469.017578125
    sample_time_ms: 35448.823
    update_time_ms: 9.877
  iterations_since_restore: 353
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 9397 s, 358 iter, 3580000 ts, -78.2 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-49-01
  done: false
  episode_len_mean: 92.17272727272727
  episode_reward_max: 5.207380486003693
  episode_reward_mean: -72.81642809912424
  episode_reward_min: -200.05128794266287
  episodes_this_iter: 110
  episodes_total: 38435
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 7362.292
    load_time_ms: 2.192
    num_steps_sampled: 3590000
    num_steps_trained: 3590000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.1508375406265259
      kl: 0.026741033419966698

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-53-59
  done: false
  episode_len_mean: 82.9
  episode_reward_max: 5.3206532985669925
  episode_reward_mean: -98.28748619754879
  episode_reward_min: -200.05567045048133
  episodes_this_iter: 120
  episodes_total: 39021
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 9183.935
    load_time_ms: 2.819
    num_steps_sampled: 3640000
    num_steps_trained: 3640000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.1805875301361084
      kl: 0.03674685209989548
      policy_loss: 0.013766519725322723
      total_loss: 1319.2135009765625
      vf_explained_var: 0.5458095073699951
      vf_loss: 1319.1995849609375
    sample_time_ms: 42201.904
    update_time_ms: 16.752
  iterations_since_restore: 364
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 9956 s, 369 iter, 3690000 ts, -91.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_15-58-40
  done: false
  episode_len_mean: 87.51754385964912
  episode_reward_max: 5.664401632400622
  episode_reward_mean: -83.99497263466867
  episode_reward_min: -200.03793364689238
  episodes_this_iter: 114
  episodes_total: 39722
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 9348.534
    load_time_ms: 2.443
    num_steps_sampled: 3700000
    num_steps_trained: 3700000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.1947122812271118
      kl: 0.032881975173950195

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-02-28
  done: false
  episode_len_mean: 94.20560747663552
  episode_reward_max: 5.7545865000486565
  episode_reward_mean: -69.35974281583015
  episode_reward_min: -200.0517644461624
  episodes_this_iter: 107
  episodes_total: 40307
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 8054.397
    load_time_ms: 2.13
    num_steps_sampled: 3750000
    num_steps_trained: 3750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.216723918914795
      kl: 0.03687399625778198
      policy_loss: 0.015750428661704063
      total_loss: 1391.872314453125
      vf_explained_var: 0.45396217703819275
      vf_loss: 1391.8565673828125
    sample_time_ms: 38992.444
    update_time_ms: 11.305
  iterations_since_restore: 375
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 10406 s, 380 iter, 3800000 ts, -92.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-05-44
  done: false
  episode_len_mean: 93.12962962962963
  episode_reward_max: 5.614894388600374
  episode_reward_mean: -70.76622884270968
  episode_reward_min: -200.0616612077156
  episodes_this_iter: 108
  episodes_total: 41008
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6760.124
    load_time_ms: 2.018
    num_steps_sampled: 3810000
    num_steps_trained: 3810000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.2362045049667358
      kl: 0.030025392770767212

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-08-16
  done: false
  episode_len_mean: 81.7560975609756
  episode_reward_max: 5.850900952110802
  episode_reward_mean: -100.79777598691867
  episode_reward_min: -200.05318186170066
  episodes_this_iter: 123
  episodes_total: 41601
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5329.742
    load_time_ms: 1.682
    num_steps_sampled: 3860000
    num_steps_trained: 3860000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.2708104848861694
      kl: 0.03684542328119278
      policy_loss: 0.01764741912484169
      total_loss: 1334.9449462890625
      vf_explained_var: 0.5526317954063416
      vf_loss: 1334.9271240234375
    sample_time_ms: 24964.28
    update_time_ms: 7.191
  iterations_since_restore: 386
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 10740 s, 391 iter, 3910000 ts, -94 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-11-18
  done: false
  episode_len_mean: 89.9090909090909
  episode_reward_max: 5.857841997246565
  episode_reward_mean: -79.8265671146789
  episode_reward_min: -200.03930959739782
  episodes_this_iter: 110
  episodes_total: 42287
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5312.36
    load_time_ms: 1.605
    num_steps_sampled: 3920000
    num_steps_trained: 3920000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.2777926921844482
      kl: 0.04732618108391762
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-13-48
  done: false
  episode_len_mean: 89.03571428571429
  episode_reward_max: 8.729135291366285
  episode_reward_mean: -84.2120415558771
  episode_reward_min: -200.07409614729
  episodes_this_iter: 112
  episodes_total: 42852
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5304.393
    load_time_ms: 1.499
    num_steps_sampled: 3970000
    num_steps_trained: 3970000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.2815641164779663
      kl: 0.03621947020292282
      policy_loss: 0.01308648381382227
      total_loss: 1405.4371337890625
      vf_explained_var: 0.5051189661026001
      vf_loss: 1405.423828125
    sample_time_ms: 24825.722
    update_time_ms: 6.9
  iterations_since_restore: 397
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 11071 s, 402 iter, 4020000 ts, -83.2 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-16-49
  done: false
  episode_len_mean: 86.12931034482759
  episode_reward_max: 10.957828355358433
  episode_reward_mean: -89.14226075602072
  episode_reward_min: -200.0744502910427
  episodes_this_iter: 116
  episodes_total: 43542
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5308.415
    load_time_ms: 1.599
    num_steps_sampled: 4030000
    num_steps_trained: 4030000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.279882550239563
      kl: 0.035361163318157196

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-19-20
  done: false
  episode_len_mean: 91.54629629629629
  episode_reward_max: 9.007035932303781
  episode_reward_mean: -75.76877406037279
  episode_reward_min: -200.0499964843857
  episodes_this_iter: 108
  episodes_total: 44115
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5303.328
    load_time_ms: 1.768
    num_steps_sampled: 4080000
    num_steps_trained: 4080000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.283940076828003
      kl: 0.039414264261722565
      policy_loss: 0.015046494081616402
      total_loss: 1483.36279296875
      vf_explained_var: 0.45218443870544434
      vf_loss: 1483.3475341796875
    sample_time_ms: 24853.909
    update_time_ms: 8.163
  iterations_since_restore: 408
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 11404 s, 413 iter, 4130000 ts, -83.1 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-22-23
  done: false
  episode_len_mean: 89.63963963963964
  episode_reward_max: 6.732986246134596
  episode_reward_mean: -81.35194237355509
  episode_reward_min: -200.12101334543138
  episodes_this_iter: 111
  episodes_total: 44797
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5325.108
    load_time_ms: 1.68
    num_steps_sampled: 4140000
    num_steps_trained: 4140000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3110967874526978
      kl: 0.03692241758108139


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-24-54
  done: false
  episode_len_mean: 83.35833333333333
  episode_reward_max: 9.019459866547404
  episode_reward_mean: -96.90941307677403
  episode_reward_min: -200.07251128793058
  episodes_this_iter: 120
  episodes_total: 45383
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5296.059
    load_time_ms: 1.626
    num_steps_sampled: 4190000
    num_steps_trained: 4190000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3361849784851074
      kl: 0.040190067142248154
      policy_loss: 0.01753983274102211
      total_loss: 1334.3177490234375
      vf_explained_var: 0.5342628359794617
      vf_loss: 1334.300048828125
    sample_time_ms: 24930.76
    update_time_ms: 7.169
  iterations_since_restore: 419
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 11737 s, 424 iter, 4240000 ts, -95.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-27-57
  done: false
  episode_len_mean: 86.56521739130434
  episode_reward_max: 8.995790549037425
  episode_reward_mean: -88.50232721144222
  episode_reward_min: -200.11988126493515
  episodes_this_iter: 115
  episodes_total: 46089
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5282.761
    load_time_ms: 1.587
    num_steps_sampled: 4250000
    num_steps_trained: 4250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3407626152038574
      kl: 0.04284637793898582

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-30-30
  done: false
  episode_len_mean: 97.45544554455445
  episode_reward_max: 7.687560431095743
  episode_reward_mean: -59.69023581632189
  episode_reward_min: -200.0810968469022
  episodes_this_iter: 101
  episodes_total: 46646
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5451.387
    load_time_ms: 1.547
    num_steps_sampled: 4300000
    num_steps_trained: 4300000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3315280675888062
      kl: 0.03378800302743912
      policy_loss: 0.012287908233702183
      total_loss: 1549.904296875
      vf_explained_var: 0.38238880038261414
      vf_loss: 1549.891845703125
    sample_time_ms: 25079.372
    update_time_ms: 7.844
  iterations_since_restore: 430
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 12074 s, 435 iter, 4350000 ts, -87.9 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-33-35
  done: false
  episode_len_mean: 90.0625
  episode_reward_max: 11.495828123417466
  episode_reward_mean: -78.29298567202788
  episode_reward_min: -200.0984274674085
  episodes_this_iter: 112
  episodes_total: 47328
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5394.497
    load_time_ms: 1.589
    num_steps_sampled: 4360000
    num_steps_trained: 4360000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.351982593536377
      kl: 0.0356752946972847
      polic

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-36-43
  done: false
  episode_len_mean: 85.12931034482759
  episode_reward_max: 7.0274101547187
  episode_reward_mean: -91.33985785357288
  episode_reward_min: -200.12244370441596
  episodes_this_iter: 116
  episodes_total: 47900
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5981.163
    load_time_ms: 1.718
    num_steps_sampled: 4410000
    num_steps_trained: 4410000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.35394287109375
      kl: 0.04618007689714432
      policy_loss: 0.019335104152560234
      total_loss: 1416.207275390625
      vf_explained_var: 0.4966525435447693
      vf_loss: 1416.18798828125
    sample_time_ms: 28062.73
    update_time_ms: 7.573
  iterations_since_restore: 441
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 12499 s, 446 iter, 4460000 ts, -88.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-40-39
  done: false
  episode_len_mean: 85.4957264957265
  episode_reward_max: 12.657808576174226
  episode_reward_mean: -93.79948123617456
  episode_reward_min: -200.21572296436875
  episodes_this_iter: 117
  episodes_total: 48584
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6763.433
    load_time_ms: 1.855
    num_steps_sampled: 4470000
    num_steps_trained: 4470000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3445709943771362
      kl: 0.05406634137034416

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-43-10
  done: false
  episode_len_mean: 93.12264150943396
  episode_reward_max: 10.502741675873454
  episode_reward_mean: -71.23796161030815
  episode_reward_min: -200.06967686872426
  episodes_this_iter: 106
  episodes_total: 49162
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5726.222
    load_time_ms: 1.637
    num_steps_sampled: 4520000
    num_steps_trained: 4520000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3165075778961182
      kl: 0.035127829760313034
      policy_loss: 0.01266622543334961
      total_loss: 1421.7786865234375
      vf_explained_var: 0.4406076967716217
      vf_loss: 1421.76611328125
    sample_time_ms: 27523.075
    update_time_ms: 9.391
  iterations_since_restore: 452
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 12832 s, 457 iter, 4570000 ts, -95.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-46-13
  done: false
  episode_len_mean: 79.22222222222223
  episode_reward_max: 10.245446032222395
  episode_reward_mean: -108.18094052448114
  episode_reward_min: -200.14388374570095
  episodes_this_iter: 126
  episodes_total: 49880
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5322.433
    load_time_ms: 1.686
    num_steps_sampled: 4580000
    num_steps_trained: 4580000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3343905210494995
      kl: 0.044036429375410

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-48-45
  done: false
  episode_len_mean: 89.44642857142857
  episode_reward_max: 11.45942122134213
  episode_reward_mean: -84.24518387443757
  episode_reward_min: -200.14777206625865
  episodes_this_iter: 112
  episodes_total: 50443
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5306.363
    load_time_ms: 1.693
    num_steps_sampled: 4630000
    num_steps_trained: 4630000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.318987488746643
      kl: 0.044912029057741165
      policy_loss: 0.01704130321741104
      total_loss: 1470.9046630859375
      vf_explained_var: 0.4586135745048523
      vf_loss: 1470.8875732421875
    sample_time_ms: 25032.658
    update_time_ms: 7.493
  iterations_since_restore: 463
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 13165 s, 468 iter, 4680000 ts, -80.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-51-46
  done: false
  episode_len_mean: 92.0925925925926
  episode_reward_max: 12.216050353996922
  episode_reward_mean: -75.63479813281045
  episode_reward_min: -200.25351703159117
  episodes_this_iter: 108
  episodes_total: 51117
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5283.691
    load_time_ms: 1.699
    num_steps_sampled: 4690000
    num_steps_trained: 4690000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3191039562225342
      kl: 0.04333561658859253

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-54-18
  done: false
  episode_len_mean: 90.23423423423424
  episode_reward_max: 10.35682079578035
  episode_reward_mean: -81.26642872879303
  episode_reward_min: -200.1584620345899
  episodes_this_iter: 111
  episodes_total: 51694
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5253.715
    load_time_ms: 1.734
    num_steps_sampled: 4740000
    num_steps_trained: 4740000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3279377222061157
      kl: 0.034788988530635834
      policy_loss: 0.01757415011525154
      total_loss: 1485.7872314453125
      vf_explained_var: 0.4790535867214203
      vf_loss: 1485.769775390625
    sample_time_ms: 24949.729
    update_time_ms: 8.005
  iterations_since_restore: 474
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 9.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 13497 s, 479 iter, 4790000 ts, -91.9 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-57-19
  done: false
  episode_len_mean: 89.39285714285714
  episode_reward_max: 11.243130945861601
  episode_reward_mean: -82.00414701052323
  episode_reward_min: -200.06027631744624
  episodes_this_iter: 112
  episodes_total: 52365
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5233.926
    load_time_ms: 1.693
    num_steps_sampled: 4800000
    num_steps_trained: 4800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3332971334457397
      kl: 0.0364174731075763

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_16-59-51
  done: false
  episode_len_mean: 88.24107142857143
  episode_reward_max: 12.197211832262708
  episode_reward_mean: -83.66405100285594
  episode_reward_min: -200.07351452221758
  episodes_this_iter: 112
  episodes_total: 52942
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5391.295
    load_time_ms: 1.637
    num_steps_sampled: 4850000
    num_steps_trained: 4850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3488643169403076
      kl: 0.04189702123403549
      policy_loss: 0.01920901983976364
      total_loss: 1467.379150390625
      vf_explained_var: 0.4539704918861389
      vf_loss: 1467.3599853515625
    sample_time_ms: 24910.514
    update_time_ms: 7.659
  iterations_since_restore: 485
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 13830 s, 490 iter, 4900000 ts, -79 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-02-52
  done: false
  episode_len_mean: 82.67768595041322
  episode_reward_max: 12.945063641600537
  episode_reward_mean: -100.66074316704332
  episode_reward_min: -200.1514942552817
  episodes_this_iter: 121
  episodes_total: 53609
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5424.679
    load_time_ms: 1.672
    num_steps_sampled: 4910000
    num_steps_trained: 4910000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3678622245788574
      kl: 0.03674580529332161

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-05-24
  done: false
  episode_len_mean: 88.29464285714286
  episode_reward_max: 8.692243238265387
  episode_reward_mean: -84.19554961475704
  episode_reward_min: -200.04708400911386
  episodes_this_iter: 112
  episodes_total: 54168
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5384.571
    load_time_ms: 1.821
    num_steps_sampled: 4960000
    num_steps_trained: 4960000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3499953746795654
      kl: 0.04932606220245361
      policy_loss: 0.020824536681175232
      total_loss: 1454.278076171875
      vf_explained_var: 0.48871055245399475
      vf_loss: 1454.2572021484375
    sample_time_ms: 24771.991
    update_time_ms: 6.875
  iterations_since_restore: 496
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 14162 s, 501 iter, 5010000 ts, -87.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-08-24
  done: false
  episode_len_mean: 84.80508474576271
  episode_reward_max: 9.240073488208154
  episode_reward_mean: -93.71867190636584
  episode_reward_min: -200.12000446934147
  episodes_this_iter: 118
  episodes_total: 54865
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5359.996
    load_time_ms: 1.696
    num_steps_sampled: 5020000
    num_steps_trained: 5020000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3660703897476196
      kl: 0.0518786124885082

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-10-55
  done: false
  episode_len_mean: 88.69642857142857
  episode_reward_max: 10.173396741403186
  episode_reward_mean: -84.34261842109693
  episode_reward_min: -200.05764119203914
  episodes_this_iter: 112
  episodes_total: 55429
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5294.036
    load_time_ms: 1.612
    num_steps_sampled: 5070000
    num_steps_trained: 5070000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3743082284927368
      kl: 0.042640626430511475
      policy_loss: 0.01691659353673458
      total_loss: 1375.2237548828125
      vf_explained_var: 0.5204933285713196
      vf_loss: 1375.2069091796875
    sample_time_ms: 24749.317
    update_time_ms: 6.866
  iterations_since_restore: 507
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 14491 s, 512 iter, 5120000 ts, -80.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-13-54
  done: false
  episode_len_mean: 87.76106194690266
  episode_reward_max: 8.956140237380975
  episode_reward_mean: -86.87037445193823
  episode_reward_min: -200.08486455595576
  episodes_this_iter: 113
  episodes_total: 56133
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5249.808
    load_time_ms: 1.639
    num_steps_sampled: 5130000
    num_steps_trained: 5130000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.389020323753357
      kl: 0.0388512946665287


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-16-26
  done: false
  episode_len_mean: 91.99090909090908
  episode_reward_max: 10.083146647811883
  episode_reward_mean: -74.70978648153668
  episode_reward_min: -200.23042426805637
  episodes_this_iter: 110
  episodes_total: 56696
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5244.317
    load_time_ms: 1.73
    num_steps_sampled: 5180000
    num_steps_trained: 5180000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.368722915649414
      kl: 0.04757815599441528
      policy_loss: 0.01927950233221054
      total_loss: 1493.21630859375
      vf_explained_var: 0.44489043951034546
      vf_loss: 1493.197265625
    sample_time_ms: 24845.724
    update_time_ms: 8.142
  iterations_since_restore: 518
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 14821 s, 523 iter, 5230000 ts, -97.8 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-19-25
  done: false
  episode_len_mean: 91.49074074074075
  episode_reward_max: 9.55666041044094
  episode_reward_mean: -76.01216926627365
  episode_reward_min: -200.12849349190014
  episodes_this_iter: 108
  episodes_total: 57394
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5197.113
    load_time_ms: 1.619
    num_steps_sampled: 5240000
    num_steps_trained: 5240000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3837392330169678
      kl: 0.04444649443030357

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-21-56
  done: false
  episode_len_mean: 86.47826086956522
  episode_reward_max: 12.067727200166818
  episode_reward_mean: -88.37948761325441
  episode_reward_min: -200.0683558611324
  episodes_this_iter: 115
  episodes_total: 57969
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5326.881
    load_time_ms: 1.651
    num_steps_sampled: 5290000
    num_steps_trained: 5290000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.4030368328094482
      kl: 0.038546282798051834
      policy_loss: 0.016442419961094856
      total_loss: 1431.999755859375
      vf_explained_var: 0.5094059705734253
      vf_loss: 1431.983154296875
    sample_time_ms: 24641.437
    update_time_ms: 8.168
  iterations_since_restore: 529
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 15150 s, 534 iter, 5340000 ts, -75 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-24-53
  done: false
  episode_len_mean: 92.06481481481481
  episode_reward_max: 10.125956784947826
  episode_reward_mean: -74.30890623361846
  episode_reward_min: -200.11955421881288
  episodes_this_iter: 108
  episodes_total: 58652
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5299.358
    load_time_ms: 1.662
    num_steps_sampled: 5350000
    num_steps_trained: 5350000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3875528573989868
      kl: 0.04380902275443077

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-27-22
  done: false
  episode_len_mean: 95.17142857142858
  episode_reward_max: 9.46710195555002
  episode_reward_mean: -66.96763322590058
  episode_reward_min: -200.04071325614396
  episodes_this_iter: 105
  episodes_total: 59230
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5217.018
    load_time_ms: 1.487
    num_steps_sampled: 5400000
    num_steps_trained: 5400000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3696329593658447
      kl: 0.05082087591290474
      policy_loss: 0.022324049845337868
      total_loss: 1513.09130859375
      vf_explained_var: 0.420133501291275
      vf_loss: 1513.0689697265625
    sample_time_ms: 24297.388
    update_time_ms: 7.309
  iterations_since_restore: 540
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 15478 s, 545 iter, 5450000 ts, -69 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-30-22
  done: false
  episode_len_mean: 86.99130434782609
  episode_reward_max: 11.842812534918632
  episode_reward_mean: -86.9888226508529
  episode_reward_min: -200.12321271543107
  episodes_this_iter: 115
  episodes_total: 59906
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5244.696
    load_time_ms: 1.699
    num_steps_sampled: 5460000
    num_steps_trained: 5460000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.4021670818328857
      kl: 0.04380529373884201


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-32-51
  done: false
  episode_len_mean: 84.22033898305085
  episode_reward_max: 12.510247549266087
  episode_reward_mean: -96.2855005630177
  episode_reward_min: -200.0856225927353
  episodes_this_iter: 118
  episodes_total: 60480
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5219.067
    load_time_ms: 1.728
    num_steps_sampled: 5510000
    num_steps_trained: 5510000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3810880184173584
      kl: 0.054907090961933136
      policy_loss: 0.01905326545238495
      total_loss: 1374.2578125
      vf_explained_var: 0.49731093645095825
      vf_loss: 1374.23876953125
    sample_time_ms: 24634.442
    update_time_ms: 7.928
  iterations_since_restore: 551
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 15804 s, 556 iter, 5560000 ts, -101 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-35-49
  done: false
  episode_len_mean: 84.42857142857143
  episode_reward_max: 12.095251075788406
  episode_reward_mean: -93.79564148822115
  episode_reward_min: -200.24315242568338
  episodes_this_iter: 119
  episodes_total: 61168
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5173.822
    load_time_ms: 1.516
    num_steps_sampled: 5570000
    num_steps_trained: 5570000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3426218032836914
      kl: 0.0451070889830589

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-38-18
  done: false
  episode_len_mean: 84.23333333333333
  episode_reward_max: 13.050650806649152
  episode_reward_mean: -94.81216098800981
  episode_reward_min: -200.2772280105145
  episodes_this_iter: 120
  episodes_total: 61763
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5254.671
    load_time_ms: 1.612
    num_steps_sampled: 5620000
    num_steps_trained: 5620000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3479148149490356
      kl: 0.035363148897886276
      policy_loss: 0.01655852422118187
      total_loss: 1361.578125
      vf_explained_var: 0.53857421875
      vf_loss: 1361.5615234375
    sample_time_ms: 24354.168
    update_time_ms: 7.135
  iterations_since_restore: 562
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 16131 s, 567 iter, 5670000 ts, -78.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-41-16
  done: false
  episode_len_mean: 88.23008849557522
  episode_reward_max: 10.393287760832667
  episode_reward_mean: -85.4230422156818
  episode_reward_min: -200.35688125213858
  episodes_this_iter: 113
  episodes_total: 62440
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5360.491
    load_time_ms: 1.63
    num_steps_sampled: 5680000
    num_steps_trained: 5680000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.348288893699646
      kl: 0.03511080890893936


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-43-47
  done: false
  episode_len_mean: 89.48648648648648
  episode_reward_max: 11.934569596053604
  episode_reward_mean: -80.98061862540615
  episode_reward_min: -200.2475320923918
  episodes_this_iter: 111
  episodes_total: 63013
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5368.452
    load_time_ms: 1.941
    num_steps_sampled: 5730000
    num_steps_trained: 5730000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.347485065460205
      kl: 0.04686831310391426
      policy_loss: 0.019554298371076584
      total_loss: 1513.693359375
      vf_explained_var: 0.43563589453697205
      vf_loss: 1513.6737060546875
    sample_time_ms: 24484.554
    update_time_ms: 7.726
  iterations_since_restore: 573
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 16461 s, 578 iter, 5780000 ts, -95.1 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-46-47
  done: false
  episode_len_mean: 81.00819672131148
  episode_reward_max: 10.76377294281763
  episode_reward_mean: -105.23176756421678
  episode_reward_min: -200.22882718799463
  episodes_this_iter: 122
  episodes_total: 63714
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5326.128
    load_time_ms: 2.032
    num_steps_sampled: 5790000
    num_steps_trained: 5790000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3737467527389526
      kl: 0.041132718324661

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-49-15
  done: false
  episode_len_mean: 91.90654205607477
  episode_reward_max: 8.510611587450342
  episode_reward_mean: -76.60727674058863
  episode_reward_min: -200.93157138473003
  episodes_this_iter: 107
  episodes_total: 64279
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5337.784
    load_time_ms: 1.54
    num_steps_sampled: 5840000
    num_steps_trained: 5840000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3579169511795044
      kl: 0.05121944099664688
      policy_loss: 0.024601105600595474
      total_loss: 1462.21142578125
      vf_explained_var: 0.46533453464508057
      vf_loss: 1462.186767578125
    sample_time_ms: 24450.236
    update_time_ms: 8.467
  iterations_since_restore: 584
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 16788 s, 589 iter, 5890000 ts, -86 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-52-15
  done: false
  episode_len_mean: 78.2578125
  episode_reward_max: 10.38498621555079
  episode_reward_mean: -112.61122714313866
  episode_reward_min: -200.17144135141373
  episodes_this_iter: 128
  episodes_total: 64985
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5340.752
    load_time_ms: 1.527
    num_steps_sampled: 5900000
    num_steps_trained: 5900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3715704679489136
      kl: 0.03291955217719078
      

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-54-45
  done: false
  episode_len_mean: 85.5546218487395
  episode_reward_max: 13.069584337277522
  episode_reward_mean: -92.35703105611272
  episode_reward_min: -200.1869305459261
  episodes_this_iter: 119
  episodes_total: 65559
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5345.358
    load_time_ms: 1.593
    num_steps_sampled: 5950000
    num_steps_trained: 5950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3808519840240479
      kl: 0.04443248733878136
      policy_loss: 0.017026616260409355
      total_loss: 1273.9415283203125
      vf_explained_var: 0.5682233572006226
      vf_loss: 1273.9246826171875
    sample_time_ms: 24489.178
    update_time_ms: 6.555
  iterations_since_restore: 595
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 17118 s, 600 iter, 6000000 ts, -69.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_17-57-46
  done: false
  episode_len_mean: 86.49137931034483
  episode_reward_max: 11.388417441116134
  episode_reward_mean: -88.6894864742923
  episode_reward_min: -200.20354102146626
  episodes_this_iter: 116
  episodes_total: 66234
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5410.152
    load_time_ms: 1.502
    num_steps_sampled: 6010000
    num_steps_trained: 6010000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.4156301021575928
      kl: 0.0383533537387847

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-00-20
  done: false
  episode_len_mean: 91.83636363636364
  episode_reward_max: 12.040704167545485
  episode_reward_mean: -74.56999341915764
  episode_reward_min: -200.10728894495963
  episodes_this_iter: 110
  episodes_total: 66799
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5407.458
    load_time_ms: 1.446
    num_steps_sampled: 6060000
    num_steps_trained: 6060000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.3659955263137817
      kl: 0.03278445452451706
      policy_loss: 0.012630950659513474
      total_loss: 1503.9014892578125
      vf_explained_var: 0.44753342866897583
      vf_loss: 1503.8887939453125
    sample_time_ms: 25116.233
    update_time_ms: 7.222
  iterations_since_restore: 606
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 17456 s, 611 iter, 6110000 ts, -80.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-03-25
  done: false
  episode_len_mean: 88.73214285714286
  episode_reward_max: 7.438093997723453
  episode_reward_mean: -84.62475694071018
  episode_reward_min: -200.12325526494095
  episodes_this_iter: 112
  episodes_total: 67489
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5551.19
    load_time_ms: 1.53
    num_steps_sampled: 6120000
    num_steps_trained: 6120000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: -1.4006426334381104
      kl: 0.04135700315237045


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-06-20
  done: false
  episode_len_mean: 90.92792792792793
  episode_reward_max: 11.388271427647034
  episode_reward_mean: -75.41445272092845
  episode_reward_min: -200.23137798156893
  episodes_this_iter: 111
  episodes_total: 68041
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5785.693
    load_time_ms: 1.734
    num_steps_sampled: 6170000
    num_steps_trained: 6170000
    rl_0:
      cur_kl_coeff: 2.802596928649634e-45
      cur_lr: 4.999999873689376e-05
      entropy: -1.4020501375198364
      kl: 6.194957733154297
      policy_loss: 0.07821430265903473
      total_loss: 1564.566650390625
      vf_explained_var: 0.4139963686466217
      vf_loss: 1564.4884033203125
    sample_time_ms: 27032.156
    update_time_ms: 8.24
  iterations_since_restore: 617
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-08-53
  done: false
  episode_len_mean: 90.31531531531532
  episode_reward_max: 5.626586386522288
  episode_reward_mean: -79.60477909970679
  episode_reward_min: -200.1995964662492
  episodes_this_iter: 111
  episodes_total: 68621
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5643.081
    load_time_ms: 1.896
    num_steps_sampled: 6220000
    num_steps_trained: 6220000
    rl_0:
      cur_kl_coeff: 7.006492321624085e-45
      cur_lr: 4.999999873689376e-05
      entropy: -1.419724941253662
      kl: 0.053411103785037994
      policy_loss: 0.020170630887150764
      total_loss: 1448.4774169921875
      vf_explained_var: 0.46541449427604675
      vf_loss: 1448.4571533203125
    sample_time_ms: 27000.313
    update_time_ms: 8.869
  iterations_since_restore: 622
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 303

  custom_metrics: {}
  date: 2019-04-09_18-11-46
  done: false
  episode_len_mean: 88.51327433628319
  episode_reward_max: 12.459443333483316
  episode_reward_mean: -84.73192522966481
  episode_reward_min: -200.32648765922755
  episodes_this_iter: 113
  episodes_total: 69202
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5785.16
    load_time_ms: 1.956
    num_steps_sampled: 6270000
    num_steps_trained: 6270000
    rl_0:
      cur_kl_coeff: 2.1019476964872256e-44
      cur_lr: 4.999999873689376e-05
      entropy: -1.3835961818695068
      kl: 0.03906853869557381
      policy_loss: 0.014045383781194687
      total_loss: 1446.0450439453125
      vf_explained_var: 0.4902752637863159
      vf_loss: 1446.031005859375
    sample_time_ms: 26741.99
    update_time_ms: 9.566
  iterations_since_restore: 627
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -42.365962614832405
  time_since_rest

  custom_metrics: {}
  date: 2019-04-09_18-14-43
  done: false
  episode_len_mean: 88.10526315789474
  episode_reward_max: 12.573899501859124
  episode_reward_mean: -86.45188665896309
  episode_reward_min: -200.30592526265568
  episodes_this_iter: 114
  episodes_total: 69776
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6191.91
    load_time_ms: 1.896
    num_steps_sampled: 6320000
    num_steps_trained: 6320000
    rl_0:
      cur_kl_coeff: 4.764414778704378e-44
      cur_lr: 4.999999873689376e-05
      entropy: -1.3946987390518188
      kl: 0.040776245296001434
      policy_loss: 0.012464474886655807
      total_loss: 1391.6180419921875
      vf_explained_var: 0.5033143162727356
      vf_loss: 1391.6055908203125
    sample_time_ms: 28728.02
    update_time_ms: 9.436
  iterations_since_restore: 632
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -43.225943329481545
  time_since_res

  custom_metrics: {}
  date: 2019-04-09_18-17-15
  done: false
  episode_len_mean: 82.89166666666667
  episode_reward_max: 14.629084941400507
  episode_reward_mean: -101.41639841130561
  episode_reward_min: -200.54633456673452
  episodes_this_iter: 120
  episodes_total: 70356
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5796.237
    load_time_ms: 1.807
    num_steps_sampled: 6370000
    num_steps_trained: 6370000
    rl_0:
      cur_kl_coeff: 2.4382593279251817e-43
      cur_lr: 4.999999873689376e-05
      entropy: -1.3850432634353638
      kl: 0.04903544485569
      policy_loss: 0.019443413242697716
      total_loss: 1362.6534423828125
      vf_explained_var: 0.5227795243263245
      vf_loss: 1362.634033203125
    sample_time_ms: 27049.754
    update_time_ms: 8.633
  iterations_since_restore: 637
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -50.7081992056528
  time_since_restor

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 10.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 18473 s, 642 iter, 6420000 ts, -91.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-20-34
  done: false
  episode_len_mean: 86.28070175438596
  episode_reward_max: 10.092601144184368
  episode_reward_mean: -91.89724926274938
  episode_reward_min: -200.44572617827782
  episodes_this_iter: 114
  episodes_total: 71060
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5800.454
    load_time_ms: 1.856
    num_steps_sampled: 6430000
    num_steps_trained: 6430000
    rl_0:
      cur_kl_coeff: 1.8497139729087585e-42
      cur_lr: 4.999999873689376e-05
      entropy: -1.399385690689087
      kl:

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-23-52
  done: false
  episode_len_mean: 86.92105263157895
  episode_reward_max: 13.254539955977386
  episode_reward_mean: -89.6676084481635
  episode_reward_min: -201.6543595950752
  episodes_this_iter: 114
  episodes_total: 71642
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6335.72
    load_time_ms: 2.027
    num_steps_sampled: 6480000
    num_steps_trained: 6480000
    rl_0:
      cur_kl_coeff: 1.405081970178494e-41
      cur_lr: 4.999999873689376e-05
      entropy: -1.41688871383667
      kl: 0.02929215505719185
      policy_loss: 0.009641454555094242
      total_loss: 1392.2491455078125
      vf_explained_var: 0.4992873966693878
      vf_loss: 1392.2396240234375
    sample_time_ms: 30113.598
    update_time_ms: 8.349
  iterations_since_restore: 648
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
 

  custom_metrics: {}
  date: 2019-04-09_18-26-57
  done: false
  episode_len_mean: 91.23636363636363
  episode_reward_max: 15.003365055092935
  episode_reward_mean: -78.391890647155
  episode_reward_min: -200.6430987638127
  episodes_this_iter: 110
  episodes_total: 72207
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6281.244
    load_time_ms: 1.904
    num_steps_sampled: 6530000
    num_steps_trained: 6530000
    rl_0:
      cur_kl_coeff: 4.742274262968046e-41
      cur_lr: 4.999999873689376e-05
      entropy: -1.4213446378707886
      kl: 0.037928685545921326
      policy_loss: 0.015493025071918964
      total_loss: 1496.4737548828125
      vf_explained_var: 0.4445865750312805
      vf_loss: 1496.4583740234375
    sample_time_ms: 31951.315
    update_time_ms: 9.102
  iterations_since_restore: 653
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -39.195945323577504
  time_since_rest

  custom_metrics: {}
  date: 2019-04-09_18-30-05
  done: false
  episode_len_mean: 88.2280701754386
  episode_reward_max: 14.956654107584965
  episode_reward_mean: -87.49268850582463
  episode_reward_min: -201.41869311585896
  episodes_this_iter: 114
  episodes_total: 72779
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6075.416
    load_time_ms: 1.898
    num_steps_sampled: 6580000
    num_steps_trained: 6580000
    rl_0:
      cur_kl_coeff: 1.0670047026754887e-40
      cur_lr: 4.999999873689376e-05
      entropy: -1.4433666467666626
      kl: 0.04373621940612793
      policy_loss: 0.021655181422829628
      total_loss: 1423.4827880859375
      vf_explained_var: 0.5023245215415955
      vf_loss: 1423.461181640625
    sample_time_ms: 31108.108
    update_time_ms: 7.798
  iterations_since_restore: 658
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -43.74634425291233
  time_since_rest

  custom_metrics: {}
  date: 2019-04-09_18-33-19
  done: false
  episode_len_mean: 86.9298245614035
  episode_reward_max: 14.993783656804446
  episode_reward_mean: -91.1366415858337
  episode_reward_min: -202.00389465545572
  episodes_this_iter: 114
  episodes_total: 73362
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6369.123
    load_time_ms: 2.272
    num_steps_sampled: 6630000
    num_steps_trained: 6630000
    rl_0:
      cur_kl_coeff: 8.102517915495741e-40
      cur_lr: 4.999999873689376e-05
      entropy: -1.4428062438964844
      kl: 0.059045400470495224
      policy_loss: 0.023906871676445007
      total_loss: 1397.269775390625
      vf_explained_var: 0.508046567440033
      vf_loss: 1397.2459716796875
    sample_time_ms: 31748.616
    update_time_ms: 7.097
  iterations_since_restore: 663
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -45.568320792916865
  time_since_resto

  custom_metrics: {}
  date: 2019-04-09_18-36-39
  done: false
  episode_len_mean: 84.29166666666667
  episode_reward_max: 8.646445053038331
  episode_reward_mean: -100.37655224317285
  episode_reward_min: -202.32889099592285
  episodes_this_iter: 120
  episodes_total: 73935
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 7139.803
    load_time_ms: 2.45
    num_steps_sampled: 6680000
    num_steps_trained: 6680000
    rl_0:
      cur_kl_coeff: 6.152852125723622e-39
      cur_lr: 4.999999873689376e-05
      entropy: -1.4369055032730103
      kl: 0.047136690467596054
      policy_loss: 0.01839604787528515
      total_loss: 1405.628662109375
      vf_explained_var: 0.5137699842453003
      vf_loss: 1405.6103515625
    sample_time_ms: 32167.667
    update_time_ms: 8.697
  iterations_since_restore: 668
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -50.18827612158643
  time_since_restore:

  custom_metrics: {}
  date: 2019-04-09_18-39-44
  done: false
  episode_len_mean: 84.85470085470085
  episode_reward_max: 13.420995827396975
  episode_reward_mean: -94.04648352850573
  episode_reward_min: -200.93281564665784
  episodes_this_iter: 117
  episodes_total: 74519
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 7012.288
    load_time_ms: 2.136
    num_steps_sampled: 6730000
    num_steps_trained: 6730000
    rl_0:
      cur_kl_coeff: 3.114881336098891e-38
      cur_lr: 4.999999873689376e-05
      entropy: -1.4086867570877075
      kl: 0.03335663676261902
      policy_loss: 0.012739698402583599
      total_loss: 1366.8990478515625
      vf_explained_var: 0.5286872386932373
      vf_loss: 1366.8863525390625
    sample_time_ms: 31375.496
    update_time_ms: 9.151
  iterations_since_restore: 673
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -47.023241764252845
  time_since_re

  custom_metrics: {}
  date: 2019-04-09_18-43-01
  done: false
  episode_len_mean: 87.74561403508773
  episode_reward_max: 11.790025861910852
  episode_reward_mean: -86.66975578496921
  episode_reward_min: -201.04302858250045
  episodes_this_iter: 114
  episodes_total: 75074
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6464.184
    load_time_ms: 1.822
    num_steps_sampled: 6780000
    num_steps_trained: 6780000
    rl_0:
      cur_kl_coeff: 1.0512723808684526e-37
      cur_lr: 4.999999873689376e-05
      entropy: -1.4240524768829346
      kl: 0.047926198691129684
      policy_loss: 0.01588595286011696
      total_loss: 1383.8536376953125
      vf_explained_var: 0.5072106719017029
      vf_loss: 1383.8377685546875
    sample_time_ms: 31660.318
    update_time_ms: 7.358
  iterations_since_restore: 678
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -43.33487789248463
  time_since_re

  custom_metrics: {}
  date: 2019-04-09_18-46-04
  done: false
  episode_len_mean: 86.89565217391305
  episode_reward_max: 12.227859825449055
  episode_reward_mean: -90.99737605176071
  episode_reward_min: -200.82920112147087
  episodes_this_iter: 115
  episodes_total: 75663
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6863.376
    load_time_ms: 1.846
    num_steps_sampled: 6830000
    num_steps_trained: 6830000
    rl_0:
      cur_kl_coeff: 5.322066995672419e-37
      cur_lr: 4.999999873689376e-05
      entropy: -1.4160300493240356
      kl: 0.03442857041954994
      policy_loss: 0.013186451978981495
      total_loss: 1410.7159423828125
      vf_explained_var: 0.48760557174682617
      vf_loss: 1410.702880859375
    sample_time_ms: 31025.669
    update_time_ms: 7.165
  iterations_since_restore: 683
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -45.498688025880355
  time_since_re

  custom_metrics: {}
  date: 2019-04-09_18-49-13
  done: false
  episode_len_mean: 91.24770642201835
  episode_reward_max: 16.571341548719353
  episode_reward_mean: -76.6246854401251
  episode_reward_min: -200.69212010968832
  episodes_this_iter: 109
  episodes_total: 76243
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6866.552
    load_time_ms: 1.944
    num_steps_sampled: 6880000
    num_steps_trained: 6880000
    rl_0:
      cur_kl_coeff: 1.796197554987503e-36
      cur_lr: 4.999999873689376e-05
      entropy: -1.4088870286941528
      kl: 0.055106133222579956
      policy_loss: 0.02051789127290249
      total_loss: 1445.3612060546875
      vf_explained_var: 0.45362913608551025
      vf_loss: 1445.3406982421875
    sample_time_ms: 30176.71
    update_time_ms: 10.65
  iterations_since_restore: 688
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -38.31234272006255
  time_since_rest

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 20426 s, 693 iter, 6930000 ts, -103 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-53-03
  done: false
  episode_len_mean: 91.68468468468468
  episode_reward_max: 14.664200612591092
  episode_reward_mean: -77.70674096174987
  episode_reward_min: -200.67434800457954
  episodes_this_iter: 111
  episodes_total: 76931
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6724.524
    load_time_ms: 1.951
    num_steps_sampled: 6940000
    num_steps_trained: 6940000
    rl_0:
      cur_kl_coeff: 1.3639874241513782e-35
      cur_lr: 4.999999873689376e-05
      entropy: -1.4222819805145264
      kl:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 11.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=30302], 20595 s, 698 iter, 6980000 ts, -98.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-55-47
  done: false
  episode_len_mean: 83.94166666666666
  episode_reward_max: 11.966805737352818
  episode_reward_mean: -97.00392409937055
  episode_reward_min: -201.499038907765
  episodes_this_iter: 120
  episodes_total: 77513
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5841.329
    load_time_ms: 1.742
    num_steps_sampled: 6990000
    num_steps_trained: 6990000
    rl_0:
      cur_kl_coeff: 1.0357780080605534e-34
      cur_lr: 4.999999873689376e-05
      entropy: -1.4507945775985718
      kl: 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-04-09_18-58-14
  done: false
  episode_len_mean: 82.0
  episode_reward_max: 15.813648802308862
  episode_reward_mean: -100.81820880596023
  episode_reward_min: -200.76039289891713
  episodes_this_iter: 122
  episodes_total: 78096
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5436.739
    load_time_ms: 1.568
    num_steps_sampled: 7040000
    num_steps_trained: 7040000
    rl_0:
      cur_kl_coeff: 5.243626373871348e-34
      cur_lr: 4.999999873689376e-05
      entropy: -1.4215832948684692
      kl: 0.04540468752384186
      policy_loss: 0.017488084733486176
      total_loss: 1283.5828857421875
      vf_explained_var: 0.5564661026000977
      vf_loss: 1283.5654296875
    sample_time_ms: 25585.456
    update_time_ms: 7.3
  iterations_since_restore: 704
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_rewa

  custom_metrics: {}
  date: 2019-04-09_19-00-53
  done: false
  episode_len_mean: 84.21848739495799
  episode_reward_max: 14.567627296005941
  episode_reward_mean: -95.53286484493414
  episode_reward_min: -201.27183454439177
  episodes_this_iter: 119
  episodes_total: 78665
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 5744.334
    load_time_ms: 1.627
    num_steps_sampled: 7090000
    num_steps_trained: 7090000
    rl_0:
      cur_kl_coeff: 3.981878012841063e-33
      cur_lr: 4.999999873689376e-05
      entropy: -1.459372878074646
      kl: 0.06059017404913902
      policy_loss: 0.021716149523854256
      total_loss: 1457.4415283203125
      vf_explained_var: 0.49373334646224976
      vf_loss: 1457.419677734375
    sample_time_ms: 24811.588
    update_time_ms: 7.185
  iterations_since_restore: 709
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -47.766432422467076
  time_since_res

  custom_metrics: {}
  date: 2019-04-09_19-03-44
  done: false
  episode_len_mean: 87.9375
  episode_reward_max: 18.120278824687297
  episode_reward_mean: -89.47321447914157
  episode_reward_min: -201.0405767598789
  episodes_this_iter: 112
  episodes_total: 79251
  experiment_id: 22b1cc9446844e61a011e26f0fdde742
  hostname: Gandalf
  info:
    grad_time_ms: 6053.825
    load_time_ms: 1.76
    num_steps_sampled: 7140000
    num_steps_trained: 7140000
    rl_0:
      cur_kl_coeff: 3.023739106173143e-32
      cur_lr: 4.999999873689376e-05
      entropy: -1.405792474746704
      kl: 11.528087615966797
      policy_loss: 0.17199382185935974
      total_loss: 1437.7547607421875
      vf_explained_var: 0.5020006895065308
      vf_loss: 1437.5828857421875
    sample_time_ms: 26837.189
    update_time_ms: 9.662
  iterations_since_restore: 714
  node_ip: 172.16.123.117
  num_metric_batches_dropped: 0
  pid: 30302
  policy_reward_mean:
    rl_0: -44.736607239570795
  time_since_restore: 21101.25