# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout in simulation steps; defined once so every cell uses the same value.
# Author's note: use at most 103 if a rollout should be cut off before leaving
# (presumably before the vehicles exit the network -- confirm); the default was 500.
HORIZON = 500

# Name of the experiment.
experiment_name = "IntersectionExample"

# Scenario class, referenced by name.
# NOTE(review): the printed __all__ spells the intersection scenario
# 'IntersectionScenarioTW', not 'IntersectionTWScenario' -- confirm which name resolves.
import flow.scenarios as scenarios
scenario_name = "IntersectionTWScenario"
print("Available scenarios:", scenarios.__all__, sep="\n")

# Environment class, referenced by name.
import flow.multiagent_envs as flowenvs
env_name = "MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit"
print("\nAvailable environments:", flowenvs.__all__, sep="\n")

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv', 'MultiAgentIntersectionEnv_baseline_1', 'MultiAgentIntersectionEnv_baseline_2', 'MultiAgentIntersectionEnv_baseline_3', 'MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, handed to NetParams as additional_params.
additionalNetParams = dict(
    edge_length=40,
    lanes=1,
    speed_limit=30,
)

# Trailing notes record each argument's default as given by the original author.
net_params = NetParams(
    no_internal_links=False,  # default: True; changed so that intersections are not skipped
    inflows=None,  # default: None
    osm_path=None,  # default: None
    netfile=None,  # default: None
    additional_params=additionalNetParams,  # default: None; changed
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Placement of vehicles at initialization/reset.
# Trailing notes record each argument's default as given by the original author.
initial_config = InitialConfig(
    shuffle=True,  # default: False; changed
    spacing="custom",  # default: "uniform"; changed
    min_gap=10,  # default: 0
    perturbation=29.99,  # default: 0.0; changed
    x0=0,  # default: 0
    bunching=0,  # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",  # default: "all"
    additional_params=None,  # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. Every value below equals the default recorded by the
# original author; all arguments are spelled out to document the setup.
sumo_params = SumoParams(
    port=None,  # default: None
    sim_step=0.1,  # default: 0.1
    emission_path=None,  # default: None
    lateral_resolution=None,  # default: None
    no_step_log=True,  # default: True
    render=False,  # default: False
    save_render=False,  # default: False
    sight_radius=25,  # default: 25
    show_radius=False,  # default: False
    pxpm=2,  # default: 2
    overtake_right=False,  # default: False
    seed=None,  # default: None
    restart_instance=False,  # default: False
    print_warnings=True,  # default: True
    teleport_time=-1,  # default: -1
    num_clients=1,  # default: 1
    sumo_binary=None,  # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, handed to EnvParams as additional_params.
additionalEnvParams = dict(
    # maximum acceleration of autonomous vehicles
    max_accel=3,
    # maximum deceleration of autonomous vehicles
    max_decel=3,
    # desired velocity for all vehicles in the network, in m/s
    target_velocity=30,
    # initial team spirit of the two agents
    ap_teamspirit_0=-1,
    ap_teamspirit_1=-1,
    # whether the team spirit values are shuffled
    ap_teamspirit_shuffle=True,
)

# Trailing notes record each argument's default as given by the original author.
env_params = EnvParams(
    additional_params=additionalEnvParams,  # default: None; changed
    horizon=HORIZON,  # default: 500; tied to the notebook-wide HORIZON
    warmup_steps=0,  # default: 0
    sims_per_step=1,  # default: 1
    evaluate=False,  # default: False
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import; no name from `random` is used in the cells visible
# here, and it can shadow other names (e.g. `seed`, `shuffle`) -- consider removing it
# or importing the needed names explicitly.
from random import *

# Vehicle container; vehicle types are registered on it through vehicles.add(...).
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# SUMO car-following settings for the RL vehicles (default would be None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# No lane-change settings are supplied (default: None).
lc_parameter = None

# Register the RL-controlled vehicle type.
# Trailing notes record each argument's default as given by the original author.
vehicles.add(
    veh_id="rl",  # name of the vehicle
    acceleration_controller=(RLController, {}),  # default: (SumoCarFollowingController, {})
    lane_change_controller=(StaticLaneChanger, {}),  # default: (SumoLaneChangeController, {})
    routing_controller=(ContinuousRouter, {}),  # default: None
    initial_speed=0,  # default: 0
    num_vehicles=2,  # default: 1
    car_following_params=cf_parameter,
    # Keyword arguments the author left disabled:
    #   speed_mode="aggressive"                 (default: "right_of_way")
    #   lane_change_mode="aggressive"           (default: "no_lat_collide")
    #   sumo_car_following_params=cf_parameter  (default: None)
    #   sumo_lc_params=lc_parameter             (default: None)
)

### Flow Parameter

In [8]:
# Bundle everything defined above into flow_params.
# The dictionary keys must be exactly the ones used here.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or its ADDITIONAL_NET_PARAMS)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Number of rollout workers running in parallel.
N_CPUS = 2
# Number of rollouts collected per training iteration.
N_ROLLOUTS = 20

# Start Ray with one CPU more than N_CPUS. Kept as the last expression so the
# cell still displays the address info returned by ray.init.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-30_18-08-52_7633/logs.
Waiting for redis server at 127.0.0.1:42231 to respond...
Waiting for redis server at 127.0.0.1:50969 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=e780c08def7cacb64256967715c94ae4cfd2fb06f11817b8



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-03-30_18-08-52_7633/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-30_18-08-52_7633/sockets/raylet'],
 'redis_address': '192.168.2.102:42231',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=e780c08def7cacb64256967715c94ae4cfd2fb06f11817b8'}

In [11]:
from copy import deepcopy

# Algorithm or model to train: either the name of a built-in algorithm
# (e.g. RLlib's DQN or PPO) or a user-defined trainable function or class
# registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults: nested dicts such as config["model"] and
# config["env_config"] are modified below, and a shallow .copy() would write
# those changes through to the agent class's shared _default_config.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size: one iteration's worth of rollouts
config["gamma"] = 0.999  # discount rate (author's note: default 0.999)
config["model"].update({"fcnet_hiddens": [100, 50, 25]})  # hidden layer sizes (author's note: default 64 32)
config["use_gae"] = True  # use generalized advantage estimation
config["lambda"] = 0.97  # GAE lambda
# Disabled by the author:
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"]  # 200 default, still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize flow_params to a JSON string and
# store it, together with the algorithm name, in the env_config section.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Build the environment factory and its gym name for this experiment ...
create_env, gym_name = make_create_env(params=flow_params, version=0)

# ... and register the factory with RLlib under that name.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy setup.
# Instantiate one environment only to read its observation and action spaces.
# NOTE(review): creating the env starts a SUMO instance (see the cell output) and
# nothing visible here closes it -- consider terminating test_env once the spaces
# are read, after confirming no later cell uses it.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space

def gen_policy():
    """Return the policy spec tuple: (PPOPolicyGraph, obs_space, act_space, {})."""
    policy_spec = (PPOPolicyGraph, obs_space, act_space, {})
    return policy_spec

# A single policy graph, 'rl_0', is defined.
policy_graphs = dict(rl_0=gen_policy())
    
def policy_mapping_fn(agent_id):
    """Return the policy id for ``agent_id``; every agent maps to 'rl_0'."""
    shared_policy_id = 'rl_0'
    return shared_policy_id

# Multi-agent section of the config: the available policy graphs, the function
# assigning agents to policies, and the policies that are trained.
config["multiagent"] = dict(
    policy_graphs=policy_graphs,
    policy_mapping_fn=tune.function(policy_mapping_fn),
    policies_to_train=['rl_0'],
)

 Starting SUMO on port 55245


New Teamspirit:
-0.5393521226962619
0.17788004549039993


In [13]:
# Launch training; the experiment is keyed by the exp_tag stored in flow_params.
trials = run_experiments({
    flow_params["exp_tag"]: dict(
        run=alg_run,  # RL algorithm to run
        env=gym_name,  # environment name generated earlier
        config=dict(config),  # shallow copy of the config (must match the "run" value)
        checkpoint_freq=1,  # number of iterations between checkpoints
        max_failures=999,
        stop=dict(training_iteration=1000),  # stop after this many iterations
    ),
})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0_2019-03-30_18-08-54osfmm20k -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-09-44
  done: false
  episode_len_mean: 450.3333333333333
  episode_reward_max: 446.540044230622
  episode_reward_mean: 237.318160124313
  episode_reward_min: -65.25893908935677
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4671.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 121 s, 5 iter, 50000 ts, 483 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-11-46
  done: false
  episode_len_mean: 317.26
  episode_reward_max: 1107.553908557878
  episode_reward_mean: 560.8260890809104
  episode_reward_min: -93.2680168733728
  episodes_this_iter: 35
  episodes_total: 168
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4204.283
    load_time_ms: 9.054
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.006250001490116119
      cur_lr: 4.999999873689376e-05
      entropy: 1.4084738492965698
      kl: 0.0033381700050085783
      polic

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-13-47
  done: false
  episode_len_mean: 197.98
  episode_reward_max: 1124.4251855484695
  episode_reward_mean: 623.4015024460542
  episode_reward_min: -91.32249735106
  episodes_this_iter: 54
  episodes_total: 393
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4081.259
    load_time_ms: 1.616
    num_steps_sampled: 110000
    num_steps_trained: 110000
    rl_0:
      cur_kl_coeff: 0.00019531254656612873
      cur_lr: 4.999999873689376e-05
      entropy: 1.372615933418274
      kl: 0.0037068778183311224
      policy_loss: -0.0013310007052496076
      total_loss: 2731.52685546875
      vf_explained_var: 0.09271156042814255
      vf_loss: 2731.528564453125
    sample_time_ms: 20124.788
    update_time_ms: 6.625
  iterations_since_restore: 11
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 390 s, 16 iter, 160000 ts, 561 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-16-16
  done: false
  episode_len_mean: 163.06
  episode_reward_max: 1093.2206064303496
  episode_reward_mean: 634.8426057010863
  episode_reward_min: -69.90847952706781
  episodes_this_iter: 60
  episodes_total: 724
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4065.489
    load_time_ms: 1.515
    num_steps_sampled: 170000
    num_steps_trained: 170000
    rl_0:
      cur_kl_coeff: 3.0517585400957614e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.3331423997879028
      kl: 0.003791195573285222
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-18-19
  done: false
  episode_len_mean: 177.21
  episode_reward_max: 1117.115998055278
  episode_reward_mean: 672.6511593510472
  episode_reward_min: -105.99853480988938
  episodes_this_iter: 59
  episodes_total: 1016
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4063.11
    load_time_ms: 1.5
    num_steps_sampled: 220000
    num_steps_trained: 220000
    rl_0:
      cur_kl_coeff: 1.907349087559851e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.3433706760406494
      kl: 0.004157834220677614
      policy_loss: -0.002163971308618784
      total_loss: 3686.760009765625
      vf_explained_var: 0.28146669268608093
      vf_loss: 3686.761962890625
    sample_time_ms: 20626.799
    update_time_ms: 6.148
  iterations_since_restore: 22
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 660 s, 27 iter, 270000 ts, 755 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-20-45
  done: false
  episode_len_mean: 183.26
  episode_reward_max: 1090.550099980552
  episode_reward_mean: 762.8092661557192
  episode_reward_min: -82.98746330643367
  episodes_this_iter: 52
  episodes_total: 1383
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4043.075
    load_time_ms: 1.638
    num_steps_sampled: 280000
    num_steps_trained: 280000
    rl_0:
      cur_kl_coeff: 2.980232949312267e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.3994286060333252
      kl: 0.008993882685899734
     

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-22-50
  done: false
  episode_len_mean: 171.55
  episode_reward_max: 1111.8241060283954
  episode_reward_mean: 780.6914790995246
  episode_reward_min: -73.9570977116314
  episodes_this_iter: 57
  episodes_total: 1667
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4053.584
    load_time_ms: 1.593
    num_steps_sampled: 330000
    num_steps_trained: 330000
    rl_0:
      cur_kl_coeff: 9.313227966600834e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3112518787384033
      kl: 0.004653040319681168
      policy_loss: -0.0022690375335514545
      total_loss: 4152.9189453125
      vf_explained_var: 0.34570014476776123
      vf_loss: 4152.92138671875
    sample_time_ms: 20443.212
    update_time_ms: 5.369
  iterations_since_restore: 33
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 930 s, 38 iter, 380000 ts, 724 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-25-16
  done: false
  episode_len_mean: 173.92
  episode_reward_max: 1159.5510197425358
  episode_reward_mean: 748.5170951028921
  episode_reward_min: -61.74996583605895
  episodes_this_iter: 57
  episodes_total: 2023
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4058.802
    load_time_ms: 1.526
    num_steps_sampled: 390000
    num_steps_trained: 390000
    rl_0:
      cur_kl_coeff: 1.4551918697813804e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.330047607421875
      kl: 0.0062732994556427
      

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-27-15
  done: false
  episode_len_mean: 170.49
  episode_reward_max: 1099.5025684285392
  episode_reward_mean: 802.0863994535883
  episode_reward_min: -91.11457147412848
  episodes_this_iter: 58
  episodes_total: 2316
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4070.509
    load_time_ms: 1.525
    num_steps_sampled: 440000
    num_steps_trained: 440000
    rl_0:
      cur_kl_coeff: 4.5474745930668137e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.2108081579208374
      kl: 0.012782350182533264
      policy_loss: -0.004360204096883535
      total_loss: 3877.43359375
      vf_explained_var: 0.39552438259124756
      vf_loss: 3877.4375
    sample_time_ms: 19931.35
    update_time_ms: 5.522
  iterations_since_restore: 44
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:
    rl

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 1193 s, 49 iter, 490000 ts, 757 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-29-39
  done: false
  episode_len_mean: 173.47
  episode_reward_max: 1082.2935884388326
  episode_reward_mean: 801.9245177740133
  episode_reward_min: -100.64443072601006
  episodes_this_iter: 58
  episodes_total: 2673
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4068.361
    load_time_ms: 1.577
    num_steps_sampled: 500000
    num_steps_trained: 500000
    rl_0:
      cur_kl_coeff: 1.4210858103333793e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.209606647491455
      kl: 0.01233711652457714
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-31-39
  done: false
  episode_len_mean: 191.17
  episode_reward_max: 1168.2858916808163
  episode_reward_mean: 801.6874011297856
  episode_reward_min: -92.99511521676285
  episodes_this_iter: 52
  episodes_total: 2950
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4081.237
    load_time_ms: 1.6
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 1.776357262916724e-16
      cur_lr: 4.999999873689376e-05
      entropy: 1.3836376667022705
      kl: 0.009547418914735317
      policy_loss: -0.0029874001629650593
      total_loss: 2873.715087890625
      vf_explained_var: 0.6084468364715576
      vf_loss: 2873.71826171875
    sample_time_ms: 19813.152
    update_time_ms: 5.543
  iterations_since_restore: 55
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 1459 s, 60 iter, 600000 ts, 819 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-34-06
  done: false
  episode_len_mean: 178.99
  episode_reward_max: 1092.1928024204626
  episode_reward_mean: 810.2539534158011
  episode_reward_min: -22.038267087483632
  episodes_this_iter: 52
  episodes_total: 3291
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4079.261
    load_time_ms: 1.509
    num_steps_sampled: 610000
    num_steps_trained: 610000
    rl_0:
      cur_kl_coeff: 2.220446578645905e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.3917882442474365
      kl: 0.006629211828112602
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-36-08
  done: false
  episode_len_mean: 172.46
  episode_reward_max: 1129.2630107294244
  episode_reward_mean: 797.805012970536
  episode_reward_min: -80.06289373084853
  episodes_this_iter: 60
  episodes_total: 3574
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4079.33
    load_time_ms: 1.536
    num_steps_sampled: 660000
    num_steps_trained: 660000
    rl_0:
      cur_kl_coeff: 1.3877791116536907e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.2282333374023438
      kl: 0.029518146067857742
      policy_loss: -0.005449789110571146
      total_loss: 3151.203125
      vf_explained_var: 0.5875621438026428
      vf_loss: 3151.208740234375
    sample_time_ms: 20316.291
    update_time_ms: 5.591
  iterations_since_restore: 66
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 1723 s, 71 iter, 710000 ts, 798 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-38-30
  done: false
  episode_len_mean: 170.67
  episode_reward_max: 1100.7682568862947
  episode_reward_mean: 826.5977943035624
  episode_reward_min: -91.3228585102851
  episodes_this_iter: 58
  episodes_total: 3918
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 4064.778
    load_time_ms: 1.569
    num_steps_sampled: 720000
    num_steps_trained: 720000
    rl_0:
      cur_kl_coeff: 3.4694477791342267e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.212500810623169
      kl: 0.02213987149298191
     

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-40-25
  done: false
  episode_len_mean: 169.43
  episode_reward_max: 1079.6805947663486
  episode_reward_mean: 788.8864400004891
  episode_reward_min: -48.78945530034531
  episodes_this_iter: 59
  episodes_total: 4201
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3971.841
    load_time_ms: 1.57
    num_steps_sampled: 770000
    num_steps_trained: 770000
    rl_0:
      cur_kl_coeff: 1.7347238895671134e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.255030870437622
      kl: 0.013832268305122852
      policy_loss: -0.002557950560003519
      total_loss: 2826.67333984375
      vf_explained_var: 0.7076217532157898
      vf_loss: 2826.67578125
    sample_time_ms: 19343.157
    update_time_ms: 6.825
  iterations_since_restore: 77
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 1977 s, 82 iter, 820000 ts, 829 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-42-45
  done: false
  episode_len_mean: 179.12
  episode_reward_max: 1115.7157426617339
  episode_reward_mean: 793.081723093772
  episode_reward_min: -80.69298703186337
  episodes_this_iter: 58
  episodes_total: 4539
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3898.672
    load_time_ms: 1.379
    num_steps_sampled: 830000
    num_steps_trained: 830000
    rl_0:
      cur_kl_coeff: 8.673619447835567e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.2399529218673706
      kl: 0.011597086675465107
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-44-43
  done: false
  episode_len_mean: 171.54
  episode_reward_max: 1132.2941684414955
  episode_reward_mean: 845.0994660209798
  episode_reward_min: -92.80355511380561
  episodes_this_iter: 58
  episodes_total: 4833
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3896.32
    load_time_ms: 1.433
    num_steps_sampled: 880000
    num_steps_trained: 880000
    rl_0:
      cur_kl_coeff: 8.673619447835567e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0883610248565674
      kl: 0.02442345954477787
      policy_loss: 0.0013851393014192581
      total_loss: 2600.84765625
      vf_explained_var: 0.7403693795204163
      vf_loss: 2600.846435546875
    sample_time_ms: 19496.291
    update_time_ms: 5.527
  iterations_since_restore: 88
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 2237 s, 93 iter, 930000 ts, 804 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-47-04
  done: false
  episode_len_mean: 165.94
  episode_reward_max: 1095.4449264005257
  episode_reward_mean: 786.7361068754798
  episode_reward_min: -82.11349305367506
  episodes_this_iter: 63
  episodes_total: 5193
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3893.916
    load_time_ms: 1.631
    num_steps_sampled: 940000
    num_steps_trained: 940000
    rl_0:
      cur_kl_coeff: 1.301042529434423e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9300137758255005
      kl: 0.09657149761915207
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-49-01
  done: false
  episode_len_mean: 163.29
  episode_reward_max: 1141.141721601514
  episode_reward_mean: 844.1073176468442
  episode_reward_min: -85.60900571248592
  episodes_this_iter: 62
  episodes_total: 5498
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3906.801
    load_time_ms: 1.617
    num_steps_sampled: 990000
    num_steps_trained: 990000
    rl_0:
      cur_kl_coeff: 6.586528508042169e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.8942686915397644
      kl: 0.04650158807635307
      policy_loss: 0.014241967350244522
      total_loss: 2660.878173828125
      vf_explained_var: 0.7811834812164307
      vf_loss: 2660.8642578125
    sample_time_ms: 19403.915
    update_time_ms: 6.236
  iterations_since_restore: 99
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 2492 s, 104 iter, 1040000 ts, 787 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-51-19
  done: false
  episode_len_mean: 167.31
  episode_reward_max: 1128.7656727618016
  episode_reward_mean: 778.3719957711348
  episode_reward_min: -82.58451173757678
  episodes_this_iter: 60
  episodes_total: 5860
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3905.926
    load_time_ms: 1.661
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
    rl_0:
      cur_kl_coeff: 7.502467771671245e-18
      cur_lr: 4.999999873689376e-05
      entropy: 1.1225841045379639
      kl: 0.6834498643875122
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-53-18
  done: false
  episode_len_mean: 169.05
  episode_reward_max: 1119.9564686607926
  episode_reward_mean: 794.4635377707887
  episode_reward_min: -74.61612726005043
  episodes_this_iter: 61
  episodes_total: 6157
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3879.726
    load_time_ms: 1.546
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
    rl_0:
      cur_kl_coeff: 5.697185954879787e-17
      cur_lr: 4.999999873689376e-05
      entropy: 1.100933313369751
      kl: 0.09604086726903915
      policy_loss: 0.032131556421518326
      total_loss: 2163.939453125
      vf_explained_var: 0.831710934638977
      vf_loss: 2163.906982421875
    sample_time_ms: 19442.323
    update_time_ms: 6.642
  iterations_since_restore: 110
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 2753 s, 115 iter, 1150000 ts, 805 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-55-41
  done: false
  episode_len_mean: 161.71
  episode_reward_max: 1119.360465698523
  episode_reward_mean: 825.9421957226192
  episode_reward_min: -49.33569313065445
  episodes_this_iter: 60
  episodes_total: 6533
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3882.168
    load_time_ms: 1.424
    num_steps_sampled: 1160000
    num_steps_trained: 1160000
    rl_0:
      cur_kl_coeff: 6.489450281160216e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.8990891575813293
      kl: 268.48272705078125
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_18-57-41
  done: false
  episode_len_mean: 163.47
  episode_reward_max: 1067.0639660018003
  episode_reward_mean: 806.3591881112098
  episode_reward_min: -82.68881648609906
  episodes_this_iter: 56
  episodes_total: 6823
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3908.18
    load_time_ms: 1.474
    num_steps_sampled: 1210000
    num_steps_trained: 1210000
    rl_0:
      cur_kl_coeff: 4.9279263022929554e-15
      cur_lr: 4.999999873689376e-05
      entropy: 1.2761578559875488
      kl: 0.7929095029830933
      policy_loss: 0.05363516882061958
      total_loss: 1656.5562744140625
      vf_explained_var: 0.8949035406112671
      vf_loss: 1656.5025634765625
    sample_time_ms: 19828.603
    update_time_ms: 5.854
  iterations_since_restore: 121
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 3014 s, 126 iter, 1260000 ts, 817 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-00-03
  done: false
  episode_len_mean: 170.34
  episode_reward_max: 1107.0377882137627
  episode_reward_mean: 893.6847019317559
  episode_reward_min: -41.31058437729641
  episodes_this_iter: 62
  episodes_total: 7173
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3913.791
    load_time_ms: 1.471
    num_steps_sampled: 1270000
    num_steps_trained: 1270000
    rl_0:
      cur_kl_coeff: 5.6132175459393946e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.8214613199234009
      kl: 0.06110236421227455

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-02-01
  done: false
  episode_len_mean: 169.31
  episode_reward_max: 1132.5103420422597
  episode_reward_mean: 736.3091721836062
  episode_reward_min: -82.49720056932607
  episodes_this_iter: 63
  episodes_total: 7472
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3901.3
    load_time_ms: 1.53
    num_steps_sampled: 1320000
    num_steps_trained: 1320000
    rl_0:
      cur_kl_coeff: 4.2625362332675276e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.3076387643814087
      kl: 0.10500258952379227
      policy_loss: 0.03404530882835388
      total_loss: 2358.06005859375
      vf_explained_var: 0.8412878513336182
      vf_loss: 2358.026123046875
    sample_time_ms: 19698.913
    update_time_ms: 5.117
  iterations_since_restore: 132
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 3274 s, 137 iter, 1370000 ts, 758 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-04-23
  done: false
  episode_len_mean: 156.33
  episode_reward_max: 1095.2706521242028
  episode_reward_mean: 792.4522382112776
  episode_reward_min: -78.87313625732662
  episodes_this_iter: 63
  episodes_total: 7847
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3875.539
    load_time_ms: 1.667
    num_steps_sampled: 1380000
    num_steps_trained: 1380000
    rl_0:
      cur_kl_coeff: 4.855295852868036e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.0983819961547852
      kl: 0.8538356423377991
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-06-21
  done: false
  episode_len_mean: 162.4
  episode_reward_max: 1135.0120489750752
  episode_reward_mean: 786.7267390187595
  episode_reward_min: -93.9371629908343
  episodes_this_iter: 62
  episodes_total: 8150
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3878.161
    load_time_ms: 1.691
    num_steps_sampled: 1430000
    num_steps_trained: 1430000
    rl_0:
      cur_kl_coeff: 3.6869896025137905e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.269425868988037
      kl: 0.04052333906292915
      policy_loss: 0.011393722146749496
      total_loss: 1908.588134765625
      vf_explained_var: 0.8714274764060974
      vf_loss: 1908.5767822265625
    sample_time_ms: 19687.869
    update_time_ms: 4.816
  iterations_since_restore: 143
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 3534 s, 148 iter, 1480000 ts, 842 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-08-43
  done: false
  episode_len_mean: 163.51
  episode_reward_max: 1125.129929135148
  episode_reward_mean: 779.6792734929493
  episode_reward_min: -90.47573217402764
  episodes_this_iter: 62
  episodes_total: 8510
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3902.701
    load_time_ms: 1.593
    num_steps_sampled: 1490000
    num_steps_trained: 1490000
    rl_0:
      cur_kl_coeff: 4.199711967078912e-10
      cur_lr: 4.999999873689376e-05
      entropy: 1.15851628780365
      kl: 0.06804696470499039
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-10-41
  done: false
  episode_len_mean: 170.96
  episode_reward_max: 1126.8046751171964
  episode_reward_mean: 845.4009460921184
  episode_reward_min: -72.51883926448305
  episodes_this_iter: 57
  episodes_total: 8814
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3907.709
    load_time_ms: 1.658
    num_steps_sampled: 1540000
    num_steps_trained: 1540000
    rl_0:
      cur_kl_coeff: 3.1891556062646487e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.4069567918777466
      kl: 0.0382692888379097
      policy_loss: 0.012568345293402672
      total_loss: 1556.0584716796875
      vf_explained_var: 0.9081858396530151
      vf_loss: 1556.0460205078125
    sample_time_ms: 19543.612
    update_time_ms: 5.432
  iterations_since_restore: 154
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 3792 s, 159 iter, 1590000 ts, 819 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-13-03
  done: false
  episode_len_mean: 165.65
  episode_reward_max: 1126.087518818706
  episode_reward_mean: 827.6998933941583
  episode_reward_min: -86.0008155990774
  episodes_this_iter: 58
  episodes_total: 9185
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3905.643
    load_time_ms: 1.634
    num_steps_sampled: 1600000
    num_steps_trained: 1600000
    rl_0:
      cur_kl_coeff: 1.6145104808629185e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.3350632190704346
      kl: 0.061680927872657776


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-15-01
  done: false
  episode_len_mean: 163.85
  episode_reward_max: 1086.0345865134168
  episode_reward_mean: 840.2775202587571
  episode_reward_min: -84.50009653786442
  episodes_this_iter: 63
  episodes_total: 9488
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3910.963
    load_time_ms: 1.674
    num_steps_sampled: 1650000
    num_steps_trained: 1650000
    rl_0:
      cur_kl_coeff: 8.173458354576724e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.1854239702224731
      kl: 60.29533386230469
      policy_loss: 0.09990409761667252
      total_loss: 1906.474609375
      vf_explained_var: 0.8939045667648315
      vf_loss: 1906.3746337890625
    sample_time_ms: 19645.436
    update_time_ms: 6.317
  iterations_since_restore: 165
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 4053 s, 170 iter, 1700000 ts, 779 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-17-24
  done: false
  episode_len_mean: 159.2
  episode_reward_max: 1146.58581027196
  episode_reward_mean: 801.0023549815836
  episode_reward_min: -65.90100287786015
  episodes_this_iter: 65
  episodes_total: 9855
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3891.11
    load_time_ms: 1.629
    num_steps_sampled: 1710000
    num_steps_trained: 1710000
    rl_0:
      cur_kl_coeff: 4.1378137893843814e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.1518112421035767
      kl: 0.05289381742477417
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-19-19
  done: false
  episode_len_mean: 164.96
  episode_reward_max: 1117.8375200951662
  episode_reward_mean: 797.7028961630855
  episode_reward_min: -89.85517079069274
  episodes_this_iter: 58
  episodes_total: 10158
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3885.16
    load_time_ms: 1.551
    num_steps_sampled: 1760000
    num_steps_trained: 1760000
    rl_0:
      cur_kl_coeff: 2.094767978633172e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.318601369857788
      kl: 0.0931718498468399
      policy_loss: 0.013629581779241562
      total_loss: 1637.8411865234375
      vf_explained_var: 0.913762629032135
      vf_loss: 1637.827392578125
    sample_time_ms: 19504.758
    update_time_ms: 6.587
  iterations_since_restore: 176
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 4309 s, 181 iter, 1810000 ts, 812 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-21-40
  done: false
  episode_len_mean: 163.46
  episode_reward_max: 1115.8556464290637
  episode_reward_mean: 834.6909796451409
  episode_reward_min: -85.51143599442628
  episodes_this_iter: 62
  episodes_total: 10517
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3892.325
    load_time_ms: 1.413
    num_steps_sampled: 1820000
    num_steps_trained: 1820000
    rl_0:
      cur_kl_coeff: 2.3860717192292213e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.2782739400863647
      kl: 0.0480321459472179

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-23-38
  done: false
  episode_len_mean: 164.43
  episode_reward_max: 1117.6003127062854
  episode_reward_mean: 871.0613122050314
  episode_reward_min: -83.7854233672988
  episodes_this_iter: 61
  episodes_total: 10826
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3887.355
    load_time_ms: 1.384
    num_steps_sampled: 1870000
    num_steps_trained: 1870000
    rl_0:
      cur_kl_coeff: 8.052992779994383e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.2719451189041138
      kl: 0.05250247195363045
      policy_loss: 0.008342070505023003
      total_loss: 1012.4776611328125
      vf_explained_var: 0.9529592990875244
      vf_loss: 1012.4693603515625
    sample_time_ms: 19590.711
    update_time_ms: 5.512
  iterations_since_restore: 187
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 4567 s, 192 iter, 1920000 ts, 833 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-25-59
  done: false
  episode_len_mean: 165.3
  episode_reward_max: 1124.0084005373951
  episode_reward_mean: 799.704662455079
  episode_reward_min: -79.00854425674987
  episodes_this_iter: 61
  episodes_total: 11190
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3875.312
    load_time_ms: 1.401
    num_steps_sampled: 1930000
    num_steps_trained: 1930000
    rl_0:
      cur_kl_coeff: 0.0009172860882245004
      cur_lr: 4.999999873689376e-05
      entropy: 1.3148846626281738
      kl: 0.11332622170448303
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-27-57
  done: false
  episode_len_mean: 164.65
  episode_reward_max: 1141.0397831743278
  episode_reward_mean: 854.8202662407037
  episode_reward_min: -2.5173066973707705
  episodes_this_iter: 60
  episodes_total: 11501
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3871.644
    load_time_ms: 1.418
    num_steps_sampled: 1980000
    num_steps_trained: 1980000
    rl_0:
      cur_kl_coeff: 0.004643761087208986
      cur_lr: 4.999999873689376e-05
      entropy: 1.2806631326675415
      kl: 0.11285719275474548
      policy_loss: 0.008138387463986874
      total_loss: 1786.8778076171875
      vf_explained_var: 0.9167105555534363
      vf_loss: 1786.869140625
    sample_time_ms: 19581.022
    update_time_ms: 5.661
  iterations_since_restore: 198
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 4827 s, 203 iter, 2030000 ts, 833 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-30-19
  done: false
  episode_len_mean: 162.47
  episode_reward_max: 1108.707555472567
  episode_reward_mean: 831.2705779478081
  episode_reward_min: -83.79726680026901
  episodes_this_iter: 61
  episodes_total: 11872
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3900.313
    load_time_ms: 1.392
    num_steps_sampled: 2040000
    num_steps_trained: 2040000
    rl_0:
      cur_kl_coeff: 0.03526356443762779
      cur_lr: 4.999999873689376e-05
      entropy: 1.3218249082565308
      kl: 0.030582822859287262
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-32-15
  done: false
  episode_len_mean: 159.51
  episode_reward_max: 1169.2937884238736
  episode_reward_mean: 740.3501035302728
  episode_reward_min: -91.40279957925672
  episodes_this_iter: 66
  episodes_total: 12175
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3884.981
    load_time_ms: 1.45
    num_steps_sampled: 2090000
    num_steps_trained: 2090000
    rl_0:
      cur_kl_coeff: 0.03526356443762779
      cur_lr: 4.999999873689376e-05
      entropy: 1.3132611513137817
      kl: 0.026898089796304703
      policy_loss: 0.0009318122174590826
      total_loss: 2386.2744140625
      vf_explained_var: 0.9062463045120239
      vf_loss: 2386.2724609375
    sample_time_ms: 19539.989
    update_time_ms: 5.612
  iterations_since_restore: 209
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 5085 s, 214 iter, 2140000 ts, 734 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-34-38
  done: false
  episode_len_mean: 165.04
  episode_reward_max: 1075.3311869275517
  episode_reward_mean: 809.9752213323217
  episode_reward_min: -87.46625428367066
  episodes_this_iter: 59
  episodes_total: 12548
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3915.223
    load_time_ms: 1.464
    num_steps_sampled: 2150000
    num_steps_trained: 2150000
    rl_0:
      cur_kl_coeff: 0.03526356443762779
      cur_lr: 4.999999873689376e-05
      entropy: 1.3755062818527222
      kl: 0.03945669159293175
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-36-35
  done: false
  episode_len_mean: 162.72
  episode_reward_max: 1089.5688928254622
  episode_reward_mean: 794.8916176090685
  episode_reward_min: -107.25240778647495
  episodes_this_iter: 58
  episodes_total: 12849
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3913.047
    load_time_ms: 1.376
    num_steps_sampled: 2200000
    num_steps_trained: 2200000
    rl_0:
      cur_kl_coeff: 0.03526356443762779
      cur_lr: 4.999999873689376e-05
      entropy: 1.465054988861084
      kl: 0.04190998151898384
      policy_loss: 0.0045685130171477795
      total_loss: 1385.20556640625
      vf_explained_var: 0.9379820227622986
      vf_loss: 1385.1995849609375
    sample_time_ms: 19576.272
    update_time_ms: 5.681
  iterations_since_restore: 220
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 5344 s, 225 iter, 2250000 ts, 851 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-38-58
  done: false
  episode_len_mean: 172.25
  episode_reward_max: 1151.5999138722498
  episode_reward_mean: 918.6827979453817
  episode_reward_min: 1.4190637993049506
  episodes_this_iter: 57
  episodes_total: 13209
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3910.528
    load_time_ms: 1.517
    num_steps_sampled: 2260000
    num_steps_trained: 2260000
    rl_0:
      cur_kl_coeff: 0.07934301346540451
      cur_lr: 4.999999873689376e-05
      entropy: 1.369745135307312
      kl: 0.02584715373814106
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-40-55
  done: false
  episode_len_mean: 173.77
  episode_reward_max: 1121.6444395906572
  episode_reward_mean: 830.1032269618809
  episode_reward_min: -53.88229845555392
  episodes_this_iter: 59
  episodes_total: 13503
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3898.346
    load_time_ms: 1.448
    num_steps_sampled: 2310000
    num_steps_trained: 2310000
    rl_0:
      cur_kl_coeff: 0.07934301346540451
      cur_lr: 4.999999873689376e-05
      entropy: 1.4380100965499878
      kl: 0.03137526288628578
      policy_loss: 0.0029535056091845036
      total_loss: 1417.099609375
      vf_explained_var: 0.9382697939872742
      vf_loss: 1417.0943603515625
    sample_time_ms: 19727.374
    update_time_ms: 6.052
  iterations_since_restore: 231
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 5603 s, 236 iter, 2360000 ts, 869 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-43-16
  done: false
  episode_len_mean: 171.89
  episode_reward_max: 1114.5492726675527
  episode_reward_mean: 863.9980079305936
  episode_reward_min: -85.8989712399054
  episodes_this_iter: 59
  episodes_total: 13857
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3877.652
    load_time_ms: 1.414
    num_steps_sampled: 2370000
    num_steps_trained: 2370000
    rl_0:
      cur_kl_coeff: 0.07934301346540451
      cur_lr: 4.999999873689376e-05
      entropy: 1.2962687015533447
      kl: 0.023759542033076286
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-45-12
  done: false
  episode_len_mean: 171.83
  episode_reward_max: 1125.6006078924547
  episode_reward_mean: 867.5956901926937
  episode_reward_min: -41.16439084588734
  episodes_this_iter: 58
  episodes_total: 14148
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3884.918
    load_time_ms: 1.481
    num_steps_sampled: 2420000
    num_steps_trained: 2420000
    rl_0:
      cur_kl_coeff: 0.07934301346540451
      cur_lr: 4.999999873689376e-05
      entropy: 1.20304536819458
      kl: 0.027727004140615463
      policy_loss: 0.000956991920247674
      total_loss: 1027.0111083984375
      vf_explained_var: 0.9509085416793823
      vf_loss: 1027.0079345703125
    sample_time_ms: 19394.856
    update_time_ms: 5.074
  iterations_since_restore: 242
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 5858 s, 247 iter, 2470000 ts, 839 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-47-31
  done: false
  episode_len_mean: 165.78
  episode_reward_max: 1112.9151908018157
  episode_reward_mean: 889.153702119795
  episode_reward_min: -84.93149404047517
  episodes_this_iter: 61
  episodes_total: 14491
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3873.851
    load_time_ms: 1.455
    num_steps_sampled: 2480000
    num_steps_trained: 2480000
    rl_0:
      cur_kl_coeff: 0.11901451647281647
      cur_lr: 4.999999873689376e-05
      entropy: 1.183575987815857
      kl: 0.01637853868305683
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-49-29
  done: false
  episode_len_mean: 173.81
  episode_reward_max: 1095.1618962667485
  episode_reward_mean: 902.5070359908552
  episode_reward_min: -47.89744666192601
  episodes_this_iter: 59
  episodes_total: 14778
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3874.978
    load_time_ms: 1.525
    num_steps_sampled: 2530000
    num_steps_trained: 2530000
    rl_0:
      cur_kl_coeff: 0.11901451647281647
      cur_lr: 4.999999873689376e-05
      entropy: 1.3011595010757446
      kl: 0.013384074904024601
      policy_loss: 0.0008040716056711972
      total_loss: 710.1720581054688
      vf_explained_var: 0.9680651426315308
      vf_loss: 710.1697387695312
    sample_time_ms: 19452.844
    update_time_ms: 5.052
  iterations_since_restore: 253
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 6114 s, 258 iter, 2580000 ts, 844 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-51-48
  done: false
  episode_len_mean: 167.42
  episode_reward_max: 1102.9940817839874
  episode_reward_mean: 822.7390450708288
  episode_reward_min: -49.28990126705983
  episodes_this_iter: 61
  episodes_total: 15125
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3896.328
    load_time_ms: 1.65
    num_steps_sampled: 2590000
    num_steps_trained: 2590000
    rl_0:
      cur_kl_coeff: 0.11901451647281647
      cur_lr: 4.999999873689376e-05
      entropy: 1.3480063676834106
      kl: 0.03405742719769478
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-53-45
  done: false
  episode_len_mean: 174.51
  episode_reward_max: 1129.2833075244928
  episode_reward_mean: 877.0723835900098
  episode_reward_min: -23.496957738032677
  episodes_this_iter: 58
  episodes_total: 15414
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3895.491
    load_time_ms: 1.619
    num_steps_sampled: 2640000
    num_steps_trained: 2640000
    rl_0:
      cur_kl_coeff: 0.11901451647281647
      cur_lr: 4.999999873689376e-05
      entropy: 1.3437981605529785
      kl: 0.010727004148066044
      policy_loss: -0.0007736006518825889
      total_loss: 705.8556518554688
      vf_explained_var: 0.9719439744949341
      vf_loss: 705.8551635742188
    sample_time_ms: 19344.544
    update_time_ms: 5.651
  iterations_since_restore: 264
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 6372 s, 269 iter, 2690000 ts, 882 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-56-06
  done: false
  episode_len_mean: 163.8
  episode_reward_max: 1110.6819773487177
  episode_reward_mean: 828.7744163045428
  episode_reward_min: -83.41497417862166
  episodes_this_iter: 64
  episodes_total: 15760
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3882.87
    load_time_ms: 1.604
    num_steps_sampled: 2700000
    num_steps_trained: 2700000
    rl_0:
      cur_kl_coeff: 0.11901451647281647
      cur_lr: 4.999999873689376e-05
      entropy: 1.233353853225708
      kl: 0.021266456693410873
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_19-58-04
  done: false
  episode_len_mean: 172.99
  episode_reward_max: 1090.983300667671
  episode_reward_mean: 889.8971041139905
  episode_reward_min: -76.79317899939593
  episodes_this_iter: 57
  episodes_total: 16053
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3876.579
    load_time_ms: 1.461
    num_steps_sampled: 2750000
    num_steps_trained: 2750000
    rl_0:
      cur_kl_coeff: 0.059507258236408234
      cur_lr: 4.999999873689376e-05
      entropy: 1.40337073802948
      kl: 0.013911082409322262
      policy_loss: -4.7931876906659454e-05
      total_loss: 584.1410522460938
      vf_explained_var: 0.9712815880775452
      vf_loss: 584.1403198242188
    sample_time_ms: 19619.363
    update_time_ms: 5.391
  iterations_since_restore: 275
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 6631 s, 280 iter, 2800000 ts, 833 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-00-25
  done: false
  episode_len_mean: 167.55
  episode_reward_max: 1150.416234154173
  episode_reward_mean: 843.4244384863362
  episode_reward_min: -62.36821838389827
  episodes_this_iter: 60
  episodes_total: 16404
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3905.818
    load_time_ms: 1.491
    num_steps_sampled: 2810000
    num_steps_trained: 2810000
    rl_0:
      cur_kl_coeff: 0.059507258236408234
      cur_lr: 4.999999873689376e-05
      entropy: 1.3340113162994385
      kl: 0.01657634787261486
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-02-21
  done: false
  episode_len_mean: 170.69
  episode_reward_max: 1106.859463937057
  episode_reward_mean: 875.5456660857161
  episode_reward_min: -86.57626737375854
  episodes_this_iter: 59
  episodes_total: 16700
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3887.078
    load_time_ms: 1.663
    num_steps_sampled: 2860000
    num_steps_trained: 2860000
    rl_0:
      cur_kl_coeff: 0.08926088362932205
      cur_lr: 4.999999873689376e-05
      entropy: 1.4341561794281006
      kl: 0.019071059301495552
      policy_loss: -0.002226315438747406
      total_loss: 749.555908203125
      vf_explained_var: 0.9666429162025452
      vf_loss: 749.556396484375
    sample_time_ms: 19404.181
    update_time_ms: 5.044
  iterations_since_restore: 286
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 6890 s, 291 iter, 2910000 ts, 893 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-04-45
  done: false
  episode_len_mean: 168.77
  episode_reward_max: 1132.0282397665464
  episode_reward_mean: 848.5124658518223
  episode_reward_min: -59.47082972980701
  episodes_this_iter: 58
  episodes_total: 17054
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3895.097
    load_time_ms: 1.624
    num_steps_sampled: 2920000
    num_steps_trained: 2920000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.3648654222488403
      kl: 0.02486621029675007


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-06-41
  done: false
  episode_len_mean: 181.62
  episode_reward_max: 1117.068789082974
  episode_reward_mean: 889.7644720867709
  episode_reward_min: -74.72008282010142
  episodes_this_iter: 54
  episodes_total: 17339
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3917.413
    load_time_ms: 1.605
    num_steps_sampled: 2970000
    num_steps_trained: 2970000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.5815905332565308
      kl: 0.017175432294607162
      policy_loss: -0.0026892339810729027
      total_loss: 549.0551147460938
      vf_explained_var: 0.9751713275909424
      vf_loss: 549.0570068359375
    sample_time_ms: 19652.606
    update_time_ms: 5.039
  iterations_since_restore: 297
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 7144 s, 302 iter, 3020000 ts, 904 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-09-00
  done: false
  episode_len_mean: 170.27
  episode_reward_max: 1121.7544149313946
  episode_reward_mean: 901.0641012838609
  episode_reward_min: -100.1549081348195
  episodes_this_iter: 59
  episodes_total: 17692
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3887.813
    load_time_ms: 1.416
    num_steps_sampled: 3030000
    num_steps_trained: 3030000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.2533007860183716
      kl: 0.010563020594418049

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-10-58
  done: false
  episode_len_mean: 176.45
  episode_reward_max: 1161.784232803456
  episode_reward_mean: 918.3554705739679
  episode_reward_min: -52.21347631952122
  episodes_this_iter: 57
  episodes_total: 17980
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3878.591
    load_time_ms: 1.535
    num_steps_sampled: 3080000
    num_steps_trained: 3080000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.3232752084732056
      kl: 0.018882812932133675
      policy_loss: -0.002272937912493944
      total_loss: 1082.7215576171875
      vf_explained_var: 0.954788088798523
      vf_loss: 1082.72314453125
    sample_time_ms: 19425.891
    update_time_ms: 6.781
  iterations_since_restore: 308
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 7404 s, 313 iter, 3130000 ts, 890 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-13-20
  done: false
  episode_len_mean: 168.29
  episode_reward_max: 1097.2682360150172
  episode_reward_mean: 878.5610737560595
  episode_reward_min: -93.97393587001875
  episodes_this_iter: 59
  episodes_total: 18327
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3897.306
    load_time_ms: 1.59
    num_steps_sampled: 3140000
    num_steps_trained: 3140000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.2061638832092285
      kl: 0.0189950130879879
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-15-17
  done: false
  episode_len_mean: 169.36
  episode_reward_max: 1106.515668227915
  episode_reward_mean: 871.0931615062668
  episode_reward_min: -96.83434934691962
  episodes_this_iter: 59
  episodes_total: 18614
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.376
    load_time_ms: 1.621
    num_steps_sampled: 3190000
    num_steps_trained: 3190000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.319593906402588
      kl: 0.016543984413146973
      policy_loss: -0.00037636092747561634
      total_loss: 1226.1861572265625
      vf_explained_var: 0.9487780332565308
      vf_loss: 1226.1856689453125
    sample_time_ms: 19513.481
    update_time_ms: 5.777
  iterations_since_restore: 319
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 7660 s, 324 iter, 3240000 ts, 918 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-17-36
  done: false
  episode_len_mean: 167.33
  episode_reward_max: 1112.7336476611108
  episode_reward_mean: 917.5669265551883
  episode_reward_min: -37.47557608855578
  episodes_this_iter: 60
  episodes_total: 18964
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3907.758
    load_time_ms: 1.668
    num_steps_sampled: 3250000
    num_steps_trained: 3250000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.1774232387542725
      kl: 0.01937885396182537


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-19-32
  done: false
  episode_len_mean: 174.01
  episode_reward_max: 1113.716455552025
  episode_reward_mean: 954.3442753086894
  episode_reward_min: -98.35746735007966
  episodes_this_iter: 56
  episodes_total: 19245
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3899.462
    load_time_ms: 1.487
    num_steps_sampled: 3300000
    num_steps_trained: 3300000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.2316839694976807
      kl: 0.019854098558425903
      policy_loss: -0.004484560340642929
      total_loss: 710.9193725585938
      vf_explained_var: 0.9682855606079102
      vf_loss: 710.9230346679688
    sample_time_ms: 19233.864
    update_time_ms: 4.802
  iterations_since_restore: 330
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 7914 s, 335 iter, 3350000 ts, 868 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-21-51
  done: false
  episode_len_mean: 172.49
  episode_reward_max: 1104.2863495290726
  episode_reward_mean: 890.4195958564223
  episode_reward_min: -68.89786627605702
  episodes_this_iter: 59
  episodes_total: 19589
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3896.148
    load_time_ms: 1.387
    num_steps_sampled: 3360000
    num_steps_trained: 3360000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.1343847513198853
      kl: 0.01679261215031147


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-23-48
  done: false
  episode_len_mean: 175.04
  episode_reward_max: 1134.3682807436467
  episode_reward_mean: 891.5913458327585
  episode_reward_min: -49.96163394736888
  episodes_this_iter: 56
  episodes_total: 19877
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3899.957
    load_time_ms: 1.48
    num_steps_sampled: 3410000
    num_steps_trained: 3410000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.2651543617248535
      kl: 0.016384383663535118
      policy_loss: 0.0006666833069175482
      total_loss: 782.3565673828125
      vf_explained_var: 0.9710928201675415
      vf_loss: 782.355224609375
    sample_time_ms: 19259.972
    update_time_ms: 5.786
  iterations_since_restore: 341
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 8172 s, 346 iter, 3460000 ts, 914 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-26-10
  done: false
  episode_len_mean: 172.21
  episode_reward_max: 1085.084809064007
  episode_reward_mean: 868.9468179755713
  episode_reward_min: -97.45724820743716
  episodes_this_iter: 61
  episodes_total: 20231
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.975
    load_time_ms: 1.558
    num_steps_sampled: 3470000
    num_steps_trained: 3470000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 1.0962666273117065
      kl: 0.016808846965432167


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-28-07
  done: false
  episode_len_mean: 163.68
  episode_reward_max: 1108.955805871844
  episode_reward_mean: 896.6370122865916
  episode_reward_min: -50.75179233436057
  episodes_this_iter: 62
  episodes_total: 20522
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3900.859
    load_time_ms: 1.587
    num_steps_sampled: 3520000
    num_steps_trained: 3520000
    rl_0:
      cur_kl_coeff: 0.044630441814661026
      cur_lr: 4.999999873689376e-05
      entropy: 0.9848312735557556
      kl: 0.02308579348027706
      policy_loss: -0.0039005104918032885
      total_loss: 2001.3377685546875
      vf_explained_var: 0.9167219996452332
      vf_loss: 2001.340576171875
    sample_time_ms: 19687.526
    update_time_ms: 6.049
  iterations_since_restore: 352
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 8429 s, 357 iter, 3570000 ts, 875 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-30-27
  done: false
  episode_len_mean: 164.57
  episode_reward_max: 1139.48174650593
  episode_reward_mean: 868.7498957689365
  episode_reward_min: -92.4175315163541
  episodes_this_iter: 60
  episodes_total: 20876
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3887.516
    load_time_ms: 1.463
    num_steps_sampled: 3580000
    num_steps_trained: 3580000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.1113183498382568
      kl: 0.021739263087511063
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-32-22
  done: false
  episode_len_mean: 164.98
  episode_reward_max: 1163.5205815836325
  episode_reward_mean: 914.3126379661761
  episode_reward_min: -84.56933474198844
  episodes_this_iter: 62
  episodes_total: 21171
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3879.286
    load_time_ms: 1.536
    num_steps_sampled: 3630000
    num_steps_trained: 3630000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.0313467979431152
      kl: 0.012972326949238777
      policy_loss: -0.002883418696001172
      total_loss: 887.9768676757812
      vf_explained_var: 0.964655339717865
      vf_loss: 887.9793701171875
    sample_time_ms: 19182.042
    update_time_ms: 5.479
  iterations_since_restore: 363
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 8685 s, 368 iter, 3680000 ts, 918 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-34-43
  done: false
  episode_len_mean: 176.18
  episode_reward_max: 1132.5836401312963
  episode_reward_mean: 939.2336556717647
  episode_reward_min: -69.18956818695844
  episodes_this_iter: 60
  episodes_total: 21525
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3904.584
    load_time_ms: 1.589
    num_steps_sampled: 3690000
    num_steps_trained: 3690000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.199527621269226
      kl: 0.018171194940805435


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-36-42
  done: false
  episode_len_mean: 181.33
  episode_reward_max: 1148.17644343087
  episode_reward_mean: 921.1134305372703
  episode_reward_min: -30.32496424070004
  episodes_this_iter: 54
  episodes_total: 21805
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3924.307
    load_time_ms: 1.432
    num_steps_sampled: 3740000
    num_steps_trained: 3740000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.290733814239502
      kl: 0.016017822548747063
      policy_loss: -0.0015288946451619267
      total_loss: 700.46630859375
      vf_explained_var: 0.9684072732925415
      vf_loss: 700.4674682617188
    sample_time_ms: 19648.056
    update_time_ms: 5.383
  iterations_since_restore: 374
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 8944 s, 379 iter, 3790000 ts, 896 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-39-03
  done: false
  episode_len_mean: 174.95
  episode_reward_max: 1137.3581795805758
  episode_reward_mean: 891.8741579317262
  episode_reward_min: -68.00405436790058
  episodes_this_iter: 58
  episodes_total: 22154
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3911.921
    load_time_ms: 1.408
    num_steps_sampled: 3800000
    num_steps_trained: 3800000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.1259812116622925
      kl: 0.01425081305205822


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-40-59
  done: false
  episode_len_mean: 168.35
  episode_reward_max: 1118.946557926709
  episode_reward_mean: 879.2131805739024
  episode_reward_min: -85.2400401122252
  episodes_this_iter: 62
  episodes_total: 22450
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3917.07
    load_time_ms: 1.52
    num_steps_sampled: 3850000
    num_steps_trained: 3850000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.1236335039138794
      kl: 0.01725088618695736
      policy_loss: -0.00433792220428586
      total_loss: 1727.441162109375
      vf_explained_var: 0.9266921877861023
      vf_loss: 1727.445068359375
    sample_time_ms: 19416.12
    update_time_ms: 5.618
  iterations_since_restore: 385
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 9201 s, 390 iter, 3900000 ts, 948 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-43-20
  done: false
  episode_len_mean: 177.04
  episode_reward_max: 1150.4874599175241
  episode_reward_mean: 943.0531686831064
  episode_reward_min: -81.81128506189145
  episodes_this_iter: 54
  episodes_total: 22798
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3897.649
    load_time_ms: 1.608
    num_steps_sampled: 3910000
    num_steps_trained: 3910000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.040661334991455
      kl: 0.01669672504067421
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-45-17
  done: false
  episode_len_mean: 170.34
  episode_reward_max: 1110.3476522223593
  episode_reward_mean: 951.9301383549267
  episode_reward_min: -51.433396146454925
  episodes_this_iter: 61
  episodes_total: 23086
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3908.878
    load_time_ms: 1.55
    num_steps_sampled: 3960000
    num_steps_trained: 3960000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 0.9438226222991943
      kl: 0.0160690788179636
      policy_loss: 7.496831676689908e-05
      total_loss: 659.5469970703125
      vf_explained_var: 0.972770094871521
      vf_loss: 659.5465087890625
    sample_time_ms: 19486.404
    update_time_ms: 5.935
  iterations_since_restore: 396
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 9460 s, 401 iter, 4010000 ts, 902 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-47-41
  done: false
  episode_len_mean: 175.52
  episode_reward_max: 1185.8811426598277
  episode_reward_mean: 874.5323646910088
  episode_reward_min: -93.66865331520177
  episodes_this_iter: 59
  episodes_total: 23430
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3936.503
    load_time_ms: 1.515
    num_steps_sampled: 4020000
    num_steps_trained: 4020000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.0492196083068848
      kl: 0.02501044236123562


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-49-39
  done: false
  episode_len_mean: 187.91
  episode_reward_max: 1124.4554038481888
  episode_reward_mean: 934.2420046494346
  episode_reward_min: -80.85527195688002
  episodes_this_iter: 53
  episodes_total: 23701
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.707
    load_time_ms: 1.668
    num_steps_sampled: 4070000
    num_steps_trained: 4070000
    rl_0:
      cur_kl_coeff: 0.022315220907330513
      cur_lr: 4.999999873689376e-05
      entropy: 1.166486382484436
      kl: 0.019985869526863098
      policy_loss: -0.0006867782794870436
      total_loss: 748.0989990234375
      vf_explained_var: 0.9665480256080627
      vf_loss: 748.0992431640625
    sample_time_ms: 19811.693
    update_time_ms: 5.48
  iterations_since_restore: 407
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 9720 s, 412 iter, 4120000 ts, 941 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-52-01
  done: false
  episode_len_mean: 181.69
  episode_reward_max: 1161.8391437173273
  episode_reward_mean: 907.3569436200569
  episode_reward_min: -44.312554567081634
  episodes_this_iter: 56
  episodes_total: 24032
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3898.812
    load_time_ms: 1.611
    num_steps_sampled: 4130000
    num_steps_trained: 4130000
    rl_0:
      cur_kl_coeff: 0.033472832292318344
      cur_lr: 4.999999873689376e-05
      entropy: 1.1577950716018677
      kl: 0.01332406327128410

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-54-01
  done: false
  episode_len_mean: 191.19
  episode_reward_max: 1119.9523204437114
  episode_reward_mean: 910.003487394193
  episode_reward_min: -61.41590382915888
  episodes_this_iter: 56
  episodes_total: 24293
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3895.472
    load_time_ms: 1.403
    num_steps_sampled: 4180000
    num_steps_trained: 4180000
    rl_0:
      cur_kl_coeff: 0.033472832292318344
      cur_lr: 4.999999873689376e-05
      entropy: 1.073224425315857
      kl: 0.03781965747475624
      policy_loss: -0.0021118123549968004
      total_loss: 1176.0521240234375
      vf_explained_var: 0.9551211595535278
      vf_loss: 1176.052978515625
    sample_time_ms: 19876.956
    update_time_ms: 5.848
  iterations_since_restore: 418
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 9984 s, 423 iter, 4230000 ts, 948 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-56-25
  done: false
  episode_len_mean: 175.69
  episode_reward_max: 1159.038241389297
  episode_reward_mean: 986.8108256438181
  episode_reward_min: -96.20468841596842
  episodes_this_iter: 55
  episodes_total: 24624
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3883.284
    load_time_ms: 1.431
    num_steps_sampled: 4240000
    num_steps_trained: 4240000
    rl_0:
      cur_kl_coeff: 0.033472832292318344
      cur_lr: 4.999999873689376e-05
      entropy: 1.0353878736495972
      kl: 0.01439135055989027
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_20-58-23
  done: false
  episode_len_mean: 181.24
  episode_reward_max: 1160.1663624211958
  episode_reward_mean: 972.6809199386785
  episode_reward_min: -49.082870127894296
  episodes_this_iter: 57
  episodes_total: 24891
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3878.076
    load_time_ms: 1.537
    num_steps_sampled: 4290000
    num_steps_trained: 4290000
    rl_0:
      cur_kl_coeff: 0.033472832292318344
      cur_lr: 4.999999873689376e-05
      entropy: 0.7577583193778992
      kl: 0.0206757839769125
      policy_loss: -0.0018834793008863926
      total_loss: 785.1224975585938
      vf_explained_var: 0.9637572169303894
      vf_loss: 785.1237182617188
    sample_time_ms: 19881.454
    update_time_ms: 5.248
  iterations_since_restore: 429
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 10245 s, 434 iter, 4340000 ts, 905 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-00-47
  done: false
  episode_len_mean: 180.02
  episode_reward_max: 1129.737081774519
  episode_reward_mean: 930.9268620430335
  episode_reward_min: -94.91445899530322
  episodes_this_iter: 54
  episodes_total: 25221
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3877.741
    load_time_ms: 1.48
    num_steps_sampled: 4350000
    num_steps_trained: 4350000
    rl_0:
      cur_kl_coeff: 0.033472832292318344
      cur_lr: 4.999999873689376e-05
      entropy: 1.0112699270248413
      kl: 0.024290965870022774


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-02-45
  done: false
  episode_len_mean: 170.29
  episode_reward_max: 1163.7103499909035
  episode_reward_mean: 925.8593463054833
  episode_reward_min: -52.807828160653
  episodes_this_iter: 55
  episodes_total: 25501
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3893.758
    load_time_ms: 1.509
    num_steps_sampled: 4400000
    num_steps_trained: 4400000
    rl_0:
      cur_kl_coeff: 0.05020924285054207
      cur_lr: 4.999999873689376e-05
      entropy: 0.793685257434845
      kl: 0.021014567464590073
      policy_loss: -0.0019095727475360036
      total_loss: 490.43646240234375
      vf_explained_var: 0.9781150817871094
      vf_loss: 490.4372863769531
    sample_time_ms: 19845.201
    update_time_ms: 5.755
  iterations_since_restore: 440
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 10506 s, 445 iter, 4450000 ts, 944 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-05-08
  done: false
  episode_len_mean: 169.06
  episode_reward_max: 1128.6969022426897
  episode_reward_mean: 993.9598682121127
  episode_reward_min: -35.916871739382685
  episodes_this_iter: 58
  episodes_total: 25836
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3912.358
    load_time_ms: 1.474
    num_steps_sampled: 4460000
    num_steps_trained: 4460000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.7066893577575684
      kl: 0.0220539346337318

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-07-06
  done: false
  episode_len_mean: 176.41
  episode_reward_max: 1125.4708619107846
  episode_reward_mean: 976.2069072393924
  episode_reward_min: 31.40678242855364
  episodes_this_iter: 58
  episodes_total: 26117
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3898.285
    load_time_ms: 1.582
    num_steps_sampled: 4510000
    num_steps_trained: 4510000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.8778944611549377
      kl: 0.020767763257026672
      policy_loss: -0.0011583310551941395
      total_loss: 290.89697265625
      vf_explained_var: 0.9864926338195801
      vf_loss: 290.89739990234375
    sample_time_ms: 19703.499
    update_time_ms: 5.188
  iterations_since_restore: 451
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 10766 s, 456 iter, 4560000 ts, 940 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-09-29
  done: false
  episode_len_mean: 184.41
  episode_reward_max: 1155.6775298647283
  episode_reward_mean: 963.2289709751371
  episode_reward_min: -30.124520466483347
  episodes_this_iter: 51
  episodes_total: 26439
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3888.168
    load_time_ms: 1.567
    num_steps_sampled: 4570000
    num_steps_trained: 4570000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.9911110997200012
      kl: 0.0227777659893035

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-11-30
  done: false
  episode_len_mean: 179.63
  episode_reward_max: 1143.527344392858
  episode_reward_mean: 978.5927573931384
  episode_reward_min: -26.543056789602616
  episodes_this_iter: 56
  episodes_total: 26716
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3893.807
    load_time_ms: 1.62
    num_steps_sampled: 4620000
    num_steps_trained: 4620000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.7422218918800354
      kl: 0.03721558302640915
      policy_loss: -0.0022728496696799994
      total_loss: 156.5262451171875
      vf_explained_var: 0.993112325668335
      vf_loss: 156.52711486816406
    sample_time_ms: 20078.153
    update_time_ms: 5.354
  iterations_since_restore: 462
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 11030 s, 467 iter, 4670000 ts, 948 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-13-53
  done: false
  episode_len_mean: 193.27
  episode_reward_max: 1152.8334761808765
  episode_reward_mean: 993.5459973970399
  episode_reward_min: 114.87582656519349
  episodes_this_iter: 52
  episodes_total: 27046
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3890.695
    load_time_ms: 1.581
    num_steps_sampled: 4680000
    num_steps_trained: 4680000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.8094145059585571
      kl: 0.01003732811659574

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-15-55
  done: false
  episode_len_mean: 178.35
  episode_reward_max: 1150.6549786706378
  episode_reward_mean: 948.2750736772088
  episode_reward_min: -23.221700816458366
  episodes_this_iter: 55
  episodes_total: 27328
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3885.756
    load_time_ms: 1.515
    num_steps_sampled: 4730000
    num_steps_trained: 4730000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.6885159015655518
      kl: 0.02416102960705757
      policy_loss: -0.004264358896762133
      total_loss: 420.38763427734375
      vf_explained_var: 0.9806064963340759
      vf_loss: 420.39093017578125
    sample_time_ms: 20024.3
    update_time_ms: 5.318
  iterations_since_restore: 473
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 11294 s, 478 iter, 4780000 ts, 924 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-18-18
  done: false
  episode_len_mean: 183.32
  episode_reward_max: 1126.6041435537143
  episode_reward_mean: 952.2867129837418
  episode_reward_min: -87.7920816306633
  episodes_this_iter: 53
  episodes_total: 27664
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3887.766
    load_time_ms: 1.578
    num_steps_sampled: 4790000
    num_steps_trained: 4790000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.8049721121788025
      kl: 0.02706083282828331


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-20-19
  done: false
  episode_len_mean: 177.62
  episode_reward_max: 1142.331845268229
  episode_reward_mean: 977.3341181105387
  episode_reward_min: 31.308585907327114
  episodes_this_iter: 60
  episodes_total: 27948
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3889.56
    load_time_ms: 1.533
    num_steps_sampled: 4840000
    num_steps_trained: 4840000
    rl_0:
      cur_kl_coeff: 0.037656936794519424
      cur_lr: 4.999999873689376e-05
      entropy: 0.7255322933197021
      kl: 0.027880845591425896
      policy_loss: -0.003429759293794632
      total_loss: 388.1324157714844
      vf_explained_var: 0.9841206669807434
      vf_loss: 388.134765625
    sample_time_ms: 20077.36
    update_time_ms: 5.078
  iterations_since_restore: 484
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 11560 s, 489 iter, 4890000 ts, 975 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-22-44
  done: false
  episode_len_mean: 181.05
  episode_reward_max: 1139.543053368881
  episode_reward_mean: 973.1136689613697
  episode_reward_min: -43.11211179050645
  episodes_this_iter: 53
  episodes_total: 28284
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3903.163
    load_time_ms: 1.456
    num_steps_sampled: 4900000
    num_steps_trained: 4900000
    rl_0:
      cur_kl_coeff: 0.05648540332913399
      cur_lr: 4.999999873689376e-05
      entropy: 0.9120293855667114
      kl: 0.025941548869013786


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-24-44
  done: false
  episode_len_mean: 174.11
  episode_reward_max: 1152.273514514229
  episode_reward_mean: 952.0245876971119
  episode_reward_min: -65.02766803045563
  episodes_this_iter: 59
  episodes_total: 28567
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3913.944
    load_time_ms: 1.594
    num_steps_sampled: 4950000
    num_steps_trained: 4950000
    rl_0:
      cur_kl_coeff: 0.05648540332913399
      cur_lr: 4.999999873689376e-05
      entropy: 0.7950710654258728
      kl: 0.01916356198489666
      policy_loss: -0.0015417310642078519
      total_loss: 1180.5843505859375
      vf_explained_var: 0.9496710300445557
      vf_loss: 1180.5848388671875
    sample_time_ms: 20050.465
    update_time_ms: 5.313
  iterations_since_restore: 495
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 11823 s, 500 iter, 5000000 ts, 968 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-27-08
  done: false
  episode_len_mean: 175.29
  episode_reward_max: 1122.770319793716
  episode_reward_mean: 961.4199201075318
  episode_reward_min: -8.52452397905833
  episodes_this_iter: 60
  episodes_total: 28907
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3918.205
    load_time_ms: 1.652
    num_steps_sampled: 5010000
    num_steps_trained: 5010000
    rl_0:
      cur_kl_coeff: 0.08472812175750732
      cur_lr: 4.999999873689376e-05
      entropy: 0.8047667145729065
      kl: 0.00510905496776104
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-29-10
  done: false
  episode_len_mean: 177.68
  episode_reward_max: 1133.703885852864
  episode_reward_mean: 938.1489605077039
  episode_reward_min: -60.77095471700915
  episodes_this_iter: 53
  episodes_total: 29198
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.38
    load_time_ms: 1.509
    num_steps_sampled: 5060000
    num_steps_trained: 5060000
    rl_0:
      cur_kl_coeff: 0.03177304193377495
      cur_lr: 4.999999873689376e-05
      entropy: 1.2167327404022217
      kl: 0.021577538922429085
      policy_loss: -0.0033499812707304955
      total_loss: 787.6063842773438
      vf_explained_var: 0.963348388671875
      vf_loss: 787.6090087890625
    sample_time_ms: 20379.105
    update_time_ms: 6.075
  iterations_since_restore: 506
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 12090 s, 511 iter, 5110000 ts, 951 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-31-35
  done: false
  episode_len_mean: 178.39
  episode_reward_max: 1109.3133279105575
  episode_reward_mean: 958.5478824542074
  episode_reward_min: -15.937142171968446
  episodes_this_iter: 54
  episodes_total: 29541
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3891.043
    load_time_ms: 1.335
    num_steps_sampled: 5120000
    num_steps_trained: 5120000
    rl_0:
      cur_kl_coeff: 0.03177304193377495
      cur_lr: 4.999999873689376e-05
      entropy: 0.9852578639984131
      kl: 0.0187997967004776


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-33-35
  done: false
  episode_len_mean: 168.38
  episode_reward_max: 1131.5153966250439
  episode_reward_mean: 910.1475976143198
  episode_reward_min: -73.43163460648559
  episodes_this_iter: 57
  episodes_total: 29824
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3887.297
    load_time_ms: 1.343
    num_steps_sampled: 5170000
    num_steps_trained: 5170000
    rl_0:
      cur_kl_coeff: 0.04765956476330757
      cur_lr: 4.999999873689376e-05
      entropy: 0.7261096239089966
      kl: 0.014666085131466389
      policy_loss: 0.00031979955383576453
      total_loss: 685.0442504882812
      vf_explained_var: 0.9705191254615784
      vf_loss: 685.0431518554688
    sample_time_ms: 20109.162
    update_time_ms: 5.055
  iterations_since_restore: 517
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 12353 s, 522 iter, 5220000 ts, 983 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-35-59
  done: false
  episode_len_mean: 164.31
  episode_reward_max: 1159.7941158543024
  episode_reward_mean: 941.6029222573026
  episode_reward_min: -80.60075215285605
  episodes_this_iter: 61
  episodes_total: 30174
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3893.502
    load_time_ms: 1.61
    num_steps_sampled: 5230000
    num_steps_trained: 5230000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.6824864149093628
      kl: 0.014204121194779873


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-37-58
  done: false
  episode_len_mean: 167.59
  episode_reward_max: 1142.5208710384986
  episode_reward_mean: 978.9666557616206
  episode_reward_min: -66.87674778534739
  episodes_this_iter: 59
  episodes_total: 30464
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3895.911
    load_time_ms: 1.603
    num_steps_sampled: 5280000
    num_steps_trained: 5280000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.5829845666885376
      kl: 0.023420006036758423
      policy_loss: -0.001290920889005065
      total_loss: 101.34544372558594
      vf_explained_var: 0.9957851767539978
      vf_loss: 101.34506225585938
    sample_time_ms: 20014.607
    update_time_ms: 5.148
  iterations_since_restore: 528
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 12617 s, 533 iter, 5330000 ts, 960 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-40-23
  done: false
  episode_len_mean: 174.69
  episode_reward_max: 1108.5480596680447
  episode_reward_mean: 977.1480349640041
  episode_reward_min: 20.58994586540439
  episodes_this_iter: 56
  episodes_total: 30803
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3909.391
    load_time_ms: 1.533
    num_steps_sampled: 5340000
    num_steps_trained: 5340000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.731653094291687
      kl: 0.017199967056512833
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-42-22
  done: false
  episode_len_mean: 180.99
  episode_reward_max: 1169.5521102141618
  episode_reward_mean: 941.9737076446522
  episode_reward_min: -94.00164727921879
  episodes_this_iter: 55
  episodes_total: 31087
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3906.416
    load_time_ms: 1.629
    num_steps_sampled: 5390000
    num_steps_trained: 5390000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.9609527587890625
      kl: 0.014907638542354107
      policy_loss: -0.004359141923487186
      total_loss: 1275.1422119140625
      vf_explained_var: 0.9401869177818298
      vf_loss: 1275.1453857421875
    sample_time_ms: 19949.185
    update_time_ms: 5.029
  iterations_since_restore: 539
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 12880 s, 544 iter, 5440000 ts, 990 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-44-46
  done: false
  episode_len_mean: 171.54
  episode_reward_max: 1171.1166167250728
  episode_reward_mean: 1001.0933053199124
  episode_reward_min: 24.552283756078253
  episodes_this_iter: 58
  episodes_total: 31436
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3904.469
    load_time_ms: 1.676
    num_steps_sampled: 5450000
    num_steps_trained: 5450000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.701120913028717
      kl: 0.01618960127234459


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-46-45
  done: false
  episode_len_mean: 173.17
  episode_reward_max: 1148.6159440159806
  episode_reward_mean: 959.938608179448
  episode_reward_min: -16.951917143436333
  episodes_this_iter: 56
  episodes_total: 31726
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3920.823
    load_time_ms: 1.756
    num_steps_sampled: 5500000
    num_steps_trained: 5500000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.8310970664024353
      kl: 0.014824895188212395
      policy_loss: -0.0021717739291489124
      total_loss: 772.95263671875
      vf_explained_var: 0.9647461175918579
      vf_loss: 772.9537353515625
    sample_time_ms: 19857.958
    update_time_ms: 5.644
  iterations_since_restore: 550
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 13143 s, 555 iter, 5550000 ts, 947 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-49-10
  done: false
  episode_len_mean: 168.92
  episode_reward_max: 1134.5799095817717
  episode_reward_mean: 982.6887075648615
  episode_reward_min: -60.00991552977274
  episodes_this_iter: 56
  episodes_total: 32076
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3917.366
    load_time_ms: 1.623
    num_steps_sampled: 5560000
    num_steps_trained: 5560000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.5532268285751343
      kl: 0.0161847285926342
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-51-10
  done: false
  episode_len_mean: 170.51
  episode_reward_max: 1146.9285242222286
  episode_reward_mean: 967.8217649700011
  episode_reward_min: -86.78134739875816
  episodes_this_iter: 56
  episodes_total: 32367
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3908.337
    load_time_ms: 1.432
    num_steps_sampled: 5610000
    num_steps_trained: 5610000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.6706960201263428
      kl: 0.02924639731645584
      policy_loss: -0.0019594146870076656
      total_loss: 127.57511901855469
      vf_explained_var: 0.9940803647041321
      vf_loss: 127.57499694824219
    sample_time_ms: 20028.325
    update_time_ms: 5.256
  iterations_since_restore: 561
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 13406 s, 566 iter, 5660000 ts, 973 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-53-33
  done: false
  episode_len_mean: 180.08
  episode_reward_max: 1139.9853758490112
  episode_reward_mean: 995.1900543100284
  episode_reward_min: 3.7411812178492028
  episodes_this_iter: 56
  episodes_total: 32713
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3909.08
    load_time_ms: 1.393
    num_steps_sampled: 5670000
    num_steps_trained: 5670000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.7072517275810242
      kl: 0.022680018097162247


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-55-34
  done: false
  episode_len_mean: 178.53
  episode_reward_max: 1131.1825849726515
  episode_reward_mean: 945.4917499958729
  episode_reward_min: -15.390927983722833
  episodes_this_iter: 56
  episodes_total: 33003
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3916.859
    load_time_ms: 1.425
    num_steps_sampled: 5720000
    num_steps_trained: 5720000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.8679466247558594
      kl: 0.01774558052420616
      policy_loss: 0.0011514092329889536
      total_loss: 820.3170776367188
      vf_explained_var: 0.9644849896430969
      vf_loss: 820.3146362304688
    sample_time_ms: 20096.997
    update_time_ms: 5.265
  iterations_since_restore: 572
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 13672 s, 577 iter, 5770000 ts, 953 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_21-57-59
  done: false
  episode_len_mean: 170.61
  episode_reward_max: 1166.6772124766057
  episode_reward_mean: 924.3014439597054
  episode_reward_min: -87.31895803246785
  episodes_this_iter: 57
  episodes_total: 33351
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3912.803
    load_time_ms: 1.495
    num_steps_sampled: 5780000
    num_steps_trained: 5780000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.8912469148635864
      kl: 0.02081749029457569


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-00-00
  done: false
  episode_len_mean: 169.54
  episode_reward_max: 1178.4796608613626
  episode_reward_mean: 941.8323214765202
  episode_reward_min: -99.36250015223202
  episodes_this_iter: 59
  episodes_total: 33646
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3908.268
    load_time_ms: 1.563
    num_steps_sampled: 5830000
    num_steps_trained: 5830000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.9084791541099548
      kl: 0.015225272625684738
      policy_loss: -0.0012099260929971933
      total_loss: 1084.5562744140625
      vf_explained_var: 0.9571139812469482
      vf_loss: 1084.5565185546875
    sample_time_ms: 20297.079
    update_time_ms: 7.105
  iterations_since_restore: 583
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rew

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 13936 s, 588 iter, 5880000 ts, 987 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-02-24
  done: false
  episode_len_mean: 166.38
  episode_reward_max: 1114.1039041750182
  episode_reward_mean: 949.9334657991077
  episode_reward_min: -59.46732312697671
  episodes_this_iter: 60
  episodes_total: 33999
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3880.831
    load_time_ms: 1.668
    num_steps_sampled: 5890000
    num_steps_trained: 5890000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.6777281165122986
      kl: 0.0200783833861351
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-04-22
  done: false
  episode_len_mean: 166.94
  episode_reward_max: 1173.0447846261816
  episode_reward_mean: 986.3339760185015
  episode_reward_min: -74.44453722051647
  episodes_this_iter: 58
  episodes_total: 34297
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3877.154
    load_time_ms: 1.63
    num_steps_sampled: 5940000
    num_steps_trained: 5940000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.7548006772994995
      kl: 0.02145526185631752
      policy_loss: -0.0003527738736011088
      total_loss: 272.31298828125
      vf_explained_var: 0.9875022768974304
      vf_loss: 272.31182861328125
    sample_time_ms: 19732.507
    update_time_ms: 6.131
  iterations_since_restore: 594
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 14195 s, 599 iter, 5990000 ts, 996 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-06-43
  done: false
  episode_len_mean: 171.21
  episode_reward_max: 1137.8011784982098
  episode_reward_mean: 970.4702024419194
  episode_reward_min: -58.628106322897764
  episodes_this_iter: 59
  episodes_total: 34648
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3897.163
    load_time_ms: 1.643
    num_steps_sampled: 6000000
    num_steps_trained: 6000000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.8492396473884583
      kl: 0.01966098695993423

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-08-43
  done: false
  episode_len_mean: 172.17
  episode_reward_max: 1145.7541071866265
  episode_reward_mean: 968.6924834982849
  episode_reward_min: -36.30192528293077
  episodes_this_iter: 58
  episodes_total: 34943
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.917
    load_time_ms: 1.603
    num_steps_sampled: 6050000
    num_steps_trained: 6050000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.7050668597221375
      kl: 0.01881120726466179
      policy_loss: 0.0010182175319641829
      total_loss: 569.6756591796875
      vf_explained_var: 0.9765225052833557
      vf_loss: 569.6734008789062
    sample_time_ms: 19857.003
    update_time_ms: 5.538
  iterations_since_restore: 605
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 14457 s, 610 iter, 6100000 ts, 999 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-11-07
  done: false
  episode_len_mean: 172.33
  episode_reward_max: 1126.257144418672
  episode_reward_mean: 996.0677951149037
  episode_reward_min: -11.765068461748854
  episodes_this_iter: 58
  episodes_total: 35293
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3909.05
    load_time_ms: 1.628
    num_steps_sampled: 6110000
    num_steps_trained: 6110000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.475970059633255
      kl: 0.0295211561024189
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-13-08
  done: false
  episode_len_mean: 169.35
  episode_reward_max: 1139.3053475394065
  episode_reward_mean: 967.6908120978994
  episode_reward_min: -84.29079223369679
  episodes_this_iter: 57
  episodes_total: 35595
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3912.307
    load_time_ms: 1.632
    num_steps_sampled: 6160000
    num_steps_trained: 6160000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.6995112895965576
      kl: 0.020542645826935768
      policy_loss: -0.0009857771219685674
      total_loss: 651.708251953125
      vf_explained_var: 0.9739114046096802
      vf_loss: 651.707763671875
    sample_time_ms: 19983.04
    update_time_ms: 5.743
  iterations_since_restore: 616
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 14721 s, 621 iter, 6210000 ts, 998 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-15-31
  done: false
  episode_len_mean: 168.81
  episode_reward_max: 1118.365689473805
  episode_reward_mean: 997.1122088169647
  episode_reward_min: -49.65019062247086
  episodes_this_iter: 58
  episodes_total: 35944
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.634
    load_time_ms: 1.514
    num_steps_sampled: 6220000
    num_steps_trained: 6220000
    rl_0:
      cur_kl_coeff: 0.07148934155702591
      cur_lr: 4.999999873689376e-05
      entropy: 0.5379127860069275
      kl: 0.01817254163324833
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-17-31
  done: false
  episode_len_mean: 164.92
  episode_reward_max: 1126.4653502420524
  episode_reward_mean: 954.6377759153617
  episode_reward_min: -85.03137789787715
  episodes_this_iter: 60
  episodes_total: 36248
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3918.612
    load_time_ms: 1.572
    num_steps_sampled: 6270000
    num_steps_trained: 6270000
    rl_0:
      cur_kl_coeff: 0.10723401606082916
      cur_lr: 4.999999873689376e-05
      entropy: 0.4154627025127411
      kl: 0.021416492760181427
      policy_loss: -0.0002167287893826142
      total_loss: 407.5095520019531
      vf_explained_var: 0.9832898378372192
      vf_loss: 407.50750732421875
    sample_time_ms: 20058.39
    update_time_ms: 5.475
  iterations_since_restore: 627
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 14986 s, 632 iter, 6320000 ts, 970 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-19-56
  done: false
  episode_len_mean: 169.95
  episode_reward_max: 1145.548416728813
  episode_reward_mean: 949.6790518869243
  episode_reward_min: -28.08169376557578
  episodes_this_iter: 60
  episodes_total: 36601
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3924.844
    load_time_ms: 1.609
    num_steps_sampled: 6330000
    num_steps_trained: 6330000
    rl_0:
      cur_kl_coeff: 0.10723401606082916
      cur_lr: 4.999999873689376e-05
      entropy: 0.6049908399581909
      kl: 0.01813165470957756
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-21-55
  done: false
  episode_len_mean: 171.88
  episode_reward_max: 1114.5758936545533
  episode_reward_mean: 961.3463134259608
  episode_reward_min: -64.79075372818971
  episodes_this_iter: 56
  episodes_total: 36895
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3920.254
    load_time_ms: 1.582
    num_steps_sampled: 6380000
    num_steps_trained: 6380000
    rl_0:
      cur_kl_coeff: 0.10723401606082916
      cur_lr: 4.999999873689376e-05
      entropy: 0.8879398703575134
      kl: 0.01962554082274437
      policy_loss: 0.000814775237813592
      total_loss: 471.1528015136719
      vf_explained_var: 0.9823796153068542
      vf_loss: 471.14984130859375
    sample_time_ms: 20034.86
    update_time_ms: 5.102
  iterations_since_restore: 638
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 15248 s, 643 iter, 6430000 ts, 959 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-24-18
  done: false
  episode_len_mean: 169.38
  episode_reward_max: 1122.8115370345986
  episode_reward_mean: 978.6349579743919
  episode_reward_min: -53.66592432003313
  episodes_this_iter: 58
  episodes_total: 37250
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.381
    load_time_ms: 1.567
    num_steps_sampled: 6440000
    num_steps_trained: 6440000
    rl_0:
      cur_kl_coeff: 0.16085103154182434
      cur_lr: 4.999999873689376e-05
      entropy: 0.6374092698097229
      kl: 0.022969717159867287

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-26-17
  done: false
  episode_len_mean: 176.36
  episode_reward_max: 1105.1164839353505
  episode_reward_mean: 934.5912254526506
  episode_reward_min: -82.2487149844317
  episodes_this_iter: 57
  episodes_total: 37536
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3898.585
    load_time_ms: 1.606
    num_steps_sampled: 6490000
    num_steps_trained: 6490000
    rl_0:
      cur_kl_coeff: 0.08042551577091217
      cur_lr: 4.999999873689376e-05
      entropy: 1.0528661012649536
      kl: 0.01751549355685711
      policy_loss: -0.0006772329797968268
      total_loss: 1180.0733642578125
      vf_explained_var: 0.953252911567688
      vf_loss: 1180.0726318359375
    sample_time_ms: 19742.965
    update_time_ms: 5.766
  iterations_since_restore: 649
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 15510 s, 654 iter, 6540000 ts, 987 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-28-41
  done: false
  episode_len_mean: 173.52
  episode_reward_max: 1129.8729630440039
  episode_reward_mean: 996.2336359584829
  episode_reward_min: 168.9654590692732
  episodes_this_iter: 56
  episodes_total: 37890
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3927.534
    load_time_ms: 1.691
    num_steps_sampled: 6550000
    num_steps_trained: 6550000
    rl_0:
      cur_kl_coeff: 0.08042551577091217
      cur_lr: 4.999999873689376e-05
      entropy: 0.7277498245239258
      kl: 0.021115342155098915


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-30-41
  done: false
  episode_len_mean: 163.53
  episode_reward_max: 1114.4488915275479
  episode_reward_mean: 964.7332260357856
  episode_reward_min: -55.59546531179461
  episodes_this_iter: 62
  episodes_total: 38192
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3908.724
    load_time_ms: 1.454
    num_steps_sampled: 6600000
    num_steps_trained: 6600000
    rl_0:
      cur_kl_coeff: 0.08042551577091217
      cur_lr: 4.999999873689376e-05
      entropy: 0.49933066964149475
      kl: 0.02115127071738243
      policy_loss: -0.0011597839184105396
      total_loss: 637.33984375
      vf_explained_var: 0.9730693101882935
      vf_loss: 637.33935546875
    sample_time_ms: 20054.644
    update_time_ms: 5.201
  iterations_since_restore: 660
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 15774 s, 665 iter, 6650000 ts, 1.01e+03 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-33-04
  done: false
  episode_len_mean: 169.14
  episode_reward_max: 1112.8999577698194
  episode_reward_mean: 993.5625896069508
  episode_reward_min: -93.62181725435788
  episodes_this_iter: 59
  episodes_total: 38540
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3892.3
    load_time_ms: 1.479
    num_steps_sampled: 6660000
    num_steps_trained: 6660000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.633350670337677
      kl: 0.0116811180487275

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-35-04
  done: false
  episode_len_mean: 162.18
  episode_reward_max: 1136.2259184032746
  episode_reward_mean: 935.01186889042
  episode_reward_min: -62.79495694125498
  episodes_this_iter: 59
  episodes_total: 38835
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3926.827
    load_time_ms: 1.524
    num_steps_sampled: 6710000
    num_steps_trained: 6710000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.8409253358840942
      kl: 0.01640009880065918
      policy_loss: -0.000115954338980373
      total_loss: 729.2007446289062
      vf_explained_var: 0.9710720777511597
      vf_loss: 729.1989135742188
    sample_time_ms: 19894.236
    update_time_ms: 5.155
  iterations_since_restore: 671
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 16035 s, 676 iter, 6760000 ts, 992 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-37-26
  done: false
  episode_len_mean: 170.72
  episode_reward_max: 1123.5103945320047
  episode_reward_mean: 989.5457960207946
  episode_reward_min: -55.10809631355369
  episodes_this_iter: 60
  episodes_total: 39195
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3953.696
    load_time_ms: 1.459
    num_steps_sampled: 6770000
    num_steps_trained: 6770000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.5688751339912415
      kl: 0.01534150168299675


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-39-24
  done: false
  episode_len_mean: 165.59
  episode_reward_max: 1129.0864053807168
  episode_reward_mean: 989.4567750839341
  episode_reward_min: -37.62424940308509
  episodes_this_iter: 61
  episodes_total: 39493
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3960.287
    load_time_ms: 1.614
    num_steps_sampled: 6820000
    num_steps_trained: 6820000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.5114169120788574
      kl: 0.020510215312242508
      policy_loss: -0.0013842005282640457
      total_loss: 225.588134765625
      vf_explained_var: 0.9902688264846802
      vf_loss: 225.5870361328125
    sample_time_ms: 19580.711
    update_time_ms: 5.456
  iterations_since_restore: 682
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 16294 s, 687 iter, 6870000 ts, 1e+03 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-41-46
  done: false
  episode_len_mean: 166.14
  episode_reward_max: 1133.3400669286116
  episode_reward_mean: 985.5017530555186
  episode_reward_min: -70.65655243121614
  episodes_this_iter: 60
  episodes_total: 39857
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3933.161
    load_time_ms: 1.476
    num_steps_sampled: 6880000
    num_steps_trained: 6880000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.692179799079895
      kl: 0.01587438955903053

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-43-46
  done: false
  episode_len_mean: 172.33
  episode_reward_max: 1143.6155311482532
  episode_reward_mean: 977.2316166360149
  episode_reward_min: -68.57326427622797
  episodes_this_iter: 57
  episodes_total: 40157
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3902.243
    load_time_ms: 1.467
    num_steps_sampled: 6930000
    num_steps_trained: 6930000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.9391546249389648
      kl: 0.022707000374794006
      policy_loss: -0.0008892267942428589
      total_loss: 559.6679077148438
      vf_explained_var: 0.9783012270927429
      vf_loss: 559.6660766601562
    sample_time_ms: 19828.311
    update_time_ms: 5.447
  iterations_since_restore: 693
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 16558 s, 698 iter, 6980000 ts, 984 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-46-11
  done: false
  episode_len_mean: 172.37
  episode_reward_max: 1132.4282917199255
  episode_reward_mean: 980.2164323685014
  episode_reward_min: -63.36967623560264
  episodes_this_iter: 62
  episodes_total: 40513
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3903.237
    load_time_ms: 1.525
    num_steps_sampled: 6990000
    num_steps_trained: 6990000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.49504631757736206
      kl: 0.01785573922097683

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-48-11
  done: false
  episode_len_mean: 168.04
  episode_reward_max: 1155.8024787203597
  episode_reward_mean: 973.1747982197188
  episode_reward_min: -77.18808947901759
  episodes_this_iter: 58
  episodes_total: 40809
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3898.575
    load_time_ms: 1.527
    num_steps_sampled: 7040000
    num_steps_trained: 7040000
    rl_0:
      cur_kl_coeff: 0.12063825875520706
      cur_lr: 4.999999873689376e-05
      entropy: 0.4953848421573639
      kl: 0.050459329038858414
      policy_loss: 0.0012214595917612314
      total_loss: 174.2916717529297
      vf_explained_var: 0.9931020736694336
      vf_loss: 174.2843780517578
    sample_time_ms: 20071.74
    update_time_ms: 5.42
  iterations_since_restore: 704
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 16821 s, 709 iter, 7090000 ts, 932 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-50-34
  done: false
  episode_len_mean: 176.76
  episode_reward_max: 1162.8818832320806
  episode_reward_mean: 919.0560732321447
  episode_reward_min: -51.1513928316472
  episodes_this_iter: 59
  episodes_total: 41163
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3891.756
    load_time_ms: 1.559
    num_steps_sampled: 7100000
    num_steps_trained: 7100000
    rl_0:
      cur_kl_coeff: 0.18095742166042328
      cur_lr: 4.999999873689376e-05
      entropy: 0.6596737504005432
      kl: 0.017344921827316284


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-52-35
  done: false
  episode_len_mean: 157.73
  episode_reward_max: 1133.8914215527404
  episode_reward_mean: 953.3604219233048
  episode_reward_min: -73.85766866129005
  episodes_this_iter: 63
  episodes_total: 41462
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3891.914
    load_time_ms: 1.552
    num_steps_sampled: 7150000
    num_steps_trained: 7150000
    rl_0:
      cur_kl_coeff: 0.18095742166042328
      cur_lr: 4.999999873689376e-05
      entropy: 0.4587228298187256
      kl: 0.012159609235823154
      policy_loss: -0.002219034591689706
      total_loss: 927.2155151367188
      vf_explained_var: 0.9633462429046631
      vf_loss: 927.2157592773438
    sample_time_ms: 19959.479
    update_time_ms: 6.31
  iterations_since_restore: 715
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 17084 s, 720 iter, 7200000 ts, 998 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-54-58
  done: false
  episode_len_mean: 169.54
  episode_reward_max: 1154.3934385551954
  episode_reward_mean: 1003.320665850899
  episode_reward_min: -37.77479682409796
  episodes_this_iter: 60
  episodes_total: 41818
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3907.417
    load_time_ms: 1.668
    num_steps_sampled: 7210000
    num_steps_trained: 7210000
    rl_0:
      cur_kl_coeff: 0.18095742166042328
      cur_lr: 4.999999873689376e-05
      entropy: 0.49333906173706055
      kl: 0.01015227846801281

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-56-58
  done: false
  episode_len_mean: 171.64
  episode_reward_max: 1135.9159066379873
  episode_reward_mean: 972.9588995342118
  episode_reward_min: -68.33841742338728
  episodes_this_iter: 59
  episodes_total: 42113
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3915.709
    load_time_ms: 1.619
    num_steps_sampled: 7260000
    num_steps_trained: 7260000
    rl_0:
      cur_kl_coeff: 0.18095742166042328
      cur_lr: 4.999999873689376e-05
      entropy: 0.6397736668586731
      kl: 0.011630004271864891
      policy_loss: -0.0008209309307858348
      total_loss: 440.3726501464844
      vf_explained_var: 0.9816299676895142
      vf_loss: 440.371337890625
    sample_time_ms: 19978.196
    update_time_ms: 4.998
  iterations_since_restore: 726
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 17348 s, 731 iter, 7310000 ts, 962 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_22-59-21
  done: false
  episode_len_mean: 169.57
  episode_reward_max: 1129.9322414520582
  episode_reward_mean: 961.6085688866019
  episode_reward_min: -17.516136526718043
  episodes_this_iter: 62
  episodes_total: 42477
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3894.951
    load_time_ms: 1.557
    num_steps_sampled: 7320000
    num_steps_trained: 7320000
    rl_0:
      cur_kl_coeff: 0.09047871083021164
      cur_lr: 4.999999873689376e-05
      entropy: 0.4349183440208435
      kl: 0.02034324780106544

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-01-19
  done: false
  episode_len_mean: 163.2
  episode_reward_max: 1150.1258889234896
  episode_reward_mean: 967.9524313893413
  episode_reward_min: -72.38909579310224
  episodes_this_iter: 62
  episodes_total: 42775
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3883.16
    load_time_ms: 1.686
    num_steps_sampled: 7370000
    num_steps_trained: 7370000
    rl_0:
      cur_kl_coeff: 0.09047871083021164
      cur_lr: 4.999999873689376e-05
      entropy: 0.50569748878479
      kl: 0.048608992248773575
      policy_loss: -0.00033809131127782166
      total_loss: 688.7011108398438
      vf_explained_var: 0.9739975333213806
      vf_loss: 688.6971435546875
    sample_time_ms: 19667.712
    update_time_ms: 6.89
  iterations_since_restore: 737
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 17604 s, 742 iter, 7420000 ts, 961 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-03-37
  done: false
  episode_len_mean: 167.2
  episode_reward_max: 1142.8243435447291
  episode_reward_mean: 974.194848602299
  episode_reward_min: -69.67008117847286
  episodes_this_iter: 60
  episodes_total: 43121
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3888.122
    load_time_ms: 1.722
    num_steps_sampled: 7430000
    num_steps_trained: 7430000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.47688108682632446
      kl: 0.022211147472262383


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-05-35
  done: false
  episode_len_mean: 164.13
  episode_reward_max: 1160.243359013133
  episode_reward_mean: 954.652968032279
  episode_reward_min: -97.51426607553454
  episodes_this_iter: 62
  episodes_total: 43422
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3884.75
    load_time_ms: 1.555
    num_steps_sampled: 7480000
    num_steps_trained: 7480000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.4538857936859131
      kl: 0.015314903110265732
      policy_loss: -0.0008967824396677315
      total_loss: 830.839111328125
      vf_explained_var: 0.9649321436882019
      vf_loss: 830.837890625
    sample_time_ms: 19271.647
    update_time_ms: 5.153
  iterations_since_restore: 748
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 17863 s, 753 iter, 7530000 ts, 943 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-07-58
  done: false
  episode_len_mean: 169.56
  episode_reward_max: 1137.8917306228366
  episode_reward_mean: 962.3879537455448
  episode_reward_min: 1.1349565091538807
  episodes_this_iter: 57
  episodes_total: 43789
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3889.016
    load_time_ms: 1.555
    num_steps_sampled: 7540000
    num_steps_trained: 7540000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.753372073173523
      kl: 0.023899218067526817


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-09-58
  done: false
  episode_len_mean: 175.75
  episode_reward_max: 1152.3096598317636
  episode_reward_mean: 966.7595684453848
  episode_reward_min: -66.79625393757553
  episodes_this_iter: 58
  episodes_total: 44088
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3914.576
    load_time_ms: 1.734
    num_steps_sampled: 7590000
    num_steps_trained: 7590000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.8216590881347656
      kl: 0.019470397382974625
      policy_loss: -0.0011968910694122314
      total_loss: 398.5935974121094
      vf_explained_var: 0.9841907024383545
      vf_loss: 398.5921630859375
    sample_time_ms: 19968.825
    update_time_ms: 5.255
  iterations_since_restore: 759
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 18125 s, 764 iter, 7640000 ts, 967 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-12-21
  done: false
  episode_len_mean: 175.59
  episode_reward_max: 1166.7786604341584
  episode_reward_mean: 933.1288567434311
  episode_reward_min: -76.68522332520614
  episodes_this_iter: 57
  episodes_total: 44439
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3907.499
    load_time_ms: 1.652
    num_steps_sampled: 7650000
    num_steps_trained: 7650000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.9878206253051758
      kl: 0.016991648823022842

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-14-20
  done: false
  episode_len_mean: 171.72
  episode_reward_max: 1124.1750099779163
  episode_reward_mean: 961.2199444546819
  episode_reward_min: -15.829286413873461
  episodes_this_iter: 58
  episodes_total: 44731
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3904.775
    load_time_ms: 1.56
    num_steps_sampled: 7700000
    num_steps_trained: 7700000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.8732696771621704
      kl: 0.013317260891199112
      policy_loss: -0.0033034582156687975
      total_loss: 1129.3677978515625
      vf_explained_var: 0.9531727433204651
      vf_loss: 1129.369140625
    sample_time_ms: 19963.716
    update_time_ms: 5.197
  iterations_since_restore: 770
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 18388 s, 775 iter, 7750000 ts, 976 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-16-45
  done: false
  episode_len_mean: 166.82
  episode_reward_max: 1149.2773830158387
  episode_reward_mean: 918.3207566556978
  episode_reward_min: -78.32810428389817
  episodes_this_iter: 62
  episodes_total: 45083
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3913.027
    load_time_ms: 1.524
    num_steps_sampled: 7760000
    num_steps_trained: 7760000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.6449313759803772
      kl: 0.02109653875231743


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-18-45
  done: false
  episode_len_mean: 164.56
  episode_reward_max: 1125.2354926563123
  episode_reward_mean: 921.1488587800665
  episode_reward_min: -57.811762097128366
  episodes_this_iter: 62
  episodes_total: 45372
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3911.423
    load_time_ms: 1.475
    num_steps_sampled: 7810000
    num_steps_trained: 7810000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.7284398078918457
      kl: 0.016411276534199715
      policy_loss: -0.0014272744301706553
      total_loss: 893.25
      vf_explained_var: 0.9636813402175903
      vf_loss: 893.2492065429688
    sample_time_ms: 20091.076
    update_time_ms: 5.584
  iterations_since_restore: 781
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 18653 s, 786 iter, 7860000 ts, 980 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-21-09
  done: false
  episode_len_mean: 171.78
  episode_reward_max: 1159.3007002548397
  episode_reward_mean: 980.4525839575276
  episode_reward_min: -70.93060783128388
  episodes_this_iter: 60
  episodes_total: 45725
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3898.834
    load_time_ms: 1.604
    num_steps_sampled: 7870000
    num_steps_trained: 7870000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.5699033737182617
      kl: 0.0189043041318655
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-23-10
  done: false
  episode_len_mean: 177.64
  episode_reward_max: 1137.565582757131
  episode_reward_mean: 970.7265708797829
  episode_reward_min: -3.5615082743080393
  episodes_this_iter: 57
  episodes_total: 46015
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3904.514
    load_time_ms: 1.544
    num_steps_sampled: 7920000
    num_steps_trained: 7920000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.7925058007240295
      kl: 0.014465325511991978
      policy_loss: -6.535878583235899e-06
      total_loss: 808.0029296875
      vf_explained_var: 0.964493989944458
      vf_loss: 808.0009765625
    sample_time_ms: 20109.056
    update_time_ms: 5.258
  iterations_since_restore: 792
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 18916 s, 797 iter, 7970000 ts, 963 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-25-33
  done: false
  episode_len_mean: 172.46
  episode_reward_max: 1133.1088036249744
  episode_reward_mean: 976.2355842809865
  episode_reward_min: -109.20550207666378
  episodes_this_iter: 54
  episodes_total: 46366
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3906.493
    load_time_ms: 1.592
    num_steps_sampled: 7980000
    num_steps_trained: 7980000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 1.0792956352233887
      kl: 0.01242320705205202

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-27-33
  done: false
  episode_len_mean: 164.86
  episode_reward_max: 1169.505928097205
  episode_reward_mean: 1029.8412812499146
  episode_reward_min: 887.2123919637107
  episodes_this_iter: 62
  episodes_total: 46665
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3903.166
    load_time_ms: 1.503
    num_steps_sampled: 8030000
    num_steps_trained: 8030000
    rl_0:
      cur_kl_coeff: 0.13571806252002716
      cur_lr: 4.999999873689376e-05
      entropy: 0.33443570137023926
      kl: 0.04377644136548042
      policy_loss: 0.00141542823985219
      total_loss: 34.724727630615234
      vf_explained_var: 0.9985140562057495
      vf_loss: 34.717369079589844
    sample_time_ms: 19968.356
    update_time_ms: 5.633
  iterations_since_restore: 803
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 19179 s, 808 iter, 8080000 ts, 1.03e+03 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-29-55
  done: false
  episode_len_mean: 168.27
  episode_reward_max: 1145.0273576121865
  episode_reward_mean: 1016.821050780465
  episode_reward_min: -0.7011334140485417
  episodes_this_iter: 59
  episodes_total: 47017
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3907.698
    load_time_ms: 1.524
    num_steps_sampled: 8090000
    num_steps_trained: 8090000
    rl_0:
      cur_kl_coeff: 0.10178853571414948
      cur_lr: 4.999999873689376e-05
      entropy: 0.7571077346801758
      kl: 0.009912127628

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-31-53
  done: false
  episode_len_mean: 168.81
  episode_reward_max: 1149.6468645618284
  episode_reward_mean: 977.8651902691868
  episode_reward_min: -76.86769228058517
  episodes_this_iter: 58
  episodes_total: 47318
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3886.006
    load_time_ms: 1.591
    num_steps_sampled: 8140000
    num_steps_trained: 8140000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.6306630373001099
      kl: 0.019300952553749084
      policy_loss: -0.0008967587491497397
      total_loss: 307.3997497558594
      vf_explained_var: 0.9870432615280151
      vf_loss: 307.399169921875
    sample_time_ms: 19733.204
    update_time_ms: 5.799
  iterations_since_restore: 814
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 19438 s, 819 iter, 8190000 ts, 992 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-34-16
  done: false
  episode_len_mean: 167.18
  episode_reward_max: 1149.432370608754
  episode_reward_mean: 976.8612857472026
  episode_reward_min: -4.877431743308847
  episodes_this_iter: 60
  episodes_total: 47678
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3885.489
    load_time_ms: 1.496
    num_steps_sampled: 8200000
    num_steps_trained: 8200000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.5204561948776245
      kl: 0.02198840118944645
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-36-14
  done: false
  episode_len_mean: 173.11
  episode_reward_max: 1132.964203976741
  episode_reward_mean: 994.8408611342242
  episode_reward_min: -35.66795367158639
  episodes_this_iter: 57
  episodes_total: 47972
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3919.564
    load_time_ms: 1.435
    num_steps_sampled: 8250000
    num_steps_trained: 8250000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.945371687412262
      kl: 0.017482982948422432
      policy_loss: -0.0019800434820353985
      total_loss: 564.1319580078125
      vf_explained_var: 0.9772099256515503
      vf_loss: 564.1326293945312
    sample_time_ms: 19753.838
    update_time_ms: 5.507
  iterations_since_restore: 825
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 19701 s, 830 iter, 8300000 ts, 1.01e+03 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-38-39
  done: false
  episode_len_mean: 164.8
  episode_reward_max: 1103.4291499779517
  episode_reward_mean: 955.5515885364771
  episode_reward_min: -67.63308246116657
  episodes_this_iter: 63
  episodes_total: 48324
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3919.733
    load_time_ms: 1.533
    num_steps_sampled: 8310000
    num_steps_trained: 8310000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.6152089238166809
      kl: 0.018951872363686

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-40-38
  done: false
  episode_len_mean: 163.85
  episode_reward_max: 1143.9502410371301
  episode_reward_mean: 969.2311112468877
  episode_reward_min: -26.997091832818967
  episodes_this_iter: 60
  episodes_total: 48624
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3904.751
    load_time_ms: 1.63
    num_steps_sampled: 8360000
    num_steps_trained: 8360000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.7227953672409058
      kl: 0.01820722036063671
      policy_loss: 4.697495387517847e-05
      total_loss: 805.267822265625
      vf_explained_var: 0.9694461226463318
      vf_loss: 805.2664184570312
    sample_time_ms: 19994.987
    update_time_ms: 5.241
  iterations_since_restore: 836
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_me

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 19963 s, 841 iter, 8410000 ts, 969 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-43-01
  done: false
  episode_len_mean: 167.95
  episode_reward_max: 1167.8740617740225
  episode_reward_mean: 963.1834744350344
  episode_reward_min: -35.74149293701359
  episodes_this_iter: 61
  episodes_total: 48984
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3933.675
    load_time_ms: 1.623
    num_steps_sampled: 8420000
    num_steps_trained: 8420000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.7044068574905396
      kl: 0.0208185575902462
  

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-45-00
  done: false
  episode_len_mean: 165.71
  episode_reward_max: 1125.345982888905
  episode_reward_mean: 998.1644034811336
  episode_reward_min: 1.91385998087884
  episodes_this_iter: 60
  episodes_total: 49286
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3939.775
    load_time_ms: 1.542
    num_steps_sampled: 8470000
    num_steps_trained: 8470000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.5980193018913269
      kl: 0.02438313141465187
      policy_loss: -0.0018101743189617991
      total_loss: 434.788818359375
      vf_explained_var: 0.9817610383033752
      vf_loss: 434.788818359375
    sample_time_ms: 19774.198
    update_time_ms: 5.159
  iterations_since_restore: 847
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 20223 s, 852 iter, 8520000 ts, 993 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-47-21
  done: false
  episode_len_mean: 167.93
  episode_reward_max: 1136.7065668235205
  episode_reward_mean: 958.3114553584917
  episode_reward_min: -87.86465354326356
  episodes_this_iter: 58
  episodes_total: 49640
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3917.754
    load_time_ms: 1.469
    num_steps_sampled: 8530000
    num_steps_trained: 8530000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.8686307072639465
      kl: 0.029049444943666458


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-49-20
  done: false
  episode_len_mean: 165.09
  episode_reward_max: 1131.6902625795126
  episode_reward_mean: 970.816137218563
  episode_reward_min: -59.436640262409185
  episodes_this_iter: 59
  episodes_total: 49942
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3928.116
    load_time_ms: 1.395
    num_steps_sampled: 8580000
    num_steps_trained: 8580000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.5441147089004517
      kl: 0.021873220801353455
      policy_loss: -0.0011783813824877143
      total_loss: 488.4789733886719
      vf_explained_var: 0.980609118938446
      vf_loss: 488.4784240722656
    sample_time_ms: 19653.286
    update_time_ms: 5.131
  iterations_since_restore: 858
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 20482 s, 863 iter, 8630000 ts, 1.01e+03 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-51-41
  done: false
  episode_len_mean: 169.82
  episode_reward_max: 1124.6709367405977
  episode_reward_mean: 1017.3694671611457
  episode_reward_min: 414.7523784745796
  episodes_this_iter: 59
  episodes_total: 50303
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3918.349
    load_time_ms: 1.402
    num_steps_sampled: 8640000
    num_steps_trained: 8640000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.5692613124847412
      kl: 0.03972827643156

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-53-42
  done: false
  episode_len_mean: 166.17
  episode_reward_max: 1137.943555330165
  episode_reward_mean: 982.4036324779433
  episode_reward_min: -106.28888310813846
  episodes_this_iter: 59
  episodes_total: 50606
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3900.472
    load_time_ms: 1.354
    num_steps_sampled: 8690000
    num_steps_trained: 8690000
    rl_0:
      cur_kl_coeff: 0.0763414055109024
      cur_lr: 4.999999873689376e-05
      entropy: 0.7006295323371887
      kl: 0.02535966970026493
      policy_loss: -0.0026682065799832344
      total_loss: 343.45147705078125
      vf_explained_var: 0.9860188364982605
      vf_loss: 343.45220947265625
    sample_time_ms: 19879.355
    update_time_ms: 5.327
  iterations_since_restore: 869
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 20744 s, 874 iter, 8740000 ts, 974 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-56-03
  done: false
  episode_len_mean: 172.01
  episode_reward_max: 1124.5137385041535
  episode_reward_mean: 980.1097615464386
  episode_reward_min: -39.2397061144855
  episodes_this_iter: 58
  episodes_total: 50959
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3886.942
    load_time_ms: 1.519
    num_steps_sampled: 8750000
    num_steps_trained: 8750000
    rl_0:
      cur_kl_coeff: 0.11451210081577301
      cur_lr: 4.999999873689376e-05
      entropy: 0.7042169570922852
      kl: 0.017933307215571404


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-30_23-58-03
  done: false
  episode_len_mean: 161.4
  episode_reward_max: 1128.859177163455
  episode_reward_mean: 994.7474962892721
  episode_reward_min: -27.374503638747484
  episodes_this_iter: 62
  episodes_total: 51262
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3901.374
    load_time_ms: 1.499
    num_steps_sampled: 8800000
    num_steps_trained: 8800000
    rl_0:
      cur_kl_coeff: 0.11451210081577301
      cur_lr: 4.999999873689376e-05
      entropy: 0.25214728713035583
      kl: 0.02029796503484249
      policy_loss: -0.003179114544764161
      total_loss: 529.9653930664062
      vf_explained_var: 0.9782423377037048
      vf_loss: 529.9661865234375
    sample_time_ms: 19778.625
    update_time_ms: 5.021
  iterations_since_restore: 880
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 21004 s, 885 iter, 8850000 ts, 1.02e+03 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-00-24
  done: false
  episode_len_mean: 168.04
  episode_reward_max: 1141.8625392990918
  episode_reward_mean: 985.9826118703288
  episode_reward_min: -20.76374010437101
  episodes_this_iter: 59
  episodes_total: 51620
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3897.518
    load_time_ms: 1.424
    num_steps_sampled: 8860000
    num_steps_trained: 8860000
    rl_0:
      cur_kl_coeff: 0.11451210081577301
      cur_lr: 4.999999873689376e-05
      entropy: 0.6262328028678894
      kl: 0.0179898794740

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-02-23
  done: false
  episode_len_mean: 165.55
  episode_reward_max: 1126.741483820145
  episode_reward_mean: 980.889447998623
  episode_reward_min: -80.84230985602196
  episodes_this_iter: 60
  episodes_total: 51920
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3883.732
    load_time_ms: 1.586
    num_steps_sampled: 8910000
    num_steps_trained: 8910000
    rl_0:
      cur_kl_coeff: 0.11451210081577301
      cur_lr: 4.999999873689376e-05
      entropy: 0.37094244360923767
      kl: 0.022992637008428574
      policy_loss: -0.0022267948370426893
      total_loss: 470.29412841796875
      vf_explained_var: 0.9792593121528625
      vf_loss: 470.29376220703125
    sample_time_ms: 19731.783
    update_time_ms: 5.216
  iterations_since_restore: 891
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewa

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 21264 s, 896 iter, 8960000 ts, 996 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-04-44
  done: false
  episode_len_mean: 168.48
  episode_reward_max: 1120.867519327417
  episode_reward_mean: 1001.1721797520347
  episode_reward_min: -68.1600863894528
  episodes_this_iter: 60
  episodes_total: 52274
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3882.94
    load_time_ms: 1.504
    num_steps_sampled: 8970000
    num_steps_trained: 8970000
    rl_0:
      cur_kl_coeff: 0.11451210081577301
      cur_lr: 4.999999873689376e-05
      entropy: 0.37893766164779663
      kl: 0.024232307448983192


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-06-45
  done: false
  episode_len_mean: 170.01
  episode_reward_max: 1111.4550008363674
  episode_reward_mean: 984.2034317407329
  episode_reward_min: -83.90699619413546
  episodes_this_iter: 59
  episodes_total: 52571
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3896.025
    load_time_ms: 1.38
    num_steps_sampled: 9020000
    num_steps_trained: 9020000
    rl_0:
      cur_kl_coeff: 0.11451210081577301
      cur_lr: 4.999999873689376e-05
      entropy: 0.17517036199569702
      kl: 0.06388194859027863
      policy_loss: -0.0002865775895770639
      total_loss: 60.56697082519531
      vf_explained_var: 0.9977162480354309
      vf_loss: 60.55994415283203
    sample_time_ms: 19923.885
    update_time_ms: 5.648
  iterations_since_restore: 902
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 21527 s, 907 iter, 9070000 ts, 952 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-09-07
  done: false
  episode_len_mean: 168.36
  episode_reward_max: 1148.2327388009633
  episode_reward_mean: 972.6488671592434
  episode_reward_min: -14.973602077284909
  episodes_this_iter: 59
  episodes_total: 52922
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3893.555
    load_time_ms: 1.587
    num_steps_sampled: 9080000
    num_steps_trained: 9080000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.5501440763473511
      kl: 0.02125297300517559


Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-11-08
  done: false
  episode_len_mean: 171.13
  episode_reward_max: 1147.003136099148
  episode_reward_mean: 960.3376964164286
  episode_reward_min: -60.809531410524045
  episodes_this_iter: 60
  episodes_total: 53214
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3894.819
    load_time_ms: 1.591
    num_steps_sampled: 9130000
    num_steps_trained: 9130000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.833781898021698
      kl: 0.025036433711647987
      policy_loss: -0.003856681752949953
      total_loss: 353.0917053222656
      vf_explained_var: 0.9872283935546875
      vf_loss: 353.09124755859375
    sample_time_ms: 19937.642
    update_time_ms: 6.817
  iterations_since_restore: 913
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 21790 s, 918 iter, 9180000 ts, 938 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-13-32
  done: false
  episode_len_mean: 170.4
  episode_reward_max: 1130.5679735215845
  episode_reward_mean: 942.588483454521
  episode_reward_min: -87.0929044944059
  episodes_this_iter: 57
  episodes_total: 53560
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3904.349
    load_time_ms: 1.611
    num_steps_sampled: 9190000
    num_steps_trained: 9190000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.9938539266586304
      kl: 0.014626726508140564
   

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-15-30
  done: false
  episode_len_mean: 160.79
  episode_reward_max: 1129.0204199652385
  episode_reward_mean: 940.8454536864627
  episode_reward_min: -72.80085646040591
  episodes_this_iter: 61
  episodes_total: 53861
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3880.515
    load_time_ms: 1.731
    num_steps_sampled: 9240000
    num_steps_trained: 9240000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.30202943086624146
      kl: 0.016587356105446815
      policy_loss: -0.0031632829923182726
      total_loss: 579.1061401367188
      vf_explained_var: 0.9748852252960205
      vf_loss: 579.1064453125
    sample_time_ms: 19851.237
    update_time_ms: 5.473
  iterations_since_restore: 924
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 22051 s, 929 iter, 9290000 ts, 939 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-17-54
  done: false
  episode_len_mean: 175.91
  episode_reward_max: 1123.7011447057992
  episode_reward_mean: 967.722163382752
  episode_reward_min: -67.24323719864276
  episodes_this_iter: 60
  episodes_total: 54206
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3865.04
    load_time_ms: 1.601
    num_steps_sampled: 9300000
    num_steps_trained: 9300000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.574245810508728
      kl: 0.02106541022658348
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-19-51
  done: false
  episode_len_mean: 166.95
  episode_reward_max: 1139.9107008312587
  episode_reward_mean: 983.3241920543705
  episode_reward_min: -35.68619411807009
  episodes_this_iter: 58
  episodes_total: 54498
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3890.091
    load_time_ms: 1.512
    num_steps_sampled: 9350000
    num_steps_trained: 9350000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.733148455619812
      kl: 0.018214449286460876
      policy_loss: -0.001590574043802917
      total_loss: 348.7069396972656
      vf_explained_var: 0.986057698726654
      vf_loss: 348.70538330078125
    sample_time_ms: 19746.151
    update_time_ms: 5.683
  iterations_since_restore: 935
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 22311 s, 940 iter, 9400000 ts, 1.02e+03 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-22-12
  done: false
  episode_len_mean: 173.05
  episode_reward_max: 1138.8299512894298
  episode_reward_mean: 1003.6509680719182
  episode_reward_min: -35.82005815243048
  episodes_this_iter: 57
  episodes_total: 54861
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3911.247
    load_time_ms: 1.47
    num_steps_sampled: 9410000
    num_steps_trained: 9410000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.6683480143547058
      kl: 0.01608735695481

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-24-10
  done: false
  episode_len_mean: 162.5
  episode_reward_max: 1157.2933390994922
  episode_reward_mean: 982.1992276624135
  episode_reward_min: -52.7960100414644
  episodes_this_iter: 61
  episodes_total: 55157
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3906.335
    load_time_ms: 1.497
    num_steps_sampled: 9460000
    num_steps_trained: 9460000
    rl_0:
      cur_kl_coeff: 0.1717681735754013
      cur_lr: 4.999999873689376e-05
      entropy: 0.45133912563323975
      kl: 0.015032238326966763
      policy_loss: -0.0032471157610416412
      total_loss: 734.7161254882812
      vf_explained_var: 0.9677501320838928
      vf_loss: 734.716796875
    sample_time_ms: 19513.562
    update_time_ms: 6.256
  iterations_since_restore: 946
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 22569 s, 951 iter, 9510000 ts, 975 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-26-32
  done: false
  episode_len_mean: 169.76
  episode_reward_max: 1134.003825338268
  episode_reward_mean: 976.2698369552776
  episode_reward_min: -43.50201579164323
  episodes_this_iter: 60
  episodes_total: 55510
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3894.804
    load_time_ms: 1.542
    num_steps_sampled: 9520000
    num_steps_trained: 9520000
    rl_0:
      cur_kl_coeff: 0.25765225291252136
      cur_lr: 4.999999873689376e-05
      entropy: 0.690679132938385
      kl: 0.014462902210652828
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-28-31
  done: false
  episode_len_mean: 165.37
  episode_reward_max: 1145.0266474321406
  episode_reward_mean: 977.3653517063153
  episode_reward_min: -89.19605328587598
  episodes_this_iter: 61
  episodes_total: 55807
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3886.955
    load_time_ms: 1.513
    num_steps_sampled: 9570000
    num_steps_trained: 9570000
    rl_0:
      cur_kl_coeff: 0.25765225291252136
      cur_lr: 4.999999873689376e-05
      entropy: 0.7102014422416687
      kl: 0.013209037482738495
      policy_loss: -0.0013995659537613392
      total_loss: 752.287353515625
      vf_explained_var: 0.9720107913017273
      vf_loss: 752.2853393554688
    sample_time_ms: 19830.602
    update_time_ms: 5.496
  iterations_since_restore: 957
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 22830 s, 962 iter, 9620000 ts, 990 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-30-53
  done: false
  episode_len_mean: 180.5
  episode_reward_max: 1137.5590349907895
  episode_reward_mean: 965.8382566256365
  episode_reward_min: -95.43477767036632
  episodes_this_iter: 57
  episodes_total: 56145
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3880.456
    load_time_ms: 1.533
    num_steps_sampled: 9630000
    num_steps_trained: 9630000
    rl_0:
      cur_kl_coeff: 0.12882612645626068
      cur_lr: 4.999999873689376e-05
      entropy: 1.1464630365371704
      kl: 0.02116253413259983
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-32-50
  done: false
  episode_len_mean: 164.91
  episode_reward_max: 1121.2063949099902
  episode_reward_mean: 984.2798525269665
  episode_reward_min: -62.17529070213999
  episodes_this_iter: 61
  episodes_total: 56439
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3876.783
    load_time_ms: 1.456
    num_steps_sampled: 9680000
    num_steps_trained: 9680000
    rl_0:
      cur_kl_coeff: 0.12882612645626068
      cur_lr: 4.999999873689376e-05
      entropy: 0.5445531606674194
      kl: 0.018785754218697548
      policy_loss: -0.000620033300947398
      total_loss: 790.2498168945312
      vf_explained_var: 0.9663702249526978
      vf_loss: 790.2479858398438
    sample_time_ms: 19493.831
    update_time_ms: 6.435
  iterations_since_restore: 968
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_reward

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 23089 s, 973 iter, 9730000 ts, 979 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-35-12
  done: false
  episode_len_mean: 178.84
  episode_reward_max: 1161.5810492301728
  episode_reward_mean: 1009.8286656459869
  episode_reward_min: -65.634468916678
  episodes_this_iter: 59
  episodes_total: 56787
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3878.579
    load_time_ms: 1.486
    num_steps_sampled: 9740000
    num_steps_trained: 9740000
    rl_0:
      cur_kl_coeff: 0.12882612645626068
      cur_lr: 4.999999873689376e-05
      entropy: 0.6592408418655396
      kl: 0.01969488337635994
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-37-11
  done: false
  episode_len_mean: 170.69
  episode_reward_max: 1162.9196071294193
  episode_reward_mean: 1008.9673813009562
  episode_reward_min: 53.433188997279416
  episodes_this_iter: 59
  episodes_total: 57081
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3883.737
    load_time_ms: 1.603
    num_steps_sampled: 9790000
    num_steps_trained: 9790000
    rl_0:
      cur_kl_coeff: 0.12882612645626068
      cur_lr: 4.999999873689376e-05
      entropy: 0.7407231330871582
      kl: 0.015120946802198887
      policy_loss: -0.003001484554260969
      total_loss: 437.1906433105469
      vf_explained_var: 0.9822737574577332
      vf_loss: 437.19171142578125
    sample_time_ms: 19826.41
    update_time_ms: 6.088
  iterations_since_restore: 979
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 23350 s, 984 iter, 9840000 ts, 999 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-39-34
  done: false
  episode_len_mean: 166.95
  episode_reward_max: 1128.7375414523162
  episode_reward_mean: 982.6853723999925
  episode_reward_min: -65.12621023927525
  episodes_this_iter: 58
  episodes_total: 57430
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3894.278
    load_time_ms: 1.589
    num_steps_sampled: 9850000
    num_steps_trained: 9850000
    rl_0:
      cur_kl_coeff: 0.12882612645626068
      cur_lr: 4.999999873689376e-05
      entropy: 0.929443359375
      kl: 0.02421778254210949
    

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-41-33
  done: false
  episode_len_mean: 187.21
  episode_reward_max: 1125.3407439612251
  episode_reward_mean: 987.5414882351419
  episode_reward_min: 67.76582654437601
  episodes_this_iter: 49
  episodes_total: 57713
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3892.186
    load_time_ms: 1.629
    num_steps_sampled: 9900000
    num_steps_trained: 9900000
    rl_0:
      cur_kl_coeff: 0.12882612645626068
      cur_lr: 4.999999873689376e-05
      entropy: 1.9097716808319092
      kl: 0.014920528046786785
      policy_loss: -0.00010753957758424804
      total_loss: 586.8517456054688
      vf_explained_var: 0.9784074425697327
      vf_loss: 586.8500366210938
    sample_time_ms: 19867.417
    update_time_ms: 5.573
  iterations_since_restore: 990
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 7681
  policy_rewar

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	RUNNING [pid=7681], 23612 s, 995 iter, 9950000 ts, 976 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:
  custom_metrics: {}
  date: 2019-03-31_00-43-57
  done: false
  episode_len_mean: 168.25
  episode_reward_max: 1160.7607466008942
  episode_reward_mean: 1017.6002274137028
  episode_reward_min: 0.888733226812576
  episodes_this_iter: 60
  episodes_total: 58064
  experiment_id: 317c3905198e469ea331d85175f15b15
  hostname: Gandalf
  info:
    grad_time_ms: 3891.972
    load_time_ms: 1.634
    num_steps_sampled: 9960000
    num_steps_trained: 9960000
    rl_0:
      cur_kl_coeff: 0.12882612645626068
      cur_lr: 4.999999873689376e-05
      entropy: 0.33415332436561584
      kl: 0.05998612567782402

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
TERMINATED trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit-v0_0:	TERMINATED [pid=7681], 23732 s, 1000 iter, 10000000 ts, 952 rew

