# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout, kept in a single constant so that every cell of the
# notebook uses the same value.
# Original note (translated): 103 is the maximum horizon if the rollout should
# stop before leaving; the default was 500.
HORIZON = 500

# Name of the experiment.
experiment_name = "IntersectionExample"

# Name of the scenario class used by the experiment.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the list printed above spells this entry
# 'IntersectionScenarioTW', while the name used here is
# 'IntersectionTWScenario' -- confirm which spelling the package exports.
scenario_name = "IntersectionTWScenario"

# Name of the (multi-agent) environment class used by the experiment.
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv', 'MultiAgentIntersectionEnv_baseline_1', 'MultiAgentIntersectionEnv_baseline_2', 'MultiAgentIntersectionEnv_baseline_3']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network parameters passed through `additional_params`.
additionalNetParams = dict(edge_length=40, lanes=1, speed_limit=30)

# Network configuration. The trailing comments record the default of each
# argument as noted by the notebook author.
net_params = NetParams(
    # Set to False so that intersections are not skipped.
    no_internal_links=False,                 # default: True
    inflows=None,                            # default: None
    osm_path=None,                           # default: None
    netfile=None,                            # default: None
    additional_params=additionalNetParams,   # default: None
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial placement of the vehicles at the start of a rollout / on reset.
# The trailing comments record the default of each argument as noted by the
# notebook author.
initial_config = InitialConfig(
    shuffle=True,                      # default: False
    spacing="custom",                  # default: "uniform"
    min_gap=10,                        # default: 0
    perturbation=29.99,                # default: 0.0
    x0=0,                              # default: 0
    bunching=0,                        # default: 0
    lanes_distribution=float("inf"),   # default: float("inf")
    edges_distribution="all",          # default: "all"
    additional_params=None,            # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# SUMO simulation settings. Every argument is spelled out explicitly and is set
# to the value the notebook author recorded as its default, so individual
# settings can be tuned in place.
sumo_params = SumoParams(
    port=None,
    sim_step=0.1,
    emission_path=None,
    lateral_resolution=None,
    no_step_log=True,
    render=False,
    save_render=False,
    sight_radius=25,
    show_radius=False,
    pxpm=2,
    overtake_right=False,
    seed=None,
    restart_instance=False,
    print_warnings=True,
    teleport_time=-1,
    num_clients=1,
    sumo_binary=None,
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific parameters passed through `additional_params`:
#   max_accel       -- maximum acceleration of autonomous vehicles
#   max_decel       -- maximum deceleration of autonomous vehicles
#   target_velocity -- target velocity value handed to the environment
additionalEnvParams = dict(max_accel=3, max_decel=3, target_velocity=30)

# Environment configuration. The trailing comments record the default of each
# argument as noted by the notebook author.
env_params = EnvParams(
    additional_params=additionalEnvParams,   # default: None
    horizon=HORIZON,                         # default: 500
    warmup_steps=0,                          # default: 0
    sims_per_step=1,                         # default: 1
    evaluate=False,                          # default: False
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# Controllers and SUMO parameter classes used when adding vehicles.
# The two commented-out imports are the SUMO-based controllers that are not
# used in this notebook.
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import; nothing from `random` is used in the cells
# visible here -- consider removing it or importing the needed names explicitly.
from random import *

# Empty vehicle container; vehicles are added to it in a later cell.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Start from an empty container so that re-running this cell always yields the
# same vehicle set instead of adding to whatever an earlier run left behind.
# NOTE(review): assumes VehicleParams.add() accumulates entries on repeated
# calls -- confirm against the Flow version in use.
vehicles = VehicleParams()

# Car-following parameters (default: None); the speed mode is set here.
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (default: None); not passed to vehicles.add() below.
lc_parameter = None

vehicles.add(
    # name of the vehicle type
    veh_id="rl",
    # acceleration controller, default: (SumoCarFollowingController, {})
    acceleration_controller=(RLController, {}),
    # lane change controller, default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # routing controller, default: None
    routing_controller=(ContinuousRouter, {}),
    # initial speed, default: 0
    initial_speed=0,
    # number of vehicles of this type, default: 1
    num_vehicles=2,
    # car-following parameters defined above
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Bundle all settings into the flow_params dictionary.
# The keys must be spelled exactly as below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# Number of parallel rollout workers.
N_CPUS = 2
# Number of rollouts per training iteration.
N_ROLLOUTS = 20

# Reserve one CPU more than the number of workers.
# ignore_reinit_error=True keeps this cell re-runnable in a live kernel:
# a second ray.init() call would otherwise raise because Ray is already up.
ray.init(redirect_output=True, num_cpus=N_CPUS + 1, ignore_reinit_error=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-20_23-42-41_12635/logs.
Waiting for redis server at 127.0.0.1:53923 to respond...
Waiting for redis server at 127.0.0.1:47681 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=a0a144a5bab3e38edf499afd28254d461f3b525382fe70af



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-03-20_23-42-41_12635/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-20_23-42-41_12635/sockets/raylet'],
 'redis_address': '192.168.2.102:53923',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=a0a144a5bab3e38edf499afd28254d461f3b525382fe70af'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or a user-defined trainable function or
# class registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep copy on purpose: a shallow .copy() shares the nested "model" and
# "env_config" dicts with agent_cls._default_config, so the in-place updates
# below would silently modify the class-level defaults as well.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.9  # discount rate (original note: default 0.999)
# size of hidden layers in network (original note: default 64 32)
config["model"].update({"fcnet_hiddens": [100, 50, 25]})
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97
# Disabled overrides kept from the original experiment:
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"]  # 200 default, still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialise flow_params to a JSON string and
# store it, together with the algorithm name, in the env_config.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment.
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping.
# An environment instance is created here only to read its observation and
# action spaces.
# NOTE(review): the output of this cell shows "Starting SUMO on port ...", and
# nothing in this cell shuts that instance down again -- confirm whether it
# should be terminated explicitly.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space


def gen_policy():
    """Return the policy spec: (policy class, obs space, action space, config)."""
    return PPOPolicyGraph, obs_space, act_space, {}


def policy_mapping_fn(agent_id):
    """Map every agent id to the single policy 'rl_0'."""
    return 'rl_0'


# Exactly one policy graph, registered under the id 'rl_0'.
policy_graphs = {'rl_0': gen_policy()}

config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
    'policies_to_train': ['rl_0'],
}

 Starting SUMO on port 35979


25.810823812562866
19.209701566894147


In [13]:
# Launch training; the experiment is keyed by the tag stored in flow_params.
trials = run_experiments({
    flow_params["exp_tag"]: dict(
        # RL algorithm to run
        run=alg_run,
        # environment name generated earlier
        env=gym_name,
        # configuration params (must match the "run" value); shallow copy
        config=dict(config),
        # number of iterations between checkpoints
        checkpoint_freq=1,
        max_failures=999,
        # stopping condition: number of training iterations to stop after
        stop=dict(training_iteration=1000),
    ),
})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-20_23-42-430ht1kvt9 -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_23-43-47
  done: false
  episode_len_mean: 485.3
  episode_reward_max: 260.1386717794316
  episode_reward_mean: 104.50339380636703
  episode_reward_min: -135.2918705150996
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 4752.245
    load_time_ms: 46.154
    num_steps_sampled: 10000
    num_steps_trained: 

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_23-45-49
  done: false
  episode_len_mean: 276.81
  episode_reward_max: 376.0977609287789
  episode_reward_mean: 113.25592566381307
  episode_reward_min: -165.7508589951873
  episodes_this_iter: 32
  episodes_total: 175
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 4293.955
    load_time_ms: 9.186
    num_steps_sampled: 60000
    num_steps_trained: 60000
    rl_0:
      cur_kl_coeff: 0.006250001490116119
      cur_lr: 4.999999873689376e-05
      entropy: 1.413277268409729
      kl: 0.0043801916763186455
      policy_loss: -0.0019741083960980177
      total_loss: 34.99400329589844
      vf_explained_var: 0.4350240230560303
      vf_loss: 34.995948791503906
    sample_time_ms: 21132.968
    update_time_ms: 119.305
  iterations_since_restore: 6
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 56.627962

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 267 s, 11 iter, 110000 ts, 114 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_23-48-07
  done: false
  episode_len_mean: 190.76
  episode_reward_max: 347.43331604544795
  episode_reward_mean: 111.15970060103659
  episode_reward_min: -165.23983642176515
  episodes_this_iter: 54
  episodes_total: 456
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 4039.984
    load_time_ms: 1.668
    num_steps_sampled: 120000
    num_steps_trained: 120000
    rl_0:
      cur_kl_coeff: 0.00019531254656612873
      cur_lr: 4.999999873689376e-05
      entropy: 1.3768444061279297
      kl: 0.002570231445133686
      policy_loss: -0.0011611907975748181
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 407 s, 17 iter, 170000 ts, 162 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_23-50-27
  done: false
  episode_len_mean: 138.9
  episode_reward_max: 332.6232781298981
  episode_reward_mean: 185.5375352776726
  episode_reward_min: -163.5594249847785
  episodes_this_iter: 74
  episodes_total: 842
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 4010.697
    load_time_ms: 1.662
    num_steps_sampled: 180000
    num_steps_trained: 180000
    rl_0:
      cur_kl_coeff: 2.441406832076609e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.3100078105926514
      kl: 0.010581749491393566
      policy_loss: -0.0021600350737571716
      total_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 544 s, 23 iter, 230000 ts, 160 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_23-52-44
  done: false
  episode_len_mean: 118.07
  episode_reward_max: 348.85261408596284
  episode_reward_mean: 206.35341746696548
  episode_reward_min: -162.05121677224912
  episodes_this_iter: 83
  episodes_total: 1331
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3947.411
    load_time_ms: 1.593
    num_steps_sampled: 240000
    num_steps_trained: 240000
    rl_0:
      cur_kl_coeff: 6.103517080191523e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.2192636728286743
      kl: 0.011002766899764538
      policy_loss: -0.0031391833908855915
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 680 s, 29 iter, 290000 ts, 212 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_23-54-59
  done: false
  episode_len_mean: 114.14
  episode_reward_max: 349.0498943026712
  episode_reward_mean: 254.22445241957195
  episode_reward_min: -164.18026480315373
  episodes_this_iter: 90
  episodes_total: 1869
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3896.18
    load_time_ms: 1.508
    num_steps_sampled: 300000
    num_steps_trained: 300000
    rl_0:
      cur_kl_coeff: 3.0517585400957614e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.1543614864349365
      kl: 0.011303003877401352
      policy_loss: -0.001411590026691556
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 815 s, 35 iter, 350000 ts, 248 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_23-57-15
  done: false
  episode_len_mean: 97.04807692307692
  episode_reward_max: 347.90038462804955
  episode_reward_mean: 195.06250771873505
  episode_reward_min: -167.75556487239226
  episodes_this_iter: 104
  episodes_total: 2446
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3834.76
    load_time_ms: 1.646
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 3.0517585400957614e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.0547690391540527
      kl: 0.007706933189183474
      policy_loss: -0.001931581762619

  custom_metrics: {}
  date: 2019-03-20_23-59-07
  done: false
  episode_len_mean: 97.68932038834951
  episode_reward_max: 354.455942219033
  episode_reward_mean: 204.8881955186276
  episode_reward_min: -159.94317955975546
  episodes_this_iter: 103
  episodes_total: 2956
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3775.164
    load_time_ms: 1.768
    num_steps_sampled: 410000
    num_steps_trained: 410000
    rl_0:
      cur_kl_coeff: 1.907349087559851e-07
      cur_lr: 4.999999873689376e-05
      entropy: 1.0393081903457642
      kl: 0.009383324533700943
      policy_loss: -0.00042080230196006596
      total_loss: 35.48716735839844
      vf_explained_var: 0.833685040473938
      vf_loss: 35.48759078979492
    sample_time_ms: 18736.009
    update_time_ms: 6.105
  iterations_since_restore: 41
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 102.4440977593138
  time_since_restore: 950

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 1062 s, 46 iter, 460000 ts, 170 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-01-21
  done: false
  episode_len_mean: 100.6
  episode_reward_max: 360.30534985776546
  episode_reward_mean: 238.28113785963396
  episode_reward_min: -161.12789599816344
  episodes_this_iter: 98
  episodes_total: 3566
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3789.132
    load_time_ms: 1.77
    num_steps_sampled: 470000
    num_steps_trained: 470000
    rl_0:
      cur_kl_coeff: 2.3841863594498136e-08
      cur_lr: 4.999999873689376e-05
      entropy: 0.9870672225952148
      kl: 0.011840893886983395
      policy_loss: -0.0025972239673137665
      t

  custom_metrics: {}
  date: 2019-03-21_00-03-15
  done: false
  episode_len_mean: 90.41441441441441
  episode_reward_max: 363.3133231252419
  episode_reward_mean: 173.21169580335982
  episode_reward_min: -167.08318612759453
  episodes_this_iter: 111
  episodes_total: 4089
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3810.82
    load_time_ms: 1.617
    num_steps_sampled: 520000
    num_steps_trained: 520000
    rl_0:
      cur_kl_coeff: 5.960465898624534e-09
      cur_lr: 4.999999873689376e-05
      entropy: 0.9390104413032532
      kl: 0.012951407581567764
      policy_loss: -0.0015066079795360565
      total_loss: 47.95857620239258
      vf_explained_var: 0.8306251764297485
      vf_loss: 47.960079193115234
    sample_time_ms: 18629.078
    update_time_ms: 5.878
  iterations_since_restore: 52
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 86.6058479016799
  time_since_restore: 11

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 1311 s, 57 iter, 570000 ts, 165 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-05-32
  done: false
  episode_len_mean: 92.31481481481481
  episode_reward_max: 367.60565117838865
  episode_reward_mean: 182.31932281212727
  episode_reward_min: -166.93826372229142
  episodes_this_iter: 108
  episodes_total: 4723
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3817.73
    load_time_ms: 1.504
    num_steps_sampled: 580000
    num_steps_trained: 580000
    rl_0:
      cur_kl_coeff: 7.450582373280668e-10
      cur_lr: 4.999999873689376e-05
      entropy: 0.9479886293411255
      kl: 0.008707270957529545
      policy_loss: -0.000570088450331

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-07-24
  done: false
  episode_len_mean: 97.00980392156863
  episode_reward_max: 376.1876317032308
  episode_reward_mean: 220.91581667947855
  episode_reward_min: -164.77629606161238
  episodes_this_iter: 102
  episodes_total: 5239
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3817.03
    load_time_ms: 1.609
    num_steps_sampled: 630000
    num_steps_trained: 630000
    rl_0:
      cur_kl_coeff: 9.313227966600834e-11
      cur_lr: 4.999999873689376e-05
      entropy: 0.8993154764175415
      kl: 0.009094463661313057
      policy_loss: -0.0005572147783823311
      total_loss: 42.79899215698242
      vf_explained_var: 0.8058410882949829
      vf_loss: 42.7995491027832
    sample_time_ms: 18791.309
    update_time_ms: 5.234
  iterations_since_restore: 63
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 1557 s, 68 iter, 680000 ts, 191 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-09-38
  done: false
  episode_len_mean: 92.0925925925926
  episode_reward_max: 375.7397123210589
  episode_reward_mean: 198.09834758706924
  episode_reward_min: -166.69339057514767
  episodes_this_iter: 108
  episodes_total: 5890
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3796.355
    load_time_ms: 1.638
    num_steps_sampled: 690000
    num_steps_trained: 690000
    rl_0:
      cur_kl_coeff: 5.8207674791255215e-12
      cur_lr: 4.999999873689376e-05
      entropy: 0.822489321231842
      kl: 0.0057703363709151745
      policy_loss: -0.000269839772954

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-11-29
  done: false
  episode_len_mean: 89.10714285714286
  episode_reward_max: 374.8446999889488
  episode_reward_mean: 172.46879428623075
  episode_reward_min: -164.82365053440995
  episodes_this_iter: 112
  episodes_total: 6439
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3747.453
    load_time_ms: 1.572
    num_steps_sampled: 740000
    num_steps_trained: 740000
    rl_0:
      cur_kl_coeff: 3.637979674453451e-13
      cur_lr: 4.999999873689376e-05
      entropy: 0.7859329581260681
      kl: 0.012231891974806786
      policy_loss: -0.0021214596927165985
      total_loss: 35.48634338378906
      vf_explained_var: 0.8875454664230347
      vf_loss: 35.48846435546875
    sample_time_ms: 18457.225
    update_time_ms: 5.799
  iterations_since_restore: 74
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 1801 s, 79 iter, 790000 ts, 205 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-13-42
  done: false
  episode_len_mean: 88.92857142857143
  episode_reward_max: 374.41871532215674
  episode_reward_mean: 179.39745252731518
  episode_reward_min: -164.66657460878702
  episodes_this_iter: 112
  episodes_total: 7113
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3733.095
    load_time_ms: 1.673
    num_steps_sampled: 800000
    num_steps_trained: 800000
    rl_0:
      cur_kl_coeff: 9.094949186133627e-14
      cur_lr: 4.999999873689376e-05
      entropy: 0.7043386101722717
      kl: 0.008277609944343567
      policy_loss: -0.00155406049452

  custom_metrics: {}
  date: 2019-03-21_00-15-32
  done: false
  episode_len_mean: 88.75
  episode_reward_max: 379.83788307935095
  episode_reward_mean: 177.90980448318257
  episode_reward_min: -168.6366976044969
  episodes_this_iter: 112
  episodes_total: 7666
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3733.726
    load_time_ms: 1.606
    num_steps_sampled: 850000
    num_steps_trained: 850000
    rl_0:
      cur_kl_coeff: 2.8421716206667585e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.6708181500434875
      kl: 0.012105156667530537
      policy_loss: -0.0003625670215114951
      total_loss: 50.045230865478516
      vf_explained_var: 0.8343926668167114
      vf_loss: 50.04559326171875
    sample_time_ms: 18310.42
    update_time_ms: 5.261
  iterations_since_restore: 85
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 88.95490224159128
  time_since_restore: 1933.3809020

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 2043 s, 90 iter, 900000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-17-44
  done: false
  episode_len_mean: 85.05084745762711
  episode_reward_max: 380.55590543134014
  episode_reward_mean: 145.6096458932276
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 118
  episodes_total: 8339
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3698.947
    load_time_ms: 1.323
    num_steps_sampled: 910000
    num_steps_trained: 910000
    rl_0:
      cur_kl_coeff: 3.552714525833448e-16
      cur_lr: 4.999999873689376e-05
      entropy: 0.6394931674003601
      kl: 0.012985341250896454
      policy_loss: -0.002354277530685

  custom_metrics: {}
  date: 2019-03-21_00-19-33
  done: false
  episode_len_mean: 89.87387387387388
  episode_reward_max: 380.6845153246487
  episode_reward_mean: 183.8704053492369
  episode_reward_min: -162.59454341501453
  episodes_this_iter: 111
  episodes_total: 8907
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3696.567
    load_time_ms: 1.469
    num_steps_sampled: 960000
    num_steps_trained: 960000
    rl_0:
      cur_kl_coeff: 8.88178631458362e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.5980040431022644
      kl: 0.008378861472010612
      policy_loss: -0.00045152276288717985
      total_loss: 50.94145965576172
      vf_explained_var: 0.8304532170295715
      vf_loss: 50.9419059753418
    sample_time_ms: 18142.927
    update_time_ms: 5.143
  iterations_since_restore: 96
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 91.93520267461845
  time_since_restore: 217

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 2281 s, 101 iter, 1010000 ts, 151 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-21-43
  done: false
  episode_len_mean: 91.04545454545455
  episode_reward_max: 381.13461108639933
  episode_reward_mean: 197.91907844708075
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 110
  episodes_total: 9583
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3711.191
    load_time_ms: 1.554
    num_steps_sampled: 1020000
    num_steps_trained: 1020000
    rl_0:
      cur_kl_coeff: 2.220446578645905e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.5250459313392639
      kl: 0.007199469488114119
      policy_loss: 0.000113581903

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-23-31
  done: false
  episode_len_mean: 91.46296296296296
  episode_reward_max: 386.4852199381008
  episode_reward_mean: 188.18553942539842
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 108
  episodes_total: 10154
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3711.52
    load_time_ms: 1.475
    num_steps_sampled: 1070000
    num_steps_trained: 1070000
    rl_0:
      cur_kl_coeff: 5.551116446614763e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.49003875255584717
      kl: 0.012303014285862446
      policy_loss: -0.001776024466380477
      total_loss: 56.5742301940918
      vf_explained_var: 0.8108996152877808
      vf_loss: 56.57599639892578
    sample_time_ms: 17856.294
    update_time_ms: 5.709
  iterations_since_restore: 107
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 2522 s, 112 iter, 1120000 ts, 128 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-25-44
  done: false
  episode_len_mean: 84.6864406779661
  episode_reward_max: 385.58168898814404
  episode_reward_mean: 128.26018482762987
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 118
  episodes_total: 10844
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3709.543
    load_time_ms: 1.456
    num_steps_sampled: 1130000
    num_steps_trained: 1130000
    rl_0:
      cur_kl_coeff: 1.3877791116536907e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.46349287033081055
      kl: 0.011839812621474266
      policy_loss: -0.00021619

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-27-33
  done: false
  episode_len_mean: 87.8157894736842
  episode_reward_max: 387.3653681536712
  episode_reward_mean: 169.5966448141248
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 114
  episodes_total: 11407
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3721.828
    load_time_ms: 1.543
    num_steps_sampled: 1180000
    num_steps_trained: 1180000
    rl_0:
      cur_kl_coeff: 1.3877791116536907e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.4413619339466095
      kl: 0.012161606922745705
      policy_loss: -0.00047638133401051164
      total_loss: 53.782814025878906
      vf_explained_var: 0.8364306688308716
      vf_loss: 53.78328323364258
    sample_time_ms: 18245.517
    update_time_ms: 5.516
  iterations_since_restore: 118
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 2763 s, 123 iter, 1230000 ts, 136 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-29-45
  done: false
  episode_len_mean: 85.32478632478633
  episode_reward_max: 387.9261450705825
  episode_reward_mean: 150.90754043812615
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 117
  episodes_total: 12099
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3706.3
    load_time_ms: 1.552
    num_steps_sampled: 1240000
    num_steps_trained: 1240000
    rl_0:
      cur_kl_coeff: 6.9388955582684535e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.39875081181526184
      kl: 0.01289295218884945
      policy_loss: -0.002758706454

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-31-33
  done: false
  episode_len_mean: 85.34188034188034
  episode_reward_max: 391.3323407447203
  episode_reward_mean: 151.94602388380517
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 117
  episodes_total: 12660
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3689.841
    load_time_ms: 1.644
    num_steps_sampled: 1290000
    num_steps_trained: 1290000
    rl_0:
      cur_kl_coeff: 3.4694477791342267e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.38506633043289185
      kl: 0.012110205367207527
      policy_loss: -0.0015734403859823942
      total_loss: 52.68984603881836
      vf_explained_var: 0.8470494151115417
      vf_loss: 52.6914176940918
    sample_time_ms: 18068.343
    update_time_ms: 5.949
  iterations_since_restore: 129
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 3002 s, 134 iter, 1340000 ts, 135 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-33-45
  done: false
  episode_len_mean: 80.31451612903226
  episode_reward_max: 386.8302535125519
  episode_reward_mean: 116.10139911241288
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 124
  episodes_total: 13354
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3697.185
    load_time_ms: 1.807
    num_steps_sampled: 1350000
    num_steps_trained: 1350000
    rl_0:
      cur_kl_coeff: 1.7347238895671134e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.31670480966567993
      kl: 0.008678601123392582
      policy_loss: 0.000936871

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-35-35
  done: false
  episode_len_mean: 87.70175438596492
  episode_reward_max: 386.60584996684713
  episode_reward_mean: 162.81779636001122
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 114
  episodes_total: 13924
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3705.997
    load_time_ms: 1.754
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
    rl_0:
      cur_kl_coeff: 8.673619447835567e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.34222227334976196
      kl: 0.015080628916621208
      policy_loss: 0.00036580616142600775
      total_loss: 64.83197021484375
      vf_explained_var: 0.8043763637542725
      vf_loss: 64.83160400390625
    sample_time_ms: 18189.866
    update_time_ms: 5.394
  iterations_since_restore: 140
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 3242 s, 145 iter, 1450000 ts, 149 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-37-46
  done: false
  episode_len_mean: 89.62162162162163
  episode_reward_max: 385.7008976448877
  episode_reward_mean: 184.28841263840437
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 111
  episodes_total: 14613
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3715.728
    load_time_ms: 1.543
    num_steps_sampled: 1460000
    num_steps_trained: 1460000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.2755805253982544
      kl: 0.012713412754237652
      policy_loss: -8.119145786

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-39-36
  done: false
  episode_len_mean: 86.40517241379311
  episode_reward_max: 384.81894437214737
  episode_reward_mean: 162.05373396988725
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 116
  episodes_total: 15192
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3711.348
    load_time_ms: 1.519
    num_steps_sampled: 1510000
    num_steps_trained: 1510000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.21290794014930725
      kl: 0.015954123809933662
      policy_loss: -0.0006059209699742496
      total_loss: 59.29518508911133
      vf_explained_var: 0.8273982405662537
      vf_loss: 59.29578399658203
    sample_time_ms: 18165.125
    update_time_ms: 5.749
  iterations_since_restore: 151
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 3486 s, 156 iter, 1560000 ts, 204 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-41-50
  done: false
  episode_len_mean: 84.14285714285714
  episode_reward_max: 385.74377721144225
  episode_reward_mean: 145.89407039754207
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 119
  episodes_total: 15866
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.614
    load_time_ms: 1.532
    num_steps_sampled: 1570000
    num_steps_trained: 1570000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.12971538305282593
      kl: 0.013961429707705975
      policy_loss: 0.00056602

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-43-40
  done: false
  episode_len_mean: 84.05882352941177
  episode_reward_max: 385.30959153042494
  episode_reward_mean: 145.86852316633255
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 119
  episodes_total: 16436
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3729.252
    load_time_ms: 1.527
    num_steps_sampled: 1620000
    num_steps_trained: 1620000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.041839953511953354
      kl: 0.017743756994605064
      policy_loss: 0.0008092239149846137
      total_loss: 52.078704833984375
      vf_explained_var: 0.8569685816764832
      vf_loss: 52.07789611816406
    sample_time_ms: 18386.25
    update_time_ms: 5.225
  iterations_since_restore: 162
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 3728 s, 167 iter, 1670000 ts, 165 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-45-52
  done: false
  episode_len_mean: 85.94915254237289
  episode_reward_max: 387.71723605080336
  episode_reward_mean: 155.7594457678945
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 118
  episodes_total: 17116
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.374
    load_time_ms: 1.602
    num_steps_sampled: 1680000
    num_steps_trained: 1680000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.05871386453509331
      kl: 0.02738032303750515
      policy_loss: 0.0017215341

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-47-40
  done: false
  episode_len_mean: 86.08547008547009
  episode_reward_max: 379.46886620730197
  episode_reward_mean: 159.0210120824059
  episode_reward_min: -162.57424737581255
  episodes_this_iter: 117
  episodes_total: 17705
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3709.629
    load_time_ms: 1.594
    num_steps_sampled: 1730000
    num_steps_trained: 1730000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.038869477808475494
      kl: 0.015040295198559761
      policy_loss: 0.0007247956236824393
      total_loss: 55.066680908203125
      vf_explained_var: 0.8417485356330872
      vf_loss: 55.065956115722656
    sample_time_ms: 17979.84
    update_time_ms: 5.204
  iterations_since_restore: 173
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 3967 s, 178 iter, 1780000 ts, 86 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-49-52
  done: false
  episode_len_mean: 82.58677685950413
  episode_reward_max: 382.8260885745653
  episode_reward_mean: 128.79411562541927
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 121
  episodes_total: 18416
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3710.905
    load_time_ms: 1.639
    num_steps_sampled: 1790000
    num_steps_trained: 1790000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: -0.04665270447731018
      kl: 0.010391871444880962
      policy_loss: -0.000711002

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-51-41
  done: false
  episode_len_mean: 79.01574803149606
  episode_reward_max: 380.73602268033335
  episode_reward_mean: 108.16477250114731
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 127
  episodes_total: 19013
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3702.26
    load_time_ms: 1.554
    num_steps_sampled: 1840000
    num_steps_trained: 1840000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: -0.12164175510406494
      kl: 0.018804943189024925
      policy_loss: 0.0011334816226735711
      total_loss: 75.26142883300781
      vf_explained_var: 0.8248106837272644
      vf_loss: 75.26030731201172
    sample_time_ms: 18171.096
    update_time_ms: 6.101
  iterations_since_restore: 184
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 4207 s, 189 iter, 1890000 ts, 168 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-53-53
  done: false
  episode_len_mean: 80.08870967741936
  episode_reward_max: 384.8981386057851
  episode_reward_mean: 114.54429868885902
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 124
  episodes_total: 19744
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3712.202
    load_time_ms: 1.521
    num_steps_sampled: 1900000
    num_steps_trained: 1900000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: -0.1783325970172882
      kl: 0.025865616276860237
      policy_loss: 0.0018253201

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-55-42
  done: false
  episode_len_mean: 83.56198347107438
  episode_reward_max: 382.81768656600366
  episode_reward_mean: 136.46849048257047
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 121
  episodes_total: 20336
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3709.761
    load_time_ms: 1.497
    num_steps_sampled: 1950000
    num_steps_trained: 1950000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: -0.20606555044651031
      kl: 0.032985370606184006
      policy_loss: 0.007296556141227484
      total_loss: 57.75835037231445
      vf_explained_var: 0.8453221321105957
      vf_loss: 57.75105667114258
    sample_time_ms: 18134.416
    update_time_ms: 5.138
  iterations_since_restore: 195
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 4448 s, 200 iter, 2000000 ts, 143 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-57-54
  done: false
  episode_len_mean: 83.61666666666666
  episode_reward_max: 386.92071609081506
  episode_reward_mean: 137.74084306224395
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 120
  episodes_total: 21066
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3716.739
    load_time_ms: 1.427
    num_steps_sampled: 2010000
    num_steps_trained: 2010000
    rl_0:
      cur_kl_coeff: 4.3368097239177834e-20
      cur_lr: 4.999999873689376e-05
      entropy: -0.23222050070762634
      kl: 0.02554161287844181
      policy_loss: 0.00123478

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_00-59-44
  done: false
  episode_len_mean: 74.1470588235294
  episode_reward_max: 386.9024760631285
  episode_reward_mean: 62.90694995432226
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 136
  episodes_total: 21681
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3696.405
    load_time_ms: 1.472
    num_steps_sampled: 2060000
    num_steps_trained: 2060000
    rl_0:
      cur_kl_coeff: 6.505212647172114e-20
      cur_lr: 4.999999873689376e-05
      entropy: -0.3372349739074707
      kl: 0.02596021257340908
      policy_loss: 0.002279288601130247
      total_loss: 83.13673400878906
      vf_explained_var: 0.8343891501426697
      vf_loss: 83.13445281982422
    sample_time_ms: 18190.018
    update_time_ms: 5.722
  iterations_since_restore: 206
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 4688 s, 211 iter, 2110000 ts, 164 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-01-54
  done: false
  episode_len_mean: 81.77868852459017
  episode_reward_max: 383.76748157938897
  episode_reward_mean: 124.90759400744548
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 122
  episodes_total: 22406
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3702.425
    load_time_ms: 1.479
    num_steps_sampled: 2120000
    num_steps_trained: 2120000
    rl_0:
      cur_kl_coeff: 6.505212647172114e-20
      cur_lr: 4.999999873689376e-05
      entropy: -0.3733658790588379
      kl: 0.04234650358557701
      policy_loss: 0.00670448085

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-03-43
  done: false
  episode_len_mean: 82.65289256198348
  episode_reward_max: 384.42734561552896
  episode_reward_mean: 127.86389797859238
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 121
  episodes_total: 23023
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3711.149
    load_time_ms: 1.43
    num_steps_sampled: 2170000
    num_steps_trained: 2170000
    rl_0:
      cur_kl_coeff: 1.4636730879517958e-19
      cur_lr: 4.999999873689376e-05
      entropy: -0.43174126744270325
      kl: 0.03440796956419945
      policy_loss: 0.0070101069286465645
      total_loss: 54.81829833984375
      vf_explained_var: 0.8618980646133423
      vf_loss: 54.81128692626953
    sample_time_ms: 17945.637
    update_time_ms: 5.072
  iterations_since_restore: 217
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 4927 s, 222 iter, 2220000 ts, 185 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-05-54
  done: false
  episode_len_mean: 78.6124031007752
  episode_reward_max: 387.7402197174177
  episode_reward_mean: 104.90311133955544
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 129
  episodes_total: 23750
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3719.421
    load_time_ms: 1.528
    num_steps_sampled: 2230000
    num_steps_trained: 2230000
    rl_0:
      cur_kl_coeff: 4.939896251784656e-19
      cur_lr: 4.999999873689376e-05
      entropy: -0.4547150433063507
      kl: 0.03037460707128048
      policy_loss: 0.005489123519

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-07-44
  done: false
  episode_len_mean: 85.82758620689656
  episode_reward_max: 388.0686483552542
  episode_reward_mean: 158.15702316902122
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 116
  episodes_total: 24347
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3698.72
    load_time_ms: 1.713
    num_steps_sampled: 2280000
    num_steps_trained: 2280000
    rl_0:
      cur_kl_coeff: 7.409843085207277e-19
      cur_lr: 4.999999873689376e-05
      entropy: -0.465786874294281
      kl: 0.032694604247808456
      policy_loss: 0.006448693107813597
      total_loss: 42.62148666381836
      vf_explained_var: 0.8760296702384949
      vf_loss: 42.61503982543945
    sample_time_ms: 18282.805
    update_time_ms: 5.452
  iterations_since_restore: 228
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 5168 s, 233 iter, 2330000 ts, 81.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-09-55
  done: false
  episode_len_mean: 85.7948717948718
  episode_reward_max: 386.2565041282745
  episode_reward_mean: 148.92050201559599
  episode_reward_min: -168.63365446886064
  episodes_this_iter: 117
  episodes_total: 25080
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3681.893
    load_time_ms: 1.697
    num_steps_sampled: 2340000
    num_steps_trained: 2340000
    rl_0:
      cur_kl_coeff: 1.1114766954256388e-18
      cur_lr: 4.999999873689376e-05
      entropy: -0.48425421118736267
      kl: 0.10212188959121704
      policy_loss: 0.011765009

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-11-45
  done: false
  episode_len_mean: 83.5
  episode_reward_max: 385.65078539772406
  episode_reward_mean: 128.8611077710924
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 120
  episodes_total: 25682
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3692.302
    load_time_ms: 1.639
    num_steps_sampled: 2390000
    num_steps_trained: 2390000
    rl_0:
      cur_kl_coeff: 8.440274692166502e-18
      cur_lr: 4.999999873689376e-05
      entropy: -0.5550723075866699
      kl: 0.05820460245013237
      policy_loss: 0.00454149441793561
      total_loss: 62.63767623901367
      vf_explained_var: 0.8390203714370728
      vf_loss: 62.63313293457031
    sample_time_ms: 18128.788
    update_time_ms: 6.315
  iterations_since_restore: 239
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 64.430

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 5408 s, 244 iter, 2440000 ts, 89.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-13-55
  done: false
  episode_len_mean: 79.71653543307086
  episode_reward_max: 384.4483770297176
  episode_reward_mean: 110.6401967012101
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 127
  episodes_total: 26437
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.498
    load_time_ms: 1.471
    num_steps_sampled: 2450000
    num_steps_trained: 2450000
    rl_0:
      cur_kl_coeff: 4.272889797032085e-17
      cur_lr: 4.999999873689376e-05
      entropy: -0.585312008857727
      kl: 0.0454171858727932
      policy_loss: 0.0087017109617

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-15-45
  done: false
  episode_len_mean: 87.43859649122807
  episode_reward_max: 385.5552638425384
  episode_reward_mean: 161.19844943565903
  episode_reward_min: -162.9629425593281
  episodes_this_iter: 114
  episodes_total: 27047
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3701.224
    load_time_ms: 1.515
    num_steps_sampled: 2500000
    num_steps_trained: 2500000
    rl_0:
      cur_kl_coeff: 3.244725140580108e-16
      cur_lr: 4.999999873689376e-05
      entropy: -0.5969668030738831
      kl: 0.04404321312904358
      policy_loss: 0.008016065694391727
      total_loss: 57.1723518371582
      vf_explained_var: 0.8353948593139648
      vf_loss: 57.16432571411133
    sample_time_ms: 18096.761
    update_time_ms: 5.254
  iterations_since_restore: 250
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 5647 s, 255 iter, 2550000 ts, 77.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-17-54
  done: false
  episode_len_mean: 82.9
  episode_reward_max: 382.6999302973403
  episode_reward_mean: 132.7137859341935
  episode_reward_min: -166.73175169857979
  episodes_this_iter: 120
  episodes_total: 27817
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3699.837
    load_time_ms: 1.649
    num_steps_sampled: 2560000
    num_steps_trained: 2560000
    rl_0:
      cur_kl_coeff: 1.6426424184016737e-15
      cur_lr: 4.999999873689376e-05
      entropy: -0.612988293170929
      kl: 0.04836096987128258
      policy_loss: 0.005689037032425404
     

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-19-42
  done: false
  episode_len_mean: 80.41129032258064
  episode_reward_max: 388.0986643574806
  episode_reward_mean: 105.83984183149185
  episode_reward_min: -166.72337168594836
  episodes_this_iter: 124
  episodes_total: 28425
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3707.005
    load_time_ms: 1.848
    num_steps_sampled: 2610000
    num_steps_trained: 2610000
    rl_0:
      cur_kl_coeff: 1.2473815451147403e-14
      cur_lr: 4.999999873689376e-05
      entropy: -0.5974587202072144
      kl: 0.03888419270515442
      policy_loss: 0.004331781528890133
      total_loss: 61.28101348876953
      vf_explained_var: 0.8598683476448059
      vf_loss: 61.27668762207031
    sample_time_ms: 17827.923
    update_time_ms: 4.95
  iterations_since_restore: 261
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 5885 s, 266 iter, 2660000 ts, 142 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-21-54
  done: false
  episode_len_mean: 78.04724409448819
  episode_reward_max: 383.99702788552617
  episode_reward_mean: 82.29741628441523
  episode_reward_min: -168.64072963925838
  episodes_this_iter: 127
  episodes_total: 29154
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.997
    load_time_ms: 1.764
    num_steps_sampled: 2670000
    num_steps_trained: 2670000
    rl_0:
      cur_kl_coeff: 6.314869146258756e-14
      cur_lr: 4.999999873689376e-05
      entropy: -0.6086527109146118
      kl: 0.052555352449417114
      policy_loss: 0.0064992019

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-23-44
  done: false
  episode_len_mean: 75.1203007518797
  episode_reward_max: 386.4104675714382
  episode_reward_mean: 50.46901338453802
  episode_reward_min: -164.68446391507626
  episodes_this_iter: 133
  episodes_total: 29802
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3694.457
    load_time_ms: 1.679
    num_steps_sampled: 2720000
    num_steps_trained: 2720000
    rl_0:
      cur_kl_coeff: 2.1312681166337638e-13
      cur_lr: 4.999999873689376e-05
      entropy: -0.6734029650688171
      kl: 0.03892603889107704
      policy_loss: 0.007079760078340769
      total_loss: 87.89447784423828
      vf_explained_var: 0.8290521502494812
      vf_loss: 87.88739776611328
    sample_time_ms: 18226.873
    update_time_ms: 5.94
  iterations_since_restore: 272
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 6126 s, 277 iter, 2770000 ts, 43 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-25-55
  done: false
  episode_len_mean: 69.52413793103449
  episode_reward_max: 384.3289492114829
  episode_reward_mean: 8.369905720826592
  episode_reward_min: -166.73001988836765
  episodes_this_iter: 145
  episodes_total: 30612
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.624
    load_time_ms: 1.591
    num_steps_sampled: 2780000
    num_steps_trained: 2780000
    rl_0:
      cur_kl_coeff: 1.6184317341155774e-12
      cur_lr: 4.999999873689376e-05
      entropy: -0.6905251741409302
      kl: 0.048746030777692795
      policy_loss: 0.00474717235

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-27-44
  done: false
  episode_len_mean: 78.18604651162791
  episode_reward_max: 384.69595541777085
  episode_reward_mean: 72.66407964563133
  episode_reward_min: -166.77000266465188
  episodes_this_iter: 129
  episodes_total: 31276
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3705.991
    load_time_ms: 1.556
    num_steps_sampled: 2830000
    num_steps_trained: 2830000
    rl_0:
      cur_kl_coeff: 1.2289966787315532e-11
      cur_lr: 4.999999873689376e-05
      entropy: -0.6801792979240417
      kl: 0.4055686295032501
      policy_loss: 0.01226211991161108
      total_loss: 83.92841339111328
      vf_explained_var: 0.8242616057395935
      vf_loss: 83.91615295410156
    sample_time_ms: 18019.523
    update_time_ms: 4.863
  iterations_since_restore: 283
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 6366 s, 288 iter, 2880000 ts, 73 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-29-55
  done: false
  episode_len_mean: 77.63565891472868
  episode_reward_max: 383.703656438757
  episode_reward_mean: 78.66029232864331
  episode_reward_min: -168.68819905516148
  episodes_this_iter: 129
  episodes_total: 32079
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3709.827
    load_time_ms: 1.451
    num_steps_sampled: 2890000
    num_steps_trained: 2890000
    rl_0:
      cur_kl_coeff: 6.221794607297326e-11
      cur_lr: 4.999999873689376e-05
      entropy: -0.6725555658340454
      kl: 0.03637129068374634
      policy_loss: 0.00568132195621

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-31-45
  done: false
  episode_len_mean: 71.45323741007195
  episode_reward_max: 386.3793647698595
  episode_reward_mean: 24.270836819595456
  episode_reward_min: -164.7224763159418
  episodes_this_iter: 139
  episodes_total: 32755
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3710.553
    load_time_ms: 1.508
    num_steps_sampled: 2940000
    num_steps_trained: 2940000
    rl_0:
      cur_kl_coeff: 3.149784599809635e-10
      cur_lr: 4.999999873689376e-05
      entropy: -0.7379376888275146
      kl: 0.07004006206989288
      policy_loss: 0.01092528086155653
      total_loss: 84.08001708984375
      vf_explained_var: 0.8477007746696472
      vf_loss: 84.06908416748047
    sample_time_ms: 18198.585
    update_time_ms: 5.485
  iterations_since_restore: 294
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 6605 s, 299 iter, 2990000 ts, 80.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-33-55
  done: false
  episode_len_mean: 76.54198473282443
  episode_reward_max: 383.1592985514056
  episode_reward_mean: 59.94466567070389
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 131
  episodes_total: 33546
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.037
    load_time_ms: 1.6
    num_steps_sampled: 3000000
    num_steps_trained: 3000000
    rl_0:
      cur_kl_coeff: 1.5945778031323243e-09
      cur_lr: 4.999999873689376e-05
      entropy: -0.709500253200531
      kl: 0.04167936369776726
      policy_loss: 0.0053851529955

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-35-44
  done: false
  episode_len_mean: 73.56617647058823
  episode_reward_max: 387.1127332310196
  episode_reward_mean: 44.020869007802055
  episode_reward_min: -165.21695851078272
  episodes_this_iter: 136
  episodes_total: 34222
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3691.059
    load_time_ms: 1.661
    num_steps_sampled: 3050000
    num_steps_trained: 3050000
    rl_0:
      cur_kl_coeff: 1.210882683011505e-08
      cur_lr: 4.999999873689376e-05
      entropy: -0.7682371139526367
      kl: 0.07956857979297638
      policy_loss: 0.010298884473741055
      total_loss: 68.36335754394531
      vf_explained_var: 0.8687939047813416
      vf_loss: 68.35305786132812
    sample_time_ms: 17989.19
    update_time_ms: 5.835
  iterations_since_restore: 305
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 6845 s, 310 iter, 3100000 ts, 19 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-37-55
  done: false
  episode_len_mean: 75.28030303030303
  episode_reward_max: 386.0802527788768
  episode_reward_mean: 56.857213518733005
  episode_reward_min: -166.79929212626456
  episodes_this_iter: 132
  episodes_total: 35043
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.056
    load_time_ms: 1.684
    num_steps_sampled: 3110000
    num_steps_trained: 3110000
    rl_0:
      cur_kl_coeff: 1.3792711683890957e-07
      cur_lr: 4.999999873689376e-05
      entropy: -0.7689635157585144
      kl: 0.0420122928917408
      policy_loss: 0.008064436726

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-39-45
  done: false
  episode_len_mean: 73.41176470588235
  episode_reward_max: 386.2047327560039
  episode_reward_mean: 27.72304349493951
  episode_reward_min: -166.7773478623295
  episodes_this_iter: 136
  episodes_total: 35720
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3716.586
    load_time_ms: 1.58
    num_steps_sampled: 3160000
    num_steps_trained: 3160000
    rl_0:
      cur_kl_coeff: 1.047383989316586e-06
      cur_lr: 4.999999873689376e-05
      entropy: -0.7523588538169861
      kl: 0.034898824989795685
      policy_loss: 0.002560327760875225
      total_loss: 82.81343841552734
      vf_explained_var: 0.8467257618904114
      vf_loss: 82.81087493896484
    sample_time_ms: 18106.724
    update_time_ms: 5.722
  iterations_since_restore: 316
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 7086 s, 321 iter, 3210000 ts, 33.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-41-56
  done: false
  episode_len_mean: 75.47368421052632
  episode_reward_max: 386.86352030935393
  episode_reward_mean: 53.95251161066286
  episode_reward_min: -168.91350244101523
  episodes_this_iter: 133
  episodes_total: 36555
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.05
    load_time_ms: 1.631
    num_steps_sampled: 3220000
    num_steps_trained: 3220000
    rl_0:
      cur_kl_coeff: 5.302382305671927e-06
      cur_lr: 4.999999873689376e-05
      entropy: -0.7579130530357361
      kl: 0.051501695066690445
      policy_loss: 0.0093647865

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-43-47
  done: false
  episode_len_mean: 72.73722627737226
  episode_reward_max: 387.09372682717554
  episode_reward_mean: 33.90607554301994
  episode_reward_min: -163.99848932549
  episodes_this_iter: 137
  episodes_total: 37270
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3707.171
    load_time_ms: 1.644
    num_steps_sampled: 3270000
    num_steps_trained: 3270000
    rl_0:
      cur_kl_coeff: 2.684330684132874e-05
      cur_lr: 4.999999873689376e-05
      entropy: -0.735213577747345
      kl: 0.033725421875715256
      policy_loss: 0.005312465131282806
      total_loss: 70.85467529296875
      vf_explained_var: 0.8670306205749512
      vf_loss: 70.84935760498047
    sample_time_ms: 18301.968
    update_time_ms: 5.566
  iterations_since_restore: 327
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 7327 s, 332 iter, 3320000 ts, 38.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-45-58
  done: false
  episode_len_mean: 70.83802816901408
  episode_reward_max: 384.2713737670324
  episode_reward_mean: 20.267683863060935
  episode_reward_min: -166.99244840654373
  episodes_this_iter: 142
  episodes_total: 38116
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3695.454
    load_time_ms: 1.559
    num_steps_sampled: 3330000
    num_steps_trained: 3330000
    rl_0:
      cur_kl_coeff: 0.00020384133676998317
      cur_lr: 4.999999873689376e-05
      entropy: -0.7235434055328369
      kl: 0.04430486634373665
      policy_loss: 0.008262934

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-47-49
  done: false
  episode_len_mean: 73.29411764705883
  episode_reward_max: 387.3641155050467
  episode_reward_mean: 39.690922802372036
  episode_reward_min: -162.62044113823413
  episodes_this_iter: 136
  episodes_total: 38800
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3688.738
    load_time_ms: 1.635
    num_steps_sampled: 3380000
    num_steps_trained: 3380000
    rl_0:
      cur_kl_coeff: 0.0015479204012081027
      cur_lr: 4.999999873689376e-05
      entropy: -0.7215175032615662
      kl: 0.030481301248073578
      policy_loss: 0.00678689731284976
      total_loss: 71.15458679199219
      vf_explained_var: 0.8647862672805786
      vf_loss: 71.14775085449219
    sample_time_ms: 18257.928
    update_time_ms: 5.363
  iterations_since_restore: 338
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 7570 s, 343 iter, 3430000 ts, 20.1 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-50-01
  done: false
  episode_len_mean: 69.38620689655173
  episode_reward_max: 387.38744255196684
  episode_reward_mean: 13.68347901244216
  episode_reward_min: -166.973846909132
  episodes_this_iter: 145
  episodes_total: 39644
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3735.464
    load_time_ms: 1.738
    num_steps_sampled: 3440000
    num_steps_trained: 3440000
    rl_0:
      cur_kl_coeff: 0.005224231164902449
      cur_lr: 4.999999873689376e-05
      entropy: -0.7199165225028992
      kl: 0.040390271693468094
      policy_loss: 0.009352292865

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-51-51
  done: false
  episode_len_mean: 77.35384615384615
  episode_reward_max: 387.0341605776675
  episode_reward_mean: 63.173711268419275
  episode_reward_min: -162.62983038040636
  episodes_this_iter: 130
  episodes_total: 40329
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3710.659
    load_time_ms: 1.712
    num_steps_sampled: 3490000
    num_steps_trained: 3490000
    rl_0:
      cur_kl_coeff: 0.02644767425954342
      cur_lr: 4.999999873689376e-05
      entropy: -0.6826649904251099
      kl: 0.03842110559344292
      policy_loss: 0.00875053834170103
      total_loss: 74.58114624023438
      vf_explained_var: 0.8483628630638123
      vf_loss: 74.57138061523438
    sample_time_ms: 18245.342
    update_time_ms: 5.239
  iterations_since_restore: 349
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 7813 s, 354 iter, 3540000 ts, 27 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-54-04
  done: false
  episode_len_mean: 71.32624113475177
  episode_reward_max: 388.10748571735013
  episode_reward_mean: 13.978699547985988
  episode_reward_min: -167.05887716635704
  episodes_this_iter: 141
  episodes_total: 41158
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3706.903
    load_time_ms: 1.547
    num_steps_sampled: 3550000
    num_steps_trained: 3550000
    rl_0:
      cur_kl_coeff: 0.059507258236408234
      cur_lr: 4.999999873689376e-05
      entropy: -0.6916937232017517
      kl: 0.038512323051691055
      policy_loss: 0.00811728928

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-55-53
  done: false
  episode_len_mean: 68.1156462585034
  episode_reward_max: 386.1004429832765
  episode_reward_mean: 0.02593182927868071
  episode_reward_min: -167.37382695021867
  episodes_this_iter: 147
  episodes_total: 41864
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.887
    load_time_ms: 1.453
    num_steps_sampled: 3600000
    num_steps_trained: 3600000
    rl_0:
      cur_kl_coeff: 0.08926088362932205
      cur_lr: 4.999999873689376e-05
      entropy: -0.7175024151802063
      kl: 0.0346488282084465
      policy_loss: 0.0075720432214438915
      total_loss: 66.05892181396484
      vf_explained_var: 0.8869892954826355
      vf_loss: 66.04825592041016
    sample_time_ms: 18141.47
    update_time_ms: 6.035
  iterations_since_restore: 360
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 8052 s, 365 iter, 3650000 ts, -12.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-58-04
  done: false
  episode_len_mean: 68.0204081632653
  episode_reward_max: 387.01923036778794
  episode_reward_mean: -5.639980241062012
  episode_reward_min: -167.1103300159836
  episodes_this_iter: 147
  episodes_total: 42759
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3685.917
    load_time_ms: 1.538
    num_steps_sampled: 3660000
    num_steps_trained: 3660000
    rl_0:
      cur_kl_coeff: 0.08926088362932205
      cur_lr: 4.999999873689376e-05
      entropy: -0.7123779654502869
      kl: 0.09797412902116776
      policy_loss: 0.014812015928

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_01-59-53
  done: false
  episode_len_mean: 70.1118881118881
  episode_reward_max: 388.40463733081936
  episode_reward_mean: 12.346885363909509
  episode_reward_min: -169.11148885556696
  episodes_this_iter: 143
  episodes_total: 43472
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3682.9
    load_time_ms: 1.566
    num_steps_sampled: 3710000
    num_steps_trained: 3710000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6827204823493958
      kl: 0.030720802024006844
      policy_loss: 0.005739168263971806
      total_loss: 75.37418365478516
      vf_explained_var: 0.8649671673774719
      vf_loss: 75.36433410644531
    sample_time_ms: 18012.69
    update_time_ms: 6.213
  iterations_since_restore: 371
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 8292 s, 376 iter, 3760000 ts, 30.1 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-02-05
  done: false
  episode_len_mean: 68.56164383561644
  episode_reward_max: 384.35909563281206
  episode_reward_mean: 1.36148769613218
  episode_reward_min: -167.23142184826375
  episodes_this_iter: 146
  episodes_total: 44341
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3696.419
    load_time_ms: 1.475
    num_steps_sampled: 3770000
    num_steps_trained: 3770000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6908904910087585
      kl: 0.027804436162114143
      policy_loss: 0.004505846183

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-03-55
  done: false
  episode_len_mean: 74.5
  episode_reward_max: 385.6033323853818
  episode_reward_mean: 48.335254752224266
  episode_reward_min: -166.89350134881974
  episodes_this_iter: 134
  episodes_total: 45047
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3717.938
    load_time_ms: 1.495
    num_steps_sampled: 3820000
    num_steps_trained: 3820000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6729025840759277
      kl: 0.022330626845359802
      policy_loss: 0.0025147567503154278
      total_loss: 69.08364868164062
      vf_explained_var: 0.8651854395866394
      vf_loss: 69.07814025878906
    sample_time_ms: 18294.067
    update_time_ms: 5.248
  iterations_since_restore: 382
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 24.16

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 8535 s, 387 iter, 3870000 ts, 33.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-06-09
  done: false
  episode_len_mean: 72.7956204379562
  episode_reward_max: 385.87820304921
  episode_reward_mean: 37.45328464370904
  episode_reward_min: -165.73564729573965
  episodes_this_iter: 137
  episodes_total: 45905
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.669
    load_time_ms: 1.54
    num_steps_sampled: 3880000
    num_steps_trained: 3880000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.691465437412262
      kl: 0.02504936419427395
      policy_loss: 0.002552372636273503

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-07-58
  done: false
  episode_len_mean: 66.54966887417218
  episode_reward_max: 387.1520429709416
  episode_reward_mean: -18.204980416483096
  episode_reward_min: -167.2963410474324
  episodes_this_iter: 151
  episodes_total: 46611
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3689.972
    load_time_ms: 1.44
    num_steps_sampled: 3930000
    num_steps_trained: 3930000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.7151402235031128
      kl: 0.023911211639642715
      policy_loss: 0.003958037123084068
      total_loss: 71.90786743164062
      vf_explained_var: 0.8794001936912537
      vf_loss: 71.90070343017578
    sample_time_ms: 18265.275
    update_time_ms: 5.642
  iterations_since_restore: 393
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 8774 s, 398 iter, 3980000 ts, 45.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-10-08
  done: false
  episode_len_mean: 71.07801418439716
  episode_reward_max: 386.9157087506475
  episode_reward_mean: 28.221213232400718
  episode_reward_min: -167.37055797078847
  episodes_this_iter: 141
  episodes_total: 47461
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3687.066
    load_time_ms: 1.575
    num_steps_sampled: 3990000
    num_steps_trained: 3990000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6599174737930298
      kl: 0.028432173654437065
      policy_loss: 0.00663237227

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-11-58
  done: false
  episode_len_mean: 66.52348993288591
  episode_reward_max: 387.785931748871
  episode_reward_mean: -20.790492710577375
  episode_reward_min: -167.3515633187556
  episodes_this_iter: 149
  episodes_total: 48194
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3701.838
    load_time_ms: 1.721
    num_steps_sampled: 4040000
    num_steps_trained: 4040000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6776866316795349
      kl: 0.027640262618660927
      policy_loss: 0.004381907172501087
      total_loss: 48.27525329589844
      vf_explained_var: 0.920590341091156
      vf_loss: 48.26716995239258
    sample_time_ms: 18081.881
    update_time_ms: 5.839
  iterations_since_restore: 404
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 9016 s, 409 iter, 4090000 ts, 5.35 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-14-10
  done: false
  episode_len_mean: 71.94964028776978
  episode_reward_max: 384.4547932608825
  episode_reward_mean: 16.947579723202285
  episode_reward_min: -167.41721920629976
  episodes_this_iter: 139
  episodes_total: 49059
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3721.984
    load_time_ms: 1.532
    num_steps_sampled: 4100000
    num_steps_trained: 4100000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6459439396858215
      kl: 0.02174144610762596
      policy_loss: 0.003825589548

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-16-00
  done: false
  episode_len_mean: 72.32374100719424
  episode_reward_max: 386.6624140535138
  episode_reward_mean: 25.120945106295395
  episode_reward_min: -167.01116843863966
  episodes_this_iter: 139
  episodes_total: 49771
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3718.867
    load_time_ms: 1.569
    num_steps_sampled: 4150000
    num_steps_trained: 4150000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6566112637519836
      kl: 0.02064397744834423
      policy_loss: 0.00519199064001441
      total_loss: 66.32080078125
      vf_explained_var: 0.8787402510643005
      vf_loss: 66.31285095214844
    sample_time_ms: 18231.642
    update_time_ms: 5.35
  iterations_since_restore: 415
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 9258 s, 420 iter, 4200000 ts, 11.2 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-18-13
  done: false
  episode_len_mean: 71.45714285714286
  episode_reward_max: 384.6494441662241
  episode_reward_mean: 11.118708774553127
  episode_reward_min: -163.7774405307436
  episodes_this_iter: 140
  episodes_total: 50646
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3712.506
    load_time_ms: 1.421
    num_steps_sampled: 4210000
    num_steps_trained: 4210000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6384292840957642
      kl: 0.027082055807113647
      policy_loss: 0.005186218768

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-20-03
  done: false
  episode_len_mean: 69.99300699300699
  episode_reward_max: 387.3469779820992
  episode_reward_mean: 20.60809287478295
  episode_reward_min: -165.2407889007592
  episodes_this_iter: 143
  episodes_total: 51389
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3724.898
    load_time_ms: 1.383
    num_steps_sampled: 4260000
    num_steps_trained: 4260000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6522973775863647
      kl: 0.030891986563801765
      policy_loss: 0.010567143559455872
      total_loss: 69.30596160888672
      vf_explained_var: 0.8773888945579529
      vf_loss: 69.29126739501953
    sample_time_ms: 18288.658
    update_time_ms: 5.058
  iterations_since_restore: 426
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 9502 s, 431 iter, 4310000 ts, 17.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-22-16
  done: false
  episode_len_mean: 71.33333333333333
  episode_reward_max: 387.46859677561645
  episode_reward_mean: 23.31489693805685
  episode_reward_min: -164.98655695411207
  episodes_this_iter: 141
  episodes_total: 52230
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3697.261
    load_time_ms: 1.661
    num_steps_sampled: 4320000
    num_steps_trained: 4320000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6436894536018372
      kl: 0.025720063596963882
      policy_loss: 0.00565006583

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-24-07
  done: false
  episode_len_mean: 70.72340425531915
  episode_reward_max: 388.11610615699857
  episode_reward_mean: 13.792406368193769
  episode_reward_min: -169.11204898807048
  episodes_this_iter: 141
  episodes_total: 52906
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3693.523
    load_time_ms: 1.612
    num_steps_sampled: 4370000
    num_steps_trained: 4370000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6572187542915344
      kl: 0.02471771650016308
      policy_loss: 0.003057349007576704
      total_loss: 57.13842010498047
      vf_explained_var: 0.8978889584541321
      vf_loss: 57.132049560546875
    sample_time_ms: 18402.449
    update_time_ms: 7.283
  iterations_since_restore: 437
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 9745 s, 442 iter, 4420000 ts, 10.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-26-21
  done: false
  episode_len_mean: 72.08759124087591
  episode_reward_max: 386.59600204588736
  episode_reward_mean: 36.97693076768048
  episode_reward_min: -168.82583639856816
  episodes_this_iter: 137
  episodes_total: 53771
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3675.03
    load_time_ms: 1.513
    num_steps_sampled: 4430000
    num_steps_trained: 4430000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6249767541885376
      kl: 0.024171262979507446
      policy_loss: 0.006334235891

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-28-12
  done: false
  episode_len_mean: 66.64
  episode_reward_max: 385.6339024466972
  episode_reward_mean: -10.628096463523283
  episode_reward_min: -166.8744049033785
  episodes_this_iter: 150
  episodes_total: 54482
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3696.373
    load_time_ms: 1.572
    num_steps_sampled: 4480000
    num_steps_trained: 4480000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6535714268684387
      kl: 0.02751259319484234
      policy_loss: 0.005787998903542757
      total_loss: 64.54130554199219
      vf_explained_var: 0.8915766477584839
      vf_loss: 64.5318374633789
    sample_time_ms: 18531.513
    update_time_ms: 5.506
  iterations_since_restore: 448
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: -5.3140

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 9989 s, 453 iter, 4530000 ts, 15.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-30-25
  done: false
  episode_len_mean: 71.51428571428572
  episode_reward_max: 388.8617402096667
  episode_reward_mean: 24.57175027396289
  episode_reward_min: -166.98874583658218
  episodes_this_iter: 140
  episodes_total: 55311
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3720.322
    load_time_ms: 1.504
    num_steps_sampled: 4540000
    num_steps_trained: 4540000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6230183839797974
      kl: 0.027869900688529015
      policy_loss: 0.004473431035

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-32-15
  done: false
  episode_len_mean: 74.35555555555555
  episode_reward_max: 386.9398763299107
  episode_reward_mean: 44.27656090437717
  episode_reward_min: -164.7758772531414
  episodes_this_iter: 135
  episodes_total: 56012
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3702.384
    load_time_ms: 1.624
    num_steps_sampled: 4590000
    num_steps_trained: 4590000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6159377694129944
      kl: 0.02776716649532318
      policy_loss: 0.00587565079331398
      total_loss: 58.26611328125
      vf_explained_var: 0.8854864835739136
      vf_loss: 58.25651550292969
    sample_time_ms: 18301.32
    update_time_ms: 5.859
  iterations_since_restore: 459
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 10229 s, 464 iter, 4640000 ts, 44.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-34-26
  done: false
  episode_len_mean: 72.15217391304348
  episode_reward_max: 384.4538594775815
  episode_reward_mean: 30.25959424141306
  episode_reward_min: -166.73980341836452
  episodes_this_iter: 138
  episodes_total: 56843
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3701.085
    load_time_ms: 1.81
    num_steps_sampled: 4650000
    num_steps_trained: 4650000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6227757334709167
      kl: 0.02902836725115776
      policy_loss: 0.0068615018390

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-36-15
  done: false
  episode_len_mean: 72.62773722627738
  episode_reward_max: 385.2362097651689
  episode_reward_mean: 33.505195424304326
  episode_reward_min: -166.86854267498492
  episodes_this_iter: 137
  episodes_total: 57526
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3714.65
    load_time_ms: 1.626
    num_steps_sampled: 4700000
    num_steps_trained: 4700000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6187812685966492
      kl: 0.02518012933433056
      policy_loss: 0.001886723912321031
      total_loss: 64.3946304321289
      vf_explained_var: 0.8788332939147949
      vf_loss: 64.38937377929688
    sample_time_ms: 18048.52
    update_time_ms: 6.252
  iterations_since_restore: 470
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 10471 s, 475 iter, 4750000 ts, 18.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-38-28
  done: false
  episode_len_mean: 76.6590909090909
  episode_reward_max: 387.46836767001713
  episode_reward_mean: 62.30887503921896
  episode_reward_min: -165.1427921485567
  episodes_this_iter: 132
  episodes_total: 58359
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3688.006
    load_time_ms: 1.591
    num_steps_sampled: 4760000
    num_steps_trained: 4760000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5870294570922852
      kl: 0.016849255189299583
      policy_loss: 0.002406740561

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-40-18
  done: false
  episode_len_mean: 71.07801418439716
  episode_reward_max: 389.39043686995547
  episode_reward_mean: 24.108492968193165
  episode_reward_min: -164.67961341663838
  episodes_this_iter: 141
  episodes_total: 59055
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.241
    load_time_ms: 1.542
    num_steps_sampled: 4810000
    num_steps_trained: 4810000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6034490466117859
      kl: 0.01728849671781063
      policy_loss: 0.0012069264193996787
      total_loss: 70.02623748779297
      vf_explained_var: 0.8733767867088318
      vf_loss: 70.02272033691406
    sample_time_ms: 18293.179
    update_time_ms: 5.505
  iterations_since_restore: 481
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 10713 s, 486 iter, 4860000 ts, 24.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-42-29
  done: false
  episode_len_mean: 73.9051094890511
  episode_reward_max: 390.2938872759902
  episode_reward_mean: 46.175248472198035
  episode_reward_min: -167.03639320894717
  episodes_this_iter: 137
  episodes_total: 59883
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3719.389
    load_time_ms: 1.397
    num_steps_sampled: 4870000
    num_steps_trained: 4870000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5905147194862366
      kl: 0.022214427590370178
      policy_loss: 0.00450577866

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-44-18
  done: false
  episode_len_mean: 72.28571428571429
  episode_reward_max: 392.7957300265691
  episode_reward_mean: 35.307839839205634
  episode_reward_min: -166.90019594129564
  episodes_this_iter: 140
  episodes_total: 60571
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.022
    load_time_ms: 1.415
    num_steps_sampled: 4920000
    num_steps_trained: 4920000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.613220751285553
      kl: 0.037064071744680405
      policy_loss: 0.009650154039263725
      total_loss: 64.1341781616211
      vf_explained_var: 0.8798652291297913
      vf_loss: 64.11956024169922
    sample_time_ms: 17951.744
    update_time_ms: 5.841
  iterations_since_restore: 492
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 10952 s, 497 iter, 4970000 ts, 25.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-46-30
  done: false
  episode_len_mean: 71.59285714285714
  episode_reward_max: 386.5165938681594
  episode_reward_mean: 34.05377857277197
  episode_reward_min: -166.77734662088395
  episodes_this_iter: 140
  episodes_total: 61426
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3714.905
    load_time_ms: 1.483
    num_steps_sampled: 4980000
    num_steps_trained: 4980000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6098211407661438
      kl: 0.026468835771083832
      policy_loss: 0.00500400038

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-48-20
  done: false
  episode_len_mean: 72.86029411764706
  episode_reward_max: 386.5123428221352
  episode_reward_mean: 35.596480768119996
  episode_reward_min: -166.89806855103015
  episodes_this_iter: 136
  episodes_total: 62113
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3684.241
    load_time_ms: 1.525
    num_steps_sampled: 5030000
    num_steps_trained: 5030000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5967482328414917
      kl: 0.020129982382059097
      policy_loss: 0.00308609614148736
      total_loss: 65.06221008300781
      vf_explained_var: 0.8754522800445557
      vf_loss: 65.05642700195312
    sample_time_ms: 18253.46
    update_time_ms: 5.328
  iterations_since_restore: 503
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 11196 s, 508 iter, 5080000 ts, 29.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-50-33
  done: false
  episode_len_mean: 67.79054054054055
  episode_reward_max: 385.27113837520284
  episode_reward_mean: 1.8552062161363427
  episode_reward_min: -168.79791877016066
  episodes_this_iter: 148
  episodes_total: 62956
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3725.846
    load_time_ms: 1.546
    num_steps_sampled: 5090000
    num_steps_trained: 5090000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5810316205024719
      kl: 0.018301401287317276
      policy_loss: 0.002343412

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-52-24
  done: false
  episode_len_mean: 72.15
  episode_reward_max: 388.14367142201047
  episode_reward_mean: 27.815080015688736
  episode_reward_min: -168.81619456204413
  episodes_this_iter: 140
  episodes_total: 63664
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3721.047
    load_time_ms: 1.549
    num_steps_sampled: 5140000
    num_steps_trained: 5140000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5853085517883301
      kl: 0.019758369773626328
      policy_loss: 0.0036170894745737314
      total_loss: 54.144630432128906
      vf_explained_var: 0.900146484375
      vf_loss: 54.138370513916016
    sample_time_ms: 18398.808
    update_time_ms: 5.066
  iterations_since_restore: 514
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 13.90

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 11438 s, 519 iter, 5190000 ts, 46.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-54-36
  done: false
  episode_len_mean: 70.14084507042253
  episode_reward_max: 387.2127543722523
  episode_reward_mean: 21.716736262287945
  episode_reward_min: -167.01599941798688
  episodes_this_iter: 142
  episodes_total: 64499
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3699.762
    load_time_ms: 1.62
    num_steps_sampled: 5200000
    num_steps_trained: 5200000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6253242492675781
      kl: 0.018328119069337845
      policy_loss: 0.00088690029

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-56-27
  done: false
  episode_len_mean: 71.69565217391305
  episode_reward_max: 387.45730063672283
  episode_reward_mean: 32.75799770104094
  episode_reward_min: -166.80717325982093
  episodes_this_iter: 138
  episodes_total: 65186
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3701.655
    load_time_ms: 1.622
    num_steps_sampled: 5250000
    num_steps_trained: 5250000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5909813046455383
      kl: 0.01766931638121605
      policy_loss: 0.002166913589462638
      total_loss: 56.24252700805664
      vf_explained_var: 0.8979139924049377
      vf_loss: 56.23799514770508
    sample_time_ms: 18280.982
    update_time_ms: 5.603
  iterations_since_restore: 525
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 11680 s, 530 iter, 5300000 ts, 8.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_02-58-39
  done: false
  episode_len_mean: 72.71532846715328
  episode_reward_max: 386.715551355242
  episode_reward_mean: 36.48251462434672
  episode_reward_min: -168.8064754358196
  episodes_this_iter: 137
  episodes_total: 66030
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3712.145
    load_time_ms: 1.536
    num_steps_sampled: 5310000
    num_steps_trained: 5310000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5635026097297668
      kl: 0.017113562673330307
      policy_loss: 0.00176369526889

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-00-29
  done: false
  episode_len_mean: 71.26241134751773
  episode_reward_max: 388.26666463082535
  episode_reward_mean: 22.086865250864165
  episode_reward_min: -162.82944004913807
  episodes_this_iter: 141
  episodes_total: 66716
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3698.584
    load_time_ms: 1.549
    num_steps_sampled: 5360000
    num_steps_trained: 5360000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5668986439704895
      kl: 0.022907422855496407
      policy_loss: 0.004678069613873959
      total_loss: 59.220157623291016
      vf_explained_var: 0.8907179236412048
      vf_loss: 59.2124137878418
    sample_time_ms: 18264.892
    update_time_ms: 5.465
  iterations_since_restore: 536
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 11921 s, 541 iter, 5410000 ts, 4.89 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-02-40
  done: false
  episode_len_mean: 75.42105263157895
  episode_reward_max: 389.5276031869994
  episode_reward_mean: 57.01449933260433
  episode_reward_min: -166.76881454428673
  episodes_this_iter: 133
  episodes_total: 67536
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3707.131
    load_time_ms: 1.561
    num_steps_sampled: 5420000
    num_steps_trained: 5420000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5564581751823425
      kl: 0.017904294654726982
      policy_loss: 0.00295948726

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-04-30
  done: false
  episode_len_mean: 69.54166666666667
  episode_reward_max: 391.1921225008272
  episode_reward_mean: 18.1932820962672
  episode_reward_min: -166.9886055030489
  episodes_this_iter: 144
  episodes_total: 68211
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3711.375
    load_time_ms: 1.515
    num_steps_sampled: 5470000
    num_steps_trained: 5470000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6000790596008301
      kl: 0.02339364029467106
      policy_loss: 0.005017556250095367
      total_loss: 67.48880004882812
      vf_explained_var: 0.8768692016601562
      vf_loss: 67.48065948486328
    sample_time_ms: 18142.65
    update_time_ms: 5.523
  iterations_since_restore: 547
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 12162 s, 552 iter, 5520000 ts, 61.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-06-41
  done: false
  episode_len_mean: 72.07913669064749
  episode_reward_max: 386.60655818162877
  episode_reward_mean: 32.871406838724994
  episode_reward_min: -164.75750996097565
  episodes_this_iter: 139
  episodes_total: 69028
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3703.428
    load_time_ms: 1.523
    num_steps_sampled: 5530000
    num_steps_trained: 5530000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6010774374008179
      kl: 0.02904420904815197
      policy_loss: 0.0059834942

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-08-29
  done: false
  episode_len_mean: 79.3015873015873
  episode_reward_max: 388.81016097915244
  episode_reward_mean: 86.8480372692363
  episode_reward_min: -162.65345758780478
  episodes_this_iter: 126
  episodes_total: 69701
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3703.234
    load_time_ms: 1.49
    num_steps_sampled: 5580000
    num_steps_trained: 5580000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5827911496162415
      kl: 0.017314394935965538
      policy_loss: 0.0033721684012562037
      total_loss: 48.49134063720703
      vf_explained_var: 0.8927473425865173
      vf_loss: 48.485652923583984
    sample_time_ms: 17908.446
    update_time_ms: 5.089
  iterations_since_restore: 558
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 12401 s, 563 iter, 5630000 ts, 47 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-10-42
  done: false
  episode_len_mean: 72.76086956521739
  episode_reward_max: 385.79234770769784
  episode_reward_mean: 39.886848614920254
  episode_reward_min: -168.7373906222248
  episodes_this_iter: 138
  episodes_total: 70517
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.486
    load_time_ms: 1.535
    num_steps_sampled: 5640000
    num_steps_trained: 5640000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.628566324710846
      kl: 0.03133806958794594
      policy_loss: 0.00528275035321

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-12-32
  done: false
  episode_len_mean: 75.47014925373135
  episode_reward_max: 387.47948881283645
  episode_reward_mean: 60.870388686355184
  episode_reward_min: -165.10902202221394
  episodes_this_iter: 134
  episodes_total: 71159
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3717.444
    load_time_ms: 1.485
    num_steps_sampled: 5690000
    num_steps_trained: 5690000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5942179560661316
      kl: 0.02203063853085041
      policy_loss: 0.0038505096454173326
      total_loss: 47.78412628173828
      vf_explained_var: 0.9073494076728821
      vf_loss: 47.77732849121094
    sample_time_ms: 18272.454
    update_time_ms: 5.888
  iterations_since_restore: 569
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 12643 s, 574 iter, 5740000 ts, 48.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-14-44
  done: false
  episode_len_mean: 76.75384615384615
  episode_reward_max: 391.3857706138115
  episode_reward_mean: 66.96517360202627
  episode_reward_min: -162.60581502315046
  episodes_this_iter: 130
  episodes_total: 71959
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.445
    load_time_ms: 1.552
    num_steps_sampled: 5750000
    num_steps_trained: 5750000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.583366334438324
      kl: 0.018159184604883194
      policy_loss: 0.003411884419

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-16-33
  done: false
  episode_len_mean: 74.73134328358209
  episode_reward_max: 386.44485780327767
  episode_reward_mean: 52.63162416575468
  episode_reward_min: -166.753619107213
  episodes_this_iter: 134
  episodes_total: 72622
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3702.327
    load_time_ms: 1.505
    num_steps_sampled: 5800000
    num_steps_trained: 5800000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5735148787498474
      kl: 0.02073473110795021
      policy_loss: 0.0020032089669257402
      total_loss: 55.781944274902344
      vf_explained_var: 0.8910387754440308
      vf_loss: 55.77716827392578
    sample_time_ms: 18102.702
    update_time_ms: 5.642
  iterations_since_restore: 580
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 12884 s, 585 iter, 5850000 ts, 66.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-18-45
  done: false
  episode_len_mean: 75.7218045112782
  episode_reward_max: 387.66997304404583
  episode_reward_mean: 65.59126443111151
  episode_reward_min: -166.7934686456585
  episodes_this_iter: 133
  episodes_total: 73391
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3707.406
    load_time_ms: 1.498
    num_steps_sampled: 5860000
    num_steps_trained: 5860000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5520073175430298
      kl: 0.019263001158833504
      policy_loss: 0.004106587264

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-20-35
  done: false
  episode_len_mean: 76.18320610687023
  episode_reward_max: 387.6741997778589
  episode_reward_mean: 67.51291467713064
  episode_reward_min: -164.67812704977035
  episodes_this_iter: 131
  episodes_total: 74058
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3695.287
    load_time_ms: 1.612
    num_steps_sampled: 5910000
    num_steps_trained: 5910000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.561142086982727
      kl: 0.019521255046129227
      policy_loss: 0.0036900753621011972
      total_loss: 41.75278091430664
      vf_explained_var: 0.9127996563911438
      vf_loss: 41.746482849121094
    sample_time_ms: 18213.798
    update_time_ms: 5.04
  iterations_since_restore: 591
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 13124 s, 596 iter, 5960000 ts, 85.3 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-22-46
  done: false
  episode_len_mean: 73.66666666666667
  episode_reward_max: 389.3805070590192
  episode_reward_mean: 53.92359932992115
  episode_reward_min: -166.7346958217287
  episodes_this_iter: 135
  episodes_total: 74841
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3714.291
    load_time_ms: 1.552
    num_steps_sampled: 5970000
    num_steps_trained: 5970000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5917608141899109
      kl: 0.022466881200671196
      policy_loss: 0.005475796293

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-24-35
  done: false
  episode_len_mean: 77.63846153846154
  episode_reward_max: 387.4650897915725
  episode_reward_mean: 81.10806597341637
  episode_reward_min: -164.70832308348656
  episodes_this_iter: 130
  episodes_total: 75487
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3713.12
    load_time_ms: 1.528
    num_steps_sampled: 6020000
    num_steps_trained: 6020000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5964846611022949
      kl: 0.016393529251217842
      policy_loss: 0.004742647521197796
      total_loss: 60.717323303222656
      vf_explained_var: 0.8701112270355225
      vf_loss: 60.71038055419922
    sample_time_ms: 18017.244
    update_time_ms: 5.0
  iterations_since_restore: 602
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 13366 s, 607 iter, 6070000 ts, 104 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-26-48
  done: false
  episode_len_mean: 71.57142857142857
  episode_reward_max: 387.9997240611476
  episode_reward_mean: 40.398067187682884
  episode_reward_min: -164.6753978838587
  episodes_this_iter: 140
  episodes_total: 76265
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3694.176
    load_time_ms: 1.543
    num_steps_sampled: 6080000
    num_steps_trained: 6080000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6026491522789001
      kl: 0.016146162524819374
      policy_loss: 0.003761455882

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-28-38
  done: false
  episode_len_mean: 77.37209302325581
  episode_reward_max: 387.56881171390495
  episode_reward_mean: 75.2268197742511
  episode_reward_min: -168.7724656057024
  episodes_this_iter: 129
  episodes_total: 76925
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3707.561
    load_time_ms: 1.538
    num_steps_sampled: 6130000
    num_steps_trained: 6130000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6008560657501221
      kl: 0.023525012657046318
      policy_loss: 0.0035606634337455034
      total_loss: 49.11646270751953
      vf_explained_var: 0.897987961769104
      vf_loss: 49.10974884033203
    sample_time_ms: 18268.194
    update_time_ms: 5.403
  iterations_since_restore: 613
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 13607 s, 618 iter, 6180000 ts, 51.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-30-49
  done: false
  episode_len_mean: 73.72058823529412
  episode_reward_max: 387.79556038871107
  episode_reward_mean: 52.52411666244541
  episode_reward_min: -165.28104897662638
  episodes_this_iter: 136
  episodes_total: 77714
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3699.016
    load_time_ms: 1.534
    num_steps_sampled: 6190000
    num_steps_trained: 6190000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5829249024391174
      kl: 0.017859479412436485
      policy_loss: 0.0026462057

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-32-38
  done: false
  episode_len_mean: 76.04545454545455
  episode_reward_max: 385.64983556817816
  episode_reward_mean: 69.18007371753916
  episode_reward_min: -166.73238131400586
  episodes_this_iter: 132
  episodes_total: 78361
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3711.241
    load_time_ms: 1.683
    num_steps_sampled: 6240000
    num_steps_trained: 6240000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5830312967300415
      kl: 0.017275473102927208
      policy_loss: 0.004110538866370916
      total_loss: 46.673370361328125
      vf_explained_var: 0.9050355553627014
      vf_loss: 46.66695022583008
    sample_time_ms: 17981.247
    update_time_ms: 5.018
  iterations_since_restore: 624
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 13845 s, 629 iter, 6290000 ts, 92.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-34-48
  done: false
  episode_len_mean: 83.19166666666666
  episode_reward_max: 389.9708885570657
  episode_reward_mean: 116.29512554422075
  episode_reward_min: -166.8353527540827
  episodes_this_iter: 120
  episodes_total: 79118
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3695.616
    load_time_ms: 1.54
    num_steps_sampled: 6300000
    num_steps_trained: 6300000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.5586126446723938
      kl: 0.018518105149269104
      policy_loss: 0.007086785044

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-36-38
  done: false
  episode_len_mean: 75.92481203007519
  episode_reward_max: 387.84459997897966
  episode_reward_mean: 63.78226966534166
  episode_reward_min: -166.8832407254839
  episodes_this_iter: 133
  episodes_total: 79786
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3706.276
    load_time_ms: 1.417
    num_steps_sampled: 6350000
    num_steps_trained: 6350000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6245220899581909
      kl: 0.019183406606316566
      policy_loss: 0.0037909429520368576
      total_loss: 56.452980041503906
      vf_explained_var: 0.8855963349342346
      vf_loss: 56.44662857055664
    sample_time_ms: 18031.617
    update_time_ms: 5.371
  iterations_since_restore: 635
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 14086 s, 640 iter, 6400000 ts, 57.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-38-49
  done: false
  episode_len_mean: 76.30534351145039
  episode_reward_max: 386.61605366158517
  episode_reward_mean: 72.81361104490517
  episode_reward_min: -166.88556360908984
  episodes_this_iter: 131
  episodes_total: 80575
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3690.7
    load_time_ms: 1.502
    num_steps_sampled: 6410000
    num_steps_trained: 6410000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6407678127288818
      kl: 0.01872163824737072
      policy_loss: 0.0049730190075

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-40-38
  done: false
  episode_len_mean: 71.79285714285714
  episode_reward_max: 386.59098298912744
  episode_reward_mean: 41.26923681323568
  episode_reward_min: -166.8164986763859
  episodes_this_iter: 140
  episodes_total: 81246
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.445
    load_time_ms: 1.722
    num_steps_sampled: 6460000
    num_steps_trained: 6460000
    rl_0:
      cur_kl_coeff: 0.13389132916927338
      cur_lr: 4.999999873689376e-05
      entropy: -0.6246812343597412
      kl: 0.019006671383976936
      policy_loss: 0.005624137353152037
      total_loss: 51.830116271972656
      vf_explained_var: 0.9031004309654236
      vf_loss: 51.82194519042969
    sample_time_ms: 18155.823
    update_time_ms: 5.828
  iterations_since_restore: 646
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 14325 s, 651 iter, 6510000 ts, 76.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-42-49
  done: false
  episode_len_mean: 80.456
  episode_reward_max: 388.38131855964815
  episode_reward_mean: 104.24900540552092
  episode_reward_min: -168.70276634022235
  episodes_this_iter: 125
  episodes_total: 82002
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3678.495
    load_time_ms: 1.828
    num_steps_sampled: 6520000
    num_steps_trained: 6520000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.6150224208831787
      kl: 0.01599491760134697
      policy_loss: 0.0026507354341447353
 

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-44-38
  done: false
  episode_len_mean: 80.6178861788618
  episode_reward_max: 388.49832889789144
  episode_reward_mean: 98.22932427895731
  episode_reward_min: -166.9069996237421
  episodes_this_iter: 123
  episodes_total: 82654
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3679.264
    load_time_ms: 1.673
    num_steps_sampled: 6570000
    num_steps_trained: 6570000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5912708640098572
      kl: 0.026215657591819763
      policy_loss: 0.007188859861344099
      total_loss: 57.29545974731445
      vf_explained_var: 0.8665266036987305
      vf_loss: 57.28300857543945
    sample_time_ms: 18021.099
    update_time_ms: 5.385
  iterations_since_restore: 657
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 14566 s, 662 iter, 6620000 ts, 38 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-46-51
  done: false
  episode_len_mean: 73.66666666666667
  episode_reward_max: 388.30684455694734
  episode_reward_mean: 48.894910386200316
  episode_reward_min: -166.84469008585452
  episodes_this_iter: 135
  episodes_total: 83442
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3667.216
    load_time_ms: 1.503
    num_steps_sampled: 6630000
    num_steps_trained: 6630000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.6191436648368835
      kl: 0.017309656366705894
      policy_loss: 0.00319927744

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-48-39
  done: false
  episode_len_mean: 78.74015748031496
  episode_reward_max: 387.3757202442849
  episode_reward_mean: 86.21265451185194
  episode_reward_min: -168.76033845397473
  episodes_this_iter: 127
  episodes_total: 84088
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3692.905
    load_time_ms: 1.541
    num_steps_sampled: 6680000
    num_steps_trained: 6680000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5767840147018433
      kl: 0.01619655266404152
      policy_loss: 0.0018812638008967042
      total_loss: 52.64321517944336
      vf_explained_var: 0.8826076984405518
      vf_loss: 52.638084411621094
    sample_time_ms: 18149.462
    update_time_ms: 5.322
  iterations_since_restore: 668
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 14805 s, 673 iter, 6730000 ts, 77.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-50-49
  done: false
  episode_len_mean: 77.85271317829458
  episode_reward_max: 386.57638017810905
  episode_reward_mean: 82.22630911832677
  episode_reward_min: -166.78415830883026
  episodes_this_iter: 129
  episodes_total: 84870
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3676.69
    load_time_ms: 1.609
    num_steps_sampled: 6740000
    num_steps_trained: 6740000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5774391293525696
      kl: 0.01634146086871624
      policy_loss: 0.002003772184

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-52-37
  done: false
  episode_len_mean: 71.12765957446808
  episode_reward_max: 387.52829671522517
  episode_reward_mean: 29.100715415305313
  episode_reward_min: -162.9454288956785
  episodes_this_iter: 141
  episodes_total: 85529
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3682.85
    load_time_ms: 1.54
    num_steps_sampled: 6790000
    num_steps_trained: 6790000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5771449208259583
      kl: 0.013141360133886337
      policy_loss: 0.0027974951080977917
      total_loss: 62.93280029296875
      vf_explained_var: 0.8845265507698059
      vf_loss: 62.92736053466797
    sample_time_ms: 17893.423
    update_time_ms: 5.468
  iterations_since_restore: 679
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 15044 s, 684 iter, 6840000 ts, 47.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-54-49
  done: false
  episode_len_mean: 79.62204724409449
  episode_reward_max: 386.3604304890663
  episode_reward_mean: 95.47405161508041
  episode_reward_min: -164.69301438066006
  episodes_this_iter: 127
  episodes_total: 86294
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3696.394
    load_time_ms: 1.461
    num_steps_sampled: 6850000
    num_steps_trained: 6850000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5492525100708008
      kl: 0.01871330663561821
      policy_loss: 0.003082892857

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-56-37
  done: false
  episode_len_mean: 78.140625
  episode_reward_max: 390.98616955503985
  episode_reward_mean: 86.38733682866615
  episode_reward_min: -168.70078453561305
  episodes_this_iter: 128
  episodes_total: 86940
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3707.523
    load_time_ms: 1.509
    num_steps_sampled: 6900000
    num_steps_trained: 6900000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5479949712753296
      kl: 0.013364036567509174
      policy_loss: 0.0014985402813181281
      total_loss: 55.03044128417969
      vf_explained_var: 0.8819959759712219
      vf_loss: 55.026268005371094
    sample_time_ms: 18023.853
    update_time_ms: 5.157
  iterations_since_restore: 690
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 15284 s, 695 iter, 6950000 ts, 89.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_03-58-50
  done: false
  episode_len_mean: 79.28
  episode_reward_max: 388.42611471452943
  episode_reward_mean: 85.46950665965778
  episode_reward_min: -168.6919263058567
  episodes_this_iter: 125
  episodes_total: 87711
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3680.941
    load_time_ms: 1.609
    num_steps_sampled: 6960000
    num_steps_trained: 6960000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5249084830284119
      kl: 0.016119271516799927
      policy_loss: 0.0022782087326049805
   

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-00-39
  done: false
  episode_len_mean: 80.2936507936508
  episode_reward_max: 391.1997175387608
  episode_reward_mean: 99.7936357274864
  episode_reward_min: -166.70001007911205
  episodes_this_iter: 126
  episodes_total: 88350
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3701.997
    load_time_ms: 1.622
    num_steps_sampled: 7010000
    num_steps_trained: 7010000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5147143006324768
      kl: 0.01812547817826271
      policy_loss: 0.0028999829664826393
      total_loss: 44.76457214355469
      vf_explained_var: 0.8942172527313232
      vf_loss: 44.75802993774414
    sample_time_ms: 18164.38
    update_time_ms: 5.093
  iterations_since_restore: 701
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 15525 s, 706 iter, 7060000 ts, 70.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-02-51
  done: false
  episode_len_mean: 79.16535433070867
  episode_reward_max: 387.31587289310033
  episode_reward_mean: 86.89022304202533
  episode_reward_min: -168.77869663747788
  episodes_this_iter: 127
  episodes_total: 89138
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3673.12
    load_time_ms: 1.495
    num_steps_sampled: 7070000
    num_steps_trained: 7070000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5271080136299133
      kl: 0.014298142865300179
      policy_loss: 0.00333396298

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-04-40
  done: false
  episode_len_mean: 76.48091603053435
  episode_reward_max: 390.2657070678396
  episode_reward_mean: 67.93186090823328
  episode_reward_min: -166.7476541360283
  episodes_this_iter: 131
  episodes_total: 89794
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3690.842
    load_time_ms: 1.551
    num_steps_sampled: 7120000
    num_steps_trained: 7120000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5321604609489441
      kl: 0.012635646387934685
      policy_loss: 0.0015240315115079284
      total_loss: 53.28459167480469
      vf_explained_var: 0.8889709711074829
      vf_loss: 53.28053665161133
    sample_time_ms: 18192.775
    update_time_ms: 5.331
  iterations_since_restore: 712
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 15765 s, 717 iter, 7170000 ts, 51.2 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-06-51
  done: false
  episode_len_mean: 74.55639097744361
  episode_reward_max: 389.49171512220516
  episode_reward_mean: 57.19912008908769
  episode_reward_min: -162.63492896505832
  episodes_this_iter: 133
  episodes_total: 90602
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3692.366
    load_time_ms: 1.554
    num_steps_sampled: 7180000
    num_steps_trained: 7180000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5400815010070801
      kl: 0.014243732206523418
      policy_loss: 0.0016714864

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-08-39
  done: false
  episode_len_mean: 79.5748031496063
  episode_reward_max: 389.2519792297312
  episode_reward_mean: 93.55711760083956
  episode_reward_min: -168.6439673548603
  episodes_this_iter: 127
  episodes_total: 91266
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3687.895
    load_time_ms: 1.505
    num_steps_sampled: 7230000
    num_steps_trained: 7230000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.4830560088157654
      kl: 0.013947280123829842
      policy_loss: 0.0007089804275892675
      total_loss: 42.46543884277344
      vf_explained_var: 0.9041771292686462
      vf_loss: 42.4619255065918
    sample_time_ms: 17976.18
    update_time_ms: 5.842
  iterations_since_restore: 723
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 16004 s, 728 iter, 7280000 ts, 60.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-10-52
  done: false
  episode_len_mean: 73.95620437956204
  episode_reward_max: 387.62101219108587
  episode_reward_mean: 50.41314070606391
  episode_reward_min: -166.7932194745207
  episodes_this_iter: 137
  episodes_total: 92071
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3674.95
    load_time_ms: 1.628
    num_steps_sampled: 7290000
    num_steps_trained: 7290000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5061119198799133
      kl: 0.013849188573658466
      policy_loss: 4.950490620103

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-12-42
  done: false
  episode_len_mean: 73.25
  episode_reward_max: 387.7056415826844
  episode_reward_mean: 48.638592306412704
  episode_reward_min: -166.97160244974137
  episodes_this_iter: 136
  episodes_total: 92741
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3675.098
    load_time_ms: 1.538
    num_steps_sampled: 7340000
    num_steps_trained: 7340000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5188981890678406
      kl: 0.011653545312583447
      policy_loss: 0.004111132584512234
      total_loss: 46.36370086669922
      vf_explained_var: 0.9103906154632568
      vf_loss: 46.35724639892578
    sample_time_ms: 18343.36
    update_time_ms: 5.175
  iterations_since_restore: 734
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 24.319

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 16247 s, 739 iter, 7390000 ts, 70.1 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-14-54
  done: false
  episode_len_mean: 72.85401459854015
  episode_reward_max: 390.7505970149984
  episode_reward_mean: 49.81595466333101
  episode_reward_min: -168.65215267941
  episodes_this_iter: 137
  episodes_total: 93520
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3689.99
    load_time_ms: 1.464
    num_steps_sampled: 7400000
    num_steps_trained: 7400000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5619001388549805
      kl: 0.01114351861178875
      policy_loss: 0.0007866563973948

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-16-42
  done: false
  episode_len_mean: 73.32846715328468
  episode_reward_max: 392.03196488519967
  episode_reward_mean: 49.36122292410113
  episode_reward_min: -162.68280690261363
  episodes_this_iter: 137
  episodes_total: 94182
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3685.959
    load_time_ms: 1.491
    num_steps_sampled: 7450000
    num_steps_trained: 7450000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5533380508422852
      kl: 0.017716877162456512
      policy_loss: 0.0031870929524302483
      total_loss: 55.24440383911133
      vf_explained_var: 0.8930670022964478
      vf_loss: 55.237667083740234
    sample_time_ms: 18042.885
    update_time_ms: 5.31
  iterations_since_restore: 745
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 16487 s, 750 iter, 7500000 ts, 88.2 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-18-55
  done: false
  episode_len_mean: 74.80597014925372
  episode_reward_max: 391.44355198265055
  episode_reward_mean: 60.844500372244404
  episode_reward_min: -164.75757427712918
  episodes_this_iter: 134
  episodes_total: 94956
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3698.959
    load_time_ms: 1.527
    num_steps_sampled: 7510000
    num_steps_trained: 7510000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5553386807441711
      kl: 0.012206918559968472
      policy_loss: 0.000304515

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-20-45
  done: false
  episode_len_mean: 76.38461538461539
  episode_reward_max: 388.07644381523085
  episode_reward_mean: 62.4712415502841
  episode_reward_min: -162.59021057483196
  episodes_this_iter: 130
  episodes_total: 95606
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3699.837
    load_time_ms: 1.415
    num_steps_sampled: 7560000
    num_steps_trained: 7560000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5126414895057678
      kl: 0.01528496015816927
      policy_loss: 0.0038209580816328526
      total_loss: 56.35346603393555
      vf_explained_var: 0.8828535676002502
      vf_loss: 56.34657287597656
    sample_time_ms: 18295.078
    update_time_ms: 5.433
  iterations_since_restore: 756
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 16727 s, 761 iter, 7610000 ts, 58.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-22-55
  done: false
  episode_len_mean: 75.60902255639098
  episode_reward_max: 387.9952024052537
  episode_reward_mean: 58.81182653535529
  episode_reward_min: -168.78284080621245
  episodes_this_iter: 133
  episodes_total: 96392
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3697.355
    load_time_ms: 1.475
    num_steps_sampled: 7620000
    num_steps_trained: 7620000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5228574872016907
      kl: 0.019947731867432594
      policy_loss: 0.00378838833

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-24-44
  done: false
  episode_len_mean: 74.73333333333333
  episode_reward_max: 392.41671866024745
  episode_reward_mean: 54.09024594174681
  episode_reward_min: -165.0940050367975
  episodes_this_iter: 135
  episodes_total: 97048
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3698.99
    load_time_ms: 1.587
    num_steps_sampled: 7670000
    num_steps_trained: 7670000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.5269076824188232
      kl: 0.01310561690479517
      policy_loss: 0.0035612911451607943
      total_loss: 45.83586883544922
      vf_explained_var: 0.9096026420593262
      vf_loss: 45.82967758178711
    sample_time_ms: 18092.632
    update_time_ms: 5.763
  iterations_since_restore: 767
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    r

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 16969 s, 772 iter, 7720000 ts, 64.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-26-58
  done: false
  episode_len_mean: 76.48461538461538
  episode_reward_max: 392.13683181299496
  episode_reward_mean: 72.72235680807277
  episode_reward_min: -166.79552054926395
  episodes_this_iter: 130
  episodes_total: 97845
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3687.904
    load_time_ms: 1.559
    num_steps_sampled: 7730000
    num_steps_trained: 7730000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.4742012023925781
      kl: 0.011700516566634178
      policy_loss: 0.0032845032

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-28-49
  done: false
  episode_len_mean: 80.08
  episode_reward_max: 387.1266192651868
  episode_reward_mean: 92.45216556002093
  episode_reward_min: -164.77985896357535
  episodes_this_iter: 125
  episodes_total: 98515
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3712.517
    load_time_ms: 1.52
    num_steps_sampled: 7780000
    num_steps_trained: 7780000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.4509107172489166
      kl: 0.01216500997543335
      policy_loss: 0.0007566536660306156
      total_loss: 41.482913970947266
      vf_explained_var: 0.9058845639228821
      vf_loss: 41.47971725463867
    sample_time_ms: 18475.261
    update_time_ms: 5.038
  iterations_since_restore: 778
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 46.226

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 17213 s, 783 iter, 7830000 ts, 55.2 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-31-02
  done: false
  episode_len_mean: 74.08208955223881
  episode_reward_max: 385.7517822018441
  episode_reward_mean: 55.94648960078672
  episode_reward_min: -166.96541922399044
  episodes_this_iter: 134
  episodes_total: 99323
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3737.217
    load_time_ms: 1.505
    num_steps_sampled: 7840000
    num_steps_trained: 7840000
    rl_0:
      cur_kl_coeff: 0.20083697140216827
      cur_lr: 4.999999873689376e-05
      entropy: -0.4592531621456146
      kl: 0.010903647169470787
      policy_loss: 0.00238935276

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-32-52
  done: false
  episode_len_mean: 79.05555555555556
  episode_reward_max: 392.35668433183673
  episode_reward_mean: 87.17319554632816
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 126
  episodes_total: 99972
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3708.163
    load_time_ms: 1.666
    num_steps_sampled: 7890000
    num_steps_trained: 7890000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.48561859130859375
      kl: 0.01763453148305416
      policy_loss: 0.0008280195179395378
      total_loss: 48.792091369628906
      vf_explained_var: 0.892545759677887
      vf_loss: 48.789493560791016
    sample_time_ms: 18291.772
    update_time_ms: 4.967
  iterations_since_restore: 789
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 17454 s, 794 iter, 7940000 ts, 64.5 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-35-03
  done: false
  episode_len_mean: 73.01459854014598
  episode_reward_max: 386.3674931829257
  episode_reward_mean: 46.64321339358071
  episode_reward_min: -168.71007235368728
  episodes_this_iter: 137
  episodes_total: 100761
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.26
    load_time_ms: 1.878
    num_steps_sampled: 7950000
    num_steps_trained: 7950000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5435460209846497
      kl: 0.019852757453918457
      policy_loss: 0.00399536639

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-36-53
  done: false
  episode_len_mean: 73.24087591240875
  episode_reward_max: 387.62384348867977
  episode_reward_mean: 47.87662011615527
  episode_reward_min: -164.72292286941052
  episodes_this_iter: 137
  episodes_total: 101402
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3691.772
    load_time_ms: 1.545
    num_steps_sampled: 8000000
    num_steps_trained: 8000000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5427928566932678
      kl: 0.018740125000476837
      policy_loss: 0.004886288661509752
      total_loss: 54.07976531982422
      vf_explained_var: 0.8975962400436401
      vf_loss: 54.072998046875
    sample_time_ms: 18135.168
    update_time_ms: 4.944
  iterations_since_restore: 800
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 17695 s, 805 iter, 8050000 ts, 47.1 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-39-05
  done: false
  episode_len_mean: 72.70588235294117
  episode_reward_max: 386.3739942173723
  episode_reward_mean: 40.66941320112382
  episode_reward_min: -162.5995786599779
  episodes_this_iter: 136
  episodes_total: 102198
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3695.068
    load_time_ms: 1.552
    num_steps_sampled: 8060000
    num_steps_trained: 8060000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5496194362640381
      kl: 0.01712937466800213
      policy_loss: 0.002494878834

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-40-56
  done: false
  episode_len_mean: 78.234375
  episode_reward_max: 386.99906890623083
  episode_reward_mean: 81.36770361242222
  episode_reward_min: -167.00250582381724
  episodes_this_iter: 128
  episodes_total: 102845
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3705.382
    load_time_ms: 1.677
    num_steps_sampled: 8110000
    num_steps_trained: 8110000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5200297832489014
      kl: 0.012498138472437859
      policy_loss: 0.0026812239084392786
      total_loss: 50.760440826416016
      vf_explained_var: 0.8899453282356262
      vf_loss: 50.75650405883789
    sample_time_ms: 18295.819
    update_time_ms: 5.167
  iterations_since_restore: 811
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 17938 s, 816 iter, 8160000 ts, 64.8 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-43-08
  done: false
  episode_len_mean: 75.15789473684211
  episode_reward_max: 392.29981973239984
  episode_reward_mean: 61.8504800719768
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 133
  episodes_total: 103620
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3716.074
    load_time_ms: 1.65
    num_steps_sampled: 8170000
    num_steps_trained: 8170000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5507338643074036
      kl: 0.020653782412409782
      policy_loss: 0.00162428431

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-44-58
  done: false
  episode_len_mean: 79.768
  episode_reward_max: 387.60400993047585
  episode_reward_mean: 95.75685515916453
  episode_reward_min: -166.85069757172107
  episodes_this_iter: 125
  episodes_total: 104256
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3701.645
    load_time_ms: 1.512
    num_steps_sampled: 8220000
    num_steps_trained: 8220000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.551566481590271
      kl: 0.014825516380369663
      policy_loss: 0.0011397665366530418
      total_loss: 46.0344352722168
      vf_explained_var: 0.89621502161026
      vf_loss: 46.03180694580078
    sample_time_ms: 18248.135
    update_time_ms: 5.671
  iterations_since_restore: 822
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 47.878

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 18179 s, 827 iter, 8270000 ts, 81.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-47-10
  done: false
  episode_len_mean: 75.12121212121212
  episode_reward_max: 388.4598532706462
  episode_reward_mean: 63.28824035783104
  episode_reward_min: -166.7232322241926
  episodes_this_iter: 132
  episodes_total: 105027
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3694.506
    load_time_ms: 1.475
    num_steps_sampled: 8280000
    num_steps_trained: 8280000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5414128303527832
      kl: 0.022188326343894005
      policy_loss: 0.00256360135

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-49-00
  done: false
  episode_len_mean: 81.72131147540983
  episode_reward_max: 390.57647740323347
  episode_reward_mean: 104.7217047170187
  episode_reward_min: -166.75142063733577
  episodes_this_iter: 122
  episodes_total: 105674
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3700.219
    load_time_ms: 1.444
    num_steps_sampled: 8330000
    num_steps_trained: 8330000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5323041081428528
      kl: 0.023679841309785843
      policy_loss: 0.003266417421400547
      total_loss: 44.779380798339844
      vf_explained_var: 0.8965402245521545
      vf_loss: 44.773738861083984
    sample_time_ms: 18247.375
    update_time_ms: 6.265
  iterations_since_restore: 833
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 18422 s, 838 iter, 8380000 ts, 129 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-51-13
  done: false
  episode_len_mean: 76.24812030075188
  episode_reward_max: 390.69855459592
  episode_reward_mean: 67.73094761807215
  episode_reward_min: -166.8691016995573
  episodes_this_iter: 133
  episodes_total: 106451
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3691.828
    load_time_ms: 1.429
    num_steps_sampled: 8390000
    num_steps_trained: 8390000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5679063200950623
      kl: 0.024887997657060623
      policy_loss: 0.00593312457203

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-53-01
  done: false
  episode_len_mean: 79.776
  episode_reward_max: 386.1178348149473
  episode_reward_mean: 99.27730691519564
  episode_reward_min: -166.8382142799759
  episodes_this_iter: 125
  episodes_total: 107073
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3691.346
    load_time_ms: 1.618
    num_steps_sampled: 8440000
    num_steps_trained: 8440000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5506431460380554
      kl: 0.017819857224822044
      policy_loss: 0.005372059065848589
      total_loss: 44.3000602722168
      vf_explained_var: 0.8981289863586426
      vf_loss: 44.29290008544922
    sample_time_ms: 18127.874
    update_time_ms: 6.303
  iterations_since_restore: 844
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 49.638

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 18661 s, 849 iter, 8490000 ts, 112 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-55-13
  done: false
  episode_len_mean: 77.08527131782945
  episode_reward_max: 388.5175880601936
  episode_reward_mean: 82.92139963117356
  episode_reward_min: -166.70499709483624
  episodes_this_iter: 129
  episodes_total: 107841
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3684.887
    load_time_ms: 1.533
    num_steps_sampled: 8500000
    num_steps_trained: 8500000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5686918497085571
      kl: 0.022150930017232895
      policy_loss: 0.00435332162

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-57-02
  done: false
  episode_len_mean: 74.34328358208955
  episode_reward_max: 387.94809966231094
  episode_reward_mean: 59.17067947322344
  episode_reward_min: -166.70655927380562
  episodes_this_iter: 134
  episodes_total: 108499
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3692.535
    load_time_ms: 1.426
    num_steps_sampled: 8550000
    num_steps_trained: 8550000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.618185818195343
      kl: 0.022863978520035744
      policy_loss: 0.0026535792276263237
      total_loss: 53.25832748413086
      vf_explained_var: 0.8937581181526184
      vf_loss: 53.253379821777344
    sample_time_ms: 18104.883
    update_time_ms: 5.998
  iterations_since_restore: 855
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 18902 s, 860 iter, 8600000 ts, 42.6 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_04-59-14
  done: false
  episode_len_mean: 77.78125
  episode_reward_max: 387.2290443951544
  episode_reward_mean: 82.50581851187602
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 128
  episodes_total: 109282
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3686.121
    load_time_ms: 1.415
    num_steps_sampled: 8610000
    num_steps_trained: 8610000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5825195908546448
      kl: 0.023935748264193535
      policy_loss: 0.004952972289174795


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-01-04
  done: false
  episode_len_mean: 73.12408759124088
  episode_reward_max: 392.02052477450013
  episode_reward_mean: 44.7646080976401
  episode_reward_min: -166.87570546897888
  episodes_this_iter: 137
  episodes_total: 109923
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3689.106
    load_time_ms: 1.554
    num_steps_sampled: 8660000
    num_steps_trained: 8660000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.6001242399215698
      kl: 0.02555657923221588
      policy_loss: 0.006259356625378132
      total_loss: 54.21207809448242
      vf_explained_var: 0.8960466384887695
      vf_loss: 54.20325469970703
    sample_time_ms: 18192.257
    update_time_ms: 5.732
  iterations_since_restore: 866
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 19142 s, 871 iter, 8710000 ts, 62.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-03-15
  done: false
  episode_len_mean: 76.56923076923077
  episode_reward_max: 388.95966495873034
  episode_reward_mean: 75.98188001331064
  episode_reward_min: -166.75910496088505
  episodes_this_iter: 130
  episodes_total: 110697
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3688.361
    load_time_ms: 1.635
    num_steps_sampled: 8720000
    num_steps_trained: 8720000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5956335067749023
      kl: 0.018955281004309654
      policy_loss: 0.002945327

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-05-03
  done: false
  episode_len_mean: 75.64661654135338
  episode_reward_max: 388.9509923993705
  episode_reward_mean: 66.5259114393285
  episode_reward_min: -166.81796621569634
  episodes_this_iter: 133
  episodes_total: 111322
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3704.721
    load_time_ms: 1.589
    num_steps_sampled: 8770000
    num_steps_trained: 8770000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5947169661521912
      kl: 0.014524193480610847
      policy_loss: 0.001281958888284862
      total_loss: 43.80884552001953
      vf_explained_var: 0.9105324149131775
      vf_loss: 43.80611038208008
    sample_time_ms: 17937.092
    update_time_ms: 5.877
  iterations_since_restore: 877
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 19382 s, 882 iter, 8820000 ts, 77.4 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-07-15
  done: false
  episode_len_mean: 80.91056910569105
  episode_reward_max: 388.7987788215266
  episode_reward_mean: 99.11234309454211
  episode_reward_min: -166.7331469627285
  episodes_this_iter: 123
  episodes_total: 112079
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3705.638
    load_time_ms: 1.601
    num_steps_sampled: 8830000
    num_steps_trained: 8830000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5885044932365417
      kl: 0.026941534131765366
      policy_loss: 0.00676297908

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-09-06
  done: false
  episode_len_mean: 81.23577235772358
  episode_reward_max: 388.6786357753131
  episode_reward_mean: 108.20750730727157
  episode_reward_min: -168.72691931661606
  episodes_this_iter: 123
  episodes_total: 112714
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3683.767
    load_time_ms: 1.59
    num_steps_sampled: 8880000
    num_steps_trained: 8880000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5793481469154358
      kl: 0.026119807735085487
      policy_loss: 0.006532586645334959
      total_loss: 41.239837646484375
      vf_explained_var: 0.9008387923240662
      vf_loss: 41.230682373046875
    sample_time_ms: 18357.105
    update_time_ms: 5.466
  iterations_since_restore: 888
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 19624 s, 893 iter, 8930000 ts, 103 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-11-18
  done: false
  episode_len_mean: 83.74166666666666
  episode_reward_max: 389.1234673199797
  episode_reward_mean: 127.39388635081168
  episode_reward_min: -166.82165143796445
  episodes_this_iter: 120
  episodes_total: 113467
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3689.828
    load_time_ms: 1.529
    num_steps_sampled: 8940000
    num_steps_trained: 8940000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5930453538894653
      kl: 0.01848115585744381
      policy_loss: 0.00318880565

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-13-06
  done: false
  episode_len_mean: 78.81746031746032
  episode_reward_max: 386.3778248014832
  episode_reward_mean: 90.59371171580156
  episode_reward_min: -168.72367157312868
  episodes_this_iter: 126
  episodes_total: 114120
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3698.53
    load_time_ms: 1.461
    num_steps_sampled: 8990000
    num_steps_trained: 8990000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.587530255317688
      kl: 0.016632571816444397
      policy_loss: 0.00526631623506546
      total_loss: 50.43157196044922
      vf_explained_var: 0.8895982503890991
      vf_loss: 50.42463684082031
    sample_time_ms: 18125.808
    update_time_ms: 5.807
  iterations_since_restore: 899
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 19864 s, 904 iter, 9040000 ts, 82.1 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-15-19
  done: false
  episode_len_mean: 83.6470588235294
  episode_reward_max: 388.54831666009244
  episode_reward_mean: 130.11473575389684
  episode_reward_min: -164.65345563920977
  episodes_this_iter: 119
  episodes_total: 114893
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3684.388
    load_time_ms: 1.431
    num_steps_sampled: 9050000
    num_steps_trained: 9050000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5973852872848511
      kl: 0.020045805722475052
      policy_loss: 0.004367303

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-17-08
  done: false
  episode_len_mean: 81.87704918032787
  episode_reward_max: 392.97742250519246
  episode_reward_mean: 109.1331723758971
  episode_reward_min: -160.48783945545196
  episodes_this_iter: 122
  episodes_total: 115502
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3715.863
    load_time_ms: 1.488
    num_steps_sampled: 9100000
    num_steps_trained: 9100000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5994777083396912
      kl: 0.021168289706110954
      policy_loss: 0.000520438130479306
      total_loss: 45.69647216796875
      vf_explained_var: 0.8917245864868164
      vf_loss: 45.69382095336914
    sample_time_ms: 18171.968
    update_time_ms: 5.478
  iterations_since_restore: 910
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 20105 s, 915 iter, 9150000 ts, 86.2 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-19-19
  done: false
  episode_len_mean: 77.765625
  episode_reward_max: 392.36455244358814
  episode_reward_mean: 85.1841852235168
  episode_reward_min: -166.73389320096015
  episodes_this_iter: 128
  episodes_total: 116272
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3682.282
    load_time_ms: 1.477
    num_steps_sampled: 9160000
    num_steps_trained: 9160000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.6024499535560608
      kl: 0.01966950297355652
      policy_loss: 0.0029789870604872704

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-21-08
  done: false
  episode_len_mean: 82.49180327868852
  episode_reward_max: 392.054126959256
  episode_reward_mean: 118.24388730604883
  episode_reward_min: -164.66215227505208
  episodes_this_iter: 122
  episodes_total: 116901
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3705.67
    load_time_ms: 1.596
    num_steps_sampled: 9210000
    num_steps_trained: 9210000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5886408686637878
      kl: 0.01741313561797142
      policy_loss: 0.0024819006212055683
      total_loss: 47.35818099975586
      vf_explained_var: 0.8813668489456177
      vf_loss: 47.353946685791016
    sample_time_ms: 18043.521
    update_time_ms: 5.426
  iterations_since_restore: 921
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 20345 s, 926 iter, 9260000 ts, 89 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-23-19
  done: false
  episode_len_mean: 76.06870229007633
  episode_reward_max: 388.49248897509557
  episode_reward_mean: 70.13682579959345
  episode_reward_min: -168.72177194186688
  episodes_this_iter: 131
  episodes_total: 117655
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3682.131
    load_time_ms: 1.585
    num_steps_sampled: 9270000
    num_steps_trained: 9270000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.6103790402412415
      kl: 0.01687874086201191
      policy_loss: 0.004044742789

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-25-10
  done: false
  episode_len_mean: 81.21487603305785
  episode_reward_max: 388.69110521258443
  episode_reward_mean: 106.02057467141154
  episode_reward_min: -166.67861106904985
  episodes_this_iter: 121
  episodes_total: 118274
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3718.118
    load_time_ms: 1.522
    num_steps_sampled: 9320000
    num_steps_trained: 9320000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.588280975818634
      kl: 0.01985028199851513
      policy_loss: 0.00554549228399992
      total_loss: 51.00493240356445
      vf_explained_var: 0.8772667050361633
      vf_loss: 50.99739074707031
    sample_time_ms: 18243.151
    update_time_ms: 5.516
  iterations_since_restore: 932
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 20587 s, 937 iter, 9370000 ts, 100 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-27-23
  done: false
  episode_len_mean: 78.1953125
  episode_reward_max: 388.7349000940876
  episode_reward_mean: 90.87577514972432
  episode_reward_min: -168.7358112456465
  episodes_this_iter: 128
  episodes_total: 119032
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3701.431
    load_time_ms: 1.495
    num_steps_sampled: 9380000
    num_steps_trained: 9380000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5719671845436096
      kl: 0.017236748710274696
      policy_loss: 0.003171738237142563


Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-29-13
  done: false
  episode_len_mean: 78.2265625
  episode_reward_max: 385.83301896978446
  episode_reward_mean: 91.23756627722818
  episode_reward_min: -166.75966990062236
  episodes_this_iter: 128
  episodes_total: 119667
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3691.118
    load_time_ms: 1.605
    num_steps_sampled: 9430000
    num_steps_trained: 9430000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5586663484573364
      kl: 0.017798949033021927
      policy_loss: 0.00312678050249815
      total_loss: 51.71112060546875
      vf_explained_var: 0.8857490420341492
      vf_loss: 51.70620346069336
    sample_time_ms: 18332.805
    update_time_ms: 5.282
  iterations_since_restore: 943
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 20828 s, 948 iter, 9480000 ts, 150 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-31-23
  done: false
  episode_len_mean: 75.81060606060606
  episode_reward_max: 385.490016921086
  episode_reward_mean: 68.36558117959548
  episode_reward_min: -164.66250250431537
  episodes_this_iter: 132
  episodes_total: 120401
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3694.788
    load_time_ms: 1.705
    num_steps_sampled: 9490000
    num_steps_trained: 9490000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5702182054519653
      kl: 0.016995644196867943
      policy_loss: 0.002566234441

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-33-11
  done: false
  episode_len_mean: 80.01587301587301
  episode_reward_max: 388.77830615320727
  episode_reward_mean: 104.17593350403365
  episode_reward_min: -166.7524261186981
  episodes_this_iter: 126
  episodes_total: 121024
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3699.935
    load_time_ms: 1.705
    num_steps_sampled: 9540000
    num_steps_trained: 9540000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5430192947387695
      kl: 0.014367153868079185
      policy_loss: 0.004173894412815571
      total_loss: 37.68292999267578
      vf_explained_var: 0.9132270812988281
      vf_loss: 37.67731475830078
    sample_time_ms: 17910.083
    update_time_ms: 5.289
  iterations_since_restore: 954
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 21067 s, 959 iter, 9590000 ts, 89.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-35-24
  done: false
  episode_len_mean: 79.72
  episode_reward_max: 385.8876638938212
  episode_reward_mean: 98.6169011706662
  episode_reward_min: -166.82097414037227
  episodes_this_iter: 125
  episodes_total: 121776
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3697.266
    load_time_ms: 1.547
    num_steps_sampled: 9600000
    num_steps_trained: 9600000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5398631691932678
      kl: 0.013949938118457794
      policy_loss: 0.0023296582512557507
   

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-37-14
  done: false
  episode_len_mean: 76.3969465648855
  episode_reward_max: 388.80196502597346
  episode_reward_mean: 75.1160104042144
  episode_reward_min: -162.57424737581255
  episodes_this_iter: 131
  episodes_total: 122397
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3684.806
    load_time_ms: 1.529
    num_steps_sampled: 9650000
    num_steps_trained: 9650000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5835044980049133
      kl: 0.02195315808057785
      policy_loss: 0.00556908966973424
      total_loss: 59.80921936035156
      vf_explained_var: 0.8820710778236389
      vf_loss: 59.80144500732422
    sample_time_ms: 18366.247
    update_time_ms: 5.478
  iterations_since_restore: 965
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 21308 s, 970 iter, 9700000 ts, 61.7 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-39-24
  done: false
  episode_len_mean: 79.44354838709677
  episode_reward_max: 388.6316028403785
  episode_reward_mean: 100.12856243272931
  episode_reward_min: -168.70907987638952
  episodes_this_iter: 124
  episodes_total: 123163
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3692.676
    load_time_ms: 1.581
    num_steps_sampled: 9710000
    num_steps_trained: 9710000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5597949028015137
      kl: 0.018322167918086052
      policy_loss: 0.002646791

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-41-13
  done: false
  episode_len_mean: 78.2734375
  episode_reward_max: 393.6716915941062
  episode_reward_mean: 89.34154047660667
  episode_reward_min: -162.97310084494592
  episodes_this_iter: 128
  episodes_total: 123797
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3696.574
    load_time_ms: 1.526
    num_steps_sampled: 9760000
    num_steps_trained: 9760000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5630508661270142
      kl: 0.017911629751324654
      policy_loss: 0.006028824020177126
      total_loss: 43.29407501220703
      vf_explained_var: 0.9028006196022034
      vf_loss: 43.286251068115234
    sample_time_ms: 17921.944
    update_time_ms: 4.834
  iterations_since_restore: 976
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
    rl_0:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 21548 s, 981 iter, 9810000 ts, 118 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-43-24
  done: false
  episode_len_mean: 80.76612903225806
  episode_reward_max: 388.25657571649805
  episode_reward_mean: 113.09439791993184
  episode_reward_min: -168.6737335123682
  episodes_this_iter: 124
  episodes_total: 124545
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3689.245
    load_time_ms: 1.448
    num_steps_sampled: 9820000
    num_steps_trained: 9820000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5172299146652222
      kl: 0.022144366055727005
      policy_loss: 0.0055385124

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-45-14
  done: false
  episode_len_mean: 84.45299145299145
  episode_reward_max: 385.73578708079395
  episode_reward_mean: 133.8345763581821
  episode_reward_min: -160.4611939071083
  episodes_this_iter: 117
  episodes_total: 125160
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3680.385
    load_time_ms: 1.604
    num_steps_sampled: 9870000
    num_steps_trained: 9870000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5269858241081238
      kl: 0.015564343892037868
      policy_loss: 0.003117077983915806
      total_loss: 38.76171112060547
      vf_explained_var: 0.8970364332199097
      vf_loss: 38.75703048706055
    sample_time_ms: 18173.204
    update_time_ms: 5.165
  iterations_since_restore: 987
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=12721], 21788 s, 992 iter, 9920000 ts, 92.9 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-47-26
  done: false
  episode_len_mean: 81.00813008130082
  episode_reward_max: 388.83289275340127
  episode_reward_mean: 111.22601706087224
  episode_reward_min: -164.71392959388731
  episodes_this_iter: 123
  episodes_total: 125903
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3678.499
    load_time_ms: 1.51
    num_steps_sampled: 9930000
    num_steps_trained: 9930000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5443569421768188
      kl: 0.019891388714313507
      policy_loss: 0.004040536

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-21_05-49-14
  done: false
  episode_len_mean: 81.91803278688525
  episode_reward_max: 381.78698660900255
  episode_reward_mean: 116.56326085406232
  episode_reward_min: -166.6732580035591
  episodes_this_iter: 122
  episodes_total: 126517
  experiment_id: d110021820ee4abd9ffac6de3e7266c0
  hostname: Gandalf
  info:
    grad_time_ms: 3687.746
    load_time_ms: 1.497
    num_steps_sampled: 9980000
    num_steps_trained: 9980000
    rl_0:
      cur_kl_coeff: 0.10041848570108414
      cur_lr: 4.999999873689376e-05
      entropy: -0.5273405313491821
      kl: 0.01697000488638878
      policy_loss: 0.0053238580003380775
      total_loss: 38.95095443725586
      vf_explained_var: 0.9049514532089233
      vf_loss: 38.94392776489258
    sample_time_ms: 18192.862
    update_time_ms: 4.908
  iterations_since_restore: 998
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 12721
  policy_reward_mean:
  