# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout in steps; defined once so every later cell uses the same value.
# Author's note (translated): use 103 as the maximum horizon if the rollout should
# stop before the vehicles leave; the default was 500.
HORIZON = 500

# Name of the experiment (reused later as the experiment tag).
experiment_name = "IntersectionExample"

# Scenario class: list what flow.scenarios exports, then choose one by name.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the list printed by this cell shows 'IntersectionScenarioTW',
# not 'IntersectionTWScenario' - confirm which name flow.scenarios really exposes.
scenario_name = "IntersectionTWScenario"

# Environment class: list what flow.multiagent_envs exports, then choose one by name.
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, handed to NetParams as additional_params.
additionalNetParams = dict(edge_length=40, lanes=1, speed_limit=30)

# Network configuration. The "default:" remarks are the author's annotations.
net_params = NetParams(
    # Keep internal links so that intersections are not skipped (default: True).
    no_internal_links=False,
    inflows=None,                           # default: None
    osm_path=None,                          # default: None
    netfile=None,                           # default: None
    additional_params=additionalNetParams,  # default: None
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial placement of the vehicles at the start of a rollout.
# The "default:" remarks are the author's annotations.
initial_config = InitialConfig(
    shuffle=True,                     # default: False
    spacing="custom",                 # default: "uniform"
    min_gap=10,                       # default: 0
    perturbation=29.99,               # default: 0.0
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. According to the author's annotations every value below
# equals the library default; they are spelled out to make the setup explicit.
sumo_params = SumoParams(
    # --- connection / process ---
    port=None,
    num_clients=1,
    sumo_binary=None,
    restart_instance=False,
    # --- simulation ---
    sim_step=0.1,
    seed=None,
    lateral_resolution=None,
    overtake_right=False,
    teleport_time=-1,
    # --- logging / output ---
    emission_path=None,
    no_step_log=True,
    print_warnings=True,
    # --- rendering ---
    render=False,
    save_render=False,
    sight_radius=25,
    show_radius=False,
    pxpm=2,
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, handed to EnvParams as additional_params:
#   max_accel       - maximum acceleration of autonomous vehicles
#   max_decel       - maximum deceleration of autonomous vehicles
#   target_velocity - target velocity setting passed to the environment
additionalEnvParams = dict(max_accel=3, max_decel=3, target_velocity=30)

# Environment configuration. The "default:" remarks are the author's annotations.
env_params = EnvParams(
    horizon=HORIZON,                        # default: 500 (shared HORIZON constant)
    additional_params=additionalEnvParams,  # default: None
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicles Parameter

In [6]:
# Container class that collects the vehicle types used in the experiment.
from flow.core.params import VehicleParams

# Vehicle dynamics models: controllers and SUMO parameter classes.
# The two commented-out imports are alternatives that are currently disabled.
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import - no name from `random` is referenced in the
# visible cells, and `import *` can silently shadow other notebook names.
# Consider importing only the names that are actually needed.
from random import *

# Fresh vehicle collection; vehicle types are registered on it via vehicles.add(...).
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# SUMO car-following settings used for the RL vehicles (default: None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# No SUMO lane-change settings are configured (default: None).
lc_parameter = None

# Register the RL-controlled vehicle type. The "default:" remarks are the
# author's annotations.
vehicles.add(
    veh_id="rl",                                     # name of the vehicle
    num_vehicles=2,                                  # default: 1
    initial_speed=0,                                 # default: 0
    acceleration_controller=(RLController, {}),      # default: (SumoCarFollowingController, {})
    lane_change_controller=(StaticLaneChanger, {}),  # default: (SumoLaneChangeController, {})
    routing_controller=(ContinuousRouter, {}),       # default: None
    car_following_params=cf_parameter,
    # Keyword variants tried earlier and left disabled:
    #   speed_mode="aggressive"            (default: "right_of_way")
    #   lane_change_mode="aggressive"      (default: "no_lat_collide")
    #   sumo_car_following_params=cf_parameter
    #   sumo_lc_params=lc_parameter
)

### Flow Parameter

In [8]:
# Bundle every parameter object into the flow_params dictionary.
# The keys must be spelled exactly as below.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or its ADDITIONAL_NET_PARAMS)
    "net": net_params,
    # vehicles placed in the network at the start of a rollout
    "veh": vehicles,
    # (optional) positioning of vehicles upon initialization/reset
    # (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
N_CPUS = 2       # number of parallel rollout workers
N_ROLLOUTS = 20  # number of rollouts per training iteration

# Start Ray with one CPU more than the number of workers
# (presumably for the process that drives training - confirm).
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-12_14-34-58_18114/logs.
Waiting for redis server at 127.0.0.1:35001 to respond...
Waiting for redis server at 127.0.0.1:17531 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=7fbdae9c001dcdf318cbc08369cccbca47552e3da1baed9c



{'node_ip_address': '172.16.123.117',
 'object_store_addresses': ['/tmp/ray/session_2019-03-12_14-34-58_18114/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-12_14-34-58_18114/sockets/raylet'],
 'redis_address': '172.16.123.117:35001',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=7fbdae9c001dcdf318cbc08369cccbca47552e3da1baed9c'}

In [11]:
from copy import deepcopy

# Algorithm to train: the name of a built-in RLlib algorithm (e.g. "DQN" or
# "PPO"), or a user-defined trainable function/class registered in the tune
# registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults. A shallow .copy() would share the nested "model" and
# "env_config" dicts with agent_cls._default_config, so the in-place edits
# below would leak into the class-level defaults and into any other
# configuration derived from them in this kernel.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97
# Disabled experiments (author's note, translated: "200 default, still too high?"):
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"]
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize flow_params to a JSON string and
# store it, together with the algorithm name, in the environment config.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Build the environment factory and its gym name for this experiment ...
create_env, gym_name = make_create_env(params=flow_params, version=0)

# ... and register the factory with RLlib under that name.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping.
# An environment instance is created here only to read its observation and
# action spaces.
# NOTE(review): this instance is never closed in the visible cells - check
# whether the environment offers a shutdown call that should be used here.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space


def gen_policy():
    """Return the (policy class, observation space, action space, config) tuple for one PPO policy."""
    return (PPOPolicyGraph, obs_space, act_space, {})


# One separate policy graph per name. The names have to equal the agent ids,
# because policy_mapping_fn returns the agent id unchanged.
policy_graphs = {policy_name: gen_policy() for policy_name in ('rl_0', 'rl_1')}


def policy_mapping_fn(agent_id):
    """Select the policy whose name is identical to the agent id."""
    return agent_id


config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
}

 Starting SUMO on port 42261


23.196257408802122
3.55673754868485


In [None]:
# Experiment specification handed to Tune.
experiment_spec = {
    "run": alg_run,               # RL algorithm to run
    "env": gym_name,              # environment name generated earlier
    "config": dict(config),       # configuration params (must match "run" value)
    "checkpoint_freq": 1,         # number of iterations between checkpoints
    "max_failures": 999,
    "stop": {"training_iteration": 1000},  # stop after this many iterations
}

# Launch training; the experiment is keyed by its tag from flow_params.
trials = run_experiments({flow_params["exp_tag"]: experiment_spec})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv-v0_0_2019-03-12_14-35-01ownx4tyw -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-36-35
  done: false
  episode_len_mean: 462.0952380952381
  episode_reward_max: 177.99807200266918
  episode_reward_mean: 51.85029512876206
  episode_reward_min: -141.171780641903
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4574.424
    load_time_ms: 159.314
    num_steps_sampled: 10000
    num_ste

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-38-15
  done: false
  episode_len_mean: 443.56
  episode_reward_max: 271.2659360999774
  episode_reward_mean: 97.34473175555858
  episode_reward_min: -161.12027829879614
  episodes_this_iter: 23
  episodes_total: 112
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3497.572
    load_time_ms: 33.665
    num_steps_sampled: 50000
    num_steps_trained: 50000
    rl_0:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.4250469207763672
      kl: 0.0028582620434463024
      policy_loss: -0.0028415319975465536
      total_loss: 36.61734390258789
      vf_explained_var: 0.40489599108695984
      vf_loss: 36.62015151977539
    rl_1:
      cur_kl_coeff: 0.012500000186264515
      cur_lr: 4.999999873689376e-05
      entropy: 1.41482675075531
      kl: 0.00896541029214859
      policy_loss: -0.003199500497430563
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 256 s, 9 iter, 90000 ts, 204 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-40-27
  done: false
  episode_len_mean: 294.47
  episode_reward_max: 278.89512259050395
  episode_reward_mean: 215.40306944313127
  episode_reward_min: -153.57586899538597
  episodes_this_iter: 36
  episodes_total: 272
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3428.637
    load_time_ms: 18.061
    num_steps_sampled: 100000
    num_steps_trained: 100000
    rl_0:
      cur_kl_coeff: 0.0007812500116415322
      cur_lr: 4.999999873689376e-05
      entropy: 1.417761206626892
      kl: 0.0025372228119522333
      policy_loss: -0.0008997512049973011
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 379 s, 14 iter, 140000 ts, 197 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-42-30
  done: false
  episode_len_mean: 207.46
  episode_reward_max: 275.6967459544674
  episode_reward_mean: 183.66448722667204
  episode_reward_min: -150.69026983776095
  episodes_this_iter: 48
  episodes_total: 498
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3261.901
    load_time_ms: 2.43
    num_steps_sampled: 150000
    num_steps_trained: 150000
    rl_0:
      cur_kl_coeff: 4.882812572759576e-05
      cur_lr: 4.999999873689376e-05
      entropy: 1.3759373426437378
      kl: 0.0054731774143874645
      policy_loss: -0.0013969348510727286
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 506 s, 19 iter, 190000 ts, 235 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-44-52
  done: false
  episode_len_mean: 225.37
  episode_reward_max: 276.8713119626218
  episode_reward_mean: 227.00223891505678
  episode_reward_min: -154.72176574546071
  episodes_this_iter: 45
  episodes_total: 715
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3943.722
    load_time_ms: 2.55
    num_steps_sampled: 200000
    num_steps_trained: 200000
    rl_0:
      cur_kl_coeff: 1.5258789289873675e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.395506501197815
      kl: 0.0090343551710248
      policy_loss: -0.003080629510805011
      total_l

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 682 s, 24 iter, 240000 ts, 239 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-47-36
  done: false
  episode_len_mean: 225.23
  episode_reward_max: 328.06272142414383
  episode_reward_mean: 241.12618376142424
  episode_reward_min: -148.5130223696808
  episodes_this_iter: 45
  episodes_total: 948
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4480.388
    load_time_ms: 3.11
    num_steps_sampled: 250000
    num_steps_trained: 250000
    rl_0:
      cur_kl_coeff: 9.536743306171047e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.3614243268966675
      kl: 0.001630291691981256
      policy_loss: -0.001613517990335822
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 831 s, 29 iter, 290000 ts, 242 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-50-02
  done: false
  episode_len_mean: 202.48
  episode_reward_max: 322.5477415676803
  episode_reward_mean: 236.70171432230157
  episode_reward_min: -151.62542682411396
  episodes_this_iter: 51
  episodes_total: 1205
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3973.295
    load_time_ms: 3.148
    num_steps_sampled: 300000
    num_steps_trained: 300000
    rl_0:
      cur_kl_coeff: 2.9802322831784522e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.268086314201355
      kl: 0.005785956513136625
      policy_loss: -0.0024502102751284838
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 972 s, 34 iter, 340000 ts, 248 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-52-30
  done: false
  episode_len_mean: 203.2
  episode_reward_max: 324.75212806111193
  episode_reward_mean: 249.77232760140296
  episode_reward_min: 88.8095784616653
  episodes_this_iter: 52
  episodes_total: 1453
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3782.933
    load_time_ms: 2.776
    num_steps_sampled: 350000
    num_steps_trained: 350000
    rl_0:
      cur_kl_coeff: 9.313225884932663e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3279123306274414
      kl: 0.008350067771971226
      policy_loss: -0.0032423455268144608
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 1153 s, 39 iter, 390000 ts, 269 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-55-34
  done: false
  episode_len_mean: 189.46
  episode_reward_max: 329.6852325561404
  episode_reward_mean: 278.20435758088865
  episode_reward_min: 169.39291687128122
  episodes_this_iter: 52
  episodes_total: 1722
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4304.162
    load_time_ms: 3.042
    num_steps_sampled: 400000
    num_steps_trained: 400000
    rl_0:
      cur_kl_coeff: 2.3283064712331658e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.3190593719482422
      kl: 0.006510613951832056
      policy_loss: -0.0009651337168179452
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 1339 s, 44 iter, 440000 ts, 288 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_14-58-36
  done: false
  episode_len_mean: 183.29
  episode_reward_max: 326.76897307073756
  episode_reward_mean: 287.5130025602434
  episode_reward_min: 247.686411142333
  episodes_this_iter: 54
  episodes_total: 1992
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4819.092
    load_time_ms: 3.351
    num_steps_sampled: 450000
    num_steps_trained: 450000
    rl_0:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 1.1875951290130615
      kl: 0.009444466792047024
      policy_loss: -0.0028271740302443504
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 1465 s, 49 iter, 490000 ts, 298 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-00-37
  done: false
  episode_len_mean: 177.79
  episode_reward_max: 369.7586313505695
  episode_reward_mean: 295.9323559952172
  episode_reward_min: 250.96454134261717
  episodes_this_iter: 56
  episodes_total: 2272
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4028.483
    load_time_ms: 3.174
    num_steps_sampled: 500000
    num_steps_trained: 500000
    rl_0:
      cur_kl_coeff: 7.275957722603643e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.1007241010665894
      kl: 0.013715546578168869
      policy_loss: -0.004488083068281412
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 1585 s, 54 iter, 540000 ts, 302 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-02-38
  done: false
  episode_len_mean: 175.15
  episode_reward_max: 346.07470761838005
  episode_reward_mean: 301.89289099719474
  episode_reward_min: 261.5908994366934
  episodes_this_iter: 56
  episodes_total: 2558
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3173.461
    load_time_ms: 2.554
    num_steps_sampled: 550000
    num_steps_trained: 550000
    rl_0:
      cur_kl_coeff: 9.094947153254554e-14
      cur_lr: 4.999999873689376e-05
      entropy: 1.084411859512329
      kl: 0.0036614048294723034
      policy_loss: -0.001939520239830017
      tot

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 1707 s, 59 iter, 590000 ts, 293 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-04-40
  done: false
  episode_len_mean: 171.34
  episode_reward_max: 357.9686365196818
  episode_reward_mean: 297.7465960710316
  episode_reward_min: 259.5394013234067
  episodes_this_iter: 59
  episodes_total: 2849
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3178.715
    load_time_ms: 2.372
    num_steps_sampled: 600000
    num_steps_trained: 600000
    rl_0:
      cur_kl_coeff: 2.842170985392048e-15
      cur_lr: 4.999999873689376e-05
      entropy: 0.8940294981002808
      kl: 0.007583055645227432
      policy_loss: -0.0017124693840742111
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 1839 s, 64 iter, 640000 ts, 297 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-07-03
  done: false
  episode_len_mean: 170.29
  episode_reward_max: 357.68039085343815
  episode_reward_mean: 294.61856995423915
  episode_reward_min: 255.8573460805399
  episodes_this_iter: 58
  episodes_total: 3141
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3365.523
    load_time_ms: 2.548
    num_steps_sampled: 650000
    num_steps_trained: 650000
    rl_0:
      cur_kl_coeff: 8.88178432935015e-17
      cur_lr: 4.999999873689376e-05
      entropy: 0.8284774422645569
      kl: 0.009317923337221146
      policy_loss: -0.002731515094637871
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 1989 s, 69 iter, 690000 ts, 290 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-09-30
  done: false
  episode_len_mean: 167.63
  episode_reward_max: 362.9665700222359
  episode_reward_mean: 292.21924576544643
  episode_reward_min: 259.1365502006783
  episodes_this_iter: 59
  episodes_total: 3440
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3483.628
    load_time_ms: 2.636
    num_steps_sampled: 700000
    num_steps_trained: 700000
    rl_0:
      cur_kl_coeff: 5.551115205843844e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.761993944644928
      kl: 0.012266441248357296
      policy_loss: -0.0034351865760982037
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 2143 s, 74 iter, 740000 ts, 291 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-12-02
  done: false
  episode_len_mean: 166.22
  episode_reward_max: 337.9411959587842
  episode_reward_mean: 293.5653057693308
  episode_reward_min: 257.9395943183645
  episodes_this_iter: 59
  episodes_total: 3741
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3606.46
    load_time_ms: 2.719
    num_steps_sampled: 750000
    num_steps_trained: 750000
    rl_0:
      cur_kl_coeff: 6.938894007304805e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7095910310745239
      kl: 0.004280410706996918
      policy_loss: -0.0016166850691661239
      total

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 2333 s, 79 iter, 790000 ts, 295 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-15-19
  done: false
  episode_len_mean: 167.47
  episode_reward_max: 331.5605833393372
  episode_reward_mean: 295.86572467909775
  episode_reward_min: 258.88580352803604
  episodes_this_iter: 60
  episodes_total: 4040
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4623.853
    load_time_ms: 2.801
    num_steps_sampled: 800000
    num_steps_trained: 800000
    rl_0:
      cur_kl_coeff: 2.1684043772827515e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.671798825263977
      kl: 0.005866638384759426
      policy_loss: -0.0026449565775692463
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 2541 s, 84 iter, 840000 ts, 292 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-19-11
  done: false
  episode_len_mean: 168.36
  episode_reward_max: 338.4835989774198
  episode_reward_mean: 296.51448100072923
  episode_reward_min: 264.4152966622141
  episodes_this_iter: 60
  episodes_total: 4340
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 6337.415
    load_time_ms: 3.434
    num_steps_sampled: 850000
    num_steps_trained: 850000
    rl_0:
      cur_kl_coeff: 2.7105054716034394e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.7108908891677856
      kl: 0.0055423942394554615
      policy_loss: -0.001231831032782793
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 2770 s, 89 iter, 890000 ts, 297 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-22-49
  done: false
  episode_len_mean: 167.26
  episode_reward_max: 341.6774101742171
  episode_reward_mean: 297.0976816076761
  episode_reward_min: 262.5435243910061
  episodes_this_iter: 58
  episodes_total: 4638
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 6674.66
    load_time_ms: 4.069
    num_steps_sampled: 900000
    num_steps_trained: 900000
    rl_0:
      cur_kl_coeff: 1.6940659197521496e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.6747797727584839
      kl: 0.007173905149102211
      policy_loss: -0.0035354720894247293
      tota

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 2980 s, 94 iter, 940000 ts, 301 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-26-12
  done: false
  episode_len_mean: 170.79
  episode_reward_max: 344.48003420246135
  episode_reward_mean: 303.5896374260337
  episode_reward_min: 260.3957083823064
  episodes_this_iter: 58
  episodes_total: 4934
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 5153.878
    load_time_ms: 3.394
    num_steps_sampled: 950000
    num_steps_trained: 950000
    rl_0:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 0.7158182859420776
      kl: 0.0039252652786672115
      policy_loss: -0.0017201772425323725
      to

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 3159 s, 99 iter, 990000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-29-10
  done: false
  episode_len_mean: 171.92
  episode_reward_max: 355.2978858276576
  episode_reward_mean: 304.66001073951514
  episode_reward_min: 267.779006155095
  episodes_this_iter: 58
  episodes_total: 5226
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4440.522
    load_time_ms: 2.944
    num_steps_sampled: 1000000
    num_steps_trained: 1000000
    rl_0:
      cur_kl_coeff: 3.3087224995159173e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.6874036192893982
      kl: 0.007725963369011879
      policy_loss: -0.0015180177288129926
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 3399 s, 104 iter, 1040000 ts, 305 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-33-07
  done: false
  episode_len_mean: 174.0
  episode_reward_max: 349.0441708779232
  episode_reward_mean: 307.9314200974431
  episode_reward_min: 269.05567585850935
  episodes_this_iter: 57
  episodes_total: 5514
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 5336.197
    load_time_ms: 3.24
    num_steps_sampled: 1050000
    num_steps_trained: 1050000
    rl_0:
      cur_kl_coeff: 1.0339757810987241e-26
      cur_lr: 4.999999873689376e-05
      entropy: 0.8054108619689941
      kl: 0.0028933945577591658
      policy_loss: -0.001090855454094708
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 3576 s, 109 iter, 1090000 ts, 311 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-35-52
  done: false
  episode_len_mean: 174.81
  episode_reward_max: 371.1097641786338
  episode_reward_mean: 311.7494991388955
  episode_reward_min: 279.1712832550117
  episodes_this_iter: 56
  episodes_total: 5800
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 5304.598
    load_time_ms: 2.941
    num_steps_sampled: 1100000
    num_steps_trained: 1100000
    rl_0:
      cur_kl_coeff: 3.231174315933513e-28
      cur_lr: 4.999999873689376e-05
      entropy: 0.6886907815933228
      kl: 0.007605917751789093
      policy_loss: -0.0013670892221853137
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 3775 s, 114 iter, 1140000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-39-11
  done: false
  episode_len_mean: 177.96
  episode_reward_max: 362.5128529448928
  episode_reward_mean: 314.81115329688413
  episode_reward_min: 268.1963629876752
  episodes_this_iter: 58
  episodes_total: 6086
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 5126.813
    load_time_ms: 2.917
    num_steps_sampled: 1150000
    num_steps_trained: 1150000
    rl_0:
      cur_kl_coeff: 1.0097419737292228e-29
      cur_lr: 4.999999873689376e-05
      entropy: 0.761408269405365
      kl: 0.005423174239695072
      policy_loss: -0.0007734191603958607
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 4062 s, 119 iter, 1190000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-45-16
  done: false
  episode_len_mean: 178.83
  episode_reward_max: 389.7159784452137
  episode_reward_mean: 324.186787717234
  episode_reward_min: 275.25314171876636
  episodes_this_iter: 56
  episodes_total: 6365
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 8665.799
    load_time_ms: 4.241
    num_steps_sampled: 1200000
    num_steps_trained: 1200000
    rl_0:
      cur_kl_coeff: 3.1554436679038213e-31
      cur_lr: 4.999999873689376e-05
      entropy: 0.714218258857727
      kl: 0.004073218442499638
      policy_loss: -0.0005258841556496918
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 4452 s, 124 iter, 1240000 ts, 322 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-51-10
  done: false
  episode_len_mean: 181.45
  episode_reward_max: 381.72018196588306
  episode_reward_mean: 317.3255817276073
  episode_reward_min: 127.29341238053661
  episodes_this_iter: 56
  episodes_total: 6644
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 10712.829
    load_time_ms: 7.469
    num_steps_sampled: 1250000
    num_steps_trained: 1250000
    rl_0:
      cur_kl_coeff: 9.860761462199441e-33
      cur_lr: 4.999999873689376e-05
      entropy: 0.80447918176651
      kl: 0.004182243254035711
      policy_loss: -0.0009761251858435571
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 4754 s, 129 iter, 1290000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_15-56-00
  done: false
  episode_len_mean: 184.16
  episode_reward_max: 373.2155568921851
  episode_reward_mean: 324.24000283120046
  episode_reward_min: 138.81407808727715
  episodes_this_iter: 53
  episodes_total: 6918
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 9177.851
    load_time_ms: 7.162
    num_steps_sampled: 1300000
    num_steps_trained: 1300000
    rl_0:
      cur_kl_coeff: 3.0814879569373254e-34
      cur_lr: 4.999999873689376e-05
      entropy: 0.8095552921295166
      kl: 0.005292286165058613
      policy_loss: -0.0016104242531582713
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 5095 s, 134 iter, 1340000 ts, 317 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-01-27
  done: false
  episode_len_mean: 177.1
  episode_reward_max: 375.53331624019603
  episode_reward_mean: 320.195752597088
  episode_reward_min: 137.56260184409746
  episodes_this_iter: 56
  episodes_total: 7200
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 8616.096
    load_time_ms: 7.723
    num_steps_sampled: 1350000
    num_steps_trained: 1350000
    rl_0:
      cur_kl_coeff: 9.629649865429142e-36
      cur_lr: 4.999999873689376e-05
      entropy: 0.5953810214996338
      kl: 0.006822591181844473
      policy_loss: -0.0005253117997199297
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 5301 s, 139 iter, 1390000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-04-49
  done: false
  episode_len_mean: 179.01
  episode_reward_max: 379.2135773569388
  episode_reward_mean: 330.55013219940815
  episode_reward_min: 275.80316261423036
  episodes_this_iter: 55
  episodes_total: 7481
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 7560.973
    load_time_ms: 7.108
    num_steps_sampled: 1400000
    num_steps_trained: 1400000
    rl_0:
      cur_kl_coeff: 3.009265582946607e-37
      cur_lr: 4.999999873689376e-05
      entropy: 0.629163384437561
      kl: 0.005509770009666681
      policy_loss: -0.0017279001185670495
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 5534 s, 144 iter, 1440000 ts, 318 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-08-52
  done: false
  episode_len_mean: 175.73
  episode_reward_max: 391.7967330726152
  episode_reward_mean: 314.91081349566406
  episode_reward_min: 103.87658181450439
  episodes_this_iter: 58
  episodes_total: 7765
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 6594.644
    load_time_ms: 3.414
    num_steps_sampled: 1450000
    num_steps_trained: 1450000
    rl_0:
      cur_kl_coeff: 9.403954246058914e-39
      cur_lr: 4.999999873689376e-05
      entropy: 0.45225638151168823
      kl: 0.00646133441478014
      policy_loss: -0.002023466397076845
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 5705 s, 149 iter, 1490000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-11-23
  done: false
  episode_len_mean: 174.47
  episode_reward_max: 385.18977877407457
  episode_reward_mean: 321.85482349996465
  episode_reward_min: 124.72839877942272
  episodes_this_iter: 58
  episodes_total: 8051
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 5089.013
    load_time_ms: 3.027
    num_steps_sampled: 1500000
    num_steps_trained: 1500000
    rl_0:
      cur_kl_coeff: 2.93873307445879e-40
      cur_lr: 4.999999873689376e-05
      entropy: 0.5126386284828186
      kl: 0.007722851354628801
      policy_loss: -0.001232656417414546
     

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 5863 s, 154 iter, 1540000 ts, 321 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-14-09
  done: false
  episode_len_mean: 172.03
  episode_reward_max: 365.905491360634
  episode_reward_mean: 323.7616399773284
  episode_reward_min: 273.7585674853693
  episodes_this_iter: 58
  episodes_total: 8340
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3859.948
    load_time_ms: 3.074
    num_steps_sampled: 1550000
    num_steps_trained: 1550000
    rl_0:
      cur_kl_coeff: 9.184110135184851e-42
      cur_lr: 4.999999873689376e-05
      entropy: 0.3558163046836853
      kl: 0.008644387125968933
      policy_loss: 0.00021070738148409873
      t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 6021 s, 159 iter, 1590000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-16-40
  done: false
  episode_len_mean: 175.41
  episode_reward_max: 375.05888101790123
  episode_reward_mean: 317.6469491906683
  episode_reward_min: 53.72954959431914
  episodes_this_iter: 58
  episodes_total: 8624
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4035.592
    load_time_ms: 3.188
    num_steps_sampled: 1600000
    num_steps_trained: 1600000
    rl_0:
      cur_kl_coeff: 2.872661851865875e-43
      cur_lr: 4.999999873689376e-05
      entropy: 0.34929677844047546
      kl: 0.011387081816792488
      policy_loss: -0.0013447960373014212
    

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 6164 s, 164 iter, 1640000 ts, 320 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-19-13
  done: false
  episode_len_mean: 177.24
  episode_reward_max: 385.3088563573551
  episode_reward_mean: 317.4757177512671
  episode_reward_min: -149.11089698826197
  episodes_this_iter: 55
  episodes_total: 8906
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4078.338
    load_time_ms: 2.901
    num_steps_sampled: 1650000
    num_steps_trained: 1650000
    rl_0:
      cur_kl_coeff: 3.6433760072445244e-44
      cur_lr: 4.999999873689376e-05
      entropy: 0.43775251507759094
      kl: 0.007360916119068861
      policy_loss: 0.0001086649062926881
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 6337 s, 169 iter, 1690000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-22-12
  done: false
  episode_len_mean: 178.25
  episode_reward_max: 390.966795659073
  episode_reward_mean: 326.8470913615754
  episode_reward_min: 117.90130085841457
  episodes_this_iter: 57
  episodes_total: 9188
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4477.457
    load_time_ms: 2.748
    num_steps_sampled: 1700000
    num_steps_trained: 1700000
    rl_0:
      cur_kl_coeff: 4.203895392974451e-45
      cur_lr: 4.999999873689376e-05
      entropy: 0.3097420334815979
      kl: 0.012600046582520008
      policy_loss: -7.830574759282172e-05
      

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 6545 s, 174 iter, 1740000 ts, 319 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-25-37
  done: false
  episode_len_mean: 181.14
  episode_reward_max: 378.1720375355267
  episode_reward_mean: 319.7320236527369
  episode_reward_min: -139.80873576002253
  episodes_this_iter: 55
  episodes_total: 9465
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 5079.831
    load_time_ms: 3.55
    num_steps_sampled: 1750000
    num_steps_trained: 1750000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.4100096523761749
      kl: 0.006329636555165052
      policy_loss: -0.00032605582964606583
      total_loss: 54.7

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 6716 s, 179 iter, 1790000 ts, 335 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-28-16
  done: false
  episode_len_mean: 182.97
  episode_reward_max: 379.8861310713848
  episode_reward_mean: 331.37088065275583
  episode_reward_min: 140.8237216512103
  episodes_this_iter: 55
  episodes_total: 9741
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4628.57
    load_time_ms: 4.303
    num_steps_sampled: 1800000
    num_steps_trained: 1800000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.39249706268310547
      kl: 0.005821987520903349
      policy_loss: -0.001021399861201644
      total_loss: 50.923

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 6833 s, 184 iter, 1840000 ts, 327 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-30-13
  done: false
  episode_len_mean: 181.01
  episode_reward_max: 385.0612248048294
  episode_reward_mean: 332.56016594657433
  episode_reward_min: 97.89289471349866
  episodes_this_iter: 55
  episodes_total: 10016
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3417.022
    load_time_ms: 3.187
    num_steps_sampled: 1850000
    num_steps_trained: 1850000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.19718101620674133
      kl: 0.013234353624284267
      policy_loss: 0.00021807171287946403
      total_loss: 29.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 6950 s, 189 iter, 1890000 ts, 331 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-32-13
  done: false
  episode_len_mean: 182.54
  episode_reward_max: 388.9056898604614
  episode_reward_mean: 314.65823364860637
  episode_reward_min: 97.87825530768293
  episodes_this_iter: 55
  episodes_total: 10293
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3124.104
    load_time_ms: 2.411
    num_steps_sampled: 1900000
    num_steps_trained: 1900000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3611321449279785
      kl: 0.017090488225221634
      policy_loss: -0.004724711179733276
      total_loss: 91.01

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 7107 s, 194 iter, 1940000 ts, 329 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-34-49
  done: false
  episode_len_mean: 177.96
  episode_reward_max: 384.90398407410623
  episode_reward_mean: 315.1619819787194
  episode_reward_min: -145.47755859555454
  episodes_this_iter: 57
  episodes_total: 10573
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3698.923
    load_time_ms: 2.588
    num_steps_sampled: 1950000
    num_steps_trained: 1950000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.3514493703842163
      kl: 0.010537711903452873
      policy_loss: -0.0008661302854306996
      total_loss: 23

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 7286 s, 199 iter, 1990000 ts, 312 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-38-02
  done: false
  episode_len_mean: 176.66
  episode_reward_max: 377.4375440254077
  episode_reward_mean: 313.1563581483384
  episode_reward_min: -141.15145584919276
  episodes_this_iter: 56
  episodes_total: 10855
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4768.587
    load_time_ms: 2.863
    num_steps_sampled: 2000000
    num_steps_trained: 2000000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.2123727798461914
      kl: 0.012316057458519936
      policy_loss: -0.0013589652953669429
      total_loss: 38.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 7426 s, 204 iter, 2040000 ts, 316 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-40-10
  done: false
  episode_len_mean: 172.35
  episode_reward_max: 383.1788538710256
  episode_reward_mean: 312.82740402643304
  episode_reward_min: -144.92908031942244
  episodes_this_iter: 58
  episodes_total: 11142
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 4279.38
    load_time_ms: 2.891
    num_steps_sampled: 2050000
    num_steps_trained: 2050000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.15495453774929047
      kl: 0.014566653408110142
      policy_loss: -0.0005287954118102789
      total_loss: 13

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 7615 s, 209 iter, 2090000 ts, 325 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-43-25
  done: false
  episode_len_mean: 175.51
  episode_reward_max: 377.53498752304364
  episode_reward_mean: 329.2551001834916
  episode_reward_min: 112.53006708334738
  episodes_this_iter: 57
  episodes_total: 11429
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3944.816
    load_time_ms: 3.116
    num_steps_sampled: 2100000
    num_steps_trained: 2100000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.18886147439479828
      kl: 0.007809900213032961
      policy_loss: -0.00039338826900348067
      total_loss: 2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 7744 s, 214 iter, 2140000 ts, 313 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-45-27
  done: false
  episode_len_mean: 173.66
  episode_reward_max: 388.1348420447826
  episode_reward_mean: 314.8393066905221
  episode_reward_min: -150.01076892416776
  episodes_this_iter: 58
  episodes_total: 11718
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3922.045
    load_time_ms: 2.993
    num_steps_sampled: 2150000
    num_steps_trained: 2150000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.18521782755851746
      kl: 0.011339694261550903
      policy_loss: -3.682974784169346e-05
      total_loss: 82

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 7.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 7891 s, 219 iter, 2190000 ts, 332 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-47-59
  done: false
  episode_len_mean: 176.24
  episode_reward_max: 375.9326115120743
  episode_reward_mean: 328.6385660795962
  episode_reward_min: -150.42253352297146
  episodes_this_iter: 56
  episodes_total: 12002
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3634.437
    load_time_ms: 2.428
    num_steps_sampled: 2200000
    num_steps_trained: 2200000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.1834312379360199
      kl: 0.030773339793086052
      policy_loss: -0.0023920638486742973
      total_loss: 67.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 8.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv-v0_0:	RUNNING [pid=18174], 8020 s, 224 iter, 2240000 ts, 315 rew

Result for PPO_MultiAgentIntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-12_16-50-02
  done: false
  episode_len_mean: 173.79
  episode_reward_max: 381.5565466393143
  episode_reward_mean: 310.1378297934546
  episode_reward_min: -143.49300245252738
  episodes_this_iter: 58
  episodes_total: 12287
  experiment_id: abc197f3c9fd4810a90b9d1fdf241416
  hostname: Gandalf
  info:
    grad_time_ms: 3667.022
    load_time_ms: 2.527
    num_steps_sampled: 2250000
    num_steps_trained: 2250000
    rl_0:
      cur_kl_coeff: 0.0
      cur_lr: 4.999999873689376e-05
      entropy: 0.12315817177295685
      kl: 0.020366599783301353
      policy_loss: -0.0004213926149532199
      total_loss: 18