# TRAINING I3W


# A) Create Environment, Vehicles etc.

### General Parameter

In [1]:
# Rollout length, defined once so that every later cell uses the same value.
HORIZON = 500

# Name of the experiment.
experiment_name = "IntersectionExample"

# Scenario class: list what flow.scenarios exports, then pick one by name.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the recorded output of this cell lists 'IntersectionScenarioTW'
# in __all__, not the name used below -- confirm that flow.scenarios really
# exposes the class under this name.
scenario_name = "IntersectionTWScenario"

# Environment class: list what flow.envs exports, then pick one by name.
import flow.envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "IntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenario', 'IntersectionScenarioTW']

Available environments:
['Env', 'AccelEnv', 'LaneChangeAccelEnv', 'LaneChangeAccelPOEnv', 'LaneChangeAccelEnv_speed', 'GreenWaveTestEnv', 'GreenWaveTestEnv', 'WaveAttenuationMergePOEnv', 'TwoLoopsMergePOEnv', 'BottleneckEnv', 'BottleNeckAccelEnv', 'WaveAttenuationEnv', 'WaveAttenuationPOEnv', 'TrafficLightGridEnv', 'PO_TrafficLightGridEnv', 'DesiredVelocityEnv', 'TestEnv', 'BayBridgeEnv', 'IntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
# Imported alongside NetParams; not referenced anywhere in this cell.
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, handed to NetParams as additional_params.
additionalNetParams = dict(
    edge_length=40,
    lanes=1,
    speed_limit=30,
)

# Network parameters; the trailing comments carry the author's default notes.
net_params = NetParams(
    # default: True   !! set to False so that junctions are not skipped
    no_internal_links=False,
    inflows=None,                           # default: None
    osm_path=None,                          # default: None
    netfile=None,                           # default: None
    additional_params=additionalNetParams,  # default: None   !!
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Parameters affecting how vehicles are positioned on initialization/reset.
# The trailing comments carry the author's default notes and "!!" markers.
initial_config = InitialConfig(
    shuffle=True,                     # default: False          !!
    spacing="custom",                 # default: "uniform"      !!
    min_gap=10,                       # default: 0
    perturbation=30.0,                # default: 0.0            !!
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# SUMO-related simulation parameters. Every argument is spelled out next to
# the default value the author noted for it.
sumo_params = SumoParams(
    port=None,                # default: None
    sim_step=0.1,             # default: 0.1
    emission_path=None,       # default: None
    lateral_resolution=None,  # default: None
    no_step_log=True,         # default: True
    render=False,             # default: False
    save_render=False,        # default: False
    sight_radius=25,          # default: 25
    show_radius=False,        # default: False
    pxpm=2,                   # default: 2
    overtake_right=False,     # default: False
    seed=None,                # default: None
    restart_instance=False,   # default: False
    print_warnings=True,      # default: True
    teleport_time=-1,         # default: -1
    num_clients=1,            # default: 1
    sumo_binary=None,         # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, handed to EnvParams as additional_params.
additionalEnvParams = dict(
    max_accel=3,          # maximum acceleration of autonomous vehicles
    max_decel=3,          # maximum deceleration of autonomous vehicles
    target_velocity=30,
)

# Environment parameters; the rollout length reuses the notebook-wide HORIZON.
env_params = EnvParams(
    additional_params=additionalEnvParams,  # default: None    !!
    horizon=HORIZON,                        # default: 500     !!
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
# (the two commented-out imports are the controllers named as defaults in the
# vehicles.add(...) cell further down)
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import -- it places every public name of the stdlib
# `random` module (seed, shuffle, choice, ...) into the notebook namespace.
# None of the cells visible here use one; prefer an explicit import if a later
# cell actually needs something from `random`.
from random import *

# VehicleParams instance that is populated via vehicles.add(...) in the next cell.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters for the RL vehicles (default: None).
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters (default: None); defined here but only referenced by
# the disabled keyword arguments listed at the end of the call below.
lc_parameter = None

vehicles.add(
    # name of the vehicle
    veh_id="rl",
    # acceleration controller, default: (SumoCarFollowingController, {})
    acceleration_controller=(RLController, {}),
    # lane-change controller, default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # routing controller, default: None
    routing_controller=(ContinuousRouter, {}),
    # initial speed, default: 0
    initial_speed=0,
    # number of vehicles, default: 1
    num_vehicles=2,
    # car-following parameters, default: None
    car_following_params=cf_parameter,
    # Disabled keyword arguments, kept for reference:
    #   speed_mode="aggressive"                  (default: "right_of_way")
    #   lane_change_mode="aggressive"            (default: "no_lat_collide")
    #   sumo_car_following_params=cf_parameter   (default: None)
    #   sumo_lc_params=lc_parameter              (default: None)
)

### Flow Parameter

In [8]:
# Bundle everything defined above into flow_params.
# The dictionary keys must be exactly the ones used here.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
# get_agent_class is taken from ray.rllib.agents.agent when that import works
# and from ray.rllib.agents.registry otherwise (presumably its location differs
# between Ray releases -- confirm against the installed version).
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

# Flow helpers: environment factory and the JSON encoder used for flow_params.
from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

In [10]:
# Number of parallel workers.
N_CPUS = 2
# Number of rollouts per training iteration.
N_ROLLOUTS = 20

# Start Ray with one CPU more than there are workers (presumably for the
# process that drives training -- confirm). Kept as the last expression so the
# connection info returned by ray.init is shown as the cell output.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-02-19_23-46-01_11033/logs.
Waiting for redis server at 127.0.0.1:15920 to respond...
Waiting for redis server at 127.0.0.1:47122 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=7e65830e1a33e479a0c15cfe28be83250a74accd79dd1994



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-02-19_23-46-01_11033/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-02-19_23-46-01_11033/sockets/raylet'],
 'redis_address': '192.168.2.102:15920',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=7e65830e1a33e479a0c15cfe28be83250a74accd79dd1994'}

In [11]:
from copy import deepcopy

# Algorithm or model to train. This may be the name of a built-in algorithm
# (e.g. RLlib's DQN or PPO), or a user-defined trainable function or class
# registered in the Tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the class-level defaults. A shallow .copy() shares the nested
# "model" and "env_config" dicts with agent_cls._default_config, so the
# in-place updates below would silently modify the defaults themselves and
# leak into every later use of them in this kernel (including cell re-runs).
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
config["model"].update({"fcnet_hiddens": [64, 32]})  # size of hidden layers in network
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97  # "lambda" setting, set together with use_gae
# Disabled alternatives, kept for reference:
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"]  # 200 by default -- still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize flow_params to a JSON string and
# store it, together with the algorithm name, in the env_config section.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment.
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym.
register_env(gym_name, create_env)

In [12]:
# Launch a single Tune experiment, keyed by the experiment tag.
trials = run_experiments({
    flow_params["exp_tag"]: dict(
        # RL algorithm to run
        run=alg_run,
        # environment name generated earlier
        env=gym_name,
        # configuration params (must match the "run" value); shallow copy of config
        config=dict(config),
        # number of iterations between checkpoints
        checkpoint_freq=1,
        # failure tolerance handed to Tune (presumably the number of retries -- confirm)
        max_failures=999,
        # stopping conditions: number of iterations to stop after
        stop=dict(training_iteration=1000),
    ),
})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.7/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_IntersectionEnv-v0_0_2019-02-19_23-46-024o7i5y8k -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 3.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-19_23-47-32
  done: false
  episode_len_mean: 450.14285714285717
  episode_reward_max: 148.8779501735844
  episode_reward_mean: 44.48329615873053
  episode_reward_min: -61.94444688976626
  episodes_this_iter: 21
  episodes_total: 21
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 2.833007335662842


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 130 s, 6 iter, 60000 ts, 89.9 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-19_23-49-25
  done: false
  episode_len_mean: 311.34
  episode_reward_max: 231.8733811690149
  episode_reward_mean: 97.41129425941595
  episode_reward_min: -60.91913766565617
  episodes_this_iter: 35
  episodes_total: 191
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 0.0031250000465661287
      cur_lr: 4.999999873689376e-05
      entropy: 2.8212931156158447
      kl: 0.009107247926294804
      policy_loss: -0.004174435045570135
      total_loss: 337.58843994140625
      vf_explained_var: 0.2222345769405365
      vf_loss: 337.59259033203125
    grad_time_ms: 1761.715


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 235 s, 12 iter, 120000 ts, 127 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-19_23-51-12
  done: false
  episode_len_mean: 190.04
  episode_reward_max: 208.45330724732423
  episode_reward_mean: 106.70133355772091
  episode_reward_min: -58.76608713157784
  episodes_this_iter: 50
  episodes_total: 499
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.882812572759576e-05
      cur_lr: 4.999999873689376e-05
      entropy: 2.8064470291137695
      kl: 0.005517065990716219
      policy_loss: -0.003812932875007391
      total_loss: 553.7919921875
      vf_explained_var: 0.32349953055381775
      vf_loss: 553.7957763671875
    grad_time_ms: 1613.379
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 341 s, 18 iter, 180000 ts, 125 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-19_23-52-57
  done: false
  episode_len_mean: 179.29
  episode_reward_max: 170.6275329234709
  episode_reward_mean: 133.74612213180018
  episode_reward_min: -61.834554337361666
  episodes_this_iter: 57
  episodes_total: 815
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.051757857974735e-06
      cur_lr: 4.999999873689376e-05
      entropy: 2.6840670108795166
      kl: 0.008621570654213428
      policy_loss: -0.003413783386349678
      total_loss: 393.6387634277344
      vf_explained_var: 0.48050636053085327
      vf_loss: 393.6422119140625
    grad_time_ms: 1616.13

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 447 s, 24 iter, 240000 ts, 124 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-19_23-54-44
  done: false
  episode_len_mean: 146.87
  episode_reward_max: 185.19235873923145
  episode_reward_mean: 131.20924362184655
  episode_reward_min: -63.58489935308091
  episodes_this_iter: 66
  episodes_total: 1209
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 9.536743306171047e-08
      cur_lr: 4.999999873689376e-05
      entropy: 2.6573469638824463
      kl: 0.011125963181257248
      policy_loss: -0.003384200157597661
      total_loss: 609.048095703125
      vf_explained_var: 0.39044174551963806
      vf_loss: 609.0514526367188
    grad_time_ms: 1618.5


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 553 s, 30 iter, 300000 ts, 136 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-19_23-56-30
  done: false
  episode_len_mean: 153.18
  episode_reward_max: 173.9534662070472
  episode_reward_mean: 133.1175105554813
  episode_reward_min: -62.03793023984031
  episodes_this_iter: 63
  episodes_total: 1614
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.9604645663569045e-09
      cur_lr: 4.999999873689376e-05
      entropy: 2.666256904602051
      kl: 0.003599870717152953
      policy_loss: -0.0015823348658159375
      total_loss: 518.300537109375
      vf_explained_var: 0.6211780905723572
      vf_loss: 518.3021850585938
    grad_time_ms: 1621.72
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 660 s, 36 iter, 360000 ts, 141 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-19_23-58-17
  done: false
  episode_len_mean: 140.34
  episode_reward_max: 171.38110992222113
  episode_reward_mean: 138.09913600008073
  episode_reward_min: -51.915089293429155
  episodes_this_iter: 70
  episodes_total: 2020
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.8626451769865326e-10
      cur_lr: 4.999999873689376e-05
      entropy: 2.5978519916534424
      kl: 0.00807875581085682
      policy_loss: -0.002877181861549616
      total_loss: 427.0375671386719
      vf_explained_var: 0.7083578705787659
      vf_loss: 427.0404052734375
    grad_time_ms: 1619.1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 766 s, 42 iter, 420000 ts, 135 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-00-03
  done: false
  episode_len_mean: 136.06
  episode_reward_max: 168.88472758735165
  episode_reward_mean: 141.91601708723522
  episode_reward_min: -46.00545134756825
  episodes_this_iter: 72
  episodes_total: 2422
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.3283064712331658e-11
      cur_lr: 4.999999873689376e-05
      entropy: 2.4612905979156494
      kl: 0.027736421674489975
      policy_loss: -0.00821602251380682
      total_loss: 348.2488098144531
      vf_explained_var: 0.8118891716003418
      vf_loss: 348.25701904296875
    grad_time_ms: 1618.3

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 873 s, 48 iter, 480000 ts, 143 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-01-51
  done: false
  episode_len_mean: 140.98
  episode_reward_max: 184.96052307510138
  episode_reward_mean: 146.58518087077672
  episode_reward_min: -52.78293315579394
  episodes_this_iter: 71
  episodes_total: 2850
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.8207661780829145e-12
      cur_lr: 4.999999873689376e-05
      entropy: 2.5702857971191406
      kl: 0.0028734924271702766
      policy_loss: -0.0021859193220734596
      total_loss: 424.85736083984375
      vf_explained_var: 0.802794337272644
      vf_loss: 424.8595275878906
    grad_time_ms: 1621

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 992 s, 54 iter, 540000 ts, 147 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-03-53
  done: false
  episode_len_mean: 141.61
  episode_reward_max: 185.06364026097674
  episode_reward_mean: 148.73428949037137
  episode_reward_min: -58.22360251622624
  episodes_this_iter: 72
  episodes_total: 3292
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.275957722603643e-13
      cur_lr: 4.999999873689376e-05
      entropy: 2.5250282287597656
      kl: 0.00872498843818903
      policy_loss: -0.0022731218487024307
      total_loss: 444.1200866699219
      vf_explained_var: 0.767185628414154
      vf_loss: 444.1224060058594
    grad_time_ms: 1795.013

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 1146 s, 60 iter, 600000 ts, 140 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-06-32
  done: false
  episode_len_mean: 139.29
  episode_reward_max: 185.92435926051417
  episode_reward_mean: 148.57541098204447
  episode_reward_min: -44.54472805692417
  episodes_this_iter: 73
  episodes_total: 3764
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.547473576627277e-14
      cur_lr: 4.999999873689376e-05
      entropy: 2.5436196327209473
      kl: 0.02680007368326187
      policy_loss: -0.005933642387390137
      total_loss: 402.71746826171875
      vf_explained_var: 0.8069671988487244
      vf_loss: 402.7234191894531
    grad_time_ms: 2348.9

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 1313 s, 66 iter, 660000 ts, 147 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-09-18
  done: false
  episode_len_mean: 127.94
  episode_reward_max: 187.32803986442855
  episode_reward_mean: 157.0331248630315
  episode_reward_min: -38.320853313006964
  episodes_this_iter: 78
  episodes_total: 4231
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.2737367883136385e-14
      cur_lr: 4.999999873689376e-05
      entropy: 2.379056692123413
      kl: 0.0129982465878129
      policy_loss: -0.004504282493144274
      total_loss: 327.25970458984375
      vf_explained_var: 0.8508366346359253
      vf_loss: 327.2642517089844
    grad_time_ms: 2859.09

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 1489 s, 72 iter, 720000 ts, 151 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-12-09
  done: false
  episode_len_mean: 122.53
  episode_reward_max: 186.38695931714958
  episode_reward_mean: 152.57973099152707
  episode_reward_min: -40.02450992653868
  episodes_this_iter: 81
  episodes_total: 4718
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.842170985392048e-15
      cur_lr: 4.999999873689376e-05
      entropy: 2.229957103729248
      kl: 0.029053082689642906
      policy_loss: -0.004599397536367178
      total_loss: 275.7268981933594
      vf_explained_var: 0.8633512854576111
      vf_loss: 275.7314758300781
    grad_time_ms: 2758.73

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 1596 s, 78 iter, 780000 ts, 147 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-13-55
  done: false
  episode_len_mean: 124.32
  episode_reward_max: 189.5757273124291
  episode_reward_mean: 146.08844085768328
  episode_reward_min: -40.87147138880031
  episodes_this_iter: 80
  episodes_total: 5204
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.10542746348012e-16
      cur_lr: 4.999999873689376e-05
      entropy: 2.4207592010498047
      kl: 0.01647954247891903
      policy_loss: -0.006901078391820192
      total_loss: 277.7972106933594
      vf_explained_var: 0.8584588170051575
      vf_loss: 277.8040466308594
    grad_time_ms: 1725.147


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 1702 s, 84 iter, 840000 ts, 149 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-15-41
  done: false
  episode_len_mean: 132.03
  episode_reward_max: 192.42622241554884
  episode_reward_mean: 152.11824914413745
  episode_reward_min: -42.53102931572068
  episodes_this_iter: 75
  episodes_total: 5675
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.88178432935015e-17
      cur_lr: 4.999999873689376e-05
      entropy: 2.647909164428711
      kl: 0.035060085356235504
      policy_loss: -0.008283304050564766
      total_loss: 145.16409301757812
      vf_explained_var: 0.9359892010688782
      vf_loss: 145.17237854003906
    grad_time_ms: 1574.1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 1807 s, 90 iter, 900000 ts, 149 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-17-26
  done: false
  episode_len_mean: 128.85
  episode_reward_max: 183.39141287765298
  episode_reward_mean: 148.9214701275963
  episode_reward_min: -43.869388278168
  episodes_this_iter: 78
  episodes_total: 6132
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.440892164675075e-17
      cur_lr: 4.999999873689376e-05
      entropy: 2.584242820739746
      kl: 0.018537741154432297
      policy_loss: -0.0027568156365305185
      total_loss: 173.9114990234375
      vf_explained_var: 0.9164601564407349
      vf_loss: 173.9142303466797
    grad_time_ms: 1551.592


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 1912 s, 96 iter, 960000 ts, 148 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-19-11
  done: false
  episode_len_mean: 129.43
  episode_reward_max: 194.10354167518045
  episode_reward_mean: 154.0586615803788
  episode_reward_min: -50.55127905353463
  episodes_this_iter: 77
  episodes_total: 6596
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.2204460823375376e-17
      cur_lr: 4.999999873689376e-05
      entropy: 2.6263999938964844
      kl: 0.00889586377888918
      policy_loss: -0.0028682409320026636
      total_loss: 108.34199523925781
      vf_explained_var: 0.9507274031639099
      vf_loss: 108.34487915039062
    grad_time_ms: 1553

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2017 s, 102 iter, 1020000 ts, 146 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-20-56
  done: false
  episode_len_mean: 129.46
  episode_reward_max: 196.22198990400486
  episode_reward_mean: 150.33106331813403
  episode_reward_min: -43.55571829416965
  episodes_this_iter: 76
  episodes_total: 7061
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.775557602921922e-18
      cur_lr: 4.999999873689376e-05
      entropy: 2.685199737548828
      kl: 0.013039901852607727
      policy_loss: -0.004296127241104841
      total_loss: 150.71038818359375
      vf_explained_var: 0.9330543875694275
      vf_loss: 150.7146759033203
    grad_time_ms: 1552

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2122 s, 108 iter, 1080000 ts, 149 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-22-41
  done: false
  episode_len_mean: 129.18
  episode_reward_max: 192.49710702763343
  episode_reward_mean: 153.33829316228474
  episode_reward_min: -47.601686163703995
  episodes_this_iter: 77
  episodes_total: 7525
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.938894007304805e-19
      cur_lr: 4.999999873689376e-05
      entropy: 2.654223918914795
      kl: 0.006934914272278547
      policy_loss: -0.00410702358931303
      total_loss: 148.9091339111328
      vf_explained_var: 0.9317088723182678
      vf_loss: 148.9132537841797
    grad_time_ms: 1551.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2225 s, 114 iter, 1140000 ts, 146 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-24-25
  done: false
  episode_len_mean: 125.95
  episode_reward_max: 193.77141576950038
  episode_reward_mean: 138.1187138007557
  episode_reward_min: -53.61018095158116
  episodes_this_iter: 80
  episodes_total: 7989
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.336808754565503e-20
      cur_lr: 4.999999873689376e-05
      entropy: 2.6229074001312256
      kl: 0.009359125047922134
      policy_loss: -0.003200493985787034
      total_loss: 370.6396789550781
      vf_explained_var: 0.843515157699585
      vf_loss: 370.642822265625
    grad_time_ms: 1550.36

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2329 s, 120 iter, 1200000 ts, 147 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-26-09
  done: false
  episode_len_mean: 131.33
  episode_reward_max: 193.16121611227788
  episode_reward_mean: 149.55258525930466
  episode_reward_min: -44.5731802202097
  episodes_this_iter: 75
  episodes_total: 8452
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.7105054716034394e-21
      cur_lr: 4.999999873689376e-05
      entropy: 2.7774763107299805
      kl: 0.009257528930902481
      policy_loss: -0.0037197016645222902
      total_loss: 138.0635528564453
      vf_explained_var: 0.9438694715499878
      vf_loss: 138.0672607421875
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2433 s, 126 iter, 1260000 ts, 150 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-27-54
  done: false
  episode_len_mean: 127.23
  episode_reward_max: 192.10037973198567
  episode_reward_mean: 154.9653513224285
  episode_reward_min: -46.13817420719871
  episodes_this_iter: 79
  episodes_total: 8918
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.3552527358017197e-21
      cur_lr: 4.999999873689376e-05
      entropy: 2.4282922744750977
      kl: 0.00859239511191845
      policy_loss: 0.0018794414354488254
      total_loss: 15.990320205688477
      vf_explained_var: 0.9929774403572083
      vf_loss: 15.98844051361084
    grad_time_ms: 1572

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2537 s, 132 iter, 1320000 ts, 156 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-29-37
  done: false
  episode_len_mean: 128.97
  episode_reward_max: 209.98084829201798
  episode_reward_mean: 156.25309148094505
  episode_reward_min: -49.18715045505232
  episodes_this_iter: 77
  episodes_total: 9380
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.6940659197521496e-22
      cur_lr: 4.999999873689376e-05
      entropy: 2.6141653060913086
      kl: 0.008442633785307407
      policy_loss: -0.00139722041785717
      total_loss: 156.4242401123047
      vf_explained_var: 0.9348983764648438
      vf_loss: 156.42564392089844
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2641 s, 138 iter, 1380000 ts, 154 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-31-21
  done: false
  episode_len_mean: 131.87
  episode_reward_max: 201.19471484717764
  episode_reward_mean: 154.14258276970088
  episode_reward_min: -43.64129319314911
  episodes_this_iter: 75
  episodes_total: 9845
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.117582399690187e-23
      cur_lr: 4.999999873689376e-05
      entropy: 2.732476234436035
      kl: 0.013048668392002583
      policy_loss: 4.575554339680821e-05
      total_loss: 118.25558471679688
      vf_explained_var: 0.9521071314811707
      vf_loss: 118.25552368164062
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2745 s, 144 iter, 1440000 ts, 149 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-33-06
  done: false
  episode_len_mean: 132.81
  episode_reward_max: 219.66014699526215
  episode_reward_mean: 154.1026771711203
  episode_reward_min: -49.11602378688236
  episodes_this_iter: 75
  episodes_total: 10310
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 2.7970550060272217
      kl: 0.02235987037420273
      policy_loss: 0.0019009875832125545
      total_loss: 121.07999420166016
      vf_explained_var: 0.9503346681594849
      vf_loss: 121.07807159423828
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2850 s, 150 iter, 1500000 ts, 152 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-34-51
  done: false
  episode_len_mean: 131.16
  episode_reward_max: 198.05439339917484
  episode_reward_mean: 158.63470172835872
  episode_reward_min: -45.2370063756952
  episodes_this_iter: 76
  episodes_total: 10775
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.0587911998450935e-23
      cur_lr: 4.999999873689376e-05
      entropy: 2.688638210296631
      kl: 0.012226060964167118
      policy_loss: -0.0019347436027601361
      total_loss: 16.46474838256836
      vf_explained_var: 0.9927075505256653
      vf_loss: 16.466686248779297
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 2954 s, 156 iter, 1560000 ts, 150 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-36-35
  done: false
  episode_len_mean: 130.02
  episode_reward_max: 199.5281516658787
  episode_reward_mean: 157.93851416938787
  episode_reward_min: -42.82474012451595
  episodes_this_iter: 77
  episodes_total: 11242
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.293955999225468e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.615016222000122
      kl: 0.011620201170444489
      policy_loss: 0.0009800211992114782
      total_loss: 15.748137474060059
      vf_explained_var: 0.9930339455604553
      vf_loss: 15.74715805053711
    grad_time_ms: 1545

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3057 s, 162 iter, 1620000 ts, 157 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-38-19
  done: false
  episode_len_mean: 128.45
  episode_reward_max: 191.70458895661466
  episode_reward_mean: 151.8967003626001
  episode_reward_min: -51.35545823479642
  episodes_this_iter: 78
  episodes_total: 11711
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.323488999806367e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.5503792762756348
      kl: 0.02158990502357483
      policy_loss: 6.194412708282471e-05
      total_loss: 157.9713592529297
      vf_explained_var: 0.9394566416740417
      vf_loss: 157.97129821777344
    grad_time_ms: 1549

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3162 s, 168 iter, 1680000 ts, 158 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-40-04
  done: false
  episode_len_mean: 127.68
  episode_reward_max: 216.2126811479429
  episode_reward_mean: 158.06955218067498
  episode_reward_min: -42.671408607227946
  episodes_this_iter: 77
  episodes_total: 12181
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.323488999806367e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.389310121536255
      kl: 0.013738791458308697
      policy_loss: -0.0016255840891972184
      total_loss: 103.84416961669922
      vf_explained_var: 0.9597044587135315
      vf_loss: 103.8458023071289
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3266 s, 174 iter, 1740000 ts, 159 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-41-48
  done: false
  episode_len_mean: 127.02
  episode_reward_max: 193.8832533347725
  episode_reward_mean: 155.41761944328616
  episode_reward_min: -40.47981074216341
  episodes_this_iter: 79
  episodes_total: 12654
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.985232957367678e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4952378273010254
      kl: 0.03326348960399628
      policy_loss: -0.0033863012213259935
      total_loss: 90.47734832763672
      vf_explained_var: 0.963230550289154
      vf_loss: 90.4807357788086
    grad_time_ms: 1554.2

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3371 s, 180 iter, 1800000 ts, 145 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-43-33
  done: false
  episode_len_mean: 124.86
  episode_reward_max: 195.08729890077421
  episode_reward_mean: 154.77156921170155
  episode_reward_min: -41.52105804104911
  episodes_this_iter: 80
  episodes_total: 13133
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.977850717950488e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.374425172805786
      kl: 0.01869310811161995
      policy_loss: 0.0012104971101507545
      total_loss: 98.38607025146484
      vf_explained_var: 0.9583505392074585
      vf_loss: 98.38484954833984
    grad_time_ms: 1565.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3476 s, 186 iter, 1860000 ts, 153 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-45-18
  done: false
  episode_len_mean: 128.1
  episode_reward_max: 213.39534085693933
  episode_reward_mean: 159.13302731512323
  episode_reward_min: -48.7091500649948
  episodes_this_iter: 78
  episodes_total: 13607
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.977850717950488e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4440879821777344
      kl: 0.024414515122771263
      policy_loss: 0.0012705893022939563
      total_loss: 59.63877868652344
      vf_explained_var: 0.9760571718215942
      vf_loss: 59.63751220703125
    grad_time_ms: 1539.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3579 s, 192 iter, 1920000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-47-01
  done: false
  episode_len_mean: 127.61
  episode_reward_max: 203.52673775857633
  episode_reward_mean: 157.559442291683
  episode_reward_min: -47.807768310945804
  episodes_this_iter: 79
  episodes_total: 14076
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.977850717950488e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.512833833694458
      kl: 0.014628107659518719
      policy_loss: -0.005969873629510403
      total_loss: 75.0965805053711
      vf_explained_var: 0.9705401659011841
      vf_loss: 75.10254669189453
    grad_time_ms: 1547.1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3682 s, 198 iter, 1980000 ts, 149 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-48-45
  done: false
  episode_len_mean: 127.93
  episode_reward_max: 207.0199805963601
  episode_reward_mean: 160.00188821828564
  episode_reward_min: -42.38923096479757
  episodes_this_iter: 78
  episodes_total: 14552
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.46677538667244e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.596921682357788
      kl: 0.017116248607635498
      policy_loss: 0.004227609373629093
      total_loss: 14.542490005493164
      vf_explained_var: 0.9941462278366089
      vf_loss: 14.538259506225586
    grad_time_ms: 1560.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3784 s, 204 iter, 2040000 ts, 153 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-50-27
  done: false
  episode_len_mean: 122.92
  episode_reward_max: 196.18556935406565
  episode_reward_mean: 154.61450233225165
  episode_reward_min: -44.541521784987786
  episodes_this_iter: 82
  episodes_total: 15033
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.70016091064117e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.273216962814331
      kl: 0.011760530062019825
      policy_loss: -0.0014949905453249812
      total_loss: 62.954559326171875
      vf_explained_var: 0.9751545786857605
      vf_loss: 62.95604705810547
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3887 s, 210 iter, 2100000 ts, 155 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-52-10
  done: false
  episode_len_mean: 123.4
  episode_reward_max: 199.30121466355152
  episode_reward_mean: 154.65177279682902
  episode_reward_min: -48.50283934367765
  episodes_this_iter: 82
  episodes_total: 15515
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.02512324677882e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.365478038787842
      kl: 0.015119442716240883
      policy_loss: -0.0011450762394815683
      total_loss: 73.69857025146484
      vf_explained_var: 0.9734674096107483
      vf_loss: 73.69972229003906
    grad_time_ms: 1566.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 3991 s, 216 iter, 2160000 ts, 155 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-53-54
  done: false
  episode_len_mean: 123.53
  episode_reward_max: 194.78614537609678
  episode_reward_mean: 155.64411490837648
  episode_reward_min: -48.218285958365634
  episodes_this_iter: 81
  episodes_total: 15994
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.02512324677882e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.3127288818359375
      kl: 0.015304474160075188
      policy_loss: -0.0010504676029086113
      total_loss: 65.9847412109375
      vf_explained_var: 0.974026620388031
      vf_loss: 65.98578643798828
    grad_time_ms: 1585

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4094 s, 222 iter, 2220000 ts, 149 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-55-38
  done: false
  episode_len_mean: 124.73
  episode_reward_max: 194.29982124313278
  episode_reward_mean: 152.4636768855054
  episode_reward_min: -49.88463029737518
  episodes_this_iter: 79
  episodes_total: 16475
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.51256162338941e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.3651583194732666
      kl: 0.011194756254553795
      policy_loss: -0.000747950398363173
      total_loss: 60.481258392333984
      vf_explained_var: 0.9756209254264832
      vf_loss: 60.48199462890625
    grad_time_ms: 1563

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4198 s, 228 iter, 2280000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-57-21
  done: false
  episode_len_mean: 127.02
  episode_reward_max: 203.32868226211843
  episode_reward_mean: 158.35500825519523
  episode_reward_min: -38.051197535330715
  episodes_this_iter: 79
  episodes_total: 16954
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.76884135040037e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4669206142425537
      kl: 0.019472967833280563
      policy_loss: 0.0010325771290808916
      total_loss: 41.64223861694336
      vf_explained_var: 0.9833142161369324
      vf_loss: 41.64120101928711
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4302 s, 234 iter, 2340000 ts, 160 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_00-59-06
  done: false
  episode_len_mean: 126.18
  episode_reward_max: 193.78391397818183
  episode_reward_mean: 161.97297531986075
  episode_reward_min: -41.969346668460574
  episodes_this_iter: 78
  episodes_total: 17430
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.65326123673965e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4160306453704834
      kl: 0.017220987007021904
      policy_loss: -0.005118322093039751
      total_loss: 78.97136688232422
      vf_explained_var: 0.9711458086967468
      vf_loss: 78.97647857666016
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4405 s, 240 iter, 2400000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-00-49
  done: false
  episode_len_mean: 129.51
  episode_reward_max: 197.79812925286814
  episode_reward_mean: 156.1140321203045
  episode_reward_min: -43.29874315769364
  episodes_this_iter: 76
  episodes_total: 17907
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.65326123673965e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.508007049560547
      kl: 0.016632303595542908
      policy_loss: -0.0036748808342963457
      total_loss: 84.17455291748047
      vf_explained_var: 0.9722654819488525
      vf_loss: 84.17823791503906
    grad_time_ms: 1541.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 4.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4509 s, 246 iter, 2460000 ts, 156 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-02-34
  done: false
  episode_len_mean: 123.8
  episode_reward_max: 211.03390633853365
  episode_reward_mean: 152.0638726604273
  episode_reward_min: -48.049470609957616
  episodes_this_iter: 79
  episodes_total: 18387
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.479895799414001e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4195759296417236
      kl: 0.031853415071964264
      policy_loss: 0.0002749643463175744
      total_loss: 112.91169738769531
      vf_explained_var: 0.9565379023551941
      vf_loss: 112.91142272949219
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4614 s, 252 iter, 2520000 ts, 156 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-04-19
  done: false
  episode_len_mean: 128.0
  episode_reward_max: 204.75050682862204
  episode_reward_mean: 156.58364636571153
  episode_reward_min: -44.424661271642904
  episodes_this_iter: 78
  episodes_total: 18865
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.239947899707001e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4842023849487305
      kl: 0.0315023697912693
      policy_loss: -0.0008860217058099806
      total_loss: 80.41795349121094
      vf_explained_var: 0.9702267050743103
      vf_loss: 80.4188461303711
    grad_time_ms: 1534.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4718 s, 258 iter, 2580000 ts, 147 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-06-03
  done: false
  episode_len_mean: 124.63
  episode_reward_max: 198.51830994227146
  episode_reward_mean: 158.87980095139335
  episode_reward_min: -45.94318096177411
  episodes_this_iter: 79
  episodes_total: 19341
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.239947899707001e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.3808839321136475
      kl: 0.02236214652657509
      policy_loss: -0.0012110763927921653
      total_loss: 84.13175201416016
      vf_explained_var: 0.9668876528739929
      vf_loss: 84.13297271728516
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4824 s, 264 iter, 2640000 ts, 148 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-07-49
  done: false
  episode_len_mean: 122.07
  episode_reward_max: 199.71618599938859
  episode_reward_mean: 146.40418979376076
  episode_reward_min: -52.75370570545151
  episodes_this_iter: 83
  episodes_total: 19822
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.359919285762559e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.2715184688568115
      kl: 0.02226182632148266
      policy_loss: -0.0024156798608601093
      total_loss: 203.49656677246094
      vf_explained_var: 0.9270428419113159
      vf_loss: 203.49899291992188
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 4927 s, 270 iter, 2700000 ts, 158 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-09-32
  done: false
  episode_len_mean: 125.46
  episode_reward_max: 206.61596905890082
  episode_reward_mean: 158.02828526483313
  episode_reward_min: -44.009887439632195
  episodes_this_iter: 81
  episodes_total: 20303
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.359919285762559e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.381474733352661
      kl: 0.01713995262980461
      policy_loss: -0.002338926075026393
      total_loss: 87.3355712890625
      vf_explained_var: 0.9683926701545715
      vf_loss: 87.33790588378906
    grad_time_ms: 1557.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5031 s, 276 iter, 2760000 ts, 156 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-11-16
  done: false
  episode_len_mean: 123.84
  episode_reward_max: 206.96505487555328
  episode_reward_mean: 146.14214869443683
  episode_reward_min: -49.87507871088416
  episodes_this_iter: 80
  episodes_total: 20775
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1799596428812795e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4236600399017334
      kl: 0.020085040479898453
      policy_loss: -0.0017357569886371493
      total_loss: 222.0770721435547
      vf_explained_var: 0.9203617572784424
      vf_loss: 222.07879638671875
    grad_time_ms: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5134 s, 282 iter, 2820000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-13-00
  done: false
  episode_len_mean: 130.16
  episode_reward_max: 204.4071566037613
  episode_reward_mean: 160.12750195181695
  episode_reward_min: -50.57268079466159
  episodes_this_iter: 76
  episodes_total: 21247
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1799596428812795e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.6023693084716797
      kl: 0.019032593816518784
      policy_loss: 0.0013465730007737875
      total_loss: 64.89921569824219
      vf_explained_var: 0.9772385954856873
      vf_loss: 64.89786529541016
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5239 s, 288 iter, 2880000 ts, 148 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-14-45
  done: false
  episode_len_mean: 126.47
  episode_reward_max: 215.6811459151531
  episode_reward_mean: 156.35988227470324
  episode_reward_min: -46.01202453008971
  episodes_this_iter: 79
  episodes_total: 21731
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1799596428812795e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.4135658740997314
      kl: 0.00945344753563404
      policy_loss: -0.0008647161885164678
      total_loss: 111.52226257324219
      vf_explained_var: 0.961284339427948
      vf_loss: 111.52312469482422
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5343 s, 294 iter, 2940000 ts, 154 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-16-30
  done: false
  episode_len_mean: 125.87
  episode_reward_max: 196.06570762025976
  episode_reward_mean: 153.06557546392247
  episode_reward_min: -53.55900677610744
  episodes_this_iter: 77
  episodes_total: 22212
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.3849700772876057e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.5597434043884277
      kl: 0.017530057579278946
      policy_loss: -0.0006845727912150323
      total_loss: 77.66793823242188
      vf_explained_var: 0.9706405997276306
      vf_loss: 77.66861724853516
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5448 s, 300 iter, 3000000 ts, 167 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-18-14
  done: false
  episode_len_mean: 124.87
  episode_reward_max: 215.9383143434429
  episode_reward_mean: 155.95754243021648
  episode_reward_min: -48.409897507417334
  episodes_this_iter: 80
  episodes_total: 22688
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.3849700772876057e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.3945858478546143
      kl: 0.018654849380254745
      policy_loss: 0.0011485927971079946
      total_loss: 69.76813507080078
      vf_explained_var: 0.9767594933509827
      vf_loss: 69.76700592041016
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5551 s, 306 iter, 3060000 ts, 154 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-19-58
  done: false
  episode_len_mean: 124.3
  episode_reward_max: 211.73599029015207
  episode_reward_mean: 150.28276389626254
  episode_reward_min: -46.473499194525246
  episodes_this_iter: 79
  episodes_total: 23162
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.3849700772876057e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.3341023921966553
      kl: 0.03259166330099106
      policy_loss: -0.0004322189779486507
      total_loss: 100.62018585205078
      vf_explained_var: 0.964397132396698
      vf_loss: 100.62061309814453
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5654 s, 312 iter, 3120000 ts, 154 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-21-41
  done: false
  episode_len_mean: 126.73
  episode_reward_max: 197.58660328974665
  episode_reward_mean: 159.83714256150162
  episode_reward_min: -52.263666818932705
  episodes_this_iter: 79
  episodes_total: 23635
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.577456200615153e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.427800178527832
      kl: 0.027021002024412155
      policy_loss: -0.0010498369811102748
      total_loss: 80.568115234375
      vf_explained_var: 0.9713547825813293
      vf_loss: 80.56917572021484
    grad_time_ms: 1575

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5758 s, 318 iter, 3180000 ts, 160 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-23-25
  done: false
  episode_len_mean: 127.92
  episode_reward_max: 204.0922541914895
  episode_reward_mean: 159.39421138211168
  episode_reward_min: -48.08647883454763
  episodes_this_iter: 79
  episodes_total: 24111
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.577456200615153e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.3588528633117676
      kl: 0.013386976905167103
      policy_loss: -0.0011822909582406282
      total_loss: 60.009063720703125
      vf_explained_var: 0.9795008897781372
      vf_loss: 60.01025390625
    grad_time_ms: 1556.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5862 s, 324 iter, 3240000 ts, 157 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-25-10
  done: false
  episode_len_mean: 124.05
  episode_reward_max: 201.01064734736303
  episode_reward_mean: 155.0126467540646
  episode_reward_min: -51.73230830366068
  episodes_this_iter: 80
  episodes_total: 24585
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.366181737124788e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.1150758266448975
      kl: 0.016434069722890854
      policy_loss: -0.0005175143596716225
      total_loss: 96.74616241455078
      vf_explained_var: 0.9645960927009583
      vf_loss: 96.74667358398438
    grad_time_ms: 153

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 5967 s, 330 iter, 3300000 ts, 158 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-26-54
  done: false
  episode_len_mean: 125.6
  episode_reward_max: 204.47327828289264
  episode_reward_mean: 151.78342783668478
  episode_reward_min: -45.294436186256995
  episodes_this_iter: 79
  episodes_total: 25056
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.366181737124788e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.172961711883545
      kl: 0.037105876952409744
      policy_loss: -0.0002013303601415828
      total_loss: 151.48684692382812
      vf_explained_var: 0.9487046599388123
      vf_loss: 151.48704528808594
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6070 s, 336 iter, 3360000 ts, 148 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-28-38
  done: false
  episode_len_mean: 125.2
  episode_reward_max: 205.03223423940128
  episode_reward_mean: 156.03160818262154
  episode_reward_min: -51.91640550193221
  episodes_this_iter: 79
  episodes_total: 25533
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.366181737124788e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.0594112873077393
      kl: 0.020531710237264633
      policy_loss: 0.002500060247257352
      total_loss: 65.45762634277344
      vf_explained_var: 0.9761923551559448
      vf_loss: 65.45512390136719
    grad_time_ms: 1552.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6175 s, 342 iter, 3420000 ts, 152 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-30-23
  done: false
  episode_len_mean: 125.49
  episode_reward_max: 197.45926493900546
  episode_reward_mean: 156.49022766738915
  episode_reward_min: -45.55431059224179
  episodes_this_iter: 78
  episodes_total: 26008
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.366181737124788e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.1393723487854004
      kl: 0.014605477452278137
      policy_loss: -1.3009382200834807e-05
      total_loss: 41.090576171875
      vf_explained_var: 0.9850683808326721
      vf_loss: 41.09059143066406
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6279 s, 348 iter, 3480000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-32-08
  done: false
  episode_len_mean: 128.23
  episode_reward_max: 195.46549456618197
  episode_reward_mean: 164.2122295506138
  episode_reward_min: -41.06570058315994
  episodes_this_iter: 79
  episodes_total: 26480
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.683090868562394e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.987064242362976
      kl: 0.014863510616123676
      policy_loss: -5.762914224760607e-05
      total_loss: 27.948881149291992
      vf_explained_var: 0.9901432394981384
      vf_loss: 27.94894027709961
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6383 s, 354 iter, 3540000 ts, 160 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-33-51
  done: false
  episode_len_mean: 124.07
  episode_reward_max: 196.3140526189663
  episode_reward_mean: 152.01176651473057
  episode_reward_min: -49.560603733995165
  episodes_this_iter: 80
  episodes_total: 26957
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.024636204235978e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.0373775959014893
      kl: 0.013566093519330025
      policy_loss: -0.0035530775785446167
      total_loss: 69.65105438232422
      vf_explained_var: 0.9744316339492798
      vf_loss: 69.65460205078125
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6485 s, 360 iter, 3600000 ts, 159 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-35-34
  done: false
  episode_len_mean: 128.18
  episode_reward_max: 208.22288929383353
  episode_reward_mean: 165.61042307459823
  episode_reward_min: -46.03238308916588
  episodes_this_iter: 78
  episodes_total: 27426
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.024636204235978e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.039289712905884
      kl: 0.015488323755562305
      policy_loss: -0.00026025265106000006
      total_loss: 33.282649993896484
      vf_explained_var: 0.9877120852470398
      vf_loss: 33.28291320800781
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6590 s, 366 iter, 3660000 ts, 164 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-37-19
  done: false
  episode_len_mean: 131.73
  episode_reward_max: 204.60129360028432
  episode_reward_mean: 158.90517020052238
  episode_reward_min: -41.4351730364384
  episodes_this_iter: 75
  episodes_total: 27889
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.018477646215049e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.229452133178711
      kl: 0.026022495701909065
      policy_loss: -0.002234324114397168
      total_loss: 102.51725769042969
      vf_explained_var: 0.967778742313385
      vf_loss: 102.51949310302734
    grad_time_ms: 1542

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6694 s, 372 iter, 3720000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-39-04
  done: false
  episode_len_mean: 125.62
  episode_reward_max: 200.6658295010133
  episode_reward_mean: 157.208142495677
  episode_reward_min: -45.53622217284571
  episodes_this_iter: 80
  episodes_total: 28362
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.018477646215049e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.9342461824417114
      kl: 0.06235041469335556
      policy_loss: 0.0061243269592523575
      total_loss: 120.15250396728516
      vf_explained_var: 0.9606282114982605
      vf_loss: 120.1463851928711
    grad_time_ms: 1562.8

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6797 s, 378 iter, 3780000 ts, 156 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-40-47
  done: false
  episode_len_mean: 132.51
  episode_reward_max: 202.02877484129195
  episode_reward_mean: 161.82684563653157
  episode_reward_min: -45.63702728370629
  episodes_this_iter: 74
  episodes_total: 28834
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.791574901199087e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.23813533782959
      kl: 0.013194079510867596
      policy_loss: -0.0007495160098187625
      total_loss: 24.78982162475586
      vf_explained_var: 0.9917538166046143
      vf_loss: 24.790573120117188
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 6901 s, 384 iter, 3840000 ts, 166 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-42-31
  done: false
  episode_len_mean: 123.9
  episode_reward_max: 203.1816453817162
  episode_reward_mean: 156.61995642167068
  episode_reward_min: -48.40425499683559
  episodes_this_iter: 81
  episodes_total: 29307
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.791574901199087e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.862052083015442
      kl: 0.013838442042469978
      policy_loss: -0.0005919700488448143
      total_loss: 114.00824737548828
      vf_explained_var: 0.9622330069541931
      vf_loss: 114.00882720947266
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7006 s, 390 iter, 3900000 ts, 156 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-44-16
  done: false
  episode_len_mean: 122.6
  episode_reward_max: 205.8108257790752
  episode_reward_mean: 145.66957535385274
  episode_reward_min: -48.69803917348973
  episodes_this_iter: 81
  episodes_total: 29782
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.791574901199087e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.9552620649337769
      kl: 0.015859097242355347
      policy_loss: -0.0037052761763334274
      total_loss: 117.1297836303711
      vf_explained_var: 0.9630101323127747
      vf_loss: 117.13349914550781
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7111 s, 396 iter, 3960000 ts, 158 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-46-01
  done: false
  episode_len_mean: 127.0
  episode_reward_max: 202.05304917693138
  episode_reward_mean: 160.8554859601278
  episode_reward_min: -49.36154406554628
  episodes_this_iter: 79
  episodes_total: 30251
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.791574901199087e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.025904893875122
      kl: 0.022262679412961006
      policy_loss: -0.001726984977722168
      total_loss: 50.79647445678711
      vf_explained_var: 0.9818546772003174
      vf_loss: 50.79820251464844
    grad_time_ms: 1563.7

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7216 s, 402 iter, 4020000 ts, 161 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-47-47
  done: false
  episode_len_mean: 127.54
  episode_reward_max: 209.4434750926968
  episode_reward_mean: 159.86614216019942
  episode_reward_min: -45.32230117799032
  episodes_this_iter: 78
  episodes_total: 30716
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.791574901199087e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.1099183559417725
      kl: 0.01527462713420391
      policy_loss: -0.001451812102459371
      total_loss: 48.149444580078125
      vf_explained_var: 0.9828180074691772
      vf_loss: 48.150901794433594
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7321 s, 408 iter, 4080000 ts, 156 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-49-32
  done: false
  episode_len_mean: 126.84
  episode_reward_max: 205.4569468675273
  episode_reward_mean: 154.9293793727273
  episode_reward_min: -47.44924731419815
  episodes_this_iter: 78
  episodes_total: 31181
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.791574901199087e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.125791549682617
      kl: 0.01358964666724205
      policy_loss: -0.0007620147080160677
      total_loss: 45.45014572143555
      vf_explained_var: 0.9846511483192444
      vf_loss: 45.45090103149414
    grad_time_ms: 1566.0

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7425 s, 414 iter, 4140000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-51-16
  done: false
  episode_len_mean: 126.9
  episode_reward_max: 207.77308241919707
  episode_reward_mean: 159.54735054274093
  episode_reward_min: -43.45083974419843
  episodes_this_iter: 79
  episodes_total: 31653
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.018736392952044e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9984052181243896
      kl: 0.016247374936938286
      policy_loss: -0.0001316949346801266
      total_loss: 47.47523498535156
      vf_explained_var: 0.9830937385559082
      vf_loss: 47.47536849975586
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7530 s, 420 iter, 4200000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-53-00
  done: false
  episode_len_mean: 126.19
  episode_reward_max: 208.24055114595018
  episode_reward_mean: 160.3326004524101
  episode_reward_min: -48.09544757377391
  episodes_this_iter: 79
  episodes_total: 32122
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.5281041949976135e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9354333877563477
      kl: 0.021245483309030533
      policy_loss: -0.0038790064863860607
      total_loss: 65.60319519042969
      vf_explained_var: 0.9772969484329224
      vf_loss: 65.6070785522461
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7634 s, 426 iter, 4260000 ts, 160 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-54-45
  done: false
  episode_len_mean: 125.42
  episode_reward_max: 209.92050215092928
  episode_reward_mean: 152.54139491389623
  episode_reward_min: -49.32430899674348
  episodes_this_iter: 80
  episodes_total: 32592
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.640520974988067e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.93547785282135
      kl: 0.03550414741039276
      policy_loss: 0.0013383495388552547
      total_loss: 93.68511199951172
      vf_explained_var: 0.969539225101471
      vf_loss: 93.68377685546875
    grad_time_ms: 1566.94

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7738 s, 432 iter, 4320000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-56-29
  done: false
  episode_len_mean: 126.71
  episode_reward_max: 201.19509556402676
  episode_reward_mean: 159.66662720637117
  episode_reward_min: -49.732695043972214
  episodes_this_iter: 78
  episodes_total: 33061
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.820260487494034e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.9128555059432983
      kl: 0.01538717932999134
      policy_loss: 0.0020875963382422924
      total_loss: 42.89795684814453
      vf_explained_var: 0.9862135648727417
      vf_loss: 42.89586639404297
    grad_time_ms: 157

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7843 s, 438 iter, 4380000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-58-14
  done: false
  episode_len_mean: 129.69
  episode_reward_max: 203.26485942584458
  episode_reward_mean: 156.36174991340403
  episode_reward_min: -45.855767174817544
  episodes_this_iter: 77
  episodes_total: 33529
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.820260487494034e-24
      cur_lr: 4.999999873689376e-05
      entropy: 2.0024023056030273
      kl: 0.49500253796577454
      policy_loss: 0.0074489363469183445
      total_loss: 129.0860595703125
      vf_explained_var: 0.9583596587181091
      vf_loss: 129.07861328125
    grad_time_ms: 1560.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 7946 s, 444 iter, 4440000 ts, 159 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_01-59-58
  done: false
  episode_len_mean: 128.01
  episode_reward_max: 197.16350318956205
  episode_reward_mean: 161.24670615961227
  episode_reward_min: -41.04809558288295
  episodes_this_iter: 78
  episodes_total: 34000
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.730391717317182e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.9086039066314697
      kl: 0.025052256882190704
      policy_loss: 0.005829993169754744
      total_loss: 29.080678939819336
      vf_explained_var: 0.9894163608551025
      vf_loss: 29.07485008239746
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8050 s, 450 iter, 4500000 ts, 159 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-01-42
  done: false
  episode_len_mean: 126.07
  episode_reward_max: 198.61888486755518
  episode_reward_mean: 161.22741173354544
  episode_reward_min: -50.47644394791071
  episodes_this_iter: 78
  episodes_total: 34477
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.730391717317182e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.8135675191879272
      kl: 0.023120684549212456
      policy_loss: -0.00411108136177063
      total_loss: 50.45313262939453
      vf_explained_var: 0.9827983379364014
      vf_loss: 50.457237243652344
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8154 s, 456 iter, 4560000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-03-26
  done: false
  episode_len_mean: 130.65
  episode_reward_max: 212.69129344319364
  episode_reward_mean: 165.16765505403808
  episode_reward_min: -40.74607092466684
  episodes_this_iter: 77
  episodes_total: 34945
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.595586195469189e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.8746750354766846
      kl: 0.018623940646648407
      policy_loss: -0.0013861162588000298
      total_loss: 45.50390625
      vf_explained_var: 0.9850791692733765
      vf_loss: 45.50529098510742
    grad_time_ms: 1553.575

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8257 s, 462 iter, 4620000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-05-10
  done: false
  episode_len_mean: 131.4
  episode_reward_max: 201.93900719653863
  episode_reward_mean: 162.84935785361222
  episode_reward_min: -48.88083435580282
  episodes_this_iter: 77
  episodes_total: 35410
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.595586195469189e-24
      cur_lr: 4.999999873689376e-05
      entropy: 1.9439902305603027
      kl: 0.040684349834918976
      policy_loss: 0.006122022867202759
      total_loss: 63.3002815246582
      vf_explained_var: 0.9781089425086975
      vf_loss: 63.294158935546875
    grad_time_ms: 1548.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8362 s, 468 iter, 4680000 ts, 160 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-06-55
  done: false
  episode_len_mean: 128.61
  episode_reward_max: 217.3160318168673
  episode_reward_mean: 158.2545877197154
  episode_reward_min: -47.84086227409615
  episodes_this_iter: 77
  episodes_total: 35877
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.9340070517527485e-23
      cur_lr: 4.999999873689376e-05
      entropy: 2.039616107940674
      kl: 0.029149578884243965
      policy_loss: 0.0025051271077245474
      total_loss: 70.1055908203125
      vf_explained_var: 0.9766192436218262
      vf_loss: 70.10308837890625
    grad_time_ms: 1545.4

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8465 s, 474 iter, 4740000 ts, 161 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-08-38
  done: false
  episode_len_mean: 127.04
  episode_reward_max: 208.2990543227089
  episode_reward_mean: 164.16777743177653
  episode_reward_min: -40.94506536438914
  episodes_this_iter: 79
  episodes_total: 36348
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.9340070517527485e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.7796120643615723
      kl: 0.040799330919981
      policy_loss: 0.006467806175351143
      total_loss: 56.57529067993164
      vf_explained_var: 0.9809901118278503
      vf_loss: 56.56882095336914
    grad_time_ms: 1541.10

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8568 s, 480 iter, 4800000 ts, 165 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-10-21
  done: false
  episode_len_mean: 132.24
  episode_reward_max: 203.2984478174166
  episode_reward_mean: 157.52195275302049
  episode_reward_min: -45.540673637301616
  episodes_this_iter: 76
  episodes_total: 36810
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.901011050945666e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9529050588607788
      kl: 0.026987776160240173
      policy_loss: 0.003219036618247628
      total_loss: 61.21527099609375
      vf_explained_var: 0.9806980490684509
      vf_loss: 61.212059020996094
    grad_time_ms: 153

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8672 s, 486 iter, 4860000 ts, 168 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-12-05
  done: false
  episode_len_mean: 127.68
  episode_reward_max: 205.32678414541905
  episode_reward_mean: 160.4186762153232
  episode_reward_min: -45.88530874031015
  episodes_this_iter: 78
  episodes_total: 37278
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.6929330825805664
      kl: 0.022685958072543144
      policy_loss: -0.0013657258823513985
      total_loss: 122.27915954589844
      vf_explained_var: 0.9627377390861511
      vf_loss: 122.28052520751953
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8778 s, 492 iter, 4920000 ts, 167 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-13-51
  done: false
  episode_len_mean: 129.04
  episode_reward_max: 204.4178595560612
  episode_reward_mean: 165.05168101576987
  episode_reward_min: -45.791618486884786
  episodes_this_iter: 77
  episodes_total: 37746
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9181127548217773
      kl: 0.019728312268853188
      policy_loss: -0.00047027377877384424
      total_loss: 43.71390914916992
      vf_explained_var: 0.9868832230567932
      vf_loss: 43.71437454223633
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8883 s, 498 iter, 4980000 ts, 165 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-15-37
  done: false
  episode_len_mean: 126.61
  episode_reward_max: 215.61569918614657
  episode_reward_mean: 158.69863489066518
  episode_reward_min: -48.578132437376105
  episodes_this_iter: 81
  episodes_total: 38213
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.8643442392349243
      kl: 0.02198689803481102
      policy_loss: 0.0021134100388735533
      total_loss: 66.55303192138672
      vf_explained_var: 0.9795697331428528
      vf_loss: 66.55093383789062
    grad_time_ms: 1560

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 8989 s, 504 iter, 5040000 ts, 166 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-17-22
  done: false
  episode_len_mean: 127.9
  episode_reward_max: 200.7625771054244
  episode_reward_mean: 155.61518578736445
  episode_reward_min: -50.20481378002331
  episodes_this_iter: 79
  episodes_total: 38679
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9530881643295288
      kl: 0.018278280273079872
      policy_loss: -0.003055951092392206
      total_loss: 85.41859436035156
      vf_explained_var: 0.9725953340530396
      vf_loss: 85.42164611816406
    grad_time_ms: 1555.4

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9093 s, 510 iter, 5100000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-19-07
  done: false
  episode_len_mean: 126.56
  episode_reward_max: 209.67330265723834
  episode_reward_mean: 160.5187531765669
  episode_reward_min: -39.6976975372227
  episodes_this_iter: 79
  episodes_total: 39145
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.7676012516021729
      kl: 0.03892306610941887
      policy_loss: 0.005758336279541254
      total_loss: 39.124393463134766
      vf_explained_var: 0.9869086146354675
      vf_loss: 39.1186408996582
    grad_time_ms: 1542.672

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9196 s, 516 iter, 5160000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-20-49
  done: false
  episode_len_mean: 128.72
  episode_reward_max: 207.68448759438363
  episode_reward_mean: 161.495531650832
  episode_reward_min: -48.51710667247227
  episodes_this_iter: 77
  episodes_total: 39608
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9163336753845215
      kl: 0.01571754179894924
      policy_loss: 0.0003594402805902064
      total_loss: 51.386131286621094
      vf_explained_var: 0.9825781583786011
      vf_loss: 51.385772705078125
    grad_time_ms: 1545.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9300 s, 522 iter, 5220000 ts, 161 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-22-35
  done: false
  episode_len_mean: 128.98
  episode_reward_max: 204.887763680171
  episode_reward_mean: 165.6148262628511
  episode_reward_min: -44.64171419403343
  episodes_this_iter: 79
  episodes_total: 40071
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.850962519645691
      kl: 0.03938129171729088
      policy_loss: -0.001429439871571958
      total_loss: 136.3807373046875
      vf_explained_var: 0.9598515629768372
      vf_loss: 136.38217163085938
    grad_time_ms: 1557.599

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9404 s, 528 iter, 5280000 ts, 169 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-24-18
  done: false
  episode_len_mean: 130.54
  episode_reward_max: 209.8114535640371
  episode_reward_mean: 169.33886884336903
  episode_reward_min: -45.808827380290126
  episodes_this_iter: 77
  episodes_total: 40530
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9253076314926147
      kl: 0.013115043751895428
      policy_loss: -0.00040414585964754224
      total_loss: 25.230562210083008
      vf_explained_var: 0.9917250275611877
      vf_loss: 25.23096466064453
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9508 s, 534 iter, 5340000 ts, 166 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-26-03
  done: false
  episode_len_mean: 132.72
  episode_reward_max: 212.32833040394996
  episode_reward_mean: 170.6967706833324
  episode_reward_min: -43.83870189083682
  episodes_this_iter: 75
  episodes_total: 40986
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.8817144632339478
      kl: 0.029551075771450996
      policy_loss: -0.000929150846786797
      total_loss: 107.38639068603516
      vf_explained_var: 0.9655972719192505
      vf_loss: 107.3873062133789
    grad_time_ms: 1545

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9612 s, 540 iter, 5400000 ts, 170 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-27-46
  done: false
  episode_len_mean: 129.64
  episode_reward_max: 212.90234836294215
  episode_reward_mean: 165.60388674859857
  episode_reward_min: -48.69166979296964
  episodes_this_iter: 77
  episodes_total: 41452
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.8008050918579102
      kl: 0.018814953044056892
      policy_loss: -8.194224938051775e-05
      total_loss: 52.250648498535156
      vf_explained_var: 0.9842798113822937
      vf_loss: 52.25072479248047
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9714 s, 546 iter, 5460000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-29-29
  done: false
  episode_len_mean: 131.15
  episode_reward_max: 209.77615651097346
  episode_reward_mean: 161.34683905471354
  episode_reward_min: -46.316135132961925
  episodes_this_iter: 77
  episodes_total: 41908
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.805979609489441
      kl: 0.03235555812716484
      policy_loss: -0.0035090430174022913
      total_loss: 119.20368957519531
      vf_explained_var: 0.9658807516098022
      vf_loss: 119.20720672607422
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9817 s, 552 iter, 5520000 ts, 160 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-31-13
  done: false
  episode_len_mean: 128.91
  episode_reward_max: 208.21458023565225
  episode_reward_mean: 159.23684666607073
  episode_reward_min: -46.67099007536275
  episodes_this_iter: 76
  episodes_total: 42370
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.35151673419068e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.9370343685150146
      kl: 0.025366634130477905
      policy_loss: -0.0014239501906558871
      total_loss: 84.9944839477539
      vf_explained_var: 0.976850152015686
      vf_loss: 84.99592590332031
    grad_time_ms: 1542.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 9923 s, 558 iter, 5580000 ts, 164 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-32-58
  done: false
  episode_len_mean: 131.22
  episode_reward_max: 215.47028651649111
  episode_reward_mean: 167.35576135328915
  episode_reward_min: -45.72358766744043
  episodes_this_iter: 76
  episodes_total: 42829
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.7814700603485107
      kl: 0.031095782294869423
      policy_loss: 0.0002710181288421154
      total_loss: 53.11026382446289
      vf_explained_var: 0.9827497601509094
      vf_loss: 53.109989166259766
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10027 s, 564 iter, 5640000 ts, 169 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-34-42
  done: false
  episode_len_mean: 132.06
  episode_reward_max: 218.59362257246153
  episode_reward_mean: 167.34332881050193
  episode_reward_min: -48.04579656644918
  episodes_this_iter: 76
  episodes_total: 43287
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.786407709121704
      kl: 0.01986265741288662
      policy_loss: -0.003706064075231552
      total_loss: 82.69567108154297
      vf_explained_var: 0.9750520586967468
      vf_loss: 82.69938659667969
    grad_time_ms: 1564.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10131 s, 570 iter, 5700000 ts, 175 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-36-27
  done: false
  episode_len_mean: 131.76
  episode_reward_max: 209.55082172347173
  episode_reward_mean: 171.6973749027081
  episode_reward_min: -47.113081759745505
  episodes_this_iter: 76
  episodes_total: 43744
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.6865378618240356
      kl: 0.01819867640733719
      policy_loss: -0.002267020521685481
      total_loss: 57.927608489990234
      vf_explained_var: 0.9825212359428406
      vf_loss: 57.92988204956055
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10236 s, 576 iter, 5760000 ts, 167 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-38-12
  done: false
  episode_len_mean: 134.85
  episode_reward_max: 220.45537295926482
  episode_reward_mean: 171.84137629200384
  episode_reward_min: -46.40951158039935
  episodes_this_iter: 72
  episodes_total: 44197
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.8147058486938477
      kl: 0.026323208585381508
      policy_loss: -0.0026828625705093145
      total_loss: 65.2239990234375
      vf_explained_var: 0.9816131591796875
      vf_loss: 65.2266845703125
    grad_time_ms: 1537

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10341 s, 582 iter, 5820000 ts, 169 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-39-57
  done: false
  episode_len_mean: 133.24
  episode_reward_max: 208.15285069429052
  episode_reward_mean: 175.0719425798399
  episode_reward_min: -39.68415561259382
  episodes_this_iter: 76
  episodes_total: 44650
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.4198194742202759
      kl: 0.010831386782228947
      policy_loss: -0.0006111191469244659
      total_loss: 20.331920623779297
      vf_explained_var: 0.9935658574104309
      vf_loss: 20.33253288269043
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10446 s, 588 iter, 5880000 ts, 172 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-41-42
  done: false
  episode_len_mean: 133.72
  episode_reward_max: 219.86505036561366
  episode_reward_mean: 173.83563214772687
  episode_reward_min: -45.75658288830429
  episodes_this_iter: 75
  episodes_total: 45105
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.7374097108840942
      kl: 0.014535258524119854
      policy_loss: -0.00035120421671308577
      total_loss: 34.19807052612305
      vf_explained_var: 0.9897703528404236
      vf_loss: 34.19842529296875
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10550 s, 594 iter, 5940000 ts, 175 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-43-26
  done: false
  episode_len_mean: 136.09
  episode_reward_max: 216.06092964263976
  episode_reward_mean: 176.39224359386034
  episode_reward_min: 64.96678142190575
  episodes_this_iter: 74
  episodes_total: 45554
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.7951879501342773
      kl: 0.010541117750108242
      policy_loss: -0.0011583808809518814
      total_loss: 45.42346954345703
      vf_explained_var: 0.9856880307197571
      vf_loss: 45.424625396728516
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10654 s, 600 iter, 6000000 ts, 173 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-45-11
  done: false
  episode_len_mean: 131.26
  episode_reward_max: 212.25170867602435
  episode_reward_mean: 163.06090961354406
  episode_reward_min: -46.67335470876481
  episodes_this_iter: 76
  episodes_total: 46005
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.5039217472076416
      kl: 0.022926975041627884
      policy_loss: -0.006099720951169729
      total_loss: 111.46714782714844
      vf_explained_var: 0.9711634516716003
      vf_loss: 111.4732437133789
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10758 s, 606 iter, 6060000 ts, 163 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-46-55
  done: false
  episode_len_mean: 130.59
  episode_reward_max: 217.66505265704333
  episode_reward_mean: 164.75029305058274
  episode_reward_min: -40.62823718668211
  episodes_this_iter: 78
  episodes_total: 46453
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.17575836709534e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.3748046159744263
      kl: 0.0338018499314785
      policy_loss: -0.0007252400973811746
      total_loss: 123.7647476196289
      vf_explained_var: 0.9648440480232239
      vf_loss: 123.7655029296875
    grad_time_ms: 1565

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10864 s, 612 iter, 6120000 ts, 167 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-48-41
  done: false
  episode_len_mean: 132.48
  episode_reward_max: 214.2354069574928
  episode_reward_mean: 164.90071373876086
  episode_reward_min: -43.61555216712231
  episodes_this_iter: 76
  episodes_total: 46905
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.263636525123833e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.5661144256591797
      kl: 0.032107532024383545
      policy_loss: -0.003956742584705353
      total_loss: 121.24144744873047
      vf_explained_var: 0.96494460105896
      vf_loss: 121.24539184570312
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 10969 s, 618 iter, 6180000 ts, 167 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-50-26
  done: false
  episode_len_mean: 133.04
  episode_reward_max: 220.29485606025963
  episode_reward_mean: 175.20106101866673
  episode_reward_min: -45.80807914164396
  episodes_this_iter: 75
  episodes_total: 47358
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.8954568387241033e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.4687825441360474
      kl: 0.029141368344426155
      policy_loss: -0.006850224453955889
      total_loss: 61.078975677490234
      vf_explained_var: 0.9826657176017761
      vf_loss: 61.085819244384766
    grad_time_ms:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11074 s, 624 iter, 6240000 ts, 170 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-52-12
  done: false
  episode_len_mean: 132.3
  episode_reward_max: 211.8882272540786
  episode_reward_mean: 169.75258257608482
  episode_reward_min: -39.43629651435529
  episodes_this_iter: 76
  episodes_total: 47809
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.8954568387241033e-23
      cur_lr: 4.999999873689376e-05
      entropy: 1.5519028902053833
      kl: 0.026385197415947914
      policy_loss: -0.001144061447121203
      total_loss: 75.5492172241211
      vf_explained_var: 0.9757281541824341
      vf_loss: 75.55036163330078
    grad_time_ms: 1558

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11179 s, 630 iter, 6300000 ts, 175 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-53-57
  done: false
  episode_len_mean: 133.53
  episode_reward_max: 223.29394543303715
  episode_reward_mean: 171.86869119656768
  episode_reward_min: -41.223232808089485
  episodes_this_iter: 76
  episodes_total: 48255
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.1014772680647258e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.4952765703201294
      kl: 0.04236675053834915
      policy_loss: 0.002468407154083252
      total_loss: 122.49529266357422
      vf_explained_var: 0.9649967551231384
      vf_loss: 122.49282836914062
    grad_time_ms: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11282 s, 636 iter, 6360000 ts, 167 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-55-41
  done: false
  episode_len_mean: 134.44
  episode_reward_max: 223.84379055181356
  episode_reward_mean: 176.28282838505956
  episode_reward_min: -41.42276752201637
  episodes_this_iter: 73
  episodes_total: 48705
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.6522161545325784e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.5301820039749146
      kl: 0.018696093931794167
      policy_loss: -0.00020214379765093327
      total_loss: 23.475200653076172
      vf_explained_var: 0.9927339553833008
      vf_loss: 23.475406646728516
    grad_time_m

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11386 s, 642 iter, 6420000 ts, 176 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-57-25
  done: false
  episode_len_mean: 133.18
  episode_reward_max: 221.06108000354473
  episode_reward_mean: 173.1865249854686
  episode_reward_min: -44.22860529089328
  episodes_this_iter: 74
  episodes_total: 49152
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.6522161545325784e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.5959447622299194
      kl: 0.018808702006936073
      policy_loss: -0.0014128208858892322
      total_loss: 66.64783477783203
      vf_explained_var: 0.979426920413971
      vf_loss: 66.64923858642578
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.9/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11490 s, 648 iter, 6480000 ts, 169 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_02-59-09
  done: false
  episode_len_mean: 135.27
  episode_reward_max: 216.5166465659987
  episode_reward_mean: 173.41286271976927
  episode_reward_min: -39.4526105366721
  episodes_this_iter: 74
  episodes_total: 49602
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.6522161545325784e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.5421894788742065
      kl: 0.02467610500752926
      policy_loss: -0.004659375175833702
      total_loss: 72.62720489501953
      vf_explained_var: 0.9798745512962341
      vf_loss: 72.6318588256836
    grad_time_ms: 1546.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11594 s, 654 iter, 6540000 ts, 174 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-00-53
  done: false
  episode_len_mean: 133.86
  episode_reward_max: 220.1311422374727
  episode_reward_mean: 170.7469910883823
  episode_reward_min: -34.74618527621473
  episodes_this_iter: 75
  episodes_total: 50048
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.4783247997787193e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.604383111000061
      kl: 0.025910576805472374
      policy_loss: 0.00022352863743435591
      total_loss: 30.290489196777344
      vf_explained_var: 0.991346001625061
      vf_loss: 30.29026985168457
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11699 s, 660 iter, 6600000 ts, 167 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-02-39
  done: false
  episode_len_mean: 132.3
  episode_reward_max: 214.94024310397478
  episode_reward_mean: 168.43171555496627
  episode_reward_min: -42.73096817490268
  episodes_this_iter: 75
  episodes_total: 50497
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.4783247997787193e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.3566035032272339
      kl: 0.018531573936343193
      policy_loss: -0.0001704013702692464
      total_loss: 48.8043212890625
      vf_explained_var: 0.9858404994010925
      vf_loss: 48.804500579833984
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11804 s, 666 iter, 6660000 ts, 165 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-04-23
  done: false
  episode_len_mean: 133.76
  episode_reward_max: 222.45785905642077
  episode_reward_mean: 174.1342763959708
  episode_reward_min: -49.89047754873144
  episodes_this_iter: 73
  episodes_total: 50944
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.717487199668079e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.5628479719161987
      kl: 0.029004601761698723
      policy_loss: 0.00012848549522459507
      total_loss: 45.1756591796875
      vf_explained_var: 0.9859342575073242
      vf_loss: 45.17552947998047
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 11910 s, 672 iter, 6720000 ts, 166 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-06-09
  done: false
  episode_len_mean: 132.86
  episode_reward_max: 218.37230507535213
  episode_reward_mean: 172.73921637971017
  episode_reward_min: -40.70011347650303
  episodes_this_iter: 75
  episodes_total: 51399
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 5.576230294631139e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.6426078081130981
      kl: 0.030141647905111313
      policy_loss: -0.002100657671689987
      total_loss: 34.85280990600586
      vf_explained_var: 0.9889171123504639
      vf_loss: 34.85490417480469
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12011 s, 678 iter, 6780000 ts, 165 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-07-51
  done: false
  episode_len_mean: 134.8
  episode_reward_max: 218.69755815913797
  episode_reward_mean: 177.30455736156196
  episode_reward_min: -47.72029798057412
  episodes_this_iter: 74
  episodes_total: 51845
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.364345946817688e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.4590489864349365
      kl: 0.021465599536895752
      policy_loss: 0.0012414465891197324
      total_loss: 24.941631317138672
      vf_explained_var: 0.9924588203430176
      vf_loss: 24.940393447875977
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12115 s, 684 iter, 6840000 ts, 168 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-09-35
  done: false
  episode_len_mean: 131.69
  episode_reward_max: 230.87928735582832
  episode_reward_mean: 165.34778413206794
  episode_reward_min: -43.05079215433044
  episodes_this_iter: 75
  episodes_total: 52298
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.364345946817688e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.3633366823196411
      kl: 0.030213817954063416
      policy_loss: 0.0006126348744146526
      total_loss: 96.46875762939453
      vf_explained_var: 0.9737015962600708
      vf_loss: 96.46813201904297
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12220 s, 690 iter, 6900000 ts, 173 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-11-21
  done: false
  episode_len_mean: 132.17
  episode_reward_max: 228.97642822914037
  episode_reward_mean: 170.18697085253487
  episode_reward_min: -41.14643112280505
  episodes_this_iter: 76
  episodes_total: 52749
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.364345946817688e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.2529839277267456
      kl: 0.040266383439302444
      policy_loss: 2.901829248003196e-05
      total_loss: 121.57249450683594
      vf_explained_var: 0.970971941947937
      vf_loss: 121.57246398925781
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12325 s, 696 iter, 6960000 ts, 168 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-13-06
  done: false
  episode_len_mean: 135.65
  episode_reward_max: 227.7052131389625
  episode_reward_mean: 172.4036382980048
  episode_reward_min: -42.80297046722753
  episodes_this_iter: 74
  episodes_total: 53192
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.497890591621399
      kl: 0.017238004133105278
      policy_loss: -0.0012343674898147583
      total_loss: 82.77982330322266
      vf_explained_var: 0.9767318367958069
      vf_loss: 82.78105926513672
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12429 s, 702 iter, 7020000 ts, 179 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-14-50
  done: false
  episode_len_mean: 137.96
  episode_reward_max: 231.4120635712648
  episode_reward_mean: 179.4181945555162
  episode_reward_min: 71.62747854237801
  episodes_this_iter: 72
  episodes_total: 53634
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.5501701831817627
      kl: 0.011610440909862518
      policy_loss: -0.0018133687553927302
      total_loss: 76.7119369506836
      vf_explained_var: 0.978151261806488
      vf_loss: 76.71375274658203
    grad_time_ms: 1570.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12535 s, 708 iter, 7080000 ts, 179 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-16-36
  done: false
  episode_len_mean: 133.8
  episode_reward_max: 223.16136056110977
  episode_reward_mean: 171.2581185934413
  episode_reward_min: -45.5910636321964
  episodes_this_iter: 75
  episodes_total: 54080
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.4703387022018433
      kl: 0.0176750048995018
      policy_loss: -0.0008129760390147567
      total_loss: 69.35859680175781
      vf_explained_var: 0.980288565158844
      vf_loss: 69.35940551757812
    grad_time_ms: 1549.8

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12640 s, 714 iter, 7140000 ts, 166 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-18-21
  done: false
  episode_len_mean: 134.82
  episode_reward_max: 227.69457190679296
  episode_reward_mean: 175.20345538430425
  episode_reward_min: -43.67882382346889
  episodes_this_iter: 72
  episodes_total: 54523
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.4962657690048218
      kl: 0.016226215288043022
      policy_loss: -0.001756885088980198
      total_loss: 48.07154083251953
      vf_explained_var: 0.9854702353477478
      vf_loss: 48.07329177856445
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12744 s, 720 iter, 7200000 ts, 168 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-20-05
  done: false
  episode_len_mean: 134.12
  episode_reward_max: 224.00117601793593
  episode_reward_mean: 168.64465128530196
  episode_reward_min: -40.04659744189814
  episodes_this_iter: 74
  episodes_total: 54965
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.4858458042144775
      kl: 0.01651189848780632
      policy_loss: -0.002170429565012455
      total_loss: 83.18230438232422
      vf_explained_var: 0.9759793281555176
      vf_loss: 83.1844711303711
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12848 s, 726 iter, 7260000 ts, 182 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-21-49
  done: false
  episode_len_mean: 133.87
  episode_reward_max: 225.71113594577147
  episode_reward_mean: 168.53069433782636
  episode_reward_min: -42.046461144517565
  episodes_this_iter: 75
  episodes_total: 55407
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.352436900138855
      kl: 0.01953180506825447
      policy_loss: -0.0009590742411091924
      total_loss: 122.84542846679688
      vf_explained_var: 0.9676231145858765
      vf_loss: 122.84640502929688
    grad_time_ms:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 12952 s, 732 iter, 7320000 ts, 174 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-23-33
  done: false
  episode_len_mean: 135.82
  episode_reward_max: 229.72646089370306
  episode_reward_mean: 177.5830487737636
  episode_reward_min: 62.40405399226927
  episodes_this_iter: 73
  episodes_total: 55851
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.4629024267196655
      kl: 0.021751267835497856
      policy_loss: 0.0002949402551166713
      total_loss: 46.43889236450195
      vf_explained_var: 0.985480010509491
      vf_loss: 46.4385986328125
    grad_time_ms: 1568.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13055 s, 738 iter, 7380000 ts, 178 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-25-17
  done: false
  episode_len_mean: 134.37
  episode_reward_max: 222.16576491735208
  episode_reward_mean: 169.81819383276888
  episode_reward_min: -43.76161122200366
  episodes_this_iter: 75
  episodes_total: 56295
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.3831899166107178
      kl: 0.017153123393654823
      policy_loss: -0.004247171338647604
      total_loss: 95.25105285644531
      vf_explained_var: 0.9729850888252258
      vf_loss: 95.25529479980469
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13159 s, 744 iter, 7440000 ts, 179 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-27-01
  done: false
  episode_len_mean: 136.79
  episode_reward_max: 218.49208761390182
  episode_reward_mean: 175.4688698095208
  episode_reward_min: -40.45325046727931
  episodes_this_iter: 74
  episodes_total: 56738
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.503449559211731
      kl: 0.015486114658415318
      policy_loss: -0.0011493401834741235
      total_loss: 78.48553466796875
      vf_explained_var: 0.9766311645507812
      vf_loss: 78.48667907714844
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13262 s, 750 iter, 7500000 ts, 171 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-28-44
  done: false
  episode_len_mean: 136.58
  episode_reward_max: 228.41116506381354
  episode_reward_mean: 174.49966419204588
  episode_reward_min: -47.42742297186121
  episodes_this_iter: 73
  episodes_total: 57182
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2546518415355553e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.5304306745529175
      kl: 0.028566619381308556
      policy_loss: -0.000578983745072037
      total_loss: 93.76888275146484
      vf_explained_var: 0.9742578268051147
      vf_loss: 93.76944732666016
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13367 s, 756 iter, 7560000 ts, 166 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-30-30
  done: false
  episode_len_mean: 135.79
  episode_reward_max: 222.67414580635946
  episode_reward_mean: 174.35118550859087
  episode_reward_min: -41.3853900344065
  episodes_this_iter: 73
  episodes_total: 57625
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 9.409888306645685e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.3552663326263428
      kl: 0.016440819948911667
      policy_loss: -0.003522801911458373
      total_loss: 59.51251220703125
      vf_explained_var: 0.981997549533844
      vf_loss: 59.51605224609375
    grad_time_ms: 1564

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13472 s, 762 iter, 7620000 ts, 177 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-32-15
  done: false
  episode_len_mean: 132.18
  episode_reward_max: 213.96401421858533
  episode_reward_mean: 177.51368180593107
  episode_reward_min: -35.92106113634486
  episodes_this_iter: 77
  episodes_total: 58071
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 9.409888306645685e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.1921032667160034
      kl: 0.02276156097650528
      policy_loss: 0.0001990728051168844
      total_loss: 45.33014678955078
      vf_explained_var: 0.9853787422180176
      vf_loss: 45.32994842529297
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13577 s, 768 iter, 7680000 ts, 176 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-34-00
  done: false
  episode_len_mean: 134.53
  episode_reward_max: 226.0397834557265
  episode_reward_mean: 175.14448724759626
  episode_reward_min: -40.420905008174415
  episodes_this_iter: 73
  episodes_total: 58513
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 9.409888306645685e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.3315421342849731
      kl: 0.03642703592777252
      policy_loss: 0.0019041901687160134
      total_loss: 59.142127990722656
      vf_explained_var: 0.9827257990837097
      vf_loss: 59.14021682739258
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13682 s, 774 iter, 7740000 ts, 168 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-35-46
  done: false
  episode_len_mean: 133.07
  episode_reward_max: 217.77410467492763
  episode_reward_mean: 166.33017907113066
  episode_reward_min: -43.15644695191173
  episodes_this_iter: 75
  episodes_total: 58960
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 9.409888306645685e-22
      cur_lr: 4.999999873689376e-05
      entropy: 1.2546465396881104
      kl: 0.02105768956243992
      policy_loss: 0.00047238924889825284
      total_loss: 50.77982711791992
      vf_explained_var: 0.9853195548057556
      vf_loss: 50.77935791015625
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13785 s, 780 iter, 7800000 ts, 177 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-37-28
  done: false
  episode_len_mean: 135.74
  episode_reward_max: 227.20357848246493
  episode_reward_mean: 183.4132354830952
  episode_reward_min: 67.6097016327895
  episodes_this_iter: 74
  episodes_total: 59404
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.411483145022657e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.174699306488037
      kl: 0.0173142421990633
      policy_loss: -0.001214082119986415
      total_loss: 54.52970886230469
      vf_explained_var: 0.983913004398346
      vf_loss: 54.53091812133789
    grad_time_ms: 1572.443


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13888 s, 786 iter, 7860000 ts, 177 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-39-12
  done: false
  episode_len_mean: 135.61
  episode_reward_max: 222.8134456498014
  episode_reward_mean: 178.14781794753296
  episode_reward_min: -37.3699179844986
  episodes_this_iter: 75
  episodes_total: 59849
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.411483145022657e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.1916563510894775
      kl: 0.01599026657640934
      policy_loss: -0.0017011991003528237
      total_loss: 55.51104736328125
      vf_explained_var: 0.9862565398216248
      vf_loss: 55.51274490356445
    grad_time_ms: 1580

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 13993 s, 792 iter, 7920000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-40-57
  done: false
  episode_len_mean: 137.27
  episode_reward_max: 218.48882522981728
  episode_reward_mean: 179.20326804909539
  episode_reward_min: 84.37608632976469
  episodes_this_iter: 72
  episodes_total: 60288
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1758367733783904e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.2099108695983887
      kl: 0.02130206488072872
      policy_loss: -0.003885305020958185
      total_loss: 60.20250701904297
      vf_explained_var: 0.9821363687515259
      vf_loss: 60.20637893676758
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14097 s, 798 iter, 7980000 ts, 183 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-42-41
  done: false
  episode_len_mean: 135.24
  episode_reward_max: 221.7181164225803
  episode_reward_mean: 177.1400359293072
  episode_reward_min: -41.97688643354003
  episodes_this_iter: 74
  episodes_total: 60731
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1758367733783904e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.1214983463287354
      kl: 0.02901522070169449
      policy_loss: 0.00019964107195846736
      total_loss: 85.34322357177734
      vf_explained_var: 0.9745891094207764
      vf_loss: 85.34302520751953
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14203 s, 804 iter, 8040000 ts, 178 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-44-28
  done: false
  episode_len_mean: 136.17
  episode_reward_max: 220.3221597498892
  episode_reward_mean: 176.547840002965
  episode_reward_min: 80.80935817115963
  episodes_this_iter: 74
  episodes_total: 61170
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1758367733783904e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.1523553133010864
      kl: 0.017722198739647865
      policy_loss: -0.001510858302935958
      total_loss: 26.760604858398438
      vf_explained_var: 0.9913487434387207
      vf_loss: 26.762113571166992
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14306 s, 810 iter, 8100000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-46-10
  done: false
  episode_len_mean: 135.56
  episode_reward_max: 224.19107298641518
  episode_reward_mean: 182.95038677955145
  episode_reward_min: 80.31008959908314
  episodes_this_iter: 75
  episodes_total: 61613
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1758367733783904e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.9972792267799377
      kl: 0.017142118886113167
      policy_loss: -0.0005930598126724362
      total_loss: 27.069786071777344
      vf_explained_var: 0.9913181662559509
      vf_loss: 27.07038116455078
    grad_time_ms: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14410 s, 816 iter, 8160000 ts, 177 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-47-55
  done: false
  episode_len_mean: 137.17
  episode_reward_max: 223.34702350811497
  episode_reward_mean: 178.5644041143495
  episode_reward_min: 114.8662116948036
  episodes_this_iter: 73
  episodes_total: 62051
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1758367733783904e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.1193585395812988
      kl: 0.023142490535974503
      policy_loss: -0.00127975398208946
      total_loss: 14.128145217895508
      vf_explained_var: 0.9951776266098022
      vf_loss: 14.129426002502441
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14513 s, 822 iter, 8220000 ts, 179 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-49-39
  done: false
  episode_len_mean: 136.31
  episode_reward_max: 229.45085766134167
  episode_reward_mean: 180.91125053349919
  episode_reward_min: 105.36520040241794
  episodes_this_iter: 74
  episodes_total: 62492
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.1758367733783904e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.9727717638015747
      kl: 0.02600697986781597
      policy_loss: 0.002470768289640546
      total_loss: 24.27611541748047
      vf_explained_var: 0.992224395275116
      vf_loss: 24.27364158630371
    grad_time_ms: 1553

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14618 s, 828 iter, 8280000 ts, 179 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-51-23
  done: false
  episode_len_mean: 137.61
  episode_reward_max: 227.51640783110193
  episode_reward_mean: 175.5702099411222
  episode_reward_min: -37.729550904271605
  episodes_this_iter: 72
  episodes_total: 62931
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.763756169809544e-21
      cur_lr: 4.999999873689376e-05
      entropy: 1.208178997039795
      kl: 0.018758030608296394
      policy_loss: -0.001830049091950059
      total_loss: 21.241201400756836
      vf_explained_var: 0.9930989146232605
      vf_loss: 21.243032455444336
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14721 s, 834 iter, 8340000 ts, 177 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-53-06
  done: false
  episode_len_mean: 134.74
  episode_reward_max: 217.82912859755862
  episode_reward_mean: 175.88085407581156
  episode_reward_min: -40.18990227668594
  episodes_this_iter: 75
  episodes_total: 63374
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.145634456662708e-21
      cur_lr: 4.999999873689376e-05
      entropy: 0.9577834010124207
      kl: 0.04275978356599808
      policy_loss: -0.007660584524273872
      total_loss: 126.98947143554688
      vf_explained_var: 0.9623768925666809
      vf_loss: 126.99714660644531
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14825 s, 840 iter, 8400000 ts, 181 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-54-51
  done: false
  episode_len_mean: 138.96
  episode_reward_max: 230.17670971752696
  episode_reward_mean: 177.68288639464242
  episode_reward_min: 81.0462438441298
  episodes_this_iter: 73
  episodes_total: 63813
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.0718450473303712e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.2531054019927979
      kl: 0.016221610829234123
      policy_loss: -0.0007242989377118647
      total_loss: 35.06412887573242
      vf_explained_var: 0.9887191653251648
      vf_loss: 35.06485366821289
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 14929 s, 846 iter, 8460000 ts, 164 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-56-35
  done: false
  episode_len_mean: 135.78
  episode_reward_max: 227.79713206461605
  episode_reward_mean: 174.0240723677073
  episode_reward_min: -45.36487045873067
  episodes_this_iter: 74
  episodes_total: 64258
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.4116508920120342e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.0726059675216675
      kl: 0.047539275139570236
      policy_loss: 0.003180114086717367
      total_loss: 131.94357299804688
      vf_explained_var: 0.9626429677009583
      vf_loss: 131.94036865234375
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15033 s, 852 iter, 8520000 ts, 176 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_03-58-19
  done: false
  episode_len_mean: 134.45
  episode_reward_max: 223.57320901205645
  episode_reward_mean: 167.5341109008512
  episode_reward_min: -41.823216182023
  episodes_this_iter: 73
  episodes_total: 64700
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 8.13932410314196e-20
      cur_lr: 4.999999873689376e-05
      entropy: 1.2956452369689941
      kl: 0.03506099432706833
      policy_loss: -0.001687822863459587
      total_loss: 116.84357452392578
      vf_explained_var: 0.9669603109359741
      vf_loss: 116.84526824951172
    grad_time_ms: 1539.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15137 s, 858 iter, 8580000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-00-04
  done: false
  episode_len_mean: 136.3
  episode_reward_max: 228.08458698526175
  episode_reward_mean: 175.96843751428796
  episode_reward_min: -39.36557574275105
  episodes_this_iter: 75
  episodes_total: 65140
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2208986800947793e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.0663822889328003
      kl: 0.03269342705607414
      policy_loss: 0.0007705578464083374
      total_loss: 131.5550537109375
      vf_explained_var: 0.962295413017273
      vf_loss: 131.55426025390625
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.1/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15241 s, 864 iter, 8640000 ts, 175 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-01-48
  done: false
  episode_len_mean: 136.8
  episode_reward_max: 226.48849973348104
  episode_reward_mean: 181.13306977701458
  episode_reward_min: -41.885345657346846
  episodes_this_iter: 72
  episodes_total: 65578
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.2208986800947793e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1468290090560913
      kl: 0.02355124056339264
      policy_loss: 0.0003403817827347666
      total_loss: 70.2598876953125
      vf_explained_var: 0.9795385599136353
      vf_loss: 70.2595443725586
    grad_time_ms: 1537

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15345 s, 870 iter, 8700000 ts, 177 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-03-32
  done: false
  episode_len_mean: 139.02
  episode_reward_max: 224.0563519270262
  episode_reward_mean: 181.09153127430997
  episode_reward_min: 85.23114450939804
  episodes_this_iter: 73
  episodes_total: 66015
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.831347503154286e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.127624750137329
      kl: 0.013546385802328587
      policy_loss: 6.387105531757697e-05
      total_loss: 30.76508331298828
      vf_explained_var: 0.9906805753707886
      vf_loss: 30.7650146484375
    grad_time_ms: 1550.9

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15450 s, 876 iter, 8760000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-05-16
  done: false
  episode_len_mean: 137.65
  episode_reward_max: 221.6999153004829
  episode_reward_mean: 176.82077306176768
  episode_reward_min: 64.74683960600947
  episodes_this_iter: 73
  episodes_total: 66451
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.7470213193549145e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1374733448028564
      kl: 0.016307605430483818
      policy_loss: 0.001160254469141364
      total_loss: 36.959922790527344
      vf_explained_var: 0.9885879755020142
      vf_loss: 36.958770751953125
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15553 s, 882 iter, 8820000 ts, 174 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-07-01
  done: false
  episode_len_mean: 138.0
  episode_reward_max: 224.21683647616376
  episode_reward_mean: 181.98916475481525
  episode_reward_min: 75.37541892364763
  episodes_this_iter: 72
  episodes_total: 66888
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.7470213193549145e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.1417324542999268
      kl: 0.023861965164542198
      policy_loss: -0.00044173808419145644
      total_loss: 34.51702117919922
      vf_explained_var: 0.9893770217895508
      vf_loss: 34.51746368408203
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15659 s, 888 iter, 8880000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-08-47
  done: false
  episode_len_mean: 137.75
  episode_reward_max: 224.4633824756314
  episode_reward_mean: 179.8671729455466
  episode_reward_min: 76.99582650991943
  episodes_this_iter: 73
  episodes_total: 67322
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.7470213193549145e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.0180606842041016
      kl: 0.020218154415488243
      policy_loss: -0.0007106910343281925
      total_loss: 9.456083297729492
      vf_explained_var: 0.996933102607727
      vf_loss: 9.456794738769531
    grad_time_ms: 1564

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15764 s, 894 iter, 8940000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-10-32
  done: false
  episode_len_mean: 137.94
  episode_reward_max: 219.03141650288268
  episode_reward_mean: 181.33378288203377
  episode_reward_min: 102.07051708984494
  episodes_this_iter: 72
  episodes_total: 67755
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 2.7470213193549145e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.0821083784103394
      kl: 0.0286728348582983
      policy_loss: 0.00029547445592470467
      total_loss: 8.894444465637207
      vf_explained_var: 0.9970599412918091
      vf_loss: 8.894149780273438
    grad_time_ms: 15

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15868 s, 900 iter, 9000000 ts, 181 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-12-16
  done: false
  episode_len_mean: 139.67
  episode_reward_max: 223.61348656107583
  episode_reward_mean: 185.05706830708723
  episode_reward_min: 96.23428090160132
  episodes_this_iter: 73
  episodes_total: 68188
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9042773246765137
      kl: 0.033278509974479675
      policy_loss: 0.001152863958850503
      total_loss: 19.88512420654297
      vf_explained_var: 0.993772566318512
      vf_loss: 19.883968353271484
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 15972 s, 906 iter, 9060000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-14-00
  done: false
  episode_len_mean: 138.41
  episode_reward_max: 224.40292142125062
  episode_reward_mean: 178.77251847255616
  episode_reward_min: -32.521568894976426
  episodes_this_iter: 72
  episodes_total: 68620
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.752766489982605
      kl: 0.023372933268547058
      policy_loss: 0.0005975595558993518
      total_loss: 28.114219665527344
      vf_explained_var: 0.9914196729660034
      vf_loss: 28.11362648010254
    grad_time_ms: 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16076 s, 912 iter, 9120000 ts, 179 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-15-46
  done: false
  episode_len_mean: 137.75
  episode_reward_max: 225.99322526923868
  episode_reward_mean: 181.94087244845804
  episode_reward_min: 97.88753356732929
  episodes_this_iter: 72
  episodes_total: 69053
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7430503368377686
      kl: 0.028649428859353065
      policy_loss: 0.00021841864509042352
      total_loss: 18.42901039123535
      vf_explained_var: 0.9942348003387451
      vf_loss: 18.42879295349121
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16180 s, 918 iter, 9180000 ts, 183 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-17-29
  done: false
  episode_len_mean: 137.45
  episode_reward_max: 224.92712043727383
  episode_reward_mean: 174.69159568027914
  episode_reward_min: -39.50874748353909
  episodes_this_iter: 73
  episodes_total: 69486
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9963855147361755
      kl: 0.02526458166539669
      policy_loss: -0.0040777274407446384
      total_loss: 86.69205474853516
      vf_explained_var: 0.9758729934692383
      vf_loss: 86.69612884521484
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16284 s, 924 iter, 9240000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-19-12
  done: false
  episode_len_mean: 138.96
  episode_reward_max: 214.73101958949678
  episode_reward_mean: 178.13995131337583
  episode_reward_min: 83.99345456926838
  episodes_this_iter: 73
  episodes_total: 69921
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 1.0442086458206177
      kl: 0.017288943752646446
      policy_loss: -0.0004928983398713171
      total_loss: 22.8843936920166
      vf_explained_var: 0.99261873960495
      vf_loss: 22.88488006591797
    grad_time_ms: 1545

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16387 s, 930 iter, 9300000 ts, 182 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-20-56
  done: false
  episode_len_mean: 137.4
  episode_reward_max: 218.5133683608826
  episode_reward_mean: 181.8579624373491
  episode_reward_min: 116.37419129855452
  episodes_this_iter: 72
  episodes_total: 70356
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9094381928443909
      kl: 0.01578555628657341
      policy_loss: -0.002036670921370387
      total_loss: 8.64760684967041
      vf_explained_var: 0.9971064925193787
      vf_loss: 8.649643898010254
    grad_time_ms: 1545.1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16490 s, 936 iter, 9360000 ts, 181 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-22-40
  done: false
  episode_len_mean: 137.36
  episode_reward_max: 219.80745324743256
  episode_reward_mean: 180.03725724747005
  episode_reward_min: 72.07913461464025
  episodes_this_iter: 73
  episodes_total: 70791
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7912558317184448
      kl: 0.036465227603912354
      policy_loss: 0.006564991548657417
      total_loss: 5.951451778411865
      vf_explained_var: 0.9979913234710693
      vf_loss: 5.944886207580566
    grad_time_ms: 154

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16594 s, 942 iter, 9420000 ts, 179 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-24-24
  done: false
  episode_len_mean: 136.4
  episode_reward_max: 221.3418033249526
  episode_reward_mean: 177.1658532609789
  episode_reward_min: -35.02495899280106
  episodes_this_iter: 73
  episodes_total: 71227
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 4.1205312035505475e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.6502846479415894
      kl: 0.02320467121899128
      policy_loss: 0.00048232977860607207
      total_loss: 18.06758689880371
      vf_explained_var: 0.9941813349723816
      vf_loss: 18.067102432250977
    grad_time_ms: 156

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16697 s, 948 iter, 9480000 ts, 182 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-26-07
  done: false
  episode_len_mean: 137.15
  episode_reward_max: 223.72965989516376
  episode_reward_mean: 183.62544825227013
  episode_reward_min: 84.0471640671833
  episodes_this_iter: 73
  episodes_total: 71664
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.5451993628901687e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9526393413543701
      kl: 0.019089702516794205
      policy_loss: -0.0029169719200581312
      total_loss: 28.13706398010254
      vf_explained_var: 0.9910012483596802
      vf_loss: 28.139984130859375
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16801 s, 954 iter, 9540000 ts, 181 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-27-51
  done: false
  episode_len_mean: 136.99
  episode_reward_max: 225.7622333865354
  episode_reward_mean: 182.91951256716766
  episode_reward_min: -39.0300820151178
  episodes_this_iter: 72
  episodes_total: 72101
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 3.4767000851547854e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9899880886077881
      kl: 0.037877604365348816
      policy_loss: 0.0008336239843629301
      total_loss: 31.493194580078125
      vf_explained_var: 0.9907568693161011
      vf_loss: 31.492368698120117
    grad_time_ms: 1

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 16904 s, 960 iter, 9600000 ts, 185 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-29-34
  done: false
  episode_len_mean: 136.24
  episode_reward_max: 213.93983896851242
  episode_reward_mean: 182.3821960771832
  episode_reward_min: 123.76581658028277
  episodes_this_iter: 73
  episodes_total: 72541
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.822571572683087e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9190191626548767
      kl: 0.02189849503338337
      policy_loss: 0.002175647532567382
      total_loss: 16.43922233581543
      vf_explained_var: 0.9945786595344543
      vf_loss: 16.437042236328125
    grad_time_ms: 1537

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 17008 s, 966 iter, 9660000 ts, 180 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-31-18
  done: false
  episode_len_mean: 135.06
  episode_reward_max: 224.66281063773545
  episode_reward_mean: 183.93836014958327
  episode_reward_min: 143.10620534313057
  episodes_this_iter: 74
  episodes_total: 72983
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.822571572683087e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7468838691711426
      kl: 0.03572629764676094
      policy_loss: 0.0038128250744193792
      total_loss: 5.58898401260376
      vf_explained_var: 0.9981474280357361
      vf_loss: 5.585170745849609
    grad_time_ms: 1537

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 17112 s, 972 iter, 9720000 ts, 184 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-33-03
  done: false
  episode_len_mean: 137.06
  episode_reward_max: 224.43945981462542
  episode_reward_mean: 184.64403816480507
  episode_reward_min: 95.6359946443007
  episodes_this_iter: 74
  episodes_total: 73423
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.822571572683087e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7021815180778503
      kl: 0.031148601323366165
      policy_loss: 0.003287645522505045
      total_loss: 5.456977844238281
      vf_explained_var: 0.998306155204773
      vf_loss: 5.453690052032471
    grad_time_ms: 1560.6

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 17215 s, 978 iter, 9780000 ts, 189 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-34-46
  done: false
  episode_len_mean: 138.57
  episode_reward_max: 224.44246293353558
  episode_reward_mean: 182.34419397365855
  episode_reward_min: 67.99244964963736
  episodes_this_iter: 73
  episodes_total: 73860
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.822571572683087e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.8439425826072693
      kl: 0.012609871104359627
      policy_loss: -0.0018842276185750961
      total_loss: 37.14928436279297
      vf_explained_var: 0.989680826663971
      vf_loss: 37.15116882324219
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 17320 s, 984 iter, 9840000 ts, 188 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-36-32
  done: false
  episode_len_mean: 139.14
  episode_reward_max: 224.88901873799685
  episode_reward_mean: 188.03874313036198
  episode_reward_min: 100.33296690379643
  episodes_this_iter: 72
  episodes_total: 74290
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.822571572683087e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.7464337348937988
      kl: 0.023264002054929733
      policy_loss: -0.0005469695897772908
      total_loss: 17.144025802612305
      vf_explained_var: 0.9949612617492676
      vf_loss: 17.144569396972656
    grad_time_ms:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 17424 s, 990 iter, 9900000 ts, 185 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-38-16
  done: false
  episode_len_mean: 140.05
  episode_reward_max: 223.88216065726056
  episode_reward_mean: 185.59784112372373
  episode_reward_min: 67.10253205408563
  episodes_this_iter: 70
  episodes_total: 74721
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 7.822571572683087e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9590182304382324
      kl: 0.01851075328886509
      policy_loss: -0.0018294703913852572
      total_loss: 43.55040740966797
      vf_explained_var: 0.9872342348098755
      vf_loss: 43.55223846435547
    grad_time_ms: 155

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=11085], 17529 s, 996 iter, 9960000 ts, 186 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-02-20_04-40-01
  done: false
  episode_len_mean: 136.5
  episode_reward_max: 217.1559557459586
  episode_reward_mean: 180.06198064715102
  episode_reward_min: 104.74594083140695
  episodes_this_iter: 74
  episodes_total: 75155
  experiment_id: 9d2cfd4ba2f7483a838320c419e6817d
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.173386097793981e-18
      cur_lr: 4.999999873689376e-05
      entropy: 0.5226399898529053
      kl: 0.033098723739385605
      policy_loss: 0.0003526665095705539
      total_loss: 12.71113395690918
      vf_explained_var: 0.9959396123886108
      vf_loss: 12.710780143737793
    grad_time_ms: 156