# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout, kept in a single constant so that every cell of the
# notebook uses the same value.
# Author's note (translated): 103 is the maximum horizon if the rollout should
# stop before leaving (presumably before the vehicles leave the network);
# the default was 500.
HORIZON = 120

# Name of the experiment (passed on as `exp_tag` in flow_params).
experiment_name = "IntersectionExample"

# Scenario class: show what the installed package exports, then pick one by name.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
scenario_name = "IntersectionTWScenario_2"

# Environment class: same procedure for the multi-agent environments.
import flow.multiagent_envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "MultiAgentIntersectionEnv_sharedPolicy_4veh"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW', 'TenaciousDScenario', 'IntersectionTWScenario_2']

Available environments:
['MultiEnv', 'MultiAgentAccelEnv', 'MultiWaveAttenuationPOEnv', 'MultiAgentIntersectionEnv', 'MultiAgentTeamSpiritIntersectionEnv', 'MultiAgentIntersectionEnv_baseline_1', 'MultiAgentIntersectionEnv_baseline_2', 'MultiAgentIntersectionEnv_baseline_3', 'MultiAgentIntersectionEnv_sharedPolicy_TeamSpirit', 'MultiTenaciousDEnv', 'MultiAgentIntersectionEnv_sharedPolicy_2veh', 'MultiAgentIntersectionEnv_sharedPolicy_4veh']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings; handed to NetParams as `additional_params`.
additionalNetParams = dict(
    edge_length=80,
    lanes=1,
    speed_limit=30,
)

# Network parameters. Arguments marked "(changed)" were flagged by the author
# as deviating from the library default.
net_params = NetParams(
    # default: True (changed) -- disabled so that intersections are not skipped
    no_internal_links=False,
    inflows=None,                            # default: None
    osm_path=None,                           # default: None
    netfile=None,                            # default: None
    additional_params=additionalNetParams,   # default: None (changed)
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial placement of the vehicles. Arguments marked "(changed)" were flagged
# by the author as deviating from the library default.
initial_config = InitialConfig(
    shuffle=True,                      # default: False (changed)
    spacing="custom",                  # default: "uniform" (changed)
    min_gap=0,                         # default: 0
    perturbation=29.99,                # default: 0.0 (changed)
    x0=0,                              # default: 0
    bunching=0,                        # default: 0
    lanes_distribution=float("inf"),   # default: float("inf")
    edges_distribution="all",          # default: "all"
    additional_params=None,            # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator settings. Every argument is spelled out with the value the author
# recorded as the library default, so the full set of options stays visible.
sumo_params = SumoParams(
    port=None,                 # default: None
    sim_step=0.1,              # default: 0.1
    emission_path=None,        # default: None
    lateral_resolution=None,   # default: None
    no_step_log=True,          # default: True
    render=False,              # default: False
    save_render=False,         # default: False
    sight_radius=25,           # default: 25
    show_radius=False,         # default: False
    pxpm=2,                    # default: 2
    overtake_right=False,      # default: False
    seed=None,                 # default: None
    restart_instance=False,    # default: False
    print_warnings=True,       # default: True
    teleport_time=-1,          # default: -1
    num_clients=1,             # default: 1
    sumo_binary=None,          # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings; handed to EnvParams as `additional_params`.
additionalEnvParams = dict(
    max_accel=3,          # maximum acceleration of autonomous vehicles
    max_decel=3,          # maximum deceleration of autonomous vehicles
    target_velocity=30,
)

# Environment parameters. Arguments marked "(changed)" were flagged by the
# author as deviating from the library default.
env_params = EnvParams(
    additional_params=additionalEnvParams,   # default: None (changed)
    horizon=HORIZON,                         # default: 500 (changed)
    warmup_steps=0,                          # default: 0
    sims_per_step=1,                         # default: 1
    evaluate=False,                          # default: False
)

### Vehicles Parameter

In [6]:
from flow.core.params import VehicleParams

# import vehicles dynamics models
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
from random import *

# Fresh vehicle-parameter object; vehicle types are registered on it with
# `vehicles.add(...)` further down and it is passed on as `veh` in flow_params.
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Car-following parameters (default: None); the speed mode is set to "aggressive".
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# Lane-change parameters are left at their default (None).
lc_parameter = None

# Register the RL-controlled vehicle type.
vehicles.add(
    veh_id="rl",                                      # name of the vehicle
    acceleration_controller=(RLController, {}),       # default: (SumoCarFollowingController, {})
    lane_change_controller=(StaticLaneChanger, {}),   # default: (SumoLaneChangeController, {})
    routing_controller=(ContinuousRouter, {}),        # default: None
    initial_speed=0,                                  # default: 0
    num_vehicles=4,                                   # default: 1
    car_following_params=cf_parameter,
    # Options that were left commented out in the original cell:
    #   speed_mode="aggressive"                  (default: "right_of_way")
    #   lane_change_mode="aggressive"            (default: "no_lat_collide")
    #   sumo_car_following_params=cf_parameter   (default: None)
    #   sumo_lc_params=lc_parameter              (default: None)
)

### Flow Parameter

In [8]:
# Assemble flow_params. The dictionary keys must be exactly the ones used here.
flow_params = {
    # name of the experiment
    "exp_tag": experiment_name,
    # name of the flow environment the experiment is running on
    "env_name": env_name,
    # name of the scenario class the experiment uses
    "scenario": scenario_name,
    # simulator that is used by the experiment
    "simulator": 'traci',
    # sumo-related parameters (see flow.core.params.SumoParams)
    "sim": sumo_params,
    # environment-related parameters (see flow.core.params.EnvParams)
    "env": env_params,
    # network-related parameters (see flow.core.params.NetParams and the
    # scenario's documentation or its ADDITIONAL_NET_PARAMS component)
    "net": net_params,
    # vehicles to be placed in the network at the start of a rollout
    # (see flow.core.params.VehicleParams, the class instantiated above)
    "veh": vehicles,
    # (optional) parameters affecting the positioning of vehicles upon
    # initialization/reset (see flow.core.params.InitialConfig)
    "initial": initial_config,
}

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

from ray import tune
from ray.rllib.agents.ppo.ppo_policy_graph import PPOPolicyGraph

In [10]:
# How many rollout workers run in parallel.
N_CPUS = 1
# How many rollouts are collected per training iteration.
N_ROLLOUTS = 40

# One CPU per rollout worker plus one extra (presumably for the trainer/driver process).
# NOTE(review): the training log recorded further down repeatedly reports
# ***LOW MEMORY*** and suggests passing `object_store_memory` to this call --
# consider setting it before the next long run.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-05-01_23-30-28_18468/logs.
Waiting for redis server at 127.0.0.1:42238 to respond...
Waiting for redis server at 127.0.0.1:29592 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8890/notebooks/ray_ui.ipynb?token=5120a71e8e2258f6b02cd3375819c8caddf04367c9b3883c



{'node_ip_address': '192.168.2.102',
 'object_store_addresses': ['/tmp/ray/session_2019-05-01_23-30-28_18468/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-05-01_23-30-28_18468/sockets/raylet'],
 'redis_address': '192.168.2.102:42238',
 'webui_url': 'http://localhost:8890/notebooks/ray_ui.ipynb?token=5120a71e8e2258f6b02cd3375819c8caddf04367c9b3883c'}

In [11]:
# Local import: deepcopy is needed for the default-config copy below.
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or to a user-defined trainable function
# or class registered in the Tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults. A shallow `.copy()` would share the nested "model"
# and "env_config" dicts with the class-level default config, so the in-place
# edits below would leak into it (and into every re-run of this cell).
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate (author's note: default 0.999)
# sizes of the hidden layers in the network (author's note: default 64, 32)
config["model"].update({"fcnet_hiddens": [100, 50, 25]})
config["use_gae"] = True  # use generalized advantage estimation
config["lambda"] = 0.97
# Overrides that were left disabled in the original cell:
#   config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#   config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"]  # default 200 -- still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# Save the flow params for replay: serialize flow_params to a JSON string and
# store it, together with the algorithm name, in the env_config section.
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)
config['env_config']['flow_params'] = flow_json
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to obtain the environment factory
# and the name under which the Flow env for this experiment is registered.
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register the environment factory with RLlib under that name.
register_env(gym_name, create_env)

In [12]:
# Multi-agent policy mapping.
# Build the environment once to read its observation and action spaces.
# NOTE(review): the recorded output shows that this call starts a SUMO
# instance; nothing in this cell shuts it down -- confirm whether it should be
# terminated once the spaces have been read.
test_env = create_env()
obs_space, act_space = test_env.observation_space, test_env.action_space


def gen_policy():
    """Return the policy spec tuple: (PPOPolicyGraph, obs_space, act_space, {})."""
    return (PPOPolicyGraph, obs_space, act_space, {})


# A single policy graph, 'rl_0'.
policy_graphs = {'rl_0': gen_policy()}


def policy_mapping_fn(agent_id):
    """Map every agent id to the one shared policy 'rl_0'."""
    return 'rl_0'


# Equivalent to config.update({'multiagent': {...}}): set the multi-agent section.
config['multiagent'] = {
    'policy_graphs': policy_graphs,
    'policy_mapping_fn': tune.function(policy_mapping_fn),
    'policies_to_train': ['rl_0'],
}

 Starting SUMO on port 54019


New Teamspirit:
-0.31793004235288946
0.15303236157604805
[('bottom_intersection', 34.11223706755223), ('bottom_intersection', 67.00817311605738), ('top_intersection', 8.275041899139783), ('top_intersection', 50.88762744358271)]


In [13]:
# Launch training: one experiment, keyed by the experiment tag.
trials = run_experiments({
    flow_params["exp_tag"]: dict(
        run=alg_run,                          # RL algorithm to run
        env=gym_name,                         # environment name generated earlier
        config=dict(config),                  # configuration params (must match "run" value)
        checkpoint_freq=1,                    # number of iterations between checkpoints
        max_failures=999,
        stop=dict(training_iteration=1000),   # stop after this many iterations
    ),
})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/2 CPUs, 0/1 GPUs
Memory usage on this node: 13.2/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0_2019-05-01_23-30-329daunr1k -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 13.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-32-49
  done: false
  episode_len_mean: 119.05
  episode_reward_max: 43.1687284426817
  episode_reward_mean: 13.646119597348985
  episode_reward_min: -378.2460432724797
  episodes_this_iter: 40
  episodes_total: 40
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 10453.109
    load_time_ms: 81

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.0/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 233 s, 5 iter, 24000 ts, 8.13 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-36-22
  done: false
  episode_len_mean: 118.25
  episode_reward_max: 103.82215575277931
  episode_reward_mean: 17.197381650528175
  episode_reward_min: -379.21993892573585
  episodes_this_iter: 40
  episodes_total: 242
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7574.872
    load_time_ms: 15.073
    num_steps_sampled: 28800
    num_steps_trained: 28800
    rl_0:
      cur_kl_coeff: 0.006250000558793545
      cur_lr: 4.999999873689376e-05
      entropy: 1.4119046926498413
      kl: 0.0074527994729578495
      policy_lo

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-39-39
  done: false
  episode_len_mean: 111.45
  episode_reward_max: 197.76874508690798
  episode_reward_mean: -31.24078015199023
  episode_reward_min: -359.04687327074595
  episodes_this_iter: 44
  episodes_total: 455
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7079.109
    load_time_ms: 2.032
    num_steps_sampled: 52800
    num_steps_trained: 52800
    rl_0:
      cur_kl_coeff: 0.00019531251746229827
      cur_lr: 4.999999873689376e-05
      entropy: 1.3478200435638428
      kl: 0.004401156213134527
      policy_loss: -0.0022245885338634253
      total_loss: 477.9120788574219
      vf_explained_var: 0.14057382941246033
      vf_loss: 477.91436767578125
    sample_time_ms: 33807.554
    update_time_ms: 9.559
  iterations_since_restore: 11
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 18511
  policy_reward_mea

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.2/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 668 s, 16 iter, 76800 ts, -60.5 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-43-34
  done: false
  episode_len_mean: 98.93
  episode_reward_max: 250.4739690340127
  episode_reward_mean: -65.24751405077275
  episode_reward_min: -357.0307937735096
  episodes_this_iter: 48
  episodes_total: 738
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7102.551
    load_time_ms: 2.016
    num_steps_sampled: 81600
    num_steps_trained: 81600
    rl_0:
      cur_kl_coeff: 3.0517580853484105e-06
      cur_lr: 4.999999873689376e-05
      entropy: 1.3167181015014648
      kl: 0.00708889402449131
      policy_loss

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-46-55
  done: false
  episode_len_mean: 93.07
  episode_reward_max: 322.42185980968225
  episode_reward_mean: -13.710186334353244
  episode_reward_min: -350.26809520539376
  episodes_this_iter: 52
  episodes_total: 988
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7329.263
    load_time_ms: 1.855
    num_steps_sampled: 105600
    num_steps_trained: 105600
    rl_0:
      cur_kl_coeff: 9.536744016713783e-08
      cur_lr: 4.999999873689376e-05
      entropy: 1.2748862504959106
      kl: 0.00546769006177783
      policy_loss: -0.0014272554544731975
      total_loss: 680.9215698242188
      vf_explained_var: 0.31161433458328247
      vf_loss: 680.9229736328125
    sample_time_ms: 32287.526
    update_time_ms: 9.356
  iterations_since_restore: 22
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 18511
  policy_reward_mean

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 1110 s, 27 iter, 129600 ts, 10.7 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-51-00
  done: false
  episode_len_mean: 83.68
  episode_reward_max: 367.9434675563407
  episode_reward_mean: -51.72741135555274
  episode_reward_min: -355.36631459345165
  episodes_this_iter: 59
  episodes_total: 1297
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7438.288
    load_time_ms: 2.047
    num_steps_sampled: 134400
    num_steps_trained: 134400
    rl_0:
      cur_kl_coeff: 1.4901162526115286e-09
      cur_lr: 4.999999873689376e-05
      entropy: 1.2299062013626099
      kl: 0.0061493366956710815
      poli

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-54-34
  done: false
  episode_len_mean: 92.68
  episode_reward_max: 387.3371954163415
  episode_reward_mean: 40.39765701374343
  episode_reward_min: -349.5373619310327
  episodes_this_iter: 51
  episodes_total: 1560
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7802.551
    load_time_ms: 2.052
    num_steps_sampled: 158400
    num_steps_trained: 158400
    rl_0:
      cur_kl_coeff: 4.656613289411027e-11
      cur_lr: 4.999999873689376e-05
      entropy: 1.1901484727859497
      kl: 0.007620789110660553
      policy_loss: -0.0023944643326103687
      total_loss: 689.37841796875
      vf_explained_var: 0.5983080863952637
      vf_loss: 689.3809204101562
    sample_time_ms: 34090.904
    update_time_ms: 8.251
  iterations_since_restore: 33
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 18511
  policy_reward_mean:
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.7/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 1574 s, 38 iter, 182400 ts, 54.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-01_23-58-50
  done: false
  episode_len_mean: 92.17
  episode_reward_max: 406.55544685237345
  episode_reward_mean: 30.455765699225594
  episode_reward_min: -354.24675094644726
  episodes_this_iter: 55
  episodes_total: 1878
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7633.996
    load_time_ms: 2.392
    num_steps_sampled: 187200
    num_steps_trained: 187200
    rl_0:
      cur_kl_coeff: 7.275958264704729e-13
      cur_lr: 4.999999873689376e-05
      entropy: 1.1775388717651367
      kl: 0.005508604925125837
      polic

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.8/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 1791 s, 43 iter, 206400 ts, 12.4 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-02-20
  done: false
  episode_len_mean: 87.96
  episode_reward_max: 419.44671978116645
  episode_reward_mean: 29.75456607551702
  episode_reward_min: -355.97739495643486
  episodes_this_iter: 52
  episodes_total: 2150
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.9/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 1993 s, 48 iter, 230400 ts, 16.9 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-05-40
  done: false
  episode_len_mean: 84.63
  episode_reward_max: 431.6217326659735
  episode_reward_mean: 2.821103103511829
  episode_reward_min: -355.24949113108863
  episodes_this_iter: 56
  episodes_total: 2431
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_ti

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.9/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 2190 s, 53 iter, 254400 ts, 18.7 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-09-00
  done: false
  episode_len_mean: 80.97
  episode_reward_max: 449.6036344987853
  episode_reward_mean: 8.141556067117966
  episode_reward_min: -348.38143365569704
  episodes_this_iter: 59
  episodes_total: 2719
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_ti

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 13.8/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 2388 s, 58 iter, 278400 ts, 46.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-12-18
  done: false
  episode_len_mean: 85.52
  episode_reward_max: 455.89227884675023
  episode_reward_mean: 37.84575228262764
  episode_reward_min: -356.30838347036257
  episodes_this_iter: 55
  episodes_total: 2994
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 6697.135
    load_time_ms: 1.597
    num_steps_sampled: 283200
    num_steps_trained: 283200
    rl_0:
      cur_kl_coeff: 6.938894524292688e-19
      cur_lr: 4.999999873689376e-05
      entropy: 0.9995487332344055
      kl: 0.006408382207155228
      policy

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-15-40
  done: false
  episode_len_mean: 84.29
  episode_reward_max: 462.74399203197373
  episode_reward_mean: 47.87227864721627
  episode_reward_min: -347.1888770879498
  episodes_this_iter: 55
  episodes_total: 3280
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 6776.081
    load_time_ms: 1.652
    num_steps_sampled: 307200
    num_steps_trained: 307200
    rl_0:
      cur_kl_coeff: 2.168404538841465e-20
      cur_lr: 4.999999873689376e-05
      entropy: 0.9822554588317871
      kl: 0.007362775970250368
      policy_loss: -0.0025825584307312965
      total_loss: 627.1466674804688
      vf_explained_var: 0.7528408765792847
      vf_loss: 627.1492919921875
    sample_time_ms: 33120.562
    update_time_ms: 10.377
  iterations_since_restore: 64
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 18511
  policy_reward_mean:

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.6/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 2822 s, 69 iter, 331200 ts, 33.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-19-32
  done: false
  episode_len_mean: 82.7
  episode_reward_max: 501.60393566005297
  episode_reward_mean: 33.17429176066457
  episode_reward_min: -356.6226391828769
  episodes_this_iter: 57
  episodes_total: 3619
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7192.292
    load_time_ms: 1.84
    num_steps_sampled: 336000
    num_steps_trained: 336000
    rl_0:
      cur_kl_coeff: 3.388132091939789e-22
      cur_lr: 4.999999873689376e-05
      entropy: 0.9307132959365845
      kl: 0.007860410958528519
      policy_lo

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-22-46
  done: false
  episode_len_mean: 85.32
  episode_reward_max: 494.1042980707279
  episode_reward_mean: 92.03802082291475
  episode_reward_min: -350.65776410599716
  episodes_this_iter: 54
  episodes_total: 3908
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 7010.325
    load_time_ms: 1.816
    num_steps_sampled: 360000
    num_steps_trained: 360000
    rl_0:
      cur_kl_coeff: 1.058791278731184e-23
      cur_lr: 4.999999873689376e-05
      entropy: 0.9197521805763245
      kl: 0.008914700709283352
      policy_loss: -0.0035149487666785717
      total_loss: 509.7260437011719
      vf_explained_var: 0.8442767858505249
      vf_loss: 509.7295837402344
    sample_time_ms: 31506.63
    update_time_ms: 7.765
  iterations_since_restore: 75
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 18511
  policy_reward_mean:
 

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-26-00
  done: false
  episode_len_mean: 78.58
  episode_reward_max: 490.4652108259909
  episode_reward_mean: 19.62884813449938
  episode_reward_min: -360.62506886243057
  episodes_this_iter: 60
  episodes_total: 4203
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time_ms: 6670.491
    load_time_ms: 1.671
    num_steps_sampled: 384000
    num_steps_trained: 384000
    rl_0:
      cur_kl_coeff: 3.30872274603495e-25
      cur_lr: 4.999999873689376e-05
      entropy: 0.8632514476776123
      kl: 0.005460409913212061
      policy_loss: -0.0012314255582168698
      total_loss: 561.2254028320312
      vf_explained_var: 0.8430723547935486
      vf_loss: 561.2266845703125
    sample_time_ms: 32009.512
    update_time_ms: 7.342
  iterations_since_restore: 80
  node_ip: 192.168.2.102
  num_metric_batches_dropped: 0
  pid: 18511
  policy_reward_mean:
 

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.8/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 3409 s, 84 iter, 403200 ts, 97.3 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-29-23
  done: false
  episode_len_mean: 85.04
  episode_reward_max: 509.3703997134733
  episode_reward_mean: 89.31467581772564
  episode_reward_min: -355.93897283766967
  episodes_this_iter: 56
  episodes_total: 4487
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_ti

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 15.0/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 3610 s, 89 iter, 427200 ts, 45.7 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-32-41
  done: false
  episode_len_mean: 79.05
  episode_reward_max: 515.9856479916798
  episode_reward_mean: 46.078713840223124
  episode_reward_min: -352.06580048410245
  episodes_this_iter: 59
  episodes_total: 4775
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 14.9/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 3805 s, 94 iter, 451200 ts, 135 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-35-59
  done: false
  episode_len_mean: 84.95
  episode_reward_max: 530.3597274261438
  episode_reward_mean: 101.92912214727899
  episode_reward_min: -345.2071620259742
  episodes_this_iter: 58
  episodes_total: 5055
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_tim

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 15.0/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 4004 s, 99 iter, 475200 ts, 97.6 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-39-13
  done: false
  episode_len_mean: 87.49
  episode_reward_max: 528.5952211796686
  episode_reward_mean: 118.00786352044068
  episode_reward_min: -340.7499848325765
  episodes_this_iter: 59
  episodes_total: 5340
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_ti

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 15.3/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 4200 s, 104 iter, 499200 ts, 114 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-42-29
  done: false
  episode_len_mean: 81.97
  episode_reward_max: 536.5680995032034
  episode_reward_mean: 77.16013279253342
  episode_reward_min: -353.72623305433206
  episodes_this_iter: 56
  episodes_total: 5626
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_ti

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 15.2/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 4398 s, 109 iter, 523200 ts, 16.2 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-45-51
  done: false
  episode_len_mean: 81.72
  episode_reward_max: 542.7361257120178
  episode_reward_mean: 70.67931128663459
  episode_reward_min: -356.91520563911183
  episodes_this_iter: 55
  episodes_total: 5921
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_t

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 15.2/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 4593 s, 114 iter, 547200 ts, 160 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-49-06
  done: false
  episode_len_mean: 86.43
  episode_reward_max: 530.978762763806
  episode_reward_mean: 117.61060391407125
  episode_reward_min: -349.4956153696587
  episodes_this_iter: 56
  episodes_total: 6202
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_tim

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 15.4/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 4793 s, 119 iter, 571200 ts, 88 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-52-26
  done: false
  episode_len_mean: 85.13
  episode_reward_max: 533.6954220908323
  episode_reward_mean: 114.02291920502492
  episode_reward_min: -355.9008911424778
  episodes_this_iter: 55
  episodes_total: 6503
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_tim

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 2/2 CPUs, 0/1 GPUs
Memory usage on this node: 15.4/16.4 GB: ***LOW MEMORY*** less than 10% of the memory on this node is available for use. This can cause unexpected crashes. Consider reducing the memory used by your application or reducing the Ray object store size by setting `object_store_memory` when calling `ray.init`.
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:	RUNNING [pid=18511], 4987 s, 124 iter, 595200 ts, 58 rew

Result for PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0:
  custom_metrics: {}
  date: 2019-05-02_00-55-39
  done: false
  episode_len_mean: 80.64
  episode_reward_max: 533.3774778360215
  episode_reward_mean: 76.50673933530419
  episode_reward_min: -359.3138697954109
  episodes_this_iter: 58
  episodes_total: 6790
  experiment_id: 66f68a9af4c24638b2d0f530e68e804a
  hostname: Gandalf
  info:
    grad_time

Attempting to recover trial state from last checkpoint.
Error restoring runner.
Traceback (most recent call last):
  File "/home/thorsten/anaconda3/envs/flow_2/lib/python3.5/site-packages/ray/tune/trial_runner.py", line 261, in _process_events
    result = self.trial_executor.fetch_result(trial)
  File "/home/thorsten/anaconda3/envs/flow_2/lib/python3.5/site-packages/ray/tune/ray_trial_executor.py", line 211, in fetch_result
    result = ray.get(trial_future[0])
  File "/home/thorsten/anaconda3/envs/flow_2/lib/python3.5/site-packages/ray/worker.py", line 2386, in get
    raise value
ray.worker.RayTaskError: [36mray_PPOAgent:train()[39m (pid=18511, host=Gandalf)
  File "/home/thorsten/anaconda3/envs/flow_2/lib/python3.5/site-packages/ray/rllib/agents/agent.py", line 279, in train
    result = Trainable.train(self)
  File "/home/thorsten/anaconda3/envs/flow_2/lib/python3.5/site-packages/ray/tune/trainable.py", line 146, in train
    result = self._train()
  File "/home/thorsten/anacond

TuneError: ('Trials did not complete', [PPO_MultiAgentIntersectionEnv_sharedPolicy_4veh-v0_0])