# TRAINING I3W


# A) Create Environment, Vehicles, etc.

### General Parameter

In [1]:
# Length of one rollout; defined once so every later cell uses the same value.
HORIZON = 500

# Name under which the experiment is tagged.
experiment_name = "IntersectionExample"

# Scenario class: list what the package exposes, then pick one by name.
import flow.scenarios as scenarios
print("Available scenarios:", scenarios.__all__, sep="\n")
# NOTE(review): the saved output of this cell lists 'IntersectionScenarioTW'
# in scenarios.__all__, not the name used here -- confirm which identifier
# the package actually exports.
scenario_name = "IntersectionTWScenario"

# Environment class: same procedure as for the scenario.
import flow.envs as flowenvs
print("\nAvailable environments:", flowenvs.__all__, sep="\n")
env_name = "IntersectionEnv"

Available scenarios:
['Scenario', 'BayBridgeScenario', 'BayBridgeTollScenario', 'BottleneckScenario', 'Figure8Scenario', 'SimpleGridScenario', 'HighwayScenario', 'LoopScenario', 'MergeScenario', 'TwoLoopsOneMergingScenario', 'MultiLoopScenario', 'IntersectionScenarioTW']

Available environments:
['Env', 'AccelEnv', 'LaneChangeAccelEnv', 'LaneChangeAccelPOEnv', 'LaneChangeAccelEnv_speed', 'GreenWaveTestEnv', 'GreenWaveTestEnv', 'WaveAttenuationMergePOEnv', 'TwoLoopsMergePOEnv', 'BottleneckEnv', 'BottleNeckAccelEnv', 'WaveAttenuationEnv', 'WaveAttenuationPOEnv', 'TrafficLightGridEnv', 'PO_TrafficLightGridEnv', 'DesiredVelocityEnv', 'TestEnv', 'BayBridgeEnv', 'IntersectionEnv']


### Net Parameter

In [2]:
from flow.core.params import NetParams
from flow.scenarios.intersection import ADDITIONAL_NET_PARAMS

# Scenario-specific network settings, passed to NetParams as additional_params.
additionalNetParams = dict(
    edge_length=40,
    lanes=1,
    speed_limit=30,
)

# Network parameters. The "default" notes are the author's record of the
# NetParams defaults; entries flagged "overridden" differ from them.
net_params = NetParams(
    # default: True (overridden) -- author's note: so that intersections
    # are not skipped
    no_internal_links=False,
    # default: None
    inflows=None,
    # default: None
    osm_path=None,
    # default: None
    netfile=None,
    # default: None (overridden)
    additional_params=additionalNetParams,
)

### InitialConfig Parameter

In [3]:
from flow.core.params import InitialConfig

# Initial placement of vehicles. The "default" notes are the author's record
# of the InitialConfig defaults; entries flagged "overridden" differ from them.
initial_config = InitialConfig(
    shuffle=True,                     # default: False (overridden)
    spacing="custom",                 # default: "uniform" (overridden)
    min_gap=10,                       # default: 0 (overridden)
    perturbation=30.0,                # default: 0.0 (overridden)
    x0=0,                             # default: 0
    bunching=0,                       # default: 0
    lanes_distribution=float("inf"),  # default: float("inf")
    edges_distribution="all",         # default: "all"
    additional_params=None,           # default: None
)

### SUMO Parameter

In [4]:
from flow.core.params import SumoParams

# Simulator parameters. Every argument is spelled out explicitly; per the
# author's notes each value below equals the SumoParams default.
sumo_params = SumoParams(
    port=None,                # default: None
    sim_step=0.1,             # default: 0.1
    emission_path=None,       # default: None
    lateral_resolution=None,  # default: None
    no_step_log=True,         # default: True
    render=False,             # default: False
    save_render=False,        # default: False
    sight_radius=25,          # default: 25
    show_radius=False,        # default: False
    pxpm=2,                   # default: 2
    overtake_right=False,     # default: False
    seed=None,                # default: None
    restart_instance=False,   # default: False
    print_warnings=True,      # default: True
    teleport_time=-1,         # default: -1
    num_clients=1,            # default: 1
    sumo_binary=None,         # default: None
)

### Environment Parameter

In [5]:
from flow.core.params import EnvParams

# Environment-specific settings, passed to EnvParams as additional_params.
additionalEnvParams = dict(
    max_accel=3,         # maximum acceleration of autonomous vehicles
    max_decel=3,         # maximum deceleration of autonomous vehicles
    target_velocity=30,
)

# Environment parameters. The "default" notes are the author's record of the
# EnvParams defaults; entries flagged "!!" were marked by the author as
# deliberately set.
env_params = EnvParams(
    additional_params=additionalEnvParams,  # default: None  !!
    horizon=HORIZON,                        # default: 500   !!
    warmup_steps=0,                         # default: 0
    sims_per_step=1,                        # default: 1
    evaluate=False,                         # default: False
)

### Vehicles Parameter

In [6]:
# Container class that collects the vehicle types placed in the network.
from flow.core.params import VehicleParams

# Controllers and SUMO parameter classes used when adding vehicles.
# The two commented-out imports are the SUMO-based controllers that the
# notes in the vehicles.add() cell name as the defaults.
#from flow.controllers import SumoCarFollowingController
from flow.controllers import ContinuousRouter
#from flow.controllers.lane_change_controllers import SumoLaneChangeController
from flow.controllers.lane_change_controllers import StaticLaneChanger
from flow.controllers import RLController
from flow.core.params import SumoLaneChangeParams
from flow.core.params import SumoCarFollowingParams
# NOTE(review): wildcard import; no name from `random` is used in the cells
# visible here. Consider importing only what is needed -- confirm against
# the rest of the notebook before narrowing it.
from random import *

# Empty container; vehicle types are added to it in the following cell(s).
vehicles = VehicleParams()

#### Add RL-Agent controlled vehicles 

In [7]:
# Start from a fresh container so this cell is idempotent: re-running it
# would otherwise call vehicles.add() a second time on the same, already
# populated VehicleParams object.
vehicles = VehicleParams()

# car following parameters (default: None)
cf_parameter = SumoCarFollowingParams(speed_mode="aggressive")
# lane change parameters (default: None); currently not passed to add()
lc_parameter = None

# Register the RL-controlled vehicle type.
# Keyword arguments the author tried and left disabled, kept for reference:
#   speed_mode="aggressive"               (noted default: "right_of_way")
#   lane_change_mode="aggressive"         (noted default: "no_lat_collide")
#   sumo_car_following_params=cf_parameter
#   sumo_lc_params=lc_parameter
vehicles.add(
    # name of the vehicle type
    veh_id="rl",
    # acceleration controller, default: (SumoCarFollowingController, {})
    acceleration_controller=(RLController, {}),
    # lane change controller, default: (SumoLaneChangeController, {})
    lane_change_controller=(StaticLaneChanger, {}),
    # routing controller, default: None
    routing_controller=(ContinuousRouter, {}),
    # initial speed, default: 0
    initial_speed=0,
    # number of vehicles of this type, default: 1
    num_vehicles=2,
    # car following parameters defined above
    car_following_params=cf_parameter,
)

### Flow Parameter

In [8]:
# Creating flow_params. Make sure the dictionary keys are as specified. 
flow_params = dict( # name of the experiment
                      exp_tag=experiment_name,
                    # name of the flow environment the experiment is running on
                      env_name=env_name,
                    # name of the scenario class the experiment uses
                      scenario=scenario_name,
                    # simulator that is used by the experiment
                      simulator='traci',
                    # sumo-related parameters (see flow.core.params.SumoParams)
                      sim=sumo_params,
                    # environment related parameters (see flow.core.params.EnvParams)
                      env=env_params,
                    # network-related parameters (see flow.core.params.NetParams and
                    # the scenario's documentation or ADDITIONAL_NET_PARAMS component)
                      net=net_params,
                    # vehicles to be placed in the network at the start of a rollout 
                    # (see flow.core.vehicles.Vehicles)
                      veh=vehicles,
                   # (optional) parameters affecting the positioning of vehicles upon 
                   # initialization/reset (see flow.core.params.InitialConfig)
                      initial=initial_config
                )

# B) Training

In [9]:
import json

import ray
try:
    from ray.rllib.agents.agent import get_agent_class
except ImportError:
    from ray.rllib.agents.registry import get_agent_class
from ray.tune import run_experiments
from ray.tune.registry import register_env

from flow.utils.registry import make_create_env
from flow.utils.rllib import FlowParamsEncoder

In [10]:
# Resource settings for training.
N_CPUS = 2        # number of parallel workers
N_ROLLOUTS = 20   # number of rollouts per training iteration

# Start Ray with one CPU more than there are workers.
ray.init(num_cpus=N_CPUS + 1, redirect_output=True)

Process STDOUT and STDERR is being redirected to /tmp/ray/session_2019-03-20_16-21-49_14070/logs.
Waiting for redis server at 127.0.0.1:32971 to respond...
Waiting for redis server at 127.0.0.1:58310 to respond...
Starting the Plasma object store with 6.554658406 GB memory using /dev/shm.

View the web UI at http://localhost:8889/notebooks/ray_ui.ipynb?token=ecae69d6f14073706d8dbd67070d19ae6afb98f59b9eda28



{'node_ip_address': '172.16.123.117',
 'object_store_addresses': ['/tmp/ray/session_2019-03-20_16-21-49_14070/sockets/plasma_store'],
 'raylet_socket_names': ['/tmp/ray/session_2019-03-20_16-21-49_14070/sockets/raylet'],
 'redis_address': '172.16.123.117:32971',
 'webui_url': 'http://localhost:8889/notebooks/ray_ui.ipynb?token=ecae69d6f14073706d8dbd67070d19ae6afb98f59b9eda28'}

In [11]:
from copy import deepcopy

# The algorithm or model to train. This may refer to the name of a built-in
# algorithm (e.g. RLlib's DQN or PPO), or a user-defined trainable function
# or class registered in the tune registry.
alg_run = "PPO"

agent_cls = get_agent_class(alg_run)
# Deep-copy the defaults. A shallow .copy() shares the nested "model" and
# "env_config" dicts with the agent class, so the updates below would
# silently modify the class-level defaults as well.
config = deepcopy(agent_cls._default_config)
config["num_workers"] = N_CPUS  # number of parallel workers
config["train_batch_size"] = HORIZON * N_ROLLOUTS  # batch size
config["gamma"] = 0.999  # discount rate
# size of hidden layers in network (author's note: default 64 32)
config["model"].update({"fcnet_hiddens": [100, 50, 25]})
config["use_gae"] = True  # using generalized advantage estimation
config["lambda"] = 0.97  # lambda parameter used with GAE
# Disabled tuning options, kept for reference:
#config["sgd_minibatch_size"] = min(16 * 1024, config["train_batch_size"])  # stochastic gradient descent
#config["sample_batch_size"] = config["train_batch_size"]/config["num_workers"] # 200 by default, still too high?
config["kl_target"] = 0.02  # target KL divergence
config["num_sgd_iter"] = 10  # number of SGD iterations
config["horizon"] = HORIZON  # rollout horizon

# save the flow params for replay
flow_json = json.dumps(flow_params, cls=FlowParamsEncoder, sort_keys=True,
                       indent=4)  # generating a string version of flow_params
config['env_config']['flow_params'] = flow_json  # adding the flow_params to config dict
config['env_config']['run'] = alg_run

# Call the utility function make_create_env to be able to
# register the Flow env for this experiment
create_env, gym_name = make_create_env(params=flow_params, version=0)

# Register as rllib env with Gym
register_env(gym_name, create_env)

In [None]:
# Specification of the single experiment to run.
experiment_spec = {
    # RL algorithm to run
    "run": alg_run,
    # environment name generated earlier
    "env": gym_name,
    # configuration params (must match "run" value); shallow copy of config
    "config": dict(config),
    # number of iterations between checkpoints
    "checkpoint_freq": 1,
    "max_failures": 999,
    # stopping conditions
    "stop": {
        # number of iterations to stop after
        "training_iteration": 1000,
    },
}

# Launch training; the experiment is keyed by its tag.
trials = run_experiments({flow_params["exp_tag"]: experiment_spec})

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 0/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.3/16.4 GB

Created LogSyncer for /home/thorsten/ray_results/IntersectionExample/PPO_IntersectionEnv-v0_0_2019-03-20_16-21-50csiczf5f -> 
== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 5.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_16-22-40
  done: false
  episode_len_mean: 486.5
  episode_reward_max: 198.5259663830813
  episode_reward_mean: 88.7530959373219
  episode_reward_min: -50.308129412295926
  episodes_this_iter: 20
  episodes_total: 20
  experiment_id: 7017d3ece0a74e7c8a3a6917ef92853c
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 0.20000000298023224
      cur_lr: 4.999999873689376e-05
      entropy: 2.8368449211120605
      kl: 0.

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=14120], 123 s, 6 iter, 60000 ts, 117 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_16-24-41
  done: false
  episode_len_mean: 440.8
  episode_reward_max: 357.58573751857136
  episode_reward_mean: 120.25630109250862
  episode_reward_min: -60.638715080140535
  episodes_this_iter: 24
  episodes_total: 154
  experiment_id: 7017d3ece0a74e7c8a3a6917ef92853c
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 0.0031250000465661287
      cur_lr: 4.999999873689376e-05
      entropy: 2.8416595458984375
      kl: 0.0023808185942471027
      policy_loss: -0.0016981770750135183
      total_loss: 237.8536834716797
      vf_explained_var: 0.28178611397743225
      vf_loss: 237.8554229736328
    grad_time_ms: 2121.58

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.4/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=14120], 242 s, 12 iter, 120000 ts, 137 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_16-26-43
  done: false
  episode_len_mean: 342.55
  episode_reward_max: 381.75599933693377
  episode_reward_mean: 158.71631256974644
  episode_reward_min: -63.559883028216596
  episodes_this_iter: 32
  episodes_total: 324
  experiment_id: 7017d3ece0a74e7c8a3a6917ef92853c
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 9.765625145519152e-05
      cur_lr: 4.999999873689376e-05
      entropy: 2.804187059402466
      kl: 0.01465668249875307
      policy_loss: -0.006689372938126326
      total_loss: 549.803955078125
      vf_explained_var: 0.34292691946029663
      vf_loss: 549.8106079101562
    grad_time_ms: 2275.527


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=14120], 374 s, 18 iter, 180000 ts, 177 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_16-28-53
  done: false
  episode_len_mean: 359.41
  episode_reward_max: 383.423357222175
  episode_reward_mean: 154.78030104282405
  episode_reward_min: -57.24286616178049
  episodes_this_iter: 29
  episodes_total: 497
  experiment_id: 7017d3ece0a74e7c8a3a6917ef92853c
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.220703143189894e-05
      cur_lr: 4.999999873689376e-05
      entropy: 2.89862322807312
      kl: 0.02140015922486782
      policy_loss: -0.0068793329410254955
      total_loss: 377.1121826171875
      vf_explained_var: 0.4501374661922455
      vf_loss: 377.1190490722656
    grad_time_ms: 2348.436
   

== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.5/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=14120], 485 s, 24 iter, 240000 ts, 162 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_16-30-48
  done: false
  episode_len_mean: 394.19
  episode_reward_max: 366.2024421797516
  episode_reward_mean: 177.9922496910989
  episode_reward_min: -44.80449284150819
  episodes_this_iter: 24
  episodes_total: 645
  experiment_id: 7017d3ece0a74e7c8a3a6917ef92853c
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 6.10351571594947e-06
      cur_lr: 4.999999873689376e-05
      entropy: 2.9152536392211914
      kl: 0.006397399585694075
      policy_loss: -0.0031154791358858347
      total_loss: 363.1375732421875
      vf_explained_var: 0.5871943831443787
      vf_loss: 363.14068603515625
    grad_time_ms: 2041.394


== Status ==
Using FIFO scheduling algorithm.
Resources requested: 3/3 CPUs, 0/1 GPUs
Memory usage on this node: 6.3/16.4 GB
Result logdir: /home/thorsten/ray_results/IntersectionExample
RUNNING trials:
 - PPO_IntersectionEnv-v0_0:	RUNNING [pid=14120], 603 s, 30 iter, 300000 ts, 233 rew

Result for PPO_IntersectionEnv-v0_0:
  custom_metrics: {}
  date: 2019-03-20_16-32-42
  done: false
  episode_len_mean: 399.56
  episode_reward_max: 375.6774620972217
  episode_reward_mean: 242.14745683633998
  episode_reward_min: -55.130417153016175
  episodes_this_iter: 25
  episodes_total: 798
  experiment_id: 7017d3ece0a74e7c8a3a6917ef92853c
  hostname: Gandalf
  info:
    default:
      cur_kl_coeff: 1.5258789289873675e-06
      cur_lr: 4.999999873689376e-05
      entropy: 2.9072303771972656
      kl: 0.016274306923151016
      policy_loss: -0.004865695256739855
      total_loss: 406.87548828125
      vf_explained_var: 0.5063589215278625
      vf_loss: 406.88037109375
    grad_time_ms: 1979.382
  