Environment

In [10]:
from __future__ import absolute_import
from __future__ import print_function
import os
import sys
import gym
from gym import spaces, logger
import numpy as np

# we need to import python modules from the $SUMO_HOME/tools directory
if 'SUMO_HOME' in os.environ:
    tools = os.path.join(os.environ['SUMO_HOME'], 'tools')
    sys.path.append(tools)
else:
    sys.exit("please declare environment variable 'SUMO_HOME'")
from sumolib import checkBinary 
import traci 

# HARDCODE
controlled_lights = [{'name':'nodeX', 'curr_phase':0, 'num_phases': 2}]
# uncontrolled_lights = [{'name':'nw', 'curr_phase':0, 'num_phases': 4}, {'name':'se', 'curr_phase':0, 'num_phases': 4}, {'name':'sw', 'curr_phase':0, 'num_phases': 4}]
important_roads = ['edge1', 'edge2', 'edge3', 'edge4', 'edge5', 'edge6', 'edge7', 'edge8']
load_options = ["-c", "PositiveSign/PositiveSign.sumocfg", "--tripinfo-output", "tripinfo.xml", '--log', 'log.txt' , "-t"]
# load_options = ["-c", "--tripinfo-output", "tripinfo.xml", '--log', 'log.txt' , "-t"]

class SumoEnv(gym.Env):
    def __init__(self, steps_per_episode, render):
        super(SumoEnv, self).__init__()
        # self.scenario_name = scenario_name
        self.steps_per_episode = steps_per_episode
        self.is_done = False
        self.current_step = 0

        self.reward_range = (-float('inf'), float('inf')) # HARDCODE
        self.action_space = spaces.Discrete(2) # HARDCODE
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=np.array([16]), dtype=np.float32) # HARDCODE

        # Start connection with sumo
        self.noguiBinary = checkBinary('sumo')
        self.guiBinary = checkBinary('sumo-gui')
        # self.current_binary = self.noguiBinary
        self.current_binary = self.guiBinary if render else self.noguiBinary
        traci.start([self.current_binary] + load_options)
        
    def reset(self):
        traci.load(load_options+["--start"])
        self.current_step = 0
        self.is_done = False

        return self._next_observation()

    def _next_observation(self):
        obs = []
        wait_counts, road_counts = self._get_road_waiting_vehicle_count()
        # HARDCODE
        for lane in important_roads:
            obs.append(road_counts[lane])
            obs.append(wait_counts[lane])

        return np.array(obs)
    
    def step(self, action):

        self._take_action(action)

        traci.simulationStep()
        self.current_step += 1
    
        obs = self._next_observation()
        reward = self._get_reward()

        if self.is_done:
            logger.warn("You are calling 'step()' even though this environment has already returned done = True. "
                        "You should always call 'reset()' once you receive 'done = True' "
                        "-- any further steps are undefined behavior.")
            reward = 0.0

        if self.current_step + 1 == self.steps_per_episode:
            self.is_done = True

        return obs, reward, self.is_done, {}

    def _get_reward(self):
        road_waiting_vehicles_dict , _ = self._get_road_waiting_vehicle_count()
        reward = 0.0

        for (road_id, num_vehicles) in road_waiting_vehicles_dict.items():
            if road_id in important_roads:
                reward -= num_vehicles

        return reward

    def _take_action(self, action):
        if action != controlled_lights[0]['curr_phase']:
            controlled_lights[0]['curr_phase'] = action
            self._set_tl_phase(controlled_lights[0]['name'], action)

    def _get_road_waiting_vehicle_count(self):
        wait_counts = {'edge1':0, 'edge2':0, 'edge3':0, 'edge4':0, 'edge5':0, 'edge6':0, 'edge7':0, 'edge8':0}
        road_counts = {'edge1':0, 'edge2':0, 'edge3':0, 'edge4':0, 'edge5':0, 'edge6':0, 'edge7':0, 'edge8':0}
        vehicles = traci.vehicle.getIDList()
        for v in vehicles:
            road = traci.vehicle.getRoadID(v)
            if road in wait_counts.keys():
                if traci.vehicle.getWaitingTime(v) > 0:
                    wait_counts[road] += 1
                road_counts[road] += 1
        return wait_counts , road_counts

    def _on_training_end(self):
        super(self)
        traci.close()

    def _set_tl_phase(self, intersection_id, phase_id):
        traci.trafficlight.setPhase(intersection_id, phase_id)

    def render(self, mode='human', close=False):
        # self.save_replay = not self.save_replay
        self.current_binary = self.guiBinary
        
    def close(self):
        self._on_training_end()


Test the environment with an agent

In [13]:
# Remove TF warnings in Stable baselines (may not be safe)
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
import traci
import sys

import gym
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv , SubprocVecEnv
from stable_baselines import PPO2, DQN

#from env.SumoEnv import SumoEnv
#from env.SumoEnv_Parallel import SumoEnv_Parallel

import time

try:
    num_proc = 1


    steps_per_episode = 250
    num_episodes = 10000
    if(num_proc == 1):
        env = DummyVecEnv([lambda: SumoEnv(steps_per_episode, False)])
    else:
        env = SubprocVecEnv([lambda: SumoEnv_Parallel(steps_per_episode, False, i) for i in range(num_proc)], start_method='forkserver')

    model = DQN('MlpPolicy', env, verbose=1)
    start = time.time()
    model.learn(total_timesteps=steps_per_episode*num_episodes)
    print(f'LEARNING TIME: {time.time() - start}')
    model.save('dqn_positive_10000')
    print('done learning')
    traci.close()

except:
    traci.close()




---------------------------------------
| % time spent exploring  | 90        |
| episodes                | 100       |
| mean 100 episode reward | -1.15e+03 |
| steps                   | 24651     |
---------------------------------------
---------------------------------------
| % time spent exploring  | 80        |
| episodes                | 200       |
| mean 100 episode reward | -1.05e+03 |
| steps                   | 49551     |
---------------------------------------
--------------------------------------
| % time spent exploring  | 70       |
| episodes                | 300      |
| mean 100 episode reward | -921     |
| steps                   | 74451    |
--------------------------------------
--------------------------------------
| % time spent exploring  | 61       |
| episodes                | 400      |
| mean 100 episode reward | -697     |
| steps                   | 99351    |
--------------------------------------
--------------------------------------
| % time spen

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 3600     |
| mean 100 episode reward | -58.4    |
| steps                   | 896151   |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 3700     |
| mean 100 episode reward | -61.1    |
| steps                   | 921051   |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 3800     |
| mean 100 episode reward | -103     |
| steps                   | 945951   |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 3900     |
| mean 100 episode reward | -60.1    |
| steps                   | 970851   |
--------------------------------------
--------------------------------------
| % time spent exploring 

--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 7200     |
| mean 100 episode reward | -257     |
| steps                   | 1792551  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 7300     |
| mean 100 episode reward | -171     |
| steps                   | 1817451  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 7400     |
| mean 100 episode reward | -278     |
| steps                   | 1842351  |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 7500     |
| mean 100 episode reward | -154     |
| steps                   | 1867251  |
--------------------------------------
--------------------------------------
| % time spent exploring 

In [19]:
traci.close()

FatalTraCIError: connection closed by SUMO

In [9]:
# Test the trained agent
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv , SubprocVecEnv
from stable_baselines import PPO2, DQN


try:
    steps_per_episode = 250

    saved_model = "dqn_positive_1000"
    print('pre-env')
    env = DummyVecEnv([lambda: SumoEnv(steps_per_episode, True)])
    print('post-env')
    # wrap it
    model = DQN.load(saved_model, env)
    print('pre-obs')
    obs = env.reset()
    print('post-obs')
    while True:
        action, _states = model.predict(obs,deterministic=True)
        obs, rewards, dones, info = env.step(action)
        env.render()

except TraCIException:
    traci.close()
    


pre-env
init
post-env
pre-obs
reset
post-obs
reset
reset


NameError: name 'TraCIException' is not defined

In [8]:
traci.close()

In [25]:
# Launch simulation server (SUMO-gui)

current_binary = checkBinary('sumo-gui')
load_options = ["-c", "PositiveSign/PositiveSign.sumocfg", "--tripinfo-output", "tripinfo.xml", '--log', 'log.txt' , "-t"]

traci.start([current_binary] + load_options)




(20, 'SUMO 1.7.0')

In [23]:
# Perform this code while simulator server (SUMO) is open

#Perform calculations here
traci.simulationStep() # step simulation in time

[]

In [26]:
traci.close() # Close TraCI connection to prevent error 

In [50]:
def generate_routefile(N):

    with open("PositiveSign/PositiveSign.rou.xml", "w") as routes:
        print("""<routes>
        <vType id="default" accel="0.8" decel="4.5" sigma="0.5" length="5" minGap="2.5" maxSpeed="16.67" guiShape="passenger"/>
        <route id="sw-ne" edges="edge1 edge4" />
        <route id="ne-sw" edges="edge3 edge6" />""", file=routes)
        vehicle_id = 0
        for i in range(N):
            if i % 8 == 0:
                print(f'    <vehicle id="test_{vehicle_id}" type="default" route="sw-ne" depart="{i}" />', file=routes)
                vehicle_id += 1
                print(f'    <vehicle id="test_{vehicle_id}" type="default" route="ne-sw" depart="{i}" />', file=routes)
                vehicle_id += 1
        print("</routes>", file=routes)
    
generate_routefile(1000)

In [5]:
def generate_routefile(route_list, vehicle_list):

    with open("PositiveSign/PositiveSign.rou.xml", "w") as routes:
        route_field = """<routes>
        <vType id="default" accel="0.8" decel="4.5" sigma="0.5" length="5" minGap="2.5" maxSpeed="16.67" guiShape="passenger"/>"""
        for route in route_list:
            route_field += f"""<route id="{route['route_id']}" edges="{route['edge_list']}" />"""
        print(route_field, file=routes)
        
        for vehicle in vehicle_list:
            vehicle_id = 0
            for i in range(vehicle['start'], vehicle['end']):
                if i % vehicle['interval'] == 0:
                    vehicle_field = f"""<vehicle id="{vehicle['batch_name']+"_"+str(vehicle_id)}" type="default" route="{vehicle['route']}" depart="{i}" />"""
                    print(vehicle_field, file=routes)
                    vehicle_id += 1
        print("</routes>", file=routes)

route_list = [{'route_id':'A-C', 'edge_list': "edge1 edge6"},
              {'route_id':'D-B', 'edge_list': "edge7 edge4"}]
vehicle_list = [{'batch_name':'batch1', 'route':'A-C', 'start':0, 'end':10, 'interval':1},
                {'batch_name':'batch2', 'route':'D-B', 'start':60, 'end':70, 'interval':1},
                {'batch_name':'batch3', 'route':'A-C', 'start':120, 'end':130, 'interval':1},
                {'batch_name':'batch4', 'route':'D-B', 'start':180, 'end':190, 'interval':1}
]
generate_routefile(route_list, vehicle_list)

In [70]:
generate_routefile(1000)

In [101]:
routes = [{'route_id':'A-C', 'edge_list': "edge1 edge6"},
          {'route_id':'D-B', 'edge_list': "edge7 edge4"}]
vehicles = [{'batch_name':'batch1', 'route':'A-C', 'start':0, 'end':15, 'interval':1},
                {'batch_name':'batch2', 'route':'D-B', 'start':30, 'end':45, 'interval':1},
                {'batch_name':'batch3', 'route':'A-C', 'start':60, 'end':75, 'interval':1},
                {'batch_name':'batch4', 'route':'D-B', 'start':90, 'end':105, 'interval':1}]

for route in routes:
    print(f"""<route id="{route['route_id']}" edges="{route['edge_list']}" />""")
for vehicle in vehicles:
            vehicle_id = 0
            for i in range(vehicle['start'], vehicle['end']):
                if i % vehicle['interval'] == 0:
                    print(f"""<vehicle id="{vehicle['batch_name']+"_"+str(vehicle_id)}" type="default" route="{vehicle['route']}" depart="{i}" />""")
                    vehicle_id += 1
            

<route id="A-C" edges="edge1 edge6" />
<route id="D-B" edges="edge7 edge4" />
<vehicle id="batch1_0" type="default" route="A-C" depart="0" />
<vehicle id="batch1_1" type="default" route="A-C" depart="1" />
<vehicle id="batch1_2" type="default" route="A-C" depart="2" />
<vehicle id="batch1_3" type="default" route="A-C" depart="3" />
<vehicle id="batch1_4" type="default" route="A-C" depart="4" />
<vehicle id="batch1_5" type="default" route="A-C" depart="5" />
<vehicle id="batch1_6" type="default" route="A-C" depart="6" />
<vehicle id="batch1_7" type="default" route="A-C" depart="7" />
<vehicle id="batch1_8" type="default" route="A-C" depart="8" />
<vehicle id="batch1_9" type="default" route="A-C" depart="9" />
<vehicle id="batch1_10" type="default" route="A-C" depart="10" />
<vehicle id="batch1_11" type="default" route="A-C" depart="11" />
<vehicle id="batch1_12" type="default" route="A-C" depart="12" />
<vehicle id="batch1_13" type="default" route="A-C" depart="13" />
<vehicle id="batch

In [77]:
print("""<routes>
        <vType id="default" accel="0.8" decel="4.5" sigma="0.5" length="5" minGap="2.5" maxSpeed="16.67" guiShape="passenger"/>
        <route id="sw-ne" edges="edge1 edge4" />
        <route id="ne-sw" edges="edge3 edge6" />""")

<routes>
        <vType id="default" accel="0.8" decel="4.5" sigma="0.5" length="5" minGap="2.5" maxSpeed="16.67" guiShape="passenger"/>
        <route id="sw-ne" edges="edge1 edge4" />
        <route id="ne-sw" edges="edge3 edge6" />


SyntaxError: invalid syntax (<ipython-input-78-d2eb08182742>, line 1)