# Training the models using a SUMO environment.
In this notebook, we train the same model with the same parameters, but now using SUMO and TraCI as the environment/simulator instead of CityFlow, because SUMO supports probabilistic, Poisson-process-based vehicle flows. All model parameters remain the same; the only thing that changes is the environment class.
___
# Importing libraries

In [1]:
import pandas as pd
import numpy as np
import os
import traci
import torch
# from tqdm import tqdm

# Creating the SUMO environment class

In [None]:
# cityflow based class for reference
# DO NOT RUN!!!
class PressureEnv:
    '''
        Reference environment for a single intersection, built on top of the cityflow engine.
        It exposes the same kind of state / reward / step interface as the SUMO based environment.
    '''
    def __init__(self, maxSteps, configPath=os.path.join('generated', 'config.json'), numThreads=1):
        '''
            Creates the cityflow engine from the given config and sets up the bookkeeping.
            maxSteps is the step count at which an episode is reported as finished.
        '''
        # the engine is created first, everything else is plain bookkeeping
        self.engine = cityflow.Engine(configPath, thread_num=numThreads)
        self.numSteps = 0
        self.maxSteps = maxSteps
        # every movement is an (incoming lane, outgoing lane) pair
        self.directions = [
            # left turns
            ('road_0_1_0_0', 'road_1_1_1_0'),
            ('road_1_0_1_0', 'road_1_1_2_0'),
            ('road_2_1_2_0', 'road_1_1_3_0'),
            ('road_1_2_3_0', 'road_1_1_0_0'),
            # straight movements
            ('road_0_1_0_1', 'road_1_1_0_1'),
            ('road_1_0_1_1', 'road_1_1_1_1'),
            ('road_2_1_2_1', 'road_1_1_2_1'),
            ('road_1_2_3_1', 'road_1_1_3_1'),
            # right turns
            ('road_0_1_0_2', 'road_1_1_3_2'),
            ('road_1_0_1_2', 'road_1_1_0_2'),
            ('road_2_1_2_2', 'road_1_1_1_2'),
            ('road_1_2_3_2', 'road_1_1_2_2'),
        ]
        self.incoming = [pair[0] for pair in self.directions]
        # lane capacity used to normalise the outgoing counts in the reward
        self.capacity = 40

    def _getState(self, currTLPhase):
        '''
            Builds the state vector: the vehicle count of every incoming lane (in the order the
            engine reports them) followed by the given traffic light phase.
        '''
        perLane = self.engine.get_lane_vehicle_count()
        state = [count for lane, count in perLane.items() if lane in self.incoming]
        state.append(currTLPhase)
        return state

    def _getReward(self):
        '''
            Computes the reward for the current situation. For every movement the number of
            waiting vehicles on the incoming lane is weighted by the free share of the outgoing
            lane, and the negated contributions are summed up.
        '''
        waiting = self.engine.get_lane_waiting_vehicle_count()
        return sum(
            -1 * waiting[pair[0]] * (1 - (waiting[pair[1]] / self.capacity))
            for pair in self.directions
        )

    def _peformAction(self):
        '''
            Placeholder for applying an action. Setting the traffic light phase is not
            implemented here, the environment is only simulated for the next 10 steps.
        '''
        self._step(10)

    def _step(self, t=10):
        '''
            Advances the engine by up to t steps. The step counter is increased before every
            engine step; as soon as it equals maxSteps the function returns True without
            stepping the engine again. Returns False when all t steps were simulated.
        '''
        # NOTE TO SELF: the interval is still hardcoded to 1 second, same as in the config definition
        for _ in range(t):
            self.numSteps += 1
            if self.numSteps == self.maxSteps:
                return True
            self.engine.next_step()
        return False

    def take_action(self, action, t=10, intersection_id='intersection_1_1'):
        '''
            Main entry point for one environment transition:
                1. sets the traffic light of the intersection to the phase index given by action
                2. simulates the next t steps
                3. reads the next state
                4. computes the reward
            Returns the tuple (next_state, reward, finished).
        '''
        self.engine.set_tl_phase(intersection_id, action)
        done = self._step(t)
        observation = self._getState(action)
        reward = self._getReward()
        return observation, reward, done

    def reset(self, currTLPhase):
        '''
            Puts the environment back into its initial state, empties the generated log files
            and returns the starting state for the given traffic light phase.
        '''
        self.engine.reset()
        self.numSteps = 0
        # opening in write mode truncates the roadnet log and the replay log
        for logName in ('GeneratedRoadNetLogExpt.json', 'GeneratedReplayLogExpt.txt'):
            with open(os.path.join('generated', logName), 'w'):
                pass
        return self._getState(currTLPhase)

In [3]:
class SUMOEnvironment:
    '''
        This class is the environment implemented using SUMO and TRACI for a single intersection.

        State: the number of halting vehicles on each of the 12 incoming lanes (in the order of
        `self.directions`), followed by the current phase index of the traffic light.
        Reward: see `_getReward`.
    '''
    def __init__(self, sumoCfgPath, sumoMode='sumo', maxTime=3600.0):
        '''
            Stores the configuration and starts the SUMO simulation through TraCI.

            Parameters:
                sumoCfgPath: path of the SUMO configuration file, passed to SUMO with '-c'.
                sumoMode: first element of the command handed to traci.start (the SUMO binary).
                maxTime: step budget of one episode; `_step` reports the episode as finished once
                         `currTime` (increased by 1.0 per simulation step) has reached it.
        '''
        self.sumoMode = sumoMode
        self.sumoCfgPath = sumoCfgPath # these two are made into class variables because they will also be used in the reset function
        # use the caller supplied limit (it used to be hardcoded to 3600.0, silently ignoring the argument)
        self.maxTime = maxTime
        self.currTime = 0.0
        self.directions = ( # movement directions for calculating the reward function: (incoming lane, outgoing lane)
            ('E1_2', '-E3_2'), # left
            ('E1_1', 'E2_1'), # straight
            ('E1_0', 'E4_0'), # right

            ('-E4_2', '-E1_2'), # left
            ('-E4_1', '-E3_1'), # straight
            ('-E4_0', 'E2_0'), # right

            ('-E2_2', 'E4_2'), # left
            ('-E2_1', '-E1_1'), # straight
            ('-E2_0', '-E3_0'), # right

            ('E3_2', 'E2_2'), # left
            ('E3_1', 'E4_1'), # straight
            ('E3_0', '-E1_0') # right
        )
        self.incoming = [t[0] for t in self.directions]
        self.capacity = 40 # lane capacity used to normalise the outgoing counts in the reward

        # starting the simulation
        traci.start([sumoMode, '-c', sumoCfgPath])

    def _getState(self, intersectionId='Inter'):
        '''
            This function returns the state at the current time step.

            Parameters:
                intersectionId: id of the traffic light whose current phase is appended to the state.
            Returns:
                a list with the halting vehicle count of every incoming lane followed by the phase index.
        '''
        # number of waiting (halting) vehicles in every incoming lane
        stArray = [traci.lane.getLastStepHaltingNumber(lane) for lane in self.incoming]
        # at the end, appending the current phase of the intersection
        stArray.append(traci.trafficlight.getPhase(intersectionId))
        return stArray

    def _getReward(self):
        '''
            This function returns the reward as of the current state of the intersection.

            For every movement (incoming lane, outgoing lane) the contribution is
            -vIn * (1 - vOut / capacity), where vIn and vOut are the halting vehicle counts of the
            two lanes. The reward is the sum over all movements.
        '''
        r = 0

        # looping through the directions and calculating individual rewards
        for inLane, outLane in self.directions:
            vIn = traci.lane.getLastStepHaltingNumber(inLane)
            vOut = traci.lane.getLastStepHaltingNumber(outLane)
            r += -1 * vIn * (1 - (vOut/self.capacity))
        return r

    def _step(self, t=10):
        '''
            This function moves the simulation up to t timesteps ahead.

            Before every simulation step the step budget is checked; if `currTime` has already
            reached `maxTime`, no further step is simulated and True is returned. If the budget is
            reached exactly by the last step of a call, that call still returns False and the
            following call returns True without stepping.

            Returns:
                True if the step budget was found exhausted, False otherwise.
        '''
        for _ in range(t):
            # '>=' instead of '==': a non-integral or already exceeded maxTime must also end the episode
            if self.currTime >= self.maxTime:
                return True
            # if not, then continue
            self.currTime = self.currTime + 1.0
            traci.simulationStep()
        return False

    def takeAction(self, action, intersectionId='Inter', t=10):
        '''
            This function performs the given action, steps the environment ahead for next t seconds/steps, and then returns the next state, reward and whether the simulation has finished or not.

            Parameters:
                action: phase index the traffic light is switched to.
                intersectionId: id of the traffic light that is controlled and observed.
                t: number of simulation steps to run after switching the phase.
            Returns:
                the tuple (next_state, reward, finished).
        '''
        # take action: set the tl phase to the action value
        traci.trafficlight.setPhase(intersectionId, action)
        # simulate next t time steps
        finished = self._step(t)
        # get the next state of the same intersection the action was applied to
        next_state = self._getState(intersectionId)
        # get the reward
        reward = self._getReward()

        return next_state, reward, finished

    def reset(self):
        '''
            This function resets the environment to the start and returns the starting state.
        '''
        # reloading the simulation in the already running sumo instance
        traci.load(["-c", self.sumoCfgPath])
        self.currTime = 0.0
        return self._getState()

    def close(self):
        '''
            This function closes the connection of traci with the sumo environment.
            NOTE: After calling this function, you will need to reinitialize the object, as now the connection to SUMO has been closed for this. NEED TO FIND A BETTER WAY TO DO THIS.
        '''
        traci.close()

In [7]:
# Create the environment; the constructor starts SUMO through TraCI with the given config file.
env = SUMOEnvironment('SingleIntersection.sumocfg')

In [15]:
# Inspect the current state: halting-vehicle counts of the 12 incoming lanes, then the traffic-light phase.
env._getState()
# env.close()

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [20]:
# Call _step nine times (each call advances up to its default of 10 simulation steps),
# then display the resulting state together with the reward.
for i in range(9):
    env._step()
env._getState(), env._getReward()

([8, 13, 0, 3, 4, 0, 7, 6, 0, 0, 1, 0, 6], -42.0)

In [21]:
# Ask SUMO for its current simulation time.
traci.simulation.getTime()

270.0

In [6]:
# Close the TraCI connection; a new SUMOEnvironment has to be created before simulating again.
env.close()
# env.reset()

In [8]:
# Run the simulation in chunks of up to 10 steps until _step reports that the step budget is used up,
# printing SUMO's clock and the observed state after every chunk.
finished = False
while not finished:
    finished = env._step()
    st = env._getState()
    print('Current Time: ', traci.simulation.getTime())
    print("State: ", st)
    print('----------------------------------------------------------------------------------------------------------------')
# display the environment's own step counter after the loop has ended
env.currTime

Current Time:  10.0
State:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----------------------------------------------------------------------------------------------------------------
Current Time:  20.0
State:  [0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]
----------------------------------------------------------------------------------------------------------------
Current Time:  30.0
State:  [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
----------------------------------------------------------------------------------------------------------------
Current Time:  40.0
State:  [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
----------------------------------------------------------------------------------------------------------------
Current Time:  50.0
State:  [0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0]
----------------------------------------------------------------------------------------------------------------
Current Time:  60.0
State:  [0, 0, 0, 1, 1, 0, 1, 0, 0, 2, 1, 0, 0]
---------------------------

3600.0

In [35]:
# Inspect the current state again (incoming-lane halting counts followed by the traffic-light phase).
env._getState()

[18, 19, 0, 16, 6, 0, 15, 15, 0, 7, 17, 4, 2]

In [6]:
# Start SUMO directly through TraCI (without the SUMOEnvironment wrapper), using the config under tst/.
traci.start(['sumo', '-c', 'tst/SingleIntersection.sumocfg'])

(22, 'SUMO 1.23.1')

In [7]:
# Close the active TraCI connection.
traci.close()