In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
import traci
import sumolib
import math

# Path to the SUMO configuration that is passed to SUMO via '-c' (here and on every reload).
environment = "intersection/sumo_config.sumocfg"
# Lane IDs grouped by the green phase that serves them. The array is indexed by
# action (phase_lane_control[action]), and get_avg_waiting() produces one state
# entry per row. dtype=object keeps the rows as Python lists of differing length.
phase_lane_control = np.array([
        ["N2TL_0", "N2TL_1", "N2TL_2", "S2TL_0", "S2TL_1", "S2TL_2"],
        ["N2TL_3", "S2TL_3"],
        ["W2TL_0", "W2TL_1", "W2TL_2", "E2TL_0", "E2TL_1", "E2TL_2"],
        ["W2TL_3", "E2TL_3"]
    ], dtype=object)

# Resolve the 'sumo-gui' executable through sumolib.
sumobin = sumolib.checkBinary('sumo-gui')

# Launch SUMO and open the TraCI connection.
# NOTE(review): no '--step-length' is passed here, whereas the reload in
# Environment.reset_sumo_environment passes TIME_STEP - so this first session
# presumably runs with SUMO's default step length. Confirm this is intended.
traci.start([sumobin, '-c', environment, '--start'])  

# Subscribe to the IDs of colliding vehicles; Environment.check_done_condition
# reads the result via traci.simulation.getSubscriptionResults().
traci.simulation.subscribe([traci.constants.VAR_COLLIDING_VEHICLES_IDS])

# Subscribe to vehicle accelerations for all vehicles
# NOTE(review): no simulation step has been executed yet at this point, so
# getIDList() is presumably empty and this loop subscribes nothing - confirm.
# NOTE(review): the simulation subscription above passes a list of variable IDs,
# while a bare constant is passed here - verify against the TraCI API.
for veh_id in traci.vehicle.getIDList():
    traci.vehicle.subscribe(veh_id, traci.constants.VAR_ACCELERATION)

# for single agent
# Only the first traffic light reported by TraCI is controlled.
trafficlight_id = traci.trafficlight.getIDList()[0]
# Lanes controlled by that traffic light (not referenced again in the visible cells).
controlled_lanes = traci.trafficlight.getControlledLanes(trafficlight_id)
TIME_STEP = 0.8 # amount of time (in seconds) per step of the simulation, i.e. 0.01 => 10ms per step

print("Connected to TraCI")

Connected to TraCI


In [2]:
# Average number of halted vehicles per lane, for each green-phase lane group
def get_avg_waiting():
    """Return the per-lane average of halted vehicles for every lane group.

    Returns:
        list: one value per row of ``phase_lane_control``; each value is the
        group's halted-vehicle total (``get_lane_num_waiting``) divided by the
        number of lanes in that group.
    """
    averages = []
    for lane_group in phase_lane_control:
        halted_total = get_lane_num_waiting(lane_group)
        # normalise by group size so wide and narrow groups are comparable
        averages.append(halted_total / len(lane_group))
    return averages

# Total count of halted vehicles over a collection of lanes
def get_lane_num_waiting(lanes):
    """Sum ``traci.lane.getLastStepHaltingNumber`` over ``lanes``.

    Args:
        lanes: iterable of lane IDs.

    Returns:
        The summed halting count; 0 when ``lanes`` is empty.
    """
    return sum(traci.lane.getLastStepHaltingNumber(lane_id) for lane_id in lanes)

# IDs of the vehicles that are (almost) standing still in the given lanes
def get_waiting_ids(lanes):
    """Collect the vehicles in ``lanes`` whose speed is below 0.1.

    Args:
        lanes: iterable of lane IDs.

    Returns:
        numpy.ndarray: vehicle IDs, in lane order and then in the order
        reported by ``traci.lane.getLastStepVehicleIDs``.
    """
    stopped = []
    for lane_id in lanes:
        for veh_id in traci.lane.getLastStepVehicleIDs(lane_id):
            # a vehicle counts as waiting once its speed drops under 0.1
            if traci.vehicle.getSpeed(veh_id) < 0.1:
                stopped.append(veh_id)
    return np.array(stopped)

def pct_served(waiting_ids):
    """Return the fraction of previously waiting vehicles that have been served.

    A vehicle counts as served when it is either no longer in the list returned
    by ``traci.vehicle.getLoadedIDList()`` or is still listed and currently
    moving faster than 0.5.

    Args:
        waiting_ids: sequence of vehicle IDs recorded while they were waiting.

    Returns:
        0 when ``waiting_ids`` is empty, otherwise served / total as a float.
    """
    total = len(waiting_ids)
    if total == 0:
        return 0

    # Query TraCI once and use a set: the previous version re-fetched the whole
    # list for every waiting vehicle and scanned it linearly each time.
    loaded_ids = set(traci.vehicle.getLoadedIDList())

    served = 0
    for veh_id in waiting_ids:
        if veh_id not in loaded_ids:
            # vehicles that have been served but exited the simulation cannot be
            # queried for their speed any more, so they are counted directly
            served += 1
        elif traci.vehicle.getSpeed(veh_id) > 0.5:
            served += 1

    return served / total
    


In [None]:
class Environment:
    """TraCI-backed environment for a single signalised intersection.

    An action is the index of a green phase. The methods assume the traffic
    light program alternates green and yellow phases, so action ``a`` maps to
    green phase ``2 * a`` followed by yellow phase ``2 * a + 1``.

    The class relies on the module-level TraCI connection and on the globals
    ``trafficlight_id``, ``TIME_STEP``, ``phase_lane_control`` plus the helper
    functions ``get_avg_waiting``, ``get_waiting_ids`` and ``pct_served``.
    """

    # simulation time (as reported by traci.simulation.getTime()) after which
    # an episode is cut off
    max_sim_time = 2000

    def __init__(self):
        # Convert the current phase index into an action index (two phases per
        # action). Storing the raw phase index would make apply_action compute
        # a wrong yellow phase whenever the light is not in phase 0.
        self.prev_action = traci.trafficlight.getPhase(trafficlight_id) // 2
        self.yellow_duration = 3 # duration of yellow phases in seconds between actions
        self.green_duration = 5 # minimum amount of time the green phases are on for

        self.static_action = 0 # adds reward for not changing the phase, prevents flickering
        self.waiting_ids = [] # list of vehicle ids that were waiting in one of the lanes now greenlit in the current phase
        self.pct_served = 0 # percentage of cars waiting at the relevant lanes that made it through on the last light cycle


    # Function to reset the SUMO environment
    def reset_sumo_environment(self, environment):
        """Reload the simulation and return the initial state.

        Args:
            environment: path of the SUMO configuration file to load.

        Returns:
            numpy.ndarray: the state produced by ``get_state`` after the reload.
        """
        # traci.load expects a list of command line strings, so the numeric
        # step length has to be converted explicitly.
        traci.load(['-c', environment, '--start', '--step-length', str(TIME_STEP)])
        traci.trafficlight.setProgram(trafficlight_id, '0')

        # NOTE(review): subscriptions are presumably dropped by traci.load;
        # re-issue the collision subscription that check_done_condition reads.
        traci.simulation.subscribe([traci.constants.VAR_COLLIDING_VEHICLES_IDS])

        # reset the per-episode bookkeeping; prev_action must follow the freshly
        # loaded light instead of the last action of the previous episode
        self.prev_action = traci.trafficlight.getPhase(trafficlight_id) // 2
        self.static_action = 0
        self.waiting_ids = []
        self.pct_served = 0

        return self.get_state()


    # Function to step through the SUMO simulation
    def step_in_sumo(self, action):
        """Apply ``action``, advance one simulation step and observe the result.

        Args:
            action: index of the green phase to activate.

        Returns:
            tuple: ``(next_state, reward, done)``.
        """
        # Apply the action (this may already simulate a yellow and a green phase)
        self.apply_action(action)

        # Step the SUMO simulation forward
        traci.simulationStep()

        # Get the new state after taking the action
        next_state = self.get_state()

        # The reward must be computed after the state: calculate_reward clears
        # pct_served, which is part of the state vector.
        reward = self.calculate_reward()

        # Check if the episode is done
        done = self.check_done_condition()

        return next_state, reward, done


    # Function to get the current state
    def get_state(self):
        """Build the state vector.

        Returns:
            numpy.ndarray: the averages from ``get_avg_waiting`` (one per lane
            group), followed by ``pct_served`` and ``prev_action``.
        """
        state = get_avg_waiting()
        state.append(self.pct_served) # include the served percent of the current phase
        state.append(self.prev_action) # include the current action value

        return np.array(state)


    # Function to apply the action
    def apply_action(self, action):
        """Switch the traffic light to the green phase selected by ``action``.

        Repeating the previous action only sets ``static_action``. Otherwise
        the yellow phase of the previous action is simulated, the served
        fraction of the previously recorded waiting vehicles is stored, the
        vehicles waiting in the lanes of the new action are recorded, and the
        new green phase is simulated for ``green_duration``.
        """
        if action == self.prev_action:
            self.static_action = 1
            return

        # simulate the yellow light phase corresponding to the last green phase
        self.simulate_phase(2 * self.prev_action + 1, self.yellow_duration)

        # get the success parameters of the last light phase
        self.pct_served = pct_served(self.waiting_ids)
        self.waiting_ids = get_waiting_ids(phase_lane_control[action])

        # change to the new green phase, simulate for the minimum amount of time
        self.simulate_phase(2 * action, self.green_duration)
        self.prev_action = action


    # changes the phase and simulates it for the required amount of time
    def simulate_phase(self, action, duration):
        """Set phase index ``action`` and step the simulation for ``duration``.

        The number of steps is ``duration / TIME_STEP`` rounded up, so the phase
        lasts at least ``duration``.
        """
        traci.trafficlight.setPhase(trafficlight_id, action)
        for _ in range(math.ceil(duration / TIME_STEP)):
            traci.simulationStep()


    # Function to calculate the reward
    def calculate_reward(self):
        """Return the reward for the last step and clear the one-shot terms.

        reward = static_action + exp(4 * pct_served) - exp(0.2 * sum of the
        per-group waiting averages). ``static_action`` and ``pct_served`` are
        reset to 0 afterwards so they are only rewarded once.
        """
        reward = self.static_action + math.exp(4 * self.pct_served) - math.exp(0.2 * sum(get_avg_waiting()))

        self.static_action = 0
        self.pct_served = 0
        return reward


    # Function to check if the simulation should terminate
    def check_done_condition(self):
        """Return True when a collision was reported or the time limit passed."""
        collision_data = traci.simulation.getSubscriptionResults()

        # The subscribed key is present whenever the subscription is active, so
        # only a non-empty list of colliding vehicles may end the episode.
        if collision_data and collision_data.get(traci.constants.VAR_COLLIDING_VEHICLES_IDS):
            return True

        return traci.simulation.getTime() > self.max_sim_time

In [None]:
# Q-network: maps a state vector to one Q-value per action
class DQN(nn.Module):
    """Fully connected network with two ReLU hidden layers of 12 units each.

    Args:
        n_state_params: size of the input state vector.
        n_actions: number of outputs (one Q-value per action).
    """

    def __init__(self, n_state_params, n_actions):
        super().__init__()
        hidden_units = 12
        self.fc1 = nn.Linear(n_state_params, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, n_actions)

    def forward(self, x):
        """Return the Q-values for input ``x`` (no activation on the output)."""
        for hidden_layer in (self.fc1, self.fc2):
            x = torch.relu(hidden_layer(x))
        return self.fc3(x)

In [5]:
# Define the RL agent
class RLAgent:
    """Epsilon-greedy DQN agent with a bounded replay memory.

    Args:
        n_state_params: length of the state vector fed to the network.
        n_actions: number of discrete actions (network outputs).
    """

    def __init__(self, n_state_params, n_actions):
        self.n_state_params = n_state_params
        self.n_actions = n_actions
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first
        self.gamma = 0.95  # discount rate
        self.epsilon = 0.05  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = DQN(n_state_params, n_actions)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action for ``state`` with an epsilon-greedy policy.

        Returns:
            int: a random action with probability ``epsilon``, otherwise the
            action with the highest predicted Q-value. Both paths return a
            plain Python ``int``.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.n_actions)
        # pure inference: no autograd graph is needed here
        with torch.no_grad():
            q_values = self.model(torch.FloatTensor(state))
        return int(np.argmax(q_values.numpy()))

    def replay(self, batch_size):
        """Train on ``batch_size`` randomly sampled transitions, one at a time.

        Does nothing while fewer than ``batch_size`` transitions are stored.
        After the updates ``epsilon`` is decayed once, provided it is still
        above ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # TD target: reward plus the discounted best next Q-value, unless
            # the transition ended the episode
            target = reward
            if not done:
                with torch.no_grad():
                    next_q = self.model(torch.FloatTensor(next_state))
                target += self.gamma * np.amax(next_q.numpy())

            # One forward pass serves both as prediction and, detached, as the
            # base of the target vector: only the taken action's entry changes.
            prediction = self.model(torch.FloatTensor(state))
            target_f = prediction.detach().clone()
            # Check if action index is valid
            if 0 <= action < self.n_actions:
                target_f[action] = float(target)
            else:
                print(f"Invalid action: {action}")

            self.optimizer.zero_grad()
            loss = self.criterion(prediction, target_f)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [6]:
# Training driver: plays whole episodes and trains the agent after each one
def run_simulation(agent, env, num_episodes, batch_size):
    """Run ``num_episodes`` episodes of agent/environment interaction.

    Every episode resets the environment with the module-level ``environment``
    config path, then alternates ``agent.act`` and ``env.step_in_sumo`` until
    the environment reports ``done``. Each transition is stored through
    ``agent.remember``. After the episode its summed reward is printed and
    ``agent.replay(batch_size)`` is called once.
    """
    for e in range(num_episodes):
        observation = env.reset_sumo_environment(environment)
        total_reward = 0

        while True:
            chosen_action = agent.act(observation)
            following, gained, finished = env.step_in_sumo(chosen_action)
            agent.remember(observation, chosen_action, gained, following, finished)
            total_reward += gained
            observation = following
            if finished:
                break

        print(f"Episode: {e+1}/{num_episodes}, Total Reward: {total_reward}")
        agent.replay(batch_size)

In [None]:
# Size of the network input: the state vector built by Environment.get_state(),
# i.e. one average waiting count per lane group plus the served fraction and the
# previous action.
env = Environment()
n_state_params = len(env.get_state())
print("Number of inputs:", n_state_params)

# First program logic defined for the controlled traffic light
program = traci.trafficlight.getAllProgramLogics(trafficlight_id)[0]

# Half the phases are selectable actions: the Environment treats even phase
# indices as green phases and inserts the odd (yellow) ones itself.
n_actions = len(program.phases) // 2
print("actions:", n_actions)

# Build the agent and train it
agent = RLAgent(n_state_params, n_actions)
run_simulation(agent, env, num_episodes=200, batch_size=32)

Number of inputs: 6
actions: 4
Episode: 1/200, Total Reward: 632431.6158961043
Episode: 2/200, Total Reward: 283709.1796984127
Episode: 3/200, Total Reward: 1303823.1438771791
Episode: 4/200, Total Reward: 5887463.887859379
Episode: 5/200, Total Reward: 1473332.609119552
Episode: 6/200, Total Reward: 501558.74091417366
Episode: 7/200, Total Reward: 1595344.3545406556
Episode: 8/200, Total Reward: 3607252.7686544554
Episode: 9/200, Total Reward: 2275544.8676035777
Episode: 10/200, Total Reward: 4547275.740211551
Episode: 11/200, Total Reward: 356632.7821713395
Episode: 12/200, Total Reward: 199521.71456426947
Episode: 13/200, Total Reward: 2101820.7660489287
Episode: 14/200, Total Reward: 372284.5660147457
Episode: 15/200, Total Reward: 618447.7229080015
Episode: 16/200, Total Reward: 184044.0538173444
Episode: 17/200, Total Reward: 1478913.8149463637
Episode: 18/200, Total Reward: 11260541.840186104
Episode: 19/200, Total Reward: 18345495.49835083
Episode: 20/200, Total Reward: 2305926