In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
import traci
import sumolib
import time

# Path to the SUMO configuration file; reused later when the simulation is reloaded for each episode.
environment = "environments/cross.sumocfg"
# Resolve the SUMO executable through sumolib; 'sumo-gui' selects the graphical front end.
sumobin = sumolib.checkBinary('sumo-gui')

# Launch SUMO with the scenario and open the TraCI connection.
# NOTE(review): '--start' presumably makes the GUI begin simulating without a manual click - confirm against the SUMO options.
traci.start([sumobin, '-c', environment, '--start'])  

# Reached only after traci.start has returned.
print("Connected to TraCI")

Connected to TraCI


In [2]:
def get_delay(tls_id='0'):
    """Return the summed delay of every lane controlled by a traffic light.

    Args:
        tls_id: ID of the traffic light whose controlled lanes are inspected.

    Returns:
        The sum of get_lane_delay() over the controlled lanes (0 if there are none).
    """
    total_delay = 0
    for lane_id in traci.trafficlight.getControlledLanes(tls_id):
        total_delay += get_lane_delay(lane_id)
    return total_delay

def get_lane_delay(lane_id):
    """Return the aggregate delay of the vehicles on one lane.

    The delay is ``vehicle_count * (1 - mean_speed / speed_limit)``, i.e. the
    per-vehicle delay ``1 - speed / max_speed`` summed over the lane.

    Args:
        lane_id: ID of the lane to query.

    Returns:
        The lane delay, clamped so it is never negative.
    """
    speed_limit = traci.lane.getMaxSpeed(lane_id)
    mean_speed = traci.lane.getLastStepMeanSpeed(lane_id)
    vehicle_count = traci.lane.getLastStepVehicleNumber(lane_id)
    raw_delay = vehicle_count * (1 - mean_speed / speed_limit)
    # The raw value can come out slightly negative - presumably when the mean
    # speed exceeds the lane's speed limit (TODO confirm) - so clamp it at 0.
    return max(raw_delay, 0)

def get_waiting_time(tls_id='0'):
    """Return how many vehicles are halting on the lanes of a traffic light.

    Despite the name, the result is a vehicle count (the per-lane
    ``getLastStepHaltingNumber`` values added up), not a duration.

    Args:
        tls_id: ID of the traffic light whose controlled lanes are inspected.

    Returns:
        Total number of halting vehicles over the controlled lanes.
    """
    halting_total = 0
    for lane_id in traci.trafficlight.getControlledLanes(tls_id):
        halting_total += traci.lane.getLastStepHaltingNumber(lane_id)
    return halting_total

def num_emergency_stops(tls_id='0'):
    """Return the number of emergency stops on the lanes of a traffic light.

    An emergency stop is a vehicle whose acceleration is below -4.5 m/s^2
    (see get_lane_emergency_stops).

    Args:
        tls_id: ID of the traffic light whose controlled lanes are inspected.

    Returns:
        Emergency-stop count summed over all controlled lanes.
    """
    controlled_lanes = traci.trafficlight.getControlledLanes(tls_id)
    return sum(map(get_lane_emergency_stops, controlled_lanes))

def get_lane_emergency_stops(lane_id):
    """Count the vehicles on a lane that braked harder than 4.5 m/s^2 in the last step.

    Args:
        lane_id: ID of the lane to query.

    Returns:
        Number of vehicles on the lane whose acceleration is below -4.5.
    """
    stop_count = 0
    for veh_id in traci.lane.getLastStepVehicleIDs(lane_id):
        if traci.vehicle.getAcceleration(veh_id) < -4.5:
            stop_count += 1
    return stop_count

In [None]:
# Function to reset the SUMO environment
def reset_sumo_environment(environment, step_length=0.5):
    """Reload the SUMO scenario and return the initial state.

    Args:
        environment: Path to the ``.sumocfg`` file to load.
        step_length: Simulation step length in seconds, forwarded to SUMO as
            ``--step-length``. Defaults to 0.5.

    Returns:
        The state reported by get_state() right after the reload.
    """
    # The list is handed to SUMO as command-line arguments, so every entry must
    # be a string; a bare float such as .5 is not a valid argument.
    traci.load(['-c', environment, '--start', '--step-length', str(step_length)])
    # Select program '0' on traffic light '0' so every episode starts from the same signal plan.
    traci.trafficlight.setProgram('0', '0')

    # Get initial state information (modify this based on your state representation)
    return get_state()

def step_in_sumo(action):
    """Apply an action, advance the simulation one step, and observe the outcome.

    Args:
        action: Action chosen by the agent; forwarded to apply_action() and
            calculate_reward().

    Returns:
        A ``(next_state, reward, done)`` tuple built from get_state(),
        calculate_reward(action) and check_done_condition().
    """
    apply_action(action)
    traci.simulationStep()
    # Tuple elements are evaluated left to right: state first, then reward,
    # then the termination check - all after the step above.
    return get_state(), calculate_reward(action), check_done_condition()

def get_state():
    """Return the current state: one delay value per lane controlled by traffic light '0'.

    The per-lane delay (see get_lane_delay) reflects both slowed traffic and
    vehicles stopped at a red light.

    Returns:
        A NumPy array holding the delay of each controlled lane, in the order
        TraCI reports the lanes.
    """
    lane_delays = [
        get_lane_delay(lane_id)
        for lane_id in traci.trafficlight.getControlledLanes('0')
    ]
    return np.array(lane_delays)

def apply_action(action):
    """Apply the agent's action to traffic light '0'.

    Action 1 sets the remaining duration of the current phase to 0, which
    moves the light on to its next phase; any other value does nothing.

    Args:
        action: 0 to do nothing, 1 to advance to the next phase.

    Returns:
        None. A fatal TraCI error is reported on stdout and the connection is
        closed instead of the exception being propagated.
    """
    if action == 1:
        try:
            traci.trafficlight.setPhaseDuration('0', 0)
        except traci.exceptions.FatalTraCIError as err:
            print("TraCI error:", err)
            traci.close()

# Function to calculate the reward for the most recent simulation step
def calculate_reward(action, tls_id='0', action_weight=-0.1, delay_weight=-1.0,
                     waiting_time_weight=-0.5, emergency_stop_weight=-2.0,
                     verbose=True):
    """Compute the reward as a weighted sum of four penalty terms.

    Args:
        action: Action taken this step (0 or 1); penalised to discourage
            flickering between phases.
        tls_id: ID of the traffic light whose lanes are evaluated.
        action_weight: Weight applied to ``action``.
        delay_weight: Weight applied to the total delay from get_delay().
        waiting_time_weight: Weight applied to the halting-vehicle count from
            get_waiting_time().
        emergency_stop_weight: Weight applied to the emergency-stop count from
            num_emergency_stops().
        verbose: When True (the default), print the reward breakdown every
            call; pass False to keep long training runs from flooding the
            output.

    Returns:
        The weighted sum of the four components. With the default (negative)
        weights, larger penalties give a more negative reward.
    """
    # Retrieve delays, halting vehicles, and emergency stops for the given traffic light
    delay = get_delay(tls_id)
    waiting_time = get_waiting_time(tls_id)
    emergency_stops = num_emergency_stops(tls_id)

    # Weighted sum of the action penalty plus the three traffic measurements
    reward = (action_weight * action) + (delay_weight * delay) + (waiting_time_weight * waiting_time) + (emergency_stop_weight * emergency_stops)

    # Optionally, normalize reward if values are large or small
    # reward = reward / (1 + abs(reward))

    if verbose:
        print(f"Reward calculated: Delay={delay}, Waiting Time={waiting_time}, Emergency Stops={emergency_stops}, Total Reward={reward}")

    return reward

def check_done_condition():
    """Report whether the current episode should end.

    Returns:
        True once the simulation time reported by TraCI exceeds 250
        (change this threshold as necessary), otherwise False.
    """
    return traci.simulation.getTime() > 250

In [4]:
# Neural network approximating the Q-function
class DQN(nn.Module):
    """Fully connected Q-network with two hidden layers of 24 units.

    Maps a state vector of ``n_state_params`` values to ``n_actions`` Q-values.
    """

    def __init__(self, n_state_params, n_actions):
        super().__init__()
        hidden_units = 24
        self.fc1 = nn.Linear(n_state_params, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, n_actions)

    def forward(self, x):
        """Return the Q-values for ``x``; ReLU follows each hidden layer, the output is linear."""
        hidden = x
        for layer in (self.fc1, self.fc2):
            hidden = torch.relu(layer(hidden))
        return self.fc3(hidden)

In [5]:
# Define the RL agent
class RLAgent:
    """Epsilon-greedy DQN agent with a bounded replay memory.

    Args:
        n_state_params: Length of the state vector fed to the Q-network.
        n_actions: Number of discrete actions.
        gamma: Discount rate applied to the bootstrapped next-state value.
        epsilon: Initial exploration rate (probability of a random action).
        epsilon_min: Exploration rate below which epsilon is no longer decayed.
        epsilon_decay: Factor epsilon is multiplied by after each replay.
        learning_rate: Learning rate of the Adam optimizer.
        memory_size: Maximum number of transitions kept; older ones are dropped.
    """

    def __init__(self, n_state_params, n_actions, gamma=0.95, epsilon=0.05,
                 epsilon_min=0.01, epsilon_decay=0.995, learning_rate=0.001,
                 memory_size=2000):
        self.n_state_params = n_state_params
        self.n_actions = n_actions
        self.memory = deque(maxlen=memory_size)
        self.gamma = gamma  # discount rate
        self.epsilon = epsilon  # exploration rate
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay
        self.model = DQN(n_state_params, n_actions)
        self.optimizer = optim.Adam(self.model.parameters(), lr=learning_rate)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        """Store one transition in the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily.

        Returns:
            A built-in ``int``: a uniformly random action with probability
            ``epsilon``, otherwise the action with the highest predicted Q-value.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.n_actions)
        # Pure inference: no autograd graph is needed to pick an action.
        with torch.no_grad():
            q_values = self.model(torch.as_tensor(state, dtype=torch.float32))
        # Convert the NumPy integer so callers always receive a plain int.
        return int(np.argmax(q_values.numpy()))

    def replay(self, batch_size):
        """Train on ``batch_size`` random transitions, one optimizer step each.

        Does nothing while the memory holds fewer than ``batch_size``
        transitions. After a training pass, epsilon is decayed once if it is
        still above ``epsilon_min``.
        """
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            # Check if action index is valid; a bad sample is reported and
            # skipped rather than spending an optimizer step on it.
            if not 0 <= action < self.n_actions:
                print(f"Invalid action: {action}")
                continue

            state_tensor = torch.as_tensor(state, dtype=torch.float32)
            # The regression target is a constant, so build it without autograd.
            with torch.no_grad():
                target = reward
                if not done:
                    next_q = self.model(torch.as_tensor(next_state, dtype=torch.float32))
                    target = reward + self.gamma * next_q.max().item()
                # Current predictions with only the taken action's value
                # replaced, so the loss is zero for the other actions.
                target_q = self.model(state_tensor).clone()
                target_q[int(action)] = float(target)

            self.optimizer.zero_grad()
            loss = self.criterion(self.model(state_tensor), target_q)
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [6]:
def run_simulation(agent, num_episodes, batch_size):
    """Run the agent against SUMO for a number of episodes.

    Each episode reloads the simulation, lets the agent act until the episode
    is done while storing every transition, prints the episode's total reward,
    and then triggers one replay (training) pass.

    Args:
        agent: Object providing ``act``, ``remember`` and ``replay``.
        num_episodes: Number of episodes to run.
        batch_size: Batch size forwarded to ``agent.replay``.
    """
    for episode in range(1, num_episodes + 1):
        # Reset the SUMO environment and get the initial state
        state = reset_sumo_environment(environment)
        total_reward = 0
        done = False

        while not done:
            action = agent.act(state)
            # Step through the SUMO simulation
            next_state, reward, done = step_in_sumo(action)
            agent.remember(state, action, reward, next_state, done)
            total_reward += reward
            state = next_state

        print(f"Episode: {episode}/{num_episodes}, Total Reward: {total_reward}")
        agent.replay(batch_size)

In [None]:
# Number of state parameters: one delay value per lane controlled by traffic light '0',
# matching the array get_state() builds. Requires an open TraCI connection.
n_state_params = len(traci.trafficlight.getControlledLanes('0'))

# Number of possible actions: 0 -> do nothing, 1 -> next traffic light phase
n_actions = 2 

# Build the agent and train it; run_simulation reloads the SUMO scenario at the start of every episode.
agent = RLAgent(n_state_params, n_actions)
run_simulation(agent, num_episodes=1000, batch_size=32)

Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.013704811967010344, Waiting Time=0, Emergency Stops=0, Total Reward=-0.013704811967010344
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.0, Waiting Time=0, Emergency Stops=0, Total Reward=-0.0
Reward calculated: Delay=0.013074233571725435, Waitin