In [1]:
import os
import sys
import random
from collections import deque
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
if "SUMO_HOME" in os.environ:
    tools = os.path.join(os.environ["SUMO_HOME"], "tools")
    sys.path.append(tools)
else:
    sys.exit("Please declare environment variable 'SUMO_HOME'")

In [3]:
import traci

def sumo_config(traffic_pattern="P1"):
    sumo_config = [
        "sumo",
        "-c", "SUMO_networks/" + traffic_pattern + "/junction.sumocfg",
        "--step-length", "0.05",
        "--delay", "0",
        "--lateral-resolution", "0.1",
        "--start",
        "--no-warnings",
        "--no-step-log",
    ]
    return sumo_config



traci.start(sumo_config())

(22, 'SUMO 1.23.1')

In [4]:
state = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.float)
action_space_size = 8
TRAFFIC_LIGHT_ID = "traffic_light"
DELTA_PHASE_DURATION = 6
YELLOW_PHASE_DURATION = 4
lane_detectors = ['q1', 'q2', 'q3', 'q4', 'q5', 'q6', 'q7', 'q8']
action = []
current_phase = 2 

In [5]:
import itertools

traffic_patterns = ["P1", "P2", "P3", "P4"]
traffic_pattern = itertools.cycle(traffic_patterns)

def change_env():
    pattern = next(traffic_pattern)  
    traci.close()
    traci.start(sumo_config(pattern))


def get_queue_length():
    state = []
    for detector in lane_detectors:
        state.append(traci.lanearea.getLastStepHaltingNumber(detector))
    return torch.tensor(state, dtype=torch.float)

def get_current_state():
    return get_queue_length()

def simulate_time(seconds = 1):
    for i in range(20 * seconds):
        traci.simulationStep()

def step(action):
    global current_phase

    if 2 * action == current_phase:
        traci.trafficlight.setPhase(TRAFFIC_LIGHT_ID, 2 * action)
        simulate_time(DELTA_PHASE_DURATION)
        next_state = get_current_state()
        next_queue_size = torch.sum(next_state)
        reward =  -next_queue_size
        done = traci.simulation.getMinExpectedNumber() == 0
        return next_state, reward, done
    else:
        traci.trafficlight.setPhase(TRAFFIC_LIGHT_ID, current_phase + 1)
        simulate_time(YELLOW_PHASE_DURATION)
        current_phase = 2 * action
        traci.trafficlight.setPhase(TRAFFIC_LIGHT_ID, 2 * action)
        simulate_time(DELTA_PHASE_DURATION)
        next_state = get_current_state()
        next_queue_size = torch.sum(next_state)
        reward =  -next_queue_size
        done = traci.simulation.getMinExpectedNumber() == 0
        return next_state, reward, done


In [6]:
class DQN(nn.Module):
    def __init__(self, state_space_size, action_space_size):
        super(DQN, self).__init__()
        self.main = nn.Sequential(
            nn.Linear(state_space_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, action_space_size),
        )

    def forward(self, x):
        output = self.main(x)
        return output


In [7]:
learning_rate = 0.01

# discount factor
gamma = 0.999

# starting exploration rate
epsilon = 0.9

# rate of decay as model becomes more stable (5%)
epsilon_decay = 0.95

# final exploration rate for stable model
min_epsilon = 0.05
batch_size = 128
target_update_freq = 400
memory_size = 10000
episodes = 75
# episodes = 200

In [8]:
# # define hyperparameters to tune:
# TEMP_GARBO_VAR = 100000000000


# # discount factor
# gamma = [0.999, TEMP_GARBO_VAR]
# # starting exploration rate
# epsilon = [0.9, TEMP_GARBO_VAR]
# # rate of decay as model becomes more stable (5%)
# epsilon_decay = [0.95, TEMP_GARBO_VAR]
# # final exploration rate for stable model
# min_epsilon = [0.05, TEMP_GARBO_VAR]

# # NN related parameters
# learning_rate = [0.01, TEMP_GARBO_VAR]
# batch_size = [128, TEMP_GARBO_VAR]
# target_update_freq = [400, TEMP_GARBO_VAR]
# memory_size = [10000, TEMP_GARBO_VAR]

# # doen think we need to both tuning episodes
# episodes = 150

# from itertools import product

# param_combinations = list(product(
#     gamma,
#     epsilon,
#     epsilon_decay,
#     min_epsilon,
#     learning_rate,
#     batch_size,
#     target_update_freq,
#     memory_size
# ))

# Example usage below

# for combination in param_combinations:
#     g, e, e_decay, min_e, lr, vs, target_freq, mem_size = combination
#     ...

In [9]:

policy_net = DQN(state.shape[0], action_space_size)

target_net = DQN(state.shape[0], action_space_size)
target_net.load_state_dict(policy_net.state_dict())
target_net.eval()

optimizer = optim.Adam(policy_net.parameters(), lr=learning_rate)
memory = deque(maxlen=memory_size)

In [10]:

def choose_action(state, epsilon):
    if random.random() < epsilon:
        return random.randint(0, action_space_size - 1)
    else:
        q_values = policy_net(state.unsqueeze(0))
        return torch.argmax(q_values).item()


def optimise_model():
    if len(memory) < batch_size:
        return

    batch = random.sample(memory, batch_size)

    state_batch = torch.stack([b[0] for b in batch]).float()
    next_state_batch = torch.stack([b[3] for b in batch]).float()
    reward_batch = torch.tensor([b[2] for b in batch], dtype=torch.float)
    action_batch = torch.tensor([b[1] for b in batch], dtype=torch.long).unsqueeze(1)
    done_batch = torch.tensor([b[4] for b in batch], dtype=torch.float)

    q_values = policy_net(state_batch).gather(1, action_batch).squeeze()

    with torch.no_grad():
        max_next_q_values = target_net(next_state_batch).max(1)[0]
        target_q_values = reward_batch + gamma * max_next_q_values * (1 - done_batch)

    loss = nn.MSELoss()(q_values, target_q_values)

    # print(f"Training loss: {loss.item()}")

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


In [11]:
def train_algorithm(episodes, epsilon, epsilon_decay, min_epsilon): #add parameters here later
    rewards_per_episode = []
    avg_wait_per_ep = []
    max_wait_per_ep = []
    
    num_lanes = len(lane_detectors)
    all_avg_queue_lengths = torch.zeros(num_lanes, episodes)
    
    steps_done = 0
    
    for episode in range(episodes):
        print(f"Episode {episode}")
        change_env()
    
        state = get_current_state()
        episode_reward = 0
        done = False

        vehicle_wait_tracker = {} # NEW
        queue_length_tracker = {}
        num_steps = 0
        while not done:
            num_steps += 1 
            # Select action
            action = choose_action(state, epsilon)
            next_state, reward, done = step(action)


            # ADDITIONS
            for v_id in traci.vehicle.getIDList():
                wait_time = traci.vehicle.getWaitingTime(v_id)
    
                if v_id not in vehicle_wait_tracker:
                    vehicle_wait_tracker[v_id] = wait_time
                elif wait_time > vehicle_wait_tracker[v_id]:
                    vehicle_wait_tracker[v_id] = wait_time
            #
    
            # retrive queue length: list of size 8 (one number for each lane detector)
            curr_queue = get_queue_length()
            for i in range(len(curr_queue)):
                if i not in queue_length_tracker:
                    queue_length_tracker[i] = curr_queue[i]
                else:
                    queue_length_tracker[i] += curr_queue[i]
    
                
            # print(f"Action={action}, Reward={reward:.2f}, Done={done}")
            # print(f"Next State: {next_state.tolist()}")
            # Store transition in memory
            memory.append((state, action, reward, next_state, done))
    
            # Update state
            state = next_state
            episode_reward += reward
            # print(episode_reward)
            # Optimize model
            optimise_model()
    
            # Update target network periodically
            if steps_done % target_update_freq == 0:
                target_net.load_state_dict(policy_net.state_dict())
    
            steps_done += 1

        # ADDITIONS
    
        for i, len_i in queue_length_tracker.items():
            all_avg_queue_lengths[i, episode] = len_i / num_steps
            
        avg_wait = 0.0
        max_wait = 0.0
        vehicle_waits = []
    
        for key in vehicle_wait_tracker:
            vehicle_waits.append(vehicle_wait_tracker[key])
    
        if vehicle_waits:
            avg_wait = sum(vehicle_waits) / len(vehicle_waits)
            max_wait = max(vehicle_waits)
    
        avg_wait_per_ep.append(avg_wait)
        max_wait_per_ep.append(max_wait)
        # 
    
        # Decay epsilon
        epsilon = max(min_epsilon, epsilon_decay * epsilon)
        print(episode_reward)
        rewards_per_episode.append(episode_reward)
    
    return rewards_per_episode, avg_wait_per_ep, max_wait_per_ep, all_avg_queue_lengths

In [12]:
rewards_per_episode, avg_wait_per_ep, max_wait_per_ep, all_avg_queue_lengths = train_algorithm(4, epsilon, epsilon_decay, min_epsilon)

Episode 0
tensor(-12703.)
Episode 1
tensor(-14264.)
Episode 2
tensor(-13250.)
Episode 3
tensor(-530.)


In [None]:
torch.save(policy_net.state_dict(), "dqn_model.pth")

In [13]:
import matplotlib.pyplot as plt

# Total reward per episode
plt.figure()
plt.plot(rewards_per_episode)
plt.xlabel("Episode")
plt.ylabel("Reward")
plt.title("DQN on traffic lights")
plt.grid(True)
plt.tight_layout()
plt.savefig("Plots/rewards_per_episode.png")
plt.close()

# Average wait time per episode
plt.figure()
plt.plot(avg_wait_per_ep)
plt.xlabel("Episode")
plt.ylabel("Average Wait")
plt.title("DQN on traffic lights - Avg Wait")
plt.grid(True)
plt.tight_layout()
plt.savefig("Plots/avg_wait_per_episode.png")
plt.close()

# Maximum wait time per episode
plt.figure()
plt.plot(max_wait_per_ep)
plt.xlabel("Episode")
plt.ylabel("Maximum Wait")
plt.title("DQN on traffic lights - Max Wait")
plt.grid(True)
plt.tight_layout()
plt.savefig("Plots/max_wait_per_episode.png")
plt.close()

# Per-lane average queue length
num_lanes = all_avg_queue_lengths.shape[0]
for lane_index in range(num_lanes):
    plt.figure()
    plt.plot(all_avg_queue_lengths[lane_index].numpy())
    plt.xlabel("Episode")
    plt.ylabel("Avg Queue Length")
    plt.title(f"Avg queue length per episode for lane {lane_index}")
    plt.grid(True)
    plt.tight_layout()
    plt.savefig(f"Plots/avg_queue_lane_{lane_index}.png")
    plt.close()



In [6]:
def get_baseline_results(episodes=4):
    baseline_rewards_per_episode = []
    baseline_avg_wait_per_ep = []
    baseline_max_wait_per_ep = []

    num_lanes = len(lane_detectors)
    baseline_avg_queue_lengths = torch.zeros(num_lanes, episodes)

    phase_sequence = [2, 6, 4, 0]  # green phases
    green_duration = DELTA_PHASE_DURATION
    yellow_duration = YELLOW_PHASE_DURATION

    for episode in range(episodes):
        print(f"[Baseline] Episode {episode}")
        change_env()

        episode_reward = 0
        done = False

        vehicle_wait_tracker = {}
        queue_length_tracker = {}
        num_steps = 0

        state = get_current_state()
        phase_index = 0

        while not done:
            green_phase = phase_sequence[phase_index]
            yellow_phase = green_phase + 1

            # Set green phase
            traci.trafficlight.setPhase(TRAFFIC_LIGHT_ID, green_phase)
            simulate_time(green_duration)
            num_steps += green_duration * 20

            # Update state and reward
            state = get_current_state()
            queue_size = torch.sum(state)
            reward = -queue_size
            episode_reward += reward

            for v_id in traci.vehicle.getIDList():
                wait_time = traci.vehicle.getWaitingTime(v_id)
                if v_id not in vehicle_wait_tracker:
                    vehicle_wait_tracker[v_id] = wait_time
                elif wait_time > vehicle_wait_tracker[v_id]:
                    vehicle_wait_tracker[v_id] = wait_time

            curr_queue = get_queue_length()
            for i in range(len(curr_queue)):
                if i not in queue_length_tracker:
                    queue_length_tracker[i] = curr_queue[i]
                else:
                    queue_length_tracker[i] += curr_queue[i]

            done = traci.simulation.getMinExpectedNumber() == 0
            if done:
                break

            # Set yellow phase
            traci.trafficlight.setPhase(TRAFFIC_LIGHT_ID, yellow_phase)
            simulate_time(yellow_duration)
            num_steps += yellow_duration * 20

            done = traci.simulation.getMinExpectedNumber() == 0
            phase_index = (phase_index + 1) % len(phase_sequence)

        # Lane-wise average queue lengths
        for i, len_i in queue_length_tracker.items():
            baseline_avg_queue_lengths[i, episode] = len_i / num_steps

        # Wait time stats
        vehicle_waits = list(vehicle_wait_tracker.values())
        avg_wait = sum(vehicle_waits) / len(vehicle_waits) if vehicle_waits else 0.0
        max_wait = max(vehicle_waits) if vehicle_waits else 0.0

        baseline_avg_wait_per_ep.append(avg_wait)
        baseline_max_wait_per_ep.append(max_wait)
        baseline_rewards_per_episode.append(episode_reward)

    return (
        baseline_rewards_per_episode,
        baseline_avg_wait_per_ep,
        baseline_max_wait_per_ep,
        baseline_avg_queue_lengths
    )


In [7]:
baseline_results = get_baseline_results(episodes=4)
baseline_rewards, baseline_avg_waits, baseline_max_waits, baseline_queues = baseline_results


[Baseline] Episode 0
[Baseline] Episode 1
[Baseline] Episode 2
[Baseline] Episode 3


In [8]:
baseline_rewards

[tensor(-6354.), tensor(-14060.), tensor(-12336.), tensor(-521.)]

In [9]:
baseline_avg_waits

[14.694024249422625, 19.395777906304208, 17.880480868665963, 12.91672661870504]

In [10]:
baseline_max_waits

[34.05, 90.65, 64.05, 28.4]

In [11]:
baseline_queues

tensor([[0.0046, 0.0026, 0.0033, 0.0000],
        [0.0021, 0.0020, 0.0023, 0.0000],
        [0.0202, 0.0230, 0.0030, 0.0000],
        [0.0237, 0.0424, 0.0024, 0.0000],
        [0.0037, 0.0028, 0.0347, 0.0000],
        [0.0017, 0.0014, 0.0187, 0.0000],
        [0.0136, 0.0271, 0.0319, 0.0044],
        [0.0162, 0.0482, 0.0443, 0.0027]])