In [None]:
# !pip uninstall torch
# !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [None]:
# !pip uninstall torch

In [None]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
# !pip install matplotlib

In [None]:
import os
import sys
import random
from collections import deque
import torch
import torch.nn as nn
import torch.optim as optim
import traci
import matplotlib.pyplot as plt
from itertools import product
import pandas as pd
import itertools

In [None]:
# Pick the compute device once: the CUDA GPU when PyTorch can see one, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [None]:
# Report which device the networks and tensors will be placed on.
print("Using device:", device)
print("CUDA available:", torch.cuda.is_available())
if device.type == "cuda":
    # Ask for the name of the device that was actually selected rather than
    # always index 0, so the report stays accurate if the current CUDA device
    # is not the first one.
    print("Device name:", torch.cuda.get_device_name(device))


In [None]:
# %% --- SUMO Configuration ---
def sumo_config(traffic_pattern="P1"):
    """Build the command line used to launch SUMO for one traffic pattern.

    Args:
        traffic_pattern: Name of the sub-directory of ``SUMO_networks`` whose
            ``junction.sumocfg`` should be loaded.

    Returns:
        A new list of command-line tokens, starting with the ``sumo`` binary.
    """
    config_file = f"SUMO_networks/{traffic_pattern}/junction.sumocfg"
    command = ["sumo", "-c", config_file]
    # Option/value pairs.
    command += ["--step-length", "0.05"]
    command += ["--delay", "0"]
    command += ["--lateral-resolution", "0.1"]
    # Plain flags.
    command += ["--start", "--no-warnings", "--no-step-log"]
    return command

# SUMO's bundled Python tools live under $SUMO_HOME/tools; without that
# variable the notebook cannot continue, so stop with a clear message.
if "SUMO_HOME" not in os.environ:
    sys.exit("Please declare environment variable 'SUMO_HOME'")
tools = os.path.join(os.environ["SUMO_HOME"], "tools")
sys.path.append(tools)

# Launch SUMO with the default traffic pattern unless a TraCI connection is
# already open (e.g. when this cell is re-run in a live kernel).
if not traci.isLoaded():
    traci.start(sumo_config())

In [None]:
# %% --- Global Variables ---
# Number of discrete actions; also the output width of the Q-network.
action_space_size = 8
# Traffic-light id passed to every traci.trafficlight call.
TRAFFIC_LIGHT_ID = "traffic_light"
# Durations handed to simulate_time() after setting the chosen phase and
# after setting the intermediate (current_phase + 1) phase, respectively.
DELTA_PHASE_DURATION = 6
YELLOW_PHASE_DURATION = 4
# Lane-area detector ids "q1" .. "q8", one state feature each.
lane_detectors = [f"q{number}" for number in range(1, 9)]
# Phase index that step() believes the signal is currently in.
current_phase = 2

def get_queue_length():
    """Read the halting-vehicle count of every lane-area detector.

    Returns:
        A 1-D float tensor with one entry per id in ``lane_detectors``, in the
        same order.
    """
    halting_counts = []
    for detector_id in lane_detectors:
        halting_counts.append(traci.lanearea.getLastStepHaltingNumber(detector_id))
    return torch.tensor(halting_counts, dtype=torch.float)

def get_current_state():
    """Return the agent's observation, which is the per-detector queue length."""
    observation = get_queue_length()
    return observation

def simulate_time(seconds=1, steps_per_second=20):
    """Advance the SUMO simulation by ``seconds`` of simulated time.

    Args:
        seconds: Simulated duration to advance. May be fractional; the number
            of steps is rounded to the nearest integer.
        steps_per_second: Number of ``traci.simulationStep()`` calls that make
            up one simulated second. The default of 20 corresponds to the
            ``--step-length 0.05`` option built by ``sumo_config``; pass a
            different value if the step length is changed.
    """
    # round() keeps integer inputs exact (20 * 6 -> 120) while allowing
    # fractional durations, which range() would otherwise reject.
    total_steps = round(steps_per_second * seconds)
    for _ in range(total_steps):
        traci.simulationStep()

def step(action):
    """Apply one agent action to the traffic light and observe the outcome.

    The action index maps to phase ``2 * action``. When that differs from the
    tracked ``current_phase``, phase ``current_phase + 1`` is held for
    ``YELLOW_PHASE_DURATION`` first (presumably the yellow phase that follows
    each green one; confirm against the signal program). The requested phase
    is then held for ``DELTA_PHASE_DURATION``.

    Args:
        action: Integer action index.

    Returns:
        Tuple ``(next_state, reward, done, queue_snapshot)``: the new
        observation, the negated sum of queue lengths as a 0-dim tensor,
        whether no more vehicles are expected in the simulation, and a clone
        of the observation.
    """
    global current_phase
    requested_phase = 2 * action

    if requested_phase != current_phase:
        # Transition through the intermediate phase before switching.
        traci.trafficlight.setPhase(TRAFFIC_LIGHT_ID, current_phase + 1)
        simulate_time(YELLOW_PHASE_DURATION)
        current_phase = requested_phase

    # Both the "keep" and the "switch" case end by holding the requested phase.
    traci.trafficlight.setPhase(TRAFFIC_LIGHT_ID, requested_phase)
    simulate_time(DELTA_PHASE_DURATION)

    observation = get_current_state()
    penalty = -torch.sum(observation)
    finished = traci.simulation.getMinExpectedNumber() == 0
    return observation, penalty, finished, observation.clone()


In [None]:
# %% --- Neural Network ---
class DQN(nn.Module):
    """Fully connected Q-network mapping a state vector to one value per action.

    Architecture: ``state_dim -> 128 -> 64 -> action_dim`` with ReLU after
    each hidden layer. The layers live in ``self.main`` so state-dict keys are
    ``main.0.*``, ``main.2.*`` and ``main.4.*``.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()
        layers = []
        in_features = state_dim
        for hidden_width in (128, 64):
            layers.append(nn.Linear(in_features, hidden_width))
            layers.append(nn.ReLU())
            in_features = hidden_width
        layers.append(nn.Linear(in_features, action_dim))
        self.main = nn.Sequential(*layers)

    def forward(self, x):
        """Return the Q-values for input ``x`` (last dimension ``state_dim``)."""
        q_values = self.main(x)
        return q_values

In [None]:
# %% --- Environment Switching ---
# Endless round-robin over the available traffic-pattern scenarios.
traffic_patterns = itertools.cycle(["P1", "P2", "P3", "P4"])

def change_env():
    """Restart SUMO on the next traffic pattern in the rotation.

    Closes any open TraCI connection, launches SUMO with the next pattern's
    configuration and re-synchronises the module-level ``current_phase`` with
    the freshly started simulation.
    """
    global current_phase
    pattern = next(traffic_patterns)
    if traci.isLoaded():
        traci.close()
    traci.start(sumo_config(pattern))
    # step() derives the transition phase from current_phase. Without this
    # reset it would still hold the last phase of the *previous* simulation,
    # so the first switch of a new episode would be computed from a phase the
    # new simulation is not in.
    # NOTE(review): step() assumes the tracked phase is an even (green) index;
    # confirm the signal program starts in such a phase.
    current_phase = traci.trafficlight.getPhase(TRAFFIC_LIGHT_ID)

In [None]:
# %% --- Choose Action ---
def choose_action(state, epsilon, policy_net):
    """Select an action with an epsilon-greedy policy.

    Args:
        state: 1-D state tensor for the current observation.
        epsilon: Probability of picking a uniformly random action.
        policy_net: Network whose highest-valued output is used otherwise.

    Returns:
        Integer action index in ``[0, action_space_size - 1]``.
    """
    explore = random.random() < epsilon
    if explore:
        return random.randint(0, action_space_size - 1)

    # Greedy branch: no gradients are needed for action selection.
    with torch.no_grad():
        q_values = policy_net(state.unsqueeze(0).to(device))
        return torch.argmax(q_values).item()


In [None]:
# %% --- Optimizer Step ---
def optimise_model(policy_net, target_net, memory, optimizer, batch_size, gamma):
    """Run one DQN gradient step on a random minibatch from replay memory.

    Does nothing (returns ``None``) until ``memory`` holds at least
    ``batch_size`` transitions.

    Args:
        policy_net: Network being trained; its parameters' device decides
            where the minibatch tensors are placed.
        target_net: Network used to bootstrap the TD target.
        memory: Sequence of ``(state, action, reward, next_state, done)``
            transitions; states are tensors, rewards may be numbers or 0-dim
            tensors.
        optimizer: Optimizer over ``policy_net``'s parameters.
        batch_size: Number of transitions to sample.
        gamma: Discount factor applied to the bootstrapped value.
    """
    if len(memory) < batch_size:
        return

    batch = random.sample(memory, batch_size)
    # Build the batch on the model's own device rather than relying on a
    # notebook-level global.
    net_device = next(policy_net.parameters()).device

    states = torch.stack([t[0] for t in batch]).to(net_device)
    actions = torch.tensor(
        [int(t[1]) for t in batch], dtype=torch.long, device=net_device
    ).unsqueeze(1)
    rewards = torch.tensor(
        [float(t[2]) for t in batch], dtype=torch.float, device=net_device
    )
    next_states = torch.stack([t[3] for t in batch]).to(net_device)
    dones = torch.tensor(
        [float(t[4]) for t in batch], dtype=torch.float, device=net_device
    )

    # squeeze(1) removes only the gathered-action axis. A bare squeeze() would
    # also drop the batch axis when batch_size == 1, leaving a 0-dim
    # prediction compared against a length-1 target.
    q_vals = policy_net(states).gather(1, actions).squeeze(1)
    with torch.no_grad():
        max_next_q_vals = target_net(next_states).max(1)[0]
        # Terminal transitions (done == 1) do not bootstrap.
        target_vals = rewards + gamma * max_next_q_vals * (1 - dones)

    loss = nn.MSELoss()(q_vals, target_vals)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# %% --- Training Loop ---
def train_algorithm(params, episodes=10):
    """Train a DQN traffic-signal controller and collect per-episode metrics.

    Each episode restarts SUMO on the next traffic pattern (``change_env``)
    and runs until ``step`` reports that the simulation is done. Epsilon is
    decayed once per episode; the target network is synchronised every
    ``target_update_freq`` environment steps, counted across episodes.

    Args:
        params: Dict with keys ``gamma``, ``epsilon``, ``epsilon_decay``,
            ``min_epsilon``, ``learning_rate``, ``batch_size``,
            ``target_update_freq`` and ``memory_size``.
        episodes: Number of training episodes; must be at least 1.

    Returns:
        Dict with ``avg_reward``, ``avg_wait_per_ep``, ``max_wait_per_ep``,
        ``avg_queue_lengths`` (tensor of shape lanes x episodes),
        ``rewards_per_episode``, ``avg_reward_last_N``, ``avg_wait_last_N``,
        ``max_wait_last_N`` (N is the last 20% of episodes, at least 1) and
        ``trained_model`` (the policy network).

    Raises:
        ValueError: If ``episodes`` is smaller than 1.
    """
    if episodes < 1:
        # Fail before launching SUMO instead of dividing by zero at the end.
        raise ValueError("episodes must be at least 1")

    gamma = params["gamma"]
    epsilon = params["epsilon"]
    epsilon_decay = params["epsilon_decay"]
    min_epsilon = params["min_epsilon"]
    lr = params["learning_rate"]
    batch_size = params["batch_size"]
    target_update_freq = params["target_update_freq"]
    memory_size = params["memory_size"]

    num_lanes = len(lane_detectors)
    policy_net = DQN(num_lanes, action_space_size).to(device)
    target_net = DQN(num_lanes, action_space_size).to(device)
    target_net.load_state_dict(policy_net.state_dict())
    optimizer = optim.Adam(policy_net.parameters(), lr=lr)
    memory = deque(maxlen=memory_size)

    rewards_per_episode = []
    avg_wait_per_ep = []
    max_wait_per_ep = []
    all_avg_queue_lengths = torch.zeros(num_lanes, episodes)
    steps_done = 0

    try:
        for episode in range(episodes):
            change_env()
            state = get_current_state().to(device)
            episode_reward = 0.0
            done = False

            max_wait_by_vehicle = {}  # vehicle id -> largest waiting time seen
            queue_totals = torch.zeros(num_lanes)  # per-lane sum over steps
            num_steps = 0

            while not done:
                action = choose_action(state, epsilon, policy_net)
                next_state, reward, done, curr_queue = step(action)
                next_state = next_state.to(device)
                memory.append((state, action, reward, next_state, done))
                state = next_state
                episode_reward += float(reward)

                # Waiting time is sampled once per decision step; keep the
                # maximum observed for every vehicle.
                for v_id in traci.vehicle.getIDList():
                    wait_time = traci.vehicle.getWaitingTime(v_id)
                    max_wait_by_vehicle[v_id] = max(
                        wait_time, max_wait_by_vehicle.get(v_id, wait_time)
                    )

                queue_totals += curr_queue.to(queue_totals.device)

                optimise_model(
                    policy_net, target_net, memory, optimizer, batch_size, gamma
                )
                if steps_done % target_update_freq == 0:
                    target_net.load_state_dict(policy_net.state_dict())
                steps_done += 1
                num_steps += 1

            # The loop body runs at least once, so num_steps >= 1 here.
            all_avg_queue_lengths[:, episode] = queue_totals / num_steps

            vehicle_waits = list(max_wait_by_vehicle.values())
            if vehicle_waits:
                avg_wait_per_ep.append(sum(vehicle_waits) / len(vehicle_waits))
                max_wait_per_ep.append(max(vehicle_waits))
            else:
                avg_wait_per_ep.append(0.0)
                max_wait_per_ep.append(0.0)

            epsilon = max(min_epsilon, epsilon * epsilon_decay)
            rewards_per_episode.append(episode_reward)
    finally:
        # Do not leave a SUMO process and TraCI connection behind, whether
        # training finished normally or raised.
        if traci.isLoaded():
            traci.close()

    # Summary over the final 20% of episodes (at least one episode).
    N = max(1, int(0.2 * episodes))
    avg_reward_last_N = sum(rewards_per_episode[-N:]) / N
    avg_wait_last_N = sum(avg_wait_per_ep[-N:]) / N
    max_wait_last_N = max(max_wait_per_ep[-N:]) if max_wait_per_ep[-N:] else 0.0

    return {
        "avg_reward": sum(rewards_per_episode) / episodes,
        "avg_wait_per_ep": avg_wait_per_ep,
        "max_wait_per_ep": max_wait_per_ep,
        "avg_queue_lengths": all_avg_queue_lengths,
        "rewards_per_episode": rewards_per_episode,
        "avg_reward_last_N": avg_reward_last_N,
        "avg_wait_last_N": avg_wait_last_N,
        "max_wait_last_N": max_wait_last_N,
        "trained_model": policy_net
    }

In [None]:
# %% --- Parameter Grid ---
# Candidate values for every hyper-parameter; the grid search trains one agent
# per element of the Cartesian product of these lists.
param_grid = dict(
    gamma=[0.99],
    epsilon=[0.9],
    epsilon_decay=[0.95],
    min_epsilon=[0.05],
    learning_rate=[0.001],
    batch_size=[128],
    target_update_freq=[400],
    memory_size=[10000],
)

# Parameter names and their candidate lists, in matching order.
keys = tuple(param_grid.keys())
values = tuple(param_grid.values())
# One {name: value} dict per combination.
param_combinations = [dict(zip(keys, choice)) for choice in product(*values)]

In [None]:
# %% --- Plot Saving Function ---
def _save_line_plot(series, ylabel, title, path):
    """Plot ``series`` against episode index and write the figure to ``path``."""
    fig, ax = plt.subplots()
    try:
        ax.plot(series)
        ax.set_xlabel("Episode")
        ax.set_ylabel(ylabel)
        ax.set_title(title)
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # Always release the figure, even if saving fails, so repeated calls
        # do not accumulate open figures.
        plt.close(fig)


def save_episode_plots(metrics, combo_name="default"):
    """Save the per-episode training curves of one run as PNG files.

    Files are written to ``Plots/<combo_name>/`` (created if missing): total
    reward, average wait, maximum wait, and one average-queue-length plot per
    lane.

    Args:
        metrics: Dict with ``rewards_per_episode``, ``avg_wait_per_ep`` and
            ``max_wait_per_ep`` (sequences, one value per episode) and
            ``avg_queue_lengths`` (tensor indexed as ``[lane, episode]``).
        combo_name: Label used for the output directory and the plot titles.
    """
    out_dir = f"Plots/{combo_name}"
    os.makedirs(out_dir, exist_ok=True)

    # (metrics key, y-axis label, title suffix, file name)
    scalar_plots = [
        ("rewards_per_episode", "Total Reward", "Reward per Episode",
         "rewards_per_episode.png"),
        ("avg_wait_per_ep", "Average Wait Time", "Avg Wait per Episode",
         "avg_wait_per_episode.png"),
        ("max_wait_per_ep", "Maximum Wait Time", "Max Wait per Episode",
         "max_wait_per_episode.png"),
    ]
    for key, ylabel, title_suffix, filename in scalar_plots:
        _save_line_plot(
            metrics[key],
            ylabel,
            f"{combo_name} - {title_suffix}",
            f"{out_dir}/{filename}",
        )

    avg_queues = metrics["avg_queue_lengths"]
    for lane_index in range(avg_queues.shape[0]):
        _save_line_plot(
            avg_queues[lane_index].cpu().numpy(),
            "Average Queue Length",
            f"{combo_name} - Lane {lane_index} Avg Queue Length",
            f"{out_dir}/avg_queue_lane_{lane_index}.png",
        )

In [None]:
from joblib import Parallel, delayed
import multiprocessing

# Train one agent per hyper-parameter combination, in parallel worker processes.
results = Parallel(n_jobs=4)(
    delayed(train_algorithm)(combo, episodes=4)
    for combo in param_combinations
)

# torch.save does not create missing parent directories, so make sure the
# output folder exists before the first model is written.
os.makedirs("Trained_Models", exist_ok=True)

# Collect results with parameter info
final_results = []
for combo, metrics in zip(param_combinations, results):
    combo_result = combo.copy()
    combo_result["avg_reward"] = metrics["avg_reward"]
    combo_result["avg_reward_last_N"] = metrics["avg_reward_last_N"]
    combo_result["avg_wait_last_N"] = metrics["avg_wait_last_N"]
    combo_result["max_wait_last_N"] = metrics["max_wait_last_N"]
    final_results.append(combo_result)

    # The same label names the plot directory and the saved model file.
    combo_name = "combo_" + "_".join(f"{k}-{v}" for k, v in combo.items())
    save_episode_plots(metrics, combo_name=combo_name)

    model_path = f"Trained_Models/{combo_name}.pt"
    torch.save(metrics["trained_model"].state_dict(), model_path)

results_df = pd.DataFrame(final_results)
results_df.to_csv("grid_search_results.csv", index=False)

print(results_df.sort_values("avg_reward_last_N", ascending=False).head())

In [None]:
# # %% --- Save Results ---
# results_df = pd.DataFrame(results)
# results_df.to_csv("grid_search_results.csv", index=False)

# # %% --- Print Best Results ---
# print(results_df.sort_values("avg_reward_last_N", ascending=False).head())
