In [470]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
import traci
import sumolib
import time

sumobin=sumolib.checkBinary('sumo')
traci.start([sumobin,'-c','c:\\users\\kaneh\\Documents\\COMP9444\\SUMO_DQN\\environments\\cross.sumocfg'])   
print("Connected to TraCI")

Connected to TraCI


In [None]:
environment = "environments/cross.sumocfg"

# Function to reset the SUMO environment
def reset_sumo_environment(config_file):
    # conn = traci.getConnection('default')
    
    # Get initial state information (modify this based on your state representation)
    state = get_state()

    # print(state)
    
    return state

# Function to step through the SUMO simulation
def step_in_sumo(action):
    # Apply the action (this example assumes action affects traffic lights or vehicle behavior)
    apply_action(action)
    
    # Step the SUMO simulation forward
    conn = traci.getConnection('default')
    conn.simulationStep()
    
    # Get the new state after taking the action
    next_state = get_state()
    
    # Calculate the reward (implement your reward logic here)
    reward = calculate_reward()
    
    # Check if the episode is done (implement your termination logic here)
    done = check_done_condition()
    
    return next_state, reward, done

# Function to get the current state (modify this based on what information you need)
def get_state():
    # Example: returning the number of vehicles on the road
    num_vehicles = traci.vehicle.getIDCount()
    # Add more features as needed (e.g., speeds, positions, etc.)
    return np.array([num_vehicles])  # Adjust shape as necessary

# Function to apply the action (modify based on your action space)
def apply_action(action):
    # Example action handling
    # Assuming actions control traffic lights (0: Red, 1: Green, etc.)
    traffic_light_ids = traci.trafficlight.getIDList()
    try:
        for tl_id in traffic_light_ids:
            if action == 0:
                traci.trafficlight.setRedYellowGreenState(tl_id, "r")
            elif action == 1:
                traci.trafficlight.setRedYellowGreenState(tl_id, "G")
    except traci.exceptions.FatalTraCIError as e:
        print("TraCI error:", e)
        traci.close()
        return

# Function to calculate the reward (implement your logic)
def calculate_reward():
    # Example reward based on average speed of vehicles
    speeds = [traci.vehicle.getSpeed(veh_id) for veh_id in traci.vehicle.getIDList()]
    return np.mean(speeds)  # Modify reward logic as needed

# Function to sum the delay of all vehicles affected by the traffic light
def get_delay(tls_id):
    delays = [get_lane_delay(lane_id) for lane_id in traci.trafficlight.getControlledLanes(tls_id)]
    return sum(delays)

def get_lane_delay(lane_id):
    max_s = traci.lane.getMaxSpeed(lane_id)
    avg_s = traci.lane.getLastStepMeanSpeed(lane_id)
    num_veh = traci.lane.getLastStepVehicleNumber(lane_id)
    return num_veh * (1 - avg_s / max_s)

# Function to get the sum of waiting times of all vehicles currently stopped at the traffic light
def get_waiting_time(tls_id):
    waiting_times = [get_lane_waiting_time(lane_id) for lane_id in traci.trafficlight.getControlledLanes(tls_id)]
    return sum(waiting_times)

def get_lane_waiting_time(lane_id):
    waiting_times = [traci.vehicle.getWaitingTime(veh_id) for veh_id in traci.lane.getLastStepVehicleIDs(lane_id)]
    return sum(waiting_times)

# Function that returns the number of emergency stops (acceleration < -4.5m/s^2) caused by the traffic light
def num_emergency_stops(tls_id):
    emergency_stops = [get_lane_emergency_stops(lane_id) for lane_id in traci.trafficlight.getControlledLanes(tls_id)]
    return sum(emergency_stops)

def get_lane_emergency_stops(lane_id):
    emergency_stops = [veh_id for veh_id in traci.lane.getLastStepVehicleIDs(lane_id) if traci.vehicle.getAcceleration(veh_id) < -4.5]
    return len(emergency_stops)

# Function to check if the simulation should terminate
def check_done_condition():
    # Example condition: terminate if simulation time exceeds a limit
    current_time = traci.simulation.getTime()
    return current_time > 1000  # Change this threshold as necessary

In [472]:
# Define the neural network for the Q-function
class DQN(nn.Module):
    def __init__(self, state_size, action_size):
        super(DQN, self).__init__()
        self.fc1 = nn.Linear(state_size, 24)
        self.fc2 = nn.Linear(24, 24)
        self.fc3 = nn.Linear(24, action_size)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        return self.fc3(x)

In [473]:
# Define the RL agent
class RLAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=0.001)
        self.criterion = nn.MSELoss()

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state = torch.FloatTensor(state)
        q_values = self.model(state)
        return np.argmax(q_values.detach().numpy())

    def replay(self, batch_size):
        if len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target += self.gamma * np.amax(self.model(torch.FloatTensor(next_state)).detach().numpy())
            target_f = self.model(torch.FloatTensor(state))
            # Check if action index is valid
            if 0 <= action < self.action_size:
                target_f[action] = target
            else:
                print(f"Invalid action: {action}")

            # Convert back to tensor for loss calculation
            target_f_tensor = torch.FloatTensor(target_f)
            self.model.zero_grad()
            loss = self.criterion(target_f_tensor, self.model(torch.FloatTensor(state)))
            loss.backward()
            self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [474]:
# Simulation interaction loop
def run_simulation(agent, num_episodes, batch_size):
    for e in range(num_episodes):
        state = reset_sumo_environment(environment)  # Reset the SUMO environment and get the initial state
        done = False
        total_reward = 0

        while not done:
            action = agent.act(state)
            next_state, reward, done = step_in_sumo(action)  # Step through the SUMO simulation
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

        print(f"Episode: {e+1}/{num_episodes}, Total Reward: {total_reward}")
        agent.replay(batch_size)

In [475]:
# Sample parameters
state_size = 4  # Example state size, adjust based on your simulation
action_size = 10  # Example action size, adjust based on your simulation
agent = RLAgent(state_size, action_size)
run_simulation(agent, num_episodes=1000, batch_size=32)

TraCI error: Connection closed by SUMO.


TraCIException: Connection 'default' is not known.