In [1]:
!pip install pyvirtualdisplay Pillow






# SUMO Setups

In [1]:
import os
import torch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import gymnasium as gym
from tqdm import tqdm
from dotenv import load_dotenv
from gymnasium import spaces
import matplotlib.patches as patches

SUMO_HOME = 'C:\\Program Files (x86)\\Eclipse\\Sumo'

os.environ['SUMO_HOME'] = SUMO_HOME

# Print the SUMO_HOME environment variable to verify
print("SUMO_HOME is set to:", os.environ.get('SUMO_HOME'))

# Check if the SUMO_HOME path exists
print("Does SUMO_HOME exist?", os.path.exists(os.environ.get('SUMO_HOME')))


# Define file paths
net_file = 'C:/Users/ASUS/Sumo/2024-11-03-18-23-47/osm.net.xml/osm.net.xml'
route_file = 'C:/Users/ASUS/Sumo/2024-11-03-18-23-47/osm.passenger.rou.xml'
out_csv_name = 'C:/Users/ASUS/Sumo/2024-11-03-18-23-47/osm.passenger.csv'
config_file = 'C:/Users/ASUS/Sumo/2024-11-03-18-23-47/osm.sumocfg'


SUMO_HOME is set to: C:\Program Files (x86)\Eclipse\Sumo
Does SUMO_HOME exist? True


## Importing Required libraries

In [9]:
import torch
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import gymnasium as gym
from tqdm import tqdm
import os
from dotenv import load_dotenv
import sumo_rl
import traci

# Environment Setup

In [47]:
class Environment:
    def __init__(self, env_name, route_file, net_file, out_csv_name, render_mode='human', num_seconds=100000, max_steps=1000, port=8873):
        self.env_name = env_name
        self.use_gui = True if render_mode == 'human' else False
        self.route_file = route_file
        self.net_file = net_file
        self.out_csv_name = out_csv_name
        self.env = gym.make(
                        env_name,
                        net_file=net_file,
                        route_file=route_file,
                        out_csv_name=out_csv_name,
                        use_gui=self.use_gui,
                        num_seconds=num_seconds
                    )
     
        self.state, _ = self.env.reset()
        self.done = False
        self.observation_space = self.env.observation_space.shape[0]
        self.action_space = self.env.action_space
        self.traffic_signals = self.env.traffic_signals
    
    def reset(self):
        self.state, _ = self.env.reset()
        self.done = False
        return self.state
    
    def custom_reward(self, traffic_signal, reward_type='average_speed', reward_method='simple'):
        if reward_method == 'simple':
            match reward_type:
                case 'average_speed':
                    return traffic_signal.get_avgerage_speed()
                case 'congesion':
                    return -1 * traffic_signal.get_pressure()
                case 'emissions':
                    return -1* traffic_signal.get_emission_co2()
                case 'throughput':
                    return traffic_signal.get_throughput()

        else:
            # Weighted sum of the metrics
            reward = 0
            if weights is None:
                weights = {
                    'average_speed': 0.4,
                    'waiting_time': 0.3,
                    'emissions': 0.2,
                    'throughput': 0.1
                }

            # Calculate individual rewards
            average_speed = traffic_signal.get_average_speed()
            waiting_time = -1* traffic_signal._diff_waiting_time_reward()
            total_queue = -1 * traffic_signal.get_total_queued()
            congesion = traffic_signal.get_pressure()

            print(average_speed, waiting_time, total_queue, congesion)
            weighted_reward = (
                weights['average_speed'] * average_speed +
                weights['waiting_time'] * waiting_time +
                weights['emissions'] * total_queue +
                weights['throughput'] * congesion
            )

            return weighted_reward
            
            
    def step(self, action):
        next_state, _, terminated, truncated, info = self.env.step(action)

        traffic_signal = list(self.traffic_signals.values())[0]
        print(traffic_signal.get_average_speed(), traffic_signal.get_total_queued(), traffic_signal._diff_waiting_time_reward(), traffic_signal.get_pressure())
        reward = self.custom_reward(traffic_signal, reward_type='congesion', reward_method='simple')

        self.state = next_state
        self.done = terminated
        return next_state, reward, self.done or truncated
    
    def render(self):
        self.env.render()
    
    def close(self):
        try:
            self.env.close()
            if traci.isLoaded():
                traci.close()
            print("Env and Traci closed successfully.")
        except Exception as e:
            print("Error while closing the environment:", e)
    
    def get_state(self):
        return self.state


## Random Agent Run Basic

## Running Environment (Simple Intersection)

In [57]:
# Define paths to the network and route files (Buffalo Road Intersection)

# net_file = 'C:/Users/ASUS/Sumo/2024-11-03-18-23-47/osm.net.xml/osm.net.xml'
# route_file = 'C:/Users/ASUS/Sumo/2024-11-03-18-23-47/osm.passenger.rou.xml'
# out_csv_name = 'C:/Users/ASUS/Sumo/2024-11-03-18-23-47/osm.passenger.csv'


# Using Custom simple Traffic Intersection
nets_dir = 'nets'

file_name = 'single_intersection_simple'

nets_file = os.path.join(nets_dir, f'{file_name}.net.xml')
routes_file = os.path.join(nets_dir, f'{file_name}.rou.xml')

file_exists = lambda file_path: os.path.exists(file_path)

if not file_exists(nets_file):
    raise FileNotFoundError(f"Net file not found: {net_file}")
if not file_exists(routes_file):
    raise FileNotFoundError(f"Route file not found: {route_file}")


# Instantiate the environment
sumo_env = Environment('sumo-rl-v0', net_file=nets_file, route_file=routes_file, out_csv_name=out_csv_name, render_mode=None)

print("Observation Space:", sumo_env.observation_space)
print("Action Space:", sumo_env.action_space.n)
print("Initial State:", sumo_env.state)

max_steps = 1000

try:
    print("action space", sumo_env.action_space)
    for step in range(max_steps):
        # sumo_env.render()
        action = sumo_env.env.action_space.sample()
        next_state, reward, done = sumo_env.step(action)
        print("Action ", action, " Reward ", reward, " Done ", done, "observation", next_state)
        
        if done:
            sumo_env.reset()
            break

except KeyboardInterrupt:
    print("Keyboard interrupt detected. Closing the environment.")

except Exception as e:
    print("Error occurred:", e)

finally:
    sumo_env.close()

Observation Space: 11
Action Space: 2
Initial State: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
action space Discrete(2)
0.9705471392100999 0 0.0 -5
Action  1  Reward  5  Done  False observation [1.         0.         0.         0.05048805 0.         0.10567101
 0.10567101 0.         0.         0.         0.        ]
0.8907290714368037 0 0.0 -9
Action  1  Reward  9  Done  False observation [1.         0.         1.         0.1009761  0.05048805 0.15850651
 0.15850651 0.         0.         0.         0.        ]
0.7349946466997885 2 0.0 -14
Action  0  Reward  14  Done  False observation [1.         0.         1.         0.15146415 0.15146415 0.21134202
 0.21134202 0.         0.         0.05283551 0.05283551]
0.64870672231928 1 0.0 -16
Action  1  Reward  16  Done  False observation [0.         1.         0.         0.25244024 0.2019522  0.26417753
 0.21134202 0.05048805 0.         0.         0.        ]
0.5300957170042275 5 0.0 -9
Action  0  Reward  9  Done  False observation [0.         1.     

# Training DQN

## DQN Network

In [12]:
# Setting up basic Neural Network for DQN
from torch import nn

input_nodes = 128

class DQN(nn.Module):
    def __init__(self, observation_size, action_size, input_nodes=128):
        super(DQN, self).__init__()
        print("Observation size Network", observation_size)
        print("Action size Network", action_size)

        self.fc1 = nn.Linear(observation_size, input_nodes)
        self.fc2 = nn.Linear(input_nodes, input_nodes)
        self.fc3 = nn.Linear(input_nodes, action_size)
        
    
    def forward(self, x):
        x = nn.functional.relu(self.fc1(x))
        x = nn.functional.relu(self.fc2(x))
        x = self.fc3(x)
        return x

## Replay Memory

In [5]:
# Class for Replay Memory
from collections import namedtuple, deque
import random

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward', 'done'))

class ReplayMemory:
    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

## DQN Agent

In [16]:
# Setting up the Agent Class for doing the DQN 
class Agent:
    def __init__(self, env, epsilon, gamma, learning_rate, epsilon_decay, mem_size=5000):
        self.env = env
        self.state_size = env.observation_space
        self.action_size = env.action_space.n
        self.epsilon = epsilon
        self.gamma = gamma
        self.learning_rate = learning_rate
        self.min_epsilon = 0.01
        self.max_epsilon = 1
        self.epsilon_decay = epsilon_decay

        self.memory_size_max = mem_size
        self.memory_buffer = ReplayMemory(self.memory_size_max)
        self.q_network = DQN(self.state_size, self.action_size)
        self.q_target_network = DQN(self.state_size, self.action_size)
        self.q_target_network.load_state_dict(self.q_network.state_dict())
        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.q_network.parameters(), lr=self.learning_rate)
    
    def choose_action(self, state):
        if np.random.random() < self.epsilon:
            return np.random.choice(self.action_size)
        else:
            state = torch.FloatTensor(state).unsqueeze(0)    
            with torch.no_grad():
                q_values = self.q_network(state)
                return torch.argmax(q_values).item()
    
    def update_experience(self, state, action, reward, next_state, done):
        state = torch.FloatTensor(state).unsqueeze(0)
        next_state = torch.FloatTensor(next_state).unsqueeze(0)
        action = torch.LongTensor([action]).view(-1, 1)
        reward = torch.FloatTensor([reward]).to(torch.float32)
        done = torch.FloatTensor([done]).to(torch.float32)
        self.memory_buffer.push(state, action, next_state, reward, done)

        if self.memory_buffer.__len__() > self.memory_size_max:
            self.memory_buffer.pop(0)

    def update_exploration_probability(self, episode):
        self.epsilon = self.min_epsilon + (self.max_epsilon - self.min_epsilon) * np.exp(-self.epsilon_decay * episode)
    
    def optimize(self, batch_size):
        if len(self.memory_buffer) < batch_size:
            return

        transitions = self.memory_buffer.sample(batch_size)
        batch = Transition(*zip(*transitions))
        
        # Convert the parameters to torch tensors
        # state_batch = torch.cat(batch.state)
        # action_batch = torch.tensor(batch.action).view(-1, 1)
        # reward_batch = torch.tensor(batch.reward).float()
        # next_state_batch = torch.cat(batch.next_state)

        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        next_state_batch = torch.cat(batch.next_state)
        done_batch = torch.cat(batch.done)

        curr_q_vals = self.q_network(state_batch)
        curr_q_vals = curr_q_vals.gather(1, action_batch)

        # update target q value and current q values, backpropagate the loss
        with torch.no_grad():
            next_q_vals = self.q_target_network(next_state_batch)
            max_next_q_vals = torch.max(next_q_vals, 1)[0]
            target_q_vals = reward_batch + self.gamma * max_next_q_vals * (1 - done_batch)

        loss = self.loss_function(curr_q_vals.squeeze(), target_q_vals)
        self.optimizer.zero_grad()
        loss.backward()
        # Handling the exploding gradient problem
        nn.utils.clip_grad_norm_(self.q_network.parameters(), 1)
        self.optimizer.step()

        return loss.item()
    
    def update_target_network(self):
        self.q_target_network.load_state_dict(self.q_network.state_dict())
    

## Trainer DQN

In [None]:


def trainer_dqn(env, agent, n_episodes, batch_size, epsilon, gamma, learning_rate, epsilon_decay, C):
    max_steps = 500
    use_wandb = os.environ.get('USE_WANDB', 'false').lower() == 'true'
    epsilon_start = 1
    epsilon_end = 0.01

    if use_wandb:
        wandb_config = {
            'env': env.env_name,
            'algorithm': 'DQN',
            'batch_size': batch_size,
            'gamma': gamma,
            'eps_start': epsilon_start,
            'eps_end': epsilon_end,
            'eps_decay': epsilon_decay,
            'target_update': C,
            'lr': learning_rate,
            'num_episodes': n_episodes,
            'max_timesteps': max_steps,
            'seed': 0
        }
        
        cust_wandb = WanDB(wandb_config, f'assignment-2-{env.env_name}', env.env_name)
       
    
    loss_list = []
    reward_list = []
    rewards_per_episode = {}
    epsilon_values = []

    p_bar = tqdm(range(n_episodes), colour='green', desc='Training progress', unit='Episode')
    

    tot_steps = 0

    try:
        for episode in p_bar:
            state = env.reset()
            done = False
            total_reward = 0
            total_loss = 0            
    
            for step_ in range(max_steps):
                    tot_steps += 1
                    action = agent.choose_action(state)
                    print("Action ", action)
                    next_state, reward, done = env.step(action)

                    print("Action ", action, " Reward ", reward, " Done ", done, "observation", next_state)
                    # Save the transition in the replay memory
                    agent.update_experience(state, action, reward, next_state, done)

                    # Run the optimization step
                    loss = agent.optimize(batch_size)
                    if loss:
                        if use_wandb:
                            cust_wandb.wandb_log({
                                    'loss': loss, 
                                    'step_reward': reward, 
                                    'step': tot_steps
                                })
                        
                        total_loss += loss
                    total_reward += reward
                    state = next_state

                    if done:
                        break

                    if episode % C == 0:
                        agent.update_target_network()

            agent.update_exploration_probability(episode)
            epsilon_values.append(agent.epsilon)

            loss_list.append(total_loss)
            reward_list.append(total_reward)
            rewards_per_episode[episode] = total_reward

            if use_wandb:
                cust_wandb.wandb_log({
                            'epsilon': agent.epsilon,
                            'total_reward': total_reward,
                            'epsilon': agent.epsilon,
                            'total_loss': total_loss,
                            'episode': episode
                })
    except traci.exceptions.FatalTraCIError as e:
        print("TraCI error:", e)
        
    except KeyboardInterrupt:
        print("Keyboard interrupt detected. Closing the environment.")


    except Exception as e:
        print(f"Error occurred in episode {episode}, step {step_}: {e}")
        raise

        
    finally:
        if use_wandb:
            cust_wandb.wandb_finish()
            print(f'Episode: {episode}, Total Reward: {total_reward}, Loss: {total_loss}')
    env.close()        
    return reward_list, loss_list, rewards_per_episode, epsilon_values

## Training Agent

In [59]:
# Trainer method

n_episodes = 1000
batch_size = 64
epsilon = 1
gamma = 0.99
learning_rate = 1e-3
epsilon_decay = 0.995
C = 5

nets_dir = 'nets'

file_name = 'single_intersection_simple'

nets_file = os.path.join(nets_dir, f'{file_name}.net.xml')
routes_file = os.path.join(nets_dir, f'{file_name}.rou.xml')

file_exists = lambda file_path: os.path.exists(file_path)

if not file_exists(nets_file):
    raise FileNotFoundError(f"Net file not found: {net_file}")
if not file_exists(routes_file):
    raise FileNotFoundError(f"Route file not found: {route_file}")

sumo_env = Environment('sumo-rl-v0', net_file=nets_file, route_file=routes_file, out_csv_name=out_csv_name, render_mode=None, num_seconds=1000)


print("Observation Space:", sumo_env.observation_space)
print("Action Space:", sumo_env.action_space)
# print("Initial State:", sumo_env.state)

agent = Agent(sumo_env, epsilon, gamma, learning_rate, epsilon_decay)


rewards, losses, rewards_per_episode, epsilon_values = trainer_dqn(sumo_env, agent, n_episodes, batch_size, epsilon, gamma, learning_rate, epsilon_decay, C)

  logger.warn(


Observation Space: 11
Action Space: Discrete(2)
Observation size Network 11
Action size Network 2
Observation size Network 11
Action size Network 2


Training progress:   0%|[32m          [0m| 0/1000 [00:00<?, ?Episode/s]

action space Discrete(2) 11


Training progress:   0%|[32m          [0m| 0/1000 [00:00<?, ?Episode/s]

TraCI error: Connection already closed.
Env and Traci closed successfully.





# Training A3C

In [5]:
from a3c_cartpole import train_a3c

nets_dir = 'nets'

file_name = 'single_intersection_simple'

nets_file = os.path.join(nets_dir, f'{file_name}.net.xml')
routes_file = os.path.join(nets_dir, f'{file_name}.rou.xml')
sumo_env = Environment('sumo-rl-v0', net_file=nets_file, route_file=routes_file, out_csv_name=out_csv_name)

print("Observation Space:", sumo_env.observation_space)
print("Action Space:", sumo_env.action_space)
print("Initial State:", sumo_env.state)

max_steps = 1000

input_dims = sumo_env.observation_space
actions = sumo_env.action_space.n
# Train the model
# train_a3c(sumo_env, max_steps=1000, num_episodes=10, gamma=0.99, lr=0.001, beta=0.01, num_processes=4)
train_a3c(sumo_env, input_dims=[input_dims], n_actions=actions, n_episodes=1000, use_wandb=False, grad_clip=1, C=10, env_id='SUMO-RL', lr=1e-4)

 Retrying in 1 seconds
Observation Space: 11
Action Space: Discrete(2)
Initial State: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Training started...


  logger.warn(


TypeError: cannot pickle '_thread.lock' object

In [1]:
import gymnasium as gym
from sumo_rl import SumoEnvironment
import os

# Print SUMO_HOME for confirmation
print("SUMO_HOME is set to:", os.environ['SUMO_HOME'])

# Initialize SUMO Gym environment
env = gym.make(
    'sumo-rl-v0',
    net_file=net_file,
    route_file=route_file,
    out_csv_name=out_csv_name,
    use_gui=True,
    num_seconds=100000
)

# Reset the environment and start simulation
obs, info = env.reset()
done = False

print(env.action_space)

while not done:
    next_obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
    print("Reward:", reward)
    print("Observation:", next_obs)
    done = terminated or truncated


KeyboardInterrupt: 

In [9]:
import sumolib

# Load the network file
net = sumolib.net.readNet(nets_file)  # Replace with your network file

# Get all edge IDs
edge_ids = [edge.getID() for edge in net.getEdges()]
print("Edge IDs:", edge_ids)

# List all traffic light IDs
tls_ids = [tls.getID() for tls in net.getTrafficLights()]
print("Traffic Light IDs:", tls_ids)

# # Get all vehicle IDs in the current step
# vehicle_ids = traci.vehicle.getIDList()
# print("Vehicle IDs:", vehicle_ids)




Edge IDs: ['n_t', 't_e', 't_s', 'w_t']
Traffic Light IDs: ['t']


In [6]:
import traci
import sumolib
import sys
import time

try:
    # Define paths to SUMO binary, network file, and route file
    sumo_binary = "sumo-gui"  # Use "sumo" for non-GUI mode
    # Command to start SUMO with TraCI connection
    sumo_cmd = [sumo_binary, "-n", nets_file, "-r", routes_file, "--start"]
    
    # Attempt to start SUMO with the specified command
    try:
        traci.start(sumo_cmd, label="sim1")
    except traci.exceptions.TraCIException as e:
        print(f"Failed to start TraCI: {e}")
        sys.exit(1)

    print("TraCI connected successfully.")
    
    # Main simulation loop with safe TraCI operations
    for step in range(1000):
        try:
            traci.simulationStep()  # Advance simulation by one step
            
            # Check vehicle count on a specific edge
            edge_id = "t_e"  # Replace with actual edge ID
            try:
                vehicle_count = traci.edge.getLastStepVehicleNumber(edge_id)
                print(f"Step {step}, Vehicle count on {edge_id}: {vehicle_count}")
            except traci.exceptions.TraCIException as e:
                print(f"Error retrieving vehicle count for edge {edge_id}: {e}")
            
            # Get traffic light phase and queue length
            tls_id = "t"  # Replace with actual traffic light ID
            try:
                phase = traci.trafficlight.getPhase(tls_id)
                print(f"Traffic light {tls_id} phase: {phase}")
            except traci.exceptions.TraCIException as e:
                print(f"Error retrieving traffic light phase for {tls_id}: {e}")
            
            # Pause to inspect values for each step
            # input("Press Enter to continue to the next step...")
            time.sleep(0.5)
            
            if step >= max_steps -1:
                break            

        except KeyboardInterrupt:
            print("\nSimulation interrupted by user.")
            break  # Safely exit the loop if user interrupts

except (FileNotFoundError, traci.exceptions.TraCIException) as e:
    print(f"Error in setup or simulation: {e}")

except Exception as e:
    print(f"Unexpected error occurred: {e}")

finally:
    # Ensure that TraCI closes safely even if an error occurs
    try:
        if traci.isLoaded():
            traci.close()
            print("TraCI connection closed safely.")
    except traci.exceptions.TraCIException as e:
        print(f"Error while closing TraCI: {e}")


 Retrying in 1 seconds
TraCI connected successfully.
Step 0, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 1, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 2, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 3, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 4, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 5, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 6, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 7, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 8, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 9, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 10, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 11, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 12, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 13, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 14, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 15, Vehicle count on t_e: 0
Traffic light t phase: 0
Step 16, Vehicle count on t_e

FatalTraCIError: Connection closed by SUMO.