In [1]:
import sumo_rl

# Initialize the SUMO environment (multi-agent mode: one entry per traffic light).
env = sumo_rl.environment.env.SumoEnvironment(
    net_file='third.net.xml',             # Path to your network file
    route_file='third.rou.xml',           # Path to your route file
    use_gui=True,                         # If you want to visualize the simulation
    num_seconds=20000,                    # Total simulation time
    single_agent=False,                   # False -> multi-agent setting
    reward_fn='diff-waiting-time',        # Reward function (waiting time, throughput, etc.)
    min_green=5,                          # Minimum green time
    max_green=50,                         # Maximum green time
)

# Drive both traffic lights with random actions for at most 1000 steps.
# The try/finally guarantees the simulator is shut down even if reset()/step()
# raises (for example when the GUI window is closed mid-run); otherwise the
# SUMO process would be left running.
try:
    obs = env.reset()

    for step in range(1000):
        actions = {
            'B1': env.action_spaces('B1').sample(),  # Action for traffic light B1
            'C1': env.action_spaces('C1').sample(),  # Action for traffic light C1
        }

        # In multi-agent mode step() is unpacked into four values here.
        obs, rewards, dones, infos = env.step(actions)

        print(f"Step: {step}")
        print(f"Actions: {actions}")
        #print(f"Observations: {obs}")
        print(f"Rewards: {rewards}")
        print(f"Done: {dones}")

        # `dones` holds one flag per traffic light plus an '__all__' entry;
        # stop once every flag is set.
        if all(dones.values()):
            break
finally:
    env.close()


Actions: {'B1': 0, 'C1': 1}
Rewards: {'B1': 0.0, 'C1': 0.0}
Done: {'B1': False, 'C1': False, '__all__': False}


# Network with a single traffic light

In [None]:
import gymnasium as gym
import sumo_rl
# Single-agent rollout through the Gymnasium registry entry 'sumo-rl-v0'.
env = gym.make('sumo-rl-v0',
                net_file='third.net.xml',
                route_file='third.rou.xml',
                out_csv_name='path_to_output.csv',
                use_gui=True,
                num_seconds=100000)

# try/finally so the simulator is always released, including when the loop
# is interrupted or step() raises.
try:
    obs, info = env.reset()
    done = False
    while not done:
        # Random policy: sample an action until the episode terminates or is truncated.
        next_obs, reward, terminated, truncated, info = env.step(env.action_space.sample())
        done = terminated or truncated
finally:
    env.close()

In [None]:
import sumo_rl
# Multi-agent rollout through the parallel-environment wrapper.
env = sumo_rl.parallel_env(net_file='third.net.xml',
                  route_file='third.rou.xml',
                  use_gui=True,
                  num_seconds=550)

# try/finally so the simulator is always released, including when the loop
# is interrupted or step() raises.
try:
    observations = env.reset()
    epo = 1  # step counter, only used by the optional debug print below
    while env.agents:
        actions = {agent: env.action_space(agent).sample() for agent in env.agents}  # this is where you would insert your policy
        observations, rewards, terminations, truncations, infos = env.step(actions)
        # if epo % 5 == 0:
        #     print(f" {rewards['B1']} , {rewards['C1']} ")
        epo += 1
finally:
    env.close()

In [4]:
import sumo_rl

# Settings for the headless (no GUI) multi-agent environment, gathered in one
# mapping so they can be inspected or tweaked before the environment is built.
env_settings = {
    'net_file': 'third.net.xml',
    'route_file': 'third.rou.xml',
    'use_gui': False,
    'num_seconds': 20000,
    'single_agent': False,
    'reward_fn': 'diff-waiting-time',  # or any other reward function
    'min_green': 5,
    'max_green': 50,
}
env = sumo_rl.environment.env.SumoEnvironment(**env_settings)


In [26]:
import numpy as np
import random
from collections import defaultdict
import sumo_rl
import pickle

class QLearningAgent:
    """Tabular epsilon-greedy Q-learning agent for a discrete action space.

    The Q-table maps a hashable state to a NumPy vector holding one value per
    action; unseen states are created on first access, filled with zeros.

    Args:
        action_space: Object exposing ``n`` (number of actions) and
            ``sample()`` (returns a random action).
        alpha: Learning rate.
        gamma: Discount factor.
        epsilon: Probability of taking a random (exploratory) action.
    """

    def __init__(self, action_space, alpha=0.1, gamma=0.99, epsilon=0.1):
        # Imported locally so the class does not depend on an extra
        # top-of-notebook import.
        from functools import partial

        self.action_space = action_space
        self.alpha = alpha  # Learning rate
        self.gamma = gamma  # Discount factor
        self.epsilon = epsilon  # Exploration rate
        # A partial (rather than a lambda) as default factory keeps the table
        # picklable, so it can be saved with ``pickle.dump``.
        self.q_table = defaultdict(partial(np.zeros, action_space.n))

    def choose_action(self, state):
        """Return an action for ``state`` using the epsilon-greedy rule.

        Args:
            state: Hashable key into the Q-table.

        Returns:
            A sampled action with probability ``epsilon``, otherwise the index
            of the highest Q-value for ``state``.
        """
        if random.uniform(0, 1) < self.epsilon:
            return self.action_space.sample()  # Explore
        else:
            return np.argmax(self.q_table[state])  # Exploit

    def update_q_table(self, state, action, reward, next_state):
        """Apply one Q-learning update for the given transition.

        Args:
            state: Hashable state in which ``action`` was taken.
            action: Index of the action taken.
            reward: Reward received for the transition.
            next_state: Hashable state reached after the action.
        """
        best_next_action = np.argmax(self.q_table[next_state])
        # Bootstrapped target: immediate reward plus discounted best next value.
        td_target = reward + self.gamma * self.q_table[next_state][best_next_action]
        td_error = td_target - self.q_table[state][action]
        self.q_table[state][action] += self.alpha * td_error

def train_q_learning(env, agents, num_episodes=1000, max_steps=500):
    """Train independent Q-learning agents on a multi-agent environment.

    Each episode resets the environment and steps it until the environment
    reports that it is done or ``max_steps`` steps have been taken, whichever
    comes first. Per-agent reward totals are printed after every episode.

    Args:
        env: Environment whose ``reset()`` returns a mapping of agent id to
            observation and whose ``step(actions)`` returns
            ``(next_obs, rewards, done, info)``.
        agents: Mapping of agent id to an object providing
            ``choose_action(state)`` and
            ``update_q_table(state, action, reward, next_state)``.
        num_episodes: Number of training episodes.
        max_steps: Upper bound on environment steps per episode.
    """
    for episode in range(num_episodes):
        obs = env.reset()
        total_rewards = {agent_id: 0 for agent_id in agents}

        for step in range(max_steps):
            # Observations are converted to tuples so they can key the Q-table.
            states = {agent_id: tuple(obs[agent_id]) for agent_id in agents}
            actions = {
                agent_id: agent.choose_action(states[agent_id])
                for agent_id, agent in agents.items()
            }

            next_obs, rewards, done, _ = env.step(actions)

            for agent_id, agent in agents.items():
                next_state = tuple(next_obs[agent_id])
                agent.update_q_table(states[agent_id], actions[agent_id], rewards[agent_id], next_state)
                total_rewards[agent_id] += rewards[agent_id]

            obs = next_obs

            # `done` may be a per-agent dict (optionally with an '__all__'
            # entry) or a plain flag; stop stepping once the episode is over
            # instead of continuing past the end of the simulation.
            if isinstance(done, dict):
                finished = done['__all__'] if '__all__' in done else all(done.values())
            else:
                finished = bool(done)
            if finished:
                break

        print(f"Episode {episode + 1}: Total Rewards: {total_rewards}")

    print("Training complete.")

def test_agent(env, agents):
    obs = env.reset()
    total_rewards = {agent_id: 0 for agent_id in agents}
    done = False

    while not done:
        actions = {}
        for agent_id, agent in agents.items():
            state = tuple(obs[agent_id])
            action = np.argmax(agent.q_table[state])
            actions[agent_id] = action

        obs, rewards, done, _ = env.step(actions)
        
        for agent_id in agents:
            total_rewards[agent_id] += rewards[agent_id]

    print(f"Total Rewards during testing: {total_rewards}")


    # Initialize the environment
env = sumo_rl.environment.env.SumoEnvironment(
    net_file='third.net.xml',
    route_file='third.rou.xml',
    use_gui=False,
    num_seconds=500,
    single_agent=False,  # Multi-agent setting
    reward_fn='diff-waiting-time',
    min_green=5,
    max_green=50,
)

# Define the agents: one independent Q-learner per traffic light.
agents = {
    'B1': QLearningAgent(action_space=env.action_spaces('B1')),
    'C1': QLearningAgent(action_space=env.action_spaces('C1')),
    # Add more agents if needed
}

# Train the agents. The finally clause shuts the simulator down even when
# training is interrupted or raises, so no SUMO process is left behind.
try:
    train_q_learning(env, agents, num_episodes=100)
finally:
    env.close()

# Collect the learned tables, keyed by traffic-light id.
QT = {agent_id: agent.q_table for agent_id, agent in agents.items()}


# Save Q-tables
# with open("q_tables.pkl", "wb") as f:
#     pickle.dump(QT, f)

# # Load Q-tables (only unpickle files you created yourself)
# with open("q_tables.pkl", "rb") as f:
#     loaded_q_tables = pickle.load(f)
#     for agent_id in agents:
#         agents[agent_id].q_table = loaded_q_tables[agent_id]

# Test the agents
# test_agent(env, agents)


Episode 1: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 2: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 3: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 4: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 5: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 6: Total Rewards: {'B1': 0.0, 'C1': -6.938893903907228e-18}
Episode 7: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 8: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 9: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 10: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 11: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 12: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 13: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 14: Total Rewards: {'B1': 1.3877787807814457e-17, 'C1': 0.0}
Episode 15: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 16: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 17: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 18: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 19: Total Rewards: {'B1': 0.0, 'C1': 0.0}
Episode 20: Total Rew