In [12]:
import traci
import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt

In [25]:
import traci
import sumolib


In [32]:
import gym
from gym import spaces
import numpy as np
import traci

class TrafficSimEnv(gym.Env):
    def __init__(self, sumo_cfg_file, max_steps):
        super(TrafficSimEnv, self).__init__()
        self.sumo_cfg_file = sumo_cfg_file
        self.max_steps = max_steps
        self.current_step = 0

        # State: [highway density, ramp queue length]
        self.observation_space = spaces.Box(low=0, high=1, shape=(2,), dtype=np.float32)

        # Actions: Adjust green light time [-10%, 0%, +10%]
        self.action_space = spaces.Discrete(3)

    def reset(self):
        traci.start(["sumo", "-c", self.sumo_cfg_file])
        self.current_step = 0
        # Reset state variables
        state = self.get_state()
        return state

    def step(self, action):
        # Perform action: Adjust traffic light timings
        self.apply_action(action)

        # Run simulation for one step
        traci.simulationStep()
        self.current_step += 1

        # Get new state
        state = self.get_state()

        # Calculate reward
        reward = self.calculate_reward()

        # Check if done
        done = self.current_step >= self.max_steps

        return state, reward, done, {}

    def get_state(self):
        highway_density = traci.edge.getLastStepOccupancy("in 2to3 out")
        ramp_queue = traci.lanearea.getJamLengthMeters("intramp outramp 2to3 out")
        return np.array([highway_density, ramp_queue], dtype=np.float32)

    def calculate_reward(self):
        highway_flow = traci.edge.getLastStepVehicleNumber("in 2to3 out")
        ramp_queue = traci.lanearea.getJamLengthMeters("intramp outramp 2to3 out")
        return -ramp_queue + highway_flow  # Example: maximize flow, minimize queue

    def apply_action(self, action):
        if action == 0:
            self.adjust_green_time(-10)
        elif action == 2:
            self.adjust_green_time(+10)

    def adjust_green_time(self, change):
        # Modify traffic light phases dynamically
        program_id = traci.trafficlight.getProgram("node6")
        phase = traci.trafficlight.getPhase("node6")
        current_duration = traci.trafficlight.getPhaseDuration("node6")
        traci.trafficlight.setPhaseDuration("node6", current_duration + change)

    def close(self):
        traci.close()


In [34]:
import numpy as np
import random

class QLearningAgent:
    def __init__(self, state_size, action_size, learning_rate=0.1, discount_factor=0.99, exploration_rate=1.0, exploration_decay=0.995):
        self.state_size = state_size
        self.action_size = action_size
        self.q_table = np.zeros((state_size, action_size))
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.exploration_decay = exploration_decay
        self.exploration_min = 0.01

    def choose_action(self, state):
        if np.random.rand() < self.exploration_rate:
            return random.choice(range(self.action_size))
        return np.argmax(self.q_table[state])

    def learn(self, state, action, reward, next_state):
        q_update = reward + self.discount_factor * np.max(self.q_table[next_state])
        self.q_table[state, action] += self.learning_rate * (q_update - self.q_table[state, action])
        self.exploration_rate = max(self.exploration_min, self.exploration_rate * self.exploration_decay)


In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from collections import deque

class DQNAgent:
    def __init__(self, state_size, action_size, learning_rate=0.001):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = learning_rate
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(24, activation='relu', input_dim=self.state_size),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=Adam(learning_rate=self.learning_rate), loss='mse')
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        q_values = self.model.predict(state)
        return np.argmax(q_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward if done else reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay


In [36]:
env = TrafficSimEnv(sumo_cfg_file="/3CS/RL/Rl project/sumo/mynet.sumocfg", max_steps=500)
agent = DQNAgent(state_size=env.observation_space.shape[0], action_size=env.action_space.n)

episodes = 1000
batch_size = 32

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, env.observation_space.shape[0]])
    total_reward = 0

    for time in range(env.max_steps):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, env.observation_space.shape[0]])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

        if done:
            print(f"Episode {e+1}/{episodes}, Total Reward: {total_reward}")
            break

        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

env.close()


TraCIException: Connection 'default' is already active.