<a href="https://colab.research.google.com/github/2303A41447/ADM-2303A51447/blob/main/reinforcement%20project%202.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import random
import numpy as np

# -----------------------------------------
# SMART TRAFFIC LIGHT RL – NO SUMO VERSION
# -----------------------------------------

class SmartTrafficEnv:
    """
    Simple simulated environment for a traffic light with 2 directions:
    - North-South (NS)
    - East-West (EW)

    Each direction has queue increasing randomly.
    The RL agent selects which direction gets GREEN.
    """

    def __init__(self):
        self.state = [0, 0]   # [NS_queue, EW_queue]
        self.max_cars = 20
        self.done = False

    def reset(self):
        self.state = [random.randint(0, 5), random.randint(0, 5)]
        self.done = False
        return self.state

    def step(self, action):
        """
        action = 0 → NS green
        action = 1 → EW green
        """

        # Cars arrive randomly (0–3 cars)
        arrivals_NS = random.randint(0, 3)
        arrivals_EW = random.randint(0, 3)

        self.state[0] += arrivals_NS
        self.state[1] += arrivals_EW

        # Cars pass on the green light
        if action == 0:   # NS green
            self.state[0] = max(0, self.state[0] - random.randint(2, 5))
        else:             # EW green
            self.state[1] = max(0, self.state[1] - random.randint(2, 5))

        # Reward = negative total queue length
        reward = -(self.state[0] + self.state[1])

        # Episode ends if queues get too large
        if self.state[0] > self.max_cars or self.state[1] > self.max_cars:
            self.done = True

        return self.state, reward, self.done


# -----------------------------------------
# Q-LEARNING AGENT
# -----------------------------------------

class QLearningAgent:
    def __init__(self, alpha=0.1, gamma=0.9, epsilon=0.2):
        self.q_table = {}      # key: state, value: [Q_NS, Q_EW]
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def get_q(self, state):
        state_key = tuple(state)
        if state_key not in self.q_table:
            self.q_table[state_key] = [0, 0]
        return self.q_table[state_key]

    def choose_action(self, state):
        if random.random() < self.epsilon:
            return random.choice([0, 1])  # explore
        return np.argmax(self.get_q(state))  # exploit

    def update(self, state, action, reward, next_state):
        q_values = self.get_q(state)
        max_future = max(self.get_q(next_state))
        q_values[action] += self.alpha * (reward + self.gamma * max_future - q_values[action])


# -----------------------------------------
# TRAINING LOOP
# -----------------------------------------

env = SmartTrafficEnv()
agent = QLearningAgent()

EPISODES = 300

for ep in range(EPISODES):
    state = env.reset()
    total_reward = 0

    while True:
        action = agent.choose_action(state)
        next_state, reward, done = env.step(action)
        agent.update(state, action, reward, next_state)

        state = next_state
        total_reward += reward

        if done:
            break

    print(f"Episode {ep+1}: total reward = {total_reward}")

print("\nTraining complete!")
print("Q-Table size:", len(agent.q_table))


Episode 1: total reward = -235
Episode 2: total reward = -1011
Episode 3: total reward = -67376
Episode 4: total reward = -64860
Episode 5: total reward = -31750
Episode 6: total reward = -9649
Episode 7: total reward = -34982
Episode 8: total reward = -55608
Episode 9: total reward = -30163
Episode 10: total reward = -28185
Episode 11: total reward = -30146
Episode 12: total reward = -149577
Episode 13: total reward = -55644
Episode 14: total reward = -8090
Episode 15: total reward = -35533
Episode 16: total reward = -138406
Episode 17: total reward = -13743
Episode 18: total reward = -59225
Episode 19: total reward = -167155
Episode 20: total reward = -44477
Episode 21: total reward = -221151
Episode 22: total reward = -12198
Episode 23: total reward = -30895
Episode 24: total reward = -14208
Episode 25: total reward = -54226
Episode 26: total reward = -193332
Episode 27: total reward = -18640
Episode 28: total reward = -66123
Episode 29: total reward = -29304
Episode 30: total rewar

In [3]:
import random
import numpy as np

# -------------------------------------------------------
# SMART TRAFFIC LIGHT ENVIRONMENT (NO SUMO)
# -------------------------------------------------------
class SmartTrafficEnv:
    """
    Simulated traffic light with:
    - 2 directions: North-South (NS) and East-West (EW)
    - Cars arrive randomly
    - RL agent chooses which direction gets GREEN
    """

    def __init__(self):
        self.max_queue = 30
        self.reset()

    def reset(self):
        # Initial queues of cars
        self.state = [random.randint(0, 5), random.randint(0, 5)]  # [NS_queue, EW_queue]
        self.done = False
        return self.state

    def step(self, action):
        """
        action = 0 → NS green
        action = 1 → EW green
        """

        # Random car arrivals (0–3 cars per direction)
        self.state[0] += random.randint(0, 3)
        self.state[1] += random.randint(0, 3)

        # Green light reduces queue
        if action == 0:  # NS
            self.state[0] = max(0, self.state[0] - random.randint(2, 6))
        else:            # EW
            self.state[1] = max(0, self.state[1] - random.randint(2, 6))

        # Reward = negative total queue -> minimize traffic
        reward = -(self.state[0] + self.state[1])

        # Episode ends if queue grows too large
        if self.state[0] > self.max_queue or self.state[1] > self.max_queue:
            self.done = True

        return self.state, reward, self.done


# -------------------------------------------------------
# Q-LEARNING AGENT
# -------------------------------------------------------
class QLearningAgent:
    def __init__(self, alpha=0.2, gamma=0.9, epsilon=0.1):
        self.q_table = {}  # state → [q_NS, q_EW]
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def get_qs(self, state):
        key = tuple(state)
        if key not in self.q_table:
            self.q_table[key] = [0, 0]  # [Q for NS, Q for EW]
        return self.q_table[key]

    def choose_action(self, state):
        # Exploration
        if random.random() < self.epsilon:
            return random.randint(0, 1)

        # Exploitation (choose best action)
        qs = self.get_qs(state)
        return np.argmax(qs)

    def update(self, state, action, reward, next_state):
        qs = self.get_qs(state)
        max_future_q = max(self.get_qs(next_state))
        qs[action] += self.alpha * (reward + self.gamma * max_future_q - qs[action])


# -------------------------------------------------------
# TRAINING LOOP
# -------------------------------------------------------
env = SmartTrafficEnv()
agent = QLearningAgent()

EPISODES = 300

for episode in range(EPISODES):
    state = env.reset()
    total_reward = 0

    while True:
        action = agent.choose_action(state)
        next_state, reward, done = env.step(action)
        agent.update(state, action, reward, next_state)

        state = next_state
        total_reward += reward

        if done:
            break

    print(f"Episode {episode+1}: Total Reward = {total_reward}")

print("\nTraining Completed!")
print("Q-table entries:", len(agent.q_table))


Episode 1: Total Reward = -6447239
Episode 2: Total Reward = -622922
Episode 3: Total Reward = -1456251
Episode 4: Total Reward = -1024462
Episode 5: Total Reward = -68705
Episode 6: Total Reward = -22485
Episode 7: Total Reward = -133813
Episode 8: Total Reward = -17684
Episode 9: Total Reward = -30506
Episode 10: Total Reward = -163314
Episode 11: Total Reward = -99481
Episode 12: Total Reward = -306291
Episode 13: Total Reward = -345255
Episode 14: Total Reward = -21973
Episode 15: Total Reward = -115143
Episode 16: Total Reward = -243560
Episode 17: Total Reward = -154798
Episode 18: Total Reward = -62809
Episode 19: Total Reward = -83405
Episode 20: Total Reward = -28835
Episode 21: Total Reward = -62426
Episode 22: Total Reward = -125732
Episode 23: Total Reward = -53124
Episode 24: Total Reward = -45882
Episode 25: Total Reward = -252953
Episode 26: Total Reward = -40566
Episode 27: Total Reward = -212741
Episode 28: Total Reward = -98975
Episode 29: Total Reward = -107022
Episo

In [5]:
import random
import numpy as np

# -------------------------------------------------------
# SMART TRAFFIC LIGHT ENVIRONMENT (NO SUMO)
# -------------------------------------------------------
class SmartTrafficEnv:
    """
    Simulated traffic light with:
    - 2 directions: North-South (NS) and East-West (EW)
    - Cars arrive randomly
    - RL agent chooses which direction gets GREEN
    """

    def __init__(self):
        self.max_queue = 30
        self.reset()

    def reset(self):
        # Initial queues of cars
        self.state = [random.randint(0, 5), random.randint(0, 5)]  # [NS_queue, EW_queue]
        self.done = False
        return self.state

    def step(self, action):
        """
        action = 0 → NS green
        action = 1 → EW green
        """

        # Random car arrivals (0–3 cars per direction)
        self.state[0] += random.randint(0, 3)
        self.state[1] += random.randint(0, 3)

        # Green light reduces queue
        if action == 0:  # NS
            self.state[0] = max(0, self.state[0] - random.randint(2, 6))
        else:            # EW
            self.state[1] = max(0, self.state[1] - random.randint(2, 6))

        # Reward = negative total queue -> minimize traffic
        reward = -(self.state[0] + self.state[1])

        # Episode ends if queue grows too large
        if self.state[0] > self.max_queue or self.state[1] > self.max_queue:
            self.done = True

        return self.state, reward, self.done


# -------------------------------------------------------
# Q-LEARNING AGENT
# -------------------------------------------------------
class QLearningAgent:
    def __init__(self, alpha=0.2, gamma=0.9, epsilon=0.1):
        self.q_table = {}  # state → [q_NS, q_EW]
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon

    def get_qs(self, state):
        key = tuple(state)
        if key not in self.q_table:
            self.q_table[key] = [0, 0]  # [Q for NS, Q for EW]
        return self.q_table[key]

    def choose_action(self, state):
        # Exploration
        if random.random() < self.epsilon:
            return random.randint(0, 1)

        # Exploitation (choose best action)
        qs = self.get_qs(state)
        return np.argmax(qs)

    def update(self, state, action, reward, next_state):
        qs = self.get_qs(state)
        max_future_q = max(self.get_qs(next_state))
        qs[action] += self.alpha * (reward + self.gamma * max_future_q - qs[action])


# -------------------------------------------------------
# TRAINING LOOP
# -------------------------------------------------------
env = SmartTrafficEnv()
agent = QLearningAgent()

EPISODES = 300

for episode in range(EPISODES):
    state = env.reset()
    total_reward = 0

    while True:
        action = agent.choose_action(state)
        next_state, reward, done = env.step(action)
        agent.update(state, action, reward, next_state)

        state = next_state
        total_reward += reward

        if done:
            break

    print(f"Episode {episode+1}: Total Reward = {total_reward}")

print("\nTraining Completed!")
print("Q-table entries:", len(agent.q_table))


Episode 1: Total Reward = -6482114
Episode 2: Total Reward = -1004926
Episode 3: Total Reward = -1322334
Episode 4: Total Reward = -1336647
Episode 5: Total Reward = -286458
Episode 6: Total Reward = -91176
Episode 7: Total Reward = -593453
Episode 8: Total Reward = -44662
Episode 9: Total Reward = -392336
Episode 10: Total Reward = -612005
Episode 11: Total Reward = -354503
Episode 12: Total Reward = -143529
Episode 13: Total Reward = -183706
Episode 14: Total Reward = -116057
Episode 15: Total Reward = -99235
Episode 16: Total Reward = -7181
Episode 17: Total Reward = -16383
Episode 18: Total Reward = -144275
Episode 19: Total Reward = -8862
Episode 20: Total Reward = -18585
Episode 21: Total Reward = -3135
Episode 22: Total Reward = -259646
Episode 23: Total Reward = -22210
Episode 24: Total Reward = -226089
Episode 25: Total Reward = -66947
Episode 26: Total Reward = -23376
Episode 27: Total Reward = -96866
Episode 28: Total Reward = -154534
Episode 29: Total Reward = -360320
Episo