In [1]:
import numpy as np
import random

# --- Environment -----------------------------------------------------------
NUM_STATES = 10   # discrete queue levels, indexed 0..9
NUM_ACTIONS = 2   # action 0 -> NS green, action 1 -> EW green
EPISODES = 5000   # number of training episodes

# --- Q-learning hyperparameters --------------------------------------------
ALPHA = 0.1       # learning rate (step size of each Q update)
GAMMA = 0.9       # discount factor applied to the bootstrapped value
EPSILON = 0.1     # probability of taking a random (exploratory) action

# Q-table: one row per state, one column per action, initialised to zero.
Q = np.zeros(shape=(NUM_STATES, NUM_ACTIONS))

# Simulated traffic environment
def get_next_state(state, action):
    """Sample the next simplified queue level after one signal phase.

    One queue is the current ``state`` reduced by a random 1-3 vehicles
    (floored at 0); the other is a fresh random level of 0-3 (capped at
    ``NUM_STATES - 1``). The result is the floor of their average.

    Args:
        state: Current queue level.
        action: 0 for NS green, anything else for EW green.

    Returns:
        The next queue level as an integer.
    """
    # Draw order matters for reproducibility under a seeded RNG:
    # first the number of vehicles served, then the new arrivals.
    served = random.randint(1, 3)
    arrived = random.randint(0, 3)

    drained_queue = max(0, state - served)
    refilled_queue = min(NUM_STATES - 1, arrived)

    # NOTE(review): the two branches only swap which direction gets which
    # value, and the average below is symmetric, so the returned level does
    # not actually depend on `action`.
    if action == 0:
        ns, ew = drained_queue, refilled_queue   # NS green
    else:
        ns, ew = refilled_queue, drained_queue   # EW green

    return (ns + ew) // 2

def get_reward(state, action):
    """Return the reward for a step taken from ``state``.

    The reward is the negated queue level, so shorter queues score higher.
    ``action`` is accepted but not used in the computation.
    """
    queue_penalty = state
    return -queue_penalty

# Tabular Q-learning: each episode starts at a random queue level and runs
# for a fixed number of time steps.
for episode in range(EPISODES):
    state = random.randint(0, NUM_STATES - 1)
    for step in range(100):  # time steps per episode
        # Epsilon-greedy: explore with probability EPSILON, otherwise take
        # the action with the highest current Q-value for this state.
        explore = random.random() < EPSILON
        if explore:
            action = random.randint(0, NUM_ACTIONS - 1)
        else:
            action = np.argmax(Q[state])

        # The reward is computed from the state the step starts in.
        reward = get_reward(state, action)
        next_state = get_next_state(state, action)

        # Move Q(state, action) a fraction ALPHA towards the one-step target
        # reward + GAMMA * max_a Q(next_state, a).
        td_target = reward + GAMMA * np.max(Q[next_state])
        td_error = td_target - Q[state, action]
        Q[state, action] += ALPHA * td_error

        state = next_state

# Show the learned Q-table (rows = queue levels, columns = actions).
print("Trained Q-table:", Q, sep="\n")

def get_optimal_action(queue_level):
    """Return the signal phase the learned Q-table prefers for a queue level.

    Args:
        queue_level: Non-negative integer row index into ``Q``.

    Returns:
        ``"NS Green"`` when action 0 has the highest Q-value for that state
        (ties resolve to action 0 because ``np.argmax`` returns the first
        maximum), otherwise ``"EW Green"``.

    Raises:
        IndexError: If ``queue_level`` is negative or beyond the last row
            of ``Q``.
    """
    # NumPy interprets negative indices as counting from the end, so without
    # this guard -1 would silently return the policy for the highest queue
    # level instead of signalling an invalid input.
    if queue_level < 0:
        raise IndexError(
            f"queue_level must be non-negative, got {queue_level}"
        )
    best_action = np.argmax(Q[queue_level])
    return "NS Green" if best_action == 0 else "EW Green"

# Demo: query the learned policy for a single queue level.
current_queue = 6
suggested_signal = get_optimal_action(current_queue)
print(f"Current queue level: {current_queue}, Suggested signal: {suggested_signal}")


Trained Q-table:
[[ -4.70698225  -4.38575038]
 [ -5.18443789  -5.73467473]
 [ -6.75251012  -6.89085226]
 [ -7.9291886   -8.08184988]
 [ -9.39063566  -9.67148279]
 [-10.76453974 -11.20754648]
 [-12.97467275 -12.43991618]
 [-14.06730281 -14.52705293]
 [-15.68682311 -16.14510701]
 [-17.82727652 -17.39628473]]
Current queue level: 6, Suggested signal: EW Green
