<a href="https://colab.research.google.com/github/AmmarJamshed/Economy21/blob/main/Untitled66.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import random

# Environment: the discrete situations the vehicle can be in and the
# manoeuvres it can choose from.
states = [
    "Safe",
    "Pedestrian Detected",
    "Near Collision",
    "Collision",
]
actions = [
    "Slow Down",
    "Stop",
    "Change Direction",
    "Move Forward",
]

# Q-Learning hyperparameters
alpha, gamma, epsilon = 0.1, 0.9, 0.2  # learning rate, discount factor, exploration rate
episodes = 1000  # number of training episodes

# Q-table: one row per state, one column per action, every estimate starts at 0
q_table = np.zeros(shape=(len(states), len(actions)), dtype=float)

# State transition simulation
# Every (state, action) pair with a defined outcome, mapped to the
# (next_state, reward) it produces.
_TRANSITIONS = {
    ("Safe", "Move Forward"): ("Safe", 1),
    ("Safe", "Slow Down"): ("Pedestrian Detected", -1),
    ("Safe", "Stop"): ("Pedestrian Detected", -1),
    ("Pedestrian Detected", "Stop"): ("Safe", 5),
    ("Pedestrian Detected", "Slow Down"): ("Near Collision", -10),
    ("Pedestrian Detected", "Move Forward"): ("Collision", -50),
    ("Near Collision", "Stop"): ("Safe", 10),
    ("Near Collision", "Change Direction"): ("Safe", 20),
    ("Near Collision", "Move Forward"): ("Collision", -50),
}


def get_next_state(state, action):
    """Return the ``(next_state, reward)`` pair for taking *action* in *state*.

    Pairs listed in the transition table yield their fixed outcome. Any other
    combination (including every action taken from "Collision") leaves the
    state unchanged and yields a reward of 0.
    """
    outcome = _TRANSITIONS.get((state, action))
    if outcome is None:
        # No rule for this pair: stay where we are, no reward.
        return state, 0
    return outcome

# Lookup tables translating state / action names into Q-table row / column
# positions, following the order of the `states` and `actions` lists.
state_to_index = dict(zip(states, range(len(states))))
action_to_index = dict(zip(actions, range(len(actions))))

# Q-Learning algorithm: tabular updates with an epsilon-greedy behaviour
# policy. Every episode starts in "Safe" and runs for at most 50 steps.
for episode in range(episodes):
    state = "Safe"
    for _ in range(50):
        state_idx = state_to_index[state]

        # With probability epsilon pick a random action (explore); otherwise
        # take the action with the highest current estimate (exploit).
        explore = random.random() < epsilon
        if explore:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(q_table[state_idx])]
        action_idx = action_to_index[action]

        # Apply the action to the simulated environment.
        next_state, reward = get_next_state(state, action)
        next_state_idx = state_to_index[next_state]

        # Move the estimate a fraction alpha towards the one-step target
        # reward + gamma * (best estimate available in the next state).
        current = q_table[state_idx, action_idx]
        best_next = np.max(q_table[next_state_idx])
        q_table[state_idx, action_idx] = current + alpha * (
            reward + gamma * best_next - current
        )

        state = next_state

        # A collision ends the episode early.
        if state == "Collision":
            break

# Show the learned action values (rows follow `states`, columns follow `actions`).
print("Q-Table after training:", q_table, sep="\n")

# Simulate the policy
def simulate_policy(max_steps=50):
    """Print a greedy rollout of the learned policy, starting from "Safe".

    At each step the action with the highest Q-value for the current state is
    taken and the resulting state is printed. The rollout ends as soon as the
    next state is "Safe" or "Collision".

    Args:
        max_steps: Upper bound on the number of actions taken. Some
            (state, action) pairs leave the state unchanged, so a greedy
            policy that selects one of them would otherwise repeat the same
            step forever (this happens, for example, with an all-zero
            Q-table, where np.argmax always returns the first action).

    Returns:
        None. The trace is written to standard output.
    """
    state = "Safe"
    print(f"Initial State: {state}")
    for _step in range(max_steps):
        greedy_idx = np.argmax(q_table[state_to_index[state]])
        action = actions[greedy_idx]
        print(f"Action Taken: {action}")
        next_state, _reward = get_next_state(state, action)
        print(f"Next State: {next_state}")
        if next_state in ("Safe", "Collision"):
            return
        state = next_state
    # Only reached when the greedy policy never got back to "Safe" or crashed.
    print(f"Stopped after {max_steps} steps without reaching Safe or Collision.")

# Print a greedy rollout that follows the highest-valued action in q_table at each step.
simulate_policy()

Q-Table after training:
[[ 22.87822878  22.87822878  20.5904059   21.5904059 ]
 [ 26.53136531  25.5904059   23.87822878 -50.        ]
 [ 36.53136531  30.5904059   40.5904059  -50.        ]
 [  0.           0.           0.           0.        ]]
Initial State: Safe
Action Taken: Slow Down
Next State: Pedestrian Detected
Action Taken: Slow Down
Next State: Near Collision
Action Taken: Change Direction
Next State: Safe
