<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/Example_Implementing_Q_Learning_for_a_Simple_Grid_Environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# Grid-world environment: a 5x5 board where every cell yields zero reward
# except the bottom-right corner, which is the goal.
grid_size = (5, 5)
rewards = np.zeros(grid_size)
rewards[4, 4] = 1.0  # Entering the goal cell pays a reward of 1

# Q-learning hyperparameters
learning_rate = 0.1  # Step size applied to each temporal-difference update
discount_factor = 0.95  # Weight given to the estimated value of the next state
epsilon = 0.9  # Probability of taking a random (exploratory) action

# One Q-value per (row, col, action); actions are 0=up, 1=down, 2=left, 3=right
q_table = np.zeros(grid_size + (4,))

# Q-learning helpers: epsilon-greedy action selection and grid transitions
def choose_action(state):
    """Select an action for ``state`` using an epsilon-greedy rule.

    A uniform random draw is compared against the module-level ``epsilon``.
    When the draw falls below it, one of the four actions is sampled
    uniformly at random; otherwise the index of the largest entry of
    ``q_table[state]`` is returned (``np.argmax`` picks the first maximum
    when several entries tie).
    """
    exploit = not (np.random.rand() < epsilon)
    if exploit:
        return np.argmax(q_table[state])  # Greedy choice
    return np.random.randint(4)  # Random exploratory choice

def take_action(state, action):
    """Return the cell reached by applying ``action`` from ``state``.

    ``state`` is unpacked as ``(row, col)``. Actions are 0=up, 1=down,
    2=left, 3=right. A move that would leave the grid described by the
    module-level ``grid_size`` is clamped at the border, so the agent stays
    on the edge cell. Any other action value leaves the position unchanged.
    """
    r, c = state
    if action == 0:  # Up: decrease the row, stopping at the top edge
        return (max(0, r - 1), c)
    if action == 1:  # Down: increase the row, stopping at the bottom edge
        return (min(grid_size[0] - 1, r + 1), c)
    if action == 2:  # Left: decrease the column, stopping at the left edge
        return (r, max(0, c - 1))
    if action == 3:  # Right: increase the column, stopping at the right edge
        return (r, min(grid_size[1] - 1, c + 1))
    return (r, c)

# Train the agent with tabular Q-learning
episodes = 1000
for episode in range(episodes):
    state = (0, 0)  # Every episode starts in the top-left cell
    for step in range(100):  # Cap the number of steps per episode
        action = choose_action(state)
        next_state = take_action(state, action)
        reward = rewards[next_state]

        # Temporal-difference target: immediate reward plus the discounted
        # value of the best action available from the next state.
        best_next_action = np.argmax(q_table[next_state])
        td_target = reward + discount_factor * q_table[next_state][best_next_action]

        # Move the current estimate a fraction (learning_rate) of the way
        # toward the target.
        td_error = td_target - q_table[state][action]
        q_table[state][action] += learning_rate * td_error

        state = next_state

        # The goal cell is terminal: stop the episode once it is entered
        reached_goal = state == (4, 4)
        if reached_goal:
            break

# Print the learned Q-table
print("Learned Q-Table:")
print(q_table)

# Test the learned policy by following the greedy action from the start cell.
# The rollout is capped because a poorly trained table can make the greedy
# policy cycle or sit against a wall without ever reaching the goal.
max_test_steps = 100
state = (0, 0)
for step in range(max_test_steps):
    action = np.argmax(q_table[state])
    print(f"Step {step}: {state} -> Action: {action}")
    state = take_action(state, action)
    if state == (4, 4):
        print("Reached the goal!")
        break
else:
    # Runs only when the loop ends without hitting `break`, i.e. the greedy
    # policy never reached the goal; report it instead of ending silently.
    print(f"Did not reach the goal within {max_test_steps} steps.")