<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStarstuff/blob/main/Example_Implementing_Q_Learning_for_Gridworld.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random

# Environment: a square gridworld with grid_size rows and grid_size columns
# (4x4 here).
grid_size = 4

# Parameters
alpha = 0.1  # Learning rate: step size of each Q-value update
gamma = 0.9  # Discount factor applied to the best next-state value
epsilon = 0.2  # Exploration factor: chance of picking a random action
# Action names. A name's position in this list is its index along the last
# axis of Q_table, so the two must always have the same length.
actions = ['up', 'down', 'left', 'right']

# Initialize Q-table (all zeros): one value per (row, column, action).
# The action axis is sized from `actions` rather than a literal 4 so the
# table cannot drift out of sync with the action list.
Q_table = np.zeros((grid_size, grid_size, len(actions)))

# Reward matrix: zero everywhere except the goal state at [3, 3]
rewards = np.zeros((grid_size, grid_size))
rewards[3, 3] = 100  # Goal

def move(state, action, size=None):
    """Return the cell reached by taking ``action`` from ``state``.

    A move that would leave the grid is clamped to the edge, so walking
    into a wall leaves the agent where it is.

    Args:
        state: ``(row, col)`` pair identifying the current cell.
        action: One of ``'up'``, ``'down'``, ``'left'`` or ``'right'``.
        size: Side length of the square grid. When omitted, the
            module-level ``grid_size`` is used.

    Returns:
        The ``(row, col)`` pair of the resulting cell.

    Raises:
        ValueError: If ``action`` is not one of the four known moves.
    """
    if size is None:
        size = grid_size
    last = size - 1  # highest valid row/column index
    i, j = state
    if action == 'up':
        return max(0, i - 1), j
    if action == 'down':
        return min(last, i + 1), j
    if action == 'left':
        return i, max(0, j - 1)
    if action == 'right':
        return i, min(last, j + 1)
    # Fail loudly instead of implicitly returning None, which a caller
    # would otherwise go on to use as an array index.
    raise ValueError(f"Unknown action: {action!r}")

# Tabular Q-learning: 1000 episodes, each starting in the top-left corner
# and running until the agent steps onto the goal cell (3, 3).
for episode in range(1000):
    state = (0, 0)
    while state != (3, 3):
        row, col = state

        # Epsilon-greedy selection: explore with probability epsilon,
        # otherwise take the action with the highest current Q-value
        # (np.argmax resolves ties in favour of the first action).
        exploring = random.uniform(0, 1) < epsilon
        if exploring:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q_table[row, col])]
        action_idx = actions.index(action)  # position on the Q-table's last axis

        # Apply the action; the reward is the one stored for the cell
        # the agent lands on.
        new_state = move(state, action)
        reward = rewards[new_state]

        # Temporal-difference update: nudge the old estimate towards
        # reward + discounted best value of the next state.
        old_q = Q_table[row, col, action_idx]
        best_next = np.max(Q_table[new_state[0], new_state[1]])
        td_error = reward + gamma * best_next - old_q
        Q_table[row, col, action_idx] = old_q + alpha * td_error

        # Continue from the cell just reached
        state = new_state

# Show the learned action values
print("Learned Q-table:")
print(Q_table)