# In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from PIL import Image

# Define the size of the environment
rows, cols = 10, 10

# Number of block cells and reward cells to place in the grid
num_blocks, num_rewards = 15, 5

# Create the environment matrix (0s for empty cells, 1s for blocks, 2s for rewards)
environment = np.zeros((rows, cols), dtype=int)

# Draw every special cell in a single sample *without replacement* so that
# exactly num_blocks blocks and num_rewards rewards end up in the grid.
# Independent draws could hit the same cell twice or let a reward overwrite a
# block. Flat index 0 is left out of the pool so the top-left cell, where the
# agent starts, always stays empty.
special_cells = np.random.choice(
    np.arange(1, rows * cols),
    size=num_blocks + num_rewards,
    replace=False,
)

# Add the random blocks
environment.flat[special_cells[:num_blocks]] = 1

# Add the random rewards
environment.flat[special_cells[num_blocks:]] = 2

# Define agent class
class QLearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy action policy.

    Attributes:
        num_states: Number of rows in the Q-table.
        num_actions: Number of columns in the Q-table.
        learning_rate: Step size applied to the temporal-difference error.
        discount_factor: Weight given to the best Q-value of the next state.
        exploration_rate: Probability of choosing a uniformly random action.
        q_values: Array of shape ``(num_states, num_actions)``, initialised
            to zeros.
    """

    def __init__(self, num_states, num_actions, learning_rate=0.1, discount_factor=0.9, exploration_rate=0.1):
        """Store the hyper-parameters and create an all-zero Q-table."""
        self.num_states = num_states
        self.num_actions = num_actions
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate
        self.q_values = np.zeros((num_states, num_actions))

    def select_action(self, state):
        """Pick an action for ``state`` using an epsilon-greedy rule.

        Args:
            state: Row index into ``q_values``.

        Returns:
            The chosen action index as a plain ``int``.
        """
        if np.random.rand() < self.exploration_rate:
            return int(np.random.randint(self.num_actions))

        # Break ties between equally good actions at random. A bare argmax
        # always returns the lowest index, which makes an untrained agent
        # (all-zero Q-table) repeat action 0 on every greedy step.
        state_q = self.q_values[state]
        best_actions = np.flatnonzero(state_q == state_q.max())
        return int(np.random.choice(best_actions))

    def update_q_values(self, state, action, reward, next_state):
        """Apply one Q-learning update for an observed transition.

        Args:
            state: Index of the state the action was taken in.
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            next_state: Index of the state reached afterwards.
        """
        max_q_next = np.max(self.q_values[next_state])
        td_target = reward + self.discount_factor * max_q_next
        td_error = td_target - self.q_values[state, action]
        self.q_values[state, action] += self.learning_rate * td_error

# Define the update function
def update(frame):
    """Advance the agent by one Q-learning step and redraw the grid.

    Intended as the per-frame callback of the animation. Reads the
    module-level ``environment``, ``rows``, ``cols``, ``agent`` and
    ``agent_pos``; moves ``agent_pos`` in place and updates the agent's
    Q-table.

    Args:
        frame: Frame value supplied by the animation; only shown in the title.
    """
    # Encode the (row, col) position as a flat state index
    state = agent_pos[0] * cols + agent_pos[1]
    action = agent.select_action(state)

    # Row/column offsets for the four actions; any other action is a no-op
    moves = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}  # up, down, left, right
    d_row, d_col = moves.get(action, (0, 0))
    new_row, new_col = agent_pos[0] + d_row, agent_pos[1] + d_col

    # Move only when the target cell is inside the grid and is not a block;
    # otherwise the agent stays where it is.
    inside_grid = 0 <= new_row < rows and 0 <= new_col < cols
    if inside_grid and environment[new_row, new_col] != 1:
        agent_pos[0], agent_pos[1] = new_row, new_col

    next_state = agent_pos[0] * cols + agent_pos[1]
    # Only reward cells (value 2) pay out. Using the raw cell value would
    # hand out a positive reward for block cells (value 1) as well.
    reward = 2 if environment[agent_pos[0], agent_pos[1]] == 2 else 0
    agent.update_q_values(state, action, reward, next_state)

    # Update the environment with agent's position
    updated_environment = np.copy(environment)
    updated_environment[agent_pos[0], agent_pos[1]] = 3  # Agent represented by value 3

    # Plot the environment. Clear the axes first so image artists from earlier
    # frames do not pile up, and pin the colour range to the four cell codes
    # (0-3) so each code keeps the same colour in every frame.
    plt.cla()
    plt.imshow(updated_environment, cmap='tab10', vmin=0, vmax=3)
    plt.axis('off')  # Turn off axis
    plt.title(f'Frame: {frame}')

# Set up the learner: one state per grid cell, four movement actions
agent = QLearningAgent(num_states=rows * cols, num_actions=4)

# The agent starts in the top-left cell, kept as a mutable [row, col] pair
agent_pos = [0, 0]

# Create the figure and axes the animation is rendered into
fig, ax = plt.subplots()

# Build a 100-frame animation driven by update(), with blitting disabled
ani = FuncAnimation(fig=fig, func=update, frames=100, blit=False)

# Write the animation to a GIF with the Pillow writer at 10 frames per second
ani.save('qlearning_animation.gif', writer='pillow', fps=10)

# Finally display the figure
plt.show()
