# In [None]:  (Jupyter cell prompt left over from the notebook export)
import pygame
import numpy as np
import random
import time
import os

# Bring up all pygame modules before touching the display or loading images
pygame.init()

# Window geometry in pixels, edge length of one grid cell, and frame rate
WINDOW_WIDTH, WINDOW_HEIGHT = 600, 400
GRID_SIZE = 20
FPS = 10

# Background fill colour (RGB)
BLACK = (0, 0, 0)

# Open the window, give it a title, and create the clock used to pace frames
screen = pygame.display.set_mode((WINDOW_WIDTH, WINDOW_HEIGHT))
pygame.display.set_caption("RL Agent Playing Game")
clock = pygame.time.Clock()

# Sprite files for the player and the target (examples; point these at your
# own asset files)
player_image_path = "dinorun0000.png"
target_image_path = "ground.png"

# Read both sprites from disk ...
player_image, target_image = (
    pygame.image.load(path) for path in (player_image_path, target_image_path)
)

# ... then shrink each one so it occupies exactly one grid cell
player_image, target_image = (
    pygame.transform.scale(sprite, (GRID_SIZE, GRID_SIZE))
    for sprite in (player_image, target_image)
)

# Define the environment (simple grid-based game)
class GameEnv:
    """Grid world in which a player sprite walks towards a randomly placed target.

    Positions are pixel coordinates of a sprite's top-left corner and are
    always multiples of ``GRID_SIZE``.  The observable state is the 4-tuple
    ``(player_x, player_y, target_x, target_y)``.
    """

    def __init__(self):
        self.reset()

    def _get_state(self):
        """Build the state tuple from the current player and target positions."""
        return (
            self.player_pos[0],
            self.player_pos[1],
            self.target_pos[0],
            self.target_pos[1],
        )

    def reset(self):
        """Start a new episode and return its initial state.

        The player is placed on the grid cell at the centre of the window and
        the target on a random cell that is different from the player's.
        """
        # Snap the centre to the grid so the player can actually land on the
        # target even if the window size is not an even number of cells.
        self.player_pos = [
            (WINDOW_WIDTH // 2) // GRID_SIZE * GRID_SIZE,
            (WINDOW_HEIGHT // 2) // GRID_SIZE * GRID_SIZE,
        ]

        max_col = (WINDOW_WIDTH - GRID_SIZE) // GRID_SIZE
        max_row = (WINDOW_HEIGHT - GRID_SIZE) // GRID_SIZE
        # A grid with a single cell leaves no alternative position to draw.
        single_cell = max_col <= 0 and max_row <= 0
        while True:
            self.target_pos = [
                random.randint(0, max_col) * GRID_SIZE,
                random.randint(0, max_row) * GRID_SIZE,
            ]
            # Redraw when the target lands on the player: such an episode would
            # begin on the goal without being flagged as done, and the goal
            # state would then be trained on like any ordinary state.
            if single_cell or self.target_pos != self.player_pos:
                break

        self.state = self._get_state()
        return self.state

    def step(self, action):
        """Apply one action and return ``(state, reward, done)``.

        Actions: 0 = up, 1 = down, 2 = left, 3 = right.  A move that would
        leave the window (or an unrecognised action) keeps the player in place
        but still counts as a step.  The reward is 1 when the player is on the
        target after the move and -0.1 otherwise.
        """
        if action == 0 and self.player_pos[1] > 0:
            self.player_pos[1] -= GRID_SIZE
        elif action == 1 and self.player_pos[1] < WINDOW_HEIGHT - GRID_SIZE:
            self.player_pos[1] += GRID_SIZE
        elif action == 2 and self.player_pos[0] > 0:
            self.player_pos[0] -= GRID_SIZE
        elif action == 3 and self.player_pos[0] < WINDOW_WIDTH - GRID_SIZE:
            self.player_pos[0] += GRID_SIZE

        # The episode ends as soon as the player occupies the target's cell.
        done = self.player_pos == self.target_pos
        reward = 1 if done else -0.1

        self.state = self._get_state()
        return self.state, reward, done

    def render(self):
        """Draw the current frame: black background, then player and target."""
        screen.fill(BLACK)
        screen.blit(player_image, (self.player_pos[0], self.player_pos[1]))
        screen.blit(target_image, (self.target_pos[0], self.target_pos[1]))
        pygame.display.flip()

# Define the Q-Learning agent
class QLearningAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration.

    Args:
        state_space: Sequence giving the number of discrete values along each
            state dimension; states are index tuples into that grid.
        action_space: Number of discrete actions (actions are ``0..n-1``).
        alpha: Learning rate applied to each temporal-difference update.
        gamma: Discount factor for the bootstrapped next-state value.
        epsilon: Initial probability of picking a random action.
        epsilon_decay: Factor ``epsilon`` is multiplied by after every
            ``learn`` call.
        epsilon_min: Floor below which decay never pushes ``epsilon``, so a
            little exploration survives long training runs.
    """

    def __init__(self, state_space, action_space, alpha=0.1, gamma=0.9,
                 epsilon=1.0, epsilon_decay=0.995, epsilon_min=0.01):
        # One Q-value per (state..., action) combination, all starting at zero.
        self.q_table = np.zeros(tuple(state_space) + (action_space,))
        self.alpha = alpha
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.action_space = action_space

    def choose_action(self, state):
        """Return an action for ``state`` as a plain ``int``.

        With probability ``epsilon`` a uniformly random action is returned;
        otherwise the action with the highest Q-value (first one on ties).
        """
        if random.random() < self.epsilon:
            return random.randint(0, self.action_space - 1)  # Explore
        # tuple() keeps this basic indexing even when a list is passed in, and
        # int() makes both branches return the same type.
        return int(np.argmax(self.q_table[tuple(state)]))  # Exploit

    def learn(self, state, action, reward, next_state, done=False):
        """Apply one Q-learning update for an observed transition.

        Args:
            state: State the action was taken in.
            action: Index of the action that was taken.
            reward: Reward received for the transition.
            next_state: State reached after the action.
            done: True when ``next_state`` ended the episode; the update then
                uses the reward alone instead of bootstrapping.
        """
        # Address the cell with a single full index tuple.  Indexing with a
        # list would select a copy and the in-place update would be lost.
        cell = tuple(state) + (action,)

        if done:
            best_next = 0.0  # nothing follows a terminal state
        else:
            best_next = np.max(self.q_table[tuple(next_state)])

        td_error = reward + self.gamma * best_next - self.q_table[cell]
        self.q_table[cell] += self.alpha * td_error

        # Decay exploration, but stop at the floor.  The guard also ensures an
        # epsilon already below the floor (e.g. 0 for pure greedy) is never
        # raised by this step.
        if self.epsilon > self.epsilon_min:
            self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

# Main training and playing loop
def _quit_requested():
    """Drain pygame's event queue and report whether the window was closed."""
    return any(event.type == pygame.QUIT for event in pygame.event.get())


def train_and_play(episodes=500, play_episodes=10):
    """Train a Q-learning agent on ``GameEnv``, then show it playing.

    Training runs without rendering; afterwards the greedy policy is rendered
    at ``FPS`` frames per second.  Closing the window stops the run early, and
    pygame is shut down on every exit path.

    Args:
        episodes: Number of training episodes.
        play_episodes: Number of rendered episodes played with the learned
            policy.
    """
    env = GameEnv()
    # Discretizing state space: dividing by grid size to get grid-based positions
    state_space = (
        WINDOW_WIDTH // GRID_SIZE,
        WINDOW_HEIGHT // GRID_SIZE,
        WINDOW_WIDTH // GRID_SIZE,
        WINDOW_HEIGHT // GRID_SIZE,
    )
    action_space = 4  # up, down, left, right
    agent = QLearningAgent(state_space, action_space)

    try:
        for episode in range(episodes):
            # Service the event queue once per episode so the window stays
            # responsive during training and can be closed.
            if _quit_requested():
                return

            state = env.reset()
            done = False
            while not done:
                # Scale pixel coordinates down to grid indices for the Q-table
                state_scaled = tuple(s // GRID_SIZE for s in state)
                action = agent.choose_action(state_scaled)
                next_state, reward, done = env.step(action)
                next_state_scaled = tuple(s // GRID_SIZE for s in next_state)
                agent.learn(state_scaled, action, reward, next_state_scaled)
                state = next_state

            if episode % 50 == 0:
                print(f"Episode {episode}, Epsilon: {agent.epsilon:.2f}")

        # Play the game after training
        print("Training complete! Watch the agent play...")
        time.sleep(2)
        for _ in range(play_episodes):
            state = env.reset()
            done = False
            visited = set()
            while not done:
                if _quit_requested():
                    return
                env.render()
                state_scaled = tuple(s // GRID_SIZE for s in state)
                # The greedy policy and the environment are both deterministic
                # and nothing is learned here, so seeing a state twice means
                # the agent would cycle forever (e.g. pushing against a wall).
                if state_scaled in visited:
                    print("Agent is stuck in a loop; moving on to the next episode.")
                    break
                visited.add(state_scaled)
                action = int(np.argmax(agent.q_table[state_scaled]))  # Use the trained policy
                state, _, done = env.step(action)
                clock.tick(FPS)
    finally:
        pygame.quit()

# Start training and the demo only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    train_and_play()
