#### Importaciones

In [None]:
import gym
from gym import spaces
import numpy as np
import pygame
import random
import sys

# Entorno jugable

In [None]:
pygame.init()

# Configuración de pantalla de juego
width, height = 400, 600
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Entorno de Aprendizaje")

# Colores
white = (255, 255, 255)
black = (0, 0, 0)

# Jugador
player_size = 50
player_x = width // 2 - player_size // 2
player_y = height - 2 * player_size

# Obstáculos
obstacle_size = 50
obstacle_speed = 5
obstacle_frequency = 25  # A mayor valor, menos obstáculos
obstacles = []

# Reloj para controlar la velocidad del juego
clock = pygame.time.Clock()

# Función para mostrar un mensaje en la pantalla
def show_message(message, size, color, y_offset):
    font = pygame.font.Font(None, size)
    text = font.render(message, True, color)
    text_rect = text.get_rect(center=(width // 2, height // 2 + y_offset))
    screen.blit(text, text_rect)

# Bucle principal del juego
running = True
waiting_for_restart = False
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    keys = pygame.key.get_pressed()
    player_speed = 5
    player_x -= keys[pygame.K_LEFT] * player_speed
    player_x += keys[pygame.K_RIGHT] * player_speed

    # Límites del jugador
    player_x = max(0, min(player_x, width - player_size))

    # Generar obstáculos aleatorios
    if random.randint(0, obstacle_frequency) == 0:
        obstacle_x = random.randint(0, width - obstacle_size)
        obstacle_y = 0
        obstacles.append((obstacle_x, obstacle_y))

    # Mover y dibujar obstáculos
    new_obstacles = []
    for obstacle in obstacles:
        obstacle_x, obstacle_y = obstacle
        obstacle_y += obstacle_speed
        pygame.draw.rect(screen, white, (obstacle_x, obstacle_y, obstacle_size, obstacle_size))
        if obstacle_y < height:
            new_obstacles.append((obstacle_x, obstacle_y))
    obstacles = new_obstacles

    # Dibujar jugador
    pygame.draw.rect(screen, white, (player_x, player_y, player_size, player_size))

    # Verificar colisiones
    player_rect = pygame.Rect(player_x, player_y, player_size, player_size)
    for obstacle in obstacles:
        obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], obstacle_size, obstacle_size)
        if player_rect.colliderect(obstacle_rect):
            show_message("Game Over", 36, white, -20)
            show_message("Press 'R' to Restart", 24, white, 20)
            pygame.display.flip()
            waiting_for_restart = True

    pygame.display.flip()
    screen.fill(black)
    clock.tick(30)

    # Bucle para esperar la pulsación de la tecla 'R' para reiniciar el juego
    while waiting_for_restart:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                waiting_for_restart = False
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_r:
                    waiting_for_restart = False

        # Muestra el mensaje de reinicio en la pantalla
        show_message("Press 'R' to Restart", 24, white, 20)
        pygame.display.flip()
        clock.tick(30)

        # Limpiar y reiniciar el juego
        player_x = width // 2 - player_size // 2
        player_y = height - 2 * player_size
        obstacles = []

pygame.quit()
sys.exit()

# Declaración de Entorno de Entrenamiento

In [None]:
pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        # Configuración de pantalla de juego
        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")

        # Colores
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        # Jugador
        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        # Obstáculos
        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25  # A mayor valor, menos obstáculos
        self.obstacles = []

        # Reloj para controlar la velocidad del juego
        self.clock = pygame.time.Clock()

        # Definir el espacio de observación y de acción
        self.observation_space = spaces.Discrete(2)  # Ajusta el espacio de observación según tu juego
        self.action_space = spaces.Discrete(2)  # Ajusta el espacio de acción según tu juego

        # Define recompensas y penalizaciones
        self.reward_for_movement = 0.1
        self.reward_for_avoiding_obstacle = 1.0
        self.penalty_for_collision = -10

        # Define parámetros de exploración vs. explotación
        self.epsilon = 0.1

    def reset(self):
        # Reiniciar el juego y devolver el estado inicial
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        # Realizar la acción en el juego y devolver la observación, la recompensa y si el episodio ha terminado
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        # Verificar colisiones
        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        # Mostrar el estado actual del juego
        pass

    def close(self):
        # Cerrar el entorno
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        # Devuelve la observación actual (podría ser más compleja según tu juego)
        return 0

    def _handle_player_movement(self, action):
        # Manejar el movimiento del jugador según la acción
        player_speed = 5
        self.player_x += (2 * action - 1) * player_speed  # Mover a la izquierda si action es 0, mover a la derecha si action es 1
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))


    def _generate_obstacles(self):
        # Generar obstáculos aleatorios
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        # Mover y dibujar obstáculos
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        # Dibujar jugador
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        # Verificar colisiones entre el jugador y los obstáculos
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True  # Colisión detectada
        return False

# Entrenamiento

## Beta

## Izquierda Compulsiva

In [None]:
env = SimpleGameEnv()
num_episodes = 1000

for episode in range(num_episodes):
    observation = env.reset()
    total_reward = 0

    while True:
        # Exploración vs. Explotación
        if random.uniform(0, 1) < env.epsilon:
            action = env.action_space.sample()  # Explorar: seleccionar una acción al azar
        else:
            # Explotar: seleccionar la mejor acción según la política actual
            # (aquí usarías tu modelo de aprendizaje por refuerzo)
            action = 0  # Ajusta el valor según tu lógica

        next_observation, reward, done, _ = env.step(action)
        total_reward += reward
        observation = next_observation

        if done:
            print(f"Episodio {episode + 1}, Recompensa total: {total_reward}")
            break

env.close()

In [None]:
env = SimpleGameEnv()
num_episodes = 1000

# Inicializar la tabla Q con valores arbitrarios
Q_table = np.zeros((env.observation_space.n, env.action_space.n))

# Hiperparámetros de Q-learning
learning_rate = 0.1
discount_factor = 0.9
exploration_prob = 0.1

for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0

    while True:
        # Elegir la acción según la política epsilon-greedy
        if np.random.rand() < exploration_prob:
            action = env.action_space.sample()  # Explorar: seleccionar una acción al azar
        else:
            action = np.argmax(Q_table[state, :])  # Explotar: seleccionar la mejor acción según la política actual

        next_state, reward, done, _ = env.step(action)

        # Actualizar la tabla Q
        best_next_action = np.argmax(Q_table[next_state, :])
        Q_table[state, action] += learning_rate * (reward + discount_factor * Q_table[next_state, best_next_action] - Q_table[state, action])

        total_reward += reward
        state = next_state

        if done:
            print(f"Episodio {episode + 1}, Recompensa total: {total_reward}")
            break

env.close()

# Pruebas

#### No sé qué le pasa a este, no funciona del todo

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        # Configuración de pantalla de juego
        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")

        # Colores
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        # Jugador
        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        # Obstáculos
        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25  # A mayor valor, menos obstáculos
        self.obstacles = []

        # Reloj para controlar la velocidad del juego
        self.clock = pygame.time.Clock()

        # Definir el espacio de observación y de acción
        self.observation_space = spaces.Discrete(2)  # Ajusta el espacio de observación según tu juego
        self.action_space = spaces.Discrete(2)  # Ajusta el espacio de acción según tu juego

        # Define recompensas y penalizaciones
        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1  # Nueva recompensa por obstáculo evitado

        # Define parámetros de exploración vs. explotación
        self.epsilon = 0.1

    def reset(self):
        # Reiniciar el juego y devolver el estado inicial
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        # Realizar la acción en el juego y devolver la observación, la recompensa y si el episodio ha terminado
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        # Verificar colisiones
        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            # Recompensa adicional por obstáculo evitado
            reward += self._check_obstacle_avoidance_reward()
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        # Mostrar el estado actual del juego
        pass

    def close(self):
        # Cerrar el entorno
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        # Devuelve la observación actual (podría ser más compleja según tu juego)
        return 0

    def _handle_player_movement(self, action):
        # Manejar el movimiento del jugador según la acción
        player_speed = 5
        self.player_x += (2 * action - 1) * player_speed  # Mover a la izquierda si action es 0, mover a la derecha si action es 1
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        # Generar obstáculos aleatorios
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        # Mover y dibujar obstáculos
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        # Dibujar jugador
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        # Verificar colisiones entre el jugador y los obstáculos
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True  # Colisión detectada
        return False

    def _check_obstacle_avoidance_reward(self):
    # Verificar si hay obstáculos presentes
        if not self.obstacles:
            return 0

        # Verificar si el jugador está cerca de un obstáculo
        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        if min_distance_to_obstacle < self.player_size + self.obstacle_size:
            return self.reward_for_obstacle_avoidance
        else:
            # Incentivar el movimiento hacia la derecha si no hay obstáculos cercanos
            return self.reward_for_movement / 2


# Ejemplo de uso del entorno
env = SimpleGameEnv()
num_episodes = 1000

for episode in range(num_episodes):
    observation = env.reset()
    total_reward = 0

    while True:
        # Exploración vs. Explotación
        if random.uniform(0, 1) < env.epsilon:
            action = env.action_space.sample()  # Explorar: seleccionar una acción al azar
        else:
            # Explotar: seleccionar la mejor acción según la política actual
            action = 0  # Ajusta el valor según tu lógica

        next_observation, reward, done, _ = env.step(action)
        total_reward += reward
        observation = next_observation

        if done:
            print(f"Episodio {episode + 1}, Recompensa total: {total_reward}")
            break

env.close()

#### Con múltiples entrenamientos al mismo tiempo

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        # Configuración de pantalla de juego
        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")

        # Colores
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        # Jugador
        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        # Obstáculos
        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        # Reloj para controlar la velocidad del juego
        self.clock = pygame.time.Clock()

        # Definir el espacio de observación y de acción
        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(2)

        # Define recompensas y penalizaciones
        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1  # Nueva recompensa

        # Define parámetros de exploración vs. explotación
        self.epsilon = 0.1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        obstacle_avoided = self._check_obstacle_avoidance()  # Nueva verificación
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            if obstacle_avoided:
                reward += self.reward_for_obstacle_avoidance  # Nueva recompensa
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        pass

    def close(self):
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        self.player_x -= action * player_speed
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

    def _check_obstacle_avoidance(self):
        if not self.obstacles:
            return False

        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        return min_distance_to_obstacle > self.player_size + self.obstacle_size

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        return random.choice([0, 1])

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            if random.uniform(0, 1) < env.epsilon:
                action = env.action_space.sample()
            else:
                action = agent.select_action(observation)

            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

#### Explotación > Exploración

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(2)

        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1

        self.epsilon = 0.1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        obstacle_avoided = self._check_obstacle_avoidance()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            if obstacle_avoided:
                reward += self.reward_for_obstacle_avoidance
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        pass

    def close(self):
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        self.player_x -= action * player_speed
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

    def _check_obstacle_avoidance(self):
        if not self.obstacles:
            return False

        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        return min_distance_to_obstacle > self.player_size + self.obstacle_size

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        # Utiliza la política actual para elegir la acción
        # (aquí es donde usarías tu modelo de aprendizaje por refuerzo)
        action = 1  # Ajusta según la lógica de tu modelo
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

#### Otro! Intento más

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(2)

        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        obstacle_avoided = self._check_obstacle_avoidance()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            if obstacle_avoided:
                reward += self.reward_for_obstacle_avoidance
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        pass

    def close(self):
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        self.player_x -= action * player_speed
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

    def _check_obstacle_avoidance(self):
        if not self.obstacles:
            return False

        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        return min_distance_to_obstacle > self.player_size + self.obstacle_size

class YourAgent:
    def __init__(self):
        self.epsilon = 0.1

    def select_action(self, observation):
        # Modificamos la lógica de selección de acciones para dar más énfasis a la explotación
        # (aquí es donde usarías tu modelo de aprendizaje por refuerzo)
        action = 1 if random.uniform(0, 1) < self.epsilon else 0
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

Ojo con este modelito de arriba, no estaba del todo mal, pero terminó el entrenamiento por su cuenta. 
Como errores: solo se mueve hacia la izquierda, aunque de forma más humana, no de forma compulsiva.

#### Este parece funcionar bien, pero no aprende

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(3)

        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:  # Mover hacia la izquierda
            self.player_x -= player_speed
        elif action == 2:  # Mover hacia la derecha
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        # Modificamos la lógica de selección de acciones para permitir movimientos hacia la derecha
        action = random.choice([0, 1, 2])
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

# Vamos a cambiar las recompensas (mejoría)

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(3)

        self.reward_for_movement = 0.01
        self.penalty_for_collision = -50  # Aumentamos la penalización por colisión
        self.reward_for_avoiding_obstacle = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            done = False

        # Recompensa adicional por evitar obstáculos
        if not collision:
            reward += self.reward_for_avoiding_obstacle

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:  # Mover hacia la izquierda
            self.player_x -= player_speed
        elif action == 2:  # Mover hacia la derecha
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        action = random.choice([0, 1, 2])
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()


# El modelo ahora va muy bien, pero me da la sensación de que va como ciego por la vida. Vamos a darle contexto de cómo se mueven los obstáculos, para que pueda evitarlos.

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Box(low=0, high=1, shape=(3,), dtype=float)  # Añadimos información sobre los obstáculos
        self.action_space = spaces.Discrete(3)

        self.reward_for_movement = 0.01
        self.penalty_for_collision = -50
        self.reward_for_avoiding_obstacle = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            done = False

        # Recompensa adicional por evitar obstáculos
        if not collision and self.player_y < self.height - 2 * self.player_size:
            reward += self.reward_for_avoiding_obstacle

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()

    def _get_observation(self):
        # Añadimos información sobre la posición del jugador y la presencia de obstáculos
        player_position = self.player_x / self.width
        obstacle_info = [0] * self.width
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_info[int(obstacle_x)] = 1
        return [player_position] + obstacle_info

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:  # Mover hacia la izquierda
            self.player_x -= player_speed
        elif action == 2:  # Mover hacia la derecha
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        # Modificamos la lógica de selección de acciones para permitir movimientos hacia la derecha
        action = random.choice([0, 1, 2])
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 1000

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading
import numpy as np

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Box(low=0, high=1, shape=(self.width // 10, self.height // 10, 1), dtype=int)
        self.action_space = spaces.Discrete(3)

        self.penalty_for_collision = -50  # Aumentamos la penalización por colisión
        self.reward_for_avoiding_obstacle = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = 0  # No hay recompensa por movimiento
            done = False

        # Recompensa adicional por evitar obstáculos
        if not collision:
            reward += self.reward_for_avoiding_obstacle

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()

    def _get_observation(self):
        obs = np.zeros((self.width // 10, self.height // 10, 1), dtype=int)

        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            x, y = obstacle_x // 10, obstacle_y // 10
            if 0 <= x < obs.shape[0] and 0 <= y < obs.shape[1]:
                obs[x, y] = 1

        return obs

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:  # Mover hacia la izquierda
            self.player_x -= player_speed
        elif action == 2:  # Mover hacia la derecha
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        # Lógica para seleccionar la acción basada en la observación actual
        # Puedes implementar un modelo de aprendizaje profundo aquí
        action = random.choice([0, 1, 2])
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

# Agente Filtrado (Y mejorado, supuestamente)

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading
import numpy as np

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Box(low=0, high=1, shape=(self.width // 10, self.height // 10, 1), dtype=int)
        self.action_space = spaces.Discrete(3)

        self.penalty_for_collision = -50  # Aumentamos la penalización por colisión
        self.reward_for_avoiding_obstacle = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = 0  # No hay recompensa por movimiento
            done = False

        # Recompensa adicional por evitar obstáculos
        if not collision:
            reward += self.reward_for_avoiding_obstacle

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()

    def _get_observation(self):
        obs = np.zeros((self.width // 10, self.height // 10, 1), dtype=int)

        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            x, y = obstacle_x // 10, obstacle_y // 10
            if 0 <= x < obs.shape[0] and 0 <= y < obs.shape[1]:
                obs[x, y] = 1

        return obs

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:  # Mover hacia la izquierda
            self.player_x -= player_speed
        elif action == 2:  # Mover hacia la derecha
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class ImprovedAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        left_obstacle = observation[0, :, :].any()
        center_obstacle = observation[1, :, :].any()
        right_obstacle = observation[2, :, :].any()

        if left_obstacle and not center_obstacle and not right_obstacle:
            action = 0  # Mover hacia la izquierda
        elif not left_obstacle and not center_obstacle and right_obstacle:
            action = 2  # Mover hacia la derecha
        else:
            action = 1  # No hacer nada

        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = ImprovedAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

vale... tengo dos preguntas... 
la primera, el agente es consciente de los obstáculos que aparecen? Lo digo porque a veces tiene la izquierda libre, donde podría evitarlos, pero decide moverse a la derecha. 

y la segunda... si creamos un "random state" para que el juego siempre sea el mismo, mejoraría el aprendizaje del modelo, no? Pero sería injusto, porque el modelo aprendería directamente una única situación (movimientos concretos) y no a generalizar y evitar objetos sea cual sea la situación, no?

Segunda pregunta: Random State
Introducir un "random state" para que el juego siempre sea el mismo puede ser útil en algunos casos, pero tiene sus pros y contras. Como mencionas, esto puede ayudar al modelo a aprender comportamientos específicos en ese entorno particular. Sin embargo, como señalas, también puede llevar a un modelo que está especializado en esa situación específica y no generaliza bien a otros escenarios.

Es importante encontrar un equilibrio. Puedes usar un conjunto de datos diverso que incluya diferentes configuraciones del juego para que el modelo aprenda a generalizar. También puedes explorar técnicas como el uso de un conjunto de entrenamiento aleatorio, la introducción de variabilidad en el entorno y la política de exploración del agente para asegurarte de que el modelo no se vuelva demasiado específico.

En el aprendizaje por refuerzo, la variedad en los datos de entrenamiento es clave para que el modelo generalice bien y tome decisiones informadas en una variedad de situaciones.

# OPERACIÓN ARDILLA

# Entorno Fijo (sin aleatoriedad)

In [3]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading
import numpy as np

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Box(low=0, high=1, shape=(self.width // 10, self.height // 10, 1), dtype=int)
        self.action_space = spaces.Discrete(3)

        self.penalty_for_collision = -50  # Aumentamos la penalización por colisión
        self.reward_for_avoiding_obstacle = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = 0  # No hay recompensa por movimiento
            done = False

        # Recompensa adicional por evitar obstáculos
        if not collision:
            reward += self.reward_for_avoiding_obstacle

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()

    def _get_observation(self):
        obs = np.zeros((self.width // 10, self.height // 10, 1), dtype=int)

        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            x, y = obstacle_x // 10, obstacle_y // 10
            if 0 <= x < obs.shape[0] and 0 <= y < obs.shape[1]:
                obs[x, y] = 1

        return obs

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:  # Mover hacia la izquierda
            self.player_x -= player_speed
        elif action == 2:  # Mover hacia la derecha
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class QLearningAgent:
    def __init__(self):
        self.action_space = 3
        self.state_space = 2  # Se asume un espacio de estado bidimensional para este ejemplo
        self.learning_rate = 0.1
        self.discount_rate = 0.99
        self.exploration_prob = 0.3
        self.q_table = np.zeros((self.state_space, self.action_space))

    def _get_best_action(self, state):
        return np.argmax(self.q_table[state, :])

    def select_action(self, state):
        if random.uniform(0, 1) < self.exploration_prob:
            return random.choice(range(self.action_space))
        else:
            return self._get_best_action(state)

    def update(self, state, action, reward, next_state, done):
        if not done:
            max_next_q = np.max(self.q_table[next_state, :])
            updated_q_value = (1 - self.learning_rate) * self.q_table[state, action] + \
                              self.learning_rate * (reward + self.discount_rate * max_next_q)
        else:
            updated_q_value = (1 - self.learning_rate) * self.q_table[state, action] + self.learning_rate * reward

        self.q_table[state, action] = updated_q_value

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = QLearningAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

Exception in thread Thread-2:
Traceback (most recent call last):
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\ciruz\AppData\Local\Temp\ipykernel_14476\2740944519.py", line 160, in train_thread
  File "C:\Users\ciruz\AppData\Local\Temp\ipykernel_14476\2740944519.py", line 144, in update
IndexError: index 1404 is out of bounds for axis 1 with size 3
Exception in thread Thread-3:
Traceback (most recent call last):
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\ciruz\AppData\Local\Temp\ipykernel_14476\274094451

Hilo Thread-4, Episodio 1, Recompensa total: 75
Hilo Thread-1, Episodio 1, Recompensa total: 75
Hilo Thread-1, Episodio 2, Recompensa total: 145
Hilo Thread-4, Episodio 2, Recompensa total: 218
Hilo Thread-1, Episodio 3, Recompensa total: 22
Hilo Thread-4, Episodio 3, Recompensa total: 11
Hilo Thread-1, Episodio 4, Recompensa total: 11
Hilo Thread-1, Episodio 5, Recompensa total: 47
Hilo Thread-4, Episodio 4, Recompensa total: 48
Hilo Thread-1, Episodio 6, Recompensa total: 150
Hilo Thread-4, Episodio 5, Recompensa total: 151
Hilo Thread-4, Episodio 6, Recompensa total: 0
Hilo Thread-1, Episodio 7, Recompensa total: 1


Exception in thread Thread-4:
Traceback (most recent call last):
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\ciruz\AppData\Local\Temp\ipykernel_14476\2740944519.py", line 160, in train_thread
  File "C:\Users\ciruz\AppData\Local\Temp\ipykernel_14476\2740944519.py", line 144, in update
IndexError: index 3 is out of bounds for axis 1 with size 3


Hilo Thread-1, Episodio 8, Recompensa total: 80
Hilo Thread-1, Episodio 9, Recompensa total: 252


Exception in thread Thread-1:
Traceback (most recent call last):
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 1038, in _bootstrap_inner
    self.run()
  File "c:\Users\ciruz\AppData\Local\Programs\Python\Python311\Lib\threading.py", line 975, in run
    self._target(*self._args, **self._kwargs)
  File "C:\Users\ciruz\AppData\Local\Temp\ipykernel_14476\2740944519.py", line 160, in train_thread
  File "C:\Users\ciruz\AppData\Local\Temp\ipykernel_14476\2740944519.py", line 144, in update
IndexError: index 4 is out of bounds for axis 1 with size 3


# RRNN (NOPE)

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(3)

        self.reward_for_movement = 0.01
        self.penalty_for_collision = -50
        self.reward_for_avoiding_obstacle = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            done = False

        if not collision:
            reward += self.reward_for_avoiding_obstacle

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:
            self.player_x -= player_speed
        elif action == 2:
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class DQNAgent:
    def __init__(self, observation_space, action_space):
        self.observation_space = observation_space
        self.action_space = action_space

        self.model = self._build_model()

    def _build_model(self):
        model = keras.Sequential([
            layers.Input(shape=self.observation_space.shape),
            layers.Flatten(),
            layers.Dense(128, activation='relu'),
            layers.Dense(64, activation='relu'),
            layers.Dense(self.action_space.n, activation='linear')
        ])

        model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
                      loss='mse')

        return model

    def select_action(self, observation):
        q_values = self.model.predict(np.expand_dims(observation, axis=0))
        action = np.argmax(q_values)
        return action

    def update(self, observation, action, reward, next_observation, done):
        target = reward if done else reward + 0.99 * np.max(self.model.predict(np.expand_dims(next_observation, axis=0)))
        target_f = self.model.predict(np.expand_dims(observation, axis=0))
        target_f[0][action] = target
        self.model.fit(np.expand_dims(observation, axis=0), target_f, epochs=1, verbose=0)

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = DQNAgent(env.observation_space, env.action_space)

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()