#### Importaciones

In [1]:
import gym
from gym import spaces
import numpy as np
import pygame
import random
import sys

# Entorno jugable

In [None]:
pygame.init()

# Configuración de pantalla de juego
width, height = 400, 600
screen = pygame.display.set_mode((width, height))
pygame.display.set_caption("Entorno de Aprendizaje")

# Colores
white = (255, 255, 255)
black = (0, 0, 0)

# Jugador
player_size = 50
player_x = width // 2 - player_size // 2
player_y = height - 2 * player_size

# Obstáculos
obstacle_size = 50
obstacle_speed = 5
obstacle_frequency = 25  # A mayor valor, menos obstáculos
obstacles = []

# Reloj para controlar la velocidad del juego
clock = pygame.time.Clock()

# Función para mostrar un mensaje en la pantalla
def show_message(message, size, color, y_offset):
    font = pygame.font.Font(None, size)
    text = font.render(message, True, color)
    text_rect = text.get_rect(center=(width // 2, height // 2 + y_offset))
    screen.blit(text, text_rect)

# Bucle principal del juego
running = True
waiting_for_restart = False
while running:
    for event in pygame.event.get():
        if event.type == pygame.QUIT:
            running = False

    keys = pygame.key.get_pressed()
    player_speed = 5
    player_x -= keys[pygame.K_LEFT] * player_speed
    player_x += keys[pygame.K_RIGHT] * player_speed

    # Límites del jugador
    player_x = max(0, min(player_x, width - player_size))

    # Generar obstáculos aleatorios
    if random.randint(0, obstacle_frequency) == 0:
        obstacle_x = random.randint(0, width - obstacle_size)
        obstacle_y = 0
        obstacles.append((obstacle_x, obstacle_y))

    # Mover y dibujar obstáculos
    new_obstacles = []
    for obstacle in obstacles:
        obstacle_x, obstacle_y = obstacle
        obstacle_y += obstacle_speed
        pygame.draw.rect(screen, white, (obstacle_x, obstacle_y, obstacle_size, obstacle_size))
        if obstacle_y < height:
            new_obstacles.append((obstacle_x, obstacle_y))
    obstacles = new_obstacles

    # Dibujar jugador
    pygame.draw.rect(screen, white, (player_x, player_y, player_size, player_size))

    # Verificar colisiones
    player_rect = pygame.Rect(player_x, player_y, player_size, player_size)
    for obstacle in obstacles:
        obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], obstacle_size, obstacle_size)
        if player_rect.colliderect(obstacle_rect):
            show_message("Game Over", 36, white, -20)
            show_message("Press 'R' to Restart", 24, white, 20)
            pygame.display.flip()
            waiting_for_restart = True

    pygame.display.flip()
    screen.fill(black)
    clock.tick(30)

    # Bucle para esperar la pulsación de la tecla 'R' para reiniciar el juego
    while waiting_for_restart:
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                running = False
                waiting_for_restart = False
            elif event.type == pygame.KEYDOWN:
                if event.key == pygame.K_r:
                    waiting_for_restart = False

        # Muestra el mensaje de reinicio en la pantalla
        show_message("Press 'R' to Restart", 24, white, 20)
        pygame.display.flip()
        clock.tick(30)

        # Limpiar y reiniciar el juego
        player_x = width // 2 - player_size // 2
        player_y = height - 2 * player_size
        obstacles = []

pygame.quit()
sys.exit()

# Declaración de Entorno de Entrenamiento

In [2]:
pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        # Configuración de pantalla de juego
        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")

        # Colores
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        # Jugador
        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        # Obstáculos
        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25  # A mayor valor, menos obstáculos
        self.obstacles = []

        # Reloj para controlar la velocidad del juego
        self.clock = pygame.time.Clock()

        # Definir el espacio de observación y de acción
        self.observation_space = spaces.Discrete(2)  # Ajusta el espacio de observación según tu juego
        self.action_space = spaces.Discrete(2)  # Ajusta el espacio de acción según tu juego

        # Define recompensas y penalizaciones
        self.reward_for_movement = 0.1
        self.reward_for_avoiding_obstacle = 1.0
        self.penalty_for_collision = -10

        # Define parámetros de exploración vs. explotación
        self.epsilon = 0.1

    def reset(self):
        # Reiniciar el juego y devolver el estado inicial
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        # Realizar la acción en el juego y devolver la observación, la recompensa y si el episodio ha terminado
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        # Verificar colisiones
        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        # Mostrar el estado actual del juego
        pass

    def close(self):
        # Cerrar el entorno
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        # Devuelve la observación actual (podría ser más compleja según tu juego)
        return 0

    def _handle_player_movement(self, action):
        # Manejar el movimiento del jugador según la acción
        player_speed = 5
        self.player_x += (2 * action - 1) * player_speed  # Mover a la izquierda si action es 0, mover a la derecha si action es 1
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))


    def _generate_obstacles(self):
        # Generar obstáculos aleatorios
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        # Mover y dibujar obstáculos
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        # Dibujar jugador
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        # Verificar colisiones entre el jugador y los obstáculos
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True  # Colisión detectada
        return False

# Entrenamiento

## Beta

## Izquierda Compulsiva

In [7]:
env = SimpleGameEnv()
num_episodes = 1000

for episode in range(num_episodes):
    observation = env.reset()
    total_reward = 0

    while True:
        # Exploración vs. Explotación
        if random.uniform(0, 1) < env.epsilon:
            action = env.action_space.sample()  # Explorar: seleccionar una acción al azar
        else:
            # Explotar: seleccionar la mejor acción según la política actual
            # (aquí usarías tu modelo de aprendizaje por refuerzo)
            action = 0  # Ajusta el valor según tu lógica

        next_observation, reward, done, _ = env.step(action)
        total_reward += reward
        observation = next_observation

        if done:
            print(f"Episodio {episode + 1}, Recompensa total: {total_reward}")
            break

env.close()

Episodio 1, Recompensa total: 26.400000000000247
Episodio 2, Recompensa total: 11.10000000000003
Episodio 3, Recompensa total: 10.40000000000002


KeyboardInterrupt: 

In [6]:
env = SimpleGameEnv()
num_episodes = 1000

# Inicializar la tabla Q con valores arbitrarios
Q_table = np.zeros((env.observation_space.n, env.action_space.n))

# Hiperparámetros de Q-learning
learning_rate = 0.1
discount_factor = 0.9
exploration_prob = 0.1

for episode in range(num_episodes):
    state = env.reset()
    total_reward = 0

    while True:
        # Elegir la acción según la política epsilon-greedy
        if np.random.rand() < exploration_prob:
            action = env.action_space.sample()  # Explorar: seleccionar una acción al azar
        else:
            action = np.argmax(Q_table[state, :])  # Explotar: seleccionar la mejor acción según la política actual

        next_state, reward, done, _ = env.step(action)

        # Actualizar la tabla Q
        best_next_action = np.argmax(Q_table[next_state, :])
        Q_table[state, action] += learning_rate * (reward + discount_factor * Q_table[next_state, best_next_action] - Q_table[state, action])

        total_reward += reward
        state = next_state

        if done:
            print(f"Episodio {episode + 1}, Recompensa total: {total_reward}")
            break

env.close()

Episodio 1, Recompensa total: -0.4000000000000181
Episodio 2, Recompensa total: 9.100000000000001
Episodio 3, Recompensa total: 23.300000000000203
Episodio 4, Recompensa total: 12.300000000000047
Episodio 5, Recompensa total: 9.0


KeyboardInterrupt: 

KeyboardInterrupt: 

# Pruebas

#### No sé qué le pasa a este, no funciona del todo

In [3]:
import pygame
import gym
from gym import spaces
import random
import sys

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        # Configuración de pantalla de juego
        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")

        # Colores
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        # Jugador
        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        # Obstáculos
        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25  # A mayor valor, menos obstáculos
        self.obstacles = []

        # Reloj para controlar la velocidad del juego
        self.clock = pygame.time.Clock()

        # Definir el espacio de observación y de acción
        self.observation_space = spaces.Discrete(2)  # Ajusta el espacio de observación según tu juego
        self.action_space = spaces.Discrete(2)  # Ajusta el espacio de acción según tu juego

        # Define recompensas y penalizaciones
        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1  # Nueva recompensa por obstáculo evitado

        # Define parámetros de exploración vs. explotación
        self.epsilon = 0.1

    def reset(self):
        # Reiniciar el juego y devolver el estado inicial
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        # Realizar la acción en el juego y devolver la observación, la recompensa y si el episodio ha terminado
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        # Verificar colisiones
        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            # Recompensa adicional por obstáculo evitado
            reward += self._check_obstacle_avoidance_reward()
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        # Mostrar el estado actual del juego
        pass

    def close(self):
        # Cerrar el entorno
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        # Devuelve la observación actual (podría ser más compleja según tu juego)
        return 0

    def _handle_player_movement(self, action):
        # Manejar el movimiento del jugador según la acción
        player_speed = 5
        self.player_x += (2 * action - 1) * player_speed  # Mover a la izquierda si action es 0, mover a la derecha si action es 1
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        # Generar obstáculos aleatorios
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        # Mover y dibujar obstáculos
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        # Dibujar jugador
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        # Verificar colisiones entre el jugador y los obstáculos
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True  # Colisión detectada
        return False

    def _check_obstacle_avoidance_reward(self):
    # Verificar si hay obstáculos presentes
        if not self.obstacles:
            return 0

        # Verificar si el jugador está cerca de un obstáculo
        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        if min_distance_to_obstacle < self.player_size + self.obstacle_size:
            return self.reward_for_obstacle_avoidance
        else:
            # Incentivar el movimiento hacia la derecha si no hay obstáculos cercanos
            return self.reward_for_movement / 2


# Ejemplo de uso del entorno
env = SimpleGameEnv()
num_episodes = 1000

for episode in range(num_episodes):
    observation = env.reset()
    total_reward = 0

    while True:
        # Exploración vs. Explotación
        if random.uniform(0, 1) < env.epsilon:
            action = env.action_space.sample()  # Explorar: seleccionar una acción al azar
        else:
            # Explotar: seleccionar la mejor acción según la política actual
            action = 0  # Ajusta el valor según tu lógica

        next_observation, reward, done, _ = env.step(action)
        total_reward += reward
        observation = next_observation

        if done:
            print(f"Episodio {episode + 1}, Recompensa total: {total_reward}")
            break

env.close()

Episodio 1, Recompensa total: 602.4000000000002
Episodio 2, Recompensa total: 110.94999999999972
Episodio 3, Recompensa total: 161.6999999999995
Episodio 4, Recompensa total: 287.50000000000006
Episodio 5, Recompensa total: 441.9999999999989
Episodio 6, Recompensa total: 98.24999999999983
Episodio 7, Recompensa total: 141.94999999999925
Episodio 8, Recompensa total: 168.79999999999947
Episodio 9, Recompensa total: 118.34999999999962
Episodio 10, Recompensa total: 120.99999999999963
Episodio 11, Recompensa total: 188.39999999999938
Episodio 12, Recompensa total: 152.54999999999933
Episodio 13, Recompensa total: 93.64999999999985
Episodio 14, Recompensa total: 84.2499999999999
Episodio 15, Recompensa total: 97.59999999999982
Episodio 16, Recompensa total: 92.39999999999986
Episodio 17, Recompensa total: 174.64999999999947
Episodio 18, Recompensa total: 236.04999999999964
Episodio 19, Recompensa total: 108.09999999999978
Episodio 20, Recompensa total: 207.29999999999927
Episodio 21, Recom

KeyboardInterrupt: 

#### Con múltiples entrenamientos al mismo tiempo

In [1]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        # Configuración de pantalla de juego
        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")

        # Colores
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        # Jugador
        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        # Obstáculos
        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        # Reloj para controlar la velocidad del juego
        self.clock = pygame.time.Clock()

        # Definir el espacio de observación y de acción
        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(2)

        # Define recompensas y penalizaciones
        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1  # Nueva recompensa

        # Define parámetros de exploración vs. explotación
        self.epsilon = 0.1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        obstacle_avoided = self._check_obstacle_avoidance()  # Nueva verificación
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            if obstacle_avoided:
                reward += self.reward_for_obstacle_avoidance  # Nueva recompensa
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        pass

    def close(self):
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        self.player_x -= action * player_speed
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

    def _check_obstacle_avoidance(self):
        if not self.obstacles:
            return False

        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        return min_distance_to_obstacle > self.player_size + self.obstacle_size

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        return random.choice([0, 1])

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            if random.uniform(0, 1) < env.epsilon:
                action = env.action_space.sample()
            else:
                action = agent.select_action(observation)

            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

pygame 2.5.2 (SDL 2.28.3, Python 3.11.4)
Hello from the pygame community. https://www.pygame.org/contribute.html
Hilo Thread-1, Episodio 1, Recompensa total: -6.999999999999998
Hilo Thread-4, Episodio 1, Recompensa total: -6.999999999999998
Hilo Thread-2, Episodio 1, Recompensa total: -6.999999999999998
Hilo Thread-3, Episodio 1, Recompensa total: -6.899999999999999
Hilo Thread-3, Episodio 2, Recompensa total: 34.30000000000006
Hilo Thread-2, Episodio 2, Recompensa total: 35.40000000000006
Hilo Thread-1, Episodio 2, Recompensa total: 33.40000000000006
Hilo Thread-4, Episodio 2, Recompensa total: 34.40000000000006
Hilo Thread-2, Episodio 3, Recompensa total: -6.1999999999999975
Hilo Thread-1, Episodio 3, Recompensa total: -6.1999999999999975
Hilo Thread-4, Episodio 3, Recompensa total: -5.200000000000002
Hilo Thread-3, Episodio 3, Recompensa total: -6.099999999999998
Hilo Thread-2, Episodio 4, Recompensa total: 17.600000000000104
Hilo Thread-4, Episodio 4, Recompensa total: 18.600000000

#### Explotación > Exploración

In [1]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(2)

        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1

        self.epsilon = 0.1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        obstacle_avoided = self._check_obstacle_avoidance()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            if obstacle_avoided:
                reward += self.reward_for_obstacle_avoidance
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        pass

    def close(self):
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        self.player_x -= action * player_speed
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

    def _check_obstacle_avoidance(self):
        if not self.obstacles:
            return False

        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        return min_distance_to_obstacle > self.player_size + self.obstacle_size

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        # Utiliza la política actual para elegir la acción
        # (aquí es donde usarías tu modelo de aprendizaje por refuerzo)
        action = 1  # Ajusta según la lógica de tu modelo
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

pygame 2.5.2 (SDL 2.28.3, Python 3.11.4)
Hello from the pygame community. https://www.pygame.org/contribute.html
Hilo Thread-3, Episodio 1, Recompensa total: -7.299999999999999
Hilo Thread-1, Episodio 1, Recompensa total: -7.199999999999999
Hilo Thread-4, Episodio 1, Recompensa total: -7.199999999999999
Hilo Thread-2, Episodio 1, Recompensa total: -7.199999999999999
Hilo Thread-3, Episodio 2, Recompensa total: 78.79999999999981
Hilo Thread-1, Episodio 2, Recompensa total: 78.79999999999981
Hilo Thread-2, Episodio 2, Recompensa total: 77.69999999999982
Hilo Thread-4, Episodio 2, Recompensa total: 77.79999999999981
Hilo Thread-3, Episodio 3, Recompensa total: -5.999999999999998
Hilo Thread-4, Episodio 3, Recompensa total: -6.099999999999998
Hilo Thread-1, Episodio 3, Recompensa total: -5.999999999999998
Hilo Thread-2, Episodio 3, Recompensa total: -5.999999999999998
Hilo Thread-3, Episodio 4, Recompensa total: -6.499999999999998
Hilo Thread-1, Episodio 4, Recompensa total: -6.59999999999

: 

#### Otro! Intento más

In [1]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(2)

        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10
        self.reward_for_obstacle_avoidance = 1

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        obstacle_avoided = self._check_obstacle_avoidance()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            if obstacle_avoided:
                reward += self.reward_for_obstacle_avoidance
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        pass

    def close(self):
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        self.player_x -= action * player_speed
        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

    def _check_obstacle_avoidance(self):
        if not self.obstacles:
            return False

        min_distance_to_obstacle = min(abs(obstacle[0] - self.player_x) for obstacle in self.obstacles)
        return min_distance_to_obstacle > self.player_size + self.obstacle_size

class YourAgent:
    def __init__(self):
        self.epsilon = 0.1

    def select_action(self, observation):
        # Modificamos la lógica de selección de acciones para dar más énfasis a la explotación
        # (aquí es donde usarías tu modelo de aprendizaje por refuerzo)
        action = 1 if random.uniform(0, 1) < self.epsilon else 0
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

pygame 2.5.2 (SDL 2.28.3, Python 3.11.4)
Hello from the pygame community. https://www.pygame.org/contribute.html
Hilo Thread-3, Episodio 1, Recompensa total: -7.399999999999999Hilo Thread-2, Episodio 1, Recompensa total: -7.399999999999999
Hilo Thread-1, Episodio 1, Recompensa total: -7.399999999999999

Hilo Thread-4, Episodio 1, Recompensa total: -7.299999999999999
Hilo Thread-1, Episodio 2, Recompensa total: -7.699999999999999Hilo Thread-3, Episodio 2, Recompensa total: -7.6
Hilo Thread-4, Episodio 2, Recompensa total: -7.6

Hilo Thread-2, Episodio 2, Recompensa total: -7.6
Hilo Thread-1, Episodio 3, Recompensa total: -1.500000000000007
Hilo Thread-3, Episodio 3, Recompensa total: -0.40000000000000746
Hilo Thread-4, Episodio 3, Recompensa total: -1.4000000000000075
Hilo Thread-2, Episodio 3, Recompensa total: -0.40000000000000924
Hilo Thread-3, Episodio 4, Recompensa total: -2.100000000000013
Hilo Thread-1, Episodio 4, Recompensa total: -3.000000000000009
Hilo Thread-2, Episodio 4, R

SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


Ojo con este modelito de arriba, no estaba del todo mal, pero terminó el entrenamiento por su cuenta. 
Como errores: solo se mueve hacia la izquierda, aunque de forma más humana, no de forma compulsiva.

#### Este parece funcionar bien

In [None]:
import pygame
import gym
from gym import spaces
import random
import sys
import threading

pygame.init()

class SimpleGameEnv(gym.Env):
    def __init__(self):
        super(SimpleGameEnv, self).__init__()

        self.width, self.height = 400, 600
        self.screen = pygame.display.set_mode((self.width, self.height))
        pygame.display.set_caption("Entorno de Aprendizaje")
        self.white = (255, 255, 255)
        self.black = (0, 0, 0)

        self.player_size = 50
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size

        self.obstacle_size = 50
        self.obstacle_speed = 5
        self.obstacle_frequency = 25
        self.obstacles = []

        self.clock = pygame.time.Clock()

        self.observation_space = spaces.Discrete(2)
        self.action_space = spaces.Discrete(3)

        self.reward_for_movement = 0.1
        self.penalty_for_collision = -10

    def reset(self):
        self.player_x = self.width // 2 - self.player_size // 2
        self.player_y = self.height - 2 * self.player_size
        self.obstacles = []
        return self._get_observation()

    def step(self, action):
        self._handle_player_movement(action)
        self._generate_obstacles()
        self._move_and_draw_obstacles()
        self._draw_player()

        collision = self._check_collisions()
        if collision:
            reward = self.penalty_for_collision
            done = True
        else:
            reward = self.reward_for_movement
            done = False

        pygame.display.flip()
        self.screen.fill(self.black)
        self.clock.tick(30)

        return self._get_observation(), reward, done, {}

    def render(self):
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                sys.exit()

    def close(self):
        pygame.quit()
        sys.exit()

    def _get_observation(self):
        return 0

    def _handle_player_movement(self, action):
        player_speed = 5
        if action == 0:  # Mover hacia la izquierda
            self.player_x -= player_speed
        elif action == 2:  # Mover hacia la derecha
            self.player_x += player_speed

        self.player_x = max(0, min(self.player_x, self.width - self.player_size))

    def _generate_obstacles(self):
        if random.randint(0, self.obstacle_frequency) == 0:
            obstacle_x = random.randint(0, self.width - self.obstacle_size)
            obstacle_y = 0
            self.obstacles.append((obstacle_x, obstacle_y))

    def _move_and_draw_obstacles(self):
        new_obstacles = []
        for obstacle in self.obstacles:
            obstacle_x, obstacle_y = obstacle
            obstacle_y += self.obstacle_speed
            pygame.draw.rect(self.screen, self.white, (obstacle_x, obstacle_y, self.obstacle_size, self.obstacle_size))
            if obstacle_y < self.height:
                new_obstacles.append((obstacle_x, obstacle_y))
        self.obstacles = new_obstacles

    def _draw_player(self):
        pygame.draw.rect(self.screen, self.white, (self.player_x, self.player_y, self.player_size, self.player_size))

    def _check_collisions(self):
        player_rect = pygame.Rect(self.player_x, self.player_y, self.player_size, self.player_size)
        for obstacle in self.obstacles:
            obstacle_rect = pygame.Rect(obstacle[0], obstacle[1], self.obstacle_size, self.obstacle_size)
            if player_rect.colliderect(obstacle_rect):
                return True
        return False

class YourAgent:
    def __init__(self):
        pass

    def select_action(self, observation):
        # Modificamos la lógica de selección de acciones para permitir movimientos hacia la derecha
        action = random.choice([0, 1, 2])
        return action

    def update(self, observation, action, reward, next_observation, done):
        pass

def train_thread(env, agent, num_episodes):
    for episode in range(num_episodes):
        observation = env.reset()
        total_reward = 0

        while True:
            action = agent.select_action(observation)
            next_observation, reward, done, _ = env.step(action)
            total_reward += reward
            agent.update(observation, action, reward, next_observation, done)

            observation = next_observation

            if done:
                print(f"Hilo {threading.current_thread().name}, Episodio {episode + 1}, Recompensa total: {total_reward}")
                break

env = SimpleGameEnv()
agent = YourAgent()

num_threads = 4
num_episodes_per_thread = 250

threads = []
for i in range(num_threads):
    thread = threading.Thread(target=train_thread, args=(env, agent, num_episodes_per_thread), name=f"Thread-{i+1}")
    thread.start()
    threads.append(thread)

for thread in threads:
    thread.join()

env.close()

pygame 2.5.2 (SDL 2.28.3, Python 3.11.4)
Hello from the pygame community. https://www.pygame.org/contribute.html
Hilo Thread-4, Episodio 1, Recompensa total: -5.999999999999998Hilo Thread-2, Episodio 1, Recompensa total: -5.899999999999999
Hilo Thread-3, Episodio 1, Recompensa total: -5.999999999999998
Hilo Thread-1, Episodio 1, Recompensa total: -5.999999999999998

Hilo Thread-3, Episodio 2, Recompensa total: -6.1999999999999975
Hilo Thread-4, Episodio 2, Recompensa total: -6.1999999999999975
Hilo Thread-2, Episodio 2, Recompensa total: -6.099999999999998
Hilo Thread-1, Episodio 2, Recompensa total: -6.099999999999998
Hilo Thread-3, Episodio 3, Recompensa total: -6.599999999999998
Hilo Thread-1, Episodio 3, Recompensa total: -6.599999999999998
Hilo Thread-2, Episodio 3, Recompensa total: -6.599999999999998
Hilo Thread-4, Episodio 3, Recompensa total: -6.499999999999998
Hilo Thread-2, Episodio 4, Recompensa total: -5.200000000000001
Hilo Thread-3, Episodio 4, Recompensa total: -5.10000

: 