In [1]:
import sys

# Add the directory two levels up to the module search path.
# NOTE(review): the path is relative to the kernel's working directory; presumably
# it points at the project root so the `environment`, `utils`, `agent` and
# `training` imports below resolve — confirm.
sys.path.append("../..")

In [2]:
import nest_asyncio
# Patch asyncio so an already-running event loop (Jupyter runs one) can be re-entered.
# NOTE(review): presumably required by the environment client used below — confirm.
nest_asyncio.apply()

In [3]:
from tqdm import trange

In [4]:
from environment.qlearning.obstacle_avoidance_env import ObstacleAvoidanceEnv
from utils.reader import get_yaml_path, read_file

In [5]:
from agent.qagent import QAgent
from training.qlearning import QLearning

In [6]:
import pygame
import numpy as np

  from pkg_resources import resource_stream, resource_exists


In [7]:
# Create the obstacle-avoidance environment and connect it to the server.
# NOTE(review): the address is hard-coded to a local server; `client_name`
# presumably identifies this notebook to that server — confirm.
server_address = "localhost:50060"
client_name = "RLClient"
env = ObstacleAvoidanceEnv(server_address, client_name)
env.connect_to_client()

2025-11-01 16:19:49,488 — INFO — ✓ Connected to localhost:50060



In [8]:
# Resolve the path of the obstacle-avoidance YAML file and read it.
config_path = get_yaml_path("resources", "configurations", "obstacle-avoidance.yml")
config = read_file(config_path)
# print(config)  # uncomment to inspect what was read

In [9]:
# Initialise the connected environment with the configuration read above.
env.init(config)

2025-11-01 16:19:49,725 — INFO — ✓ Initialization successful


In [10]:
episodes = 100  # number of training episodes; also handed to QAgent as `episodes`
steps = 5000  # per-episode step cap passed to run_episodes for training

In [11]:
# Single Q-learning agent. `agents` maps agent id -> QAgent so that run_episodes
# can look agents up by the keys of the observation dict.
# NOTE(review): `agentId` presumably has to match an agent id used by the
# environment/configuration — confirm.
agent = QAgent(env, episodes = episodes)
agentId = "00000000-0000-0000-0000-000000000001"
agents = { agentId: agent }

In [12]:
def run_episodes(episode_count, episode_max_steps, render = False):
    """Run Q-learning episodes on the module-level ``env`` with the agents in ``agents``.

    When ``render`` is false the agents act with ``epsilon_greedy=True`` and
    ``update_q`` is called after every environment step. When ``render`` is
    true, actions are chosen with ``epsilon_greedy=False``, no Q-update is
    performed, and every frame returned by ``env.render()`` is drawn in an
    800x600 pygame window capped at 60 FPS. Closing that window stops the
    whole run, not just the current episode.

    Args:
        episode_count: Number of episodes to run.
        episode_max_steps: Upper bound on environment steps per episode.
        render: Draw the environment with pygame instead of learning.
    """
    running = True

    if render:
        pygame.init()
        screen = pygame.display.set_mode((800, 600))
        pygame.display.set_caption("Q-Learning obstacle avoidance")
        clock = pygame.time.Clock()

    try:
        for _ep in trange(episode_count, desc="Training", unit="ep"):
            obs, _ = env.reset()
            done = False
            # One accumulator per known agent, so the update loop below cannot
            # hit a KeyError once more than one agent is present.
            # Not reported anywhere yet; kept for inspection while debugging.
            total_reward = {k: 0 for k in agents}
            step_count = 0

            while not done and step_count < episode_max_steps:
                actions = {
                    k: agents[k].choose_action(v, epsilon_greedy = not render)
                    for k, v in obs.items()
                }
                next_obs, rewards, terminateds, truncateds, _ = env.step(actions)
                # TODO: take other agents into consideration
                done = terminateds[agentId] or truncateds[agentId]

                if not render:
                    for k in next_obs:
                        agents[k].update_q(obs[k], actions[k], rewards[k], next_obs[k], done)
                        total_reward[k] += rewards[k]
                obs = next_obs
                step_count += 1

                if render:
                    rgb_array = env.render()
                    # Swap the first two axes of the frame before building the surface.
                    surface = pygame.surfarray.make_surface(np.transpose(rgb_array, (1, 0, 2)))
                    screen.blit(surface, (0, 0))
                    pygame.display.flip()
                    clock.tick(60)

                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            running = False

                if not running:
                    break

            # NOTE(review): epsilon is decayed in render mode too, exactly as
            # before; confirm whether evaluation runs should skip this.
            for learner in agents.values():
                learner.decay_epsilon(_ep)

            # The window was closed: abandon the remaining episodes as well.
            if not running:
                break
    finally:
        # Release the window even if the run is interrupted or raises.
        if render:
            pygame.quit()


In [13]:
# Train without rendering (`render` defaults to False): `episodes` episodes of at most `steps` steps.
run_episodes(episodes, steps)

Training:  12%|███████████████████████                                                                                                                                                                         | 12/100 [00:12<01:29,  1.02s/ep]


KeyboardInterrupt: 

In [None]:
# Play one rendered episode of at most 10000 steps. In render mode actions are
# chosen with epsilon_greedy=False and no Q-update is performed.
run_episodes(1, 10000, render = True)