In [1]:
import sys

sys.path.append("..")

In [2]:
from tqdm import trange

In [3]:
from environment.obstacle_avoidance_env import ObstacleAvoidanceEnv
from utils.reader import get_yaml_path, read_file

In [4]:
from agent.qagent import QAgent
from training.qlearning import QLearning

In [5]:
import pygame
import numpy as np

  from pkg_resources import resource_stream, resource_exists


In [7]:
# Server endpoint and client name handed to the environment wrapper.
server_address = "localhost:50051"
client_name = "RLClient"
# Build the environment and open the connection; the top-level `await`
# relies on the notebook's running event loop.
env = ObstacleAvoidanceEnv(server_address, client_name)
await env.connect_to_client()

2025-10-27 10:48:31,526 — INFO — ✓ Connected to localhost:50051



In [8]:
# Locate and load the obstacle-avoidance configuration.
# NOTE(review): presumably get_yaml_path joins the given segments into a
# path under the project root — confirm against utils.reader.
config_path = get_yaml_path("resources", "configurations", "obstacle-avoidance.yml")
config = read_file(config_path)
# print(config)  # uncomment to inspect the loaded configuration

In [9]:
# Initialize the environment with the loaded configuration; needs `env`
# and `config` from the cells above.
await env.init(config)

2025-10-27 10:48:41,169 — INFO — ✓ Initialization successful


In [10]:
# Training budget: number of episodes and the step cap applied to each one.
episodes, steps = 3_000, 5_000

In [11]:
# One learner, registered under its agent id; `agents` and `agentId` are the
# globals that run_episodes reads.
agentId = "00000000-0000-0000-0000-000000000001"
agent = QAgent(env, episodes=episodes)
agents = {agentId: agent}

In [12]:
async def run_episodes(episode_count, episode_max_steps, render = False):
    """Run episodes against the notebook-level ``env`` using ``agents``.

    Two modes, selected by ``render``:

    * ``render=False`` (training): actions are chosen with
      ``epsilon_greedy=True`` and ``update_q`` is called for every agent
      after each environment step.
    * ``render=True`` (playback): actions are chosen with
      ``epsilon_greedy=False``, no Q-update is performed, and every frame
      returned by ``env.render()`` is drawn in an 800x600 pygame window.
      Closing the window stops the whole run, not just the current episode.

    Args:
        episode_count: Number of episodes to run.
        episode_max_steps: Maximum number of environment steps per episode.
        render: Whether to run in playback mode (see above).

    Returns:
        None.

    Raises:
        Whatever ``env`` or the agents raise. When rendering, the pygame
        window is shut down before the exception (including a cancellation
        of the awaiting task) propagates.
    """
    if render:
        pygame.init()
    try:
        screen = clock = None
        if render:
            screen = pygame.display.set_mode((800, 600))
            pygame.display.set_caption("Q-Learning obstacle avoidance")
            clock = pygame.time.Clock()

        running = True
        for _ep in trange(episode_count, desc="Training", unit="ep"):
            obs, _ = await env.reset()
            done = False
            # Per-agent reward sum for this episode; currently not read
            # after the episode ends.
            total_reward = {k: 0 for k in agents}
            step_count = 0
            while not done and step_count < episode_max_steps:
                actions = {
                    k: agents[k].choose_action(v, epsilon_greedy = not render)
                    for k, v in obs.items()
                }
                next_obs, rewards, terminateds, truncateds, _ = await env.step(actions)
                # TODO: take other agents into consideration
                done = terminateds[agentId] or truncateds[agentId]
                if not render:
                    for k in next_obs:
                        agents[k].update_q(obs[k], actions[k], rewards[k], next_obs[k], done)
                        total_reward[k] += rewards[k]
                obs = next_obs
                step_count += 1
                if render:
                    rgb_array = await env.render()
                    # Swap the first two axes before handing the frame to
                    # pygame.surfarray.make_surface.
                    surface = pygame.surfarray.make_surface(np.transpose(rgb_array, (1, 0, 2)))
                    screen.blit(surface, (0, 0))
                    pygame.display.flip()
                    clock.tick(60)
                    # Drain the event queue so the window stays responsive
                    # and a close request is noticed.
                    for event in pygame.event.get():
                        if event.type == pygame.QUIT:
                            running = False
                if not running:
                    break
            # NOTE(review): epsilon is decayed in playback mode as well —
            # confirm this is intended.
            for learner in agents.values():
                learner.decay_epsilon(_ep)
            if not running:
                # The window was closed: stop instead of starting further
                # episodes that would each abort after a single step.
                break
    finally:
        if render:
            pygame.quit()


In [13]:
await run_episodes(episodes, steps)

Training:   1%|█                                                                                                                                                                                             | 16/3000 [00:05<18:11,  2.73ep/s]


CancelledError: 

In [None]:
await run_episodes(1, 10000, True)