In [1]:
from tqdm import trange

In [2]:
from environment.obstacle_avoidance_env import ObstacleAvoidanceEnv
from utils.reader import get_yaml_path, read_file

In [3]:
from agent.qagent import QAgent
from training.qlearning import QLearning

In [4]:
import pygame
import numpy as np

  from pkg_resources import resource_stream, resource_exists


In [5]:
# Endpoint and client name handed to the environment wrapper below.
server_address = "localhost:50051"
client_name = "RLClient"
# `env` is the notebook-wide environment handle used by every later cell.
env = ObstacleAvoidanceEnv(server_address, client_name)
# Top-level `await` only works inside IPython/Jupyter (which already runs an event loop).
# NOTE(review): presumably this opens the connection to `server_address` — the cell
# output below reports a successful connection; confirm against ObstacleAvoidanceEnv.
await env.init_client()

✓ Connected to localhost:50051



In [6]:
# Resolve the obstacle-avoidance scenario file from its path components.
config_path = get_yaml_path("resources", "configurations", "obstacle-avoidance.yml")
# `config` holds whatever `read_file` returns for that path; it is later passed to `env.init`.
config = read_file(config_path)
# Echo the configuration so the scenario used for this run is recorded in the notebook.
print(config)

simulation:
    seed: 42
    duration: 1000000
environment:
    width: 10
    height: 10
    entities:
        - agent:
              id: 00000000-0000-0000-0000-000000000001
              radius: 0.25
              withProximitySensors: true
              withLightSensors: true
              position: [1.0, 4.0]
              orientation: 0.0
              speed: 1.0
              reward: ObstacleAvoidance
              termination: EndSimulationTermination
              truncation: ObstacleAvoidance
        - obstacle:
              height: 0.2
              width: 4.0
              orientation: 0.0
              position: [3.0, 5.0]
        - obstacle:
              width: 0.2
              height: 5.0
              position: [7.0, 6.0]
              orientation: 15.0
        - obstacle:
              width: 6.8
              position: [4.0, 3.0]
              height: 0.2
              orientation: 0.0
        - obstacle:
              orientation: 10.0
              position: [5.0,

In [7]:
# Initialise the environment with the configuration loaded in the previous cell.
# Must run after `env.init_client()` and before any `env.reset()` / `env.step()` call below.
await env.init(config)

✓ Initialization successful


In [8]:
# Single Q-learning agent built from the environment handle.
agent = QAgent(env)
# Key under which the environment reports this agent's observations and rewards.
# It duplicates the `id` of the agent entity in the configuration printed above,
# so the two must be kept in sync by hand.
agentId = "00000000-0000-0000-0000-000000000001"
# Registry mapping agent id -> learner; `run_episodes` looks agents up here by observation key.
agents = { agentId: agent }

In [9]:
async def run_episodes(episode_count, episode_max_steps, render = False):
    """Run Q-learning episodes against the notebook-level ``env``.

    For every episode the environment is reset, then stepped until the tracked
    agent (``agentId``) is terminated or truncated, or ``episode_max_steps``
    steps have been taken. After each step every agent that appears in the new
    observations has ``update_q`` called with the transition, so Q-values are
    updated on every call of this function, including rendered runs.

    Relies on the module-level names ``env``, ``agents`` and ``agentId``.

    Args:
        episode_count: Number of episodes to run.
        episode_max_steps: Upper bound on the number of steps per episode.
        render: When true, open a pygame window and draw the frame returned by
            ``env.render()`` after every step, capped at 60 frames per second.
            Closing the window stops the whole run, not just the current episode.

    Returns:
        None.
    """
    running = True

    try:
        if render:
            pygame.init()
            # NOTE(review): the window size is fixed; frames of a different size
            # are blitted at the top-left corner without scaling.
            screen = pygame.display.set_mode((800, 600))
            pygame.display.set_caption("Q-Learning obstacle avoidance")
            clock = pygame.time.Clock()

        # The context manager closes the progress bar even when the run is
        # interrupted by a window close or an exception.
        with trange(episode_count, desc="Training", unit="ep") as progress:
            for _ep in progress:
                obs, _ = await env.reset()
                done = False
                total_reward = { agentId: 0 }
                step_count = 0
                while running and not done and step_count < episode_max_steps:
                    actions = { k: agents[k].choose_action(v) for k, v in obs.items() }
                    next_obs, rewards, terminateds, truncateds, _ = await env.step(actions)
                    # TODO: take other agents into consideration
                    done = terminateds[agentId] or truncateds[agentId]
                    for k in next_obs:
                        agents[k].update_q(obs[k], actions[k], rewards[k], next_obs[k], done)
                        total_reward[k] += rewards[k]
                    obs = next_obs
                    step_count += 1
                    if render:
                        rgb_array = await env.render()
                        # Swap the first two axes before building the surface.
                        # Presumably env.render() yields (height, width, 3) while
                        # pygame expects (width, height, 3) — TODO confirm.
                        surface = pygame.surfarray.make_surface(np.transpose(rgb_array, (1, 0, 2)))
                        screen.blit(surface, (0, 0))
                        pygame.display.flip()
                        clock.tick(60)
                        # Drain the event queue so the window stays responsive
                        # and a close request is noticed.
                        for event in pygame.event.get():
                            if event.type == pygame.QUIT:
                                running = False

                # Show the tracked agent's episode return next to the progress bar.
                progress.set_postfix(reward=total_reward[agentId], refresh=False)

                # A window close must end the whole run; previously only the inner
                # loop was left and every remaining episode still ran one step.
                if not running:
                    break
    finally:
        # Always tear pygame down, even if an awaited environment call raised.
        if render:
            pygame.quit()


In [10]:
# Training budget: number of episodes, and the per-episode step cap passed to `run_episodes`.
episodes = 3000
steps = 1000

In [None]:
# Headless training run (render defaults to False); progress is reported by the tqdm bar.
await run_episodes(episodes, steps)

Training:   9%|████████████████▋                                                                                                                                                                            | 264/3000 [01:45<15:58,  2.85ep/s]

In [None]:
# One rendered episode in a pygame window to inspect the learned behaviour.
# Note: `run_episodes` still calls `update_q` on every step, so this run keeps learning.
await run_episodes(1, steps, True)