In [1]:
import sys

# Make the directory two levels above the working directory importable;
# presumably this is the project root that holds the `training`,
# `environment`, `utils` and `agent` packages imported below (confirm).
# The membership check keeps the cell idempotent: re-running it must not
# pile up duplicate entries in sys.path.
if "../.." not in sys.path:
    sys.path.append("../..")

In [2]:
import nest_asyncio

# Patch asyncio so an event loop can be re-entered from inside the notebook.
nest_asyncio.apply()

import tensorflow as tf

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all.
# Memory growth has to be configured identically on every visible GPU, so
# apply it to each device rather than only the first one.
gpus = tf.config.list_physical_devices("GPU")
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Raised when the GPUs were already initialised (e.g. the cell is re-run
    # in a live kernel); the setting can no longer be changed at that point.
    print(f"Could not enable GPU memory growth: {err}")

2025-11-01 16:21:07.068869: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from training.dqnetwork import DQNetwork

In [4]:
from environment.deepqlearning.obstacle_avoidance_env import ObstacleAvoidanceEnv
from utils.reader import get_yaml_path, read_file

In [5]:
# Endpoint and client name handed to the environment constructor.
server_address, client_name = "localhost:50060", "RLClient"

# Build the environment wrapper, then open its connection to the server.
env = ObstacleAvoidanceEnv(server_address, client_name)
env.connect_to_client()

2025-11-01 16:21:13,487 — INFO — ✓ Connected to localhost:50060



In [6]:
# Resolve the obstacle-avoidance YAML file from its path components,
# then read it into `config`.
config_path = get_yaml_path(
    "resources",
    "configurations",
    "obstacle-avoidance.yml",
)
config = read_file(config_path)

In [7]:
# Initialise the connected environment with the configuration loaded above.
env.init(config)

2025-11-01 16:21:13,513 — INFO — ✓ Initialization successful


In [8]:
# Passed as the second argument to both DQNetwork instances; presumably one
# entry per hidden layer giving that layer's neuron count (confirm).
neuron_count_per_hidden_layer = [
    64,
    32,
]

In [9]:
# --- Episode budget ---------------------------------------------------------
# Total number of training episodes.
episode_count = 1000
# Maximum number of steps per episode.
episode_max_steps = 2000

# --- Replay memory ----------------------------------------------------------
# Maximum number of transitions stored into the replay memory.
replay_memory_max_size = 100_000
# Initial replay-memory size; presumably the number of transitions collected
# before learning starts -- TODO confirm against DQAgent.
replay_memory_init_size = 10_000
# Mini-batch size.
batch_size = 512

# --- Update cadence ---------------------------------------------------------
# Number of total steps executed between successive updates of the action
# model weights.
step_per_update = 4
# Number of total steps executed between successive replaces of the target
# model weights.
step_per_update_target_model = 1000

# --- Exploration ------------------------------------------------------------
# Exploration probability at start.
max_epsilon = 1.0
# Minimum exploration probability.
min_epsilon = 0.01
# Decay for exploration probability.
# NOTE(review): not passed to DQAgent in the agent-construction cell of this
# notebook -- confirm whether it is still used anywhere.
epsilon_decay = 0.0002

# --- Discounting ------------------------------------------------------------
# Discount factor.
gamma = 0.99

# --- Moving-average tracking --------------------------------------------------
# Number of consecutive episodes to be considered in the calculation of the
# total reward moving average.
moving_avg_window_size = 20
# Presumably the moving-average reward threshold at which training stops
# early -- TODO confirm against DQAgent.
moving_avg_stop_thr = 100

In [10]:
from agent.scala_dqagent import DQAgent

# Both networks are built from the same three positional arguments taken
# from the environment's observation/action spaces and the layer list.
observation_shape = env.observation_space.shape
action_count = env.action_space.n

# Only the action model is created with summary=True; the target model uses
# the same constructor arguments with summary=False.
action_network = DQNetwork(
    observation_shape,
    neuron_count_per_hidden_layer,
    action_count,
    summary=True,
)
target_network = DQNetwork(
    observation_shape,
    neuron_count_per_hidden_layer,
    action_count,
    summary=False,
)

# Single DQN agent wired to the shared environment and the hyperparameters
# defined earlier in the notebook.
# NOTE(review): `epsilon_decay` is defined with the other hyperparameters but
# is not forwarded here -- confirm whether DQAgent expects it.
agent1 = DQAgent(
    env,
    agent_id="00000000-0000-0000-0000-000000000001",
    action_model=action_network,
    target_model=target_network,
    epsilon_max=max_epsilon,
    epsilon_min=min_epsilon,
    gamma=gamma,
    replay_memory_max_size=replay_memory_max_size,
    replay_memory_init_size=replay_memory_init_size,
    batch_size=batch_size,
    step_per_update=step_per_update,
    step_per_update_target_model=step_per_update_target_model,
    moving_avg_window_size=moving_avg_window_size,
    moving_avg_stop_thr=moving_avg_stop_thr,
    episode_max_steps=episode_max_steps,
    episodes=episode_count,
)

# The trainer takes a list of agents; this notebook trains just one.
agents = [agent1]

I0000 00:00:1762010473.607651   13969 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4081 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1060 6GB, pci bus id: 0000:1d:00.0, compute capability: 6.1


KeyboardInterrupt: 

In [None]:
import time

from training.multi_agent_dqlearning import DQLearning

# Measure the elapsed time with a monotonic clock: time.perf_counter() is not
# affected by system clock adjustments, unlike time.time(). The start/finish
# values are therefore only meaningful as a difference, not as timestamps.
train_start_time = time.perf_counter()

# Build the trainer over the shared environment and agent list, then run
# training and keep whatever simple_dqn_training() returns.
trainer = DQLearning(
    env,
    agents,
    episode_count=episode_count,
    episode_max_steps=episode_max_steps,
)
train_rewards = trainer.simple_dqn_training()

train_finish_time = time.perf_counter()
train_elapsed_time = train_finish_time - train_start_time
# Average over the *configured* episode count.
# NOTE(review): if training can stop early (e.g. via moving_avg_stop_thr),
# this understates the real per-episode time -- confirm and, if so, divide
# by the number of episodes actually run.
train_avg_episode_time = train_elapsed_time / episode_count

# Total time in minutes, followed by the average seconds per episode.
print(
    f"Train time: {train_elapsed_time / 60.0:.1f}m [{train_avg_episode_time:.1f}s]"
)

In [None]:
# Call the trainer's pygame playback for 5 episodes at 60 fps; presumably
# this renders the trained agents for visual inspection (confirm).
trainer.play_with_pygame(episodes=5, fps=60)