In [1]:
import sys
from pathlib import Path

# Make the directory two levels above the current working directory importable.
# NOTE(review): presumably this is the project root that holds the `training`,
# `environment`, `utils` and `agent` packages imported further down — confirm.
#
# The entry is resolved to an absolute path so that a later change of working
# directory cannot silently invalidate it, and the membership check keeps
# re-runs of this cell from appending duplicate entries.
_project_root = str(Path("../..").resolve())
if _project_root not in sys.path:
    sys.path.append(_project_root)

In [2]:
import nest_asyncio
# Patch asyncio so an event loop can be re-entered while another one is already
# running (Jupyter runs its own loop).
# NOTE(review): presumably needed by the environment client used below — confirm.
nest_asyncio.apply()

In [3]:
from training.dqnetwork import DQNetwork

2025-10-31 10:46:09.539188: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-31 10:46:09.564645: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-31 10:46:12.497944: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [4]:
from environment.deepqlearning.obstacle_avoidance_env import ObstacleAvoidanceEnv
from utils.reader import get_yaml_path, read_file

In [5]:
# Address of the server the environment talks to, and the client name passed
# along with it.
server_address, client_name = "localhost:50051", "RLClient"

# Build the environment wrapper first, then open the connection to the server.
env = ObstacleAvoidanceEnv(server_address, client_name)
env.connect_to_client()

2025-10-31 10:46:13,390 — INFO — ✓ Connected to localhost:50051



In [6]:
# Build the path to the obstacle-avoidance YAML configuration and read it.
config_path = get_yaml_path(
    "resources",
    "configurations",
    "obstacle-avoidance.yml",
)
config = read_file(config_path)

In [7]:
# Initialise the connected environment with the configuration read from the
# obstacle-avoidance YAML file.
env.init(config)

2025-10-31 10:46:13,402 — INFO — ✓ Initialization successful


In [8]:
# Hidden-layer specification handed to both DQNetwork instances (action model
# and target model) when the agent is built.
# NOTE(review): presumably one entry per hidden layer giving its neuron count —
# confirm against DQNetwork.
neuron_count_per_hidden_layer = [64, 32]

In [9]:
# --- Training schedule -------------------------------------------------------
episode_count = 500  # Total number of training episodes
episode_max_steps = 5_000  # Maximum number of steps per episode

# --- Replay memory -----------------------------------------------------------
# Maximum number of transitions stored into the replay memory
replay_memory_max_size = 100_000
# NOTE(review): the previous comment here was a copy of the max-size one.
# Presumably this is the number of transitions that must be collected in the
# replay memory before learning starts — confirm against DQAgent.
replay_memory_init_size = 1_000
batch_size = 64  # Mini-batch size

# --- Update cadence ----------------------------------------------------------
# Number of total steps executed between successive updates of the action model weights
step_per_update = 4
# Number of total steps executed between successive replaces of the target model weights
step_per_update_target_model = 8

# --- Exploration (epsilon-greedy) --------------------------------------------
max_epsilon = 1.0  # Exploration probability at start
min_epsilon = 0.01  # Minimum exploration probability
epsilon_decay = 0.0002  # Decay for exploration probability

gamma = 0.99  # Discount factor

# --- Moving-average tracking -------------------------------------------------
# Number of consecutive episodes to be considered in the calculation of the
# total reward moving average
moving_avg_window_size = 20
# NOTE(review): presumably the moving-average reward at which training is
# stopped early — confirm against DQAgent / DQLearning.
moving_avg_stop_thr = 100

In [10]:
from agent.scala_dqagent import DQAgent


def _new_q_network():
    """Return a fresh DQNetwork built from the environment's spaces.

    Each call creates a separate instance, so the action model and the target
    model do not share the same network object.
    """
    return DQNetwork(
        env.observation_space.shape,
        neuron_count_per_hidden_layer,
        env.action_space.n,
        summary=False,
    )


# Single agent configured from the hyperparameters defined earlier in the
# notebook. The action model is constructed before the target model.
agent1 = DQAgent(
    env,
    agent_id="00000000-0000-0000-0000-000000000001",
    # Networks
    action_model=_new_q_network(),
    target_model=_new_q_network(),
    # Exploration schedule
    epsilon_max=max_epsilon,
    epsilon_min=min_epsilon,
    epsilon_decay=epsilon_decay,
    # Discounting
    gamma=gamma,
    # Replay memory and mini-batches
    replay_memory_max_size=replay_memory_max_size,
    replay_memory_init_size=replay_memory_init_size,
    batch_size=batch_size,
    # Update cadence
    step_per_update=step_per_update,
    step_per_update_target_model=step_per_update_target_model,
    # Moving-average tracking and episode length
    moving_avg_window_size=moving_avg_window_size,
    moving_avg_stop_thr=moving_avg_stop_thr,
    episode_max_steps=episode_max_steps,
)

# The trainer takes a list of agents; only one is used here.
agents = [agent1]

  from pkg_resources import resource_stream, resource_exists


In [11]:
import time

from training.multi_agent_dqlearning import DQLearning

# Train the agents and report how long it took.
#
# time.perf_counter() is used instead of time.time(): it is monotonic, so the
# measured duration cannot be distorted by system clock adjustments during a
# long run. The start/finish values are therefore only meaningful as a
# difference; they are not epoch timestamps.
train_start_time = time.perf_counter()

trainer = DQLearning(
    env,
    agents,
    episode_count=episode_count,
    episode_max_steps=episode_max_steps,
)
train_rewards = trainer.simple_dqn_training()

train_finish_time = time.perf_counter()
train_elapsed_time = train_finish_time - train_start_time
# NOTE(review): the average divides by the configured episode_count. If
# training can finish before running every episode (e.g. an early stop tied to
# moving_avg_stop_thr), this understates the real per-episode time — confirm
# and divide by the number of episodes actually run if so.
train_avg_episode_time = train_elapsed_time / episode_count

print(
    f"Train time: {train_elapsed_time / 60.0:.1f}m [{train_avg_episode_time:.1f}s]"
)

Training DQN:   0%|                                                                                                                                                                                                     | 0/500 [00:00<?, ?ep/s]

2025-10-31 10:48:09,387 — INFO — Episode: 0 | Steps: 2774[2774] | Epsilon (of the first agent): 1.000 | Time: 108.72s | Reward (of the first agent): -96060.7 | MovingAvg (of the first agent): -96060.7


Training DQN:   0%|▎                                                                                                                                                                                        | 1/500 [01:48<15:04:11, 108.72s/ep]

2025-10-31 10:48:54,917 — INFO — Episode: 1 | Steps: 1004[3778] | Epsilon (of the first agent): 0.445 | Time: 45.53s | Reward (of the first agent): 160.0 | MovingAvg (of the first agent): 160.0


Training DQN:   0%|▋                                                                                                                                                                                          | 2/500 [02:34<9:53:51, 71.55s/ep]

2025-10-31 10:50:34,860 — INFO — Episode: 2 | Steps: 1505[5283] | Epsilon (of the first agent): 0.362 | Time: 99.94s | Reward (of the first agent): -90041.6 | MovingAvg (of the first agent): -90041.6


Training DQN:   1%|█                                                                                                                                                                                         | 3/500 [04:29<12:23:18, 89.73s/ep]


KeyboardInterrupt: 

In [12]:
# Replay the agents through the trainer's pygame player for 5 episodes.
# NOTE(review): presumably `fps` is the rendering frame rate — confirm.
# NOTE(review): in the recorded output, episodes 2-5 all report reward 0 within
# a few milliseconds and the connection is closed afterwards; this looks like
# the environment is not being reset between episodes — verify in
# DQLearning.play_with_pygame.
trainer.play_with_pygame(episodes=5, fps=60)

2025-10-31 10:51:13,195 — INFO — Episode 1/5 - Reward: 78.58215596839916
2025-10-31 10:51:13,198 — INFO — Episode 2/5 - Reward: 0
2025-10-31 10:51:13,198 — INFO — Episode 3/5 - Reward: 0
2025-10-31 10:51:13,199 — INFO — Episode 4/5 - Reward: 0
2025-10-31 10:51:13,200 — INFO — Episode 5/5 - Reward: 0
2025-10-31 10:51:13,200 — INFO — ✓ Closed connection to localhost:50051
