In [1]:
import sys
from pathlib import Path

# Make the directory two levels above the working directory importable, so the
# project packages used by the following cells can be found.
# The entry is resolved to an absolute path so it keeps working if the working
# directory changes later, and it is added only once so re-running this cell
# does not pile up duplicate sys.path entries.
project_root = str(Path("../..").resolve())
if project_root not in sys.path:
    sys.path.append(project_root)

In [2]:
import nest_asyncio
# Patch asyncio so an already-running event loop can be re-entered.
# NOTE(review): presumably required because the notebook kernel already runs
# an event loop and the environment client used below drives asyncio itself —
# confirm against the environment implementation.
nest_asyncio.apply()

In [3]:
from training.dqnetwork import DQNetwork

2025-10-31 17:12:13.088027: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-31 17:12:13.113359: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-10-31 17:12:16.005743: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [4]:
from environment.deepqlearning.obstacle_avoidance_env import ObstacleAvoidanceEnv
from utils.reader import get_yaml_path, read_file

In [5]:
# Endpoint (host:port) that the environment connects to.
server_address = "localhost:50051"
# Name handed to the environment together with the address
# (presumably how this client identifies itself to the server — TODO confirm).
client_name = "RLClient"
env = ObstacleAvoidanceEnv(server_address, client_name)
# Open the connection; the recorded run logs "Connected to localhost:50051"
# when this succeeds.
env.connect_to_client()

2025-10-31 17:12:16,937 — INFO — ✓ Connected to localhost:50051



In [6]:
# Locate the obstacle-avoidance configuration file and read it.
# NOTE(review): get_yaml_path presumably joins the given segments into a path
# to the YAML file — confirm in utils.reader.
config_path = get_yaml_path("resources", "configurations", "obstacle-avoidance.yml")
config = read_file(config_path)
# Debug aid: uncomment to inspect the loaded configuration.
# print(config)

In [7]:
# Hand the loaded configuration to the connected environment; the recorded run
# logs "Initialization successful" when this completes.
env.init(config)

2025-10-31 17:12:16,952 — INFO — ✓ Initialization successful


In [8]:
# Hidden-layer sizes passed to both DQNetwork instances (action model and
# target model) when the agent is built further down.
neuron_count_per_hidden_layer = [64, 32]

In [9]:
# --- Training schedule --------------------------------------------------------
episode_count = 50  # total number of training episodes
episode_max_steps = 2_000  # upper bound on the steps of a single episode

# --- Replay memory ------------------------------------------------------------
# Maximum number of transitions kept in the replay memory.
replay_memory_max_size = 100_000
# Replay-memory warm-up size — presumably the number of transitions gathered
# before learning begins (TODO confirm against DQAgent).
replay_memory_init_size = 1_000
# Mini-batch size.
batch_size = 64

# --- Update cadence (counted in total environment steps) ----------------------
# Steps between successive updates of the action model weights.
step_per_update = 4
# Steps between successive replacements of the target model weights.
step_per_update_target_model = 8

# --- Exploration (epsilon-greedy) ---------------------------------------------
max_epsilon = 1.0  # exploration probability at start
min_epsilon = 0.01  # minimum exploration probability
# Decay for the exploration probability.
# NOTE(review): this value is defined here but is not among the arguments
# passed to DQAgent in this notebook — confirm which decay the agent uses.
epsilon_decay = 0.0002

# --- Learning target ----------------------------------------------------------
gamma = 0.99  # discount factor

# --- Reward moving average ----------------------------------------------------
# Number of consecutive episodes averaged into the total-reward moving average.
moving_avg_window_size = 20
# Threshold on that moving average. NOTE(review): the name suggests a stop
# criterion, yet the recorded run exceeded 100 from episode 20 onwards and
# still completed all 50 episodes — confirm how the trainer uses it.
moving_avg_stop_thr = 100

In [10]:
from agent.scala_dqagent import DQAgent

def _build_q_network():
    """Return a new DQNetwork sized from the environment's spaces.

    Uses the observation-space shape, the configured hidden-layer sizes and
    the number of discrete actions; the model summary is disabled.
    """
    return DQNetwork(
        env.observation_space.shape,
        neuron_count_per_hidden_layer,
        env.action_space.n,
        summary=False,
    )


# Single agent with two separately constructed, identically shaped networks.
# NOTE(review): epsilon_decay from the hyperparameter cell is not forwarded
# here — confirm whether DQAgent is meant to receive it.
agent1 = DQAgent(
    env,
    agent_id="00000000-0000-0000-0000-000000000001",
    # Networks
    action_model=_build_q_network(),
    target_model=_build_q_network(),
    # Exploration and discounting
    epsilon_max=max_epsilon,
    epsilon_min=min_epsilon,
    gamma=gamma,
    # Replay memory
    replay_memory_max_size=replay_memory_max_size,
    replay_memory_init_size=replay_memory_init_size,
    batch_size=batch_size,
    # Update cadence
    step_per_update=step_per_update,
    step_per_update_target_model=step_per_update_target_model,
    # Reward moving average
    moving_avg_window_size=moving_avg_window_size,
    moving_avg_stop_thr=moving_avg_stop_thr,
    # Training schedule
    episode_max_steps=episode_max_steps,
    episodes=episode_count,
)

# The trainer takes a list of agents; this notebook trains just one.
agents = [agent1]

In [11]:
import time

from training.multi_agent_dqlearning import DQLearning

# Time the whole training run. perf_counter() is a monotonic, high-resolution
# clock intended for measuring durations; unlike time.time() it cannot jump
# if the system clock is adjusted during the (roughly hour-long) run.
train_start_time = time.perf_counter()

trainer = DQLearning(
    env,
    agents,
    episode_count=episode_count,
    episode_max_steps=episode_max_steps,
)
# Runs the training loop; the recorded output shows one log line per episode.
train_rewards = trainer.simple_dqn_training()

train_finish_time = time.perf_counter()
train_elapsed_time = train_finish_time - train_start_time
# Average wall time per configured episode; max(..., 1) avoids a
# ZeroDivisionError when episode_count is 0.
# NOTE(review): if simple_dqn_training can stop before episode_count episodes
# have run, this under-reports the real per-episode time — confirm.
train_avg_episode_time = train_elapsed_time / max(episode_count, 1)

# Prints total minutes followed by the average seconds per episode in brackets.
print(
    f"Train time: {train_elapsed_time / 60.0:.1f}m [{train_avg_episode_time:.1f}s]"
)

  from pkg_resources import resource_stream, resource_exists
Training DQN:   0%|                                                                                                                                                                                                      | 0/50 [00:00<?, ?ep/s]

2025-10-31 17:12:51,020 — INFO — Episode: 0 | Steps: 2000[2000] | Epsilon: 1.000 | Time: 32.67s | Reward: {'00000000-0000-0000-0000-000000000001': -964.8347416125011} | MovingAvg: {'00000000-0000-0000-0000-000000000001': -964.8347416125011}


Training DQN:   2%|███▊                                                                                                                                                                                          | 1/50 [00:32<26:40, 32.67s/ep]

2025-10-31 17:13:32,301 — INFO — Episode: 1 | Steps: 2000[4000] | Epsilon: 1.000 | Time: 41.28s | Reward: {'00000000-0000-0000-0000-000000000001': -769.2362681213457} | MovingAvg: {'00000000-0000-0000-0000-000000000001': -769.2362681213457}


Training DQN:   4%|███████▌                                                                                                                                                                                      | 2/50 [01:13<30:11, 37.74s/ep]

2025-10-31 17:14:16,160 — INFO — Episode: 2 | Steps: 2000[6000] | Epsilon: 0.913 | Time: 43.86s | Reward: {'00000000-0000-0000-0000-000000000001': -638.0801588091082} | MovingAvg: {'00000000-0000-0000-0000-000000000001': -638.0801588091082}


Training DQN:   6%|███████████▍                                                                                                                                                                                  | 3/50 [01:57<31:44, 40.53s/ep]

2025-10-31 17:15:00,773 — INFO — Episode: 3 | Steps: 1827[7827] | Epsilon: 0.833 | Time: 44.61s | Reward: {'00000000-0000-0000-0000-000000000001': -320.12383932373086} | MovingAvg: {'00000000-0000-0000-0000-000000000001': -320.12383932373086}


Training DQN:   8%|███████████████▏                                                                                                                                                                              | 4/50 [02:42<32:18, 42.14s/ep]

2025-10-31 17:15:32,993 — INFO — Episode: 4 | Steps: 1212[9039] | Epsilon: 0.761 | Time: 32.22s | Reward: {'00000000-0000-0000-0000-000000000001': -232.60478622803296} | MovingAvg: {'00000000-0000-0000-0000-000000000001': -232.60478622803296}


Training DQN:  10%|███████████████████                                                                                                                                                                           | 5/50 [03:14<28:55, 38.56s/ep]

2025-10-31 17:15:57,436 — INFO — Episode: 5 | Steps: 822[9861] | Epsilon: 0.695 | Time: 24.44s | Reward: {'00000000-0000-0000-0000-000000000001': -36.081041530836885} | MovingAvg: {'00000000-0000-0000-0000-000000000001': -36.081041530836885}


Training DQN:  12%|██████████████████████▊                                                                                                                                                                       | 6/50 [03:39<24:45, 33.76s/ep]

2025-10-31 17:16:13,732 — INFO — Episode: 6 | Steps: 540[10401] | Epsilon: 0.635 | Time: 16.29s | Reward: {'00000000-0000-0000-0000-000000000001': -76.85583462658943} | MovingAvg: {'00000000-0000-0000-0000-000000000001': -76.85583462658943}


Training DQN:  14%|██████████████████████████▌                                                                                                                                                                   | 7/50 [03:55<20:06, 28.05s/ep]

2025-10-31 17:17:19,576 — INFO — Episode: 7 | Steps: 2000[12401] | Epsilon: 0.580 | Time: 65.84s | Reward: {'00000000-0000-0000-0000-000000000001': 319.0348336095685} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 319.0348336095685}


Training DQN:  16%|██████████████████████████████▍                                                                                                                                                               | 8/50 [05:01<28:03, 40.08s/ep]

2025-10-31 17:18:25,954 — INFO — Episode: 8 | Steps: 1865[14266] | Epsilon: 0.530 | Time: 66.38s | Reward: {'00000000-0000-0000-0000-000000000001': 391.34590743659516} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 391.34590743659516}


Training DQN:  18%|██████████████████████████████████▏                                                                                                                                                           | 9/50 [06:07<33:00, 48.30s/ep]

2025-10-31 17:19:35,043 — INFO — Episode: 9 | Steps: 2000[16266] | Epsilon: 0.484 | Time: 69.09s | Reward: {'00000000-0000-0000-0000-000000000001': 679.901978331165} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 679.901978331165}


Training DQN:  20%|█████████████████████████████████████▊                                                                                                                                                       | 10/50 [07:16<36:28, 54.72s/ep]

2025-10-31 17:20:28,621 — INFO — Episode: 10 | Steps: 1517[17783] | Epsilon: 0.442 | Time: 53.58s | Reward: {'00000000-0000-0000-0000-000000000001': 835.3723951937604} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 835.3723951937604}


Training DQN:  22%|█████████████████████████████████████████▌                                                                                                                                                   | 11/50 [08:10<35:20, 54.37s/ep]

2025-10-31 17:21:46,031 — INFO — Episode: 11 | Steps: 1909[19692] | Epsilon: 0.404 | Time: 77.41s | Reward: {'00000000-0000-0000-0000-000000000001': 546.7289574940764} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 546.7289574940764}


Training DQN:  24%|█████████████████████████████████████████████▎                                                                                                                                               | 12/50 [09:27<38:52, 61.38s/ep]

2025-10-31 17:23:04,061 — INFO — Episode: 12 | Steps: 2000[21692] | Epsilon: 0.369 | Time: 78.03s | Reward: {'00000000-0000-0000-0000-000000000001': 723.4064475318056} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 723.4064475318056}


Training DQN:  26%|█████████████████████████████████████████████████▏                                                                                                                                           | 13/50 [10:45<40:57, 66.42s/ep]

2025-10-31 17:23:41,037 — INFO — Episode: 13 | Steps: 953[22645] | Epsilon: 0.338 | Time: 36.98s | Reward: {'00000000-0000-0000-0000-000000000001': 52.49311938805113} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 52.49311938805113}


Training DQN:  28%|████████████████████████████████████████████████████▉                                                                                                                                        | 14/50 [11:22<34:31, 57.53s/ep]

2025-10-31 17:24:07,141 — INFO — Episode: 14 | Steps: 660[23305] | Epsilon: 0.309 | Time: 26.10s | Reward: {'00000000-0000-0000-0000-000000000001': 244.98424906721164} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 244.98424906721164}


Training DQN:  30%|████████████████████████████████████████████████████████▋                                                                                                                                    | 15/50 [11:48<28:01, 48.06s/ep]

2025-10-31 17:25:33,775 — INFO — Episode: 15 | Steps: 2000[25305] | Epsilon: 0.283 | Time: 86.63s | Reward: {'00000000-0000-0000-0000-000000000001': 763.0617072161456} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 763.0617072161456}


Training DQN:  32%|████████████████████████████████████████████████████████████▍                                                                                                                                | 16/50 [13:15<33:48, 59.67s/ep]

2025-10-31 17:26:57,050 — INFO — Episode: 16 | Steps: 2000[27305] | Epsilon: 0.259 | Time: 83.27s | Reward: {'00000000-0000-0000-0000-000000000001': 722.6603743888695} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 722.6603743888695}


Training DQN:  34%|████████████████████████████████████████████████████████████████▎                                                                                                                            | 17/50 [14:38<36:43, 66.77s/ep]

2025-10-31 17:27:11,173 — INFO — Episode: 17 | Steps: 332[27637] | Epsilon: 0.237 | Time: 14.12s | Reward: {'00000000-0000-0000-0000-000000000001': 285.27299469724306} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 285.27299469724306}


Training DQN:  36%|████████████████████████████████████████████████████████████████████                                                                                                                         | 18/50 [14:52<27:10, 50.95s/ep]

2025-10-31 17:28:39,712 — INFO — Episode: 18 | Steps: 2000[29637] | Epsilon: 0.217 | Time: 88.54s | Reward: {'00000000-0000-0000-0000-000000000001': 1462.4751048515432} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 1462.4751048515432}


Training DQN:  38%|███████████████████████████████████████████████████████████████████████▊                                                                                                                     | 19/50 [16:21<32:09, 62.24s/ep]

2025-10-31 17:29:35,651 — INFO — Episode: 19 | Steps: 1275[30912] | Epsilon: 0.199 | Time: 55.94s | Reward: {'00000000-0000-0000-0000-000000000001': 364.50467350720305} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 364.50467350720305}


Training DQN:  40%|███████████████████████████████████████████████████████████████████████████▌                                                                                                                 | 20/50 [17:17<30:10, 60.35s/ep]

2025-10-31 17:31:03,663 — INFO — Episode: 20 | Steps: 2000[32912] | Epsilon: 0.182 | Time: 88.01s | Reward: {'00000000-0000-0000-0000-000000000001': 795.6106234339842} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 217.67130362305465}


Training DQN:  42%|███████████████████████████████████████████████████████████████████████████████▍                                                                                                             | 21/50 [18:45<33:10, 68.65s/ep]

2025-10-31 17:32:37,699 — INFO — Episode: 21 | Steps: 2000[34912] | Epsilon: 0.167 | Time: 94.03s | Reward: {'00000000-0000-0000-0000-000000000001': 242.15342037396195} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 305.69357187537895}


Training DQN:  44%|███████████████████████████████████████████████████████████████████████████████████▏                                                                                                         | 22/50 [20:19<35:35, 76.27s/ep]

2025-10-31 17:34:08,133 — INFO — Episode: 22 | Steps: 2000[36912] | Epsilon: 0.153 | Time: 90.43s | Reward: {'00000000-0000-0000-0000-000000000001': 510.36652317044843} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 356.2630563001443}


Training DQN:  46%|██████████████████████████████████████████████████████████████████████████████████████▉                                                                                                      | 23/50 [21:49<36:14, 80.52s/ep]

2025-10-31 17:35:40,267 — INFO — Episode: 23 | Steps: 2000[38912] | Epsilon: 0.141 | Time: 92.13s | Reward: {'00000000-0000-0000-0000-000000000001': 1233.7599814010812} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 413.6853903991221}


Training DQN:  48%|██████████████████████████████████████████████████████████████████████████████████████████▋                                                                                                  | 24/50 [23:21<36:24, 84.00s/ep]

2025-10-31 17:37:14,091 — INFO — Episode: 24 | Steps: 2000[40912] | Epsilon: 0.129 | Time: 93.82s | Reward: {'00000000-0000-0000-0000-000000000001': 691.6064410688174} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 491.37958143536275}


Training DQN:  50%|██████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                              | 25/50 [24:55<36:13, 86.95s/ep]

2025-10-31 17:38:45,152 — INFO — Episode: 25 | Steps: 2000[42912] | Epsilon: 0.119 | Time: 91.06s | Reward: {'00000000-0000-0000-0000-000000000001': 705.7993775046081} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 537.5901428002053}


Training DQN:  52%|██████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                                          | 26/50 [26:26<35:16, 88.18s/ep]

2025-10-31 17:40:30,084 — INFO — Episode: 26 | Steps: 2000[44912] | Epsilon: 0.109 | Time: 104.93s | Reward: {'00000000-0000-0000-0000-000000000001': 961.27971323317} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 574.6841637519775}


Training DQN:  54%|██████████████████████████████████████████████████████████████████████████████████████████████████████                                                                                       | 27/50 [28:11<35:43, 93.21s/ep]

2025-10-31 17:42:07,556 — INFO — Episode: 27 | Steps: 2000[46912] | Epsilon: 0.100 | Time: 97.47s | Reward: {'00000000-0000-0000-0000-000000000001': 1121.9521692277344} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 626.5909411449654}


Training DQN:  56%|█████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                   | 28/50 [29:49<34:38, 94.49s/ep]

2025-10-31 17:43:38,688 — INFO — Episode: 28 | Steps: 1819[48731] | Epsilon: 0.092 | Time: 91.13s | Reward: {'00000000-0000-0000-0000-000000000001': 1414.7987285647728} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 666.7368079258738}


Training DQN:  58%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                               | 29/50 [31:20<32:43, 93.48s/ep]

2025-10-31 17:45:20,395 — INFO — Episode: 29 | Steps: 2000[50731] | Epsilon: 0.085 | Time: 101.71s | Reward: {'00000000-0000-0000-0000-000000000001': 496.4696990857718} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 717.9094489822827}


Training DQN:  60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                           | 30/50 [33:02<31:58, 95.95s/ep]

2025-10-31 17:46:43,380 — INFO — Episode: 30 | Steps: 1698[52429] | Epsilon: 0.078 | Time: 82.98s | Reward: {'00000000-0000-0000-0000-000000000001': 1332.1870325177345} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 708.737835020013}


Training DQN:  62%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                       | 31/50 [34:25<29:09, 92.06s/ep]

2025-10-31 17:48:30,056 — INFO — Episode: 31 | Steps: 2000[54429] | Epsilon: 0.072 | Time: 106.67s | Reward: {'00000000-0000-0000-0000-000000000001': 507.2278206299414} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 733.5785668862117}


Training DQN:  64%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                                                    | 32/50 [36:11<28:56, 96.44s/ep]

2025-10-31 17:49:14,684 — INFO — Episode: 32 | Steps: 845[55274] | Epsilon: 0.067 | Time: 44.63s | Reward: {'00000000-0000-0000-0000-000000000001': -41.86337747098684} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 731.6035100430049}


Training DQN:  66%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                                                | 33/50 [36:56<22:55, 80.90s/ep]

2025-10-31 17:50:55,379 — INFO — Episode: 33 | Steps: 2000[57274] | Epsilon: 0.062 | Time: 100.69s | Reward: {'00000000-0000-0000-0000-000000000001': 1051.1951524831325} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 693.3400187928653}


Training DQN:  68%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                            | 34/50 [38:37<23:09, 86.84s/ep]

2025-10-31 17:51:44,232 — INFO — Episode: 34 | Steps: 951[58225] | Epsilon: 0.057 | Time: 48.85s | Reward: {'00000000-0000-0000-0000-000000000001': 286.9535171054632} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 743.2751204476194}


Training DQN:  70%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                        | 35/50 [39:25<18:51, 75.44s/ep]

2025-10-31 17:52:54,419 — INFO — Episode: 35 | Steps: 1341[59566] | Epsilon: 0.053 | Time: 70.19s | Reward: {'00000000-0000-0000-0000-000000000001': 628.5168861756307} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 745.373583849532}


Training DQN:  72%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                     | 36/50 [40:36<17:14, 73.87s/ep]

2025-10-31 17:54:35,034 — INFO — Episode: 36 | Steps: 2000[61566] | Epsilon: 0.049 | Time: 100.61s | Reward: {'00000000-0000-0000-0000-000000000001': 1727.2006547104388} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 738.6463427975062}


Training DQN:  74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                 | 37/50 [42:16<17:44, 81.89s/ep]

2025-10-31 17:56:20,794 — INFO — Episode: 37 | Steps: 2000[63566] | Epsilon: 0.046 | Time: 105.76s | Reward: {'00000000-0000-0000-0000-000000000001': 1272.5452891149882} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 788.8733568135847}


Training DQN:  76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋                                             | 38/50 [44:02<17:48, 89.05s/ep]

2025-10-31 17:57:16,133 — INFO — Episode: 38 | Steps: 1030[64596] | Epsilon: 0.043 | Time: 55.34s | Reward: {'00000000-0000-0000-0000-000000000001': 279.06950451109185} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 838.236971534472}


Training DQN:  78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                         | 39/50 [44:57<14:28, 78.94s/ep]

2025-10-31 17:58:56,282 — INFO — Episode: 39 | Steps: 2000[66596] | Epsilon: 0.040 | Time: 100.15s | Reward: {'00000000-0000-0000-0000-000000000001': 1330.1258917049583} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 779.0666915174494}


Training DQN:  80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                     | 40/50 [46:37<14:13, 85.30s/ep]

2025-10-31 18:00:15,262 — INFO — Episode: 40 | Steps: 1524[68120] | Epsilon: 0.037 | Time: 78.98s | Reward: {'00000000-0000-0000-0000-000000000001': 515.0847563642276} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 827.3477524273371}


Training DQN:  82%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉                                  | 41/50 [47:56<12:30, 83.40s/ep]

2025-10-31 18:01:52,348 — INFO — Episode: 41 | Steps: 1907[70027] | Epsilon: 0.035 | Time: 97.08s | Reward: {'00000000-0000-0000-0000-000000000001': 323.41674254302745} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 813.3214590738493}


Training DQN:  84%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                              | 42/50 [49:33<11:40, 87.51s/ep]

2025-10-31 18:02:33,455 — INFO — Episode: 42 | Steps: 841[70868] | Epsilon: 0.033 | Time: 41.10s | Reward: {'00000000-0000-0000-0000-000000000001': 6.90943810719935} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 817.3846251823026}


Training DQN:  86%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                          | 43/50 [50:15<08:35, 73.59s/ep]

2025-10-31 18:04:13,946 — INFO — Episode: 43 | Steps: 2000[72868] | Epsilon: 0.031 | Time: 100.49s | Reward: {'00000000-0000-0000-0000-000000000001': 1105.6775293530936} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 792.2117709291401}


Training DQN:  88%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                      | 44/50 [51:55<08:09, 81.66s/ep]

2025-10-31 18:05:48,406 — INFO — Episode: 44 | Steps: 1826[74694] | Epsilon: 0.029 | Time: 94.46s | Reward: {'00000000-0000-0000-0000-000000000001': 1948.0606727022835} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 785.8076483267407}


Training DQN:  90%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                   | 45/50 [53:30<07:07, 85.50s/ep]

2025-10-31 18:06:50,172 — INFO — Episode: 45 | Steps: 1258[75952] | Epsilon: 0.027 | Time: 61.77s | Reward: {'00000000-0000-0000-0000-000000000001': 972.0483220573018} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 848.6303599084141}


Training DQN:  92%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▉               | 46/50 [54:31<05:13, 78.38s/ep]

2025-10-31 18:07:06,745 — INFO — Episode: 46 | Steps: 338[76290] | Epsilon: 0.026 | Time: 16.57s | Reward: {'00000000-0000-0000-0000-000000000001': 118.67498737383585} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 861.9428071360487}


Training DQN:  94%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▋           | 47/50 [54:48<02:59, 59.84s/ep]

2025-10-31 18:07:36,294 — INFO — Episode: 47 | Steps: 613[76903] | Epsilon: 0.024 | Time: 29.55s | Reward: {'00000000-0000-0000-0000-000000000001': 306.451384312033} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 819.8125708430821}


Training DQN:  96%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍       | 48/50 [55:17<01:41, 50.75s/ep]

2025-10-31 18:09:22,065 — INFO — Episode: 48 | Steps: 2000[78903] | Epsilon: 0.023 | Time: 105.77s | Reward: {'00000000-0000-0000-0000-000000000001': 762.4032792623693} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 779.037531597297}


Training DQN:  98%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏   | 49/50 [57:03<01:07, 67.26s/ep]

2025-10-31 18:11:02,738 — INFO — Episode: 49 | Steps: 2000[80903] | Epsilon: 0.022 | Time: 100.67s | Reward: {'00000000-0000-0000-0000-000000000001': 1725.7035449906405} | MovingAvg: {'00000000-0000-0000-0000-000000000001': 746.4177591321768}


Training DQN: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [58:44<00:00, 70.49s/ep]

Train time: 58.7m [70.5s]





In [12]:
# Run 5 playback episodes through the trainer's pygame player with fps=60.
# NOTE(review): in the recorded output only episode 1 reports a reward;
# episodes 2-5 all log "Reward: 0" within a few milliseconds of each other.
# It looks like the environment is not reset between playback episodes —
# verify play_with_pygame before relying on these numbers.
trainer.play_with_pygame(episodes=5, fps=60)

2025-10-31 18:12:28,972 — INFO — Episode 1/5 - Reward: 1520.3815183927197
2025-10-31 18:12:28,975 — INFO — Episode 2/5 - Reward: 0
2025-10-31 18:12:28,975 — INFO — Episode 3/5 - Reward: 0
2025-10-31 18:12:28,976 — INFO — Episode 4/5 - Reward: 0
2025-10-31 18:12:28,977 — INFO — Episode 5/5 - Reward: 0
