In [1]:
import sys

import gymnasium as gym

sys.path.append("..")

In [2]:
from training.dqnetwork import DQNetwork

In [3]:
env = gym.make("CartPole-v1", render_mode="rgb_array")

In [4]:
from utils.reproducibility import set_global_seed

set_global_seed(seed=42, env=env, tensorflow_deterministic=True, verbose=True)

2025-10-20 16:32:22,907 — INFO — [set_global_seed] TensorFlow seed set.
2025-10-20 16:32:22,909 — INFO — [set_global_seed] Gymnasium environment seed set.
2025-10-20 16:32:22,909 — INFO — [set_global_seed] All available seeds set to 42.


In [5]:
neuron_count_per_hidden_layer = [64, 32]

action_net = DQNetwork(
    env.observation_space.shape,
    neuron_count_per_hidden_layer,
    env.action_space.n,
    summary=True,
)
target_net = DQNetwork(
    env.observation_space.shape,
    neuron_count_per_hidden_layer,
    env.action_space.n,
    summary=True,
)

target_net.model.set_weights(action_net.model.get_weights())

In [6]:
episode_count = 500  # Total number of training episodes
episode_max_steps = 400  # Maximum number of steps per episode

replay_memory_max_size = (
    100000  # Maximum number of transitions stored into the replay memory
)
replay_memory_init_size = (
    1000  # Maximum number of transitions stored into the replay memory
)
batch_size = 64  # Mini-batch size

step_per_update = 4  # Number of total steps executed between successive updates of the action model weights
step_per_update_target_model = 8  # Number of total steps executed between successive replaces of the target model weights

max_epsilon = 1.0  # Exploration probability at start
min_epsilon = 0.01  # Minimum exploration probability
epsilon_decay = 0.0002  # Decay for exploration probability

gamma = 0.99  # Discount factor

moving_avg_window_size = 20  # Number of consecutive episodes to be considered in the calculation of the total reward moving average
moving_avg_stop_thr = 100

In [7]:
from agent.dqagent import DQAgent

agent = DQAgent(
    env,
    epsilon_max=max_epsilon,
    epsilon_min=min_epsilon,
    epsilon_decay=epsilon_decay,
    gamma=gamma,
    replay_memory_max_size=replay_memory_max_size,
    replay_memory_init_size=replay_memory_init_size,
    batch_size=batch_size,
    step_per_update=step_per_update,
    step_per_update_target_model=step_per_update_target_model,
    moving_avg_window_size=moving_avg_window_size,
    moving_avg_stop_thr=moving_avg_stop_thr,
    episode_max_steps=episode_max_steps,
)

In [8]:
import time

from training.dqlearning import DQLearning

train_start_time = time.time()

trainer = DQLearning(
    env,
    agent,
    episode_count=episode_count,
    episode_max_steps=episode_max_steps,
    dqn_action_model=action_net,
    dqn_target_model=target_net,
)
train_rewards = trainer.simple_dqn_training()

train_finish_time = time.time()
train_elapsed_time = train_finish_time - train_start_time
train_avg_episode_time = train_elapsed_time / episode_count

print(
    f"Train time: {train_elapsed_time / 60.0:.1f}m [{train_avg_episode_time:.1f}s]"
)

Training DQN:   0%|          | 0/500 [00:00<?, ?ep/s]

2025-10-20 16:32:28,874 — INFO — Episode: 0 | Steps: 18[18] | Epsilon: 1.000 | Time: 1.12s | Reward: 18.0 | MovingAvg: 18.0


Training DQN:   0%|          | 1/500 [00:01<09:21,  1.13s/ep]

2025-10-20 16:32:29,342 — INFO — Episode: 1 | Steps: 16[34] | Epsilon: 0.996 | Time: 0.47s | Reward: 16.0 | MovingAvg: 16.0


Training DQN:   0%|          | 2/500 [00:01<06:07,  1.35ep/s]

2025-10-20 16:32:30,440 — INFO — Episode: 2 | Steps: 33[67] | Epsilon: 0.993 | Time: 1.10s | Reward: 33.0 | MovingAvg: 33.0


Training DQN:   1%|          | 3/500 [00:02<07:28,  1.11ep/s]

2025-10-20 16:32:30,914 — INFO — Episode: 3 | Steps: 14[81] | Epsilon: 0.987 | Time: 0.47s | Reward: 14.0 | MovingAvg: 14.0


Training DQN:   1%|          | 4/500 [00:03<06:03,  1.36ep/s]

2025-10-20 16:32:31,374 — INFO — Episode: 4 | Steps: 15[96] | Epsilon: 0.984 | Time: 0.46s | Reward: 15.0 | MovingAvg: 15.0


Training DQN:   1%|          | 5/500 [00:03<05:14,  1.58ep/s]

2025-10-20 16:32:31,839 — INFO — Episode: 5 | Steps: 14[110] | Epsilon: 0.981 | Time: 0.46s | Reward: 14.0 | MovingAvg: 14.0


Training DQN:   1%|          | 6/500 [00:04<04:45,  1.73ep/s]

2025-10-20 16:32:32,556 — INFO — Episode: 6 | Steps: 24[134] | Epsilon: 0.978 | Time: 0.71s | Reward: 24.0 | MovingAvg: 24.0


Training DQN:   1%|▏         | 7/500 [00:04<05:07,  1.61ep/s]

2025-10-20 16:32:33,109 — INFO — Episode: 7 | Steps: 16[150] | Epsilon: 0.973 | Time: 0.55s | Reward: 16.0 | MovingAvg: 16.0


Training DQN:   2%|▏         | 8/500 [00:05<04:55,  1.66ep/s]

2025-10-20 16:32:34,143 — INFO — Episode: 8 | Steps: 30[180] | Epsilon: 0.970 | Time: 1.03s | Reward: 30.0 | MovingAvg: 30.0


Training DQN:   2%|▏         | 9/500 [00:06<06:01,  1.36ep/s]

2025-10-20 16:32:35,092 — INFO — Episode: 9 | Steps: 24[204] | Epsilon: 0.964 | Time: 0.95s | Reward: 24.0 | MovingAvg: 24.0


Training DQN:   2%|▏         | 10/500 [00:07<06:32,  1.25ep/s]

2025-10-20 16:32:35,942 — INFO — Episode: 10 | Steps: 17[221] | Epsilon: 0.959 | Time: 0.85s | Reward: 17.0 | MovingAvg: 17.0


Training DQN:   2%|▏         | 11/500 [00:08<06:39,  1.22ep/s]

2025-10-20 16:32:36,419 — INFO — Episode: 11 | Steps: 13[234] | Epsilon: 0.956 | Time: 0.48s | Reward: 13.0 | MovingAvg: 13.0


Training DQN:   2%|▏         | 12/500 [00:08<05:48,  1.40ep/s]

2025-10-20 16:32:38,092 — INFO — Episode: 12 | Steps: 44[278] | Epsilon: 0.953 | Time: 1.67s | Reward: 44.0 | MovingAvg: 44.0


Training DQN:   3%|▎         | 13/500 [00:10<08:08,  1.00s/ep]

2025-10-20 16:32:38,613 — INFO — Episode: 13 | Steps: 15[293] | Epsilon: 0.944 | Time: 0.52s | Reward: 15.0 | MovingAvg: 15.0


Training DQN:   3%|▎         | 14/500 [00:10<06:57,  1.17ep/s]

2025-10-20 16:32:39,298 — INFO — Episode: 14 | Steps: 22[315] | Epsilon: 0.941 | Time: 0.68s | Reward: 22.0 | MovingAvg: 22.0


Training DQN:   3%|▎         | 15/500 [00:11<06:30,  1.24ep/s]

2025-10-20 16:32:39,747 — INFO — Episode: 15 | Steps: 10[325] | Epsilon: 0.937 | Time: 0.45s | Reward: 10.0 | MovingAvg: 10.0


Training DQN:   3%|▎         | 16/500 [00:11<05:38,  1.43ep/s]

2025-10-20 16:32:40,488 — INFO — Episode: 16 | Steps: 21[346] | Epsilon: 0.935 | Time: 0.74s | Reward: 21.0 | MovingAvg: 21.0


Training DQN:   3%|▎         | 17/500 [00:12<05:43,  1.41ep/s]

2025-10-20 16:32:41,020 — INFO — Episode: 17 | Steps: 10[356] | Epsilon: 0.931 | Time: 0.53s | Reward: 10.0 | MovingAvg: 10.0


Training DQN:   4%|▎         | 18/500 [00:13<05:16,  1.52ep/s]

2025-10-20 16:32:41,798 — INFO — Episode: 18 | Steps: 20[376] | Epsilon: 0.929 | Time: 0.78s | Reward: 20.0 | MovingAvg: 20.0


Training DQN:   4%|▍         | 19/500 [00:14<05:33,  1.44ep/s]

2025-10-20 16:32:42,403 — INFO — Episode: 19 | Steps: 15[391] | Epsilon: 0.925 | Time: 0.60s | Reward: 15.0 | MovingAvg: 15.0


Training DQN:   4%|▍         | 20/500 [00:14<05:20,  1.50ep/s]

2025-10-20 16:32:43,583 — INFO — Episode: 20 | Steps: 30[421] | Epsilon: 0.922 | Time: 1.18s | Reward: 30.0 | MovingAvg: 19.6


Training DQN:   4%|▍         | 21/500 [00:15<06:33,  1.22ep/s]

2025-10-20 16:32:44,163 — INFO — Episode: 21 | Steps: 12[433] | Epsilon: 0.916 | Time: 0.58s | Reward: 12.0 | MovingAvg: 20.1


Training DQN:   4%|▍         | 22/500 [00:16<05:57,  1.34ep/s]

2025-10-20 16:32:44,672 — INFO — Episode: 22 | Steps: 10[443] | Epsilon: 0.913 | Time: 0.51s | Reward: 10.0 | MovingAvg: 19.9


Training DQN:   5%|▍         | 23/500 [00:16<05:22,  1.48ep/s]

2025-10-20 16:32:45,237 — INFO — Episode: 23 | Steps: 11[454] | Epsilon: 0.911 | Time: 0.56s | Reward: 11.0 | MovingAvg: 18.8


Training DQN:   5%|▍         | 24/500 [00:17<05:06,  1.55ep/s]

2025-10-20 16:32:46,112 — INFO — Episode: 24 | Steps: 22[476] | Epsilon: 0.909 | Time: 0.87s | Reward: 22.0 | MovingAvg: 18.6


Training DQN:   5%|▌         | 25/500 [00:18<05:38,  1.40ep/s]

2025-10-20 16:32:47,249 — INFO — Episode: 25 | Steps: 26[502] | Epsilon: 0.905 | Time: 1.14s | Reward: 26.0 | MovingAvg: 19.0


Training DQN:   5%|▌         | 26/500 [00:19<06:38,  1.19ep/s]

2025-10-20 16:32:47,831 — INFO — Episode: 26 | Steps: 15[517] | Epsilon: 0.900 | Time: 0.58s | Reward: 15.0 | MovingAvg: 19.6


Training DQN:   5%|▌         | 27/500 [00:20<06:00,  1.31ep/s]

2025-10-20 16:32:49,394 — INFO — Episode: 27 | Steps: 32[549] | Epsilon: 0.897 | Time: 1.56s | Reward: 32.0 | MovingAvg: 19.1


Training DQN:   6%|▌         | 28/500 [00:21<07:53,  1.00s/ep]

2025-10-20 16:32:49,894 — INFO — Episode: 28 | Steps: 13[562] | Epsilon: 0.890 | Time: 0.50s | Reward: 13.0 | MovingAvg: 19.9


Training DQN:   6%|▌         | 29/500 [00:22<06:41,  1.17ep/s]

2025-10-20 16:32:51,356 — INFO — Episode: 29 | Steps: 39[601] | Epsilon: 0.888 | Time: 1.46s | Reward: 39.0 | MovingAvg: 19.1


Training DQN:   6%|▌         | 30/500 [00:23<08:06,  1.04s/ep]

2025-10-20 16:32:52,566 — INFO — Episode: 30 | Steps: 32[633] | Epsilon: 0.880 | Time: 1.21s | Reward: 32.0 | MovingAvg: 19.9


Training DQN:   6%|▌         | 31/500 [00:24<08:29,  1.09s/ep]

2025-10-20 16:32:53,226 — INFO — Episode: 31 | Steps: 16[649] | Epsilon: 0.873 | Time: 0.66s | Reward: 16.0 | MovingAvg: 20.6


Training DQN:   6%|▋         | 32/500 [00:25<07:28,  1.04ep/s]

2025-10-20 16:32:53,931 — INFO — Episode: 32 | Steps: 16[665] | Epsilon: 0.870 | Time: 0.70s | Reward: 16.0 | MovingAvg: 20.8


Training DQN:   7%|▋         | 33/500 [00:26<06:52,  1.13ep/s]

2025-10-20 16:32:54,503 — INFO — Episode: 33 | Steps: 12[677] | Epsilon: 0.867 | Time: 0.57s | Reward: 12.0 | MovingAvg: 19.4


Training DQN:   7%|▋         | 34/500 [00:26<06:07,  1.27ep/s]

2025-10-20 16:32:55,623 — INFO — Episode: 34 | Steps: 29[706] | Epsilon: 0.865 | Time: 1.12s | Reward: 29.0 | MovingAvg: 19.2


Training DQN:   7%|▋         | 35/500 [00:27<06:53,  1.13ep/s]

2025-10-20 16:32:57,333 — INFO — Episode: 35 | Steps: 35[741] | Epsilon: 0.859 | Time: 1.71s | Reward: 35.0 | MovingAvg: 19.6


Training DQN:   7%|▋         | 36/500 [00:29<08:46,  1.14s/ep]

2025-10-20 16:32:58,254 — INFO — Episode: 36 | Steps: 22[763] | Epsilon: 0.852 | Time: 0.92s | Reward: 22.0 | MovingAvg: 20.8


Training DQN:   7%|▋         | 37/500 [00:30<08:15,  1.07s/ep]

2025-10-20 16:32:58,856 — INFO — Episode: 37 | Steps: 12[775] | Epsilon: 0.847 | Time: 0.60s | Reward: 12.0 | MovingAvg: 20.9


Training DQN:   8%|▊         | 38/500 [00:31<07:09,  1.07ep/s]

2025-10-20 16:32:59,193 — INFO — Episode: 38 | Steps: 9[784] | Epsilon: 0.845 | Time: 0.34s | Reward: 9.0 | MovingAvg: 20.9


Training DQN:   8%|▊         | 39/500 [00:31<05:46,  1.33ep/s]

2025-10-20 16:32:59,897 — INFO — Episode: 39 | Steps: 11[795] | Epsilon: 0.843 | Time: 0.70s | Reward: 11.0 | MovingAvg: 20.4


Training DQN:   8%|▊         | 40/500 [00:32<05:39,  1.36ep/s]

2025-10-20 16:33:00,531 — INFO — Episode: 40 | Steps: 15[810] | Epsilon: 0.841 | Time: 0.63s | Reward: 15.0 | MovingAvg: 20.2


Training DQN:   8%|▊         | 41/500 [00:32<05:24,  1.41ep/s]

2025-10-20 16:33:01,260 — INFO — Episode: 41 | Steps: 18[828] | Epsilon: 0.838 | Time: 0.73s | Reward: 18.0 | MovingAvg: 19.4


Training DQN:   8%|▊         | 42/500 [00:33<05:26,  1.40ep/s]

2025-10-20 16:33:01,934 — INFO — Episode: 42 | Steps: 13[841] | Epsilon: 0.834 | Time: 0.67s | Reward: 13.0 | MovingAvg: 19.8


Training DQN:   9%|▊         | 43/500 [00:34<05:20,  1.43ep/s]

2025-10-20 16:33:02,418 — INFO — Episode: 43 | Steps: 10[851] | Epsilon: 0.832 | Time: 0.48s | Reward: 10.0 | MovingAvg: 19.9


Training DQN:   9%|▉         | 44/500 [00:34<04:50,  1.57ep/s]

2025-10-20 16:33:03,087 — INFO — Episode: 44 | Steps: 10[861] | Epsilon: 0.830 | Time: 0.67s | Reward: 10.0 | MovingAvg: 19.9


Training DQN:   9%|▉         | 45/500 [00:35<04:53,  1.55ep/s]

2025-10-20 16:33:03,567 — INFO — Episode: 45 | Steps: 12[873] | Epsilon: 0.828 | Time: 0.48s | Reward: 12.0 | MovingAvg: 19.2


Training DQN:   9%|▉         | 46/500 [00:35<04:30,  1.68ep/s]

2025-10-20 16:33:04,418 — INFO — Episode: 46 | Steps: 17[890] | Epsilon: 0.825 | Time: 0.85s | Reward: 17.0 | MovingAvg: 18.6


Training DQN:   9%|▉         | 47/500 [00:36<05:04,  1.49ep/s]

2025-10-20 16:33:06,198 — INFO — Episode: 47 | Steps: 37[927] | Epsilon: 0.822 | Time: 1.78s | Reward: 37.0 | MovingAvg: 18.6


Training DQN:  10%|▉         | 48/500 [00:38<07:34,  1.00s/ep]

2025-10-20 16:33:08,200 — INFO — Episode: 48 | Steps: 37[964] | Epsilon: 0.815 | Time: 2.00s | Reward: 37.0 | MovingAvg: 18.9


Training DQN:  10%|▉         | 49/500 [00:40<09:48,  1.30s/ep]

2025-10-20 16:33:08,926 — INFO — Episode: 49 | Steps: 12[976] | Epsilon: 0.807 | Time: 0.73s | Reward: 12.0 | MovingAvg: 20.1


Training DQN:  10%|█         | 50/500 [00:41<08:28,  1.13s/ep]

2025-10-20 16:33:09,469 — INFO — Episode: 50 | Steps: 10[986] | Epsilon: 0.805 | Time: 0.54s | Reward: 10.0 | MovingAvg: 18.8


Training DQN:  10%|█         | 51/500 [00:41<07:08,  1.05ep/s]

2025-10-20 16:33:10,714 — INFO — Episode: 51 | Steps: 29[1015] | Epsilon: 0.803 | Time: 1.24s | Reward: 29.0 | MovingAvg: 17.6


Training DQN:  10%|█         | 52/500 [00:42<07:46,  1.04s/ep]

2025-10-20 16:33:11,316 — INFO — Episode: 52 | Steps: 13[1028] | Epsilon: 0.797 | Time: 0.60s | Reward: 13.0 | MovingAvg: 18.3


Training DQN:  11%|█         | 53/500 [00:43<06:46,  1.10ep/s]

2025-10-20 16:33:12,057 — INFO — Episode: 53 | Steps: 19[1047] | Epsilon: 0.794 | Time: 0.74s | Reward: 19.0 | MovingAvg: 18.1


Training DQN:  11%|█         | 54/500 [00:44<06:23,  1.16ep/s]

2025-10-20 16:33:12,664 — INFO — Episode: 54 | Steps: 13[1060] | Epsilon: 0.791 | Time: 0.61s | Reward: 13.0 | MovingAvg: 18.5


Training DQN:  11%|█         | 55/500 [00:44<05:48,  1.28ep/s]

2025-10-20 16:33:13,699 — INFO — Episode: 55 | Steps: 22[1082] | Epsilon: 0.788 | Time: 1.03s | Reward: 22.0 | MovingAvg: 17.7


Training DQN:  11%|█         | 56/500 [00:45<06:21,  1.16ep/s]

2025-10-20 16:33:14,865 — INFO — Episode: 56 | Steps: 24[1106] | Epsilon: 0.784 | Time: 1.16s | Reward: 24.0 | MovingAvg: 17.1


Training DQN:  11%|█▏        | 57/500 [00:47<07:01,  1.05ep/s]

2025-10-20 16:33:15,592 — INFO — Episode: 57 | Steps: 14[1120] | Epsilon: 0.779 | Time: 0.73s | Reward: 14.0 | MovingAvg: 17.1


Training DQN:  12%|█▏        | 58/500 [00:47<06:30,  1.13ep/s]

2025-10-20 16:33:16,443 — INFO — Episode: 58 | Steps: 16[1136] | Epsilon: 0.776 | Time: 0.85s | Reward: 16.0 | MovingAvg: 17.2


Training DQN:  12%|█▏        | 59/500 [00:48<06:25,  1.14ep/s]

2025-10-20 16:33:17,674 — INFO — Episode: 59 | Steps: 31[1167] | Epsilon: 0.773 | Time: 1.23s | Reward: 31.0 | MovingAvg: 17.6


Training DQN:  12%|█▏        | 60/500 [00:49<07:11,  1.02ep/s]

2025-10-20 16:33:19,237 — INFO — Episode: 60 | Steps: 32[1199] | Epsilon: 0.767 | Time: 1.56s | Reward: 32.0 | MovingAvg: 18.6


Training DQN:  12%|█▏        | 61/500 [00:51<08:27,  1.16s/ep]

2025-10-20 16:33:19,703 — INFO — Episode: 61 | Steps: 12[1211] | Epsilon: 0.760 | Time: 0.46s | Reward: 12.0 | MovingAvg: 19.4


Training DQN:  12%|█▏        | 62/500 [00:51<06:55,  1.05ep/s]

2025-10-20 16:33:20,694 — INFO — Episode: 62 | Steps: 16[1227] | Epsilon: 0.758 | Time: 0.99s | Reward: 16.0 | MovingAvg: 19.1


Training DQN:  13%|█▎        | 63/500 [00:52<07:00,  1.04ep/s]

2025-10-20 16:33:22,154 — INFO — Episode: 63 | Steps: 31[1258] | Epsilon: 0.755 | Time: 1.46s | Reward: 31.0 | MovingAvg: 19.3


Training DQN:  13%|█▎        | 64/500 [00:54<08:04,  1.11s/ep]

2025-10-20 16:33:22,631 — INFO — Episode: 64 | Steps: 12[1270] | Epsilon: 0.748 | Time: 0.48s | Reward: 12.0 | MovingAvg: 20.4


Training DQN:  13%|█▎        | 65/500 [00:54<06:40,  1.09ep/s]

2025-10-20 16:33:23,645 — INFO — Episode: 65 | Steps: 23[1293] | Epsilon: 0.746 | Time: 1.01s | Reward: 23.0 | MovingAvg: 20.4


Training DQN:  13%|█▎        | 66/500 [00:55<06:51,  1.05ep/s]

2025-10-20 16:33:24,520 — INFO — Episode: 66 | Steps: 19[1312] | Epsilon: 0.741 | Time: 0.87s | Reward: 19.0 | MovingAvg: 21.0


Training DQN:  13%|█▎        | 67/500 [00:56<06:41,  1.08ep/s]

2025-10-20 16:33:25,465 — INFO — Episode: 67 | Steps: 14[1326] | Epsilon: 0.738 | Time: 0.94s | Reward: 14.0 | MovingAvg: 21.1


Training DQN:  14%|█▎        | 68/500 [00:57<06:42,  1.07ep/s]

2025-10-20 16:33:27,162 — INFO — Episode: 68 | Steps: 25[1351] | Epsilon: 0.735 | Time: 1.70s | Reward: 25.0 | MovingAvg: 19.9


Training DQN:  14%|█▍        | 69/500 [00:59<08:20,  1.16s/ep]

2025-10-20 16:33:27,724 — INFO — Episode: 69 | Steps: 9[1360] | Epsilon: 0.730 | Time: 0.56s | Reward: 9.0 | MovingAvg: 19.4


Training DQN:  14%|█▍        | 70/500 [00:59<07:02,  1.02ep/s]

2025-10-20 16:33:28,250 — INFO — Episode: 70 | Steps: 8[1368] | Epsilon: 0.728 | Time: 0.52s | Reward: 8.0 | MovingAvg: 19.2


Training DQN:  14%|█▍        | 71/500 [01:00<06:02,  1.18ep/s]

2025-10-20 16:33:29,047 — INFO — Episode: 71 | Steps: 15[1383] | Epsilon: 0.726 | Time: 0.80s | Reward: 15.0 | MovingAvg: 19.1


Training DQN:  14%|█▍        | 72/500 [01:01<05:55,  1.20ep/s]

2025-10-20 16:33:29,767 — INFO — Episode: 72 | Steps: 16[1399] | Epsilon: 0.723 | Time: 0.72s | Reward: 16.0 | MovingAvg: 18.4


Training DQN:  15%|█▍        | 73/500 [01:02<05:40,  1.25ep/s]

2025-10-20 16:33:30,323 — INFO — Episode: 73 | Steps: 13[1412] | Epsilon: 0.720 | Time: 0.56s | Reward: 13.0 | MovingAvg: 18.6


Training DQN:  15%|█▍        | 74/500 [01:02<05:08,  1.38ep/s]

2025-10-20 16:33:31,368 — INFO — Episode: 74 | Steps: 20[1432] | Epsilon: 0.718 | Time: 1.04s | Reward: 20.0 | MovingAvg: 18.2


Training DQN:  15%|█▌        | 75/500 [01:03<05:48,  1.22ep/s]

2025-10-20 16:33:32,462 — INFO — Episode: 75 | Steps: 20[1452] | Epsilon: 0.714 | Time: 1.09s | Reward: 20.0 | MovingAvg: 18.6


Training DQN:  15%|█▌        | 76/500 [01:04<06:22,  1.11ep/s]

2025-10-20 16:33:33,437 — INFO — Episode: 76 | Steps: 13[1465] | Epsilon: 0.710 | Time: 0.97s | Reward: 13.0 | MovingAvg: 18.5


Training DQN:  15%|█▌        | 77/500 [01:05<06:31,  1.08ep/s]

2025-10-20 16:33:34,277 — INFO — Episode: 77 | Steps: 12[1477] | Epsilon: 0.707 | Time: 0.84s | Reward: 12.0 | MovingAvg: 17.9


Training DQN:  16%|█▌        | 78/500 [01:06<06:19,  1.11ep/s]

2025-10-20 16:33:34,569 — INFO — Episode: 78 | Steps: 9[1486] | Epsilon: 0.705 | Time: 0.29s | Reward: 9.0 | MovingAvg: 17.9


Training DQN:  16%|█▌        | 79/500 [01:06<05:01,  1.39ep/s]

2025-10-20 16:33:35,646 — INFO — Episode: 79 | Steps: 17[1503] | Epsilon: 0.703 | Time: 1.08s | Reward: 17.0 | MovingAvg: 17.5


Training DQN:  16%|█▌        | 80/500 [01:07<05:46,  1.21ep/s]

2025-10-20 16:33:36,265 — INFO — Episode: 80 | Steps: 11[1514] | Epsilon: 0.699 | Time: 0.62s | Reward: 11.0 | MovingAvg: 16.8


Training DQN:  16%|█▌        | 81/500 [01:08<05:19,  1.31ep/s]

2025-10-20 16:33:37,007 — INFO — Episode: 81 | Steps: 17[1531] | Epsilon: 0.697 | Time: 0.74s | Reward: 17.0 | MovingAvg: 15.8


Training DQN:  16%|█▋        | 82/500 [01:09<05:16,  1.32ep/s]

2025-10-20 16:33:37,886 — INFO — Episode: 82 | Steps: 13[1544] | Epsilon: 0.694 | Time: 0.88s | Reward: 13.0 | MovingAvg: 16.0


Training DQN:  17%|█▋        | 83/500 [01:10<05:30,  1.26ep/s]

2025-10-20 16:33:38,627 — INFO — Episode: 83 | Steps: 11[1555] | Epsilon: 0.691 | Time: 0.74s | Reward: 11.0 | MovingAvg: 15.8


Training DQN:  17%|█▋        | 84/500 [01:10<05:23,  1.29ep/s]

2025-10-20 16:33:39,372 — INFO — Episode: 84 | Steps: 11[1566] | Epsilon: 0.689 | Time: 0.74s | Reward: 11.0 | MovingAvg: 14.8


Training DQN:  17%|█▋        | 85/500 [01:11<05:18,  1.30ep/s]

2025-10-20 16:33:40,701 — INFO — Episode: 85 | Steps: 23[1589] | Epsilon: 0.687 | Time: 1.33s | Reward: 23.0 | MovingAvg: 14.8


Training DQN:  17%|█▋        | 86/500 [01:12<06:27,  1.07ep/s]

2025-10-20 16:33:41,679 — INFO — Episode: 86 | Steps: 23[1612] | Epsilon: 0.682 | Time: 0.98s | Reward: 23.0 | MovingAvg: 14.8


Training DQN:  17%|█▋        | 87/500 [01:13<06:31,  1.05ep/s]

2025-10-20 16:33:42,317 — INFO — Episode: 87 | Steps: 9[1621] | Epsilon: 0.678 | Time: 0.64s | Reward: 9.0 | MovingAvg: 15.0


Training DQN:  18%|█▊        | 88/500 [01:14<05:52,  1.17ep/s]

2025-10-20 16:33:44,546 — INFO — Episode: 88 | Steps: 46[1667] | Epsilon: 0.676 | Time: 2.23s | Reward: 46.0 | MovingAvg: 14.8


Training DQN:  18%|█▊        | 89/500 [01:16<08:41,  1.27s/ep]

2025-10-20 16:33:45,321 — INFO — Episode: 89 | Steps: 12[1679] | Epsilon: 0.667 | Time: 0.77s | Reward: 12.0 | MovingAvg: 15.8


Training DQN:  18%|█▊        | 90/500 [01:17<07:39,  1.12s/ep]

2025-10-20 16:33:46,578 — INFO — Episode: 90 | Steps: 23[1702] | Epsilon: 0.664 | Time: 1.26s | Reward: 23.0 | MovingAvg: 15.9


Training DQN:  18%|█▊        | 91/500 [01:18<07:54,  1.16s/ep]

2025-10-20 16:33:47,651 — INFO — Episode: 91 | Steps: 14[1716] | Epsilon: 0.660 | Time: 1.07s | Reward: 14.0 | MovingAvg: 16.7


Training DQN:  18%|█▊        | 92/500 [01:19<07:42,  1.13s/ep]

2025-10-20 16:33:48,577 — INFO — Episode: 92 | Steps: 11[1727] | Epsilon: 0.657 | Time: 0.92s | Reward: 11.0 | MovingAvg: 16.6


Training DQN:  19%|█▊        | 93/500 [01:20<07:16,  1.07s/ep]

2025-10-20 16:33:49,281 — INFO — Episode: 93 | Steps: 10[1737] | Epsilon: 0.655 | Time: 0.70s | Reward: 10.0 | MovingAvg: 16.4


Training DQN:  19%|█▉        | 94/500 [01:21<06:30,  1.04ep/s]

2025-10-20 16:33:49,922 — INFO — Episode: 94 | Steps: 10[1747] | Epsilon: 0.653 | Time: 0.64s | Reward: 10.0 | MovingAvg: 16.2


Training DQN:  19%|█▉        | 95/500 [01:22<05:50,  1.16ep/s]

2025-10-20 16:33:52,108 — INFO — Episode: 95 | Steps: 38[1785] | Epsilon: 0.651 | Time: 2.18s | Reward: 38.0 | MovingAvg: 15.8


Training DQN:  19%|█▉        | 96/500 [01:24<08:29,  1.26s/ep]

2025-10-20 16:33:52,995 — INFO — Episode: 96 | Steps: 18[1803] | Epsilon: 0.643 | Time: 0.89s | Reward: 18.0 | MovingAvg: 16.6


Training DQN:  19%|█▉        | 97/500 [01:25<07:43,  1.15s/ep]

2025-10-20 16:33:53,758 — INFO — Episode: 97 | Steps: 11[1814] | Epsilon: 0.639 | Time: 0.76s | Reward: 11.0 | MovingAvg: 16.9


Training DQN:  20%|█▉        | 98/500 [01:26<06:55,  1.03s/ep]

2025-10-20 16:33:54,847 — INFO — Episode: 98 | Steps: 13[1827] | Epsilon: 0.637 | Time: 1.09s | Reward: 13.0 | MovingAvg: 16.9


Training DQN:  20%|█▉        | 99/500 [01:27<07:01,  1.05s/ep]

2025-10-20 16:33:56,456 — INFO — Episode: 99 | Steps: 25[1852] | Epsilon: 0.635 | Time: 1.61s | Reward: 25.0 | MovingAvg: 17.1


Training DQN:  20%|██        | 100/500 [01:28<08:07,  1.22s/ep]

2025-10-20 16:33:57,543 — INFO — Episode: 100 | Steps: 18[1870] | Epsilon: 0.630 | Time: 1.09s | Reward: 18.0 | MovingAvg: 17.4


Training DQN:  20%|██        | 101/500 [01:29<07:50,  1.18s/ep]

2025-10-20 16:33:58,044 — INFO — Episode: 101 | Steps: 9[1879] | Epsilon: 0.626 | Time: 0.50s | Reward: 9.0 | MovingAvg: 17.8


Training DQN:  20%|██        | 102/500 [01:30<06:28,  1.03ep/s]

2025-10-20 16:33:58,911 — INFO — Episode: 102 | Steps: 17[1896] | Epsilon: 0.624 | Time: 0.87s | Reward: 17.0 | MovingAvg: 17.4


Training DQN:  21%|██        | 103/500 [01:31<06:14,  1.06ep/s]

2025-10-20 16:33:59,690 — INFO — Episode: 103 | Steps: 16[1912] | Epsilon: 0.621 | Time: 0.78s | Reward: 16.0 | MovingAvg: 17.6


Training DQN:  21%|██        | 104/500 [01:31<05:53,  1.12ep/s]

2025-10-20 16:34:00,972 — INFO — Episode: 104 | Steps: 22[1934] | Epsilon: 0.618 | Time: 1.28s | Reward: 22.0 | MovingAvg: 17.9


Training DQN:  21%|██        | 105/500 [01:33<06:39,  1.01s/ep]

2025-10-20 16:34:02,307 — INFO — Episode: 105 | Steps: 19[1953] | Epsilon: 0.613 | Time: 1.33s | Reward: 19.0 | MovingAvg: 18.4


Training DQN:  21%|██        | 106/500 [01:34<07:16,  1.11s/ep]

2025-10-20 16:34:03,798 — INFO — Episode: 106 | Steps: 28[1981] | Epsilon: 0.609 | Time: 1.49s | Reward: 28.0 | MovingAvg: 18.2


Training DQN:  21%|██▏       | 107/500 [01:36<08:00,  1.22s/ep]

2025-10-20 16:34:04,283 — INFO — Episode: 107 | Steps: 10[1991] | Epsilon: 0.604 | Time: 0.48s | Reward: 10.0 | MovingAvg: 18.4


Training DQN:  22%|██▏       | 108/500 [01:36<06:32,  1.00s/ep]

2025-10-20 16:34:04,900 — INFO — Episode: 108 | Steps: 10[2001] | Epsilon: 0.602 | Time: 0.61s | Reward: 10.0 | MovingAvg: 18.5


Training DQN:  22%|██▏       | 109/500 [01:37<05:46,  1.13ep/s]

2025-10-20 16:34:05,585 — INFO — Episode: 109 | Steps: 9[2010] | Epsilon: 0.600 | Time: 0.68s | Reward: 9.0 | MovingAvg: 16.7


Training DQN:  22%|██▏       | 110/500 [01:37<05:22,  1.21ep/s]

2025-10-20 16:34:06,565 — INFO — Episode: 110 | Steps: 14[2024] | Epsilon: 0.598 | Time: 0.98s | Reward: 14.0 | MovingAvg: 16.6


Training DQN:  22%|██▏       | 111/500 [01:38<05:39,  1.15ep/s]

2025-10-20 16:34:07,881 — INFO — Episode: 111 | Steps: 14[2038] | Epsilon: 0.595 | Time: 1.31s | Reward: 14.0 | MovingAvg: 16.1


Training DQN:  22%|██▏       | 112/500 [01:40<06:29,  1.00s/ep]

2025-10-20 16:34:08,982 — INFO — Episode: 112 | Steps: 16[2054] | Epsilon: 0.592 | Time: 1.10s | Reward: 16.0 | MovingAvg: 16.1


Training DQN:  23%|██▎       | 113/500 [01:41<06:40,  1.03s/ep]

2025-10-20 16:34:09,985 — INFO — Episode: 113 | Steps: 19[2073] | Epsilon: 0.589 | Time: 1.00s | Reward: 19.0 | MovingAvg: 16.4


Training DQN:  23%|██▎       | 114/500 [01:42<06:35,  1.02s/ep]

2025-10-20 16:34:11,205 — INFO — Episode: 114 | Steps: 20[2093] | Epsilon: 0.585 | Time: 1.22s | Reward: 20.0 | MovingAvg: 16.8


Training DQN:  23%|██▎       | 115/500 [01:43<06:57,  1.08s/ep]

2025-10-20 16:34:12,082 — INFO — Episode: 115 | Steps: 13[2106] | Epsilon: 0.581 | Time: 0.88s | Reward: 13.0 | MovingAvg: 17.3


Training DQN:  23%|██▎       | 116/500 [01:44<06:32,  1.02s/ep]

2025-10-20 16:34:12,717 — INFO — Episode: 116 | Steps: 10[2116] | Epsilon: 0.579 | Time: 0.63s | Reward: 10.0 | MovingAvg: 16.1


Training DQN:  23%|██▎       | 117/500 [01:44<05:46,  1.10ep/s]

2025-10-20 16:34:13,731 — INFO — Episode: 117 | Steps: 19[2135] | Epsilon: 0.577 | Time: 1.01s | Reward: 19.0 | MovingAvg: 15.7


Training DQN:  24%|██▎       | 118/500 [01:45<05:58,  1.07ep/s]

2025-10-20 16:34:15,205 — INFO — Episode: 118 | Steps: 22[2157] | Epsilon: 0.573 | Time: 1.47s | Reward: 22.0 | MovingAvg: 16.1


Training DQN:  24%|██▍       | 119/500 [01:47<06:58,  1.10s/ep]

2025-10-20 16:34:17,086 — INFO — Episode: 119 | Steps: 24[2181] | Epsilon: 0.569 | Time: 1.88s | Reward: 24.0 | MovingAvg: 16.5


Training DQN:  24%|██▍       | 120/500 [01:49<08:26,  1.33s/ep]

2025-10-20 16:34:18,069 — INFO — Episode: 120 | Steps: 13[2194] | Epsilon: 0.564 | Time: 0.98s | Reward: 13.0 | MovingAvg: 16.4


Training DQN:  24%|██▍       | 121/500 [01:50<07:45,  1.23s/ep]

2025-10-20 16:34:19,527 — INFO — Episode: 121 | Steps: 21[2215] | Epsilon: 0.561 | Time: 1.46s | Reward: 21.0 | MovingAvg: 16.2


Training DQN:  24%|██▍       | 122/500 [01:51<08:10,  1.30s/ep]

2025-10-20 16:34:20,300 — INFO — Episode: 122 | Steps: 17[2232] | Epsilon: 0.557 | Time: 0.77s | Reward: 17.0 | MovingAvg: 16.8


Training DQN:  25%|██▍       | 123/500 [01:52<07:09,  1.14s/ep]

2025-10-20 16:34:21,123 — INFO — Episode: 123 | Steps: 15[2247] | Epsilon: 0.554 | Time: 0.82s | Reward: 15.0 | MovingAvg: 16.8


Training DQN:  25%|██▍       | 124/500 [01:53<06:32,  1.04s/ep]

2025-10-20 16:34:24,565 — INFO — Episode: 124 | Steps: 54[2301] | Epsilon: 0.551 | Time: 3.44s | Reward: 54.0 | MovingAvg: 16.8


Training DQN:  25%|██▌       | 125/500 [01:56<11:01,  1.76s/ep]

2025-10-20 16:34:25,780 — INFO — Episode: 125 | Steps: 16[2317] | Epsilon: 0.540 | Time: 1.21s | Reward: 16.0 | MovingAvg: 18.4


Training DQN:  25%|██▌       | 126/500 [01:58<09:58,  1.60s/ep]

2025-10-20 16:34:27,227 — INFO — Episode: 126 | Steps: 20[2337] | Epsilon: 0.537 | Time: 1.45s | Reward: 20.0 | MovingAvg: 18.2


Training DQN:  25%|██▌       | 127/500 [01:59<09:39,  1.55s/ep]

2025-10-20 16:34:30,025 — INFO — Episode: 127 | Steps: 51[2388] | Epsilon: 0.533 | Time: 2.80s | Reward: 51.0 | MovingAvg: 17.8


Training DQN:  26%|██▌       | 128/500 [02:02<11:56,  1.93s/ep]

2025-10-20 16:34:32,529 — INFO — Episode: 128 | Steps: 39[2427] | Epsilon: 0.522 | Time: 2.50s | Reward: 39.0 | MovingAvg: 19.9


Training DQN:  26%|██▌       | 129/500 [02:04<12:59,  2.10s/ep]

2025-10-20 16:34:35,350 — INFO — Episode: 129 | Steps: 43[2470] | Epsilon: 0.515 | Time: 2.82s | Reward: 43.0 | MovingAvg: 21.3


Training DQN:  26%|██▌       | 130/500 [02:07<14:17,  2.32s/ep]

2025-10-20 16:34:37,717 — INFO — Episode: 130 | Steps: 38[2508] | Epsilon: 0.506 | Time: 2.37s | Reward: 38.0 | MovingAvg: 23.0


Training DQN:  26%|██▌       | 131/500 [02:09<14:20,  2.33s/ep]

2025-10-20 16:34:39,562 — INFO — Episode: 131 | Steps: 25[2533] | Epsilon: 0.498 | Time: 1.84s | Reward: 25.0 | MovingAvg: 24.2


Training DQN:  26%|██▋       | 132/500 [02:11<13:24,  2.19s/ep]

2025-10-20 16:34:40,574 — INFO — Episode: 132 | Steps: 17[2550] | Epsilon: 0.493 | Time: 1.01s | Reward: 17.0 | MovingAvg: 24.8


Training DQN:  27%|██▋       | 133/500 [02:12<11:12,  1.83s/ep]

2025-10-20 16:34:45,199 — INFO — Episode: 133 | Steps: 71[2621] | Epsilon: 0.490 | Time: 4.62s | Reward: 71.0 | MovingAvg: 24.8


Training DQN:  27%|██▋       | 134/500 [02:17<16:17,  2.67s/ep]

2025-10-20 16:34:49,011 — INFO — Episode: 134 | Steps: 54[2675] | Epsilon: 0.476 | Time: 3.81s | Reward: 54.0 | MovingAvg: 27.4


Training DQN:  27%|██▋       | 135/500 [02:21<18:19,  3.01s/ep]

2025-10-20 16:34:50,915 — INFO — Episode: 135 | Steps: 26[2701] | Epsilon: 0.465 | Time: 1.90s | Reward: 26.0 | MovingAvg: 29.1


Training DQN:  27%|██▋       | 136/500 [02:23<16:15,  2.68s/ep]

2025-10-20 16:34:52,789 — INFO — Episode: 136 | Steps: 29[2730] | Epsilon: 0.460 | Time: 1.87s | Reward: 29.0 | MovingAvg: 29.8


Training DQN:  27%|██▋       | 137/500 [02:25<14:45,  2.44s/ep]

2025-10-20 16:34:55,305 — INFO — Episode: 137 | Steps: 36[2766] | Epsilon: 0.454 | Time: 2.51s | Reward: 36.0 | MovingAvg: 30.7


Training DQN:  28%|██▊       | 138/500 [02:27<14:51,  2.46s/ep]

2025-10-20 16:34:57,328 — INFO — Episode: 138 | Steps: 30[2796] | Epsilon: 0.447 | Time: 2.02s | Reward: 30.0 | MovingAvg: 31.6


Training DQN:  28%|██▊       | 139/500 [02:29<14:01,  2.33s/ep]

2025-10-20 16:35:02,747 — INFO — Episode: 139 | Steps: 87[2883] | Epsilon: 0.441 | Time: 5.42s | Reward: 87.0 | MovingAvg: 31.9


Training DQN:  28%|██▊       | 140/500 [02:34<19:32,  3.26s/ep]

2025-10-20 16:35:07,319 — INFO — Episode: 140 | Steps: 63[2946] | Epsilon: 0.423 | Time: 4.57s | Reward: 63.0 | MovingAvg: 35.1


Training DQN:  28%|██▊       | 141/500 [02:39<21:50,  3.65s/ep]

2025-10-20 16:35:11,243 — INFO — Episode: 141 | Steps: 57[3003] | Epsilon: 0.411 | Time: 3.92s | Reward: 57.0 | MovingAvg: 37.6


Training DQN:  28%|██▊       | 142/500 [02:43<22:16,  3.73s/ep]

2025-10-20 16:35:12,768 — INFO — Episode: 142 | Steps: 23[3026] | Epsilon: 0.399 | Time: 1.52s | Reward: 23.0 | MovingAvg: 39.4


Training DQN:  29%|██▊       | 143/500 [02:45<18:16,  3.07s/ep]

2025-10-20 16:35:19,817 — INFO — Episode: 143 | Steps: 89[3115] | Epsilon: 0.395 | Time: 7.05s | Reward: 89.0 | MovingAvg: 39.7


Training DQN:  29%|██▉       | 144/500 [02:52<25:18,  4.26s/ep]

2025-10-20 16:35:22,821 — INFO — Episode: 144 | Steps: 39[3154] | Epsilon: 0.377 | Time: 3.00s | Reward: 39.0 | MovingAvg: 43.4


Training DQN:  29%|██▉       | 145/500 [02:55<22:59,  3.89s/ep]

2025-10-20 16:35:26,048 — INFO — Episode: 145 | Steps: 46[3200] | Epsilon: 0.369 | Time: 3.23s | Reward: 46.0 | MovingAvg: 42.6


Training DQN:  29%|██▉       | 146/500 [02:58<21:45,  3.69s/ep]

2025-10-20 16:35:29,834 — INFO — Episode: 146 | Steps: 46[3246] | Epsilon: 0.360 | Time: 3.78s | Reward: 46.0 | MovingAvg: 44.1


Training DQN:  29%|██▉       | 147/500 [03:02<21:52,  3.72s/ep]

2025-10-20 16:35:34,742 — INFO — Episode: 147 | Steps: 70[3316] | Epsilon: 0.351 | Time: 4.91s | Reward: 70.0 | MovingAvg: 45.5


Training DQN:  30%|██▉       | 148/500 [03:06<23:54,  4.07s/ep]

2025-10-20 16:35:40,217 — INFO — Episode: 148 | Steps: 78[3394] | Epsilon: 0.337 | Time: 5.47s | Reward: 78.0 | MovingAvg: 46.4


Training DQN:  30%|██▉       | 149/500 [03:12<26:17,  4.49s/ep]

2025-10-20 16:35:43,862 — INFO — Episode: 149 | Steps: 54[3448] | Epsilon: 0.321 | Time: 3.64s | Reward: 54.0 | MovingAvg: 48.4


Training DQN:  30%|███       | 150/500 [03:16<24:43,  4.24s/ep]

2025-10-20 16:35:50,542 — INFO — Episode: 150 | Steps: 81[3529] | Epsilon: 0.310 | Time: 6.68s | Reward: 81.0 | MovingAvg: 48.9


Training DQN:  30%|███       | 151/500 [03:22<28:55,  4.97s/ep]

2025-10-20 16:35:55,057 — INFO — Episode: 151 | Steps: 53[3582] | Epsilon: 0.294 | Time: 4.51s | Reward: 53.0 | MovingAvg: 51.0


Training DQN:  30%|███       | 152/500 [03:27<28:02,  4.83s/ep]

2025-10-20 16:35:59,973 — INFO — Episode: 152 | Steps: 59[3641] | Epsilon: 0.284 | Time: 4.92s | Reward: 59.0 | MovingAvg: 52.5


Training DQN:  31%|███       | 153/500 [03:32<28:06,  4.86s/ep]

2025-10-20 16:36:02,996 — INFO — Episode: 153 | Steps: 36[3677] | Epsilon: 0.272 | Time: 3.02s | Reward: 36.0 | MovingAvg: 54.5


Training DQN:  31%|███       | 154/500 [03:35<24:50,  4.31s/ep]

2025-10-20 16:36:09,955 — INFO — Episode: 154 | Steps: 91[3768] | Epsilon: 0.265 | Time: 6.96s | Reward: 91.0 | MovingAvg: 52.8


Training DQN:  31%|███       | 155/500 [03:42<29:20,  5.10s/ep]

2025-10-20 16:36:22,832 — INFO — Episode: 155 | Steps: 151[3919] | Epsilon: 0.246 | Time: 12.88s | Reward: 151.0 | MovingAvg: 54.6


Training DQN:  31%|███       | 156/500 [03:55<42:37,  7.44s/ep]

2025-10-20 16:36:27,643 — INFO — Episode: 156 | Steps: 62[3981] | Epsilon: 0.216 | Time: 4.81s | Reward: 62.0 | MovingAvg: 60.9


Training DQN:  31%|███▏      | 157/500 [03:59<38:00,  6.65s/ep]

2025-10-20 16:36:30,990 — INFO — Episode: 157 | Steps: 42[4023] | Epsilon: 0.204 | Time: 3.35s | Reward: 42.0 | MovingAvg: 62.5


Training DQN:  32%|███▏      | 158/500 [04:03<32:15,  5.66s/ep]

2025-10-20 16:36:38,502 — INFO — Episode: 158 | Steps: 94[4117] | Epsilon: 0.195 | Time: 7.51s | Reward: 94.0 | MovingAvg: 62.9


Training DQN:  32%|███▏      | 159/500 [04:10<35:19,  6.21s/ep]

2025-10-20 16:36:43,017 — INFO — Episode: 159 | Steps: 52[4169] | Epsilon: 0.177 | Time: 4.51s | Reward: 52.0 | MovingAvg: 66.0


Training DQN:  32%|███▏      | 160/500 [04:15<32:19,  5.70s/ep]

2025-10-20 16:36:47,627 — INFO — Episode: 160 | Steps: 52[4221] | Epsilon: 0.166 | Time: 4.61s | Reward: 52.0 | MovingAvg: 64.3


Training DQN:  32%|███▏      | 161/500 [04:19<30:22,  5.38s/ep]

2025-10-20 16:36:53,552 — INFO — Episode: 161 | Steps: 65[4286] | Epsilon: 0.156 | Time: 5.92s | Reward: 65.0 | MovingAvg: 63.8


Training DQN:  32%|███▏      | 162/500 [04:25<31:12,  5.54s/ep]

2025-10-20 16:37:01,451 — INFO — Episode: 162 | Steps: 94[4380] | Epsilon: 0.143 | Time: 7.90s | Reward: 94.0 | MovingAvg: 64.2


Training DQN:  33%|███▎      | 163/500 [04:33<35:05,  6.25s/ep]

2025-10-20 16:37:04,925 — INFO — Episode: 163 | Steps: 37[4417] | Epsilon: 0.124 | Time: 3.47s | Reward: 37.0 | MovingAvg: 67.7


Training DQN:  33%|███▎      | 164/500 [04:37<30:19,  5.42s/ep]

2025-10-20 16:37:09,240 — INFO — Episode: 164 | Steps: 51[4468] | Epsilon: 0.117 | Time: 4.31s | Reward: 51.0 | MovingAvg: 65.1


Training DQN:  33%|███▎      | 165/500 [04:41<28:23,  5.09s/ep]

2025-10-20 16:37:19,521 — INFO — Episode: 165 | Steps: 115[4583] | Epsilon: 0.106 | Time: 10.28s | Reward: 115.0 | MovingAvg: 65.7


Training DQN:  33%|███▎      | 166/500 [04:51<36:59,  6.64s/ep]

2025-10-20 16:37:24,369 — INFO — Episode: 166 | Steps: 49[4632] | Epsilon: 0.083 | Time: 4.84s | Reward: 49.0 | MovingAvg: 69.2


Training DQN:  33%|███▎      | 167/500 [04:56<33:53,  6.11s/ep]

2025-10-20 16:37:32,834 — INFO — Episode: 167 | Steps: 92[4724] | Epsilon: 0.074 | Time: 8.46s | Reward: 92.0 | MovingAvg: 69.3


Training DQN:  34%|███▎      | 168/500 [05:05<37:42,  6.81s/ep]

2025-10-20 16:37:42,484 — INFO — Episode: 168 | Steps: 102[4826] | Epsilon: 0.055 | Time: 9.65s | Reward: 102.0 | MovingAvg: 70.4


Training DQN:  34%|███▍      | 169/500 [05:14<42:16,  7.66s/ep]

2025-10-20 16:37:52,938 — INFO — Episode: 169 | Steps: 111[4937] | Epsilon: 0.035 | Time: 10.45s | Reward: 111.0 | MovingAvg: 71.6


Training DQN:  34%|███▍      | 170/500 [05:25<46:45,  8.50s/ep]

2025-10-20 16:38:13,218 — INFO — Episode: 170 | Steps: 217[5154] | Epsilon: 0.013 | Time: 20.28s | Reward: 217.0 | MovingAvg: 74.5


Training DQN:  34%|███▍      | 171/500 [05:45<1:05:59, 12.03s/ep]

2025-10-20 16:38:19,450 — INFO — Episode: 171 | Steps: 67[5221] | Epsilon: 0.010 | Time: 6.23s | Reward: 67.0 | MovingAvg: 81.2


Training DQN:  34%|███▍      | 172/500 [05:51<56:16, 10.29s/ep]  

2025-10-20 16:38:28,388 — INFO — Episode: 172 | Steps: 96[5317] | Epsilon: 0.010 | Time: 8.94s | Reward: 96.0 | MovingAvg: 82.0


Training DQN:  35%|███▍      | 173/500 [06:00<53:53,  9.89s/ep]

2025-10-20 16:38:36,174 — INFO — Episode: 173 | Steps: 85[5402] | Epsilon: 0.010 | Time: 7.78s | Reward: 85.0 | MovingAvg: 83.8


Training DQN:  35%|███▍      | 174/500 [06:08<50:17,  9.26s/ep]

2025-10-20 16:38:42,836 — INFO — Episode: 174 | Steps: 71[5473] | Epsilon: 0.010 | Time: 6.66s | Reward: 71.0 | MovingAvg: 86.2


Training DQN:  35%|███▌      | 175/500 [06:15<45:55,  8.48s/ep]

2025-10-20 16:38:49,121 — INFO — Episode: 175 | Steps: 68[5541] | Epsilon: 0.010 | Time: 6.28s | Reward: 68.0 | MovingAvg: 85.2


Training DQN:  35%|███▌      | 176/500 [06:21<42:13,  7.82s/ep]

2025-10-20 16:38:58,189 — INFO — Episode: 176 | Steps: 94[5635] | Epsilon: 0.010 | Time: 9.07s | Reward: 94.0 | MovingAvg: 81.1


Training DQN:  35%|███▌      | 177/500 [06:30<44:06,  8.19s/ep]

2025-10-20 16:39:04,851 — INFO — Episode: 177 | Steps: 70[5705] | Epsilon: 0.010 | Time: 6.66s | Reward: 70.0 | MovingAvg: 82.7


Training DQN:  36%|███▌      | 178/500 [06:37<41:30,  7.73s/ep]

2025-10-20 16:39:21,018 — INFO — Episode: 178 | Steps: 141[5846] | Epsilon: 0.010 | Time: 16.17s | Reward: 141.0 | MovingAvg: 84.1


Training DQN:  36%|███▌      | 179/500 [06:53<54:54, 10.26s/ep]

2025-10-20 16:39:30,817 — INFO — Episode: 179 | Steps: 110[5956] | Epsilon: 0.010 | Time: 9.80s | Reward: 110.0 | MovingAvg: 86.5


Training DQN:  36%|███▌      | 180/500 [07:03<53:59, 10.12s/ep]

2025-10-20 16:39:39,671 — INFO — Episode: 180 | Steps: 103[6059] | Epsilon: 0.010 | Time: 8.85s | Reward: 103.0 | MovingAvg: 89.3


Training DQN:  36%|███▌      | 181/500 [07:11<51:48,  9.74s/ep]

2025-10-20 16:39:45,998 — INFO — Episode: 181 | Steps: 74[6133] | Epsilon: 0.010 | Time: 6.33s | Reward: 74.0 | MovingAvg: 91.9


Training DQN:  36%|███▋      | 182/500 [07:18<46:12,  8.72s/ep]

2025-10-20 16:39:55,515 — INFO — Episode: 182 | Steps: 112[6245] | Epsilon: 0.010 | Time: 9.52s | Reward: 112.0 | MovingAvg: 92.3


Training DQN:  37%|███▋      | 183/500 [07:27<47:19,  8.96s/ep]

2025-10-20 16:40:00,165 — INFO — Episode: 183 | Steps: 53[6298] | Epsilon: 0.010 | Time: 4.65s | Reward: 53.0 | MovingAvg: 93.2


Training DQN:  37%|███▋      | 184/500 [07:32<40:22,  7.67s/ep]

2025-10-20 16:40:05,705 — INFO — Episode: 184 | Steps: 66[6364] | Epsilon: 0.010 | Time: 5.54s | Reward: 66.0 | MovingAvg: 94.0


Training DQN:  37%|███▋      | 185/500 [07:37<36:53,  7.03s/ep]

2025-10-20 16:40:17,116 — INFO — Episode: 185 | Steps: 131[6495] | Epsilon: 0.010 | Time: 11.41s | Reward: 131.0 | MovingAvg: 94.8


Training DQN:  37%|███▋      | 186/500 [07:49<43:39,  8.34s/ep]

2025-10-20 16:40:29,368 — INFO — Episode: 186 | Steps: 124[6619] | Epsilon: 0.010 | Time: 12.25s | Reward: 124.0 | MovingAvg: 95.6


Training DQN:  37%|███▋      | 187/500 [08:01<49:38,  9.52s/ep]

2025-10-20 16:40:43,201 — INFO — Episode: 187 | Steps: 141[6760] | Epsilon: 0.010 | Time: 13.83s | Reward: 141.0 | MovingAvg: 99.3


Training DQN:  38%|███▊      | 188/500 [08:15<56:12, 10.81s/ep]

2025-10-20 16:40:49,982 — INFO — Episode: 188 | Steps: 63[6823] | Epsilon: 0.010 | Time: 6.78s | Reward: 63.0 | MovingAvg: 101.8


Training DQN:  38%|███▊      | 188/500 [08:22<13:53,  2.67s/ep]

Train time: 8.4m [1.0s]





In [9]:
trainer.play_with_pygame(episodes=5, fps=30)

2025-10-20 16:42:55,614 — INFO — Episode 1/5 - Reward: 52.0
2025-10-20 16:43:02,864 — INFO — Episode 2/5 - Reward: 116.0
2025-10-20 16:43:06,640 — INFO — Episode 3/5 - Reward: 61.0
2025-10-20 16:43:10,553 — INFO — Episode 4/5 - Reward: 62.0
2025-10-20 16:43:14,240 — INFO — Episode 5/5 - Reward: 60.0
