In [1]:
import sys

import gymnasium as gym

sys.path.append("..")

In [2]:
from training.dqnetwork import DQNetwork

In [3]:
env = gym.make("CartPole-v1", render_mode="rgb_array")

In [4]:
from python.src.utils.reproducibility import set_global_seed

set_global_seed(seed=42, env=env, tensorflow_deterministic=True, verbose=True)

[set_global_seed] TensorFlow seed set.
[set_global_seed] Gymnasium environment seed set.
[set_global_seed] All available seeds set to 42.


In [5]:
neuron_count_per_hidden_layer = [64, 32]

action_net = DQNetwork(
    env.observation_space.shape,
    neuron_count_per_hidden_layer,
    env.action_space.n,
    summary=True,
)
target_net = DQNetwork(
    env.observation_space.shape,
    neuron_count_per_hidden_layer,
    env.action_space.n,
    summary=True,
)

target_net.model.set_weights(action_net.model.get_weights())

In [6]:
episode_count = 500  # Total number of training episodes
episode_max_steps = 400  # Maximum number of steps per episode

replay_memory_max_size = (
    100000  # Maximum number of transitions stored into the replay memory
)
replay_memory_init_size = (
    1000  # Maximum number of transitions stored into the replay memory
)
batch_size = 64  # Mini-batch size

step_per_update = 4  # Number of total steps executed between successive updates of the action model weights
step_per_update_target_model = 8  # Number of total steps executed between successive replaces of the target model weights

max_epsilon = 1.0  # Exploration probability at start
min_epsilon = 0.01  # Minimum exploration probability
epsilon_decay = 0.0002  # Decay for exploration probability

gamma = 0.99  # Discount factor

moving_avg_window_size = 20  # Number of consecutive episodes to be considered in the calculation of the total reward moving average
moving_avg_stop_thr = 100

In [7]:
from agent.dqagent import DQAgent

agent = DQAgent(
    env,
    epsilon_max=max_epsilon,
    epsilon_min=min_epsilon,
    epsilon_decay=epsilon_decay,
    gamma=gamma,
    replay_memory_max_size=replay_memory_max_size,
    replay_memory_init_size=replay_memory_init_size,
    batch_size=batch_size,
    step_per_update=step_per_update,
    step_per_update_target_model=step_per_update_target_model,
    moving_avg_window_size=moving_avg_window_size,
    moving_avg_stop_thr=moving_avg_stop_thr,
    episode_max_steps=episode_max_steps,
)

In [8]:
import time

from training.dqlearning import DQLearning

train_start_time = time.time()

trainer = DQLearning(
    env,
    agent,
    episode_count=episode_count,
    episode_max_steps=episode_max_steps,
    dqn_action_model=action_net,
    dqn_target_model=target_net,
)
train_rewards = trainer.simple_dqn_training()

train_finish_time = time.time()
train_elapsed_time = train_finish_time - train_start_time
train_avg_episode_time = train_elapsed_time / episode_count

print(
    f"Train time: {train_elapsed_time / 60.0:.1f}m [{train_avg_episode_time:.1f}s]"
)

Training DQN:   0%|          | 1/500 [00:02<16:56,  2.04s/ep]

Episode: 0 | Steps: 18[18] | Epsilon: 1.000 | Time: 2.04s | Reward: 18.0 | MovingAvg: 18.0


Training DQN:   0%|          | 2/500 [00:02<10:52,  1.31s/ep]

Episode: 1 | Steps: 16[34] | Epsilon: 0.996 | Time: 0.80s | Reward: 16.0 | MovingAvg: 16.0


Training DQN:   1%|          | 3/500 [00:04<13:22,  1.61s/ep]

Episode: 2 | Steps: 33[67] | Epsilon: 0.993 | Time: 1.97s | Reward: 33.0 | MovingAvg: 33.0


Training DQN:   1%|          | 4/500 [00:05<11:14,  1.36s/ep]

Episode: 3 | Steps: 14[81] | Epsilon: 0.987 | Time: 0.97s | Reward: 14.0 | MovingAvg: 14.0


Training DQN:   1%|          | 5/500 [00:06<09:41,  1.18s/ep]

Episode: 4 | Steps: 15[96] | Epsilon: 0.984 | Time: 0.84s | Reward: 15.0 | MovingAvg: 15.0


Training DQN:   1%|          | 6/500 [00:07<08:45,  1.06s/ep]

Episode: 5 | Steps: 14[110] | Epsilon: 0.981 | Time: 0.84s | Reward: 14.0 | MovingAvg: 14.0


Training DQN:   1%|▏         | 7/500 [00:08<08:40,  1.06s/ep]

Episode: 6 | Steps: 24[134] | Epsilon: 0.978 | Time: 1.04s | Reward: 24.0 | MovingAvg: 24.0


Training DQN:   2%|▏         | 8/500 [00:09<08:15,  1.01s/ep]

Episode: 7 | Steps: 16[150] | Epsilon: 0.973 | Time: 0.90s | Reward: 16.0 | MovingAvg: 16.0


Training DQN:   2%|▏         | 9/500 [00:10<09:35,  1.17s/ep]

Episode: 8 | Steps: 30[180] | Epsilon: 0.970 | Time: 1.54s | Reward: 30.0 | MovingAvg: 30.0


Training DQN:   2%|▏         | 10/500 [00:12<10:28,  1.28s/ep]

Episode: 9 | Steps: 24[204] | Epsilon: 0.964 | Time: 1.53s | Reward: 24.0 | MovingAvg: 24.0


Training DQN:   2%|▏         | 11/500 [00:13<10:30,  1.29s/ep]

Episode: 10 | Steps: 17[221] | Epsilon: 0.959 | Time: 1.30s | Reward: 17.0 | MovingAvg: 17.0


Training DQN:   2%|▏         | 12/500 [00:14<09:16,  1.14s/ep]

Episode: 11 | Steps: 13[234] | Epsilon: 0.956 | Time: 0.80s | Reward: 13.0 | MovingAvg: 13.0


Training DQN:   3%|▎         | 13/500 [00:17<13:26,  1.66s/ep]

Episode: 12 | Steps: 44[278] | Epsilon: 0.953 | Time: 2.84s | Reward: 44.0 | MovingAvg: 44.0


Training DQN:   3%|▎         | 14/500 [00:18<11:40,  1.44s/ep]

Episode: 13 | Steps: 15[293] | Epsilon: 0.944 | Time: 0.95s | Reward: 15.0 | MovingAvg: 15.0


Training DQN:   3%|▎         | 15/500 [00:19<10:55,  1.35s/ep]

Episode: 14 | Steps: 22[315] | Epsilon: 0.941 | Time: 1.14s | Reward: 22.0 | MovingAvg: 22.0


Training DQN:   3%|▎         | 16/500 [00:20<09:37,  1.19s/ep]

Episode: 15 | Steps: 10[325] | Epsilon: 0.937 | Time: 0.82s | Reward: 10.0 | MovingAvg: 10.0


Training DQN:   3%|▎         | 17/500 [00:21<10:08,  1.26s/ep]

Episode: 16 | Steps: 21[346] | Epsilon: 0.935 | Time: 1.42s | Reward: 21.0 | MovingAvg: 21.0


Training DQN:   4%|▎         | 18/500 [00:22<09:30,  1.18s/ep]

Episode: 17 | Steps: 10[356] | Epsilon: 0.931 | Time: 1.01s | Reward: 10.0 | MovingAvg: 10.0


Training DQN:   4%|▍         | 19/500 [00:24<09:47,  1.22s/ep]

Episode: 18 | Steps: 20[376] | Epsilon: 0.929 | Time: 1.30s | Reward: 20.0 | MovingAvg: 20.0


Training DQN:   4%|▍         | 20/500 [00:25<09:36,  1.20s/ep]

Episode: 19 | Steps: 15[391] | Epsilon: 0.925 | Time: 1.15s | Reward: 15.0 | MovingAvg: 15.0


Training DQN:   4%|▍         | 21/500 [00:26<10:10,  1.27s/ep]

Episode: 20 | Steps: 30[421] | Epsilon: 0.922 | Time: 1.45s | Reward: 30.0 | MovingAvg: 19.6


Training DQN:   4%|▍         | 22/500 [00:27<09:08,  1.15s/ep]

Episode: 21 | Steps: 12[433] | Epsilon: 0.916 | Time: 0.85s | Reward: 12.0 | MovingAvg: 20.1


Training DQN:   5%|▍         | 23/500 [00:28<07:59,  1.01s/ep]

Episode: 22 | Steps: 10[443] | Epsilon: 0.913 | Time: 0.68s | Reward: 10.0 | MovingAvg: 19.9


Training DQN:   5%|▍         | 24/500 [00:28<07:10,  1.11ep/s]

Episode: 23 | Steps: 11[454] | Epsilon: 0.911 | Time: 0.67s | Reward: 11.0 | MovingAvg: 18.8


Training DQN:   5%|▌         | 25/500 [00:29<07:06,  1.11ep/s]

Episode: 24 | Steps: 22[476] | Epsilon: 0.909 | Time: 0.88s | Reward: 22.0 | MovingAvg: 18.6


Training DQN:   5%|▌         | 26/500 [00:31<08:46,  1.11s/ep]

Episode: 25 | Steps: 26[502] | Epsilon: 0.905 | Time: 1.61s | Reward: 26.0 | MovingAvg: 19.0


Training DQN:   5%|▌         | 27/500 [00:32<08:18,  1.05s/ep]

Episode: 26 | Steps: 15[517] | Epsilon: 0.900 | Time: 0.92s | Reward: 15.0 | MovingAvg: 19.6


Training DQN:   6%|▌         | 28/500 [00:34<10:31,  1.34s/ep]

Episode: 27 | Steps: 32[549] | Epsilon: 0.897 | Time: 2.00s | Reward: 32.0 | MovingAvg: 19.1


Training DQN:   6%|▌         | 29/500 [00:35<09:09,  1.17s/ep]

Episode: 28 | Steps: 13[562] | Epsilon: 0.890 | Time: 0.77s | Reward: 13.0 | MovingAvg: 19.9


Training DQN:   6%|▌         | 30/500 [00:37<11:53,  1.52s/ep]

Episode: 29 | Steps: 39[601] | Epsilon: 0.888 | Time: 2.33s | Reward: 39.0 | MovingAvg: 19.1


Training DQN:   6%|▌         | 31/500 [00:38<12:01,  1.54s/ep]

Episode: 30 | Steps: 32[633] | Epsilon: 0.880 | Time: 1.59s | Reward: 32.0 | MovingAvg: 19.9


Training DQN:   6%|▋         | 32/500 [00:40<11:08,  1.43s/ep]

Episode: 31 | Steps: 16[649] | Epsilon: 0.873 | Time: 1.16s | Reward: 16.0 | MovingAvg: 20.6


Training DQN:   7%|▋         | 33/500 [00:41<10:03,  1.29s/ep]

Episode: 32 | Steps: 16[665] | Epsilon: 0.870 | Time: 0.97s | Reward: 16.0 | MovingAvg: 20.8


Training DQN:   7%|▋         | 34/500 [00:42<09:07,  1.17s/ep]

Episode: 33 | Steps: 12[677] | Epsilon: 0.867 | Time: 0.90s | Reward: 12.0 | MovingAvg: 19.4


Training DQN:   7%|▋         | 35/500 [00:43<10:18,  1.33s/ep]

Episode: 34 | Steps: 29[706] | Epsilon: 0.865 | Time: 1.69s | Reward: 29.0 | MovingAvg: 19.2


Training DQN:   7%|▋         | 36/500 [00:46<12:54,  1.67s/ep]

Episode: 35 | Steps: 35[741] | Epsilon: 0.859 | Time: 2.46s | Reward: 35.0 | MovingAvg: 19.6


Training DQN:   7%|▋         | 37/500 [00:47<12:16,  1.59s/ep]

Episode: 36 | Steps: 22[763] | Epsilon: 0.852 | Time: 1.40s | Reward: 22.0 | MovingAvg: 20.8


Training DQN:   8%|▊         | 38/500 [00:48<10:25,  1.35s/ep]

Episode: 37 | Steps: 12[775] | Epsilon: 0.847 | Time: 0.80s | Reward: 12.0 | MovingAvg: 20.9


Training DQN:   8%|▊         | 39/500 [00:48<08:20,  1.09s/ep]

Episode: 38 | Steps: 9[784] | Epsilon: 0.845 | Time: 0.46s | Reward: 9.0 | MovingAvg: 20.9


Training DQN:   8%|▊         | 40/500 [00:49<08:08,  1.06s/ep]

Episode: 39 | Steps: 11[795] | Epsilon: 0.843 | Time: 1.00s | Reward: 11.0 | MovingAvg: 20.4


Training DQN:   8%|▊         | 41/500 [00:50<07:30,  1.02ep/s]

Episode: 40 | Steps: 15[810] | Epsilon: 0.841 | Time: 0.79s | Reward: 15.0 | MovingAvg: 20.2


Training DQN:   8%|▊         | 42/500 [00:51<07:25,  1.03ep/s]

Episode: 41 | Steps: 18[828] | Epsilon: 0.838 | Time: 0.95s | Reward: 18.0 | MovingAvg: 19.4


Training DQN:   9%|▊         | 43/500 [00:52<07:45,  1.02s/ep]

Episode: 42 | Steps: 13[841] | Epsilon: 0.834 | Time: 1.13s | Reward: 13.0 | MovingAvg: 19.8


Training DQN:   9%|▉         | 44/500 [00:53<07:12,  1.06ep/s]

Episode: 43 | Steps: 10[851] | Epsilon: 0.832 | Time: 0.78s | Reward: 10.0 | MovingAvg: 19.9


Training DQN:   9%|▉         | 45/500 [00:54<07:29,  1.01ep/s]

Episode: 44 | Steps: 10[861] | Epsilon: 0.830 | Time: 1.08s | Reward: 10.0 | MovingAvg: 19.9


Training DQN:   9%|▉         | 46/500 [00:55<06:57,  1.09ep/s]

Episode: 45 | Steps: 12[873] | Epsilon: 0.828 | Time: 0.76s | Reward: 12.0 | MovingAvg: 19.2


Training DQN:   9%|▉         | 47/500 [00:56<07:19,  1.03ep/s]

Episode: 46 | Steps: 17[890] | Epsilon: 0.825 | Time: 1.09s | Reward: 17.0 | MovingAvg: 18.6


Training DQN:  10%|▉         | 48/500 [00:58<10:18,  1.37s/ep]

Episode: 47 | Steps: 37[927] | Epsilon: 0.822 | Time: 2.30s | Reward: 37.0 | MovingAvg: 18.6


Training DQN:  10%|▉         | 49/500 [01:01<13:07,  1.75s/ep]

Episode: 48 | Steps: 37[964] | Epsilon: 0.815 | Time: 2.62s | Reward: 37.0 | MovingAvg: 18.9


Training DQN:  10%|█         | 50/500 [01:02<11:56,  1.59s/ep]

Episode: 49 | Steps: 12[976] | Epsilon: 0.807 | Time: 1.23s | Reward: 12.0 | MovingAvg: 20.1


Training DQN:  10%|█         | 51/500 [01:03<10:16,  1.37s/ep]

Episode: 50 | Steps: 10[986] | Epsilon: 0.805 | Time: 0.86s | Reward: 10.0 | MovingAvg: 18.8


Training DQN:  10%|█         | 52/500 [01:05<11:33,  1.55s/ep]

Episode: 51 | Steps: 29[1015] | Epsilon: 0.803 | Time: 1.95s | Reward: 29.0 | MovingAvg: 17.6


Training DQN:  11%|█         | 53/500 [01:06<10:15,  1.38s/ep]

Episode: 52 | Steps: 13[1028] | Epsilon: 0.797 | Time: 0.98s | Reward: 13.0 | MovingAvg: 18.3


Training DQN:  11%|█         | 54/500 [01:07<09:43,  1.31s/ep]

Episode: 53 | Steps: 19[1047] | Epsilon: 0.794 | Time: 1.14s | Reward: 19.0 | MovingAvg: 18.1


Training DQN:  11%|█         | 55/500 [01:08<08:54,  1.20s/ep]

Episode: 54 | Steps: 13[1060] | Epsilon: 0.791 | Time: 0.95s | Reward: 13.0 | MovingAvg: 18.5


Training DQN:  11%|█         | 56/500 [01:10<09:48,  1.33s/ep]

Episode: 55 | Steps: 22[1082] | Epsilon: 0.788 | Time: 1.61s | Reward: 22.0 | MovingAvg: 17.7


Training DQN:  11%|█▏        | 57/500 [01:12<11:08,  1.51s/ep]

Episode: 56 | Steps: 24[1106] | Epsilon: 0.784 | Time: 1.93s | Reward: 24.0 | MovingAvg: 17.1


Training DQN:  12%|█▏        | 58/500 [01:13<10:27,  1.42s/ep]

Episode: 57 | Steps: 14[1120] | Epsilon: 0.779 | Time: 1.21s | Reward: 14.0 | MovingAvg: 17.1


Training DQN:  12%|█▏        | 59/500 [01:14<09:44,  1.32s/ep]

Episode: 58 | Steps: 16[1136] | Epsilon: 0.776 | Time: 1.10s | Reward: 16.0 | MovingAvg: 17.2


Training DQN:  12%|█▏        | 60/500 [01:16<10:57,  1.49s/ep]

Episode: 59 | Steps: 31[1167] | Epsilon: 0.773 | Time: 1.89s | Reward: 31.0 | MovingAvg: 17.6


Training DQN:  12%|█▏        | 61/500 [01:19<13:53,  1.90s/ep]

Episode: 60 | Steps: 32[1199] | Epsilon: 0.767 | Time: 2.84s | Reward: 32.0 | MovingAvg: 18.6


Training DQN:  12%|█▏        | 62/500 [01:19<11:20,  1.55s/ep]

Episode: 61 | Steps: 12[1211] | Epsilon: 0.760 | Time: 0.75s | Reward: 12.0 | MovingAvg: 19.4


Training DQN:  13%|█▎        | 63/500 [01:21<11:19,  1.56s/ep]

Episode: 62 | Steps: 16[1227] | Epsilon: 0.758 | Time: 1.56s | Reward: 16.0 | MovingAvg: 19.1


Training DQN:  13%|█▎        | 64/500 [01:23<13:24,  1.85s/ep]

Episode: 63 | Steps: 31[1258] | Epsilon: 0.755 | Time: 2.52s | Reward: 31.0 | MovingAvg: 19.3


Training DQN:  13%|█▎        | 65/500 [01:24<10:50,  1.49s/ep]

Episode: 64 | Steps: 12[1270] | Epsilon: 0.748 | Time: 0.67s | Reward: 12.0 | MovingAvg: 20.4


Training DQN:  13%|█▎        | 66/500 [01:26<11:29,  1.59s/ep]

Episode: 65 | Steps: 23[1293] | Epsilon: 0.746 | Time: 1.80s | Reward: 23.0 | MovingAvg: 20.4


Training DQN:  13%|█▎        | 67/500 [01:27<10:37,  1.47s/ep]

Episode: 66 | Steps: 19[1312] | Epsilon: 0.741 | Time: 1.20s | Reward: 19.0 | MovingAvg: 21.0


Training DQN:  14%|█▎        | 68/500 [01:29<10:37,  1.48s/ep]

Episode: 67 | Steps: 14[1326] | Epsilon: 0.738 | Time: 1.49s | Reward: 14.0 | MovingAvg: 21.1


Training DQN:  14%|█▍        | 69/500 [01:31<12:38,  1.76s/ep]

Episode: 68 | Steps: 25[1351] | Epsilon: 0.735 | Time: 2.42s | Reward: 25.0 | MovingAvg: 19.9


Training DQN:  14%|█▍        | 70/500 [01:32<10:22,  1.45s/ep]

Episode: 69 | Steps: 9[1360] | Epsilon: 0.730 | Time: 0.72s | Reward: 9.0 | MovingAvg: 19.4


Training DQN:  14%|█▍        | 71/500 [01:32<08:37,  1.21s/ep]

Episode: 70 | Steps: 8[1368] | Epsilon: 0.728 | Time: 0.64s | Reward: 8.0 | MovingAvg: 19.2


Training DQN:  14%|█▍        | 72/500 [01:34<08:45,  1.23s/ep]

Episode: 71 | Steps: 15[1383] | Epsilon: 0.726 | Time: 1.27s | Reward: 15.0 | MovingAvg: 19.1


Training DQN:  15%|█▍        | 73/500 [01:35<08:27,  1.19s/ep]

Episode: 72 | Steps: 16[1399] | Epsilon: 0.723 | Time: 1.09s | Reward: 16.0 | MovingAvg: 18.4


Training DQN:  15%|█▍        | 74/500 [01:36<07:57,  1.12s/ep]

Episode: 73 | Steps: 13[1412] | Epsilon: 0.720 | Time: 0.97s | Reward: 13.0 | MovingAvg: 18.6


Training DQN:  15%|█▌        | 75/500 [01:37<09:20,  1.32s/ep]

Episode: 74 | Steps: 20[1432] | Epsilon: 0.718 | Time: 1.78s | Reward: 20.0 | MovingAvg: 18.2


Training DQN:  15%|█▌        | 76/500 [01:39<10:04,  1.43s/ep]

Episode: 75 | Steps: 20[1452] | Epsilon: 0.714 | Time: 1.67s | Reward: 20.0 | MovingAvg: 18.6


Training DQN:  15%|█▌        | 77/500 [01:40<09:51,  1.40s/ep]

Episode: 76 | Steps: 13[1465] | Epsilon: 0.710 | Time: 1.33s | Reward: 13.0 | MovingAvg: 18.5


Training DQN:  16%|█▌        | 78/500 [01:42<09:43,  1.38s/ep]

Episode: 77 | Steps: 12[1477] | Epsilon: 0.707 | Time: 1.35s | Reward: 12.0 | MovingAvg: 17.9


Training DQN:  16%|█▌        | 79/500 [01:42<07:46,  1.11s/ep]

Episode: 78 | Steps: 9[1486] | Epsilon: 0.705 | Time: 0.47s | Reward: 9.0 | MovingAvg: 17.9


Training DQN:  16%|█▌        | 80/500 [01:44<08:25,  1.20s/ep]

Episode: 79 | Steps: 17[1503] | Epsilon: 0.703 | Time: 1.42s | Reward: 17.0 | MovingAvg: 17.5


Training DQN:  16%|█▌        | 81/500 [01:45<07:45,  1.11s/ep]

Episode: 80 | Steps: 11[1514] | Epsilon: 0.699 | Time: 0.90s | Reward: 11.0 | MovingAvg: 16.8


Training DQN:  16%|█▋        | 82/500 [01:46<08:04,  1.16s/ep]

Episode: 81 | Steps: 17[1531] | Epsilon: 0.697 | Time: 1.27s | Reward: 17.0 | MovingAvg: 15.8


Training DQN:  17%|█▋        | 83/500 [01:47<07:57,  1.14s/ep]

Episode: 82 | Steps: 13[1544] | Epsilon: 0.694 | Time: 1.11s | Reward: 13.0 | MovingAvg: 16.0


Training DQN:  17%|█▋        | 84/500 [01:48<07:44,  1.12s/ep]

Episode: 83 | Steps: 11[1555] | Epsilon: 0.691 | Time: 1.05s | Reward: 11.0 | MovingAvg: 15.8


Training DQN:  17%|█▋        | 85/500 [01:49<07:44,  1.12s/ep]

Episode: 84 | Steps: 11[1566] | Epsilon: 0.689 | Time: 1.13s | Reward: 11.0 | MovingAvg: 14.8


Training DQN:  17%|█▋        | 86/500 [01:51<09:45,  1.41s/ep]

Episode: 85 | Steps: 23[1589] | Epsilon: 0.687 | Time: 2.10s | Reward: 23.0 | MovingAvg: 14.8


Training DQN:  17%|█▋        | 87/500 [01:53<10:20,  1.50s/ep]

Episode: 86 | Steps: 23[1612] | Epsilon: 0.682 | Time: 1.71s | Reward: 23.0 | MovingAvg: 14.8


Training DQN:  18%|█▊        | 88/500 [01:54<09:23,  1.37s/ep]

Episode: 87 | Steps: 9[1621] | Epsilon: 0.678 | Time: 1.05s | Reward: 9.0 | MovingAvg: 15.0


Training DQN:  18%|█▊        | 89/500 [01:57<13:03,  1.91s/ep]

Episode: 88 | Steps: 46[1667] | Epsilon: 0.676 | Time: 3.16s | Reward: 46.0 | MovingAvg: 14.8


Training DQN:  18%|█▊        | 90/500 [01:58<11:25,  1.67s/ep]

Episode: 89 | Steps: 12[1679] | Epsilon: 0.667 | Time: 1.13s | Reward: 12.0 | MovingAvg: 15.8


Training DQN:  18%|█▊        | 91/500 [02:00<11:34,  1.70s/ep]

Episode: 90 | Steps: 23[1702] | Epsilon: 0.664 | Time: 1.76s | Reward: 23.0 | MovingAvg: 15.9


Training DQN:  18%|█▊        | 92/500 [02:02<11:05,  1.63s/ep]

Episode: 91 | Steps: 14[1716] | Epsilon: 0.660 | Time: 1.47s | Reward: 14.0 | MovingAvg: 16.7


Training DQN:  19%|█▊        | 93/500 [02:03<10:03,  1.48s/ep]

Episode: 92 | Steps: 11[1727] | Epsilon: 0.657 | Time: 1.14s | Reward: 11.0 | MovingAvg: 16.6


Training DQN:  19%|█▉        | 94/500 [02:04<08:58,  1.33s/ep]

Episode: 93 | Steps: 10[1737] | Epsilon: 0.655 | Time: 0.96s | Reward: 10.0 | MovingAvg: 16.4


Training DQN:  19%|█▉        | 95/500 [02:04<07:53,  1.17s/ep]

Episode: 94 | Steps: 10[1747] | Epsilon: 0.653 | Time: 0.80s | Reward: 10.0 | MovingAvg: 16.2


Training DQN:  19%|█▉        | 96/500 [02:08<12:23,  1.84s/ep]

Episode: 95 | Steps: 38[1785] | Epsilon: 0.651 | Time: 3.40s | Reward: 38.0 | MovingAvg: 15.8


Training DQN:  19%|█▉        | 97/500 [02:09<11:18,  1.68s/ep]

Episode: 96 | Steps: 18[1803] | Epsilon: 0.643 | Time: 1.31s | Reward: 18.0 | MovingAvg: 16.6


Training DQN:  20%|█▉        | 98/500 [02:10<09:57,  1.49s/ep]

Episode: 97 | Steps: 11[1814] | Epsilon: 0.639 | Time: 1.03s | Reward: 11.0 | MovingAvg: 16.9


Training DQN:  20%|█▉        | 99/500 [02:12<09:44,  1.46s/ep]

Episode: 98 | Steps: 13[1827] | Epsilon: 0.637 | Time: 1.39s | Reward: 13.0 | MovingAvg: 16.9


Training DQN:  20%|██        | 100/500 [02:14<10:59,  1.65s/ep]

Episode: 99 | Steps: 25[1852] | Epsilon: 0.635 | Time: 2.10s | Reward: 25.0 | MovingAvg: 17.1


Training DQN:  20%|██        | 101/500 [02:15<10:52,  1.64s/ep]

Episode: 100 | Steps: 18[1870] | Epsilon: 0.630 | Time: 1.60s | Reward: 18.0 | MovingAvg: 17.4


Training DQN:  20%|██        | 102/500 [02:16<09:05,  1.37s/ep]

Episode: 101 | Steps: 9[1879] | Epsilon: 0.626 | Time: 0.75s | Reward: 9.0 | MovingAvg: 17.8


Training DQN:  21%|██        | 103/500 [02:17<08:50,  1.34s/ep]

Episode: 102 | Steps: 17[1896] | Epsilon: 0.624 | Time: 1.26s | Reward: 17.0 | MovingAvg: 17.4


Training DQN:  21%|██        | 104/500 [02:18<07:58,  1.21s/ep]

Episode: 103 | Steps: 16[1912] | Epsilon: 0.621 | Time: 0.91s | Reward: 16.0 | MovingAvg: 17.6


Training DQN:  21%|██        | 105/500 [02:20<09:17,  1.41s/ep]

Episode: 104 | Steps: 22[1934] | Epsilon: 0.618 | Time: 1.88s | Reward: 22.0 | MovingAvg: 17.9


Training DQN:  21%|██        | 106/500 [02:22<11:00,  1.68s/ep]

Episode: 105 | Steps: 19[1953] | Epsilon: 0.613 | Time: 2.29s | Reward: 19.0 | MovingAvg: 18.4


Training DQN:  21%|██▏       | 107/500 [02:25<12:27,  1.90s/ep]

Episode: 106 | Steps: 28[1981] | Epsilon: 0.609 | Time: 2.43s | Reward: 28.0 | MovingAvg: 18.2


Training DQN:  22%|██▏       | 108/500 [02:26<10:09,  1.56s/ep]

Episode: 107 | Steps: 10[1991] | Epsilon: 0.604 | Time: 0.74s | Reward: 10.0 | MovingAvg: 18.4


Training DQN:  22%|██▏       | 109/500 [02:26<08:40,  1.33s/ep]

Episode: 108 | Steps: 10[2001] | Epsilon: 0.602 | Time: 0.81s | Reward: 10.0 | MovingAvg: 18.5


Training DQN:  22%|██▏       | 110/500 [02:28<08:16,  1.27s/ep]

Episode: 109 | Steps: 9[2010] | Epsilon: 0.600 | Time: 1.13s | Reward: 9.0 | MovingAvg: 16.7


Training DQN:  22%|██▏       | 111/500 [02:29<08:36,  1.33s/ep]

Episode: 110 | Steps: 14[2024] | Epsilon: 0.598 | Time: 1.46s | Reward: 14.0 | MovingAvg: 16.6


Training DQN:  22%|██▏       | 112/500 [02:31<09:04,  1.40s/ep]

Episode: 111 | Steps: 14[2038] | Epsilon: 0.595 | Time: 1.58s | Reward: 14.0 | MovingAvg: 16.1


Training DQN:  23%|██▎       | 113/500 [02:32<09:32,  1.48s/ep]

Episode: 112 | Steps: 16[2054] | Epsilon: 0.592 | Time: 1.65s | Reward: 16.0 | MovingAvg: 16.1


Training DQN:  23%|██▎       | 114/500 [02:34<09:35,  1.49s/ep]

Episode: 113 | Steps: 19[2073] | Epsilon: 0.589 | Time: 1.52s | Reward: 19.0 | MovingAvg: 16.4


Training DQN:  23%|██▎       | 115/500 [02:35<09:41,  1.51s/ep]

Episode: 114 | Steps: 20[2093] | Epsilon: 0.585 | Time: 1.55s | Reward: 20.0 | MovingAvg: 16.8


Training DQN:  23%|██▎       | 116/500 [02:37<09:29,  1.48s/ep]

Episode: 115 | Steps: 13[2106] | Epsilon: 0.581 | Time: 1.42s | Reward: 13.0 | MovingAvg: 17.3


Training DQN:  23%|██▎       | 117/500 [02:38<08:29,  1.33s/ep]

Episode: 116 | Steps: 10[2116] | Epsilon: 0.579 | Time: 0.97s | Reward: 10.0 | MovingAvg: 16.1


Training DQN:  24%|██▎       | 118/500 [02:39<08:45,  1.38s/ep]

Episode: 117 | Steps: 19[2135] | Epsilon: 0.577 | Time: 1.48s | Reward: 19.0 | MovingAvg: 15.7


Training DQN:  24%|██▍       | 119/500 [02:41<09:35,  1.51s/ep]

Episode: 118 | Steps: 22[2157] | Epsilon: 0.573 | Time: 1.83s | Reward: 22.0 | MovingAvg: 16.1


Training DQN:  24%|██▍       | 120/500 [02:43<10:50,  1.71s/ep]

Episode: 119 | Steps: 24[2181] | Epsilon: 0.569 | Time: 2.18s | Reward: 24.0 | MovingAvg: 16.5


Training DQN:  24%|██▍       | 121/500 [02:44<09:58,  1.58s/ep]

Episode: 120 | Steps: 13[2194] | Epsilon: 0.564 | Time: 1.27s | Reward: 13.0 | MovingAvg: 16.4


Training DQN:  24%|██▍       | 122/500 [02:46<10:42,  1.70s/ep]

Episode: 121 | Steps: 21[2215] | Epsilon: 0.561 | Time: 1.98s | Reward: 21.0 | MovingAvg: 16.2


Training DQN:  25%|██▍       | 123/500 [02:47<09:13,  1.47s/ep]

Episode: 122 | Steps: 17[2232] | Epsilon: 0.557 | Time: 0.93s | Reward: 17.0 | MovingAvg: 16.8


Training DQN:  25%|██▍       | 124/500 [02:49<08:48,  1.41s/ep]

Episode: 123 | Steps: 15[2247] | Epsilon: 0.554 | Time: 1.26s | Reward: 15.0 | MovingAvg: 16.8


Training DQN:  25%|██▌       | 125/500 [02:54<17:02,  2.73s/ep]

Episode: 124 | Steps: 54[2301] | Epsilon: 0.551 | Time: 5.81s | Reward: 54.0 | MovingAvg: 16.8


Training DQN:  25%|██▌       | 126/500 [02:56<15:14,  2.45s/ep]

Episode: 125 | Steps: 16[2317] | Epsilon: 0.540 | Time: 1.79s | Reward: 16.0 | MovingAvg: 18.4


Training DQN:  25%|██▌       | 127/500 [02:58<14:40,  2.36s/ep]

Episode: 126 | Steps: 20[2337] | Epsilon: 0.537 | Time: 2.16s | Reward: 20.0 | MovingAvg: 18.2


Training DQN:  26%|██▌       | 128/500 [03:02<16:18,  2.63s/ep]

Episode: 127 | Steps: 51[2388] | Epsilon: 0.533 | Time: 3.26s | Reward: 51.0 | MovingAvg: 17.8


Training DQN:  26%|██▌       | 129/500 [03:06<19:09,  3.10s/ep]

Episode: 128 | Steps: 39[2427] | Epsilon: 0.522 | Time: 4.19s | Reward: 39.0 | MovingAvg: 19.9


Training DQN:  26%|██▌       | 130/500 [03:11<22:11,  3.60s/ep]

Episode: 129 | Steps: 43[2470] | Epsilon: 0.515 | Time: 4.76s | Reward: 43.0 | MovingAvg: 21.3


Training DQN:  26%|██▌       | 131/500 [03:14<21:48,  3.55s/ep]

Episode: 130 | Steps: 38[2508] | Epsilon: 0.506 | Time: 3.43s | Reward: 38.0 | MovingAvg: 23.0


Training DQN:  26%|██▋       | 132/500 [03:17<19:50,  3.23s/ep]

Episode: 131 | Steps: 25[2533] | Epsilon: 0.498 | Time: 2.51s | Reward: 25.0 | MovingAvg: 24.2


Training DQN:  27%|██▋       | 133/500 [03:18<16:34,  2.71s/ep]

Episode: 132 | Steps: 17[2550] | Epsilon: 0.493 | Time: 1.48s | Reward: 17.0 | MovingAvg: 24.8


Training DQN:  27%|██▋       | 134/500 [03:25<24:00,  3.94s/ep]

Episode: 133 | Steps: 71[2621] | Epsilon: 0.490 | Time: 6.80s | Reward: 71.0 | MovingAvg: 24.8


Training DQN:  27%|██▋       | 135/500 [03:31<27:15,  4.48s/ep]

Episode: 134 | Steps: 54[2675] | Epsilon: 0.476 | Time: 5.75s | Reward: 54.0 | MovingAvg: 27.4


Training DQN:  27%|██▋       | 136/500 [03:33<23:37,  3.89s/ep]

Episode: 135 | Steps: 26[2701] | Epsilon: 0.465 | Time: 2.52s | Reward: 26.0 | MovingAvg: 29.1


Training DQN:  27%|██▋       | 137/500 [03:36<21:58,  3.63s/ep]

Episode: 136 | Steps: 29[2730] | Epsilon: 0.460 | Time: 3.02s | Reward: 29.0 | MovingAvg: 29.8


Training DQN:  28%|██▊       | 138/500 [03:40<22:48,  3.78s/ep]

Episode: 137 | Steps: 36[2766] | Epsilon: 0.454 | Time: 4.13s | Reward: 36.0 | MovingAvg: 30.7


Training DQN:  28%|██▊       | 139/500 [03:43<20:54,  3.47s/ep]

Episode: 138 | Steps: 30[2796] | Epsilon: 0.447 | Time: 2.76s | Reward: 30.0 | MovingAvg: 31.6


Training DQN:  28%|██▊       | 140/500 [03:51<29:11,  4.86s/ep]

Episode: 139 | Steps: 87[2883] | Epsilon: 0.441 | Time: 8.11s | Reward: 87.0 | MovingAvg: 31.9


Training DQN:  28%|██▊       | 141/500 [03:58<33:09,  5.54s/ep]

Episode: 140 | Steps: 63[2946] | Epsilon: 0.423 | Time: 7.12s | Reward: 63.0 | MovingAvg: 35.1


Training DQN:  28%|██▊       | 142/500 [04:04<32:41,  5.48s/ep]

Episode: 141 | Steps: 57[3003] | Epsilon: 0.411 | Time: 5.33s | Reward: 57.0 | MovingAvg: 37.6


Training DQN:  29%|██▊       | 143/500 [04:06<26:52,  4.52s/ep]

Episode: 142 | Steps: 23[3026] | Epsilon: 0.399 | Time: 2.27s | Reward: 23.0 | MovingAvg: 39.4


Training DQN:  29%|██▉       | 144/500 [04:14<33:40,  5.67s/ep]

Episode: 143 | Steps: 89[3115] | Epsilon: 0.395 | Time: 8.38s | Reward: 89.0 | MovingAvg: 39.7


Training DQN:  29%|██▉       | 145/500 [04:18<29:39,  5.01s/ep]

Episode: 144 | Steps: 39[3154] | Epsilon: 0.377 | Time: 3.47s | Reward: 39.0 | MovingAvg: 43.4


Training DQN:  29%|██▉       | 146/500 [04:22<28:57,  4.91s/ep]

Episode: 145 | Steps: 46[3200] | Epsilon: 0.369 | Time: 4.66s | Reward: 46.0 | MovingAvg: 42.6


Training DQN:  29%|██▉       | 147/500 [04:27<28:56,  4.92s/ep]

Episode: 146 | Steps: 46[3246] | Epsilon: 0.360 | Time: 4.95s | Reward: 46.0 | MovingAvg: 44.1


Training DQN:  30%|██▉       | 148/500 [04:34<32:10,  5.48s/ep]

Episode: 147 | Steps: 70[3316] | Epsilon: 0.351 | Time: 6.80s | Reward: 70.0 | MovingAvg: 45.5


Training DQN:  30%|██▉       | 149/500 [04:43<37:30,  6.41s/ep]

Episode: 148 | Steps: 78[3394] | Epsilon: 0.337 | Time: 8.58s | Reward: 78.0 | MovingAvg: 46.4


Training DQN:  30%|███       | 150/500 [04:48<36:06,  6.19s/ep]

Episode: 149 | Steps: 54[3448] | Epsilon: 0.321 | Time: 5.67s | Reward: 54.0 | MovingAvg: 48.4


Training DQN:  30%|███       | 151/500 [04:58<41:53,  7.20s/ep]

Episode: 150 | Steps: 81[3529] | Epsilon: 0.310 | Time: 9.57s | Reward: 81.0 | MovingAvg: 48.9


Training DQN:  30%|███       | 152/500 [05:05<41:20,  7.13s/ep]

Episode: 151 | Steps: 53[3582] | Epsilon: 0.294 | Time: 6.95s | Reward: 53.0 | MovingAvg: 51.0


Training DQN:  31%|███       | 153/500 [05:13<43:32,  7.53s/ep]

Episode: 152 | Steps: 59[3641] | Epsilon: 0.284 | Time: 8.46s | Reward: 59.0 | MovingAvg: 52.5


Training DQN:  31%|███       | 154/500 [05:18<38:06,  6.61s/ep]

Episode: 153 | Steps: 36[3677] | Epsilon: 0.272 | Time: 4.46s | Reward: 36.0 | MovingAvg: 54.5


Training DQN:  31%|███       | 155/500 [05:28<44:14,  7.70s/ep]

Episode: 154 | Steps: 91[3768] | Epsilon: 0.265 | Time: 10.23s | Reward: 91.0 | MovingAvg: 52.8


Training DQN:  31%|███       | 156/500 [05:46<1:02:20, 10.87s/ep]

Episode: 155 | Steps: 151[3919] | Epsilon: 0.246 | Time: 18.29s | Reward: 151.0 | MovingAvg: 54.6


Training DQN:  31%|███▏      | 157/500 [05:54<56:20,  9.86s/ep]  

Episode: 156 | Steps: 62[3981] | Epsilon: 0.216 | Time: 7.48s | Reward: 62.0 | MovingAvg: 60.9


Training DQN:  32%|███▏      | 158/500 [05:59<48:31,  8.51s/ep]

Episode: 157 | Steps: 42[4023] | Epsilon: 0.204 | Time: 5.38s | Reward: 42.0 | MovingAvg: 62.5


Training DQN:  32%|███▏      | 159/500 [06:10<51:45,  9.11s/ep]

Episode: 158 | Steps: 94[4117] | Epsilon: 0.195 | Time: 10.49s | Reward: 94.0 | MovingAvg: 62.9


Training DQN:  32%|███▏      | 160/500 [06:17<48:15,  8.52s/ep]

Episode: 159 | Steps: 52[4169] | Epsilon: 0.177 | Time: 7.14s | Reward: 52.0 | MovingAvg: 66.0


Training DQN:  32%|███▏      | 161/500 [06:24<46:44,  8.27s/ep]

Episode: 160 | Steps: 52[4221] | Epsilon: 0.166 | Time: 7.70s | Reward: 52.0 | MovingAvg: 64.3


Training DQN:  32%|███▏      | 162/500 [06:32<45:18,  8.04s/ep]

Episode: 161 | Steps: 65[4286] | Epsilon: 0.156 | Time: 7.50s | Reward: 65.0 | MovingAvg: 63.8


Training DQN:  33%|███▎      | 163/500 [06:43<50:38,  9.02s/ep]

Episode: 162 | Steps: 94[4380] | Epsilon: 0.143 | Time: 11.29s | Reward: 94.0 | MovingAvg: 64.2


Training DQN:  33%|███▎      | 164/500 [06:49<44:16,  7.91s/ep]

Episode: 163 | Steps: 37[4417] | Epsilon: 0.124 | Time: 5.31s | Reward: 37.0 | MovingAvg: 67.7


Training DQN:  33%|███▎      | 165/500 [06:55<42:18,  7.58s/ep]

Episode: 164 | Steps: 51[4468] | Epsilon: 0.117 | Time: 6.81s | Reward: 51.0 | MovingAvg: 65.1


Training DQN:  33%|███▎      | 166/500 [07:10<53:12,  9.56s/ep]

Episode: 165 | Steps: 115[4583] | Epsilon: 0.106 | Time: 14.18s | Reward: 115.0 | MovingAvg: 65.7


Training DQN:  33%|███▎      | 167/500 [07:16<47:34,  8.57s/ep]

Episode: 166 | Steps: 49[4632] | Epsilon: 0.083 | Time: 6.27s | Reward: 49.0 | MovingAvg: 69.2


Training DQN:  34%|███▎      | 168/500 [07:27<52:16,  9.45s/ep]

Episode: 167 | Steps: 92[4724] | Epsilon: 0.074 | Time: 11.49s | Reward: 92.0 | MovingAvg: 69.3


Training DQN:  34%|███▍      | 169/500 [07:40<57:34, 10.44s/ep]

Episode: 168 | Steps: 102[4826] | Epsilon: 0.055 | Time: 12.74s | Reward: 102.0 | MovingAvg: 70.4


Training DQN:  34%|███▍      | 170/500 [07:55<1:04:31, 11.73s/ep]

Episode: 169 | Steps: 111[4937] | Epsilon: 0.035 | Time: 14.76s | Reward: 111.0 | MovingAvg: 71.6


Training DQN:  34%|███▍      | 171/500 [08:25<1:34:35, 17.25s/ep]

Episode: 170 | Steps: 217[5154] | Epsilon: 0.013 | Time: 30.13s | Reward: 217.0 | MovingAvg: 74.5


Training DQN:  34%|███▍      | 172/500 [08:34<1:20:04, 14.65s/ep]

Episode: 171 | Steps: 67[5221] | Epsilon: 0.010 | Time: 8.58s | Reward: 67.0 | MovingAvg: 81.2


Training DQN:  35%|███▍      | 173/500 [08:48<1:18:54, 14.48s/ep]

Episode: 172 | Steps: 96[5317] | Epsilon: 0.010 | Time: 14.08s | Reward: 96.0 | MovingAvg: 82.0


Training DQN:  35%|███▍      | 174/500 [08:59<1:12:50, 13.41s/ep]

Episode: 173 | Steps: 85[5402] | Epsilon: 0.010 | Time: 10.90s | Reward: 85.0 | MovingAvg: 83.8


Training DQN:  35%|███▌      | 175/500 [09:09<1:08:24, 12.63s/ep]

Episode: 174 | Steps: 71[5473] | Epsilon: 0.010 | Time: 10.81s | Reward: 71.0 | MovingAvg: 86.2


Training DQN:  35%|███▌      | 176/500 [09:19<1:02:37, 11.60s/ep]

Episode: 175 | Steps: 68[5541] | Epsilon: 0.010 | Time: 9.19s | Reward: 68.0 | MovingAvg: 85.2


Training DQN:  35%|███▌      | 177/500 [09:30<1:02:46, 11.66s/ep]

Episode: 176 | Steps: 94[5635] | Epsilon: 0.010 | Time: 11.81s | Reward: 94.0 | MovingAvg: 81.1


Training DQN:  36%|███▌      | 178/500 [09:41<1:00:53, 11.35s/ep]

Episode: 177 | Steps: 70[5705] | Epsilon: 0.010 | Time: 10.61s | Reward: 70.0 | MovingAvg: 82.7


Training DQN:  36%|███▌      | 179/500 [10:01<1:14:30, 13.93s/ep]

Episode: 178 | Steps: 141[5846] | Epsilon: 0.010 | Time: 19.94s | Reward: 141.0 | MovingAvg: 84.1


Training DQN:  36%|███▌      | 180/500 [10:14<1:12:37, 13.62s/ep]

Episode: 179 | Steps: 110[5956] | Epsilon: 0.010 | Time: 12.90s | Reward: 110.0 | MovingAvg: 86.5


Training DQN:  36%|███▌      | 181/500 [10:28<1:13:28, 13.82s/ep]

Episode: 180 | Steps: 103[6059] | Epsilon: 0.010 | Time: 14.29s | Reward: 103.0 | MovingAvg: 89.3


Training DQN:  36%|███▋      | 182/500 [10:40<1:10:03, 13.22s/ep]

Episode: 181 | Steps: 74[6133] | Epsilon: 0.010 | Time: 11.82s | Reward: 74.0 | MovingAvg: 91.9


Training DQN:  37%|███▋      | 183/500 [10:56<1:14:44, 14.15s/ep]

Episode: 182 | Steps: 112[6245] | Epsilon: 0.010 | Time: 16.30s | Reward: 112.0 | MovingAvg: 92.3


Training DQN:  37%|███▋      | 184/500 [11:05<1:05:15, 12.39s/ep]

Episode: 183 | Steps: 53[6298] | Epsilon: 0.010 | Time: 8.29s | Reward: 53.0 | MovingAvg: 93.2


Training DQN:  37%|███▋      | 185/500 [11:15<1:02:02, 11.82s/ep]

Episode: 184 | Steps: 66[6364] | Epsilon: 0.010 | Time: 10.48s | Reward: 66.0 | MovingAvg: 94.0


Training DQN:  37%|███▋      | 186/500 [11:34<1:12:36, 13.88s/ep]

Episode: 185 | Steps: 131[6495] | Epsilon: 0.010 | Time: 18.67s | Reward: 131.0 | MovingAvg: 94.8


Training DQN:  37%|███▋      | 187/500 [11:51<1:18:20, 15.02s/ep]

Episode: 186 | Steps: 124[6619] | Epsilon: 0.010 | Time: 17.68s | Reward: 124.0 | MovingAvg: 95.6


Training DQN:  38%|███▊      | 188/500 [12:12<1:26:56, 16.72s/ep]

Episode: 187 | Steps: 141[6760] | Epsilon: 0.010 | Time: 20.69s | Reward: 141.0 | MovingAvg: 99.3


Training DQN:  38%|███▊      | 188/500 [12:20<20:29,  3.94s/ep]  

Episode: 188 | Steps: 63[6823] | Epsilon: 0.010 | Time: 8.21s | Reward: 63.0 | MovingAvg: 101.8
Train time: 12.3m [1.5s]





In [9]:
trainer.play_with_pygame(episodes=5, fps=30)

Episode 1/5 - Reward: 52.0
Episode 2/5 - Reward: 116.0
Episode 3/5 - Reward: 61.0
Episode 4/5 - Reward: 62.0
Episode 5/5 - Reward: 60.0
