In [None]:
from google.colab import drive
import os

drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/ece176_final_project-main')

!pip3 install -r requirements.txt

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Configure PyTorch's CUDA allocator through its environment variable.
# This cell runs before the cell that imports torch.
os.environ.update({'PYTORCH_CUDA_ALLOC_CONF': 'expandable_segments:True'})

In [None]:
import torch
import numpy as np
import gymnasium as gym
import ale_py
import matplotlib.pyplot as plt
import os

# Import your modules
from utils.preprocessor import DQNPreprocessor
from models.base import DQN
from models.dqn2015 import DQN2
from utils.replayBuffer import SequentialGPUReplayBuffer
from agents.base import DQNAgent
from utils.visualizer import DQNVisualizer

In [None]:
def train_breakout_dqn(num_frames=10000000,
                    memory_size=500000,
                    batch_size=32,
                    gamma=0.99,
                    eps_start=1.0,
                    eps_end=0.1,
                    eps_decay=250000,
                    target_update=5000,
                    learning_rate=0.0025,
                    update_freq=4,
                    replay_start_size=25000,
                    no_op_max=5,
                    eval_interval=250000,
                    save_interval=500000,
                    env_name="CarnivalDeterministic-v4"):
    """
    Train a DQN agent on an Atari environment, plot its rewards and evaluate it.

    Despite the historical ``breakout`` in the function name, the environment
    is selected by ``env_name`` and defaults to ``CarnivalDeterministic-v4``.

    The function:
      1. creates the Gymnasium environment and picks a torch device
         (CUDA, then MPS, then CPU);
      2. builds a ``DQNAgent`` using ``DQN2``, ``DQNPreprocessor`` and
         ``SequentialGPUReplayBuffer`` with a ``(4, 84, 84)`` frame shape;
      3. calls ``agent.train(num_frames=num_frames)``;
      4. saves a plot of the per-episode training rewards under ``train_runs/``;
      5. evaluates the agent for 3 episodes and prints the score;
      6. closes the environment, even if an earlier step raised.

    Args:
        num_frames: Value passed to ``agent.train`` as ``num_frames``.
        memory_size, batch_size, gamma, eps_start, eps_end, eps_decay,
        target_update, learning_rate, update_freq, replay_start_size,
        no_op_max, eval_interval, save_interval: Hyperparameters forwarded
            unchanged to ``DQNAgent`` under the same keyword names; their exact
            semantics are defined by that class.
            NOTE(review): the default ``learning_rate`` (0.0025) is ten times
            the 0.00025 used by this notebook's full-training call -- confirm
            the default is intended.
        env_name: Gymnasium environment id handed to ``gym.make``.

    Returns:
        None. Results are reported through printed messages and the saved plot.
    """
    # 1. Environment setup
    env = gym.make(env_name)

    # Everything after creation runs under try/finally so the environment is
    # closed even when training fails or is interrupted part-way through.
    try:
        # Set device: prefer CUDA, then Apple MPS, otherwise fall back to CPU.
        if torch.cuda.is_available():
            device = torch.device("cuda")
        elif torch.backends.mps.is_available():
            device = torch.device("mps")
        else:
            device = torch.device("cpu")
        print(f"Using device: {device}")

        # 2. Create the agent with the requested hyperparameters
        agent = DQNAgent(
            env=env,
            replayBufferClass=SequentialGPUReplayBuffer,
            frameShape=(4, 84, 84),
            QNetwork=DQN2,
            PreprocessorClass=DQNPreprocessor,
            device=device,
            memory_size=memory_size,
            batch_size=batch_size,
            gamma=gamma,
            eps_start=eps_start,
            eps_end=eps_end,
            eps_decay=eps_decay,
            target_update=target_update,
            learning_rate=learning_rate,
            update_freq=update_freq,
            replay_start_size=replay_start_size,
            no_op_max=no_op_max,
            eval_interval=eval_interval,
            save_interval=save_interval
        )

        # 3. Train for the requested number of frames
        print(f"Starting training for {num_frames} frames...")

        # train() is unpacked as two values; only the per-episode rewards are
        # used below, so the second one is deliberately ignored.
        episode_rewards, _eval_rewards = agent.train(num_frames=num_frames)
        print("Training completed!")

        # 4. Plot training episode rewards
        os.makedirs("train_runs", exist_ok=True)
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.plot(episode_rewards, label="Episode Reward")
        ax.set_title("Training Rewards over Episodes")
        ax.set_xlabel("Episode")
        ax.set_ylabel("Reward")
        ax.legend()
        ax.grid(True)
        # The file name is kept as-is for compatibility, even though the
        # environment is not necessarily Breakout.
        fig.savefig("train_runs/breakout_dqn_test_training_rewards.png")
        plt.close(fig)

        # 5. Quick evaluation - just 3 episodes
        eval_score = agent.evaluate(num_episodes=3)
        print(f"Evaluation over 3 episodes: {eval_score:.2f} average reward")
    finally:
        # 6. Clean up
        env.close()

Test training with minimal parameters

In [None]:
# Smoke test: a 2,000-frame run with a 10,000-entry replay buffer.
train_breakout_dqn(
    num_frames=2000,
    # Replay buffer
    memory_size=10000,
    replay_start_size=1000,
    batch_size=32,
    # Exploration schedule
    eps_start=1.0,
    eps_end=0.1,
    eps_decay=2000,
    # Optimisation
    gamma=0.99,
    learning_rate=0.0025,
    target_update=500,
    update_freq=4,
    # Episode start
    no_op_max=5,
)

Using device: cuda
Allocating sequential replay buffer with capacity 10000 on cuda...
Allocating sequential states tensor...
Allocating actions tensor...
Allocating rewards tensor...
Allocating dones tensor...
Sequential buffer allocation complete!
Estimated GPU memory usage: 0.26 GiB
Starting quick test training for 2000 frames...

Filling replay memory with 1000 frames of random experience...


Filling Replay Memory: 100%|██████████| 1000/1000 [00:01<00:00, 875.32it/s]


Replay memory filled!


Training:  25%|██▌       | 507/2000 [00:01<00:03, 377.55it/s]

Episode 1 completed | Reward: 660.00 | Avg Reward: 660.00 | Frames: 433 | Epsilon: 0.7835


Training:  43%|████▎     | 869/2000 [00:02<00:02, 377.09it/s]

Episode 2 completed | Reward: 480.00 | Avg Reward: 570.00 | Frames: 824 | Epsilon: 0.5880


Training:  66%|██████▋   | 1325/2000 [00:03<00:01, 359.34it/s]

Episode 3 completed | Reward: 480.00 | Avg Reward: 540.00 | Frames: 1269 | Epsilon: 0.3655


Training:  86%|████████▋ | 1729/2000 [00:05<00:00, 341.09it/s]

Episode 4 completed | Reward: 380.00 | Avg Reward: 500.00 | Frames: 1683 | Epsilon: 0.1585


Training: 100%|██████████| 2000/2000 [00:05<00:00, 338.54it/s]


Episode 5 completed | Reward: 340.00 | Avg Reward: 468.00 | Frames: 2000 | Epsilon: 0.1000

Training completed!
Model saved to weights/CarnivalDeterministic-v4_dqn_final.pth
Training completed!
Evaluation over 3 episodes: 253.33 average reward


Full training for 10 million frames

In [None]:
# Full run: 10,000,000 frames with a 1,000,000-entry replay buffer.
train_breakout_dqn(
    num_frames=10000000,
    # Replay buffer
    memory_size=1000000,
    replay_start_size=50000,
    batch_size=32,
    # Exploration schedule
    eps_start=1.0,
    eps_end=0.1,
    eps_decay=1000000,
    # Optimisation
    gamma=0.99,
    learning_rate=0.00025,
    target_update=5000,
    update_freq=4,
    # Episode start
    no_op_max=30,
    # Periodic evaluation and checkpointing
    eval_interval=250000,
    save_interval=500000,
)

Using device: cuda
Allocating sequential replay buffer with capacity 1000000 on cuda...
Allocating sequential states tensor...
Allocating actions tensor...
Allocating rewards tensor...
Allocating dones tensor...
Sequential buffer allocation complete!
Estimated GPU memory usage: 26.29 GiB
Starting quick test training for 10000000 frames...

Filling replay memory with 50000 frames of random experience...


Filling Replay Memory: 100%|██████████| 50000/50000 [00:52<00:00, 961.07it/s]


Replay memory filled!


Training:   0%|          | 481/10000000 [00:01<8:31:34, 325.77it/s]

Episode 1 completed | Reward: 1000.00 | Avg Reward: 1000.00 | Frames: 427 | Epsilon: 0.9996


Training:   0%|          | 868/10000000 [00:02<8:40:53, 319.93it/s]

Episode 2 completed | Reward: 680.00 | Avg Reward: 840.00 | Frames: 837 | Epsilon: 0.9992


Training:   0%|          | 1281/10000000 [00:03<8:52:40, 312.85it/s]

Episode 3 completed | Reward: 500.00 | Avg Reward: 726.67 | Frames: 1228 | Epsilon: 0.9988


Training:   0%|          | 1693/10000000 [00:05<8:59:09, 309.07it/s]

Episode 4 completed | Reward: 440.00 | Avg Reward: 655.00 | Frames: 1644 | Epsilon: 0.9984


Training:   0%|          | 2145/10000000 [00:06<8:49:59, 314.40it/s]

Episode 5 completed | Reward: 1000.00 | Avg Reward: 724.00 | Frames: 2088 | Epsilon: 0.9979


Training:   0%|          | 2527/10000000 [00:07<8:50:45, 313.94it/s]

Episode 6 completed | Reward: 1020.00 | Avg Reward: 773.33 | Frames: 2485 | Epsilon: 0.9975


Training:   0%|          | 2941/10000000 [00:08<8:43:51, 318.06it/s]

Episode 7 completed | Reward: 800.00 | Avg Reward: 777.14 | Frames: 2901 | Epsilon: 0.9971


Training:   0%|          | 3320/10000000 [00:10<8:33:55, 324.19it/s]

Episode 8 completed | Reward: 420.00 | Avg Reward: 732.50 | Frames: 3267 | Epsilon: 0.9967


Training:   0%|          | 3701/10000000 [00:11<9:01:15, 307.81it/s]

Episode 9 completed | Reward: 800.00 | Avg Reward: 740.00 | Frames: 3670 | Epsilon: 0.9963


Training:   0%|          | 4140/10000000 [00:12<8:54:05, 311.93it/s]

Episode 10 completed | Reward: 540.00 | Avg Reward: 720.00 | Frames: 4107 | Epsilon: 0.9959


Training:   0%|          | 4613/10000000 [00:14<9:02:56, 306.82it/s]

Episode 11 completed | Reward: 480.00 | Avg Reward: 698.18 | Frames: 4576 | Epsilon: 0.9954


Training:   0%|          | 5054/10000000 [00:15<9:05:08, 305.58it/s]

Episode 12 completed | Reward: 1080.00 | Avg Reward: 730.00 | Frames: 5016 | Epsilon: 0.9950


Training:   0%|          | 5457/10000000 [00:16<9:08:03, 303.94it/s]

Episode 13 completed | Reward: 460.00 | Avg Reward: 709.23 | Frames: 5414 | Epsilon: 0.9946


Training:   0%|          | 5855/10000000 [00:18<8:56:39, 310.39it/s]

Episode 14 completed | Reward: 840.00 | Avg Reward: 718.57 | Frames: 5799 | Epsilon: 0.9942


Training:   0%|          | 6256/10000000 [00:19<8:51:10, 313.57it/s]

Episode 15 completed | Reward: 480.00 | Avg Reward: 702.67 | Frames: 6205 | Epsilon: 0.9938


Training:   0%|          | 6692/10000000 [00:20<8:46:46, 316.18it/s]

Episode 16 completed | Reward: 720.00 | Avg Reward: 703.75 | Frames: 6639 | Epsilon: 0.9934


Training:   0%|          | 7028/10000000 [00:21<8:58:08, 309.49it/s]

Episode 17 completed | Reward: 920.00 | Avg Reward: 716.47 | Frames: 6993 | Epsilon: 0.9930


Training:   0%|          | 7428/10000000 [00:22<8:36:01, 322.75it/s]

Episode 18 completed | Reward: 540.00 | Avg Reward: 706.67 | Frames: 7388 | Epsilon: 0.9926


Training:   0%|          | 7803/10000000 [00:24<9:03:27, 306.44it/s]

Episode 19 completed | Reward: 500.00 | Avg Reward: 695.79 | Frames: 7777 | Epsilon: 0.9922


Training:   0%|          | 8238/10000000 [00:25<9:03:27, 306.43it/s]

Episode 20 completed | Reward: 2220.00 | Avg Reward: 772.00 | Frames: 8189 | Epsilon: 0.9918


Training:   0%|          | 8637/10000000 [00:26<8:59:10, 308.84it/s]

Episode 21 completed | Reward: 1120.00 | Avg Reward: 788.57 | Frames: 8582 | Epsilon: 0.9914


Training:   0%|          | 9037/10000000 [00:27<9:04:34, 305.78it/s]

Episode 22 completed | Reward: 820.00 | Avg Reward: 790.00 | Frames: 8984 | Epsilon: 0.9910


Training:   0%|          | 9357/10000000 [00:29<9:11:15, 302.05it/s]

Episode 23 completed | Reward: 680.00 | Avg Reward: 785.22 | Frames: 9300 | Epsilon: 0.9907


Training:   0%|          | 9653/10000000 [00:29<9:09:04, 303.25it/s]

Episode 24 completed | Reward: 740.00 | Avg Reward: 783.33 | Frames: 9612 | Epsilon: 0.9904


Training:   0%|          | 9950/10000000 [00:30<9:01:24, 307.53it/s]

Episode 25 completed | Reward: 820.00 | Avg Reward: 784.80 | Frames: 9921 | Epsilon: 0.9901


Training:   0%|          | 10349/10000000 [00:32<12:21:45, 224.46it/s]

Episode 26 completed | Reward: 500.00 | Avg Reward: 773.85 | Frames: 10333 | Epsilon: 0.9897

Memory usage: 1.09 GB


Training:   0%|          | 10841/10000000 [00:33<9:21:03, 296.74it/s]

Episode 27 completed | Reward: 1480.00 | Avg Reward: 800.00 | Frames: 10795 | Epsilon: 0.9892


Training:   0%|          | 11296/10000000 [00:35<9:02:13, 307.03it/s]

Episode 28 completed | Reward: 980.00 | Avg Reward: 806.43 | Frames: 11265 | Epsilon: 0.9887


Training:   0%|          | 11650/10000000 [00:36<9:01:47, 307.26it/s]

Episode 29 completed | Reward: 340.00 | Avg Reward: 790.34 | Frames: 11615 | Epsilon: 0.9884


Training:   0%|          | 12039/10000000 [00:37<8:59:42, 308.44it/s]

Episode 30 completed | Reward: 1000.00 | Avg Reward: 797.33 | Frames: 12006 | Epsilon: 0.9880


Training:   0%|          | 12422/10000000 [00:38<9:02:55, 306.60it/s]

Episode 31 completed | Reward: 360.00 | Avg Reward: 783.23 | Frames: 12382 | Epsilon: 0.9876


Training:   0%|          | 12834/10000000 [00:40<9:34:16, 289.85it/s]

Episode 32 completed | Reward: 620.00 | Avg Reward: 778.12 | Frames: 12785 | Epsilon: 0.9872


Training:   0%|          | 13214/10000000 [00:41<9:32:11, 290.89it/s]

Episode 33 completed | Reward: 460.00 | Avg Reward: 768.48 | Frames: 13171 | Epsilon: 0.9868


Training:   0%|          | 13597/10000000 [00:42<9:26:31, 293.79it/s]

Episode 34 completed | Reward: 640.00 | Avg Reward: 764.71 | Frames: 13563 | Epsilon: 0.9864


Training:   0%|          | 14032/10000000 [00:44<9:46:47, 283.63it/s]

Episode 35 completed | Reward: 800.00 | Avg Reward: 765.71 | Frames: 13983 | Epsilon: 0.9860


Training:   0%|          | 14509/10000000 [00:45<9:11:03, 302.01it/s]

Episode 36 completed | Reward: 1520.00 | Avg Reward: 786.67 | Frames: 14461 | Epsilon: 0.9855


Training:   0%|          | 14915/10000000 [00:47<9:37:30, 288.17it/s]

Episode 37 completed | Reward: 1420.00 | Avg Reward: 803.78 | Frames: 14873 | Epsilon: 0.9851


Training:   0%|          | 15261/10000000 [00:48<9:44:04, 284.92it/s]

Episode 38 completed | Reward: 520.00 | Avg Reward: 796.32 | Frames: 15230 | Epsilon: 0.9848


Training:   0%|          | 15674/10000000 [00:49<9:20:27, 296.91it/s]

Episode 39 completed | Reward: 680.00 | Avg Reward: 793.33 | Frames: 15620 | Epsilon: 0.9844


Training:   0%|          | 16089/10000000 [00:51<9:12:42, 301.06it/s]

Episode 40 completed | Reward: 620.00 | Avg Reward: 789.00 | Frames: 16032 | Epsilon: 0.9840


Training:   0%|          | 16497/10000000 [00:52<9:31:08, 291.33it/s]

Episode 41 completed | Reward: 540.00 | Avg Reward: 782.93 | Frames: 16459 | Epsilon: 0.9835


Training:   0%|          | 16906/10000000 [00:53<9:27:39, 293.11it/s]

Episode 42 completed | Reward: 680.00 | Avg Reward: 780.48 | Frames: 16866 | Epsilon: 0.9831


Training:   0%|          | 17318/10000000 [00:55<9:24:24, 294.78it/s]

Episode 43 completed | Reward: 640.00 | Avg Reward: 777.21 | Frames: 17287 | Epsilon: 0.9827


Training:   0%|          | 17701/10000000 [00:56<9:42:33, 285.59it/s]

Episode 44 completed | Reward: 740.00 | Avg Reward: 776.36 | Frames: 17672 | Epsilon: 0.9823


Training:   0%|          | 18083/10000000 [00:57<9:09:05, 302.99it/s]

Episode 45 completed | Reward: 620.00 | Avg Reward: 772.89 | Frames: 18049 | Epsilon: 0.9820


Training:   0%|          | 18907/10000000 [01:00<9:14:17, 300.11it/s]

Episode 46 completed | Reward: 820.00 | Avg Reward: 773.91 | Frames: 18868 | Epsilon: 0.9811


Training:   0%|          | 19316/10000000 [01:01<9:39:59, 286.81it/s]

Episode 47 completed | Reward: 700.00 | Avg Reward: 772.34 | Frames: 19285 | Epsilon: 0.9807


Training:   0%|          | 19728/10000000 [01:03<9:25:10, 294.31it/s]

Episode 48 completed | Reward: 980.00 | Avg Reward: 776.67 | Frames: 19701 | Epsilon: 0.9803


Training:   0%|          | 20101/10000000 [01:04<9:55:08, 279.48it/s]

Episode 49 completed | Reward: 820.00 | Avg Reward: 777.55 | Frames: 20050 | Epsilon: 0.9799


Training:   0%|          | 20474/10000000 [01:05<9:43:40, 284.97it/s]

Episode 50 completed | Reward: 1440.00 | Avg Reward: 790.80 | Frames: 20474 | Epsilon: 0.9795

Memory usage: 1.09 GB


Training:   0%|          | 20938/10000000 [01:07<9:25:14, 294.24it/s]

Episode 51 completed | Reward: 340.00 | Avg Reward: 781.96 | Frames: 20900 | Epsilon: 0.9791


Training:   0%|          | 21352/10000000 [01:08<9:47:15, 283.20it/s]

Episode 52 completed | Reward: 860.00 | Avg Reward: 783.46 | Frames: 21321 | Epsilon: 0.9787


Training:   0%|          | 21756/10000000 [01:10<9:36:02, 288.70it/s]

Episode 53 completed | Reward: 480.00 | Avg Reward: 777.74 | Frames: 21734 | Epsilon: 0.9783


Training:   0%|          | 22261/10000000 [01:11<9:49:16, 282.20it/s]

Episode 54 completed | Reward: 1060.00 | Avg Reward: 782.96 | Frames: 22223 | Epsilon: 0.9778


Training:   0%|          | 22676/10000000 [01:13<9:39:03, 287.17it/s]

Episode 55 completed | Reward: 500.00 | Avg Reward: 777.82 | Frames: 22644 | Epsilon: 0.9774


Training:   0%|          | 23279/10000000 [01:15<9:21:56, 295.90it/s]

Episode 56 completed | Reward: 900.00 | Avg Reward: 780.00 | Frames: 23229 | Epsilon: 0.9768


Training:   0%|          | 23681/10000000 [01:16<9:45:35, 283.94it/s]

Episode 57 completed | Reward: 400.00 | Avg Reward: 773.33 | Frames: 23646 | Epsilon: 0.9764


Training:   0%|          | 24075/10000000 [01:18<10:13:00, 271.23it/s]

Episode 58 completed | Reward: 740.00 | Avg Reward: 772.76 | Frames: 24049 | Epsilon: 0.9760


Training:   0%|          | 24481/10000000 [01:19<9:50:48, 281.41it/s]

Episode 59 completed | Reward: 540.00 | Avg Reward: 768.81 | Frames: 24451 | Epsilon: 0.9755


Training:   0%|          | 24916/10000000 [01:20<9:30:47, 291.26it/s]

Episode 60 completed | Reward: 580.00 | Avg Reward: 765.67 | Frames: 24884 | Epsilon: 0.9751


Training:   0%|          | 25385/10000000 [01:22<10:06:34, 274.07it/s]

Episode 61 completed | Reward: 1560.00 | Avg Reward: 778.69 | Frames: 25344 | Epsilon: 0.9747


Training:   0%|          | 25757/10000000 [01:23<9:41:40, 285.79it/s]

Episode 62 completed | Reward: 500.00 | Avg Reward: 774.19 | Frames: 25706 | Epsilon: 0.9743


Training:   0%|          | 26134/10000000 [01:25<9:33:33, 289.82it/s]

Episode 63 completed | Reward: 480.00 | Avg Reward: 769.52 | Frames: 26100 | Epsilon: 0.9739


Training:   0%|          | 26541/10000000 [01:26<9:44:51, 284.22it/s]

Episode 64 completed | Reward: 680.00 | Avg Reward: 768.12 | Frames: 26488 | Epsilon: 0.9735


Training:   0%|          | 26954/10000000 [01:27<9:47:14, 283.05it/s]

Episode 65 completed | Reward: 740.00 | Avg Reward: 767.69 | Frames: 26903 | Epsilon: 0.9731


Training:   0%|          | 27331/10000000 [01:29<10:01:46, 276.20it/s]

Episode 66 completed | Reward: 260.00 | Avg Reward: 760.00 | Frames: 27308 | Epsilon: 0.9727


Training:   0%|          | 27713/10000000 [01:30<10:13:56, 270.72it/s]

Episode 67 completed | Reward: 740.00 | Avg Reward: 759.70 | Frames: 27686 | Epsilon: 0.9723


Training:   0%|          | 28207/10000000 [01:32<10:13:42, 270.81it/s]

Episode 68 completed | Reward: 1280.00 | Avg Reward: 767.35 | Frames: 28177 | Epsilon: 0.9718


Training:   0%|          | 28633/10000000 [01:33<9:52:43, 280.38it/s]

Episode 69 completed | Reward: 680.00 | Avg Reward: 766.09 | Frames: 28580 | Epsilon: 0.9714


Training:   0%|          | 28969/10000000 [01:34<10:11:08, 271.92it/s]

Episode 70 completed | Reward: 540.00 | Avg Reward: 762.86 | Frames: 28932 | Epsilon: 0.9711


Training:   0%|          | 29401/10000000 [01:36<9:46:42, 283.24it/s]

Episode 71 completed | Reward: 900.00 | Avg Reward: 764.79 | Frames: 29361 | Epsilon: 0.9706


Training:   0%|          | 29803/10000000 [01:37<10:01:14, 276.38it/s]

Episode 72 completed | Reward: 740.00 | Avg Reward: 764.44 | Frames: 29775 | Epsilon: 0.9702


Training:   0%|          | 30164/10000000 [01:39<9:49:26, 281.90it/s]

Episode 73 completed | Reward: 560.00 | Avg Reward: 761.64 | Frames: 30134 | Epsilon: 0.9699


Training:   0%|          | 30459/10000000 [01:40<10:42:11, 258.73it/s]

Episode 74 completed | Reward: 1040.00 | Avg Reward: 765.41 | Frames: 30436 | Epsilon: 0.9696


Training:   0%|          | 30848/10000000 [01:41<9:34:03, 289.44it/s]

Episode 75 completed | Reward: 440.00 | Avg Reward: 761.07 | Frames: 30848 | Epsilon: 0.9692

Memory usage: 1.09 GB


Training:   0%|          | 31289/10000000 [01:43<10:11:59, 271.48it/s]

Episode 76 completed | Reward: 500.00 | Avg Reward: 757.63 | Frames: 31244 | Epsilon: 0.9688


Training:   0%|          | 31685/10000000 [01:44<10:05:45, 274.27it/s]

Episode 77 completed | Reward: 380.00 | Avg Reward: 752.73 | Frames: 31640 | Epsilon: 0.9684


Training:   0%|          | 32085/10000000 [01:45<10:11:23, 271.73it/s]

Episode 78 completed | Reward: 560.00 | Avg Reward: 750.26 | Frames: 32041 | Epsilon: 0.9680


Training:   0%|          | 32418/10000000 [01:47<10:29:07, 264.06it/s]

Episode 79 completed | Reward: 820.00 | Avg Reward: 751.14 | Frames: 32392 | Epsilon: 0.9676


Training:   0%|          | 32804/10000000 [01:48<10:06:10, 274.05it/s]

Episode 80 completed | Reward: 640.00 | Avg Reward: 749.75 | Frames: 32774 | Epsilon: 0.9672


Training:   0%|          | 33266/10000000 [01:50<10:30:05, 263.63it/s]

Episode 81 completed | Reward: 1100.00 | Avg Reward: 754.07 | Frames: 33238 | Epsilon: 0.9668


Training:   0%|          | 33676/10000000 [01:51<10:05:57, 274.12it/s]

Episode 82 completed | Reward: 380.00 | Avg Reward: 749.51 | Frames: 33648 | Epsilon: 0.9664


Training:   0%|          | 34160/10000000 [01:53<10:16:53, 269.25it/s]

Episode 83 completed | Reward: 1520.00 | Avg Reward: 758.80 | Frames: 34113 | Epsilon: 0.9659


Training:   0%|          | 34587/10000000 [01:55<10:13:25, 270.76it/s]

Episode 84 completed | Reward: 560.00 | Avg Reward: 756.43 | Frames: 34536 | Epsilon: 0.9655


Training:   0%|          | 34970/10000000 [01:56<10:32:35, 262.54it/s]

Episode 85 completed | Reward: 720.00 | Avg Reward: 756.00 | Frames: 34939 | Epsilon: 0.9651


Training:   0%|          | 35317/10000000 [01:57<10:45:48, 257.16it/s]

Episode 86 completed | Reward: 940.00 | Avg Reward: 758.14 | Frames: 35298 | Epsilon: 0.9647


Training:   0%|          | 35752/10000000 [01:59<10:07:07, 273.54it/s]

Episode 87 completed | Reward: 1020.00 | Avg Reward: 761.15 | Frames: 35710 | Epsilon: 0.9643


Training:   0%|          | 36095/10000000 [02:00<10:34:59, 261.53it/s]

Episode 88 completed | Reward: 460.00 | Avg Reward: 757.73 | Frames: 36045 | Epsilon: 0.9640


Training:   0%|          | 36471/10000000 [02:01<9:54:44, 279.21it/s] 

Episode 89 completed | Reward: 680.00 | Avg Reward: 756.85 | Frames: 36438 | Epsilon: 0.9636


Training:   0%|          | 36853/10000000 [02:03<10:15:19, 269.86it/s]

Episode 90 completed | Reward: 480.00 | Avg Reward: 753.78 | Frames: 36803 | Epsilon: 0.9632


Training:   0%|          | 37221/10000000 [02:04<10:35:09, 261.43it/s]

Episode 91 completed | Reward: 560.00 | Avg Reward: 751.65 | Frames: 37192 | Epsilon: 0.9628


Training:   0%|          | 37787/10000000 [02:06<10:13:21, 270.70it/s]

Episode 92 completed | Reward: 1220.00 | Avg Reward: 756.74 | Frames: 37739 | Epsilon: 0.9623


Training:   0%|          | 38161/10000000 [02:07<10:41:27, 258.83it/s]

Episode 93 completed | Reward: 1140.00 | Avg Reward: 760.86 | Frames: 38138 | Epsilon: 0.9619


Training:   0%|          | 38593/10000000 [02:09<10:33:48, 261.94it/s]

Episode 94 completed | Reward: 700.00 | Avg Reward: 760.21 | Frames: 38563 | Epsilon: 0.9614


Training:   0%|          | 38930/10000000 [02:10<10:19:30, 267.98it/s]

Episode 95 completed | Reward: 600.00 | Avg Reward: 758.53 | Frames: 38900 | Epsilon: 0.9611


Training:   0%|          | 39354/10000000 [02:12<10:35:28, 261.24it/s]

Episode 96 completed | Reward: 460.00 | Avg Reward: 755.42 | Frames: 39316 | Epsilon: 0.9607


Training:   0%|          | 39751/10000000 [02:13<10:31:49, 262.74it/s]

Episode 97 completed | Reward: 480.00 | Avg Reward: 752.58 | Frames: 39721 | Epsilon: 0.9603


Training:   0%|          | 40173/10000000 [02:15<10:37:14, 260.50it/s]

Episode 98 completed | Reward: 620.00 | Avg Reward: 751.22 | Frames: 40135 | Epsilon: 0.9599


Training:   0%|          | 40510/10000000 [02:16<10:57:06, 252.61it/s]

Episode 99 completed | Reward: 900.00 | Avg Reward: 752.73 | Frames: 40480 | Epsilon: 0.9595


Training:   0%|          | 40879/10000000 [02:18<10:15:20, 269.74it/s]

Episode 100 completed | Reward: 480.00 | Avg Reward: 750.00 | Frames: 40879 | Epsilon: 0.9591

Memory usage: 1.09 GB


Training:   0%|          | 41297/10000000 [02:19<10:39:32, 259.53it/s]

Episode 101 completed | Reward: 520.00 | Avg Reward: 745.20 | Frames: 41258 | Epsilon: 0.9587


Training:   0%|          | 41661/10000000 [02:21<10:29:57, 263.47it/s]

Episode 102 completed | Reward: 500.00 | Avg Reward: 743.40 | Frames: 41623 | Epsilon: 0.9584


Training:   0%|          | 42026/10000000 [02:22<10:38:10, 260.06it/s]

Episode 103 completed | Reward: 1840.00 | Avg Reward: 756.80 | Frames: 41977 | Epsilon: 0.9580


Training:   0%|          | 42387/10000000 [02:23<10:35:42, 261.06it/s]

Episode 104 completed | Reward: 820.00 | Avg Reward: 760.60 | Frames: 42355 | Epsilon: 0.9576


Training:   0%|          | 42805/10000000 [02:25<11:00:28, 251.26it/s]

Episode 105 completed | Reward: 580.00 | Avg Reward: 756.40 | Frames: 42761 | Epsilon: 0.9572


Training:   0%|          | 43197/10000000 [02:26<10:55:02, 253.34it/s]

Episode 106 completed | Reward: 660.00 | Avg Reward: 752.80 | Frames: 43170 | Epsilon: 0.9568


Training:   0%|          | 43615/10000000 [02:28<10:48:38, 255.82it/s]

Episode 107 completed | Reward: 2060.00 | Avg Reward: 765.40 | Frames: 43575 | Epsilon: 0.9564


Training:   0%|          | 44031/10000000 [02:29<10:31:19, 262.83it/s]

Episode 108 completed | Reward: 460.00 | Avg Reward: 765.80 | Frames: 43986 | Epsilon: 0.9560


Training:   0%|          | 44389/10000000 [02:31<10:55:17, 253.21it/s]

Episode 109 completed | Reward: 740.00 | Avg Reward: 765.20 | Frames: 44344 | Epsilon: 0.9557


Training:   0%|          | 44809/10000000 [02:32<10:29:21, 263.63it/s]

Episode 110 completed | Reward: 620.00 | Avg Reward: 766.00 | Frames: 44769 | Epsilon: 0.9552


Training:   0%|          | 45205/10000000 [02:34<10:47:45, 256.13it/s]

Episode 111 completed | Reward: 740.00 | Avg Reward: 768.60 | Frames: 45160 | Epsilon: 0.9548


Training:   0%|          | 45625/10000000 [02:35<10:44:33, 257.39it/s]

Episode 112 completed | Reward: 520.00 | Avg Reward: 763.00 | Frames: 45597 | Epsilon: 0.9544


Training:   0%|          | 46094/10000000 [02:37<10:58:54, 251.78it/s]

Episode 113 completed | Reward: 1000.00 | Avg Reward: 768.40 | Frames: 46048 | Epsilon: 0.9540


Training:   0%|          | 46595/10000000 [02:39<10:32:59, 262.07it/s]

Episode 114 completed | Reward: 1200.00 | Avg Reward: 772.00 | Frames: 46549 | Epsilon: 0.9535


Training:   0%|          | 46982/10000000 [02:41<11:01:02, 250.94it/s]

Episode 115 completed | Reward: 540.00 | Avg Reward: 772.60 | Frames: 46955 | Epsilon: 0.9530


Training:   0%|          | 47393/10000000 [02:42<10:59:28, 251.53it/s]

Episode 116 completed | Reward: 2740.00 | Avg Reward: 792.80 | Frames: 47356 | Epsilon: 0.9526


Training:   0%|          | 47813/10000000 [02:44<10:39:00, 259.58it/s]

Episode 117 completed | Reward: 1060.00 | Avg Reward: 794.20 | Frames: 47783 | Epsilon: 0.9522


Training:   0%|          | 48204/10000000 [02:45<11:08:12, 248.22it/s]

Episode 118 completed | Reward: 1100.00 | Avg Reward: 799.80 | Frames: 48183 | Epsilon: 0.9518


Training:   0%|          | 48646/10000000 [02:47<10:52:30, 254.18it/s]

Episode 119 completed | Reward: 600.00 | Avg Reward: 800.80 | Frames: 48603 | Epsilon: 0.9514


Training:   0%|          | 49035/10000000 [02:48<10:32:47, 262.09it/s]

Episode 120 completed | Reward: 380.00 | Avg Reward: 782.40 | Frames: 48993 | Epsilon: 0.9510


Training:   0%|          | 49450/10000000 [02:50<11:04:44, 249.48it/s]

Episode 121 completed | Reward: 440.00 | Avg Reward: 775.60 | Frames: 49404 | Epsilon: 0.9506


Training:   0%|          | 49867/10000000 [02:52<10:41:35, 258.48it/s]

Episode 122 completed | Reward: 820.00 | Avg Reward: 775.60 | Frames: 49827 | Epsilon: 0.9502


Training:   1%|          | 50275/10000000 [02:53<11:28:06, 240.99it/s]

Episode 123 completed | Reward: 440.00 | Avg Reward: 773.20 | Frames: 50254 | Epsilon: 0.9497


Training:   1%|          | 50712/10000000 [02:55<11:24:04, 242.40it/s]

Episode 124 completed | Reward: 720.00 | Avg Reward: 773.00 | Frames: 50676 | Epsilon: 0.9493


Training:   1%|          | 51051/10000000 [02:56<10:38:39, 259.63it/s]

Episode 125 completed | Reward: 660.00 | Avg Reward: 771.40 | Frames: 51051 | Epsilon: 0.9489

Memory usage: 1.10 GB


Training:   1%|          | 51430/10000000 [02:58<10:53:22, 253.78it/s]

Episode 126 completed | Reward: 640.00 | Avg Reward: 772.80 | Frames: 51391 | Epsilon: 0.9486


Training:   1%|          | 51874/10000000 [02:59<10:57:27, 252.19it/s]

Episode 127 completed | Reward: 640.00 | Avg Reward: 764.40 | Frames: 51844 | Epsilon: 0.9482


Training:   1%|          | 52233/10000000 [03:01<11:21:31, 243.27it/s]

Episode 128 completed | Reward: 560.00 | Avg Reward: 760.20 | Frames: 52199 | Epsilon: 0.9478


Training:   1%|          | 52761/10000000 [03:03<11:19:09, 244.11it/s]

Episode 129 completed | Reward: 820.00 | Avg Reward: 765.00 | Frames: 52734 | Epsilon: 0.9473


Training:   1%|          | 53197/10000000 [03:05<11:49:30, 233.66it/s]

Episode 130 completed | Reward: 560.00 | Avg Reward: 760.60 | Frames: 53162 | Epsilon: 0.9468


Training:   1%|          | 53521/10000000 [03:06<11:50:40, 233.26it/s]

Episode 131 completed | Reward: 700.00 | Avg Reward: 764.00 | Frames: 53494 | Epsilon: 0.9465


Training:   1%|          | 53961/10000000 [03:08<11:01:31, 250.59it/s]

Episode 132 completed | Reward: 1300.00 | Avg Reward: 770.80 | Frames: 53924 | Epsilon: 0.9461


Training:   1%|          | 54343/10000000 [03:09<11:19:54, 243.80it/s]

Episode 133 completed | Reward: 1140.00 | Avg Reward: 777.60 | Frames: 54322 | Epsilon: 0.9457


Training:   1%|          | 54618/10000000 [03:10<11:26:15, 241.53it/s]

Episode 134 completed | Reward: 720.00 | Avg Reward: 778.40 | Frames: 54592 | Epsilon: 0.9454


Training:   1%|          | 55049/10000000 [03:12<11:07:30, 248.31it/s]

Episode 135 completed | Reward: 1020.00 | Avg Reward: 780.60 | Frames: 55015 | Epsilon: 0.9450


Training:   1%|          | 55408/10000000 [03:13<10:48:51, 255.44it/s]

Episode 136 completed | Reward: 900.00 | Avg Reward: 774.40 | Frames: 55380 | Epsilon: 0.9446


Training:   1%|          | 55878/10000000 [03:15<10:57:06, 252.22it/s]

Episode 137 completed | Reward: 660.00 | Avg Reward: 766.80 | Frames: 55844 | Epsilon: 0.9442


Training:   1%|          | 56745/10000000 [03:19<11:18:03, 244.40it/s]

Episode 138 completed | Reward: 900.00 | Avg Reward: 770.60 | Frames: 56714 | Epsilon: 0.9433


Training:   1%|          | 57149/10000000 [03:20<11:27:53, 240.90it/s]

Episode 139 completed | Reward: 680.00 | Avg Reward: 770.60 | Frames: 57123 | Epsilon: 0.9429


Training:   1%|          | 57665/10000000 [03:22<11:15:36, 245.27it/s]

Episode 140 completed | Reward: 340.00 | Avg Reward: 767.80 | Frames: 57632 | Epsilon: 0.9424


Training:   1%|          | 58125/10000000 [03:24<11:32:22, 239.32it/s]

Episode 141 completed | Reward: 460.00 | Avg Reward: 767.00 | Frames: 58086 | Epsilon: 0.9419


Training:   1%|          | 58481/10000000 [03:26<11:07:44, 248.14it/s]

Episode 142 completed | Reward: 1740.00 | Avg Reward: 777.60 | Frames: 58433 | Epsilon: 0.9416


Training:   1%|          | 58858/10000000 [03:27<11:33:00, 239.08it/s]

Episode 143 completed | Reward: 780.00 | Avg Reward: 779.00 | Frames: 58836 | Epsilon: 0.9412


Training:   1%|          | 59333/10000000 [03:29<11:46:13, 234.60it/s]

Episode 144 completed | Reward: 900.00 | Avg Reward: 780.60 | Frames: 59310 | Epsilon: 0.9407


Training:   1%|          | 59691/10000000 [03:30<11:26:38, 241.28it/s]

Episode 145 completed | Reward: 480.00 | Avg Reward: 779.20 | Frames: 59665 | Epsilon: 0.9403


Training:   1%|          | 60125/10000000 [03:32<11:11:00, 246.89it/s]

Episode 146 completed | Reward: 540.00 | Avg Reward: 776.40 | Frames: 60090 | Epsilon: 0.9399


Training:   1%|          | 60535/10000000 [03:34<11:20:04, 243.59it/s]

Episode 147 completed | Reward: 900.00 | Avg Reward: 778.40 | Frames: 60510 | Epsilon: 0.9395


Training:   1%|          | 60961/10000000 [03:35<11:24:26, 242.02it/s]

Episode 148 completed | Reward: 1540.00 | Avg Reward: 784.00 | Frames: 60926 | Epsilon: 0.9391


Training:   1%|          | 61335/10000000 [03:37<15:21:14, 179.81it/s]

Episode 149 completed | Reward: 980.00 | Avg Reward: 785.60 | Frames: 61330 | Epsilon: 0.9387

Memory usage: 1.10 GB


Training:   1%|          | 61757/10000000 [03:39<11:20:42, 243.33it/s]

Episode 150 completed | Reward: 680.00 | Avg Reward: 778.00 | Frames: 61714 | Epsilon: 0.9383


Training:   1%|          | 62073/10000000 [03:40<11:18:04, 244.27it/s]

Episode 151 completed | Reward: 1180.00 | Avg Reward: 786.40 | Frames: 62037 | Epsilon: 0.9380


Training:   1%|          | 62503/10000000 [03:42<11:53:30, 232.13it/s]

Episode 152 completed | Reward: 540.00 | Avg Reward: 783.20 | Frames: 62476 | Epsilon: 0.9375


Training:   1%|          | 62939/10000000 [03:44<11:16:06, 244.96it/s]

Episode 153 completed | Reward: 400.00 | Avg Reward: 782.40 | Frames: 62912 | Epsilon: 0.9371


Training:   1%|          | 63361/10000000 [03:45<11:30:42, 239.77it/s]

Episode 154 completed | Reward: 1680.00 | Avg Reward: 788.60 | Frames: 63325 | Epsilon: 0.9367


Training:   1%|          | 63775/10000000 [03:47<11:27:45, 240.79it/s]

Episode 155 completed | Reward: 360.00 | Avg Reward: 787.20 | Frames: 63742 | Epsilon: 0.9363


Training:   1%|          | 64085/10000000 [03:48<11:48:22, 233.77it/s]

Episode 156 completed | Reward: 180.00 | Avg Reward: 780.00 | Frames: 64044 | Epsilon: 0.9360


Training:   1%|          | 64472/10000000 [03:50<11:37:15, 237.49it/s]

Episode 157 completed | Reward: 520.00 | Avg Reward: 781.20 | Frames: 64434 | Epsilon: 0.9356


Training:   1%|          | 64884/10000000 [03:52<11:19:38, 243.64it/s]

Episode 158 completed | Reward: 480.00 | Avg Reward: 778.60 | Frames: 64854 | Epsilon: 0.9351


Training:   1%|          | 65217/10000000 [03:53<12:45:07, 216.41it/s]

Episode 159 completed | Reward: 660.00 | Avg Reward: 779.80 | Frames: 65197 | Epsilon: 0.9348


Training:   1%|          | 65639/10000000 [03:55<12:01:33, 229.46it/s]

Episode 160 completed | Reward: 940.00 | Avg Reward: 783.40 | Frames: 65606 | Epsilon: 0.9344


Training:   1%|          | 66050/10000000 [03:57<12:18:53, 224.07it/s]

Episode 161 completed | Reward: 680.00 | Avg Reward: 774.60 | Frames: 66025 | Epsilon: 0.9340


Training:   1%|          | 66351/10000000 [03:58<11:48:24, 233.71it/s]

Episode 162 completed | Reward: 380.00 | Avg Reward: 773.40 | Frames: 66326 | Epsilon: 0.9337


Training:   1%|          | 66708/10000000 [03:59<11:24:50, 241.74it/s]

Episode 163 completed | Reward: 580.00 | Avg Reward: 774.40 | Frames: 66679 | Epsilon: 0.9333


Training:   1%|          | 67191/10000000 [04:01<12:03:40, 228.76it/s]

Episode 164 completed | Reward: 900.00 | Avg Reward: 776.60 | Frames: 67159 | Epsilon: 0.9328


Training:   1%|          | 67493/10000000 [04:03<12:10:43, 226.55it/s]

Episode 165 completed | Reward: 420.00 | Avg Reward: 773.40 | Frames: 67455 | Epsilon: 0.9325


Training:   1%|          | 67817/10000000 [04:04<12:18:53, 224.03it/s]

Episode 166 completed | Reward: 980.00 | Avg Reward: 780.60 | Frames: 67798 | Epsilon: 0.9322


Training:   1%|          | 68185/10000000 [04:06<12:41:38, 217.33it/s]

Episode 167 completed | Reward: 620.00 | Avg Reward: 779.40 | Frames: 68159 | Epsilon: 0.9318


Training:   1%|          | 68426/10000000 [04:07<12:45:16, 216.30it/s]

Episode 168 completed | Reward: 580.00 | Avg Reward: 772.40 | Frames: 68407 | Epsilon: 0.9316


Training:   1%|          | 68850/10000000 [04:08<11:53:50, 231.87it/s]

Episode 169 completed | Reward: 540.00 | Avg Reward: 771.00 | Frames: 68819 | Epsilon: 0.9312


Training:   1%|          | 69271/10000000 [04:10<12:09:13, 226.97it/s]

Episode 170 completed | Reward: 1180.00 | Avg Reward: 777.40 | Frames: 69235 | Epsilon: 0.9308


Training:   1%|          | 69617/10000000 [04:12<12:41:59, 217.20it/s]

Episode 171 completed | Reward: 620.00 | Avg Reward: 774.60 | Frames: 69596 | Epsilon: 0.9304


Training:   1%|          | 69989/10000000 [04:13<12:28:40, 221.06it/s]

Episode 172 completed | Reward: 460.00 | Avg Reward: 771.80 | Frames: 69967 | Epsilon: 0.9300


Training:   1%|          | 70413/10000000 [04:15<12:16:35, 224.68it/s]

Episode 173 completed | Reward: 360.00 | Avg Reward: 769.80 | Frames: 70379 | Epsilon: 0.9296


Training:   1%|          | 70805/10000000 [04:17<12:17:29, 224.39it/s]

Episode 174 completed | Reward: 760.00 | Avg Reward: 767.00 | Frames: 70766 | Epsilon: 0.9292


Training:   1%|          | 71165/10000000 [04:18<13:11:09, 209.16it/s]

Episode 175 completed | Reward: 600.00 | Avg Reward: 768.60 | Frames: 71141 | Epsilon: 0.9289


Training:   1%|          | 71959/10000000 [04:22<15:42:53, 175.49it/s]

Episode 176 completed | Reward: 800.00 | Avg Reward: 771.60 | Frames: 71951 | Epsilon: 0.9280

Memory usage: 1.10 GB


Training:   1%|          | 72421/10000000 [04:24<12:31:41, 220.11it/s]

Episode 177 completed | Reward: 880.00 | Avg Reward: 776.60 | Frames: 72397 | Epsilon: 0.9276


Training:   1%|          | 72717/10000000 [04:25<12:28:17, 221.11it/s]

Episode 178 completed | Reward: 360.00 | Avg Reward: 774.60 | Frames: 72682 | Epsilon: 0.9273


Training:   1%|          | 73038/10000000 [04:26<11:56:07, 231.04it/s]

Episode 179 completed | Reward: 440.00 | Avg Reward: 770.80 | Frames: 73005 | Epsilon: 0.9270


Training:   1%|          | 73414/10000000 [04:28<12:15:23, 224.97it/s]

Episode 180 completed | Reward: 1120.00 | Avg Reward: 775.60 | Frames: 73390 | Epsilon: 0.9266


Training:   1%|          | 73799/10000000 [04:30<12:19:11, 223.81it/s]

Episode 181 completed | Reward: 540.00 | Avg Reward: 770.00 | Frames: 73774 | Epsilon: 0.9262


Training:   1%|          | 74230/10000000 [04:32<12:12:53, 225.72it/s]

Episode 182 completed | Reward: 1200.00 | Avg Reward: 778.20 | Frames: 74198 | Epsilon: 0.9258


Training:   1%|          | 74639/10000000 [04:33<13:02:39, 211.36it/s]

Episode 183 completed | Reward: 580.00 | Avg Reward: 768.80 | Frames: 74617 | Epsilon: 0.9254


Training:   1%|          | 75008/10000000 [04:35<11:35:52, 237.71it/s]

Episode 184 completed | Reward: 800.00 | Avg Reward: 771.20 | Frames: 74961 | Epsilon: 0.9250


Training:   1%|          | 75448/10000000 [04:37<11:48:48, 233.36it/s]

Episode 185 completed | Reward: 1020.00 | Avg Reward: 774.20 | Frames: 75426 | Epsilon: 0.9246


Training:   1%|          | 75864/10000000 [04:38<11:31:20, 239.25it/s]

Episode 186 completed | Reward: 500.00 | Avg Reward: 769.80 | Frames: 75826 | Epsilon: 0.9242


Training:   1%|          | 76278/10000000 [04:40<12:02:28, 228.93it/s]

Episode 187 completed | Reward: 620.00 | Avg Reward: 765.80 | Frames: 76239 | Epsilon: 0.9238


Training:   1%|          | 76653/10000000 [04:42<12:05:55, 227.83it/s]

Episode 188 completed | Reward: 400.00 | Avg Reward: 765.20 | Frames: 76624 | Epsilon: 0.9234


Training:   1%|          | 77177/10000000 [04:44<11:35:29, 237.79it/s]

Episode 189 completed | Reward: 920.00 | Avg Reward: 767.60 | Frames: 77149 | Epsilon: 0.9229


Training:   1%|          | 77643/10000000 [04:46<12:09:37, 226.66it/s]

Episode 190 completed | Reward: 860.00 | Avg Reward: 771.40 | Frames: 77614 | Epsilon: 0.9224


Training:   1%|          | 78042/10000000 [04:48<11:54:49, 231.34it/s]

Episode 191 completed | Reward: 1560.00 | Avg Reward: 781.40 | Frames: 78003 | Epsilon: 0.9220


Training:   1%|          | 78421/10000000 [04:49<11:55:53, 230.98it/s]

Episode 192 completed | Reward: 480.00 | Avg Reward: 774.00 | Frames: 78383 | Epsilon: 0.9216


Training:   1%|          | 78799/10000000 [04:51<11:55:57, 230.96it/s]

Episode 193 completed | Reward: 420.00 | Avg Reward: 766.80 | Frames: 78763 | Epsilon: 0.9212


Training:   1%|          | 79258/10000000 [04:53<11:54:12, 231.51it/s]

Episode 194 completed | Reward: 680.00 | Avg Reward: 766.60 | Frames: 79224 | Epsilon: 0.9208


Training:   1%|          | 79665/10000000 [04:54<12:43:38, 216.51it/s]

Episode 195 completed | Reward: 600.00 | Avg Reward: 766.60 | Frames: 79628 | Epsilon: 0.9204


Training:   1%|          | 80062/10000000 [04:56<11:57:35, 230.40it/s]

Episode 196 completed | Reward: 680.00 | Avg Reward: 768.80 | Frames: 80040 | Epsilon: 0.9200


Training:   1%|          | 80497/10000000 [04:58<11:52:47, 231.94it/s]

Episode 197 completed | Reward: 780.00 | Avg Reward: 771.80 | Frames: 80453 | Epsilon: 0.9195


Training:   1%|          | 80905/10000000 [05:00<11:46:59, 233.83it/s]

Episode 198 completed | Reward: 320.00 | Avg Reward: 768.80 | Frames: 80879 | Epsilon: 0.9191


Training:   1%|          | 81329/10000000 [05:01<11:59:52, 229.64it/s]

Episode 199 completed | Reward: 980.00 | Avg Reward: 769.60 | Frames: 81304 | Epsilon: 0.9187


Training:   1%|          | 81750/10000000 [05:03<12:39:43, 217.59it/s]

Episode 200 completed | Reward: 460.00 | Avg Reward: 769.40 | Frames: 81727 | Epsilon: 0.9183


Training:   1%|          | 82194/10000000 [05:05<12:09:29, 226.59it/s]

Episode 201 completed | Reward: 940.00 | Avg Reward: 773.60 | Frames: 82194 | Epsilon: 0.9178

Memory usage: 1.10 GB


Training:   1%|          | 82585/10000000 [05:07<13:27:22, 204.73it/s]

Episode 202 completed | Reward: 700.00 | Avg Reward: 775.60 | Frames: 82555 | Epsilon: 0.9174


Training:   1%|          | 82952/10000000 [05:09<11:50:52, 232.51it/s]

Episode 203 completed | Reward: 1060.00 | Avg Reward: 767.80 | Frames: 82912 | Epsilon: 0.9171


Training:   1%|          | 83373/10000000 [05:10<12:03:14, 228.52it/s]

Episode 204 completed | Reward: 480.00 | Avg Reward: 764.40 | Frames: 83336 | Epsilon: 0.9167


Training:   1%|          | 83725/10000000 [05:12<12:52:06, 214.05it/s]

Episode 205 completed | Reward: 860.00 | Avg Reward: 767.20 | Frames: 83709 | Epsilon: 0.9163


Training:   1%|          | 84161/10000000 [05:14<12:04:36, 228.07it/s]

Episode 206 completed | Reward: 280.00 | Avg Reward: 763.40 | Frames: 84116 | Epsilon: 0.9159


Training:   1%|          | 84549/10000000 [05:15<12:07:20, 227.21it/s]

Episode 207 completed | Reward: 480.00 | Avg Reward: 747.60 | Frames: 84509 | Epsilon: 0.9155


Training:   1%|          | 84958/10000000 [05:17<12:58:36, 212.24it/s]

Episode 208 completed | Reward: 340.00 | Avg Reward: 746.40 | Frames: 84932 | Epsilon: 0.9151


Training:   1%|          | 85365/10000000 [05:19<12:55:18, 213.13it/s]

Episode 209 completed | Reward: 480.00 | Avg Reward: 743.80 | Frames: 85337 | Epsilon: 0.9147


Training:   1%|          | 85785/10000000 [05:21<12:30:16, 220.23it/s]

Episode 210 completed | Reward: 940.00 | Avg Reward: 747.00 | Frames: 85748 | Epsilon: 0.9143


Training:   1%|          | 86245/10000000 [05:23<12:28:28, 220.76it/s]

Episode 211 completed | Reward: 940.00 | Avg Reward: 749.00 | Frames: 86210 | Epsilon: 0.9138


Training:   1%|          | 86605/10000000 [05:24<12:19:24, 223.45it/s]

Episode 212 completed | Reward: 540.00 | Avg Reward: 749.20 | Frames: 86580 | Epsilon: 0.9134


Training:   1%|          | 87017/10000000 [05:26<12:49:57, 214.58it/s]

Episode 213 completed | Reward: 400.00 | Avg Reward: 743.20 | Frames: 86998 | Epsilon: 0.9130


Training:   1%|          | 87481/10000000 [05:28<12:22:03, 222.64it/s]

Episode 214 completed | Reward: 1300.00 | Avg Reward: 744.20 | Frames: 87454 | Epsilon: 0.9125


Training:   1%|          | 87845/10000000 [05:30<12:23:13, 222.28it/s]

Episode 215 completed | Reward: 680.00 | Avg Reward: 745.60 | Frames: 87813 | Epsilon: 0.9122


Training:   1%|          | 88572/10000000 [05:33<12:09:54, 226.32it/s]

Episode 216 completed | Reward: 960.00 | Avg Reward: 727.80 | Frames: 88544 | Epsilon: 0.9115


Training:   1%|          | 88953/10000000 [05:34<12:14:08, 225.00it/s]

Episode 217 completed | Reward: 500.00 | Avg Reward: 722.20 | Frames: 88912 | Epsilon: 0.9111


Training:   1%|          | 89246/10000000 [05:36<12:08:50, 226.63it/s]

Episode 218 completed | Reward: 560.00 | Avg Reward: 716.80 | Frames: 89227 | Epsilon: 0.9108


Training:   1%|          | 89581/10000000 [05:37<12:22:27, 222.47it/s]

Episode 219 completed | Reward: 1000.00 | Avg Reward: 720.80 | Frames: 89546 | Epsilon: 0.9105


Training:   1%|          | 89921/10000000 [05:39<12:06:50, 227.24it/s]

Episode 220 completed | Reward: 860.00 | Avg Reward: 725.60 | Frames: 89898 | Epsilon: 0.9101


Training:   1%|          | 90375/10000000 [05:41<12:20:58, 222.90it/s]

Episode 221 completed | Reward: 900.00 | Avg Reward: 730.20 | Frames: 90346 | Epsilon: 0.9097


Training:   1%|          | 90802/10000000 [05:42<12:41:53, 216.77it/s]

Episode 222 completed | Reward: 620.00 | Avg Reward: 728.20 | Frames: 90761 | Epsilon: 0.9092


Training:   1%|          | 91273/10000000 [05:45<12:36:53, 218.19it/s]

Episode 223 completed | Reward: 1160.00 | Avg Reward: 735.40 | Frames: 91250 | Epsilon: 0.9087


Training:   1%|          | 91681/10000000 [05:46<12:55:17, 213.00it/s]

Episode 224 completed | Reward: 2600.00 | Avg Reward: 754.20 | Frames: 91649 | Epsilon: 0.9084


Training:   1%|          | 92089/10000000 [05:48<12:15:49, 224.41it/s]

Episode 225 completed | Reward: 480.00 | Avg Reward: 752.40 | Frames: 92060 | Epsilon: 0.9079


Training:   1%|          | 92474/10000000 [05:50<16:01:41, 171.70it/s]

Episode 226 completed | Reward: 400.00 | Avg Reward: 750.00 | Frames: 92464 | Epsilon: 0.9075

Memory usage: 1.10 GB


Training:   1%|          | 92857/10000000 [05:52<12:40:17, 217.18it/s]

Episode 227 completed | Reward: 960.00 | Avg Reward: 753.20 | Frames: 92828 | Epsilon: 0.9072


Training:   1%|          | 93311/10000000 [05:54<12:21:27, 222.68it/s]

Episode 228 completed | Reward: 1060.00 | Avg Reward: 758.20 | Frames: 93276 | Epsilon: 0.9067


Training:   1%|          | 93733/10000000 [05:56<12:55:25, 212.92it/s]

Episode 229 completed | Reward: 980.00 | Avg Reward: 759.80 | Frames: 93696 | Epsilon: 0.9063


Training:   1%|          | 94185/10000000 [05:58<12:27:21, 220.91it/s]

Episode 230 completed | Reward: 460.00 | Avg Reward: 758.80 | Frames: 94163 | Epsilon: 0.9058


Training:   1%|          | 95001/10000000 [06:01<12:21:08, 222.74it/s]

Episode 231 completed | Reward: 1960.00 | Avg Reward: 771.40 | Frames: 94964 | Epsilon: 0.9050


Training:   1%|          | 95407/10000000 [06:03<12:25:47, 221.35it/s]

Episode 232 completed | Reward: 1200.00 | Avg Reward: 770.40 | Frames: 95375 | Epsilon: 0.9046


Training:   1%|          | 95834/10000000 [06:05<13:01:03, 211.34it/s]

Episode 233 completed | Reward: 820.00 | Avg Reward: 767.20 | Frames: 95802 | Epsilon: 0.9042


Training:   1%|          | 96190/10000000 [06:06<12:43:11, 216.28it/s]

Episode 234 completed | Reward: 1140.00 | Avg Reward: 771.40 | Frames: 96149 | Epsilon: 0.9039


Training:   1%|          | 96616/10000000 [06:08<12:53:45, 213.32it/s]

Episode 235 completed | Reward: 320.00 | Avg Reward: 764.40 | Frames: 96595 | Epsilon: 0.9034


Training:   1%|          | 97062/10000000 [06:10<13:03:02, 210.78it/s]

Episode 236 completed | Reward: 600.00 | Avg Reward: 761.40 | Frames: 97025 | Epsilon: 0.9030


Training:   1%|          | 97798/10000000 [06:14<12:58:21, 212.03it/s]

Episode 237 completed | Reward: 740.00 | Avg Reward: 762.20 | Frames: 97770 | Epsilon: 0.9022


Training:   1%|          | 98202/10000000 [06:16<12:49:43, 214.40it/s]

Episode 238 completed | Reward: 1420.00 | Avg Reward: 767.40 | Frames: 98172 | Epsilon: 0.9018


Training:   1%|          | 98655/10000000 [06:18<13:17:30, 206.92it/s]

Episode 239 completed | Reward: 940.00 | Avg Reward: 770.00 | Frames: 98633 | Epsilon: 0.9014


Training:   1%|          | 99121/10000000 [06:20<13:05:21, 210.11it/s]

Episode 240 completed | Reward: 940.00 | Avg Reward: 776.00 | Frames: 99093 | Epsilon: 0.9009


Training:   1%|          | 99528/10000000 [06:22<13:00:40, 211.37it/s]

Episode 241 completed | Reward: 880.00 | Avg Reward: 780.20 | Frames: 99510 | Epsilon: 0.9005


Training:   1%|          | 99956/10000000 [06:24<12:58:24, 211.97it/s]

Episode 242 completed | Reward: 700.00 | Avg Reward: 769.80 | Frames: 99939 | Epsilon: 0.9001


Training:   1%|          | 100286/10000000 [06:25<12:43:38, 216.06it/s]

Episode 243 completed | Reward: 660.00 | Avg Reward: 768.60 | Frames: 100255 | Epsilon: 0.8997


Training:   1%|          | 100692/10000000 [06:27<12:50:43, 214.07it/s]

Episode 244 completed | Reward: 980.00 | Avg Reward: 769.40 | Frames: 100672 | Epsilon: 0.8993


Training:   1%|          | 101094/10000000 [06:29<13:05:01, 210.16it/s]

Episode 245 completed | Reward: 520.00 | Avg Reward: 769.80 | Frames: 101067 | Epsilon: 0.8989


Training:   1%|          | 101469/10000000 [06:30<13:13:42, 207.86it/s]

Episode 246 completed | Reward: 540.00 | Avg Reward: 769.80 | Frames: 101439 | Epsilon: 0.8986


Training:   1%|          | 101872/10000000 [06:32<12:44:18, 215.84it/s]

Episode 247 completed | Reward: 620.00 | Avg Reward: 767.00 | Frames: 101851 | Epsilon: 0.8981


Training:   1%|          | 102250/10000000 [06:34<13:02:20, 210.86it/s]

Episode 248 completed | Reward: 540.00 | Avg Reward: 757.00 | Frames: 102225 | Epsilon: 0.8978


Training:   1%|          | 102687/10000000 [06:36<12:12:39, 225.15it/s]

Episode 249 completed | Reward: 800.00 | Avg Reward: 755.20 | Frames: 102687 | Epsilon: 0.8973

Memory usage: 1.10 GB


Training:   1%|          | 103084/10000000 [06:38<13:00:12, 211.42it/s]

Episode 250 completed | Reward: 620.00 | Avg Reward: 754.60 | Frames: 103061 | Epsilon: 0.8969


Training:   1%|          | 103534/10000000 [06:40<13:09:02, 209.04it/s]

Episode 251 completed | Reward: 1020.00 | Avg Reward: 753.00 | Frames: 103506 | Epsilon: 0.8965


Training:   1%|          | 103868/10000000 [06:42<12:55:47, 212.61it/s]

Episode 252 completed | Reward: 700.00 | Avg Reward: 754.60 | Frames: 103851 | Epsilon: 0.8961


Training:   1%|          | 104193/10000000 [06:43<13:05:01, 210.09it/s]

Episode 253 completed | Reward: 560.00 | Avg Reward: 756.20 | Frames: 104169 | Epsilon: 0.8958


Training:   1%|          | 104492/10000000 [06:44<13:05:09, 210.05it/s]

Episode 254 completed | Reward: 1420.00 | Avg Reward: 753.60 | Frames: 104470 | Epsilon: 0.8955


Training:   1%|          | 104870/10000000 [06:46<12:50:24, 214.07it/s]

Episode 255 completed | Reward: 320.00 | Avg Reward: 753.20 | Frames: 104836 | Epsilon: 0.8952


Training:   1%|          | 105152/10000000 [06:47<13:17:00, 206.92it/s]

Episode 256 completed | Reward: 960.00 | Avg Reward: 761.00 | Frames: 105129 | Epsilon: 0.8949


Training:   1%|          | 105622/10000000 [06:50<12:41:02, 216.68it/s]

Episode 257 completed | Reward: 840.00 | Avg Reward: 764.20 | Frames: 105582 | Epsilon: 0.8944


Training:   1%|          | 106029/10000000 [06:51<13:06:25, 209.68it/s]

Episode 258 completed | Reward: 760.00 | Avg Reward: 767.00 | Frames: 106004 | Epsilon: 0.8940


Training:   1%|          | 106362/10000000 [06:53<12:47:23, 214.88it/s]

Episode 259 completed | Reward: 440.00 | Avg Reward: 764.80 | Frames: 106333 | Epsilon: 0.8937


Training:   1%|          | 106733/10000000 [06:55<13:02:10, 210.81it/s]

Episode 260 completed | Reward: 500.00 | Avg Reward: 760.40 | Frames: 106703 | Epsilon: 0.8933


Training:   1%|          | 107125/10000000 [06:57<13:05:38, 209.87it/s]

Episode 261 completed | Reward: 1140.00 | Avg Reward: 765.00 | Frames: 107095 | Epsilon: 0.8929


Training:   1%|          | 107434/10000000 [06:58<12:45:00, 215.52it/s]

Episode 262 completed | Reward: 1180.00 | Avg Reward: 773.00 | Frames: 107396 | Epsilon: 0.8926


Training:   1%|          | 107836/10000000 [07:00<13:06:10, 209.71it/s]

Episode 263 completed | Reward: 420.00 | Avg Reward: 771.40 | Frames: 107815 | Epsilon: 0.8922


Training:   1%|          | 108161/10000000 [07:01<12:59:33, 211.48it/s]

Episode 264 completed | Reward: 640.00 | Avg Reward: 768.80 | Frames: 108120 | Epsilon: 0.8919


Training:   1%|          | 108638/10000000 [07:03<12:42:38, 216.16it/s]

Episode 265 completed | Reward: 1040.00 | Avg Reward: 775.00 | Frames: 108610 | Epsilon: 0.8914


Training:   1%|          | 109069/10000000 [07:05<13:10:24, 208.56it/s]

Episode 266 completed | Reward: 940.00 | Avg Reward: 774.60 | Frames: 109044 | Epsilon: 0.8910


Training:   1%|          | 109457/10000000 [07:07<13:48:48, 198.89it/s]

Episode 267 completed | Reward: 480.00 | Avg Reward: 773.20 | Frames: 109424 | Epsilon: 0.8906


Training:   1%|          | 109849/10000000 [07:09<13:11:49, 208.17it/s]

Episode 268 completed | Reward: 840.00 | Avg Reward: 775.80 | Frames: 109817 | Epsilon: 0.8902


Training:   1%|          | 110365/10000000 [07:12<13:02:19, 210.69it/s]

Episode 269 completed | Reward: 1080.00 | Avg Reward: 781.20 | Frames: 110329 | Epsilon: 0.8897


Training:   1%|          | 110719/10000000 [07:13<13:07:43, 209.24it/s]

Episode 270 completed | Reward: 620.00 | Avg Reward: 775.60 | Frames: 110697 | Epsilon: 0.8893


Training:   1%|          | 111153/10000000 [07:15<13:05:46, 209.75it/s]

Episode 271 completed | Reward: 500.00 | Avg Reward: 774.40 | Frames: 111121 | Epsilon: 0.8889


Training:   1%|          | 111581/10000000 [07:17<13:10:14, 208.55it/s]

Episode 272 completed | Reward: 740.00 | Avg Reward: 777.20 | Frames: 111552 | Epsilon: 0.8884


Training:   1%|          | 112021/10000000 [07:19<13:32:45, 202.77it/s]

Episode 273 completed | Reward: 1440.00 | Avg Reward: 788.00 | Frames: 111998 | Epsilon: 0.8880


Training:   1%|          | 112409/10000000 [07:21<13:24:45, 204.77it/s]

Episode 274 completed | Reward: 700.00 | Avg Reward: 787.40 | Frames: 112379 | Epsilon: 0.8876


Training:   1%|          | 112739/10000000 [07:23<17:00:22, 161.50it/s]

Episode 275 completed | Reward: 880.00 | Avg Reward: 790.20 | Frames: 112730 | Epsilon: 0.8873

Memory usage: 1.10 GB


Training:   1%|          | 113153/10000000 [07:25<13:48:43, 198.84it/s]

Episode 276 completed | Reward: 440.00 | Avg Reward: 786.60 | Frames: 113122 | Epsilon: 0.8869


Training:   1%|          | 113509/10000000 [07:26<13:23:24, 205.09it/s]

Episode 277 completed | Reward: 640.00 | Avg Reward: 784.20 | Frames: 113484 | Epsilon: 0.8865


Training:   1%|          | 113907/10000000 [07:28<13:23:27, 205.07it/s]

Episode 278 completed | Reward: 560.00 | Avg Reward: 786.20 | Frames: 113890 | Epsilon: 0.8861


Training:   1%|          | 114208/10000000 [07:30<13:22:16, 205.37it/s]

Episode 279 completed | Reward: 560.00 | Avg Reward: 787.40 | Frames: 114185 | Epsilon: 0.8858


Training:   1%|          | 114591/10000000 [07:32<14:04:29, 195.10it/s]

Episode 280 completed | Reward: 720.00 | Avg Reward: 783.40 | Frames: 114562 | Epsilon: 0.8854


Training:   1%|          | 115069/10000000 [07:34<13:30:01, 203.39it/s]

Episode 281 completed | Reward: 680.00 | Avg Reward: 784.80 | Frames: 115039 | Epsilon: 0.8850


Training:   1%|          | 115443/10000000 [07:36<13:02:27, 210.54it/s]

Episode 282 completed | Reward: 700.00 | Avg Reward: 779.80 | Frames: 115421 | Epsilon: 0.8846


Training:   1%|          | 115885/10000000 [07:38<13:31:34, 202.98it/s]

Episode 283 completed | Reward: 620.00 | Avg Reward: 780.20 | Frames: 115852 | Epsilon: 0.8841


Training:   1%|          | 116287/10000000 [07:40<13:28:07, 203.84it/s]

Episode 284 completed | Reward: 1080.00 | Avg Reward: 783.00 | Frames: 116270 | Epsilon: 0.8837


Training:   1%|          | 116681/10000000 [07:42<13:30:54, 203.13it/s]

Episode 285 completed | Reward: 660.00 | Avg Reward: 779.40 | Frames: 116645 | Epsilon: 0.8834


Training:   1%|          | 117050/10000000 [07:43<14:07:13, 194.42it/s]

Episode 286 completed | Reward: 720.00 | Avg Reward: 781.60 | Frames: 117019 | Epsilon: 0.8830


Training:   1%|          | 117468/10000000 [07:45<13:17:20, 206.57it/s]

Episode 287 completed | Reward: 300.00 | Avg Reward: 778.40 | Frames: 117441 | Epsilon: 0.8826


Training:   1%|          | 117912/10000000 [07:48<14:13:43, 192.92it/s]

Episode 288 completed | Reward: 1220.00 | Avg Reward: 786.60 | Frames: 117892 | Epsilon: 0.8821


Training:   1%|          | 118297/10000000 [07:49<13:52:36, 197.81it/s]

Episode 289 completed | Reward: 480.00 | Avg Reward: 782.20 | Frames: 118275 | Epsilon: 0.8817


Training:   1%|          | 118653/10000000 [07:51<13:13:36, 207.52it/s]

Episode 290 completed | Reward: 500.00 | Avg Reward: 778.60 | Frames: 118620 | Epsilon: 0.8814


Training:   1%|          | 118949/10000000 [07:53<13:34:46, 202.12it/s]

Episode 291 completed | Reward: 540.00 | Avg Reward: 768.40 | Frames: 118916 | Epsilon: 0.8811


Training:   1%|          | 119243/10000000 [07:54<13:36:49, 201.61it/s]

Episode 292 completed | Reward: 460.00 | Avg Reward: 768.20 | Frames: 119226 | Epsilon: 0.8808


Training:   1%|          | 119558/10000000 [07:56<13:54:17, 197.38it/s]

Episode 293 completed | Reward: 780.00 | Avg Reward: 771.80 | Frames: 119538 | Epsilon: 0.8805


Training:   1%|          | 120006/10000000 [07:58<14:22:58, 190.81it/s]

Episode 294 completed | Reward: 560.00 | Avg Reward: 770.60 | Frames: 119993 | Epsilon: 0.8800


Training:   1%|          | 120459/10000000 [08:00<13:58:58, 196.26it/s]

Episode 295 completed | Reward: 560.00 | Avg Reward: 770.20 | Frames: 120430 | Epsilon: 0.8796


Training:   1%|          | 120902/10000000 [08:02<13:51:37, 197.99it/s]

Episode 296 completed | Reward: 820.00 | Avg Reward: 771.60 | Frames: 120885 | Epsilon: 0.8791


Training:   1%|          | 121279/10000000 [08:04<14:33:42, 188.45it/s]

Episode 297 completed | Reward: 1020.00 | Avg Reward: 774.00 | Frames: 121258 | Epsilon: 0.8787


Training:   1%|          | 121697/10000000 [08:06<13:56:19, 196.86it/s]

Episode 298 completed | Reward: 1000.00 | Avg Reward: 780.80 | Frames: 121671 | Epsilon: 0.8783


Training:   1%|          | 122082/10000000 [08:08<14:08:51, 193.94it/s]

Episode 299 completed | Reward: 180.00 | Avg Reward: 772.80 | Frames: 122063 | Epsilon: 0.8779


Training:   1%|          | 122491/10000000 [08:10<13:43:25, 199.93it/s]

Episode 300 completed | Reward: 480.00 | Avg Reward: 773.00 | Frames: 122470 | Epsilon: 0.8775


Training:   1%|          | 122882/10000000 [08:12<18:23:49, 149.13it/s]

Episode 301 completed | Reward: 900.00 | Avg Reward: 772.60 | Frames: 122878 | Epsilon: 0.8771

Memory usage: 1.10 GB


Training:   1%|          | 123545/10000000 [08:15<14:07:09, 194.30it/s]

Episode 302 completed | Reward: 560.00 | Avg Reward: 771.20 | Frames: 123519 | Epsilon: 0.8765


Training:   1%|          | 123964/10000000 [08:17<13:21:28, 205.37it/s]

Episode 303 completed | Reward: 500.00 | Avg Reward: 765.60 | Frames: 123941 | Epsilon: 0.8761


Training:   1%|          | 124329/10000000 [08:19<14:55:30, 183.80it/s]

Episode 304 completed | Reward: 940.00 | Avg Reward: 770.20 | Frames: 124307 | Epsilon: 0.8757


Training:   1%|          | 124751/10000000 [08:21<13:51:47, 197.87it/s]

Episode 305 completed | Reward: 420.00 | Avg Reward: 765.80 | Frames: 124714 | Epsilon: 0.8753


Training:   1%|▏         | 125157/10000000 [08:23<14:42:16, 186.54it/s]

Episode 306 completed | Reward: 1040.00 | Avg Reward: 773.40 | Frames: 125132 | Epsilon: 0.8749


Training:   1%|▏         | 125607/10000000 [08:25<14:08:07, 194.04it/s]

Episode 307 completed | Reward: 1240.00 | Avg Reward: 781.00 | Frames: 125590 | Epsilon: 0.8744


Training:   1%|▏         | 126040/10000000 [08:28<13:29:25, 203.31it/s]

Episode 308 completed | Reward: 560.00 | Avg Reward: 783.20 | Frames: 126016 | Epsilon: 0.8740


Training:   1%|▏         | 126400/10000000 [08:29<14:35:28, 187.97it/s]

Episode 309 completed | Reward: 940.00 | Avg Reward: 787.80 | Frames: 126386 | Epsilon: 0.8736


Training:   1%|▏         | 126797/10000000 [08:31<14:43:25, 186.27it/s]

Episode 310 completed | Reward: 1020.00 | Avg Reward: 788.60 | Frames: 126764 | Epsilon: 0.8732


Training:   1%|▏         | 127213/10000000 [08:34<14:01:15, 195.60it/s]

Episode 311 completed | Reward: 460.00 | Avg Reward: 783.80 | Frames: 127184 | Epsilon: 0.8728


Training:   1%|▏         | 127652/10000000 [08:36<13:36:51, 201.43it/s]

Episode 312 completed | Reward: 340.00 | Avg Reward: 781.80 | Frames: 127615 | Epsilon: 0.8724


Training:   1%|▏         | 128017/10000000 [08:38<14:18:04, 191.75it/s]

Episode 313 completed | Reward: 1120.00 | Avg Reward: 789.00 | Frames: 127984 | Epsilon: 0.8720


Training:   1%|▏         | 128421/10000000 [08:40<14:36:53, 187.62it/s]

Episode 314 completed | Reward: 720.00 | Avg Reward: 783.20 | Frames: 128403 | Epsilon: 0.8716


Training:   1%|▏         | 128802/10000000 [08:41<14:16:15, 192.14it/s]

Episode 315 completed | Reward: 560.00 | Avg Reward: 782.00 | Frames: 128774 | Epsilon: 0.8712


Training:   1%|▏         | 129185/10000000 [08:43<14:58:25, 183.11it/s]

Episode 316 completed | Reward: 360.00 | Avg Reward: 776.00 | Frames: 129155 | Epsilon: 0.8708


Training:   1%|▏         | 129666/10000000 [08:46<14:10:58, 193.31it/s]

Episode 317 completed | Reward: 260.00 | Avg Reward: 773.60 | Frames: 129646 | Epsilon: 0.8704


Training:   1%|▏         | 130041/10000000 [08:48<15:35:21, 175.87it/s]

Episode 318 completed | Reward: 1040.00 | Avg Reward: 778.40 | Frames: 130028 | Epsilon: 0.8700


Training:   1%|▏         | 130553/10000000 [08:50<14:52:21, 184.33it/s]

Episode 319 completed | Reward: 2120.00 | Avg Reward: 789.60 | Frames: 130538 | Epsilon: 0.8695


Training:   1%|▏         | 130922/10000000 [08:52<14:05:55, 194.44it/s]

Episode 320 completed | Reward: 520.00 | Avg Reward: 786.20 | Frames: 130898 | Epsilon: 0.8691


Training:   1%|▏         | 131400/10000000 [08:55<14:20:57, 191.04it/s]

Episode 321 completed | Reward: 720.00 | Avg Reward: 784.40 | Frames: 131371 | Epsilon: 0.8686


Training:   1%|▏         | 131776/10000000 [08:57<15:03:05, 182.12it/s]

Episode 322 completed | Reward: 680.00 | Avg Reward: 785.00 | Frames: 131757 | Epsilon: 0.8682


Training:   1%|▏         | 132179/10000000 [08:59<15:05:51, 181.56it/s]

Episode 323 completed | Reward: 940.00 | Avg Reward: 782.80 | Frames: 132154 | Epsilon: 0.8678


Training:   1%|▏         | 132617/10000000 [09:01<14:16:15, 192.06it/s]

Episode 324 completed | Reward: 980.00 | Avg Reward: 766.60 | Frames: 132593 | Epsilon: 0.8674


Training:   1%|▏         | 133034/10000000 [09:03<18:59:30, 144.32it/s]

Episode 325 completed | Reward: 520.00 | Avg Reward: 767.00 | Frames: 133023 | Epsilon: 0.8670

Memory usage: 1.10 GB


Training:   1%|▏         | 133352/10000000 [09:05<14:21:34, 190.86it/s]

Episode 326 completed | Reward: 540.00 | Avg Reward: 768.40 | Frames: 133330 | Epsilon: 0.8667


Training:   1%|▏         | 133693/10000000 [09:07<15:09:16, 180.85it/s]

Episode 327 completed | Reward: 300.00 | Avg Reward: 761.80 | Frames: 133679 | Epsilon: 0.8663


Training:   1%|▏         | 134073/10000000 [09:09<15:16:52, 179.34it/s]

Episode 328 completed | Reward: 980.00 | Avg Reward: 761.00 | Frames: 134059 | Epsilon: 0.8659


Training:   1%|▏         | 134461/10000000 [09:11<14:54:19, 183.85it/s]

Episode 329 completed | Reward: 800.00 | Avg Reward: 759.20 | Frames: 134445 | Epsilon: 0.8656


Training:   1%|▏         | 134785/10000000 [09:12<14:28:10, 189.38it/s]

Episode 330 completed | Reward: 600.00 | Avg Reward: 760.60 | Frames: 134762 | Epsilon: 0.8652


Training:   1%|▏         | 135146/10000000 [09:14<14:22:36, 190.60it/s]

Episode 331 completed | Reward: 580.00 | Avg Reward: 746.80 | Frames: 135114 | Epsilon: 0.8649


Training:   1%|▏         | 135624/10000000 [09:17<14:00:30, 195.60it/s]

Episode 332 completed | Reward: 680.00 | Avg Reward: 741.60 | Frames: 135589 | Epsilon: 0.8644


Training:   1%|▏         | 136037/10000000 [09:19<14:33:15, 188.26it/s]

Episode 333 completed | Reward: 940.00 | Avg Reward: 742.80 | Frames: 136011 | Epsilon: 0.8640


Training:   1%|▏         | 136377/10000000 [09:20<15:15:37, 179.54it/s]

Episode 334 completed | Reward: 180.00 | Avg Reward: 733.20 | Frames: 136352 | Epsilon: 0.8636


Training:   1%|▏         | 136775/10000000 [09:23<14:38:58, 187.02it/s]

Episode 335 completed | Reward: 700.00 | Avg Reward: 737.00 | Frames: 136745 | Epsilon: 0.8633


Training:   1%|▏         | 137193/10000000 [09:25<14:51:35, 184.37it/s]

Episode 336 completed | Reward: 940.00 | Avg Reward: 740.40 | Frames: 137169 | Epsilon: 0.8628


Training:   1%|▏         | 137700/10000000 [09:27<14:38:33, 187.09it/s]

Episode 337 completed | Reward: 1000.00 | Avg Reward: 743.00 | Frames: 137679 | Epsilon: 0.8623


Training:   1%|▏         | 138177/10000000 [09:30<15:05:40, 181.48it/s]

Episode 338 completed | Reward: 940.00 | Avg Reward: 738.20 | Frames: 138147 | Epsilon: 0.8619


Training:   1%|▏         | 138555/10000000 [09:32<15:06:54, 181.23it/s]

Episode 339 completed | Reward: 760.00 | Avg Reward: 736.40 | Frames: 138531 | Epsilon: 0.8615


Training:   1%|▏         | 138974/10000000 [09:34<15:00:19, 182.55it/s]

Episode 340 completed | Reward: 460.00 | Avg Reward: 731.60 | Frames: 138948 | Epsilon: 0.8611


Training:   1%|▏         | 139397/10000000 [09:36<14:35:11, 187.78it/s]

Episode 341 completed | Reward: 460.00 | Avg Reward: 727.40 | Frames: 139363 | Epsilon: 0.8606


Training:   1%|▏         | 139761/10000000 [09:38<14:43:52, 185.93it/s]

Episode 342 completed | Reward: 660.00 | Avg Reward: 727.00 | Frames: 139732 | Epsilon: 0.8603


Training:   1%|▏         | 140181/10000000 [09:40<15:28:55, 176.90it/s]

Episode 343 completed | Reward: 380.00 | Avg Reward: 724.20 | Frames: 140163 | Epsilon: 0.8598


Training:   1%|▏         | 140643/10000000 [09:43<15:04:28, 181.68it/s]

Episode 344 completed | Reward: 700.00 | Avg Reward: 721.40 | Frames: 140623 | Epsilon: 0.8594


Training:   1%|▏         | 141057/10000000 [09:45<16:17:20, 168.12it/s]

Episode 345 completed | Reward: 500.00 | Avg Reward: 721.20 | Frames: 141034 | Epsilon: 0.8590


Training:   1%|▏         | 141521/10000000 [09:47<14:48:43, 184.88it/s]

Episode 346 completed | Reward: 780.00 | Avg Reward: 723.60 | Frames: 141500 | Epsilon: 0.8585


Training:   1%|▏         | 141941/10000000 [09:50<14:38:18, 187.06it/s]

Episode 347 completed | Reward: 620.00 | Avg Reward: 723.60 | Frames: 141905 | Epsilon: 0.8581


Training:   1%|▏         | 142342/10000000 [09:52<15:09:37, 180.62it/s]

Episode 348 completed | Reward: 540.00 | Avg Reward: 723.60 | Frames: 142325 | Epsilon: 0.8577


Training:   1%|▏         | 142841/10000000 [09:54<14:46:06, 185.40it/s]

Episode 349 completed | Reward: 780.00 | Avg Reward: 723.40 | Frames: 142810 | Epsilon: 0.8572


Training:   1%|▏         | 143181/10000000 [09:56<19:52:54, 137.71it/s]

Episode 350 completed | Reward: 820.00 | Avg Reward: 725.40 | Frames: 143180 | Epsilon: 0.8568

Memory usage: 1.10 GB


Training:   1%|▏         | 143621/10000000 [09:59<15:15:39, 179.40it/s]

Episode 351 completed | Reward: 620.00 | Avg Reward: 721.40 | Frames: 143601 | Epsilon: 0.8564


Training:   1%|▏         | 143959/10000000 [10:00<15:00:05, 182.50it/s]

Episode 352 completed | Reward: 380.00 | Avg Reward: 718.20 | Frames: 143926 | Epsilon: 0.8561


Training:   1%|▏         | 144455/10000000 [10:03<15:11:36, 180.18it/s]

Episode 353 completed | Reward: 1200.00 | Avg Reward: 724.60 | Frames: 144429 | Epsilon: 0.8556


Training:   1%|▏         | 144833/10000000 [10:05<14:42:28, 186.13it/s]

Episode 354 completed | Reward: 780.00 | Avg Reward: 718.20 | Frames: 144806 | Epsilon: 0.8552


Training:   1%|▏         | 145231/10000000 [10:07<15:08:26, 180.80it/s]

Episode 355 completed | Reward: 420.00 | Avg Reward: 719.20 | Frames: 145199 | Epsilon: 0.8548


Training:   1%|▏         | 145608/10000000 [10:09<15:53:27, 172.26it/s]

Episode 356 completed | Reward: 380.00 | Avg Reward: 713.40 | Frames: 145594 | Epsilon: 0.8544


Training:   1%|▏         | 145965/10000000 [10:11<15:24:16, 177.69it/s]

Episode 357 completed | Reward: 1060.00 | Avg Reward: 715.60 | Frames: 145947 | Epsilon: 0.8541


Training:   1%|▏         | 146365/10000000 [10:13<15:07:43, 180.92it/s]

Episode 358 completed | Reward: 780.00 | Avg Reward: 715.80 | Frames: 146338 | Epsilon: 0.8537


Training:   1%|▏         | 146805/10000000 [10:16<15:34:51, 175.66it/s]

Episode 359 completed | Reward: 1420.00 | Avg Reward: 725.60 | Frames: 146789 | Epsilon: 0.8532


Training:   1%|▏         | 147144/10000000 [10:18<14:47:11, 185.10it/s]

Episode 360 completed | Reward: 880.00 | Avg Reward: 729.40 | Frames: 147125 | Epsilon: 0.8529


Training:   1%|▏         | 147578/10000000 [10:20<15:34:30, 175.71it/s]

Episode 361 completed | Reward: 780.00 | Avg Reward: 725.80 | Frames: 147546 | Epsilon: 0.8525


Training:   1%|▏         | 147991/10000000 [10:22<15:11:58, 180.05it/s]

Episode 362 completed | Reward: 1700.00 | Avg Reward: 731.00 | Frames: 147964 | Epsilon: 0.8520


Training:   1%|▏         | 148429/10000000 [10:25<15:10:27, 180.34it/s]

Episode 363 completed | Reward: 1060.00 | Avg Reward: 737.40 | Frames: 148408 | Epsilon: 0.8516


Training:   1%|▏         | 148847/10000000 [10:27<14:55:59, 183.25it/s]

Episode 364 completed | Reward: 600.00 | Avg Reward: 737.00 | Frames: 148823 | Epsilon: 0.8512


Training:   1%|▏         | 149343/10000000 [10:29<14:51:53, 184.08it/s]

Episode 365 completed | Reward: 540.00 | Avg Reward: 732.00 | Frames: 149309 | Epsilon: 0.8507


Training:   1%|▏         | 149759/10000000 [10:32<15:14:22, 179.54it/s]

Episode 366 completed | Reward: 480.00 | Avg Reward: 727.40 | Frames: 149727 | Epsilon: 0.8503


Training:   2%|▏         | 150211/10000000 [10:34<15:08:08, 180.77it/s]

Episode 367 completed | Reward: 900.00 | Avg Reward: 731.60 | Frames: 150188 | Epsilon: 0.8498


Training:   2%|▏         | 150607/10000000 [10:36<15:15:00, 179.41it/s]

Episode 368 completed | Reward: 840.00 | Avg Reward: 731.60 | Frames: 150581 | Epsilon: 0.8494


Training:   2%|▏         | 150983/10000000 [10:38<14:57:05, 182.98it/s]

Episode 369 completed | Reward: 440.00 | Avg Reward: 725.20 | Frames: 150953 | Epsilon: 0.8490


Training:   2%|▏         | 151359/10000000 [10:40<15:19:16, 178.56it/s]

Episode 370 completed | Reward: 420.00 | Avg Reward: 723.20 | Frames: 151336 | Epsilon: 0.8487


Training:   2%|▏         | 151754/10000000 [10:43<15:26:20, 177.19it/s]

Episode 371 completed | Reward: 1340.00 | Avg Reward: 731.60 | Frames: 151723 | Epsilon: 0.8483


Training:   2%|▏         | 152251/10000000 [10:45<14:44:28, 185.57it/s]

Episode 372 completed | Reward: 840.00 | Avg Reward: 732.60 | Frames: 152221 | Epsilon: 0.8478


Training:   2%|▏         | 152668/10000000 [10:47<15:25:20, 177.36it/s]

Episode 373 completed | Reward: 480.00 | Avg Reward: 723.00 | Frames: 152651 | Epsilon: 0.8473


Training:   2%|▏         | 153045/10000000 [10:50<15:24:23, 177.54it/s]

Episode 374 completed | Reward: 840.00 | Avg Reward: 724.40 | Frames: 153024 | Epsilon: 0.8470


Training:   2%|▏         | 153424/10000000 [10:52<20:00:12, 136.73it/s]

Episode 375 completed | Reward: 520.00 | Avg Reward: 720.80 | Frames: 153415 | Epsilon: 0.8466

Memory usage: 1.10 GB


Training:   2%|▏         | 153900/10000000 [10:54<15:36:57, 175.14it/s]

Episode 376 completed | Reward: 1020.00 | Avg Reward: 726.60 | Frames: 153883 | Epsilon: 0.8461


Training:   2%|▏         | 154334/10000000 [10:57<16:00:41, 170.81it/s]

Episode 377 completed | Reward: 600.00 | Avg Reward: 726.20 | Frames: 154314 | Epsilon: 0.8457


Training:   2%|▏         | 154708/10000000 [10:59<15:35:14, 175.45it/s]

Episode 378 completed | Reward: 460.00 | Avg Reward: 725.20 | Frames: 154689 | Epsilon: 0.8453


Training:   2%|▏         | 155142/10000000 [11:01<15:35:25, 175.41it/s]

Episode 379 completed | Reward: 220.00 | Avg Reward: 721.80 | Frames: 155121 | Epsilon: 0.8449


Training:   2%|▏         | 155560/10000000 [11:03<15:44:30, 173.71it/s]

Episode 380 completed | Reward: 460.00 | Avg Reward: 719.20 | Frames: 155547 | Epsilon: 0.8445


Training:   2%|▏         | 155917/10000000 [11:05<15:27:54, 176.81it/s]

Episode 381 completed | Reward: 920.00 | Avg Reward: 721.60 | Frames: 155896 | Epsilon: 0.8441


Training:   2%|▏         | 156336/10000000 [11:08<15:53:11, 172.12it/s]

Episode 382 completed | Reward: 680.00 | Avg Reward: 721.40 | Frames: 156318 | Epsilon: 0.8437


Training:   2%|▏         | 156729/10000000 [11:10<15:58:31, 171.15it/s]

Episode 383 completed | Reward: 540.00 | Avg Reward: 720.60 | Frames: 156705 | Epsilon: 0.8433


Training:   2%|▏         | 157167/10000000 [11:12<15:25:45, 177.20it/s]

Episode 384 completed | Reward: 1040.00 | Avg Reward: 720.20 | Frames: 157139 | Epsilon: 0.8429


Training:   2%|▏         | 157758/10000000 [11:16<16:11:32, 168.84it/s]

Episode 385 completed | Reward: 840.00 | Avg Reward: 722.00 | Frames: 157733 | Epsilon: 0.8423


Training:   2%|▏         | 158156/10000000 [11:18<15:33:50, 175.65it/s]

Episode 386 completed | Reward: 500.00 | Avg Reward: 719.80 | Frames: 158139 | Epsilon: 0.8419


Training:   2%|▏         | 158530/10000000 [11:20<16:06:31, 169.71it/s]

Episode 387 completed | Reward: 1240.00 | Avg Reward: 729.20 | Frames: 158502 | Epsilon: 0.8415


Training:   2%|▏         | 158881/10000000 [11:22<16:35:02, 164.84it/s]

Episode 388 completed | Reward: 460.00 | Avg Reward: 721.60 | Frames: 158861 | Epsilon: 0.8411


Training:   2%|▏         | 159257/10000000 [11:24<15:58:59, 171.03it/s]

Episode 389 completed | Reward: 920.00 | Avg Reward: 726.00 | Frames: 159231 | Epsilon: 0.8408


Training:   2%|▏         | 159633/10000000 [11:26<16:17:57, 167.70it/s]

Episode 390 completed | Reward: 540.00 | Avg Reward: 726.40 | Frames: 159603 | Epsilon: 0.8404


Training:   2%|▏         | 160032/10000000 [11:28<15:30:03, 176.33it/s]

Episode 391 completed | Reward: 840.00 | Avg Reward: 729.40 | Frames: 160016 | Epsilon: 0.8400


Training:   2%|▏         | 160407/10000000 [11:31<15:17:58, 178.65it/s]

Episode 392 completed | Reward: 1420.00 | Avg Reward: 739.00 | Frames: 160383 | Epsilon: 0.8396


Training:   2%|▏         | 160782/10000000 [11:33<16:42:06, 163.64it/s]

Episode 393 completed | Reward: 660.00 | Avg Reward: 737.80 | Frames: 160757 | Epsilon: 0.8392


Training:   2%|▏         | 161181/10000000 [11:35<15:43:56, 173.72it/s]

Episode 394 completed | Reward: 320.00 | Avg Reward: 735.40 | Frames: 161160 | Epsilon: 0.8388


Training:   2%|▏         | 161594/10000000 [11:37<15:56:21, 171.46it/s]

Episode 395 completed | Reward: 600.00 | Avg Reward: 735.80 | Frames: 161567 | Epsilon: 0.8384


Training:   2%|▏         | 162130/10000000 [11:40<16:02:44, 170.31it/s]

Episode 396 completed | Reward: 880.00 | Avg Reward: 736.40 | Frames: 162110 | Epsilon: 0.8379


Training:   2%|▏         | 162546/10000000 [11:43<15:48:03, 172.94it/s]

Episode 397 completed | Reward: 800.00 | Avg Reward: 734.20 | Frames: 162527 | Epsilon: 0.8375


Training:   2%|▏         | 162963/10000000 [11:45<16:14:27, 168.25it/s]

Episode 398 completed | Reward: 640.00 | Avg Reward: 730.60 | Frames: 162949 | Epsilon: 0.8371


Training:   2%|▏         | 163369/10000000 [11:47<16:07:14, 169.50it/s]

Episode 399 completed | Reward: 460.00 | Avg Reward: 733.40 | Frames: 163337 | Epsilon: 0.8367


Training:   2%|▏         | 163734/10000000 [11:49<15:22:03, 177.79it/s]

Episode 400 completed | Reward: 660.00 | Avg Reward: 735.20 | Frames: 163734 | Epsilon: 0.8363

Memory usage: 1.10 GB


Training:   2%|▏         | 164163/10000000 [11:52<15:52:50, 172.05it/s]

Episode 401 completed | Reward: 360.00 | Avg Reward: 729.80 | Frames: 164147 | Epsilon: 0.8359


Training:   2%|▏         | 164617/10000000 [11:54<16:04:31, 169.95it/s]

Episode 402 completed | Reward: 740.00 | Avg Reward: 731.60 | Frames: 164595 | Epsilon: 0.8354


Training:   2%|▏         | 165033/10000000 [11:57<16:20:03, 167.25it/s]

Episode 403 completed | Reward: 360.00 | Avg Reward: 730.20 | Frames: 165003 | Epsilon: 0.8350


Training:   2%|▏         | 165437/10000000 [11:59<16:38:59, 164.08it/s]

Episode 404 completed | Reward: 380.00 | Avg Reward: 724.60 | Frames: 165415 | Epsilon: 0.8346


Training:   2%|▏         | 165809/10000000 [12:01<16:15:15, 168.06it/s]

Episode 405 completed | Reward: 900.00 | Avg Reward: 729.40 | Frames: 165786 | Epsilon: 0.8342


Training:   2%|▏         | 166225/10000000 [12:04<16:18:10, 167.55it/s]

Episode 406 completed | Reward: 560.00 | Avg Reward: 724.60 | Frames: 166202 | Epsilon: 0.8338


Training:   2%|▏         | 166604/10000000 [12:06<16:03:06, 170.17it/s]

Episode 407 completed | Reward: 1020.00 | Avg Reward: 722.40 | Frames: 166585 | Epsilon: 0.8334


Training:   2%|▏         | 166936/10000000 [12:08<16:21:14, 167.02it/s]

Episode 408 completed | Reward: 540.00 | Avg Reward: 722.20 | Frames: 166917 | Epsilon: 0.8331


Training:   2%|▏         | 167336/10000000 [12:10<16:56:12, 161.26it/s]

Episode 409 completed | Reward: 460.00 | Avg Reward: 717.40 | Frames: 167312 | Epsilon: 0.8327


Training:   2%|▏         | 168099/10000000 [12:14<15:12:54, 179.50it/s]

Episode 410 completed | Reward: 1120.00 | Avg Reward: 718.40 | Frames: 168078 | Epsilon: 0.8319


Training:   2%|▏         | 168525/10000000 [12:17<16:29:34, 165.58it/s]

Episode 411 completed | Reward: 1440.00 | Avg Reward: 728.20 | Frames: 168496 | Epsilon: 0.8315


Training:   2%|▏         | 168937/10000000 [12:19<15:48:48, 172.69it/s]

Episode 412 completed | Reward: 880.00 | Avg Reward: 733.60 | Frames: 168915 | Epsilon: 0.8311


Training:   2%|▏         | 169310/10000000 [12:21<16:14:18, 168.17it/s]

Episode 413 completed | Reward: 1760.00 | Avg Reward: 740.00 | Frames: 169287 | Epsilon: 0.8307


Training:   2%|▏         | 169679/10000000 [12:23<16:40:07, 163.82it/s]

Episode 414 completed | Reward: 420.00 | Avg Reward: 737.00 | Frames: 169663 | Epsilon: 0.8303


Training:   2%|▏         | 170109/10000000 [12:26<16:08:19, 169.19it/s]

Episode 415 completed | Reward: 800.00 | Avg Reward: 739.40 | Frames: 170083 | Epsilon: 0.8299


Training:   2%|▏         | 170519/10000000 [12:28<15:48:31, 172.71it/s]

Episode 416 completed | Reward: 840.00 | Avg Reward: 744.20 | Frames: 170502 | Epsilon: 0.8295


Training:   2%|▏         | 170909/10000000 [12:30<16:27:35, 165.88it/s]

Episode 417 completed | Reward: 480.00 | Avg Reward: 746.40 | Frames: 170888 | Epsilon: 0.8291


Training:   2%|▏         | 171363/10000000 [12:33<17:04:34, 159.88it/s]

Episode 418 completed | Reward: 1060.00 | Avg Reward: 746.60 | Frames: 171345 | Epsilon: 0.8287


Training:   2%|▏         | 171821/10000000 [12:36<16:33:26, 164.89it/s]

Episode 419 completed | Reward: 320.00 | Avg Reward: 728.60 | Frames: 171797 | Epsilon: 0.8282


Training:   2%|▏         | 172257/10000000 [12:38<16:34:36, 164.68it/s]

Episode 420 completed | Reward: 1000.00 | Avg Reward: 733.40 | Frames: 172232 | Epsilon: 0.8278


Training:   2%|▏         | 172671/10000000 [12:41<16:53:33, 161.60it/s]

Episode 421 completed | Reward: 420.00 | Avg Reward: 730.40 | Frames: 172653 | Epsilon: 0.8273


Training:   2%|▏         | 173058/10000000 [12:43<17:04:16, 159.90it/s]

Episode 422 completed | Reward: 380.00 | Avg Reward: 727.40 | Frames: 173046 | Epsilon: 0.8270


Training:   2%|▏         | 173539/10000000 [12:46<16:01:12, 170.39it/s]

Episode 423 completed | Reward: 560.00 | Avg Reward: 723.60 | Frames: 173522 | Epsilon: 0.8265


Training:   2%|▏         | 173936/10000000 [12:48<16:09:16, 168.96it/s]

Episode 424 completed | Reward: 1120.00 | Avg Reward: 725.00 | Frames: 173936 | Epsilon: 0.8261

Memory usage: 1.10 GB


Training:   2%|▏         | 174282/10000000 [12:50<16:10:01, 168.82it/s]

Episode 425 completed | Reward: 820.00 | Avg Reward: 728.00 | Frames: 174263 | Epsilon: 0.8257


Training:   2%|▏         | 174765/10000000 [12:53<16:25:55, 166.09it/s]

Episode 426 completed | Reward: 480.00 | Avg Reward: 727.40 | Frames: 174738 | Epsilon: 0.8253


Training:   2%|▏         | 175235/10000000 [12:56<16:36:52, 164.26it/s]

Episode 427 completed | Reward: 940.00 | Avg Reward: 733.80 | Frames: 175223 | Epsilon: 0.8248


Training:   2%|▏         | 175619/10000000 [12:58<16:27:47, 165.76it/s]

Episode 428 completed | Reward: 880.00 | Avg Reward: 732.80 | Frames: 175601 | Epsilon: 0.8244


Training:   2%|▏         | 176035/10000000 [13:00<16:33:28, 164.81it/s]

Episode 429 completed | Reward: 500.00 | Avg Reward: 729.80 | Frames: 176023 | Epsilon: 0.8240


Training:   2%|▏         | 176485/10000000 [13:03<16:24:47, 166.25it/s]

Episode 430 completed | Reward: 760.00 | Avg Reward: 731.40 | Frames: 176466 | Epsilon: 0.8235


Training:   2%|▏         | 176855/10000000 [13:05<16:50:02, 162.09it/s]

Episode 431 completed | Reward: 540.00 | Avg Reward: 731.00 | Frames: 176843 | Epsilon: 0.8232


Training:   2%|▏         | 177261/10000000 [13:08<17:07:41, 159.30it/s]

Episode 432 completed | Reward: 120.00 | Avg Reward: 725.40 | Frames: 177240 | Epsilon: 0.8228


Training:   2%|▏         | 177677/10000000 [13:10<17:39:28, 154.52it/s]

Episode 433 completed | Reward: 640.00 | Avg Reward: 722.40 | Frames: 177650 | Epsilon: 0.8224


Training:   2%|▏         | 178045/10000000 [13:12<16:55:36, 161.18it/s]

Episode 434 completed | Reward: 840.00 | Avg Reward: 729.00 | Frames: 178020 | Epsilon: 0.8220


Training:   2%|▏         | 178579/10000000 [13:15<16:08:29, 169.02it/s]

Episode 435 completed | Reward: 2120.00 | Avg Reward: 743.20 | Frames: 178561 | Epsilon: 0.8214


Training:   2%|▏         | 178936/10000000 [13:18<16:41:52, 163.38it/s]

Episode 436 completed | Reward: 1420.00 | Avg Reward: 748.00 | Frames: 178915 | Epsilon: 0.8211


Training:   2%|▏         | 179551/10000000 [13:21<17:09:40, 158.96it/s]

Episode 437 completed | Reward: 740.00 | Avg Reward: 745.40 | Frames: 179538 | Epsilon: 0.8205


Training:   2%|▏         | 180119/10000000 [13:25<17:07:53, 159.22it/s]

Episode 438 completed | Reward: 1440.00 | Avg Reward: 750.40 | Frames: 180104 | Epsilon: 0.8199


Training:   2%|▏         | 180477/10000000 [13:27<16:25:29, 166.07it/s]

Episode 439 completed | Reward: 400.00 | Avg Reward: 746.80 | Frames: 180447 | Epsilon: 0.8196


Training:   2%|▏         | 180861/10000000 [13:29<17:36:58, 154.83it/s]

Episode 440 completed | Reward: 700.00 | Avg Reward: 749.20 | Frames: 180836 | Epsilon: 0.8192


Training:   2%|▏         | 181195/10000000 [13:31<16:55:06, 161.21it/s]

Episode 441 completed | Reward: 580.00 | Avg Reward: 750.40 | Frames: 181181 | Epsilon: 0.8188


Training:   2%|▏         | 181665/10000000 [13:34<17:33:59, 155.26it/s]

Episode 442 completed | Reward: 740.00 | Avg Reward: 751.20 | Frames: 181642 | Epsilon: 0.8184


Training:   2%|▏         | 182026/10000000 [13:36<17:04:07, 159.78it/s]

Episode 443 completed | Reward: 440.00 | Avg Reward: 751.80 | Frames: 182010 | Epsilon: 0.8180


Training:   2%|▏         | 182437/10000000 [13:39<18:10:41, 150.02it/s]

Episode 444 completed | Reward: 700.00 | Avg Reward: 751.80 | Frames: 182423 | Epsilon: 0.8176


Training:   2%|▏         | 182849/10000000 [13:41<17:42:25, 154.01it/s]

Episode 445 completed | Reward: 380.00 | Avg Reward: 750.60 | Frames: 182826 | Epsilon: 0.8172


Training:   2%|▏         | 183201/10000000 [13:43<17:05:48, 159.50it/s]

Episode 446 completed | Reward: 780.00 | Avg Reward: 750.60 | Frames: 183175 | Epsilon: 0.8168


Training:   2%|▏         | 183559/10000000 [13:45<16:52:52, 161.53it/s]

Episode 447 completed | Reward: 1500.00 | Avg Reward: 759.40 | Frames: 183542 | Epsilon: 0.8165


Training:   2%|▏         | 183946/10000000 [13:48<17:20:23, 157.25it/s]

Episode 448 completed | Reward: 480.00 | Avg Reward: 758.80 | Frames: 183929 | Epsilon: 0.8161


Training:   2%|▏         | 184339/10000000 [13:50<22:17:33, 122.31it/s]

Episode 449 completed | Reward: 900.00 | Avg Reward: 760.00 | Frames: 184334 | Epsilon: 0.8157

Memory usage: 1.10 GB


Training:   2%|▏         | 184742/10000000 [13:53<16:54:30, 161.25it/s]

Episode 450 completed | Reward: 740.00 | Avg Reward: 759.20 | Frames: 184714 | Epsilon: 0.8153


Training:   2%|▏         | 185158/10000000 [13:55<17:47:02, 153.30it/s]

Episode 451 completed | Reward: 640.00 | Avg Reward: 759.40 | Frames: 185137 | Epsilon: 0.8149


Training:   2%|▏         | 185542/10000000 [13:58<17:12:58, 158.35it/s]

Episode 452 completed | Reward: 480.00 | Avg Reward: 760.40 | Frames: 185516 | Epsilon: 0.8145


Training:   2%|▏         | 185985/10000000 [14:00<17:11:03, 158.64it/s]

Episode 453 completed | Reward: 1200.00 | Avg Reward: 760.40 | Frames: 185959 | Epsilon: 0.8140


Training:   2%|▏         | 186412/10000000 [14:03<16:45:40, 162.64it/s]

Episode 454 completed | Reward: 760.00 | Avg Reward: 760.20 | Frames: 186382 | Epsilon: 0.8136


Training:   2%|▏         | 186805/10000000 [14:05<17:19:42, 157.31it/s]

Episode 455 completed | Reward: 1080.00 | Avg Reward: 766.80 | Frames: 186788 | Epsilon: 0.8132


Training:   2%|▏         | 187223/10000000 [14:08<17:18:30, 157.48it/s]

Episode 456 completed | Reward: 1080.00 | Avg Reward: 773.80 | Frames: 187201 | Epsilon: 0.8128


Training:   2%|▏         | 187533/10000000 [14:10<17:42:00, 153.99it/s]

Episode 457 completed | Reward: 920.00 | Avg Reward: 772.40 | Frames: 187512 | Epsilon: 0.8125


Training:   2%|▏         | 187973/10000000 [14:12<17:02:00, 160.01it/s]

Episode 458 completed | Reward: 820.00 | Avg Reward: 772.80 | Frames: 187954 | Epsilon: 0.8120


Training:   2%|▏         | 188371/10000000 [14:15<18:04:10, 150.83it/s]

Episode 459 completed | Reward: 1500.00 | Avg Reward: 773.60 | Frames: 188358 | Epsilon: 0.8116


Training:   2%|▏         | 188784/10000000 [14:17<17:21:14, 157.04it/s]

Episode 460 completed | Reward: 640.00 | Avg Reward: 771.20 | Frames: 188767 | Epsilon: 0.8112


Training:   2%|▏         | 189169/10000000 [14:20<17:19:58, 157.23it/s]

Episode 461 completed | Reward: 860.00 | Avg Reward: 772.00 | Frames: 189152 | Epsilon: 0.8108


Training:   2%|▏         | 189501/10000000 [14:22<18:06:19, 150.51it/s]

Episode 462 completed | Reward: 920.00 | Avg Reward: 764.20 | Frames: 189477 | Epsilon: 0.8105


Training:   2%|▏         | 189959/10000000 [14:25<16:53:10, 161.37it/s]

Episode 463 completed | Reward: 900.00 | Avg Reward: 762.60 | Frames: 189941 | Epsilon: 0.8101


Training:   2%|▏         | 190412/10000000 [14:27<16:17:49, 167.20it/s]

Episode 464 completed | Reward: 900.00 | Avg Reward: 765.60 | Frames: 190388 | Epsilon: 0.8096


Training:   2%|▏         | 190855/10000000 [14:30<17:02:20, 159.91it/s]

Episode 465 completed | Reward: 760.00 | Avg Reward: 767.80 | Frames: 190829 | Epsilon: 0.8092


Training:   2%|▏         | 191186/10000000 [14:32<16:23:54, 166.15it/s]

Episode 466 completed | Reward: 500.00 | Avg Reward: 768.00 | Frames: 191171 | Epsilon: 0.8088


Training:   2%|▏         | 191496/10000000 [14:34<16:14:20, 167.78it/s]

Episode 467 completed | Reward: 660.00 | Avg Reward: 765.60 | Frames: 191472 | Epsilon: 0.8085


Training:   2%|▏         | 191931/10000000 [14:37<17:50:08, 152.75it/s]

Episode 468 completed | Reward: 1040.00 | Avg Reward: 767.60 | Frames: 191914 | Epsilon: 0.8081


Training:   2%|▏         | 192332/10000000 [14:39<16:57:54, 160.59it/s]

Episode 469 completed | Reward: 480.00 | Avg Reward: 768.00 | Frames: 192305 | Epsilon: 0.8077


Training:   2%|▏         | 192693/10000000 [14:41<17:46:40, 153.24it/s]

Episode 470 completed | Reward: 1040.00 | Avg Reward: 774.20 | Frames: 192669 | Epsilon: 0.8073


Training:   2%|▏         | 193104/10000000 [14:44<16:44:57, 162.64it/s]

Episode 471 completed | Reward: 820.00 | Avg Reward: 769.00 | Frames: 193073 | Epsilon: 0.8069


Training:   2%|▏         | 193491/10000000 [14:46<17:58:15, 151.58it/s]

Episode 472 completed | Reward: 800.00 | Avg Reward: 768.60 | Frames: 193471 | Epsilon: 0.8065


Training:   2%|▏         | 193880/10000000 [14:49<18:02:21, 151.00it/s]

Episode 473 completed | Reward: 580.00 | Avg Reward: 769.60 | Frames: 193865 | Epsilon: 0.8061


Training:   2%|▏         | 194256/10000000 [14:51<17:39:28, 154.26it/s]

Episode 474 completed | Reward: 720.00 | Avg Reward: 768.40 | Frames: 194234 | Epsilon: 0.8058


Training:   2%|▏         | 194653/10000000 [14:54<23:47:42, 114.47it/s]

Episode 475 completed | Reward: 520.00 | Avg Reward: 768.40 | Frames: 194645 | Epsilon: 0.8054

Memory usage: 1.10 GB


Training:   2%|▏         | 195061/10000000 [14:57<18:00:59, 151.17it/s]

Episode 476 completed | Reward: 1000.00 | Avg Reward: 768.20 | Frames: 195041 | Epsilon: 0.8050


Training:   2%|▏         | 195469/10000000 [14:59<18:12:46, 149.54it/s]

Episode 477 completed | Reward: 480.00 | Avg Reward: 767.00 | Frames: 195452 | Epsilon: 0.8045


Training:   2%|▏         | 196217/10000000 [15:04<18:06:52, 150.34it/s]

Episode 478 completed | Reward: 820.00 | Avg Reward: 770.60 | Frames: 196197 | Epsilon: 0.8038


Training:   2%|▏         | 196618/10000000 [15:06<17:48:32, 152.91it/s]

Episode 479 completed | Reward: 360.00 | Avg Reward: 772.00 | Frames: 196591 | Epsilon: 0.8034


Training:   2%|▏         | 196997/10000000 [15:09<18:29:08, 147.31it/s]

Episode 480 completed | Reward: 440.00 | Avg Reward: 771.80 | Frames: 196981 | Epsilon: 0.8030


Training:   2%|▏         | 197445/10000000 [15:11<19:15:15, 141.42it/s]

Episode 481 completed | Reward: 760.00 | Avg Reward: 770.20 | Frames: 197431 | Epsilon: 0.8026


Training:   2%|▏         | 197857/10000000 [15:14<18:48:59, 144.70it/s]

Episode 482 completed | Reward: 620.00 | Avg Reward: 769.60 | Frames: 197843 | Epsilon: 0.8022


Training:   2%|▏         | 198308/10000000 [15:17<17:35:40, 154.75it/s]

Episode 483 completed | Reward: 1040.00 | Avg Reward: 774.60 | Frames: 198291 | Epsilon: 0.8017


Training:   2%|▏         | 198717/10000000 [15:19<18:29:32, 147.23it/s]

Episode 484 completed | Reward: 820.00 | Avg Reward: 772.40 | Frames: 198703 | Epsilon: 0.8013


Training:   2%|▏         | 199076/10000000 [15:22<18:11:38, 149.64it/s]

Episode 485 completed | Reward: 1260.00 | Avg Reward: 776.60 | Frames: 199048 | Epsilon: 0.8010


Training:   2%|▏         | 199449/10000000 [15:24<18:11:47, 149.61it/s]

Episode 486 completed | Reward: 540.00 | Avg Reward: 777.00 | Frames: 199426 | Epsilon: 0.8006


Training:   2%|▏         | 199882/10000000 [15:27<18:02:43, 150.85it/s]

Episode 487 completed | Reward: 300.00 | Avg Reward: 767.60 | Frames: 199863 | Epsilon: 0.8001


Training:   2%|▏         | 200313/10000000 [15:30<18:52:34, 144.21it/s]

Episode 488 completed | Reward: 1020.00 | Avg Reward: 773.20 | Frames: 200302 | Epsilon: 0.7997


Training:   2%|▏         | 200702/10000000 [15:32<17:33:35, 155.01it/s]

Episode 489 completed | Reward: 620.00 | Avg Reward: 770.20 | Frames: 200682 | Epsilon: 0.7993


Training:   2%|▏         | 201137/10000000 [15:35<18:35:43, 146.38it/s]

Episode 490 completed | Reward: 840.00 | Avg Reward: 773.20 | Frames: 201117 | Epsilon: 0.7989


Training:   2%|▏         | 201581/10000000 [15:38<18:47:13, 144.88it/s]

Episode 491 completed | Reward: 1100.00 | Avg Reward: 775.80 | Frames: 201569 | Epsilon: 0.7984


Training:   2%|▏         | 201969/10000000 [15:40<16:31:59, 164.62it/s]

Episode 492 completed | Reward: 440.00 | Avg Reward: 766.00 | Frames: 201939 | Epsilon: 0.7981


Training:   2%|▏         | 202373/10000000 [15:43<18:26:35, 147.56it/s]

Episode 493 completed | Reward: 220.00 | Avg Reward: 761.60 | Frames: 202357 | Epsilon: 0.7976


Training:   2%|▏         | 202828/10000000 [15:46<17:28:43, 155.70it/s]

Episode 494 completed | Reward: 700.00 | Avg Reward: 765.40 | Frames: 202804 | Epsilon: 0.7972


Training:   2%|▏         | 203185/10000000 [15:48<18:25:13, 147.73it/s]

Episode 495 completed | Reward: 940.00 | Avg Reward: 768.80 | Frames: 203167 | Epsilon: 0.7968


Training:   2%|▏         | 203548/10000000 [15:50<17:56:36, 151.66it/s]

Episode 496 completed | Reward: 700.00 | Avg Reward: 767.00 | Frames: 203530 | Epsilon: 0.7965


Training:   2%|▏         | 203945/10000000 [15:53<18:09:14, 149.89it/s]

Episode 497 completed | Reward: 740.00 | Avg Reward: 766.40 | Frames: 203923 | Epsilon: 0.7961


Training:   2%|▏         | 204323/10000000 [15:55<18:00:19, 151.12it/s]

Episode 498 completed | Reward: 280.00 | Avg Reward: 762.80 | Frames: 204296 | Epsilon: 0.7957


Training:   2%|▏         | 204803/10000000 [15:58<16:51:47, 161.35it/s]

Episode 499 completed | Reward: 1100.00 | Avg Reward: 769.20 | Frames: 204803 | Epsilon: 0.7952

Memory usage: 1.10 GB


Training:   2%|▏         | 205249/10000000 [16:01<18:45:39, 145.02it/s]

Episode 500 completed | Reward: 300.00 | Avg Reward: 765.60 | Frames: 205227 | Epsilon: 0.7948


Training:   2%|▏         | 205673/10000000 [16:04<17:59:51, 151.17it/s]

Episode 501 completed | Reward: 280.00 | Avg Reward: 764.80 | Frames: 205647 | Epsilon: 0.7944


Training:   2%|▏         | 206021/10000000 [16:06<18:13:04, 149.33it/s]

Episode 502 completed | Reward: 1320.00 | Avg Reward: 770.60 | Frames: 206007 | Epsilon: 0.7940


Training:   2%|▏         | 206481/10000000 [16:09<18:52:59, 144.07it/s]

Episode 503 completed | Reward: 240.00 | Avg Reward: 769.40 | Frames: 206457 | Epsilon: 0.7935


Training:   2%|▏         | 206869/10000000 [16:12<18:10:57, 149.61it/s]

Episode 504 completed | Reward: 1040.00 | Avg Reward: 776.00 | Frames: 206851 | Epsilon: 0.7931


Training:   2%|▏         | 207209/10000000 [16:14<18:17:21, 148.73it/s]

Episode 505 completed | Reward: 540.00 | Avg Reward: 772.40 | Frames: 207182 | Epsilon: 0.7928


Training:   2%|▏         | 207614/10000000 [16:17<18:05:20, 150.37it/s]

Episode 506 completed | Reward: 460.00 | Avg Reward: 771.40 | Frames: 207584 | Epsilon: 0.7924


Training:   2%|▏         | 208065/10000000 [16:19<19:13:09, 141.52it/s]

Episode 507 completed | Reward: 1400.00 | Avg Reward: 775.20 | Frames: 208055 | Epsilon: 0.7919


Training:   2%|▏         | 208433/10000000 [16:22<18:19:33, 148.42it/s]

Episode 508 completed | Reward: 880.00 | Avg Reward: 778.60 | Frames: 208412 | Epsilon: 0.7916


Training:   2%|▏         | 208865/10000000 [16:25<18:52:52, 144.04it/s]

Episode 509 completed | Reward: 780.00 | Avg Reward: 781.80 | Frames: 208852 | Epsilon: 0.7911


Training:   2%|▏         | 209297/10000000 [16:28<18:59:05, 143.25it/s]

Episode 510 completed | Reward: 300.00 | Avg Reward: 773.60 | Frames: 209284 | Epsilon: 0.7907


Training:   2%|▏         | 209697/10000000 [16:30<18:58:09, 143.36it/s]

Episode 511 completed | Reward: 400.00 | Avg Reward: 763.20 | Frames: 209680 | Epsilon: 0.7903


Training:   2%|▏         | 210145/10000000 [16:33<18:20:18, 148.29it/s]

Episode 512 completed | Reward: 560.00 | Avg Reward: 760.00 | Frames: 210125 | Epsilon: 0.7899


Training:   2%|▏         | 210575/10000000 [16:36<19:10:52, 141.77it/s]

Episode 513 completed | Reward: 1060.00 | Avg Reward: 753.00 | Frames: 210559 | Epsilon: 0.7894


Training:   2%|▏         | 210973/10000000 [16:39<18:35:15, 146.29it/s]

Episode 514 completed | Reward: 880.00 | Avg Reward: 757.60 | Frames: 210952 | Epsilon: 0.7890


Training:   2%|▏         | 211293/10000000 [16:41<18:35:12, 146.29it/s]

Episode 515 completed | Reward: 1040.00 | Avg Reward: 760.00 | Frames: 211273 | Epsilon: 0.7887


Training:   2%|▏         | 211645/10000000 [16:43<18:45:15, 144.98it/s]

Episode 516 completed | Reward: 880.00 | Avg Reward: 760.40 | Frames: 211632 | Epsilon: 0.7884


Training:   2%|▏         | 212075/10000000 [16:46<17:39:41, 153.94it/s]

Episode 517 completed | Reward: 620.00 | Avg Reward: 761.80 | Frames: 212050 | Epsilon: 0.7880


Training:   2%|▏         | 212458/10000000 [16:48<18:36:41, 146.08it/s]

Episode 518 completed | Reward: 700.00 | Avg Reward: 758.20 | Frames: 212443 | Epsilon: 0.7876


Training:   2%|▏         | 212873/10000000 [16:51<19:05:32, 142.40it/s]

Episode 519 completed | Reward: 200.00 | Avg Reward: 757.00 | Frames: 212856 | Epsilon: 0.7871


Training:   2%|▏         | 213193/10000000 [16:53<19:10:13, 141.81it/s]

Episode 520 completed | Reward: 580.00 | Avg Reward: 752.80 | Frames: 213178 | Epsilon: 0.7868


Training:   2%|▏         | 213577/10000000 [16:56<18:23:03, 147.87it/s]

Episode 521 completed | Reward: 740.00 | Avg Reward: 756.00 | Frames: 213549 | Epsilon: 0.7865


Training:   2%|▏         | 213949/10000000 [16:58<19:20:52, 140.50it/s]

Episode 522 completed | Reward: 380.00 | Avg Reward: 756.00 | Frames: 213930 | Epsilon: 0.7861


Training:   2%|▏         | 214299/10000000 [17:00<18:26:45, 147.36it/s]

Episode 523 completed | Reward: 900.00 | Avg Reward: 759.40 | Frames: 214272 | Epsilon: 0.7857


Training:   2%|▏         | 214713/10000000 [17:03<18:51:21, 144.15it/s]

Episode 524 completed | Reward: 480.00 | Avg Reward: 753.00 | Frames: 214695 | Epsilon: 0.7853


Training:   2%|▏         | 215144/10000000 [17:06<24:16:22, 111.98it/s]

Episode 525 completed | Reward: 960.00 | Avg Reward: 754.40 | Frames: 215141 | Epsilon: 0.7849

Memory usage: 1.10 GB


Training:   2%|▏         | 215606/10000000 [17:09<19:02:42, 142.71it/s]

Episode 526 completed | Reward: 1160.00 | Avg Reward: 761.20 | Frames: 215592 | Epsilon: 0.7844


Training:   2%|▏         | 216067/10000000 [17:12<19:12:55, 141.44it/s]

Episode 527 completed | Reward: 1020.00 | Avg Reward: 762.00 | Frames: 216043 | Epsilon: 0.7840


Training:   2%|▏         | 216481/10000000 [17:15<18:51:18, 144.13it/s]

Episode 528 completed | Reward: 380.00 | Avg Reward: 757.00 | Frames: 216460 | Epsilon: 0.7835


Training:   2%|▏         | 216865/10000000 [17:18<19:06:20, 142.24it/s]

Episode 529 completed | Reward: 840.00 | Avg Reward: 760.40 | Frames: 216853 | Epsilon: 0.7831


Training:   2%|▏         | 217265/10000000 [17:20<18:33:31, 146.42it/s]

Episode 530 completed | Reward: 640.00 | Avg Reward: 759.20 | Frames: 217239 | Epsilon: 0.7828


Training:   2%|▏         | 217679/10000000 [17:23<19:20:59, 140.43it/s]

Episode 531 completed | Reward: 1880.00 | Avg Reward: 772.60 | Frames: 217652 | Epsilon: 0.7823


Training:   2%|▏         | 218141/10000000 [17:26<18:38:38, 145.74it/s]

Episode 532 completed | Reward: 980.00 | Avg Reward: 781.20 | Frames: 218121 | Epsilon: 0.7819


Training:   2%|▏         | 218573/10000000 [17:29<18:23:45, 147.70it/s]

Episode 533 completed | Reward: 960.00 | Avg Reward: 784.40 | Frames: 218551 | Epsilon: 0.7814


Training:   2%|▏         | 218989/10000000 [17:32<19:06:53, 142.14it/s]

Episode 534 completed | Reward: 320.00 | Avg Reward: 779.20 | Frames: 218975 | Epsilon: 0.7810


Training:   2%|▏         | 219373/10000000 [17:34<19:58:51, 135.97it/s]

Episode 535 completed | Reward: 1000.00 | Avg Reward: 768.00 | Frames: 219365 | Epsilon: 0.7806


Training:   2%|▏         | 219789/10000000 [17:37<18:43:54, 145.03it/s]

Episode 536 completed | Reward: 840.00 | Avg Reward: 762.20 | Frames: 219770 | Epsilon: 0.7802


Training:   2%|▏         | 220190/10000000 [17:40<18:57:48, 143.26it/s]

Episode 537 completed | Reward: 1160.00 | Avg Reward: 766.40 | Frames: 220174 | Epsilon: 0.7798


Training:   2%|▏         | 220603/10000000 [17:43<18:38:53, 145.67it/s]

Episode 538 completed | Reward: 920.00 | Avg Reward: 761.20 | Frames: 220581 | Epsilon: 0.7794


Training:   2%|▏         | 221001/10000000 [17:45<18:23:48, 147.66it/s]

Episode 539 completed | Reward: 760.00 | Avg Reward: 764.80 | Frames: 220982 | Epsilon: 0.7790


Training:   2%|▏         | 221461/10000000 [17:48<20:10:06, 134.68it/s]

Episode 540 completed | Reward: 1300.00 | Avg Reward: 770.80 | Frames: 221438 | Epsilon: 0.7786


Training:   2%|▏         | 221867/10000000 [17:51<19:07:43, 141.99it/s]

Episode 541 completed | Reward: 1420.00 | Avg Reward: 779.20 | Frames: 221853 | Epsilon: 0.7781


Training:   2%|▏         | 222217/10000000 [17:53<18:27:10, 147.19it/s]

Episode 542 completed | Reward: 480.00 | Avg Reward: 776.60 | Frames: 222195 | Epsilon: 0.7778


Training:   2%|▏         | 222617/10000000 [17:56<19:39:54, 138.11it/s]

Episode 543 completed | Reward: 740.00 | Avg Reward: 779.60 | Frames: 222606 | Epsilon: 0.7774


Training:   2%|▏         | 223014/10000000 [17:59<19:44:33, 137.56it/s]

Episode 544 completed | Reward: 480.00 | Avg Reward: 777.40 | Frames: 222993 | Epsilon: 0.7770


Training:   2%|▏         | 223381/10000000 [18:01<18:40:19, 145.44it/s]

Episode 545 completed | Reward: 480.00 | Avg Reward: 778.40 | Frames: 223362 | Epsilon: 0.7766


Training:   2%|▏         | 223733/10000000 [18:04<19:02:25, 142.62it/s]

Episode 546 completed | Reward: 480.00 | Avg Reward: 775.40 | Frames: 223717 | Epsilon: 0.7763


Training:   2%|▏         | 224117/10000000 [18:06<19:35:21, 138.62it/s]

Episode 547 completed | Reward: 480.00 | Avg Reward: 765.20 | Frames: 224105 | Epsilon: 0.7759


Training:   2%|▏         | 224515/10000000 [18:09<18:51:15, 144.02it/s]

Episode 548 completed | Reward: 700.00 | Avg Reward: 767.40 | Frames: 224498 | Epsilon: 0.7755


Training:   2%|▏         | 224864/10000000 [18:11<19:30:19, 139.21it/s]

Episode 549 completed | Reward: 660.00 | Avg Reward: 765.00 | Frames: 224850 | Epsilon: 0.7752


Training:   2%|▏         | 225276/10000000 [18:14<24:53:57, 109.05it/s]

Episode 550 completed | Reward: 360.00 | Avg Reward: 761.20 | Frames: 225268 | Epsilon: 0.7747

Memory usage: 1.10 GB


Training:   2%|▏         | 225785/10000000 [18:18<19:00:21, 142.85it/s]

Episode 551 completed | Reward: 1180.00 | Avg Reward: 766.60 | Frames: 225760 | Epsilon: 0.7742


Training:   2%|▏         | 226200/10000000 [18:21<19:14:48, 141.06it/s]

Episode 552 completed | Reward: 680.00 | Avg Reward: 768.60 | Frames: 226185 | Epsilon: 0.7738


Training:   2%|▏         | 226578/10000000 [18:23<20:15:19, 134.03it/s]

Episode 553 completed | Reward: 680.00 | Avg Reward: 763.40 | Frames: 226562 | Epsilon: 0.7734


Training:   2%|▏         | 227201/10000000 [18:28<18:32:34, 146.40it/s]

Episode 554 completed | Reward: 460.00 | Avg Reward: 760.40 | Frames: 227184 | Epsilon: 0.7728


Training:   2%|▏         | 227583/10000000 [18:30<19:09:33, 141.68it/s]

Episode 555 completed | Reward: 700.00 | Avg Reward: 756.60 | Frames: 227561 | Epsilon: 0.7724


Training:   2%|▏         | 228045/10000000 [18:33<19:01:30, 142.68it/s]

Episode 556 completed | Reward: 460.00 | Avg Reward: 750.40 | Frames: 228033 | Epsilon: 0.7720


Training:   2%|▏         | 228476/10000000 [18:36<19:06:23, 142.06it/s]

Episode 557 completed | Reward: 1080.00 | Avg Reward: 752.00 | Frames: 228462 | Epsilon: 0.7715


Training:   2%|▏         | 228856/10000000 [18:39<18:50:38, 144.04it/s]

Episode 558 completed | Reward: 420.00 | Avg Reward: 748.00 | Frames: 228841 | Epsilon: 0.7712


Training:   2%|▏         | 229269/10000000 [18:42<19:38:15, 138.21it/s]

Episode 559 completed | Reward: 620.00 | Avg Reward: 739.20 | Frames: 229252 | Epsilon: 0.7707


Training:   2%|▏         | 229667/10000000 [18:44<18:42:22, 145.08it/s]

Episode 560 completed | Reward: 360.00 | Avg Reward: 736.40 | Frames: 229646 | Epsilon: 0.7704


Training:   2%|▏         | 230063/10000000 [18:47<19:31:39, 138.98it/s]

Episode 561 completed | Reward: 320.00 | Avg Reward: 731.00 | Frames: 230040 | Epsilon: 0.7700


Training:   2%|▏         | 230476/10000000 [18:50<18:13:04, 148.96it/s]

Episode 562 completed | Reward: 880.00 | Avg Reward: 730.60 | Frames: 230461 | Epsilon: 0.7695


Training:   2%|▏         | 230855/10000000 [18:53<19:40:36, 137.91it/s]

Episode 563 completed | Reward: 780.00 | Avg Reward: 729.40 | Frames: 230839 | Epsilon: 0.7692


Training:   2%|▏         | 231205/10000000 [18:55<19:21:46, 140.14it/s]

Episode 564 completed | Reward: 920.00 | Avg Reward: 729.60 | Frames: 231180 | Epsilon: 0.7688


Training:   2%|▏         | 231604/10000000 [18:58<18:48:29, 144.27it/s]

Episode 565 completed | Reward: 300.00 | Avg Reward: 725.00 | Frames: 231591 | Epsilon: 0.7684


Training:   2%|▏         | 231983/10000000 [19:00<20:42:12, 131.06it/s]

Episode 566 completed | Reward: 1580.00 | Avg Reward: 735.80 | Frames: 231972 | Epsilon: 0.7680


Training:   2%|▏         | 232363/10000000 [19:03<19:35:46, 138.46it/s]

Episode 567 completed | Reward: 660.00 | Avg Reward: 735.80 | Frames: 232342 | Epsilon: 0.7677


Training:   2%|▏         | 232775/10000000 [19:06<18:55:58, 143.30it/s]

Episode 568 completed | Reward: 700.00 | Avg Reward: 732.40 | Frames: 232748 | Epsilon: 0.7673


Training:   2%|▏         | 233282/10000000 [19:09<19:57:51, 135.89it/s]

Episode 569 completed | Reward: 1320.00 | Avg Reward: 740.80 | Frames: 233260 | Epsilon: 0.7667


Training:   2%|▏         | 233647/10000000 [19:12<19:37:50, 138.20it/s]

Episode 570 completed | Reward: 480.00 | Avg Reward: 735.20 | Frames: 233631 | Epsilon: 0.7664


Training:   2%|▏         | 234070/10000000 [19:15<20:02:27, 135.36it/s]

Episode 571 completed | Reward: 600.00 | Avg Reward: 733.00 | Frames: 234052 | Epsilon: 0.7659


Training:   2%|▏         | 234436/10000000 [19:18<18:48:04, 144.28it/s]

Episode 572 completed | Reward: 540.00 | Avg Reward: 730.40 | Frames: 234423 | Epsilon: 0.7656


Training:   2%|▏         | 234752/10000000 [19:20<19:04:52, 142.16it/s]

Episode 573 completed | Reward: 660.00 | Avg Reward: 731.20 | Frames: 234738 | Epsilon: 0.7653


Training:   2%|▏         | 235179/10000000 [19:23<19:48:46, 136.90it/s]

Episode 574 completed | Reward: 40.00 | Avg Reward: 724.40 | Frames: 235158 | Epsilon: 0.7648


Training:   2%|▏         | 235579/10000000 [19:26<18:22:11, 147.65it/s]

Episode 575 completed | Reward: 500.00 | Avg Reward: 724.20 | Frames: 235579 | Epsilon: 0.7644

Memory usage: 1.10 GB


Training:   2%|▏         | 236003/10000000 [19:29<19:32:27, 138.80it/s]

Episode 576 completed | Reward: 1640.00 | Avg Reward: 730.60 | Frames: 235979 | Epsilon: 0.7640


Training:   2%|▏         | 236416/10000000 [19:32<18:42:17, 145.00it/s]

Episode 577 completed | Reward: 1160.00 | Avg Reward: 737.40 | Frames: 236401 | Epsilon: 0.7636


Training:   2%|▏         | 236794/10000000 [19:34<20:09:42, 134.51it/s]

Episode 578 completed | Reward: 740.00 | Avg Reward: 736.60 | Frames: 236769 | Epsilon: 0.7632


Training:   2%|▏         | 237190/10000000 [19:37<19:54:33, 136.21it/s]

Episode 579 completed | Reward: 960.00 | Avg Reward: 742.60 | Frames: 237164 | Epsilon: 0.7628


Training:   2%|▏         | 237669/10000000 [19:40<19:27:01, 139.42it/s]

Episode 580 completed | Reward: 900.00 | Avg Reward: 747.20 | Frames: 237652 | Epsilon: 0.7623


Training:   2%|▏         | 238099/10000000 [19:43<19:23:53, 139.79it/s]

Episode 581 completed | Reward: 500.00 | Avg Reward: 744.60 | Frames: 238073 | Epsilon: 0.7619


Training:   2%|▏         | 238464/10000000 [19:46<19:31:53, 138.83it/s]

Episode 582 completed | Reward: 660.00 | Avg Reward: 745.00 | Frames: 238449 | Epsilon: 0.7616


Training:   2%|▏         | 238873/10000000 [19:49<20:21:20, 133.20it/s]

Episode 583 completed | Reward: 220.00 | Avg Reward: 736.80 | Frames: 238848 | Epsilon: 0.7612


Training:   2%|▏         | 239303/10000000 [19:52<19:00:26, 142.64it/s]

Episode 584 completed | Reward: 1640.00 | Avg Reward: 745.00 | Frames: 239282 | Epsilon: 0.7607


Training:   2%|▏         | 239730/10000000 [19:55<20:29:50, 132.27it/s]

Episode 585 completed | Reward: 560.00 | Avg Reward: 738.00 | Frames: 239710 | Epsilon: 0.7603


Training:   2%|▏         | 240110/10000000 [19:58<20:03:12, 135.19it/s]

Episode 586 completed | Reward: 1860.00 | Avg Reward: 751.20 | Frames: 240088 | Epsilon: 0.7599


Training:   2%|▏         | 240492/10000000 [20:00<19:13:02, 141.07it/s]

Episode 587 completed | Reward: 1540.00 | Avg Reward: 763.60 | Frames: 240478 | Epsilon: 0.7595


Training:   2%|▏         | 240887/10000000 [20:03<19:13:28, 141.01it/s]

Episode 588 completed | Reward: 1040.00 | Avg Reward: 763.80 | Frames: 240860 | Epsilon: 0.7591


Training:   2%|▏         | 241235/10000000 [20:06<18:57:42, 142.96it/s]

Episode 589 completed | Reward: 880.00 | Avg Reward: 766.40 | Frames: 241211 | Epsilon: 0.7588


Training:   2%|▏         | 241614/10000000 [20:08<20:05:59, 134.86it/s]

Episode 590 completed | Reward: 940.00 | Avg Reward: 767.40 | Frames: 241595 | Epsilon: 0.7584


Training:   2%|▏         | 241962/10000000 [20:11<20:36:54, 131.48it/s]

Episode 591 completed | Reward: 620.00 | Avg Reward: 762.60 | Frames: 241945 | Epsilon: 0.7581


Training:   2%|▏         | 242344/10000000 [20:14<19:23:09, 139.82it/s]

Episode 592 completed | Reward: 380.00 | Avg Reward: 762.00 | Frames: 242329 | Epsilon: 0.7577


Training:   2%|▏         | 242754/10000000 [20:17<20:10:01, 134.39it/s]

Episode 593 completed | Reward: 480.00 | Avg Reward: 764.60 | Frames: 242733 | Epsilon: 0.7573


Training:   2%|▏         | 243213/10000000 [20:20<21:10:45, 127.97it/s]

Episode 594 completed | Reward: 680.00 | Avg Reward: 764.40 | Frames: 243193 | Epsilon: 0.7568


Training:   2%|▏         | 243612/10000000 [20:23<19:56:08, 135.94it/s]

Episode 595 completed | Reward: 300.00 | Avg Reward: 758.00 | Frames: 243597 | Epsilon: 0.7564


Training:   2%|▏         | 244021/10000000 [20:26<20:40:57, 131.03it/s]

Episode 596 completed | Reward: 440.00 | Avg Reward: 755.40 | Frames: 244005 | Epsilon: 0.7560


Training:   2%|▏         | 244323/10000000 [20:28<19:38:24, 137.98it/s]

Episode 597 completed | Reward: 680.00 | Avg Reward: 754.80 | Frames: 244307 | Epsilon: 0.7557


Training:   2%|▏         | 244718/10000000 [20:31<20:11:57, 134.15it/s]

Episode 598 completed | Reward: 400.00 | Avg Reward: 756.00 | Frames: 244702 | Epsilon: 0.7553


Training:   2%|▏         | 245101/10000000 [20:33<20:06:40, 134.73it/s]

Episode 599 completed | Reward: 160.00 | Avg Reward: 746.60 | Frames: 245084 | Epsilon: 0.7549


Training:   2%|▏         | 245482/10000000 [20:36<20:17:44, 133.50it/s]

Episode 600 completed | Reward: 340.00 | Avg Reward: 747.00 | Frames: 245460 | Epsilon: 0.7545


Training:   2%|▏         | 245945/10000000 [20:40<25:21:20, 106.86it/s]

Episode 601 completed | Reward: 740.00 | Avg Reward: 751.60 | Frames: 245944 | Epsilon: 0.7541

Memory usage: 1.10 GB


Training:   2%|▏         | 246391/10000000 [20:43<19:34:14, 138.44it/s]

Episode 602 completed | Reward: 440.00 | Avg Reward: 742.80 | Frames: 246374 | Epsilon: 0.7536


Training:   2%|▏         | 246852/10000000 [20:46<19:03:22, 142.17it/s]

Episode 603 completed | Reward: 880.00 | Avg Reward: 749.20 | Frames: 246839 | Epsilon: 0.7532


Training:   2%|▏         | 247181/10000000 [20:48<20:48:09, 130.23it/s]

Episode 604 completed | Reward: 1120.00 | Avg Reward: 750.00 | Frames: 247161 | Epsilon: 0.7528


Training:   2%|▏         | 247626/10000000 [20:52<20:03:07, 135.10it/s]

Episode 605 completed | Reward: 520.00 | Avg Reward: 749.80 | Frames: 247601 | Epsilon: 0.7524


Training:   2%|▏         | 247960/10000000 [20:54<19:52:14, 136.33it/s]

Episode 606 completed | Reward: 1040.00 | Avg Reward: 755.60 | Frames: 247947 | Epsilon: 0.7521


Training:   2%|▏         | 248369/10000000 [20:57<20:47:36, 130.27it/s]

Episode 607 completed | Reward: 240.00 | Avg Reward: 744.00 | Frames: 248351 | Epsilon: 0.7516


Training:   2%|▏         | 248829/10000000 [21:00<20:55:37, 129.43it/s]

Episode 608 completed | Reward: 840.00 | Avg Reward: 743.60 | Frames: 248814 | Epsilon: 0.7512


Training:   2%|▏         | 249180/10000000 [21:03<19:38:25, 137.91it/s]

Episode 609 completed | Reward: 320.00 | Avg Reward: 739.00 | Frames: 249166 | Epsilon: 0.7508


Training:   2%|▏         | 249720/10000000 [21:07<19:28:16, 139.10it/s]

Episode 610 completed | Reward: 180.00 | Avg Reward: 737.80 | Frames: 249706 | Epsilon: 0.7503


Training:   3%|▎         | 250013/10000000 [21:25<654:45:59,  4.14it/s]


Evaluation at frame 250000: 516.00
Model saved to weights/CarnivalDeterministic-v4_dqn_best.pth
Episode 611 completed | Reward: 880.00 | Avg Reward: 742.60 | Frames: 250001 | Epsilon: 0.7500


Training:   3%|▎         | 250481/10000000 [21:28<20:31:54, 131.90it/s]

Episode 612 completed | Reward: 1180.00 | Avg Reward: 748.80 | Frames: 250462 | Epsilon: 0.7495


Training:   3%|▎         | 250989/10000000 [21:32<20:47:54, 130.21it/s]

Episode 613 completed | Reward: 1160.00 | Avg Reward: 749.80 | Frames: 250969 | Epsilon: 0.7490


Training:   3%|▎         | 251558/10000000 [21:36<21:07:31, 128.18it/s]

Episode 614 completed | Reward: 520.00 | Avg Reward: 746.20 | Frames: 251543 | Epsilon: 0.7485


Training:   3%|▎         | 251965/10000000 [21:39<21:15:56, 127.33it/s]

Episode 615 completed | Reward: 500.00 | Avg Reward: 740.80 | Frames: 251947 | Epsilon: 0.7481


Training:   3%|▎         | 252409/10000000 [21:42<20:58:02, 129.14it/s]

Episode 616 completed | Reward: 480.00 | Avg Reward: 736.80 | Frames: 252389 | Epsilon: 0.7476


Training:   3%|▎         | 252805/10000000 [21:45<20:50:18, 129.93it/s]

Episode 617 completed | Reward: 740.00 | Avg Reward: 738.00 | Frames: 252789 | Epsilon: 0.7472


Training:   3%|▎         | 253265/10000000 [21:49<21:10:27, 127.86it/s]

Episode 618 completed | Reward: 740.00 | Avg Reward: 738.40 | Frames: 253248 | Epsilon: 0.7468


Training:   3%|▎         | 253675/10000000 [21:52<20:11:25, 134.09it/s]

Episode 619 completed | Reward: 660.00 | Avg Reward: 743.00 | Frames: 253662 | Epsilon: 0.7463


Training:   3%|▎         | 254121/10000000 [21:55<20:29:34, 132.10it/s]

Episode 620 completed | Reward: 380.00 | Avg Reward: 741.00 | Frames: 254104 | Epsilon: 0.7459


Training:   3%|▎         | 254485/10000000 [21:58<20:39:47, 131.01it/s]

Episode 621 completed | Reward: 320.00 | Avg Reward: 736.80 | Frames: 254468 | Epsilon: 0.7455


Training:   3%|▎         | 254801/10000000 [22:00<21:37:08, 125.21it/s]

Episode 622 completed | Reward: 600.00 | Avg Reward: 739.00 | Frames: 254784 | Epsilon: 0.7452


Training:   3%|▎         | 255163/10000000 [22:03<20:05:29, 134.73it/s]

Episode 623 completed | Reward: 1000.00 | Avg Reward: 740.00 | Frames: 255145 | Epsilon: 0.7449


Training:   3%|▎         | 255575/10000000 [22:06<20:05:53, 134.68it/s]

Episode 624 completed | Reward: 740.00 | Avg Reward: 742.60 | Frames: 255562 | Epsilon: 0.7444


Training:   3%|▎         | 255939/10000000 [22:08<20:23:54, 132.69it/s]

Episode 625 completed | Reward: 1120.00 | Avg Reward: 744.20 | Frames: 255926 | Epsilon: 0.7441


Training:   3%|▎         | 256306/10000000 [22:11<19:57:48, 135.58it/s]

Episode 626 completed | Reward: 1440.00 | Avg Reward: 747.00 | Frames: 256306 | Epsilon: 0.7437

Memory usage: 1.10 GB


Training:   3%|▎         | 256773/10000000 [22:15<20:46:42, 130.25it/s]

Episode 627 completed | Reward: 2560.00 | Avg Reward: 762.40 | Frames: 256756 | Epsilon: 0.7432


Training:   3%|▎         | 257169/10000000 [22:18<20:39:54, 130.96it/s]

Episode 628 completed | Reward: 1040.00 | Avg Reward: 769.00 | Frames: 257154 | Epsilon: 0.7428


Training:   3%|▎         | 257565/10000000 [22:21<20:20:31, 133.04it/s]

Episode 629 completed | Reward: 900.00 | Avg Reward: 769.60 | Frames: 257548 | Epsilon: 0.7425


Training:   3%|▎         | 257863/10000000 [22:23<21:23:47, 126.48it/s]

Episode 630 completed | Reward: 480.00 | Avg Reward: 768.00 | Frames: 257853 | Epsilon: 0.7421


Training:   3%|▎         | 258313/10000000 [22:26<21:49:02, 124.03it/s]

Episode 631 completed | Reward: 1400.00 | Avg Reward: 763.20 | Frames: 258293 | Epsilon: 0.7417


Training:   3%|▎         | 258693/10000000 [22:29<20:58:14, 129.03it/s]

Episode 632 completed | Reward: 700.00 | Avg Reward: 760.40 | Frames: 258671 | Epsilon: 0.7413


Training:   3%|▎         | 259049/10000000 [22:32<20:54:59, 129.36it/s]

Episode 633 completed | Reward: 520.00 | Avg Reward: 756.00 | Frames: 259026 | Epsilon: 0.7410


Training:   3%|▎         | 259414/10000000 [22:34<20:29:41, 132.02it/s]

Episode 634 completed | Reward: 1300.00 | Avg Reward: 765.80 | Frames: 259399 | Epsilon: 0.7406


Training:   3%|▎         | 259885/10000000 [22:38<21:26:01, 126.23it/s]

Episode 635 completed | Reward: 860.00 | Avg Reward: 764.40 | Frames: 259870 | Epsilon: 0.7401


Training:   3%|▎         | 260207/10000000 [22:40<20:36:59, 131.23it/s]

Episode 636 completed | Reward: 700.00 | Avg Reward: 763.00 | Frames: 260193 | Epsilon: 0.7398


Training:   3%|▎         | 260681/10000000 [22:44<20:55:21, 129.30it/s]

Episode 637 completed | Reward: 660.00 | Avg Reward: 758.00 | Frames: 260659 | Epsilon: 0.7393


Training:   3%|▎         | 260999/10000000 [22:46<21:05:24, 128.27it/s]

Episode 638 completed | Reward: 880.00 | Avg Reward: 757.60 | Frames: 260988 | Epsilon: 0.7390


Training:   3%|▎         | 261382/10000000 [22:49<22:07:54, 122.23it/s]

Episode 639 completed | Reward: 680.00 | Avg Reward: 756.80 | Frames: 261374 | Epsilon: 0.7386


Training:   3%|▎         | 261795/10000000 [22:52<20:57:12, 129.10it/s]

Episode 640 completed | Reward: 1300.00 | Avg Reward: 756.80 | Frames: 261783 | Epsilon: 0.7382


Training:   3%|▎         | 262205/10000000 [22:56<21:33:57, 125.43it/s]

Episode 641 completed | Reward: 1360.00 | Avg Reward: 756.20 | Frames: 262188 | Epsilon: 0.7378


Training:   3%|▎         | 262629/10000000 [22:59<21:32:38, 125.55it/s]

Episode 642 completed | Reward: 960.00 | Avg Reward: 761.00 | Frames: 262614 | Epsilon: 0.7374


Training:   3%|▎         | 263013/10000000 [23:02<21:38:16, 125.00it/s]

Episode 643 completed | Reward: 400.00 | Avg Reward: 757.60 | Frames: 262992 | Epsilon: 0.7370


Training:   3%|▎         | 263359/10000000 [23:04<20:40:50, 130.78it/s]

Episode 644 completed | Reward: 380.00 | Avg Reward: 756.60 | Frames: 263347 | Epsilon: 0.7367


Training:   3%|▎         | 263753/10000000 [23:07<21:50:04, 123.86it/s]

Episode 645 completed | Reward: 620.00 | Avg Reward: 758.00 | Frames: 263736 | Epsilon: 0.7363


Training:   3%|▎         | 264177/10000000 [23:11<21:44:32, 124.38it/s]

Episode 646 completed | Reward: 940.00 | Avg Reward: 762.60 | Frames: 264161 | Epsilon: 0.7358


Training:   3%|▎         | 264597/10000000 [23:14<21:39:25, 124.87it/s]

Episode 647 completed | Reward: 680.00 | Avg Reward: 764.60 | Frames: 264573 | Epsilon: 0.7354


Training:   3%|▎         | 265377/10000000 [23:20<21:01:07, 128.65it/s]

Episode 648 completed | Reward: 920.00 | Avg Reward: 766.80 | Frames: 265358 | Epsilon: 0.7346


Training:   3%|▎         | 265791/10000000 [23:23<21:58:36, 123.04it/s]

Episode 649 completed | Reward: 460.00 | Avg Reward: 764.80 | Frames: 265783 | Epsilon: 0.7342


Training:   3%|▎         | 266117/10000000 [23:25<22:35:36, 119.67it/s]

Episode 650 completed | Reward: 460.00 | Avg Reward: 765.80 | Frames: 266100 | Epsilon: 0.7339


Training:   3%|▎         | 266514/10000000 [23:28<20:25:06, 132.42it/s]

Episode 651 completed | Reward: 480.00 | Avg Reward: 758.80 | Frames: 266514 | Epsilon: 0.7335

Memory usage: 1.10 GB


Training:   3%|▎         | 266937/10000000 [23:32<21:02:01, 128.54it/s]

Episode 652 completed | Reward: 940.00 | Avg Reward: 761.40 | Frames: 266912 | Epsilon: 0.7331


Training:   3%|▎         | 267351/10000000 [23:35<20:21:33, 132.79it/s]

Episode 653 completed | Reward: 1700.00 | Avg Reward: 771.60 | Frames: 267340 | Epsilon: 0.7327


Training:   3%|▎         | 267807/10000000 [23:38<21:37:11, 125.04it/s]

Episode 654 completed | Reward: 1440.00 | Avg Reward: 781.40 | Frames: 267799 | Epsilon: 0.7322


Training:   3%|▎         | 268167/10000000 [23:41<21:12:10, 127.50it/s]

Episode 655 completed | Reward: 1100.00 | Avg Reward: 785.40 | Frames: 268153 | Epsilon: 0.7318


Training:   3%|▎         | 268542/10000000 [23:44<21:23:33, 126.36it/s]

Episode 656 completed | Reward: 820.00 | Avg Reward: 789.00 | Frames: 268525 | Epsilon: 0.7315


Training:   3%|▎         | 268901/10000000 [23:47<22:07:18, 122.19it/s]

Episode 657 completed | Reward: 680.00 | Avg Reward: 785.00 | Frames: 268887 | Epsilon: 0.7311


Training:   3%|▎         | 269251/10000000 [23:49<21:52:52, 123.53it/s]

Episode 658 completed | Reward: 500.00 | Avg Reward: 785.80 | Frames: 269238 | Epsilon: 0.7308


Training:   3%|▎         | 269657/10000000 [23:52<21:32:51, 125.44it/s]

Episode 659 completed | Reward: 960.00 | Avg Reward: 789.20 | Frames: 269635 | Epsilon: 0.7304


Training:   3%|▎         | 270011/10000000 [23:55<21:36:43, 125.06it/s]

Episode 660 completed | Reward: 740.00 | Avg Reward: 793.00 | Frames: 270002 | Epsilon: 0.7300


Training:   3%|▎         | 270409/10000000 [23:58<21:39:54, 124.75it/s]

Episode 661 completed | Reward: 520.00 | Avg Reward: 795.00 | Frames: 270384 | Epsilon: 0.7296


Training:   3%|▎         | 270771/10000000 [24:01<21:29:55, 125.71it/s]

Episode 662 completed | Reward: 1100.00 | Avg Reward: 797.20 | Frames: 270748 | Epsilon: 0.7293


Training:   3%|▎         | 271201/10000000 [24:04<21:48:19, 123.93it/s]

Episode 663 completed | Reward: 640.00 | Avg Reward: 795.80 | Frames: 271185 | Epsilon: 0.7288


Training:   3%|▎         | 271631/10000000 [24:08<20:28:56, 131.93it/s]

Episode 664 completed | Reward: 1180.00 | Avg Reward: 798.40 | Frames: 271619 | Epsilon: 0.7284


Training:   3%|▎         | 272160/10000000 [24:12<21:11:34, 127.50it/s]

Episode 665 completed | Reward: 560.00 | Avg Reward: 801.00 | Frames: 272145 | Epsilon: 0.7279


Training:   3%|▎         | 272527/10000000 [24:15<21:42:12, 124.50it/s]

Episode 666 completed | Reward: 1080.00 | Avg Reward: 796.00 | Frames: 272517 | Epsilon: 0.7275


Training:   3%|▎         | 272945/10000000 [24:18<22:22:17, 120.78it/s]

Episode 667 completed | Reward: 660.00 | Avg Reward: 796.00 | Frames: 272924 | Epsilon: 0.7271


Training:   3%|▎         | 273336/10000000 [24:21<22:10:42, 121.82it/s]

Episode 668 completed | Reward: 400.00 | Avg Reward: 793.00 | Frames: 273323 | Epsilon: 0.7267


Training:   3%|▎         | 273741/10000000 [24:24<22:39:44, 119.22it/s]

Episode 669 completed | Reward: 900.00 | Avg Reward: 788.80 | Frames: 273725 | Epsilon: 0.7263


Training:   3%|▎         | 274165/10000000 [24:27<22:06:45, 122.18it/s]

Episode 670 completed | Reward: 640.00 | Avg Reward: 790.40 | Frames: 274145 | Epsilon: 0.7259


Training:   3%|▎         | 274550/10000000 [24:30<21:28:26, 125.80it/s]

Episode 671 completed | Reward: 380.00 | Avg Reward: 788.20 | Frames: 274539 | Epsilon: 0.7255


Training:   3%|▎         | 274893/10000000 [24:33<21:58:52, 122.90it/s]

Episode 672 completed | Reward: 1900.00 | Avg Reward: 801.80 | Frames: 274873 | Epsilon: 0.7251


Training:   3%|▎         | 275294/10000000 [24:36<21:58:18, 122.94it/s]

Episode 673 completed | Reward: 540.00 | Avg Reward: 800.60 | Frames: 275284 | Epsilon: 0.7247


Training:   3%|▎         | 275745/10000000 [24:40<22:08:00, 122.04it/s]

Episode 674 completed | Reward: 960.00 | Avg Reward: 809.80 | Frames: 275729 | Epsilon: 0.7243


Training:   3%|▎         | 276173/10000000 [24:43<23:17:26, 115.97it/s]

Episode 675 completed | Reward: 760.00 | Avg Reward: 812.40 | Frames: 276155 | Epsilon: 0.7238


Training:   3%|▎         | 276563/10000000 [24:46<21:23:28, 126.26it/s]

Episode 676 completed | Reward: 1220.00 | Avg Reward: 808.20 | Frames: 276563 | Epsilon: 0.7234

Memory usage: 1.10 GB


Training:   3%|▎         | 276933/10000000 [24:49<24:23:46, 110.71it/s]

Episode 677 completed | Reward: 460.00 | Avg Reward: 801.20 | Frames: 276922 | Epsilon: 0.7231


Training:   3%|▎         | 277362/10000000 [24:53<22:51:46, 118.13it/s]

Episode 678 completed | Reward: 360.00 | Avg Reward: 797.40 | Frames: 277352 | Epsilon: 0.7226


Training:   3%|▎         | 277777/10000000 [24:56<22:21:45, 120.76it/s]

Episode 679 completed | Reward: 1060.00 | Avg Reward: 798.40 | Frames: 277760 | Epsilon: 0.7222


Training:   3%|▎         | 278207/10000000 [25:00<22:18:45, 121.03it/s]

Episode 680 completed | Reward: 600.00 | Avg Reward: 795.40 | Frames: 278184 | Epsilon: 0.7218


Training:   3%|▎         | 278649/10000000 [25:03<21:48:10, 123.85it/s]

Episode 681 completed | Reward: 2340.00 | Avg Reward: 813.80 | Frames: 278626 | Epsilon: 0.7214


Training:   3%|▎         | 279092/10000000 [25:07<21:43:59, 124.25it/s]

Episode 682 completed | Reward: 1140.00 | Avg Reward: 818.60 | Frames: 279075 | Epsilon: 0.7209


Training:   3%|▎         | 279556/10000000 [25:10<21:24:57, 126.08it/s]

Episode 683 completed | Reward: 540.00 | Avg Reward: 821.80 | Frames: 279535 | Epsilon: 0.7205


Training:   3%|▎         | 279969/10000000 [25:14<24:18:59, 111.04it/s]

Episode 684 completed | Reward: 940.00 | Avg Reward: 814.80 | Frames: 279958 | Epsilon: 0.7200


Training:   3%|▎         | 280396/10000000 [25:17<21:52:27, 123.43it/s]

Episode 685 completed | Reward: 520.00 | Avg Reward: 814.40 | Frames: 280376 | Epsilon: 0.7196


Training:   3%|▎         | 280738/10000000 [25:20<23:05:02, 116.96it/s]

Episode 686 completed | Reward: 920.00 | Avg Reward: 805.00 | Frames: 280725 | Epsilon: 0.7193


Training:   3%|▎         | 281143/10000000 [25:23<22:36:28, 119.41it/s]

Episode 687 completed | Reward: 300.00 | Avg Reward: 792.60 | Frames: 281132 | Epsilon: 0.7189


Training:   3%|▎         | 281546/10000000 [25:27<22:58:29, 117.50it/s]

Episode 688 completed | Reward: 640.00 | Avg Reward: 788.60 | Frames: 281526 | Epsilon: 0.7185


Training:   3%|▎         | 281961/10000000 [25:30<24:14:37, 111.35it/s]

Episode 689 completed | Reward: 340.00 | Avg Reward: 783.20 | Frames: 281951 | Epsilon: 0.7180


Training:   3%|▎         | 282369/10000000 [25:33<22:27:14, 120.22it/s]

Episode 690 completed | Reward: 520.00 | Avg Reward: 779.00 | Frames: 282349 | Epsilon: 0.7177


Training:   3%|▎         | 282785/10000000 [25:37<24:07:00, 111.92it/s]

Episode 691 completed | Reward: 500.00 | Avg Reward: 777.80 | Frames: 282766 | Epsilon: 0.7172


Training:   3%|▎         | 283133/10000000 [25:39<23:48:01, 113.41it/s]

Episode 692 completed | Reward: 940.00 | Avg Reward: 783.40 | Frames: 283112 | Epsilon: 0.7169


Training:   3%|▎         | 283575/10000000 [25:43<22:08:21, 121.91it/s]

Episode 693 completed | Reward: 680.00 | Avg Reward: 785.40 | Frames: 283560 | Epsilon: 0.7164


Training:   3%|▎         | 284053/10000000 [25:47<23:36:18, 114.33it/s]

Episode 694 completed | Reward: 1200.00 | Avg Reward: 790.60 | Frames: 284041 | Epsilon: 0.7160


Training:   3%|▎         | 284475/10000000 [25:50<22:31:35, 119.80it/s]

Episode 695 completed | Reward: 820.00 | Avg Reward: 795.80 | Frames: 284460 | Epsilon: 0.7155


Training:   3%|▎         | 284870/10000000 [25:54<23:27:29, 115.04it/s]

Episode 696 completed | Reward: 580.00 | Avg Reward: 797.20 | Frames: 284859 | Epsilon: 0.7151


Training:   3%|▎         | 285264/10000000 [25:57<22:13:14, 121.44it/s]

Episode 697 completed | Reward: 860.00 | Avg Reward: 799.00 | Frames: 285242 | Epsilon: 0.7148


Training:   3%|▎         | 285668/10000000 [26:00<23:01:57, 117.16it/s]

Episode 698 completed | Reward: 480.00 | Avg Reward: 799.80 | Frames: 285651 | Epsilon: 0.7143


Training:   3%|▎         | 286268/10000000 [26:05<22:10:54, 121.64it/s]

Episode 699 completed | Reward: 1120.00 | Avg Reward: 809.40 | Frames: 286255 | Epsilon: 0.7137


Training:   3%|▎         | 286640/10000000 [26:08<28:35:13, 94.38it/s] 

Episode 700 completed | Reward: 1700.00 | Avg Reward: 823.00 | Frames: 286639 | Epsilon: 0.7134

Memory usage: 1.10 GB


Training:   3%|▎         | 287099/10000000 [26:12<23:18:30, 115.75it/s]

Episode 701 completed | Reward: 1060.00 | Avg Reward: 826.20 | Frames: 287079 | Epsilon: 0.7129


Training:   3%|▎         | 287493/10000000 [26:15<23:02:58, 117.05it/s]

Episode 702 completed | Reward: 740.00 | Avg Reward: 829.20 | Frames: 287474 | Epsilon: 0.7125


Training:   3%|▎         | 287853/10000000 [26:18<23:33:05, 114.55it/s]

Episode 703 completed | Reward: 880.00 | Avg Reward: 829.20 | Frames: 287840 | Epsilon: 0.7122


Training:   3%|▎         | 288292/10000000 [26:22<22:16:52, 121.08it/s]

Episode 704 completed | Reward: 740.00 | Avg Reward: 825.40 | Frames: 288278 | Epsilon: 0.7117


Training:   3%|▎         | 288629/10000000 [26:25<24:06:23, 111.90it/s]

Episode 705 completed | Reward: 640.00 | Avg Reward: 826.60 | Frames: 288619 | Epsilon: 0.7114


Training:   3%|▎         | 289053/10000000 [26:28<23:29:17, 114.84it/s]

Episode 706 completed | Reward: 1480.00 | Avg Reward: 831.00 | Frames: 289034 | Epsilon: 0.7110


Training:   3%|▎         | 289492/10000000 [26:32<23:09:38, 116.46it/s]

Episode 707 completed | Reward: 780.00 | Avg Reward: 836.40 | Frames: 289469 | Epsilon: 0.7105


Training:   3%|▎         | 289884/10000000 [26:35<22:00:09, 122.59it/s]

Episode 708 completed | Reward: 1380.00 | Avg Reward: 841.80 | Frames: 289869 | Epsilon: 0.7101


Training:   3%|▎         | 290270/10000000 [26:38<24:08:33, 111.72it/s]

Episode 709 completed | Reward: 880.00 | Avg Reward: 847.40 | Frames: 290254 | Epsilon: 0.7097


Training:   3%|▎         | 290746/10000000 [26:42<23:30:53, 114.69it/s]

Episode 710 completed | Reward: 1000.00 | Avg Reward: 855.60 | Frames: 290725 | Epsilon: 0.7093


Training:   3%|▎         | 291197/10000000 [26:46<23:35:21, 114.33it/s]

Episode 711 completed | Reward: 560.00 | Avg Reward: 852.40 | Frames: 291182 | Epsilon: 0.7088


Training:   3%|▎         | 291930/10000000 [26:52<23:04:02, 116.90it/s]

Episode 712 completed | Reward: 580.00 | Avg Reward: 846.40 | Frames: 291909 | Epsilon: 0.7081


Training:   3%|▎         | 292331/10000000 [26:55<22:39:22, 119.02it/s]

Episode 713 completed | Reward: 940.00 | Avg Reward: 844.20 | Frames: 292311 | Epsilon: 0.7077


Training:   3%|▎         | 292753/10000000 [26:59<23:28:40, 114.85it/s]

Episode 714 completed | Reward: 660.00 | Avg Reward: 845.60 | Frames: 292737 | Epsilon: 0.7073


Training:   3%|▎         | 293170/10000000 [27:02<25:07:32, 107.31it/s]

Episode 715 completed | Reward: 460.00 | Avg Reward: 845.20 | Frames: 293161 | Epsilon: 0.7068


Training:   3%|▎         | 293504/10000000 [27:05<22:40:34, 118.90it/s]

Episode 716 completed | Reward: 740.00 | Avg Reward: 847.80 | Frames: 293491 | Epsilon: 0.7065


Training:   3%|▎         | 293877/10000000 [27:08<24:32:59, 109.82it/s]

Episode 717 completed | Reward: 780.00 | Avg Reward: 848.20 | Frames: 293865 | Epsilon: 0.7061


Training:   3%|▎         | 294293/10000000 [27:12<23:25:30, 115.09it/s]

Episode 718 completed | Reward: 1140.00 | Avg Reward: 852.20 | Frames: 294275 | Epsilon: 0.7057


Training:   3%|▎         | 294712/10000000 [27:15<22:37:09, 119.19it/s]

Episode 719 completed | Reward: 760.00 | Avg Reward: 853.20 | Frames: 294702 | Epsilon: 0.7053


Training:   3%|▎         | 295137/10000000 [27:19<24:18:02, 110.93it/s]

Episode 720 completed | Reward: 320.00 | Avg Reward: 852.60 | Frames: 295116 | Epsilon: 0.7049


Training:   3%|▎         | 295558/10000000 [27:22<23:20:11, 115.51it/s]

Episode 721 completed | Reward: 640.00 | Avg Reward: 855.80 | Frames: 295544 | Epsilon: 0.7045


Training:   3%|▎         | 295989/10000000 [27:26<24:56:47, 108.05it/s]

Episode 722 completed | Reward: 680.00 | Avg Reward: 856.60 | Frames: 295973 | Epsilon: 0.7040


Training:   3%|▎         | 296329/10000000 [27:29<24:08:09, 111.68it/s]

Episode 723 completed | Reward: 780.00 | Avg Reward: 854.40 | Frames: 296313 | Epsilon: 0.7037


Training:   3%|▎         | 296669/10000000 [27:32<23:30:19, 114.67it/s]

Episode 724 completed | Reward: 980.00 | Avg Reward: 856.80 | Frames: 296669 | Epsilon: 0.7033

Memory usage: 1.10 GB


Training:   3%|▎         | 297165/10000000 [27:36<23:42:46, 113.66it/s]

Episode 725 completed | Reward: 1320.00 | Avg Reward: 858.80 | Frames: 297149 | Epsilon: 0.7029


Training:   3%|▎         | 297549/10000000 [27:39<24:31:11, 109.92it/s]

Episode 726 completed | Reward: 840.00 | Avg Reward: 852.80 | Frames: 297534 | Epsilon: 0.7025


Training:   3%|▎         | 297936/10000000 [27:43<23:18:15, 115.64it/s]

Episode 727 completed | Reward: 580.00 | Avg Reward: 833.00 | Frames: 297915 | Epsilon: 0.7021


Training:   3%|▎         | 298352/10000000 [27:46<23:36:55, 114.12it/s]

Episode 728 completed | Reward: 900.00 | Avg Reward: 831.60 | Frames: 298342 | Epsilon: 0.7017


Training:   3%|▎         | 298774/10000000 [27:50<25:30:45, 105.63it/s]

Episode 729 completed | Reward: 200.00 | Avg Reward: 824.60 | Frames: 298763 | Epsilon: 0.7012


Training:   3%|▎         | 299145/10000000 [27:53<25:06:26, 107.33it/s]

Episode 730 completed | Reward: 1220.00 | Avg Reward: 832.00 | Frames: 299133 | Epsilon: 0.7009


Training:   3%|▎         | 299529/10000000 [27:56<24:29:26, 110.02it/s]

Episode 731 completed | Reward: 420.00 | Avg Reward: 822.20 | Frames: 299510 | Epsilon: 0.7005


Training:   3%|▎         | 299865/10000000 [27:59<23:58:38, 112.38it/s]

Episode 732 completed | Reward: 660.00 | Avg Reward: 821.80 | Frames: 299845 | Epsilon: 0.7002


Training:   3%|▎         | 300265/10000000 [28:02<24:57:28, 107.96it/s]

Episode 733 completed | Reward: 460.00 | Avg Reward: 821.20 | Frames: 300249 | Epsilon: 0.6998


Training:   3%|▎         | 300621/10000000 [28:05<25:07:48, 107.21it/s]

Episode 734 completed | Reward: 1140.00 | Avg Reward: 819.60 | Frames: 300611 | Epsilon: 0.6994


Training:   3%|▎         | 301025/10000000 [28:09<24:37:16, 109.42it/s]

Episode 735 completed | Reward: 740.00 | Avg Reward: 818.40 | Frames: 301012 | Epsilon: 0.6990


Training:   3%|▎         | 301401/10000000 [28:12<24:13:21, 111.22it/s]

Episode 736 completed | Reward: 1080.00 | Avg Reward: 822.20 | Frames: 301384 | Epsilon: 0.6986


Training:   3%|▎         | 301777/10000000 [28:15<24:43:28, 108.96it/s]

Episode 737 completed | Reward: 460.00 | Avg Reward: 820.20 | Frames: 301766 | Epsilon: 0.6982


Training:   3%|▎         | 302285/10000000 [28:20<24:36:32, 109.46it/s]

Episode 738 completed | Reward: 2140.00 | Avg Reward: 832.80 | Frames: 302265 | Epsilon: 0.6977


Training:   3%|▎         | 302689/10000000 [28:23<24:14:03, 111.15it/s]

Episode 739 completed | Reward: 480.00 | Avg Reward: 830.80 | Frames: 302672 | Epsilon: 0.6973


Training:   3%|▎         | 303046/10000000 [28:26<25:05:01, 107.38it/s]

Episode 740 completed | Reward: 640.00 | Avg Reward: 824.20 | Frames: 303029 | Epsilon: 0.6970


Training:   3%|▎         | 303465/10000000 [28:30<24:24:58, 110.32it/s]

Episode 741 completed | Reward: 880.00 | Avg Reward: 819.40 | Frames: 303446 | Epsilon: 0.6966


Training:   3%|▎         | 303852/10000000 [28:33<22:32:19, 119.50it/s]

Episode 742 completed | Reward: 860.00 | Avg Reward: 818.40 | Frames: 303841 | Epsilon: 0.6962


Training:   3%|▎         | 304334/10000000 [28:37<24:11:48, 111.31it/s]

Episode 743 completed | Reward: 1220.00 | Avg Reward: 826.60 | Frames: 304314 | Epsilon: 0.6957


Training:   3%|▎         | 304729/10000000 [28:41<24:04:39, 111.85it/s]

Episode 744 completed | Reward: 500.00 | Avg Reward: 827.80 | Frames: 304715 | Epsilon: 0.6953


Training:   3%|▎         | 305113/10000000 [28:44<24:11:12, 111.34it/s]

Episode 745 completed | Reward: 820.00 | Avg Reward: 829.80 | Frames: 305097 | Epsilon: 0.6949


Training:   3%|▎         | 305405/10000000 [28:46<25:18:59, 106.37it/s]

Episode 746 completed | Reward: 620.00 | Avg Reward: 826.60 | Frames: 305396 | Epsilon: 0.6946


Training:   3%|▎         | 306089/10000000 [28:52<24:46:37, 108.68it/s]

Episode 747 completed | Reward: 1840.00 | Avg Reward: 838.20 | Frames: 306073 | Epsilon: 0.6939


Training:   3%|▎         | 306490/10000000 [28:56<24:11:05, 111.34it/s]

Episode 748 completed | Reward: 740.00 | Avg Reward: 836.40 | Frames: 306475 | Epsilon: 0.6935


Training:   3%|▎         | 306908/10000000 [28:59<23:25:28, 114.94it/s]

Episode 749 completed | Reward: 1240.00 | Avg Reward: 844.20 | Frames: 306908 | Epsilon: 0.6931

Memory usage: 1.10 GB


Training:   3%|▎         | 307337/10000000 [29:03<24:13:53, 111.11it/s]

Episode 750 completed | Reward: 420.00 | Avg Reward: 843.80 | Frames: 307318 | Epsilon: 0.6927


Training:   3%|▎         | 307725/10000000 [29:06<24:00:13, 112.16it/s]

Episode 751 completed | Reward: 1320.00 | Avg Reward: 852.20 | Frames: 307705 | Epsilon: 0.6923


Training:   3%|▎         | 308137/10000000 [29:10<24:58:31, 107.79it/s]

Episode 752 completed | Reward: 1000.00 | Avg Reward: 852.80 | Frames: 308127 | Epsilon: 0.6919


Training:   3%|▎         | 308534/10000000 [29:14<25:54:43, 103.89it/s]

Episode 753 completed | Reward: 1440.00 | Avg Reward: 850.20 | Frames: 308518 | Epsilon: 0.6915


Training:   3%|▎         | 308897/10000000 [29:17<23:58:36, 112.27it/s]

Episode 754 completed | Reward: 700.00 | Avg Reward: 842.80 | Frames: 308876 | Epsilon: 0.6911


Training:   3%|▎         | 309285/10000000 [29:20<25:10:51, 106.90it/s]

Episode 755 completed | Reward: 1240.00 | Avg Reward: 844.20 | Frames: 309270 | Epsilon: 0.6907


Training:   3%|▎         | 309729/10000000 [29:24<23:59:41, 112.18it/s]

Episode 756 completed | Reward: 1060.00 | Avg Reward: 846.60 | Frames: 309709 | Epsilon: 0.6903


Training:   3%|▎         | 310137/10000000 [29:28<25:46:33, 104.42it/s]

Episode 757 completed | Reward: 660.00 | Avg Reward: 846.40 | Frames: 310126 | Epsilon: 0.6899


Training:   3%|▎         | 310545/10000000 [29:31<25:05:19, 107.28it/s]

Episode 758 completed | Reward: 1480.00 | Avg Reward: 856.20 | Frames: 310529 | Epsilon: 0.6895


Training:   3%|▎         | 310953/10000000 [29:35<24:51:08, 108.30it/s]

Episode 759 completed | Reward: 600.00 | Avg Reward: 852.60 | Frames: 310939 | Epsilon: 0.6891


Training:   3%|▎         | 311385/10000000 [29:38<25:03:09, 107.42it/s]

Episode 760 completed | Reward: 1040.00 | Avg Reward: 855.60 | Frames: 311369 | Epsilon: 0.6886


Training:   3%|▎         | 311817/10000000 [29:42<25:22:56, 106.02it/s]

Episode 761 completed | Reward: 1020.00 | Avg Reward: 860.60 | Frames: 311806 | Epsilon: 0.6882


Training:   3%|▎         | 312301/10000000 [29:46<24:06:31, 111.62it/s]

Episode 762 completed | Reward: 2280.00 | Avg Reward: 872.40 | Frames: 312285 | Epsilon: 0.6877


Training:   3%|▎         | 312794/10000000 [29:51<24:46:22, 108.62it/s]

Episode 763 completed | Reward: 740.00 | Avg Reward: 873.40 | Frames: 312783 | Epsilon: 0.6872


Training:   3%|▎         | 313157/10000000 [29:54<25:08:56, 106.99it/s]

Episode 764 completed | Reward: 1140.00 | Avg Reward: 873.00 | Frames: 313146 | Epsilon: 0.6869


Training:   3%|▎         | 313577/10000000 [29:58<24:43:13, 108.84it/s]

Episode 765 completed | Reward: 560.00 | Avg Reward: 873.00 | Frames: 313557 | Epsilon: 0.6864


Training:   3%|▎         | 313961/10000000 [30:01<24:41:23, 108.98it/s]

Episode 766 completed | Reward: 680.00 | Avg Reward: 869.00 | Frames: 313940 | Epsilon: 0.6861


Training:   3%|▎         | 314358/10000000 [30:04<25:20:08, 106.19it/s]

Episode 767 completed | Reward: 1140.00 | Avg Reward: 873.80 | Frames: 314349 | Epsilon: 0.6857


Training:   3%|▎         | 314802/10000000 [30:08<24:16:56, 110.79it/s]

Episode 768 completed | Reward: 800.00 | Avg Reward: 877.80 | Frames: 314783 | Epsilon: 0.6852


Training:   3%|▎         | 315210/10000000 [30:12<24:14:43, 110.96it/s]

Episode 769 completed | Reward: 540.00 | Avg Reward: 874.20 | Frames: 315195 | Epsilon: 0.6848


Training:   3%|▎         | 315677/10000000 [30:16<24:14:39, 110.96it/s]

Episode 770 completed | Reward: 540.00 | Avg Reward: 873.20 | Frames: 315662 | Epsilon: 0.6843


Training:   3%|▎         | 316074/10000000 [30:20<24:17:01, 110.77it/s]

Episode 771 completed | Reward: 920.00 | Avg Reward: 878.60 | Frames: 316053 | Epsilon: 0.6839


Training:   3%|▎         | 316493/10000000 [30:23<25:55:52, 103.73it/s]

Episode 772 completed | Reward: 340.00 | Avg Reward: 863.00 | Frames: 316482 | Epsilon: 0.6835


Training:   3%|▎         | 316913/10000000 [30:27<31:51:24, 84.43it/s] 

Episode 773 completed | Reward: 1060.00 | Avg Reward: 868.20 | Frames: 316911 | Epsilon: 0.6831

Memory usage: 1.10 GB


Training:   3%|▎         | 317381/10000000 [30:31<25:08:34, 106.97it/s]

Episode 774 completed | Reward: 1860.00 | Avg Reward: 877.20 | Frames: 317364 | Epsilon: 0.6826


Training:   3%|▎         | 317789/10000000 [30:35<25:44:58, 104.45it/s]

Episode 775 completed | Reward: 680.00 | Avg Reward: 876.40 | Frames: 317782 | Epsilon: 0.6822


Training:   3%|▎         | 318209/10000000 [30:39<26:11:30, 102.68it/s]

Episode 776 completed | Reward: 700.00 | Avg Reward: 871.20 | Frames: 318197 | Epsilon: 0.6818


Training:   3%|▎         | 318653/10000000 [30:43<25:26:54, 105.67it/s]

Episode 777 completed | Reward: 500.00 | Avg Reward: 871.60 | Frames: 318636 | Epsilon: 0.6814


Training:   3%|▎         | 319061/10000000 [30:46<25:58:15, 103.54it/s]

Episode 778 completed | Reward: 860.00 | Avg Reward: 876.60 | Frames: 319051 | Epsilon: 0.6809


Training:   3%|▎         | 319496/10000000 [30:50<24:00:29, 112.00it/s]

Episode 779 completed | Reward: 580.00 | Avg Reward: 871.80 | Frames: 319478 | Epsilon: 0.6805


Training:   3%|▎         | 319921/10000000 [30:54<25:50:34, 104.05it/s]

Episode 780 completed | Reward: 660.00 | Avg Reward: 872.40 | Frames: 319910 | Epsilon: 0.6801


Training:   3%|▎         | 320293/10000000 [30:57<24:05:49, 111.58it/s]

Episode 781 completed | Reward: 2080.00 | Avg Reward: 869.80 | Frames: 320278 | Epsilon: 0.6797


Training:   3%|▎         | 320725/10000000 [31:01<25:30:38, 105.39it/s]

Episode 782 completed | Reward: 640.00 | Avg Reward: 864.80 | Frames: 320708 | Epsilon: 0.6793


Training:   3%|▎         | 321133/10000000 [31:05<24:38:25, 109.11it/s]

Episode 783 completed | Reward: 800.00 | Avg Reward: 867.40 | Frames: 321120 | Epsilon: 0.6789


Training:   3%|▎         | 321553/10000000 [31:08<24:59:25, 107.58it/s]

Episode 784 completed | Reward: 880.00 | Avg Reward: 866.80 | Frames: 321532 | Epsilon: 0.6785


Training:   3%|▎         | 321937/10000000 [31:12<25:16:27, 106.37it/s]

Episode 785 completed | Reward: 680.00 | Avg Reward: 868.40 | Frames: 321921 | Epsilon: 0.6781


Training:   3%|▎         | 322357/10000000 [31:16<24:47:03, 108.47it/s]

Episode 786 completed | Reward: 600.00 | Avg Reward: 865.20 | Frames: 322347 | Epsilon: 0.6777


Training:   3%|▎         | 322765/10000000 [31:19<25:17:16, 106.30it/s]

Episode 787 completed | Reward: 1180.00 | Avg Reward: 874.00 | Frames: 322751 | Epsilon: 0.6772


Training:   3%|▎         | 323173/10000000 [31:23<25:04:45, 107.18it/s]

Episode 788 completed | Reward: 1140.00 | Avg Reward: 879.00 | Frames: 323157 | Epsilon: 0.6768


Training:   3%|▎         | 323591/10000000 [31:27<25:26:55, 105.62it/s]

Episode 789 completed | Reward: 980.00 | Avg Reward: 885.40 | Frames: 323573 | Epsilon: 0.6764


Training:   3%|▎         | 323973/10000000 [31:30<24:53:21, 107.99it/s]

Episode 790 completed | Reward: 420.00 | Avg Reward: 884.40 | Frames: 323955 | Epsilon: 0.6760


Training:   3%|▎         | 324429/10000000 [31:34<25:26:41, 105.63it/s]

Episode 791 completed | Reward: 960.00 | Avg Reward: 889.00 | Frames: 324414 | Epsilon: 0.6756


Training:   3%|▎         | 324825/10000000 [31:38<24:48:55, 108.30it/s]

Episode 792 completed | Reward: 700.00 | Avg Reward: 886.60 | Frames: 324805 | Epsilon: 0.6752


Training:   3%|▎         | 325233/10000000 [31:42<25:04:23, 107.18it/s]

Episode 793 completed | Reward: 1020.00 | Avg Reward: 890.00 | Frames: 325216 | Epsilon: 0.6748


Training:   3%|▎         | 325713/10000000 [31:46<25:54:56, 103.69it/s]

Episode 794 completed | Reward: 1060.00 | Avg Reward: 888.60 | Frames: 325702 | Epsilon: 0.6743


Training:   3%|▎         | 326133/10000000 [31:50<24:56:25, 107.74it/s]

Episode 795 completed | Reward: 1280.00 | Avg Reward: 893.20 | Frames: 326117 | Epsilon: 0.6739


Training:   3%|▎         | 326505/10000000 [31:53<25:51:28, 103.92it/s]

Episode 796 completed | Reward: 880.00 | Avg Reward: 896.20 | Frames: 326493 | Epsilon: 0.6735


Training:   3%|▎         | 326869/10000000 [31:56<23:44:06, 113.21it/s]

Episode 797 completed | Reward: 700.00 | Avg Reward: 894.60 | Frames: 326851 | Epsilon: 0.6731


Training:   3%|▎         | 327303/10000000 [32:00<23:59:41, 111.98it/s]

Episode 798 completed | Reward: 200.00 | Avg Reward: 891.80 | Frames: 327303 | Epsilon: 0.6727

Memory usage: 1.10 GB


Training:   3%|▎         | 327733/10000000 [32:04<24:56:06, 107.75it/s]

Episode 799 completed | Reward: 940.00 | Avg Reward: 890.00 | Frames: 327721 | Epsilon: 0.6723


Training:   3%|▎         | 328057/10000000 [32:07<25:00:01, 107.46it/s]

Episode 800 completed | Reward: 1060.00 | Avg Reward: 883.60 | Frames: 328044 | Epsilon: 0.6720


Training:   3%|▎         | 328429/10000000 [32:11<24:45:01, 108.54it/s]

Episode 801 completed | Reward: 1200.00 | Avg Reward: 885.00 | Frames: 328415 | Epsilon: 0.6716


Training:   3%|▎         | 328765/10000000 [32:14<25:47:52, 104.14it/s]

Episode 802 completed | Reward: 100.00 | Avg Reward: 878.60 | Frames: 328754 | Epsilon: 0.6712


Training:   3%|▎         | 329149/10000000 [32:17<24:57:58, 107.60it/s]

Episode 803 completed | Reward: 680.00 | Avg Reward: 876.60 | Frames: 329135 | Epsilon: 0.6709


Training:   3%|▎         | 329557/10000000 [32:21<25:43:26, 104.42it/s]

Episode 804 completed | Reward: 1040.00 | Avg Reward: 879.60 | Frames: 329543 | Epsilon: 0.6705


Training:   3%|▎         | 329917/10000000 [32:24<25:02:09, 107.29it/s]

Episode 805 completed | Reward: 940.00 | Avg Reward: 882.60 | Frames: 329905 | Epsilon: 0.6701


Training:   3%|▎         | 330277/10000000 [32:28<24:52:16, 108.00it/s]

Episode 806 completed | Reward: 1560.00 | Avg Reward: 883.40 | Frames: 330266 | Epsilon: 0.6697


Training:   3%|▎         | 330661/10000000 [32:31<24:45:55, 108.46it/s]

Episode 807 completed | Reward: 480.00 | Avg Reward: 880.40 | Frames: 330641 | Epsilon: 0.6694


Training:   3%|▎         | 331045/10000000 [32:35<25:52:48, 103.78it/s]

Episode 808 completed | Reward: 920.00 | Avg Reward: 875.80 | Frames: 331028 | Epsilon: 0.6690


Training:   3%|▎         | 331453/10000000 [32:38<26:13:22, 102.42it/s]

Episode 809 completed | Reward: 940.00 | Avg Reward: 876.40 | Frames: 331441 | Epsilon: 0.6686


Training:   3%|▎         | 331837/10000000 [32:42<24:44:57, 108.51it/s]

Episode 810 completed | Reward: 480.00 | Avg Reward: 871.20 | Frames: 331821 | Epsilon: 0.6682


Training:   3%|▎         | 332233/10000000 [32:45<26:15:43, 102.26it/s]

Episode 811 completed | Reward: 820.00 | Avg Reward: 873.80 | Frames: 332223 | Epsilon: 0.6678


Training:   3%|▎         | 332665/10000000 [32:49<25:31:14, 105.22it/s]

Episode 812 completed | Reward: 680.00 | Avg Reward: 874.80 | Frames: 332652 | Epsilon: 0.6673


Training:   3%|▎         | 333061/10000000 [32:53<24:50:55, 108.06it/s]

Episode 813 completed | Reward: 2480.00 | Avg Reward: 890.20 | Frames: 333050 | Epsilon: 0.6669


Training:   3%|▎         | 333433/10000000 [32:56<25:05:58, 106.98it/s]

Episode 814 completed | Reward: 1300.00 | Avg Reward: 896.60 | Frames: 333412 | Epsilon: 0.6666


Training:   3%|▎         | 333901/10000000 [33:01<25:05:21, 107.02it/s]

Episode 815 completed | Reward: 1360.00 | Avg Reward: 905.60 | Frames: 333886 | Epsilon: 0.6661


Training:   3%|▎         | 334285/10000000 [33:04<25:19:37, 106.01it/s]

Episode 816 completed | Reward: 1840.00 | Avg Reward: 916.60 | Frames: 334272 | Epsilon: 0.6657


Training:   3%|▎         | 334669/10000000 [33:08<25:01:35, 107.28it/s]

Episode 817 completed | Reward: 1600.00 | Avg Reward: 924.80 | Frames: 334656 | Epsilon: 0.6653


Training:   3%|▎         | 335125/10000000 [33:12<25:36:33, 104.83it/s]

Episode 818 completed | Reward: 920.00 | Avg Reward: 922.60 | Frames: 335109 | Epsilon: 0.6649


Training:   3%|▎         | 335569/10000000 [33:16<25:10:02, 106.67it/s]

Episode 819 completed | Reward: 740.00 | Avg Reward: 922.40 | Frames: 335559 | Epsilon: 0.6644


Training:   3%|▎         | 336023/10000000 [33:20<25:15:45, 106.26it/s]

Episode 820 completed | Reward: 1200.00 | Avg Reward: 931.20 | Frames: 336006 | Epsilon: 0.6640


Training:   3%|▎         | 336429/10000000 [33:24<26:31:52, 101.18it/s]

Episode 821 completed | Reward: 620.00 | Avg Reward: 931.00 | Frames: 336418 | Epsilon: 0.6636


Training:   3%|▎         | 336848/10000000 [33:28<24:50:12, 108.07it/s]

Episode 822 completed | Reward: 720.00 | Avg Reward: 931.40 | Frames: 336838 | Epsilon: 0.6632


Training:   3%|▎         | 337205/10000000 [33:31<25:15:37, 106.26it/s]

Episode 823 completed | Reward: 1080.00 | Avg Reward: 934.40 | Frames: 337190 | Epsilon: 0.6628


Training:   3%|▎         | 337564/10000000 [33:34<33:16:42, 80.65it/s] 

Episode 824 completed | Reward: 420.00 | Avg Reward: 928.80 | Frames: 337559 | Epsilon: 0.6624

Memory usage: 1.10 GB


Training:   3%|▎         | 337957/10000000 [33:38<25:55:47, 103.51it/s]

Episode 825 completed | Reward: 480.00 | Avg Reward: 920.40 | Frames: 337945 | Epsilon: 0.6621


Training:   3%|▎         | 338519/10000000 [33:43<25:13:51, 106.37it/s]

Episode 826 completed | Reward: 1060.00 | Avg Reward: 922.60 | Frames: 338507 | Epsilon: 0.6615


Training:   3%|▎         | 338875/10000000 [33:46<25:20:05, 105.93it/s]

Episode 827 completed | Reward: 1080.00 | Avg Reward: 927.60 | Frames: 338860 | Epsilon: 0.6611


Training:   3%|▎         | 339317/10000000 [33:51<26:06:37, 102.78it/s]

Episode 828 completed | Reward: 620.00 | Avg Reward: 924.80 | Frames: 339300 | Epsilon: 0.6607


Training:   3%|▎         | 339725/10000000 [33:54<25:28:22, 105.34it/s]

Episode 829 completed | Reward: 640.00 | Avg Reward: 929.20 | Frames: 339712 | Epsilon: 0.6603


Training:   3%|▎         | 340131/10000000 [33:58<25:17:18, 106.11it/s]

Episode 830 completed | Reward: 1820.00 | Avg Reward: 935.20 | Frames: 340115 | Epsilon: 0.6599


Training:   3%|▎         | 340549/10000000 [34:02<25:41:59, 104.40it/s]

Episode 831 completed | Reward: 100.00 | Avg Reward: 932.00 | Frames: 340535 | Epsilon: 0.6595


Training:   3%|▎         | 340969/10000000 [34:06<25:13:09, 106.39it/s]

Episode 832 completed | Reward: 1840.00 | Avg Reward: 943.80 | Frames: 340954 | Epsilon: 0.6590


Training:   3%|▎         | 341365/10000000 [34:09<25:26:09, 105.48it/s]

Episode 833 completed | Reward: 1660.00 | Avg Reward: 955.80 | Frames: 341352 | Epsilon: 0.6586


Training:   3%|▎         | 341773/10000000 [34:13<25:46:01, 104.12it/s]

Episode 834 completed | Reward: 920.00 | Avg Reward: 953.60 | Frames: 341761 | Epsilon: 0.6582


Training:   3%|▎         | 342179/10000000 [34:17<25:49:04, 103.91it/s]

Episode 835 completed | Reward: 680.00 | Avg Reward: 953.00 | Frames: 342164 | Epsilon: 0.6578


Training:   3%|▎         | 342537/10000000 [34:20<24:56:48, 107.53it/s]

Episode 836 completed | Reward: 880.00 | Avg Reward: 951.00 | Frames: 342521 | Epsilon: 0.6575


Training:   3%|▎         | 342921/10000000 [34:24<25:37:11, 104.70it/s]

Episode 837 completed | Reward: 940.00 | Avg Reward: 955.80 | Frames: 342911 | Epsilon: 0.6571


Training:   3%|▎         | 343327/10000000 [34:28<25:30:40, 105.15it/s]

Episode 838 completed | Reward: 740.00 | Avg Reward: 941.80 | Frames: 343309 | Epsilon: 0.6567


Training:   3%|▎         | 343755/10000000 [34:32<25:55:27, 103.47it/s]

Episode 839 completed | Reward: 1400.00 | Avg Reward: 951.00 | Frames: 343739 | Epsilon: 0.6563


Training:   3%|▎         | 344124/10000000 [34:35<25:37:26, 104.67it/s]

Episode 840 completed | Reward: 900.00 | Avg Reward: 953.60 | Frames: 344114 | Epsilon: 0.6559


Training:   3%|▎         | 344517/10000000 [34:39<26:27:06, 101.39it/s]

Episode 841 completed | Reward: 360.00 | Avg Reward: 948.40 | Frames: 344509 | Epsilon: 0.6555


Training:   3%|▎         | 344937/10000000 [34:43<26:37:45, 100.71it/s]

Episode 842 completed | Reward: 1020.00 | Avg Reward: 950.00 | Frames: 344931 | Epsilon: 0.6551


Training:   3%|▎         | 345308/10000000 [34:46<25:00:43, 107.22it/s]

Episode 843 completed | Reward: 740.00 | Avg Reward: 945.20 | Frames: 345299 | Epsilon: 0.6547


Training:   3%|▎         | 345783/10000000 [34:51<25:34:02, 104.89it/s]

Episode 844 completed | Reward: 1380.00 | Avg Reward: 954.00 | Frames: 345771 | Epsilon: 0.6542


Training:   3%|▎         | 346188/10000000 [34:54<25:13:23, 106.31it/s]

Episode 845 completed | Reward: 1620.00 | Avg Reward: 962.00 | Frames: 346178 | Epsilon: 0.6538


Training:   3%|▎         | 346663/10000000 [34:59<26:38:49, 100.63it/s]

Episode 846 completed | Reward: 1020.00 | Avg Reward: 966.00 | Frames: 346650 | Epsilon: 0.6533


Training:   3%|▎         | 347043/10000000 [35:02<26:24:27, 101.54it/s]

Episode 847 completed | Reward: 760.00 | Avg Reward: 955.20 | Frames: 347026 | Epsilon: 0.6530


Training:   3%|▎         | 347435/10000000 [35:06<26:09:25, 102.51it/s]

Episode 848 completed | Reward: 460.00 | Avg Reward: 952.40 | Frames: 347418 | Epsilon: 0.6526


Training:   3%|▎         | 347841/10000000 [35:10<33:38:37, 79.69it/s] 

Episode 849 completed | Reward: 960.00 | Avg Reward: 949.60 | Frames: 347840 | Epsilon: 0.6522

Memory usage: 1.10 GB


Training:   3%|▎         | 348319/10000000 [35:15<26:23:16, 101.60it/s]

Episode 850 completed | Reward: 1380.00 | Avg Reward: 959.20 | Frames: 348304 | Epsilon: 0.6517


Training:   3%|▎         | 348711/10000000 [35:18<25:49:20, 103.82it/s]

Episode 851 completed | Reward: 740.00 | Avg Reward: 953.40 | Frames: 348696 | Epsilon: 0.6513


Training:   3%|▎         | 349081/10000000 [35:22<26:13:12, 102.24it/s]

Episode 852 completed | Reward: 740.00 | Avg Reward: 950.80 | Frames: 349068 | Epsilon: 0.6509


Training:   3%|▎         | 349525/10000000 [35:26<26:19:50, 101.81it/s]

Episode 853 completed | Reward: 1420.00 | Avg Reward: 950.60 | Frames: 349508 | Epsilon: 0.6505


Training:   3%|▎         | 349945/10000000 [35:30<26:17:34, 101.95it/s]

Episode 854 completed | Reward: 1480.00 | Avg Reward: 958.40 | Frames: 349936 | Epsilon: 0.6501


Training:   4%|▎         | 350245/10000000 [35:33<25:33:12, 104.90it/s]

Episode 855 completed | Reward: 680.00 | Avg Reward: 952.80 | Frames: 350230 | Epsilon: 0.6498


Training:   4%|▎         | 350651/10000000 [35:37<25:58:08, 103.21it/s]

Episode 856 completed | Reward: 980.00 | Avg Reward: 952.00 | Frames: 350638 | Epsilon: 0.6494


Training:   4%|▎         | 351007/10000000 [35:40<26:19:40, 101.80it/s]

Episode 857 completed | Reward: 900.00 | Avg Reward: 954.40 | Frames: 350994 | Epsilon: 0.6490


Training:   4%|▎         | 351425/10000000 [35:44<26:20:06, 101.77it/s]

Episode 858 completed | Reward: 860.00 | Avg Reward: 948.20 | Frames: 351409 | Epsilon: 0.6486


Training:   4%|▎         | 351869/10000000 [35:48<26:13:28, 102.20it/s]

Episode 859 completed | Reward: 1100.00 | Avg Reward: 953.20 | Frames: 351853 | Epsilon: 0.6481


Training:   4%|▎         | 352287/10000000 [35:52<26:08:05, 102.54it/s]

Episode 860 completed | Reward: 880.00 | Avg Reward: 951.60 | Frames: 352270 | Epsilon: 0.6477


Training:   4%|▎         | 352691/10000000 [35:56<26:34:16, 100.85it/s]

Episode 861 completed | Reward: 780.00 | Avg Reward: 949.20 | Frames: 352676 | Epsilon: 0.6473


Training:   4%|▎         | 353047/10000000 [35:59<26:24:54, 101.45it/s]

Episode 862 completed | Reward: 1000.00 | Avg Reward: 936.40 | Frames: 353031 | Epsilon: 0.6470


Training:   4%|▎         | 353451/10000000 [36:03<26:18:50, 101.83it/s]

Episode 863 completed | Reward: 1400.00 | Avg Reward: 943.00 | Frames: 353435 | Epsilon: 0.6466


Training:   4%|▎         | 353843/10000000 [36:07<26:28:16, 101.22it/s]

Episode 864 completed | Reward: 280.00 | Avg Reward: 934.40 | Frames: 353826 | Epsilon: 0.6462


Training:   4%|▎         | 354343/10000000 [36:12<25:37:17, 104.57it/s]

Episode 865 completed | Reward: 1740.00 | Avg Reward: 946.20 | Frames: 354329 | Epsilon: 0.6457


Training:   4%|▎         | 354676/10000000 [36:15<25:33:39, 104.82it/s]

Episode 866 completed | Reward: 1860.00 | Avg Reward: 958.00 | Frames: 354665 | Epsilon: 0.6453


Training:   4%|▎         | 355115/10000000 [36:19<26:37:04, 100.65it/s]

Episode 867 completed | Reward: 500.00 | Avg Reward: 951.60 | Frames: 355099 | Epsilon: 0.6449


Training:   4%|▎         | 355567/10000000 [36:23<26:18:15, 101.85it/s]

Episode 868 completed | Reward: 780.00 | Avg Reward: 951.40 | Frames: 355551 | Epsilon: 0.6444


Training:   4%|▎         | 355947/10000000 [36:27<26:25:22, 101.39it/s]

Episode 869 completed | Reward: 620.00 | Avg Reward: 952.20 | Frames: 355935 | Epsilon: 0.6441


Training:   4%|▎         | 356315/10000000 [36:31<26:04:13, 102.75it/s]

Episode 870 completed | Reward: 700.00 | Avg Reward: 953.80 | Frames: 356301 | Epsilon: 0.6437


Training:   4%|▎         | 356684/10000000 [36:34<25:35:25, 104.68it/s]

Episode 871 completed | Reward: 820.00 | Avg Reward: 952.80 | Frames: 356675 | Epsilon: 0.6433


Training:   4%|▎         | 357111/10000000 [36:38<26:49:35, 99.85it/s] 

Episode 872 completed | Reward: 420.00 | Avg Reward: 953.60 | Frames: 357095 | Epsilon: 0.6429


Training:   4%|▎         | 357517/10000000 [36:42<26:02:39, 102.84it/s]

Episode 873 completed | Reward: 800.00 | Avg Reward: 951.00 | Frames: 357499 | Epsilon: 0.6425


Training:   4%|▎         | 357864/10000000 [36:46<32:26:12, 82.57it/s] 

Episode 874 completed | Reward: 1240.00 | Avg Reward: 944.80 | Frames: 357862 | Epsilon: 0.6421

Memory usage: 1.10 GB


Training:   4%|▎         | 358267/10000000 [36:50<26:33:09, 100.87it/s]

Episode 875 completed | Reward: 380.00 | Avg Reward: 941.80 | Frames: 358254 | Epsilon: 0.6417


Training:   4%|▎         | 358611/10000000 [36:53<26:21:33, 101.60it/s]

Episode 876 completed | Reward: 880.00 | Avg Reward: 943.60 | Frames: 358592 | Epsilon: 0.6414


Training:   4%|▎         | 358991/10000000 [36:57<26:15:28, 101.99it/s]

Episode 877 completed | Reward: 1680.00 | Avg Reward: 955.40 | Frames: 358973 | Epsilon: 0.6410


Training:   4%|▎         | 359455/10000000 [37:01<26:23:08, 101.49it/s]

Episode 878 completed | Reward: 540.00 | Avg Reward: 952.20 | Frames: 359441 | Epsilon: 0.6406


Training:   4%|▎         | 359932/10000000 [37:06<25:42:01, 104.19it/s]

Episode 879 completed | Reward: 780.00 | Avg Reward: 954.20 | Frames: 359921 | Epsilon: 0.6401


Training:   4%|▎         | 360347/10000000 [37:10<26:06:56, 102.53it/s]

Episode 880 completed | Reward: 440.00 | Avg Reward: 952.00 | Frames: 360331 | Epsilon: 0.6397


Training:   4%|▎         | 360798/10000000 [37:14<28:23:51, 94.29it/s]

Episode 881 completed | Reward: 1000.00 | Avg Reward: 941.20 | Frames: 360787 | Epsilon: 0.6392


Training:   4%|▎         | 361145/10000000 [37:17<26:39:52, 100.41it/s]

Episode 882 completed | Reward: 1040.00 | Avg Reward: 945.20 | Frames: 361134 | Epsilon: 0.6389


Training:   4%|▎         | 361577/10000000 [37:22<26:37:27, 100.56it/s]

Episode 883 completed | Reward: 1400.00 | Avg Reward: 951.20 | Frames: 361560 | Epsilon: 0.6384


Training:   4%|▎         | 361875/10000000 [37:24<26:29:23, 101.07it/s]

Episode 884 completed | Reward: 420.00 | Avg Reward: 946.60 | Frames: 361860 | Epsilon: 0.6381


Training:   4%|▎         | 362341/10000000 [37:29<26:24:18, 101.39it/s]

Episode 885 completed | Reward: 1960.00 | Avg Reward: 959.40 | Frames: 362327 | Epsilon: 0.6377


Training:   4%|▎         | 362748/10000000 [37:33<25:48:26, 103.73it/s]

Episode 886 completed | Reward: 500.00 | Avg Reward: 958.40 | Frames: 362738 | Epsilon: 0.6373


Training:   4%|▎         | 363115/10000000 [37:36<26:27:25, 101.18it/s]

Episode 887 completed | Reward: 1400.00 | Avg Reward: 960.60 | Frames: 363098 | Epsilon: 0.6369


Training:   4%|▎         | 363617/10000000 [37:41<26:30:06, 101.00it/s]

Episode 888 completed | Reward: 900.00 | Avg Reward: 958.20 | Frames: 363603 | Epsilon: 0.6364


Training:   4%|▎         | 364037/10000000 [37:46<26:23:32, 101.42it/s]

Episode 889 completed | Reward: 500.00 | Avg Reward: 953.40 | Frames: 364021 | Epsilon: 0.6360


Training:   4%|▎         | 364420/10000000 [37:49<26:23:48, 101.40it/s]

Episode 890 completed | Reward: 680.00 | Avg Reward: 956.00 | Frames: 364409 | Epsilon: 0.6356


Training:   4%|▎         | 364801/10000000 [37:53<26:52:42, 99.58it/s]

Episode 891 completed | Reward: 800.00 | Avg Reward: 954.40 | Frames: 364788 | Epsilon: 0.6352


Training:   4%|▎         | 365209/10000000 [37:57<26:26:38, 101.21it/s]

Episode 892 completed | Reward: 680.00 | Avg Reward: 954.20 | Frames: 365192 | Epsilon: 0.6348


Training:   4%|▎         | 365581/10000000 [38:01<26:45:16, 100.03it/s]

Episode 893 completed | Reward: 940.00 | Avg Reward: 953.40 | Frames: 365574 | Epsilon: 0.6344


Training:   4%|▎         | 366001/10000000 [38:05<26:32:23, 100.83it/s]

Episode 894 completed | Reward: 1440.00 | Avg Reward: 957.20 | Frames: 365990 | Epsilon: 0.6340


Training:   4%|▎         | 366396/10000000 [38:09<25:58:00, 103.05it/s]

Episode 895 completed | Reward: 1360.00 | Avg Reward: 958.00 | Frames: 366386 | Epsilon: 0.6336


Training:   4%|▎         | 366907/10000000 [38:14<27:03:28, 98.89it/s] 

Episode 896 completed | Reward: 920.00 | Avg Reward: 958.40 | Frames: 366890 | Epsilon: 0.6331


Training:   4%|▎         | 367312/10000000 [38:18<25:55:12, 103.23it/s]

Episode 897 completed | Reward: 440.00 | Avg Reward: 955.80 | Frames: 367302 | Epsilon: 0.6327


Training:   4%|▎         | 367692/10000000 [38:21<26:19:00, 101.67it/s]

Episode 898 completed | Reward: 1100.00 | Avg Reward: 964.80 | Frames: 367681 | Epsilon: 0.6323


Training:   4%|▎         | 368252/10000000 [38:27<34:49:46, 76.82it/s] 

Episode 899 completed | Reward: 900.00 | Avg Reward: 964.40 | Frames: 368251 | Epsilon: 0.6317

Memory usage: 1.11 GB


Training:   4%|▎         | 368741/10000000 [38:32<26:42:31, 100.17it/s]

Episode 900 completed | Reward: 1220.00 | Avg Reward: 966.00 | Frames: 368731 | Epsilon: 0.6313


Training:   4%|▎         | 369207/10000000 [38:36<26:28:59, 101.02it/s]

Episode 901 completed | Reward: 1600.00 | Avg Reward: 970.00 | Frames: 369193 | Epsilon: 0.6308


Training:   4%|▎         | 369683/10000000 [38:41<26:43:28, 100.10it/s]

Episode 902 completed | Reward: 760.00 | Avg Reward: 976.60 | Frames: 369669 | Epsilon: 0.6303


Training:   4%|▎         | 370413/10000000 [38:48<26:40:48, 100.26it/s]

Episode 903 completed | Reward: 340.00 | Avg Reward: 973.20 | Frames: 370397 | Epsilon: 0.6296


Training:   4%|▎         | 370807/10000000 [38:52<26:23:07, 101.37it/s]

Episode 904 completed | Reward: 1180.00 | Avg Reward: 974.60 | Frames: 370793 | Epsilon: 0.6292


Training:   4%|▎         | 371225/10000000 [38:56<26:48:06, 99.79it/s]

Episode 905 completed | Reward: 220.00 | Avg Reward: 967.40 | Frames: 371210 | Epsilon: 0.6288


Training:   4%|▎         | 371669/10000000 [39:00<26:56:33, 99.27it/s] 

Episode 906 completed | Reward: 820.00 | Avg Reward: 960.00 | Frames: 371663 | Epsilon: 0.6283


Training:   4%|▎         | 372088/10000000 [39:04<25:20:03, 105.56it/s]

Episode 907 completed | Reward: 1080.00 | Avg Reward: 966.00 | Frames: 372078 | Epsilon: 0.6279


Training:   4%|▎         | 372541/10000000 [39:09<26:42:34, 100.13it/s]

Episode 908 completed | Reward: 840.00 | Avg Reward: 965.20 | Frames: 372528 | Epsilon: 0.6275


Training:   4%|▎         | 372912/10000000 [39:13<27:29:02, 97.30it/s] 

Episode 909 completed | Reward: 860.00 | Avg Reward: 964.40 | Frames: 372901 | Epsilon: 0.6271


Training:   4%|▎         | 373400/10000000 [39:17<26:11:46, 102.08it/s]

Episode 910 completed | Reward: 1740.00 | Avg Reward: 977.00 | Frames: 373390 | Epsilon: 0.6266


Training:   4%|▎         | 373827/10000000 [39:22<27:09:09, 98.48it/s] 

Episode 911 completed | Reward: 640.00 | Avg Reward: 975.20 | Frames: 373811 | Epsilon: 0.6262


Training:   4%|▎         | 374209/10000000 [39:26<28:15:44, 94.61it/s]

Episode 912 completed | Reward: 980.00 | Avg Reward: 978.20 | Frames: 374196 | Epsilon: 0.6258


Training:   4%|▎         | 374627/10000000 [39:30<26:35:29, 100.55it/s]

Episode 913 completed | Reward: 1120.00 | Avg Reward: 964.60 | Frames: 374610 | Epsilon: 0.6254


Training:   4%|▍         | 375080/10000000 [39:34<26:21:05, 101.46it/s]

Episode 914 completed | Reward: 760.00 | Avg Reward: 959.20 | Frames: 375070 | Epsilon: 0.6249


Training:   4%|▍         | 375447/10000000 [39:38<28:02:02, 95.37it/s]

Episode 915 completed | Reward: 940.00 | Avg Reward: 955.00 | Frames: 375439 | Epsilon: 0.6246


Training:   4%|▍         | 375793/10000000 [39:41<26:59:22, 99.05it/s] 

Episode 916 completed | Reward: 840.00 | Avg Reward: 945.00 | Frames: 375782 | Epsilon: 0.6242


Training:   4%|▍         | 376187/10000000 [39:45<27:08:26, 98.50it/s] 

Episode 917 completed | Reward: 760.00 | Avg Reward: 936.60 | Frames: 376173 | Epsilon: 0.6238


Training:   4%|▍         | 376735/10000000 [39:51<27:19:39, 97.82it/s]

Episode 918 completed | Reward: 1260.00 | Avg Reward: 940.00 | Frames: 376726 | Epsilon: 0.6233


Training:   4%|▍         | 377223/10000000 [39:56<26:47:04, 99.80it/s] 

Episode 919 completed | Reward: 1180.00 | Avg Reward: 944.40 | Frames: 377206 | Epsilon: 0.6228


Training:   4%|▍         | 377675/10000000 [40:00<26:56:07, 99.23it/s] 

Episode 920 completed | Reward: 580.00 | Avg Reward: 938.20 | Frames: 377659 | Epsilon: 0.6223


Training:   4%|▍         | 378067/10000000 [40:04<27:06:35, 98.59it/s] 

Episode 921 completed | Reward: 640.00 | Avg Reward: 938.40 | Frames: 378048 | Epsilon: 0.6220


Training:   4%|▍         | 378484/10000000 [40:08<34:03:24, 78.48it/s] 

Episode 922 completed | Reward: 540.00 | Avg Reward: 936.60 | Frames: 378477 | Epsilon: 0.6215

Memory usage: 1.11 GB


Training:   4%|▍         | 378925/10000000 [40:13<28:01:20, 95.37it/s]

Episode 923 completed | Reward: 1000.00 | Avg Reward: 935.80 | Frames: 378912 | Epsilon: 0.6211


Training:   4%|▍         | 379339/10000000 [40:17<26:56:05, 99.22it/s] 

Episode 924 completed | Reward: 600.00 | Avg Reward: 937.60 | Frames: 379323 | Epsilon: 0.6207


Training:   4%|▍         | 379695/10000000 [40:21<27:10:52, 98.31it/s] 

Episode 925 completed | Reward: 980.00 | Avg Reward: 942.60 | Frames: 379683 | Epsilon: 0.6203


Training:   4%|▍         | 380073/10000000 [40:24<28:30:54, 93.71it/s]

Episode 926 completed | Reward: 980.00 | Avg Reward: 941.80 | Frames: 380060 | Epsilon: 0.6199


Training:   4%|▍         | 380478/10000000 [40:29<28:03:38, 95.23it/s]

Episode 927 completed | Reward: 1860.00 | Avg Reward: 949.60 | Frames: 380467 | Epsilon: 0.6195


Training:   4%|▍         | 380859/10000000 [40:32<27:02:11, 98.83it/s] 

Episode 928 completed | Reward: 620.00 | Avg Reward: 949.60 | Frames: 380845 | Epsilon: 0.6192


Training:   4%|▍         | 381286/10000000 [40:37<28:37:31, 93.34it/s]

Episode 929 completed | Reward: 620.00 | Avg Reward: 949.40 | Frames: 381275 | Epsilon: 0.6187


Training:   4%|▍         | 381593/10000000 [40:40<29:15:36, 91.31it/s]

Episode 930 completed | Reward: 600.00 | Avg Reward: 937.20 | Frames: 381577 | Epsilon: 0.6184


Training:   4%|▍         | 382082/10000000 [40:45<28:23:34, 94.10it/s]

Episode 931 completed | Reward: 880.00 | Avg Reward: 945.00 | Frames: 382067 | Epsilon: 0.6179


Training:   4%|▍         | 382438/10000000 [40:48<28:18:14, 94.39it/s]

Episode 932 completed | Reward: 840.00 | Avg Reward: 935.00 | Frames: 382423 | Epsilon: 0.6176


Training:   4%|▍         | 382916/10000000 [40:53<26:13:46, 101.85it/s]

Episode 933 completed | Reward: 700.00 | Avg Reward: 925.40 | Frames: 382905 | Epsilon: 0.6171


Training:   4%|▍         | 383429/10000000 [40:58<27:11:29, 98.24it/s]

Episode 934 completed | Reward: 1020.00 | Avg Reward: 926.40 | Frames: 383416 | Epsilon: 0.6166


Training:   4%|▍         | 383845/10000000 [41:03<28:54:18, 92.41it/s]

Episode 935 completed | Reward: 460.00 | Avg Reward: 924.20 | Frames: 383834 | Epsilon: 0.6162


Training:   4%|▍         | 384217/10000000 [41:06<28:07:37, 94.96it/s]

Episode 936 completed | Reward: 680.00 | Avg Reward: 922.20 | Frames: 384204 | Epsilon: 0.6158


Training:   4%|▍         | 384585/10000000 [41:10<28:59:11, 92.14it/s]

Episode 937 completed | Reward: 1180.00 | Avg Reward: 924.60 | Frames: 384571 | Epsilon: 0.6154


Training:   4%|▍         | 385026/10000000 [41:15<28:14:11, 94.59it/s]

Episode 938 completed | Reward: 520.00 | Avg Reward: 922.40 | Frames: 385015 | Epsilon: 0.6150


Training:   4%|▍         | 385347/10000000 [41:18<27:23:55, 97.48it/s]

Episode 939 completed | Reward: 1000.00 | Avg Reward: 918.40 | Frames: 385339 | Epsilon: 0.6147


Training:   4%|▍         | 385797/10000000 [41:23<28:42:33, 93.02it/s]

Episode 940 completed | Reward: 920.00 | Avg Reward: 918.60 | Frames: 385783 | Epsilon: 0.6142


Training:   4%|▍         | 386213/10000000 [41:27<28:48:56, 92.68it/s]

Episode 941 completed | Reward: 480.00 | Avg Reward: 919.80 | Frames: 386200 | Epsilon: 0.6138


Training:   4%|▍         | 386617/10000000 [41:31<28:48:42, 92.68it/s]

Episode 942 completed | Reward: 540.00 | Avg Reward: 915.00 | Frames: 386605 | Epsilon: 0.6134


Training:   4%|▍         | 387033/10000000 [41:35<29:09:11, 91.59it/s]

Episode 943 completed | Reward: 1080.00 | Avg Reward: 918.40 | Frames: 387016 | Epsilon: 0.6130


Training:   4%|▍         | 387415/10000000 [41:39<27:51:54, 95.82it/s]

Episode 944 completed | Reward: 480.00 | Avg Reward: 909.40 | Frames: 387405 | Epsilon: 0.6126


Training:   4%|▍         | 387807/10000000 [41:43<27:07:38, 98.43it/s] 

Episode 945 completed | Reward: 560.00 | Avg Reward: 898.80 | Frames: 387790 | Epsilon: 0.6122


Training:   4%|▍         | 388197/10000000 [41:47<29:43:10, 89.84it/s]

Episode 946 completed | Reward: 940.00 | Avg Reward: 898.00 | Frames: 388184 | Epsilon: 0.6118


Training:   4%|▍         | 388579/10000000 [41:51<36:32:44, 73.05it/s]

Episode 947 completed | Reward: 920.00 | Avg Reward: 899.60 | Frames: 388577 | Epsilon: 0.6114

Memory usage: 1.11 GB


Training:   4%|▍         | 388994/10000000 [41:55<28:22:25, 94.09it/s]

Episode 948 completed | Reward: 820.00 | Avg Reward: 903.20 | Frames: 388983 | Epsilon: 0.6110


Training:   4%|▍         | 389327/10000000 [41:59<27:30:00, 97.08it/s]

Episode 949 completed | Reward: 800.00 | Avg Reward: 901.60 | Frames: 389318 | Epsilon: 0.6107


Training:   4%|▍         | 389729/10000000 [42:03<29:21:50, 90.91it/s]

Episode 950 completed | Reward: 940.00 | Avg Reward: 897.20 | Frames: 389713 | Epsilon: 0.6103


Training:   4%|▍         | 390171/10000000 [42:08<27:19:01, 97.72it/s]

Episode 951 completed | Reward: 800.00 | Avg Reward: 897.80 | Frames: 390162 | Epsilon: 0.6098


Training:   4%|▍         | 390623/10000000 [42:12<27:44:54, 96.20it/s]

Episode 952 completed | Reward: 1940.00 | Avg Reward: 909.80 | Frames: 390614 | Epsilon: 0.6094


Training:   4%|▍         | 391051/10000000 [42:17<27:35:33, 96.73it/s]

Episode 953 completed | Reward: 240.00 | Avg Reward: 898.00 | Frames: 391043 | Epsilon: 0.6090


Training:   4%|▍         | 391467/10000000 [42:21<27:54:23, 95.64it/s]

Episode 954 completed | Reward: 1340.00 | Avg Reward: 896.60 | Frames: 391458 | Epsilon: 0.6085


Training:   4%|▍         | 391905/10000000 [42:26<29:18:40, 91.05it/s]

Episode 955 completed | Reward: 540.00 | Avg Reward: 895.20 | Frames: 391889 | Epsilon: 0.6081


Training:   4%|▍         | 392359/10000000 [42:30<27:25:11, 97.33it/s]

Episode 956 completed | Reward: 520.00 | Avg Reward: 890.60 | Frames: 392349 | Epsilon: 0.6077


Training:   4%|▍         | 392774/10000000 [42:34<29:13:55, 91.29it/s]

Episode 957 completed | Reward: 1400.00 | Avg Reward: 895.60 | Frames: 392763 | Epsilon: 0.6072


Training:   4%|▍         | 393201/10000000 [42:39<29:08:20, 91.58it/s]

Episode 958 completed | Reward: 740.00 | Avg Reward: 894.40 | Frames: 393186 | Epsilon: 0.6068


Training:   4%|▍         | 393809/10000000 [42:45<28:47:18, 92.69it/s]

Episode 959 completed | Reward: 1600.00 | Avg Reward: 899.40 | Frames: 393795 | Epsilon: 0.6062


Training:   4%|▍         | 394263/10000000 [42:50<27:35:58, 96.68it/s]

Episode 960 completed | Reward: 1300.00 | Avg Reward: 903.60 | Frames: 394255 | Epsilon: 0.6057


Training:   4%|▍         | 394581/10000000 [42:53<28:50:41, 92.50it/s]

Episode 961 completed | Reward: 420.00 | Avg Reward: 900.00 | Frames: 394567 | Epsilon: 0.6054


Training:   4%|▍         | 394999/10000000 [42:58<27:57:54, 95.41it/s]

Episode 962 completed | Reward: 520.00 | Avg Reward: 895.20 | Frames: 394990 | Epsilon: 0.6050


Training:   4%|▍         | 395441/10000000 [43:02<28:41:24, 92.99it/s]

Episode 963 completed | Reward: 1400.00 | Avg Reward: 895.20 | Frames: 395428 | Epsilon: 0.6046


Training:   4%|▍         | 395799/10000000 [43:06<28:55:21, 92.24it/s]

Episode 964 completed | Reward: 2020.00 | Avg Reward: 912.60 | Frames: 395791 | Epsilon: 0.6042


Training:   4%|▍         | 396167/10000000 [43:10<27:39:45, 96.44it/s]

Episode 965 completed | Reward: 980.00 | Avg Reward: 905.00 | Frames: 396159 | Epsilon: 0.6038


Training:   4%|▍         | 396667/10000000 [43:15<29:16:13, 91.14it/s]

Episode 966 completed | Reward: 840.00 | Avg Reward: 894.80 | Frames: 396659 | Epsilon: 0.6033


Training:   4%|▍         | 397071/10000000 [43:19<28:11:41, 94.61it/s]

Episode 967 completed | Reward: 620.00 | Avg Reward: 896.00 | Frames: 397063 | Epsilon: 0.6029


Training:   4%|▍         | 397809/10000000 [43:27<28:58:43, 92.04it/s]

Episode 968 completed | Reward: 2620.00 | Avg Reward: 914.40 | Frames: 397793 | Epsilon: 0.6022


Training:   4%|▍         | 398297/10000000 [43:32<29:29:21, 90.44it/s]

Episode 969 completed | Reward: 680.00 | Avg Reward: 915.00 | Frames: 398286 | Epsilon: 0.6017


Training:   4%|▍         | 398693/10000000 [43:36<27:29:34, 97.01it/s]

Episode 970 completed | Reward: 740.00 | Avg Reward: 915.40 | Frames: 398693 | Epsilon: 0.6013

Memory usage: 1.11 GB


Training:   4%|▍         | 399118/10000000 [43:41<28:24:07, 93.90it/s]

Episode 971 completed | Reward: 640.00 | Avg Reward: 913.60 | Frames: 399107 | Epsilon: 0.6009


Training:   4%|▍         | 399533/10000000 [43:45<29:19:16, 90.95it/s]

Episode 972 completed | Reward: 1000.00 | Avg Reward: 919.40 | Frames: 399521 | Epsilon: 0.6005


Training:   4%|▍         | 399901/10000000 [43:49<29:03:36, 91.76it/s]

Episode 973 completed | Reward: 720.00 | Avg Reward: 918.60 | Frames: 399890 | Epsilon: 0.6001


Training:   4%|▍         | 400329/10000000 [43:53<28:37:27, 93.16it/s]

Episode 974 completed | Reward: 760.00 | Avg Reward: 913.80 | Frames: 400315 | Epsilon: 0.5997


Training:   4%|▍         | 400867/10000000 [43:59<27:28:37, 97.04it/s]

Episode 975 completed | Reward: 1200.00 | Avg Reward: 922.00 | Frames: 400857 | Epsilon: 0.5991


Training:   4%|▍         | 401173/10000000 [44:02<29:44:47, 89.64it/s]

Episode 976 completed | Reward: 540.00 | Avg Reward: 918.60 | Frames: 401157 | Epsilon: 0.5988


Training:   4%|▍         | 401577/10000000 [44:06<29:14:50, 91.16it/s]

Episode 977 completed | Reward: 740.00 | Avg Reward: 909.20 | Frames: 401560 | Epsilon: 0.5984


Training:   4%|▍         | 401993/10000000 [44:11<29:50:03, 89.36it/s]

Episode 978 completed | Reward: 1040.00 | Avg Reward: 914.20 | Frames: 401982 | Epsilon: 0.5980


Training:   4%|▍         | 402373/10000000 [44:15<29:03:30, 91.75it/s]

Episode 979 completed | Reward: 640.00 | Avg Reward: 912.80 | Frames: 402358 | Epsilon: 0.5976


Training:   4%|▍         | 402801/10000000 [44:19<28:57:26, 92.06it/s]

Episode 980 completed | Reward: 840.00 | Avg Reward: 916.80 | Frames: 402788 | Epsilon: 0.5972


Training:   4%|▍         | 403110/10000000 [44:22<28:25:27, 93.79it/s]

Episode 981 completed | Reward: 380.00 | Avg Reward: 910.60 | Frames: 403098 | Epsilon: 0.5969


Training:   4%|▍         | 403515/10000000 [44:27<29:00:55, 91.87it/s]

Episode 982 completed | Reward: 540.00 | Avg Reward: 905.60 | Frames: 403508 | Epsilon: 0.5965


Training:   4%|▍         | 403955/10000000 [44:31<27:31:46, 96.83it/s]

Episode 983 completed | Reward: 480.00 | Avg Reward: 896.40 | Frames: 403945 | Epsilon: 0.5961


Training:   4%|▍         | 404359/10000000 [44:36<29:14:48, 91.14it/s]

Episode 984 completed | Reward: 1340.00 | Avg Reward: 905.60 | Frames: 404353 | Epsilon: 0.5956


Training:   4%|▍         | 404725/10000000 [44:39<29:51:40, 89.26it/s]

Episode 985 completed | Reward: 980.00 | Avg Reward: 895.80 | Frames: 404714 | Epsilon: 0.5953


Training:   4%|▍         | 405155/10000000 [44:44<27:56:07, 95.41it/s]

Episode 986 completed | Reward: 600.00 | Avg Reward: 896.80 | Frames: 405146 | Epsilon: 0.5949


Training:   4%|▍         | 405583/10000000 [44:48<28:49:14, 92.47it/s]

Episode 987 completed | Reward: 640.00 | Avg Reward: 889.20 | Frames: 405575 | Epsilon: 0.5944


Training:   4%|▍         | 406095/10000000 [44:54<28:44:52, 92.70it/s]

Episode 988 completed | Reward: 860.00 | Avg Reward: 888.80 | Frames: 406087 | Epsilon: 0.5939


Training:   4%|▍         | 406561/10000000 [44:59<28:39:34, 92.98it/s]

Episode 989 completed | Reward: 700.00 | Avg Reward: 890.80 | Frames: 406548 | Epsilon: 0.5935


Training:   4%|▍         | 406979/10000000 [45:03<28:46:45, 92.59it/s]

Episode 990 completed | Reward: 480.00 | Avg Reward: 888.80 | Frames: 406970 | Epsilon: 0.5930


Training:   4%|▍         | 407417/10000000 [45:08<29:11:21, 91.29it/s]

Episode 991 completed | Reward: 420.00 | Avg Reward: 885.00 | Frames: 407401 | Epsilon: 0.5926


Training:   4%|▍         | 407763/10000000 [45:11<28:27:09, 93.65it/s]

Episode 992 completed | Reward: 380.00 | Avg Reward: 882.00 | Frames: 407755 | Epsilon: 0.5922


Training:   4%|▍         | 408225/10000000 [45:16<30:35:07, 87.11it/s]

Episode 993 completed | Reward: 680.00 | Avg Reward: 879.40 | Frames: 408211 | Epsilon: 0.5918


Training:   4%|▍         | 408667/10000000 [45:21<28:16:10, 94.24it/s]

Episode 994 completed | Reward: 1040.00 | Avg Reward: 875.40 | Frames: 408659 | Epsilon: 0.5913


Training:   4%|▍         | 409049/10000000 [45:25<37:31:47, 70.99it/s]

Episode 995 completed | Reward: 540.00 | Avg Reward: 867.20 | Frames: 409048 | Epsilon: 0.5910

Memory usage: 1.11 GB


Training:   4%|▍         | 409453/10000000 [45:30<30:11:07, 88.26it/s]

Episode 996 completed | Reward: 680.00 | Avg Reward: 864.80 | Frames: 409439 | Epsilon: 0.5906


Training:   4%|▍         | 409763/10000000 [45:33<28:52:47, 92.24it/s]

Episode 997 completed | Reward: 740.00 | Avg Reward: 867.80 | Frames: 409753 | Epsilon: 0.5902


Training:   4%|▍         | 410177/10000000 [45:37<29:26:36, 90.47it/s]

Episode 998 completed | Reward: 680.00 | Avg Reward: 863.60 | Frames: 410161 | Epsilon: 0.5898


Training:   4%|▍         | 410687/10000000 [45:43<28:29:52, 93.47it/s]

Episode 999 completed | Reward: 1540.00 | Avg Reward: 870.00 | Frames: 410679 | Epsilon: 0.5893


Training:   4%|▍         | 411138/10000000 [45:48<28:42:55, 92.76it/s]

Episode 1000 completed | Reward: 580.00 | Avg Reward: 863.60 | Frames: 411124 | Epsilon: 0.5889


Training:   4%|▍         | 411505/10000000 [45:52<30:44:26, 86.64it/s]

Episode 1001 completed | Reward: 1240.00 | Avg Reward: 860.00 | Frames: 411489 | Epsilon: 0.5885


Training:   4%|▍         | 411933/10000000 [45:56<30:29:52, 87.33it/s]

Episode 1002 completed | Reward: 620.00 | Avg Reward: 858.60 | Frames: 411917 | Epsilon: 0.5881


Training:   4%|▍         | 412421/10000000 [46:01<29:05:27, 91.55it/s]

Episode 1003 completed | Reward: 940.00 | Avg Reward: 864.60 | Frames: 412404 | Epsilon: 0.5876


Training:   4%|▍         | 412851/10000000 [46:06<29:20:17, 90.77it/s]

Episode 1004 completed | Reward: 920.00 | Avg Reward: 862.00 | Frames: 412841 | Epsilon: 0.5872


Training:   4%|▍         | 413325/10000000 [46:11<30:25:31, 87.52it/s]

Episode 1005 completed | Reward: 780.00 | Avg Reward: 867.60 | Frames: 413312 | Epsilon: 0.5867


Training:   4%|▍         | 413693/10000000 [46:15<30:47:53, 86.46it/s]

Episode 1006 completed | Reward: 680.00 | Avg Reward: 866.20 | Frames: 413680 | Epsilon: 0.5863


Training:   4%|▍         | 414135/10000000 [46:20<29:15:09, 91.03it/s]

Episode 1007 completed | Reward: 1200.00 | Avg Reward: 867.40 | Frames: 414126 | Epsilon: 0.5859


Training:   4%|▍         | 414515/10000000 [46:24<28:56:59, 91.97it/s]

Episode 1008 completed | Reward: 480.00 | Avg Reward: 863.80 | Frames: 414505 | Epsilon: 0.5855


Training:   4%|▍         | 414834/10000000 [46:28<30:24:10, 87.58it/s]

Episode 1009 completed | Reward: 440.00 | Avg Reward: 859.60 | Frames: 414823 | Epsilon: 0.5852


Training:   4%|▍         | 415321/10000000 [46:33<30:19:52, 87.78it/s]

Episode 1010 completed | Reward: 880.00 | Avg Reward: 851.00 | Frames: 415310 | Epsilon: 0.5847


Training:   4%|▍         | 415703/10000000 [46:37<28:13:23, 94.33it/s]

Episode 1011 completed | Reward: 1040.00 | Avg Reward: 855.00 | Frames: 415694 | Epsilon: 0.5843


Training:   4%|▍         | 416117/10000000 [46:42<30:44:47, 86.58it/s]

Episode 1012 completed | Reward: 880.00 | Avg Reward: 854.00 | Frames: 416106 | Epsilon: 0.5839


Training:   4%|▍         | 416474/10000000 [46:45<29:27:41, 90.36it/s]

Episode 1013 completed | Reward: 620.00 | Avg Reward: 849.00 | Frames: 416463 | Epsilon: 0.5835


Training:   4%|▍         | 416949/10000000 [46:51<30:33:30, 87.11it/s]

Episode 1014 completed | Reward: 1200.00 | Avg Reward: 853.40 | Frames: 416933 | Epsilon: 0.5831


Training:   4%|▍         | 417691/10000000 [46:59<29:37:05, 89.87it/s]

Episode 1015 completed | Reward: 1740.00 | Avg Reward: 861.40 | Frames: 417682 | Epsilon: 0.5823


Training:   4%|▍         | 418155/10000000 [47:04<30:09:32, 88.25it/s]

Episode 1016 completed | Reward: 400.00 | Avg Reward: 857.00 | Frames: 418146 | Epsilon: 0.5819


Training:   4%|▍         | 418535/10000000 [47:08<29:18:42, 90.80it/s]

Episode 1017 completed | Reward: 1340.00 | Avg Reward: 862.80 | Frames: 418526 | Epsilon: 0.5815


Training:   4%|▍         | 419167/10000000 [47:15<37:31:41, 70.92it/s]

Episode 1018 completed | Reward: 840.00 | Avg Reward: 858.60 | Frames: 419161 | Epsilon: 0.5808

Memory usage: 1.11 GB


Training:   4%|▍         | 419525/10000000 [47:19<31:06:13, 85.56it/s]

Episode 1019 completed | Reward: 920.00 | Avg Reward: 856.00 | Frames: 419512 | Epsilon: 0.5805


Training:   4%|▍         | 419929/10000000 [47:23<30:05:27, 88.44it/s]

Episode 1020 completed | Reward: 1720.00 | Avg Reward: 867.40 | Frames: 419916 | Epsilon: 0.5801


Training:   4%|▍         | 420394/10000000 [47:28<30:01:45, 88.61it/s]

Episode 1021 completed | Reward: 600.00 | Avg Reward: 867.00 | Frames: 420383 | Epsilon: 0.5796


Training:   4%|▍         | 420809/10000000 [47:33<30:17:44, 87.83it/s]

Episode 1022 completed | Reward: 440.00 | Avg Reward: 866.00 | Frames: 420793 | Epsilon: 0.5792


Training:   4%|▍         | 421142/10000000 [47:37<30:09:04, 88.25it/s]

Episode 1023 completed | Reward: 1160.00 | Avg Reward: 867.60 | Frames: 421131 | Epsilon: 0.5789


Training:   4%|▍         | 421533/10000000 [47:41<30:43:23, 86.60it/s]

Episode 1024 completed | Reward: 740.00 | Avg Reward: 869.00 | Frames: 421519 | Epsilon: 0.5785


Training:   4%|▍         | 421867/10000000 [47:45<29:50:46, 89.14it/s]

Episode 1025 completed | Reward: 700.00 | Avg Reward: 866.20 | Frames: 421859 | Epsilon: 0.5781


Training:   4%|▍         | 422233/10000000 [47:49<30:03:53, 88.49it/s]

Episode 1026 completed | Reward: 1040.00 | Avg Reward: 866.80 | Frames: 422217 | Epsilon: 0.5778


Training:   4%|▍         | 422589/10000000 [47:52<30:57:17, 85.94it/s]

Episode 1027 completed | Reward: 800.00 | Avg Reward: 856.20 | Frames: 422577 | Epsilon: 0.5774


Training:   4%|▍         | 422995/10000000 [47:57<29:10:24, 91.19it/s]

Episode 1028 completed | Reward: 1140.00 | Avg Reward: 861.40 | Frames: 422985 | Epsilon: 0.5770


Training:   4%|▍         | 423457/10000000 [48:02<31:01:17, 85.75it/s]

Episode 1029 completed | Reward: 1140.00 | Avg Reward: 866.60 | Frames: 423444 | Epsilon: 0.5766


Training:   4%|▍         | 423993/10000000 [48:08<30:17:41, 87.80it/s]

Episode 1030 completed | Reward: 840.00 | Avg Reward: 869.00 | Frames: 423976 | Epsilon: 0.5760


Training:   4%|▍         | 424373/10000000 [48:12<30:25:53, 87.41it/s]

Episode 1031 completed | Reward: 540.00 | Avg Reward: 865.60 | Frames: 424360 | Epsilon: 0.5756


Training:   4%|▍         | 424766/10000000 [48:16<30:20:55, 87.64it/s]

Episode 1032 completed | Reward: 840.00 | Avg Reward: 865.60 | Frames: 424757 | Epsilon: 0.5752


Training:   4%|▍         | 425171/10000000 [48:21<28:42:00, 92.67it/s]

Episode 1033 completed | Reward: 740.00 | Avg Reward: 866.00 | Frames: 425162 | Epsilon: 0.5748


Training:   4%|▍         | 425587/10000000 [48:25<29:55:19, 88.88it/s]

Episode 1034 completed | Reward: 1320.00 | Avg Reward: 869.00 | Frames: 425578 | Epsilon: 0.5744


Training:   4%|▍         | 425949/10000000 [48:29<30:37:40, 86.83it/s]

Episode 1035 completed | Reward: 860.00 | Avg Reward: 873.00 | Frames: 425932 | Epsilon: 0.5741


Training:   4%|▍         | 426353/10000000 [48:34<30:38:02, 86.81it/s]

Episode 1036 completed | Reward: 940.00 | Avg Reward: 875.60 | Frames: 426338 | Epsilon: 0.5737


Training:   4%|▍         | 426759/10000000 [48:38<29:51:31, 89.06it/s]

Episode 1037 completed | Reward: 1140.00 | Avg Reward: 875.20 | Frames: 426749 | Epsilon: 0.5733


Training:   4%|▍         | 427161/10000000 [48:43<30:17:31, 87.78it/s]

Episode 1038 completed | Reward: 940.00 | Avg Reward: 879.40 | Frames: 427147 | Epsilon: 0.5729


Training:   4%|▍         | 427565/10000000 [48:47<30:33:16, 87.03it/s]

Episode 1039 completed | Reward: 680.00 | Avg Reward: 876.20 | Frames: 427549 | Epsilon: 0.5725


Training:   4%|▍         | 428015/10000000 [48:52<30:28:19, 87.26it/s]

Episode 1040 completed | Reward: 940.00 | Avg Reward: 876.40 | Frames: 428002 | Epsilon: 0.5720


Training:   4%|▍         | 428395/10000000 [48:56<29:41:40, 89.54it/s]

Episode 1041 completed | Reward: 680.00 | Avg Reward: 878.40 | Frames: 428387 | Epsilon: 0.5716


Training:   4%|▍         | 428773/10000000 [49:01<31:41:42, 83.88it/s]

Episode 1042 completed | Reward: 820.00 | Avg Reward: 881.20 | Frames: 428762 | Epsilon: 0.5712


Training:   4%|▍         | 429222/10000000 [49:06<39:50:36, 66.72it/s]

Episode 1043 completed | Reward: 1040.00 | Avg Reward: 880.80 | Frames: 429218 | Epsilon: 0.5708

Memory usage: 1.11 GB


Training:   4%|▍         | 429614/10000000 [49:10<30:06:18, 88.31it/s]

Episode 1044 completed | Reward: 1040.00 | Avg Reward: 886.40 | Frames: 429603 | Epsilon: 0.5704


Training:   4%|▍         | 430065/10000000 [49:15<31:16:35, 84.99it/s]

Episode 1045 completed | Reward: 1520.00 | Avg Reward: 896.00 | Frames: 430054 | Epsilon: 0.5699


Training:   4%|▍         | 430459/10000000 [49:20<29:51:21, 89.03it/s]

Episode 1046 completed | Reward: 880.00 | Avg Reward: 895.40 | Frames: 430450 | Epsilon: 0.5696


Training:   4%|▍         | 430838/10000000 [49:24<31:24:39, 84.62it/s]

Episode 1047 completed | Reward: 640.00 | Avg Reward: 892.60 | Frames: 430830 | Epsilon: 0.5692


Training:   4%|▍         | 431206/10000000 [49:28<31:35:47, 84.12it/s]

Episode 1048 completed | Reward: 460.00 | Avg Reward: 889.00 | Frames: 431199 | Epsilon: 0.5688


Training:   4%|▍         | 431605/10000000 [49:32<30:18:05, 87.71it/s]

Episode 1049 completed | Reward: 720.00 | Avg Reward: 888.20 | Frames: 431590 | Epsilon: 0.5684


Training:   4%|▍         | 432057/10000000 [49:37<31:03:24, 85.58it/s]

Episode 1050 completed | Reward: 940.00 | Avg Reward: 888.20 | Frames: 432042 | Epsilon: 0.5680


Training:   4%|▍         | 432549/10000000 [49:43<31:52:31, 83.38it/s]

Episode 1051 completed | Reward: 900.00 | Avg Reward: 889.20 | Frames: 432539 | Epsilon: 0.5675


Training:   4%|▍         | 432997/10000000 [49:48<31:12:00, 85.18it/s]

Episode 1052 completed | Reward: 1080.00 | Avg Reward: 880.60 | Frames: 432986 | Epsilon: 0.5670


Training:   4%|▍         | 433385/10000000 [49:52<31:56:01, 83.22it/s]

Episode 1053 completed | Reward: 480.00 | Avg Reward: 883.00 | Frames: 433372 | Epsilon: 0.5666


Training:   4%|▍         | 433770/10000000 [49:57<30:04:22, 88.36it/s]

Episode 1054 completed | Reward: 340.00 | Avg Reward: 873.00 | Frames: 433762 | Epsilon: 0.5662


Training:   4%|▍         | 434245/10000000 [50:02<30:28:59, 87.17it/s]

Episode 1055 completed | Reward: 740.00 | Avg Reward: 875.00 | Frames: 434229 | Epsilon: 0.5658


Training:   4%|▍         | 434637/10000000 [50:06<31:13:31, 85.09it/s]

Episode 1056 completed | Reward: 740.00 | Avg Reward: 877.20 | Frames: 434625 | Epsilon: 0.5654


Training:   4%|▍         | 435117/10000000 [50:12<30:24:01, 87.40it/s]

Episode 1057 completed | Reward: 1100.00 | Avg Reward: 874.20 | Frames: 435101 | Epsilon: 0.5649


Training:   4%|▍         | 435539/10000000 [50:16<31:06:34, 85.40it/s]

Episode 1058 completed | Reward: 900.00 | Avg Reward: 875.80 | Frames: 435524 | Epsilon: 0.5645


Training:   4%|▍         | 436720/10000000 [50:30<29:01:27, 91.53it/s]

Episode 1059 completed | Reward: 3360.00 | Avg Reward: 893.40 | Frames: 436708 | Epsilon: 0.5633


Training:   4%|▍         | 437110/10000000 [50:34<30:51:46, 86.07it/s]

Episode 1060 completed | Reward: 740.00 | Avg Reward: 887.80 | Frames: 437105 | Epsilon: 0.5629


Training:   4%|▍         | 437478/10000000 [50:38<30:32:11, 86.99it/s]

Episode 1061 completed | Reward: 1300.00 | Avg Reward: 896.60 | Frames: 437467 | Epsilon: 0.5625


Training:   4%|▍         | 437990/10000000 [50:44<31:39:48, 83.89it/s]

Episode 1062 completed | Reward: 1140.00 | Avg Reward: 902.80 | Frames: 437987 | Epsilon: 0.5620


Training:   4%|▍         | 438606/10000000 [50:51<30:35:27, 86.82it/s]

Episode 1063 completed | Reward: 1260.00 | Avg Reward: 901.40 | Frames: 438597 | Epsilon: 0.5614


Training:   4%|▍         | 439025/10000000 [50:56<31:16:16, 84.93it/s]

Episode 1064 completed | Reward: 680.00 | Avg Reward: 888.00 | Frames: 439013 | Epsilon: 0.5610


Training:   4%|▍         | 439416/10000000 [51:00<36:28:26, 72.81it/s]

Episode 1065 completed | Reward: 680.00 | Avg Reward: 885.00 | Frames: 439415 | Epsilon: 0.5606

Memory usage: 1.11 GB


Training:   4%|▍         | 439742/10000000 [51:04<31:56:20, 83.15it/s]

Episode 1066 completed | Reward: 300.00 | Avg Reward: 879.60 | Frames: 439736 | Epsilon: 0.5603


Training:   4%|▍         | 440162/10000000 [51:09<31:17:19, 84.87it/s]

Episode 1067 completed | Reward: 480.00 | Avg Reward: 878.20 | Frames: 440158 | Epsilon: 0.5598


Training:   4%|▍         | 440494/10000000 [51:12<30:34:43, 86.84it/s]

Episode 1068 completed | Reward: 600.00 | Avg Reward: 858.00 | Frames: 440488 | Epsilon: 0.5595


Training:   4%|▍         | 440917/10000000 [51:17<32:31:38, 81.63it/s]

Episode 1069 completed | Reward: 2140.00 | Avg Reward: 872.60 | Frames: 440905 | Epsilon: 0.5591


Training:   4%|▍         | 441365/10000000 [51:22<31:27:10, 84.42it/s]

Episode 1070 completed | Reward: 1440.00 | Avg Reward: 879.60 | Frames: 441353 | Epsilon: 0.5586


Training:   4%|▍         | 441770/10000000 [51:27<32:10:28, 82.52it/s]

Episode 1071 completed | Reward: 1420.00 | Avg Reward: 887.40 | Frames: 441760 | Epsilon: 0.5582


Training:   4%|▍         | 442105/10000000 [51:31<30:58:48, 85.70it/s]

Episode 1072 completed | Reward: 1120.00 | Avg Reward: 888.60 | Frames: 442092 | Epsilon: 0.5579


Training:   4%|▍         | 442481/10000000 [51:35<30:56:34, 85.80it/s]

Episode 1073 completed | Reward: 1040.00 | Avg Reward: 891.80 | Frames: 442466 | Epsilon: 0.5575


Training:   4%|▍         | 443165/10000000 [51:43<32:01:21, 82.90it/s]

Episode 1074 completed | Reward: 920.00 | Avg Reward: 893.40 | Frames: 443152 | Epsilon: 0.5568


Training:   4%|▍         | 443486/10000000 [51:46<31:01:03, 85.58it/s]

Episode 1075 completed | Reward: 1220.00 | Avg Reward: 893.60 | Frames: 443477 | Epsilon: 0.5565


Training:   4%|▍         | 443874/10000000 [51:51<31:23:14, 84.57it/s]

Episode 1076 completed | Reward: 920.00 | Avg Reward: 897.40 | Frames: 443865 | Epsilon: 0.5561


Training:   4%|▍         | 444281/10000000 [51:56<31:53:59, 83.21it/s]

Episode 1077 completed | Reward: 500.00 | Avg Reward: 895.00 | Frames: 444269 | Epsilon: 0.5557


Training:   4%|▍         | 444701/10000000 [52:00<31:53:18, 83.24it/s]

Episode 1078 completed | Reward: 960.00 | Avg Reward: 894.20 | Frames: 444689 | Epsilon: 0.5553


Training:   4%|▍         | 445094/10000000 [52:05<32:07:01, 82.64it/s]

Episode 1079 completed | Reward: 440.00 | Avg Reward: 892.20 | Frames: 445086 | Epsilon: 0.5549


Training:   4%|▍         | 445514/10000000 [52:10<31:00:03, 85.61it/s]

Episode 1080 completed | Reward: 700.00 | Avg Reward: 890.80 | Frames: 445507 | Epsilon: 0.5545


Training:   4%|▍         | 445941/10000000 [52:14<30:44:23, 86.33it/s]

Episode 1081 completed | Reward: 920.00 | Avg Reward: 896.20 | Frames: 445926 | Epsilon: 0.5541


Training:   4%|▍         | 446390/10000000 [52:20<31:53:00, 83.23it/s]

Episode 1082 completed | Reward: 900.00 | Avg Reward: 899.80 | Frames: 446386 | Epsilon: 0.5536


Training:   4%|▍         | 446941/10000000 [52:26<31:07:54, 85.24it/s]

Episode 1083 completed | Reward: 600.00 | Avg Reward: 901.00 | Frames: 446926 | Epsilon: 0.5531


Training:   4%|▍         | 447381/10000000 [52:31<31:31:27, 84.17it/s]

Episode 1084 completed | Reward: 720.00 | Avg Reward: 894.80 | Frames: 447365 | Epsilon: 0.5526


Training:   4%|▍         | 447997/10000000 [52:38<30:46:16, 86.23it/s]

Episode 1085 completed | Reward: 1240.00 | Avg Reward: 897.40 | Frames: 447985 | Epsilon: 0.5520


Training:   4%|▍         | 448414/10000000 [52:43<31:48:04, 83.43it/s]

Episode 1086 completed | Reward: 980.00 | Avg Reward: 901.20 | Frames: 448407 | Epsilon: 0.5516


Training:   4%|▍         | 448821/10000000 [52:47<31:55:22, 83.11it/s]

Episode 1087 completed | Reward: 720.00 | Avg Reward: 902.00 | Frames: 448808 | Epsilon: 0.5512


Training:   4%|▍         | 449233/10000000 [52:52<31:48:30, 83.40it/s]

Episode 1088 completed | Reward: 480.00 | Avg Reward: 898.20 | Frames: 449222 | Epsilon: 0.5508


Training:   4%|▍         | 449613/10000000 [52:57<29:42:53, 89.28it/s]

Episode 1089 completed | Reward: 740.00 | Avg Reward: 898.60 | Frames: 449613 | Epsilon: 0.5504

Memory usage: 1.11 GB


Training:   5%|▍         | 450462/10000000 [53:06<31:11:37, 85.04it/s]

Episode 1090 completed | Reward: 2180.00 | Avg Reward: 915.60 | Frames: 450454 | Epsilon: 0.5495


Training:   5%|▍         | 450877/10000000 [53:11<32:36:35, 81.34it/s]

Episode 1091 completed | Reward: 940.00 | Avg Reward: 920.80 | Frames: 450865 | Epsilon: 0.5491


Training:   5%|▍         | 451292/10000000 [53:16<30:46:14, 86.20it/s]

Episode 1092 completed | Reward: 760.00 | Avg Reward: 924.60 | Frames: 451280 | Epsilon: 0.5487


Training:   5%|▍         | 451650/10000000 [53:20<31:24:40, 84.44it/s]

Episode 1093 completed | Reward: 680.00 | Avg Reward: 924.60 | Frames: 451641 | Epsilon: 0.5484


Training:   5%|▍         | 452034/10000000 [53:25<32:01:24, 82.82it/s]

Episode 1094 completed | Reward: 1100.00 | Avg Reward: 925.20 | Frames: 452031 | Epsilon: 0.5480


Training:   5%|▍         | 452449/10000000 [53:29<31:55:27, 83.07it/s]

Episode 1095 completed | Reward: 880.00 | Avg Reward: 928.60 | Frames: 452437 | Epsilon: 0.5476


Training:   5%|▍         | 452865/10000000 [53:34<32:13:36, 82.29it/s]

Episode 1096 completed | Reward: 680.00 | Avg Reward: 928.60 | Frames: 452855 | Epsilon: 0.5471


Training:   5%|▍         | 453345/10000000 [53:40<32:27:29, 81.70it/s]

Episode 1097 completed | Reward: 1140.00 | Avg Reward: 932.60 | Frames: 453332 | Epsilon: 0.5467


Training:   5%|▍         | 453729/10000000 [53:44<32:15:05, 82.22it/s]

Episode 1098 completed | Reward: 740.00 | Avg Reward: 933.20 | Frames: 453716 | Epsilon: 0.5463


Training:   5%|▍         | 454072/10000000 [53:48<30:38:49, 86.52it/s]

Episode 1099 completed | Reward: 680.00 | Avg Reward: 924.60 | Frames: 454059 | Epsilon: 0.5459


Training:   5%|▍         | 454504/10000000 [53:53<31:42:55, 83.60it/s]

Episode 1100 completed | Reward: 660.00 | Avg Reward: 925.40 | Frames: 454499 | Epsilon: 0.5455


Training:   5%|▍         | 454989/10000000 [53:59<31:33:59, 83.99it/s]

Episode 1101 completed | Reward: 1160.00 | Avg Reward: 924.60 | Frames: 454977 | Epsilon: 0.5450


Training:   5%|▍         | 455473/10000000 [54:04<32:23:39, 81.84it/s]

Episode 1102 completed | Reward: 820.00 | Avg Reward: 926.60 | Frames: 455459 | Epsilon: 0.5445


Training:   5%|▍         | 455934/10000000 [54:10<32:30:10, 81.57it/s]

Episode 1103 completed | Reward: 900.00 | Avg Reward: 926.20 | Frames: 455927 | Epsilon: 0.5441


Training:   5%|▍         | 456270/10000000 [54:14<32:50:45, 80.71it/s]

Episode 1104 completed | Reward: 480.00 | Avg Reward: 921.80 | Frames: 456263 | Epsilon: 0.5437


Training:   5%|▍         | 456692/10000000 [54:19<29:48:37, 88.93it/s]

Episode 1105 completed | Reward: 180.00 | Avg Reward: 915.80 | Frames: 456680 | Epsilon: 0.5433


Training:   5%|▍         | 457145/10000000 [54:24<33:18:21, 79.59it/s]

Episode 1106 completed | Reward: 580.00 | Avg Reward: 914.80 | Frames: 457132 | Epsilon: 0.5429


Training:   5%|▍         | 457575/10000000 [54:29<29:29:22, 89.89it/s]

Episode 1107 completed | Reward: 1040.00 | Avg Reward: 913.20 | Frames: 457567 | Epsilon: 0.5424


Training:   5%|▍         | 458017/10000000 [54:34<29:12:06, 90.77it/s]

Episode 1108 completed | Reward: 540.00 | Avg Reward: 913.80 | Frames: 458004 | Epsilon: 0.5420


Training:   5%|▍         | 458589/10000000 [54:40<30:32:58, 86.76it/s]

Episode 1109 completed | Reward: 680.00 | Avg Reward: 916.20 | Frames: 458576 | Epsilon: 0.5414


Training:   5%|▍         | 458922/10000000 [54:43<30:17:18, 87.50it/s]

Episode 1110 completed | Reward: 340.00 | Avg Reward: 910.80 | Frames: 458911 | Epsilon: 0.5411


Training:   5%|▍         | 459349/10000000 [54:48<30:29:28, 86.92it/s]

Episode 1111 completed | Reward: 640.00 | Avg Reward: 906.80 | Frames: 459338 | Epsilon: 0.5407


Training:   5%|▍         | 459935/10000000 [54:55<37:36:26, 70.47it/s]

Episode 1112 completed | Reward: 900.00 | Avg Reward: 907.00 | Frames: 459930 | Epsilon: 0.5401

Memory usage: 1.11 GB


Training:   5%|▍         | 460301/10000000 [54:59<31:03:47, 85.31it/s]

Episode 1113 completed | Reward: 820.00 | Avg Reward: 909.00 | Frames: 460290 | Epsilon: 0.5397


Training:   5%|▍         | 460683/10000000 [55:03<29:08:04, 90.95it/s]

Episode 1114 completed | Reward: 2480.00 | Avg Reward: 921.80 | Frames: 460679 | Epsilon: 0.5393


Training:   5%|▍         | 461469/10000000 [55:11<30:00:53, 88.28it/s]

Episode 1115 completed | Reward: 2220.00 | Avg Reward: 926.60 | Frames: 461456 | Epsilon: 0.5385


Training:   5%|▍         | 461863/10000000 [55:16<29:46:40, 88.97it/s]

Episode 1116 completed | Reward: 700.00 | Avg Reward: 929.60 | Frames: 461853 | Epsilon: 0.5381


Training:   5%|▍         | 462273/10000000 [55:20<30:24:52, 87.11it/s]

Episode 1117 completed | Reward: 1200.00 | Avg Reward: 928.20 | Frames: 462261 | Epsilon: 0.5377


Training:   5%|▍         | 462691/10000000 [55:25<28:35:02, 92.68it/s]

Episode 1118 completed | Reward: 260.00 | Avg Reward: 922.40 | Frames: 462681 | Epsilon: 0.5373


Training:   5%|▍         | 463046/10000000 [55:29<29:37:12, 89.44it/s]

Episode 1119 completed | Reward: 500.00 | Avg Reward: 918.20 | Frames: 463035 | Epsilon: 0.5370


Training:   5%|▍         | 463533/10000000 [55:34<30:35:04, 86.61it/s]

Episode 1120 completed | Reward: 900.00 | Avg Reward: 910.00 | Frames: 463521 | Epsilon: 0.5365


Training:   5%|▍         | 463925/10000000 [55:38<30:14:21, 87.60it/s]

Episode 1121 completed | Reward: 480.00 | Avg Reward: 908.80 | Frames: 463909 | Epsilon: 0.5361


Training:   5%|▍         | 464393/10000000 [55:43<28:50:11, 91.85it/s]

Episode 1122 completed | Reward: 560.00 | Avg Reward: 910.00 | Frames: 464380 | Epsilon: 0.5356


Training:   5%|▍         | 464813/10000000 [55:48<30:21:21, 87.25it/s]

Episode 1123 completed | Reward: 620.00 | Avg Reward: 904.60 | Frames: 464800 | Epsilon: 0.5352


Training:   5%|▍         | 465271/10000000 [55:53<29:53:08, 88.62it/s]

Episode 1124 completed | Reward: 1520.00 | Avg Reward: 912.40 | Frames: 465266 | Epsilon: 0.5347


Training:   5%|▍         | 465645/10000000 [55:57<29:58:12, 88.37it/s]

Episode 1125 completed | Reward: 1060.00 | Avg Reward: 916.00 | Frames: 465629 | Epsilon: 0.5344


Training:   5%|▍         | 466159/10000000 [56:03<29:40:18, 89.25it/s]

Episode 1126 completed | Reward: 820.00 | Avg Reward: 913.80 | Frames: 466150 | Epsilon: 0.5338


Training:   5%|▍         | 466559/10000000 [56:07<30:02:00, 88.17it/s]

Episode 1127 completed | Reward: 1340.00 | Avg Reward: 919.20 | Frames: 466550 | Epsilon: 0.5334


Training:   5%|▍         | 466949/10000000 [56:11<30:24:30, 87.08it/s]

Episode 1128 completed | Reward: 900.00 | Avg Reward: 916.80 | Frames: 466935 | Epsilon: 0.5331


Training:   5%|▍         | 467427/10000000 [56:17<29:53:49, 88.57it/s]

Episode 1129 completed | Reward: 700.00 | Avg Reward: 912.40 | Frames: 467419 | Epsilon: 0.5326


Training:   5%|▍         | 467877/10000000 [56:22<30:31:40, 86.73it/s]

Episode 1130 completed | Reward: 740.00 | Avg Reward: 911.40 | Frames: 467864 | Epsilon: 0.5321


Training:   5%|▍         | 468237/10000000 [56:26<29:32:48, 89.61it/s]

Episode 1131 completed | Reward: 680.00 | Avg Reward: 912.80 | Frames: 468224 | Epsilon: 0.5318


Training:   5%|▍         | 468643/10000000 [56:30<29:59:35, 88.27it/s]

Episode 1132 completed | Reward: 1740.00 | Avg Reward: 921.80 | Frames: 468634 | Epsilon: 0.5314


Training:   5%|▍         | 469033/10000000 [56:34<30:27:10, 86.94it/s]

Episode 1133 completed | Reward: 620.00 | Avg Reward: 920.60 | Frames: 469021 | Epsilon: 0.5310


Training:   5%|▍         | 469449/10000000 [56:39<30:44:02, 86.14it/s]

Episode 1134 completed | Reward: 940.00 | Avg Reward: 916.80 | Frames: 469436 | Epsilon: 0.5306


Training:   5%|▍         | 469865/10000000 [56:44<30:37:38, 86.43it/s]

Episode 1135 completed | Reward: 660.00 | Avg Reward: 914.80 | Frames: 469849 | Epsilon: 0.5302


Training:   5%|▍         | 470291/10000000 [56:48<28:38:11, 92.44it/s]

Episode 1136 completed | Reward: 800.00 | Avg Reward: 913.40 | Frames: 470291 | Epsilon: 0.5297

Memory usage: 1.11 GB


Training:   5%|▍         | 471285/10000000 [56:59<30:30:00, 86.78it/s]

Episode 1137 completed | Reward: 2860.00 | Avg Reward: 930.60 | Frames: 471274 | Epsilon: 0.5287


Training:   5%|▍         | 471653/10000000 [57:03<30:41:17, 86.25it/s]

Episode 1138 completed | Reward: 840.00 | Avg Reward: 929.60 | Frames: 471638 | Epsilon: 0.5284


Training:   5%|▍         | 472162/10000000 [57:09<30:41:06, 86.25it/s]

Episode 1139 completed | Reward: 980.00 | Avg Reward: 932.60 | Frames: 472154 | Epsilon: 0.5278


Training:   5%|▍         | 472518/10000000 [57:13<30:04:41, 87.99it/s]

Episode 1140 completed | Reward: 620.00 | Avg Reward: 929.40 | Frames: 472507 | Epsilon: 0.5275


Training:   5%|▍         | 472892/10000000 [57:17<29:22:31, 90.09it/s]

Episode 1141 completed | Reward: 900.00 | Avg Reward: 931.60 | Frames: 472880 | Epsilon: 0.5271


Training:   5%|▍         | 473271/10000000 [57:22<29:33:41, 89.52it/s]

Episode 1142 completed | Reward: 680.00 | Avg Reward: 930.20 | Frames: 473261 | Epsilon: 0.5267


Training:   5%|▍         | 473698/10000000 [57:26<30:14:27, 87.50it/s]

Episode 1143 completed | Reward: 540.00 | Avg Reward: 925.20 | Frames: 473687 | Epsilon: 0.5263


Training:   5%|▍         | 474125/10000000 [57:31<30:15:49, 87.43it/s]

Episode 1144 completed | Reward: 480.00 | Avg Reward: 919.60 | Frames: 474112 | Epsilon: 0.5259


Training:   5%|▍         | 474541/10000000 [57:36<30:34:16, 86.55it/s]

Episode 1145 completed | Reward: 1260.00 | Avg Reward: 917.00 | Frames: 474530 | Epsilon: 0.5255


Training:   5%|▍         | 474989/10000000 [57:41<31:07:55, 84.99it/s]

Episode 1146 completed | Reward: 640.00 | Avg Reward: 914.60 | Frames: 474976 | Epsilon: 0.5250


Training:   5%|▍         | 475479/10000000 [57:46<28:53:54, 91.55it/s]

Episode 1147 completed | Reward: 740.00 | Avg Reward: 915.60 | Frames: 475470 | Epsilon: 0.5245


Training:   5%|▍         | 475911/10000000 [57:51<29:42:20, 89.06it/s]

Episode 1148 completed | Reward: 900.00 | Avg Reward: 920.00 | Frames: 475904 | Epsilon: 0.5241


Training:   5%|▍         | 476301/10000000 [57:55<30:15:04, 87.45it/s]

Episode 1149 completed | Reward: 640.00 | Avg Reward: 919.20 | Frames: 476288 | Epsilon: 0.5237


Training:   5%|▍         | 476641/10000000 [57:59<30:55:41, 85.53it/s]

Episode 1150 completed | Reward: 780.00 | Avg Reward: 917.60 | Frames: 476625 | Epsilon: 0.5234


Training:   5%|▍         | 477102/10000000 [58:05<32:44:51, 80.78it/s]

Episode 1151 completed | Reward: 680.00 | Avg Reward: 915.40 | Frames: 477094 | Epsilon: 0.5229


Training:   5%|▍         | 477530/10000000 [58:09<30:57:19, 85.45it/s]

Episode 1152 completed | Reward: 540.00 | Avg Reward: 910.00 | Frames: 477522 | Epsilon: 0.5225


Training:   5%|▍         | 477894/10000000 [58:13<31:14:37, 84.66it/s]

Episode 1153 completed | Reward: 900.00 | Avg Reward: 914.20 | Frames: 477890 | Epsilon: 0.5221


Training:   5%|▍         | 478266/10000000 [58:18<30:46:16, 85.95it/s]

Episode 1154 completed | Reward: 440.00 | Avg Reward: 915.20 | Frames: 478258 | Epsilon: 0.5217


Training:   5%|▍         | 478682/10000000 [58:22<30:45:51, 85.97it/s]

Episode 1155 completed | Reward: 940.00 | Avg Reward: 917.20 | Frames: 478675 | Epsilon: 0.5213


Training:   5%|▍         | 479182/10000000 [58:28<32:12:24, 82.12it/s]

Episode 1156 completed | Reward: 740.00 | Avg Reward: 917.20 | Frames: 479173 | Epsilon: 0.5208


Training:   5%|▍         | 479546/10000000 [58:32<31:04:58, 85.08it/s]

Episode 1157 completed | Reward: 1020.00 | Avg Reward: 916.40 | Frames: 479535 | Epsilon: 0.5205


Training:   5%|▍         | 479925/10000000 [58:36<31:05:29, 85.05it/s]

Episode 1158 completed | Reward: 940.00 | Avg Reward: 916.80 | Frames: 479912 | Epsilon: 0.5201


Training:   5%|▍         | 480403/10000000 [58:42<38:21:41, 68.93it/s]

Episode 1159 completed | Reward: 1840.00 | Avg Reward: 901.60 | Frames: 480402 | Epsilon: 0.5196

Memory usage: 1.11 GB


Training:   5%|▍         | 480810/10000000 [58:46<31:02:40, 85.17it/s]

Episode 1160 completed | Reward: 680.00 | Avg Reward: 901.00 | Frames: 480803 | Epsilon: 0.5192


Training:   5%|▍         | 481526/10000000 [58:54<30:31:47, 86.60it/s]

Episode 1161 completed | Reward: 940.00 | Avg Reward: 897.40 | Frames: 481517 | Epsilon: 0.5185


Training:   5%|▍         | 482109/10000000 [59:01<32:03:43, 82.46it/s]

Episode 1162 completed | Reward: 1160.00 | Avg Reward: 897.60 | Frames: 482099 | Epsilon: 0.5179


Training:   5%|▍         | 482681/10000000 [59:07<30:32:58, 86.54it/s]

Episode 1163 completed | Reward: 1680.00 | Avg Reward: 901.80 | Frames: 482670 | Epsilon: 0.5173


Training:   5%|▍         | 483161/10000000 [59:13<30:49:19, 85.77it/s]

Episode 1164 completed | Reward: 1300.00 | Avg Reward: 908.00 | Frames: 483144 | Epsilon: 0.5169


Training:   5%|▍         | 483514/10000000 [59:17<31:31:53, 83.84it/s]

Episode 1165 completed | Reward: 440.00 | Avg Reward: 905.60 | Frames: 483503 | Epsilon: 0.5165


Training:   5%|▍         | 483826/10000000 [59:20<30:52:53, 85.60it/s]

Episode 1166 completed | Reward: 780.00 | Avg Reward: 910.40 | Frames: 483820 | Epsilon: 0.5162


Training:   5%|▍         | 484230/10000000 [59:25<30:53:15, 85.58it/s]

Episode 1167 completed | Reward: 520.00 | Avg Reward: 910.80 | Frames: 484227 | Epsilon: 0.5158


Training:   5%|▍         | 484610/10000000 [59:29<31:31:40, 83.84it/s]

Episode 1168 completed | Reward: 680.00 | Avg Reward: 911.60 | Frames: 484606 | Epsilon: 0.5154


Training:   5%|▍         | 485089/10000000 [59:35<30:58:14, 85.34it/s]

Episode 1169 completed | Reward: 740.00 | Avg Reward: 897.60 | Frames: 485074 | Epsilon: 0.5149


Training:   5%|▍         | 485490/10000000 [59:39<30:58:02, 85.35it/s]

Episode 1170 completed | Reward: 500.00 | Avg Reward: 888.20 | Frames: 485481 | Epsilon: 0.5145


Training:   5%|▍         | 486017/10000000 [59:45<31:03:07, 85.11it/s]

Episode 1171 completed | Reward: 880.00 | Avg Reward: 882.80 | Frames: 486006 | Epsilon: 0.5140


Training:   5%|▍         | 486390/10000000 [59:49<30:18:12, 87.21it/s]

Episode 1172 completed | Reward: 500.00 | Avg Reward: 876.60 | Frames: 486383 | Epsilon: 0.5136


Training:   5%|▍         | 486754/10000000 [59:54<31:15:18, 84.55it/s]

Episode 1173 completed | Reward: 940.00 | Avg Reward: 875.60 | Frames: 486751 | Epsilon: 0.5132


Training:   5%|▍         | 487214/10000000 [59:59<30:00:08, 88.07it/s]

Episode 1174 completed | Reward: 760.00 | Avg Reward: 874.00 | Frames: 487207 | Epsilon: 0.5128


Training:   5%|▍         | 487625/10000000 [1:00:03<30:37:32, 86.28it/s]

Episode 1175 completed | Reward: 720.00 | Avg Reward: 869.00 | Frames: 487611 | Epsilon: 0.5124


Training:   5%|▍         | 488133/10000000 [1:00:09<30:44:31, 85.95it/s]

Episode 1176 completed | Reward: 2200.00 | Avg Reward: 881.80 | Frames: 488119 | Epsilon: 0.5119


Training:   5%|▍         | 488518/10000000 [1:00:13<30:43:05, 86.01it/s]

Episode 1177 completed | Reward: 1320.00 | Avg Reward: 890.00 | Frames: 488507 | Epsilon: 0.5115


Training:   5%|▍         | 488981/10000000 [1:00:19<31:30:46, 83.84it/s]

Episode 1178 completed | Reward: 940.00 | Avg Reward: 889.80 | Frames: 488967 | Epsilon: 0.5110


Training:   5%|▍         | 489345/10000000 [1:00:23<30:56:30, 85.38it/s]

Episode 1179 completed | Reward: 820.00 | Avg Reward: 893.60 | Frames: 489330 | Epsilon: 0.5107


Training:   5%|▍         | 490033/10000000 [1:00:31<31:11:29, 84.69it/s]

Episode 1180 completed | Reward: 1040.00 | Avg Reward: 897.00 | Frames: 490016 | Epsilon: 0.5100


Training:   5%|▍         | 490522/10000000 [1:00:36<29:44:54, 88.79it/s]

Episode 1181 completed | Reward: 1480.00 | Avg Reward: 902.60 | Frames: 490522 | Epsilon: 0.5095

Memory usage: 1.11 GB


Training:   5%|▍         | 490986/10000000 [1:00:42<34:19:57, 76.94it/s]

Episode 1182 completed | Reward: 780.00 | Avg Reward: 901.40 | Frames: 490980 | Epsilon: 0.5090


Training:   5%|▍         | 491422/10000000 [1:00:47<31:57:58, 82.63it/s]

Episode 1183 completed | Reward: 1380.00 | Avg Reward: 909.20 | Frames: 491418 | Epsilon: 0.5086


Training:   5%|▍         | 491953/10000000 [1:00:53<31:33:04, 83.71it/s]

Episode 1184 completed | Reward: 1060.00 | Avg Reward: 912.60 | Frames: 491939 | Epsilon: 0.5081


Training:   5%|▍         | 492422/10000000 [1:00:58<31:32:45, 83.72it/s]

Episode 1185 completed | Reward: 1120.00 | Avg Reward: 911.40 | Frames: 492416 | Epsilon: 0.5076


Training:   5%|▍         | 492770/10000000 [1:01:02<31:06:46, 84.88it/s]

Episode 1186 completed | Reward: 880.00 | Avg Reward: 910.40 | Frames: 492764 | Epsilon: 0.5072


Training:   5%|▍         | 493158/10000000 [1:01:07<31:43:44, 83.23it/s]

Episode 1187 completed | Reward: 1040.00 | Avg Reward: 913.60 | Frames: 493152 | Epsilon: 0.5068


Training:   5%|▍         | 493481/10000000 [1:01:10<31:30:22, 83.82it/s]

Episode 1188 completed | Reward: 1480.00 | Avg Reward: 923.60 | Frames: 493465 | Epsilon: 0.5065


Training:   5%|▍         | 493865/10000000 [1:01:15<32:04:25, 82.33it/s]

Episode 1189 completed | Reward: 540.00 | Avg Reward: 921.60 | Frames: 493854 | Epsilon: 0.5061


Training:   5%|▍         | 494329/10000000 [1:01:20<31:53:05, 82.81it/s]

Episode 1190 completed | Reward: 820.00 | Avg Reward: 908.00 | Frames: 494318 | Epsilon: 0.5057


Training:   5%|▍         | 494760/10000000 [1:01:25<31:25:22, 84.03it/s]

Episode 1191 completed | Reward: 1040.00 | Avg Reward: 909.00 | Frames: 494747 | Epsilon: 0.5053


Training:   5%|▍         | 495210/10000000 [1:01:30<32:00:38, 82.48it/s]

Episode 1192 completed | Reward: 1140.00 | Avg Reward: 912.80 | Frames: 495206 | Epsilon: 0.5048


Training:   5%|▍         | 495598/10000000 [1:01:35<31:24:21, 84.06it/s]

Episode 1193 completed | Reward: 600.00 | Avg Reward: 912.00 | Frames: 495590 | Epsilon: 0.5044


Training:   5%|▍         | 496025/10000000 [1:01:40<31:22:26, 84.15it/s]

Episode 1194 completed | Reward: 740.00 | Avg Reward: 908.40 | Frames: 496011 | Epsilon: 0.5040


Training:   5%|▍         | 496497/10000000 [1:01:45<31:22:59, 84.12it/s]

Episode 1195 completed | Reward: 840.00 | Avg Reward: 908.00 | Frames: 496485 | Epsilon: 0.5035


Training:   5%|▍         | 496880/10000000 [1:01:50<30:13:07, 87.35it/s]

Episode 1196 completed | Reward: 1080.00 | Avg Reward: 912.00 | Frames: 496864 | Epsilon: 0.5031


Training:   5%|▍         | 497233/10000000 [1:01:54<33:16:34, 79.33it/s]

Episode 1197 completed | Reward: 640.00 | Avg Reward: 907.00 | Frames: 497221 | Epsilon: 0.5028


Training:   5%|▍         | 497602/10000000 [1:01:58<32:29:18, 81.25it/s]

Episode 1198 completed | Reward: 680.00 | Avg Reward: 906.40 | Frames: 497595 | Epsilon: 0.5024


Training:   5%|▍         | 498065/10000000 [1:02:04<32:52:18, 80.29it/s]

Episode 1199 completed | Reward: 1180.00 | Avg Reward: 911.40 | Frames: 498049 | Epsilon: 0.5020


Training:   5%|▍         | 498490/10000000 [1:02:08<31:09:25, 84.71it/s]

Episode 1200 completed | Reward: 1540.00 | Avg Reward: 920.20 | Frames: 498483 | Epsilon: 0.5015


Training:   5%|▍         | 498929/10000000 [1:02:13<30:45:00, 85.83it/s]

Episode 1201 completed | Reward: 1340.00 | Avg Reward: 922.00 | Frames: 498912 | Epsilon: 0.5011


Training:   5%|▍         | 499501/10000000 [1:02:20<31:29:50, 83.79it/s]

Episode 1202 completed | Reward: 640.00 | Avg Reward: 920.20 | Frames: 499488 | Epsilon: 0.5005


Training:   5%|▍         | 499886/10000000 [1:02:25<32:25:43, 81.38it/s]

Episode 1203 completed | Reward: 840.00 | Avg Reward: 919.60 | Frames: 499877 | Epsilon: 0.5001


Training:   5%|▍         | 499989/10000000 [1:02:26<30:21:35, 86.92it/s]

Model saved to weights/CarnivalDeterministic-v4_dqn_500000frames.pth


Training:   5%|▌         | 500000/10000000 [1:02:35<30:21:35, 86.92it/s]


Evaluation at frame 500000: 1034.00


Training:   5%|▌         | 500009/10000000 [1:02:37<597:40:16,  4.42it/s]

Model saved to weights/CarnivalDeterministic-v4_dqn_best.pth
Episode 1204 completed | Reward: 120.00 | Avg Reward: 916.00 | Frames: 500001 | Epsilon: 0.5000


Training:   5%|▌         | 500630/10000000 [1:02:44<40:37:08, 64.96it/s]

Episode 1205 completed | Reward: 680.00 | Avg Reward: 921.00 | Frames: 500628 | Epsilon: 0.4994

Memory usage: 1.11 GB


Training:   5%|▌         | 501461/10000000 [1:02:54<34:19:47, 76.86it/s]

Episode 1206 completed | Reward: 700.00 | Avg Reward: 922.20 | Frames: 501451 | Epsilon: 0.4985


Training:   5%|▌         | 501962/10000000 [1:03:00<31:59:44, 82.46it/s]

Episode 1207 completed | Reward: 2400.00 | Avg Reward: 935.80 | Frames: 501954 | Epsilon: 0.4980


Training:   5%|▌         | 502338/10000000 [1:03:04<31:37:34, 83.42it/s]

Episode 1208 completed | Reward: 880.00 | Avg Reward: 939.20 | Frames: 502330 | Epsilon: 0.4977


Training:   5%|▌         | 502798/10000000 [1:03:10<31:59:57, 82.44it/s]

Episode 1209 completed | Reward: 940.00 | Avg Reward: 941.80 | Frames: 502792 | Epsilon: 0.4972


Training:   5%|▌         | 503089/10000000 [1:03:13<32:27:56, 81.26it/s]

Episode 1210 completed | Reward: 440.00 | Avg Reward: 942.80 | Frames: 503079 | Epsilon: 0.4969


Training:   5%|▌         | 503485/10000000 [1:03:18<32:38:14, 80.83it/s]

Episode 1211 completed | Reward: 240.00 | Avg Reward: 938.80 | Frames: 503475 | Epsilon: 0.4965


Training:   5%|▌         | 503886/10000000 [1:03:22<30:50:28, 85.53it/s]

Episode 1212 completed | Reward: 940.00 | Avg Reward: 939.20 | Frames: 503877 | Epsilon: 0.4961


Training:   5%|▌         | 504485/10000000 [1:03:29<33:03:39, 79.78it/s]

Episode 1213 completed | Reward: 1100.00 | Avg Reward: 942.00 | Frames: 504474 | Epsilon: 0.4955


Training:   5%|▌         | 504826/10000000 [1:03:33<31:55:37, 82.61it/s]

Episode 1214 completed | Reward: 680.00 | Avg Reward: 924.00 | Frames: 504818 | Epsilon: 0.4952


Training:   5%|▌         | 505229/10000000 [1:03:38<32:01:23, 82.36it/s]

Episode 1215 completed | Reward: 1040.00 | Avg Reward: 912.20 | Frames: 505216 | Epsilon: 0.4948


Training:   5%|▌         | 505580/10000000 [1:03:42<31:21:41, 84.09it/s]

Episode 1216 completed | Reward: 1040.00 | Avg Reward: 915.60 | Frames: 505570 | Epsilon: 0.4944


Training:   5%|▌         | 505970/10000000 [1:03:47<32:41:01, 80.69it/s]

Episode 1217 completed | Reward: 840.00 | Avg Reward: 912.00 | Frames: 505964 | Epsilon: 0.4940


Training:   5%|▌         | 506481/10000000 [1:03:53<32:56:59, 80.03it/s]

Episode 1218 completed | Reward: 760.00 | Avg Reward: 917.00 | Frames: 506471 | Epsilon: 0.4935


Training:   5%|▌         | 506894/10000000 [1:03:57<32:57:53, 79.99it/s]

Episode 1219 completed | Reward: 420.00 | Avg Reward: 916.20 | Frames: 506886 | Epsilon: 0.4931


Training:   5%|▌         | 507273/10000000 [1:04:02<32:48:51, 80.36it/s]

Episode 1220 completed | Reward: 1040.00 | Avg Reward: 917.60 | Frames: 507260 | Epsilon: 0.4927


Training:   5%|▌         | 507724/10000000 [1:04:07<30:48:55, 85.57it/s]

Episode 1221 completed | Reward: 1200.00 | Avg Reward: 924.80 | Frames: 507712 | Epsilon: 0.4923


Training:   5%|▌         | 508169/10000000 [1:04:12<31:21:14, 84.09it/s]

Episode 1222 completed | Reward: 1040.00 | Avg Reward: 929.60 | Frames: 508156 | Epsilon: 0.4918


Training:   5%|▌         | 508559/10000000 [1:04:17<31:29:26, 83.72it/s]

Episode 1223 completed | Reward: 1120.00 | Avg Reward: 934.60 | Frames: 508553 | Epsilon: 0.4914


Training:   5%|▌         | 509009/10000000 [1:04:22<32:18:50, 81.59it/s]

Episode 1224 completed | Reward: 1640.00 | Avg Reward: 935.80 | Frames: 508996 | Epsilon: 0.4910


Training:   5%|▌         | 509390/10000000 [1:04:27<33:53:27, 77.79it/s]

Episode 1225 completed | Reward: 600.00 | Avg Reward: 931.20 | Frames: 509385 | Epsilon: 0.4906


Training:   5%|▌         | 509838/10000000 [1:04:32<33:05:34, 79.66it/s]

Episode 1226 completed | Reward: 1280.00 | Avg Reward: 935.80 | Frames: 509830 | Epsilon: 0.4902


Training:   5%|▌         | 510255/10000000 [1:04:37<31:26:51, 83.82it/s]

Episode 1227 completed | Reward: 580.00 | Avg Reward: 928.20 | Frames: 510246 | Epsilon: 0.4898


Training:   5%|▌         | 510587/10000000 [1:04:41<31:43:51, 83.07it/s]

Episode 1228 completed | Reward: 740.00 | Avg Reward: 926.60 | Frames: 510576 | Epsilon: 0.4894


Training:   5%|▌         | 510889/10000000 [1:04:45<31:37:39, 83.34it/s]

Episode 1229 completed | Reward: 1260.00 | Avg Reward: 932.20 | Frames: 510889 | Epsilon: 0.4891

Memory usage: 1.11 GB


Training:   5%|▌         | 511390/10000000 [1:04:51<31:35:45, 83.42it/s]

Episode 1230 completed | Reward: 2020.00 | Avg Reward: 945.00 | Frames: 511386 | Epsilon: 0.4886


Training:   5%|▌         | 511841/10000000 [1:04:56<32:43:45, 80.53it/s]

Episode 1231 completed | Reward: 1240.00 | Avg Reward: 950.60 | Frames: 511831 | Epsilon: 0.4882


Training:   5%|▌         | 512261/10000000 [1:05:01<32:38:29, 80.74it/s]

Episode 1232 completed | Reward: 1040.00 | Avg Reward: 943.60 | Frames: 512250 | Epsilon: 0.4878


Training:   5%|▌         | 512686/10000000 [1:05:06<34:51:30, 75.60it/s]

Episode 1233 completed | Reward: 840.00 | Avg Reward: 945.80 | Frames: 512677 | Epsilon: 0.4873


Training:   5%|▌         | 513089/10000000 [1:05:11<33:25:53, 78.83it/s]

Episode 1234 completed | Reward: 540.00 | Avg Reward: 941.80 | Frames: 513077 | Epsilon: 0.4869


Training:   5%|▌         | 513445/10000000 [1:05:15<34:35:00, 76.20it/s]

Episode 1235 completed | Reward: 580.00 | Avg Reward: 941.00 | Frames: 513434 | Epsilon: 0.4866


Training:   5%|▌         | 513789/10000000 [1:05:19<35:10:35, 74.91it/s]

Episode 1236 completed | Reward: 1620.00 | Avg Reward: 949.20 | Frames: 513779 | Epsilon: 0.4862


Training:   5%|▌         | 514241/10000000 [1:05:24<33:22:33, 78.95it/s]

Episode 1237 completed | Reward: 340.00 | Avg Reward: 924.00 | Frames: 514229 | Epsilon: 0.4858


Training:   5%|▌         | 514701/10000000 [1:05:30<33:37:54, 78.34it/s]

Episode 1238 completed | Reward: 740.00 | Avg Reward: 923.00 | Frames: 514691 | Epsilon: 0.4853


Training:   5%|▌         | 515134/10000000 [1:05:35<32:37:24, 80.76it/s]

Episode 1239 completed | Reward: 600.00 | Avg Reward: 919.20 | Frames: 515128 | Epsilon: 0.4849


Training:   5%|▌         | 515545/10000000 [1:05:40<32:54:56, 80.04it/s]

Episode 1240 completed | Reward: 700.00 | Avg Reward: 920.00 | Frames: 515533 | Epsilon: 0.4845


Training:   5%|▌         | 515978/10000000 [1:05:45<32:31:12, 81.01it/s]

Episode 1241 completed | Reward: 340.00 | Avg Reward: 914.40 | Frames: 515974 | Epsilon: 0.4840


Training:   5%|▌         | 516394/10000000 [1:05:50<33:05:27, 79.61it/s]

Episode 1242 completed | Reward: 720.00 | Avg Reward: 914.80 | Frames: 516386 | Epsilon: 0.4836


Training:   5%|▌         | 516845/10000000 [1:05:56<36:00:03, 73.17it/s]

Episode 1243 completed | Reward: 400.00 | Avg Reward: 913.40 | Frames: 516836 | Epsilon: 0.4832


Training:   5%|▌         | 517266/10000000 [1:06:01<33:00:46, 79.79it/s]

Episode 1244 completed | Reward: 560.00 | Avg Reward: 914.20 | Frames: 517258 | Epsilon: 0.4827


Training:   5%|▌         | 517757/10000000 [1:06:07<34:29:47, 76.35it/s]

Episode 1245 completed | Reward: 1440.00 | Avg Reward: 916.00 | Frames: 517748 | Epsilon: 0.4823


Training:   5%|▌         | 518169/10000000 [1:06:12<34:17:41, 76.80it/s]

Episode 1246 completed | Reward: 940.00 | Avg Reward: 919.00 | Frames: 518156 | Epsilon: 0.4818


Training:   5%|▌         | 518546/10000000 [1:06:16<33:22:01, 78.93it/s]

Episode 1247 completed | Reward: 1480.00 | Avg Reward: 926.40 | Frames: 518539 | Epsilon: 0.4815


Training:   5%|▌         | 518972/10000000 [1:06:21<31:11:38, 84.43it/s]

Episode 1248 completed | Reward: 680.00 | Avg Reward: 924.20 | Frames: 518957 | Epsilon: 0.4810


Training:   5%|▌         | 519409/10000000 [1:06:27<35:01:20, 75.19it/s]

Episode 1249 completed | Reward: 1240.00 | Avg Reward: 930.20 | Frames: 519398 | Epsilon: 0.4806


Training:   5%|▌         | 519802/10000000 [1:06:32<33:43:19, 78.09it/s]

Episode 1250 completed | Reward: 480.00 | Avg Reward: 927.20 | Frames: 519793 | Epsilon: 0.4802


Training:   5%|▌         | 520151/10000000 [1:06:36<33:43:54, 78.07it/s]

Episode 1251 completed | Reward: 760.00 | Avg Reward: 928.00 | Frames: 520147 | Epsilon: 0.4799


Training:   5%|▌         | 520604/10000000 [1:06:41<31:50:12, 82.71it/s]

Episode 1252 completed | Reward: 600.00 | Avg Reward: 928.60 | Frames: 520598 | Epsilon: 0.4794


Training:   5%|▌         | 520988/10000000 [1:06:46<42:47:10, 61.54it/s]

Episode 1253 completed | Reward: 1080.00 | Avg Reward: 930.40 | Frames: 520981 | Epsilon: 0.4790

Memory usage: 1.11 GB


Training:   5%|▌         | 521433/10000000 [1:06:52<34:32:48, 76.21it/s]

Episode 1254 completed | Reward: 920.00 | Avg Reward: 935.20 | Frames: 521424 | Epsilon: 0.4786


Training:   5%|▌         | 521893/10000000 [1:06:57<34:31:08, 76.27it/s]

Episode 1255 completed | Reward: 920.00 | Avg Reward: 935.00 | Frames: 521882 | Epsilon: 0.4781


Training:   5%|▌         | 522933/10000000 [1:07:10<33:33:20, 78.45it/s]

Episode 1256 completed | Reward: 1260.00 | Avg Reward: 940.20 | Frames: 522923 | Epsilon: 0.4771


Training:   5%|▌         | 523438/10000000 [1:07:16<33:19:39, 78.98it/s]

Episode 1257 completed | Reward: 380.00 | Avg Reward: 933.80 | Frames: 523430 | Epsilon: 0.4766


Training:   5%|▌         | 523923/10000000 [1:07:22<31:54:01, 82.51it/s]

Episode 1258 completed | Reward: 740.00 | Avg Reward: 931.80 | Frames: 523909 | Epsilon: 0.4761


Training:   5%|▌         | 524424/10000000 [1:07:28<31:20:10, 84.00it/s]

Episode 1259 completed | Reward: 740.00 | Avg Reward: 920.80 | Frames: 524413 | Epsilon: 0.4756


Training:   5%|▌         | 524781/10000000 [1:07:32<34:39:41, 75.93it/s]

Episode 1260 completed | Reward: 520.00 | Avg Reward: 919.20 | Frames: 524769 | Epsilon: 0.4752


Training:   5%|▌         | 525316/10000000 [1:07:39<32:55:57, 79.92it/s]

Episode 1261 completed | Reward: 980.00 | Avg Reward: 919.60 | Frames: 525310 | Epsilon: 0.4747


Training:   5%|▌         | 525668/10000000 [1:07:43<33:10:42, 79.32it/s]

Episode 1262 completed | Reward: 420.00 | Avg Reward: 912.20 | Frames: 525661 | Epsilon: 0.4743


Training:   5%|▌         | 526268/10000000 [1:07:50<31:44:28, 82.91it/s]

Episode 1263 completed | Reward: 860.00 | Avg Reward: 904.00 | Frames: 526262 | Epsilon: 0.4737


Training:   5%|▌         | 526798/10000000 [1:07:57<33:17:49, 79.03it/s]

Episode 1264 completed | Reward: 2100.00 | Avg Reward: 912.00 | Frames: 526791 | Epsilon: 0.4732


Training:   5%|▌         | 527291/10000000 [1:08:03<33:43:04, 78.04it/s]

Episode 1265 completed | Reward: 1540.00 | Avg Reward: 923.00 | Frames: 527285 | Epsilon: 0.4727


Training:   5%|▌         | 527678/10000000 [1:08:08<33:39:47, 78.16it/s]

Episode 1266 completed | Reward: 500.00 | Avg Reward: 920.20 | Frames: 527671 | Epsilon: 0.4723


Training:   5%|▌         | 528313/10000000 [1:08:15<34:32:42, 76.16it/s]

Episode 1267 completed | Reward: 520.00 | Avg Reward: 920.20 | Frames: 528304 | Epsilon: 0.4717


Training:   5%|▌         | 528850/10000000 [1:08:22<33:38:28, 78.20it/s]

Episode 1268 completed | Reward: 1000.00 | Avg Reward: 923.40 | Frames: 528837 | Epsilon: 0.4712


Training:   5%|▌         | 529266/10000000 [1:08:27<33:29:21, 78.56it/s]

Episode 1269 completed | Reward: 600.00 | Avg Reward: 922.00 | Frames: 529255 | Epsilon: 0.4707


Training:   5%|▌         | 529631/10000000 [1:08:32<32:43:41, 80.38it/s]

Episode 1270 completed | Reward: 940.00 | Avg Reward: 926.40 | Frames: 529620 | Epsilon: 0.4704


Training:   5%|▌         | 530165/10000000 [1:08:38<34:48:20, 75.58it/s]

Episode 1271 completed | Reward: 680.00 | Avg Reward: 924.40 | Frames: 530154 | Epsilon: 0.4698


Training:   5%|▌         | 530572/10000000 [1:08:43<32:38:14, 80.59it/s]

Episode 1272 completed | Reward: 420.00 | Avg Reward: 923.60 | Frames: 530556 | Epsilon: 0.4694


Training:   5%|▌         | 531272/10000000 [1:08:52<41:23:00, 63.56it/s]

Episode 1273 completed | Reward: 2440.00 | Avg Reward: 938.60 | Frames: 531265 | Epsilon: 0.4687

Memory usage: 1.11 GB


Training:   5%|▌         | 531694/10000000 [1:08:57<35:36:29, 73.86it/s]

Episode 1274 completed | Reward: 1060.00 | Avg Reward: 941.60 | Frames: 531686 | Epsilon: 0.4683


Training:   5%|▌         | 532024/10000000 [1:09:01<30:54:15, 85.10it/s]

Episode 1275 completed | Reward: 360.00 | Avg Reward: 938.00 | Frames: 532013 | Epsilon: 0.4680


Training:   5%|▌         | 532401/10000000 [1:09:06<34:45:34, 75.66it/s]

Episode 1276 completed | Reward: 1020.00 | Avg Reward: 926.20 | Frames: 532388 | Epsilon: 0.4676


Training:   5%|▌         | 532838/10000000 [1:09:11<34:01:19, 77.30it/s]

Episode 1277 completed | Reward: 620.00 | Avg Reward: 919.20 | Frames: 532829 | Epsilon: 0.4672


Training:   5%|▌         | 533180/10000000 [1:09:16<32:20:57, 81.29it/s]

Episode 1278 completed | Reward: 880.00 | Avg Reward: 918.60 | Frames: 533172 | Epsilon: 0.4668


Training:   5%|▌         | 533547/10000000 [1:09:20<31:35:48, 83.22it/s]

Episode 1279 completed | Reward: 980.00 | Avg Reward: 920.20 | Frames: 533535 | Epsilon: 0.4665


Training:   5%|▌         | 534042/10000000 [1:09:26<33:21:17, 78.83it/s]

Episode 1280 completed | Reward: 520.00 | Avg Reward: 915.00 | Frames: 534034 | Epsilon: 0.4660


Training:   5%|▌         | 534454/10000000 [1:09:31<35:06:36, 74.89it/s]

Episode 1281 completed | Reward: 760.00 | Avg Reward: 907.80 | Frames: 534441 | Epsilon: 0.4656


Training:   5%|▌         | 534793/10000000 [1:09:36<36:45:55, 71.51it/s]

Episode 1282 completed | Reward: 1020.00 | Avg Reward: 910.20 | Frames: 534784 | Epsilon: 0.4652


Training:   5%|▌         | 535294/10000000 [1:09:42<33:22:27, 78.78it/s]

Episode 1283 completed | Reward: 840.00 | Avg Reward: 904.80 | Frames: 535280 | Epsilon: 0.4647


Training:   5%|▌         | 535764/10000000 [1:09:48<31:46:39, 82.73it/s]

Episode 1284 completed | Reward: 340.00 | Avg Reward: 897.60 | Frames: 535749 | Epsilon: 0.4643


Training:   5%|▌         | 536117/10000000 [1:09:52<36:34:47, 71.87it/s]

Episode 1285 completed | Reward: 680.00 | Avg Reward: 893.20 | Frames: 536111 | Epsilon: 0.4639


Training:   5%|▌         | 536541/10000000 [1:09:57<35:44:10, 73.56it/s]

Episode 1286 completed | Reward: 500.00 | Avg Reward: 889.40 | Frames: 536535 | Epsilon: 0.4635


Training:   5%|▌         | 536942/10000000 [1:10:02<33:20:02, 78.86it/s]

Episode 1287 completed | Reward: 500.00 | Avg Reward: 884.00 | Frames: 536934 | Epsilon: 0.4631


Training:   5%|▌         | 537349/10000000 [1:10:08<36:37:34, 71.77it/s]

Episode 1288 completed | Reward: 680.00 | Avg Reward: 876.00 | Frames: 537336 | Epsilon: 0.4627


Training:   5%|▌         | 537727/10000000 [1:10:12<34:36:19, 75.95it/s]

Episode 1289 completed | Reward: 1140.00 | Avg Reward: 882.00 | Frames: 537718 | Epsilon: 0.4623


Training:   5%|▌         | 538147/10000000 [1:10:18<35:49:26, 73.37it/s]

Episode 1290 completed | Reward: 740.00 | Avg Reward: 881.20 | Frames: 538138 | Epsilon: 0.4619


Training:   5%|▌         | 538473/10000000 [1:10:22<35:41:35, 73.63it/s]

Episode 1291 completed | Reward: 1320.00 | Avg Reward: 884.00 | Frames: 538461 | Epsilon: 0.4615


Training:   5%|▌         | 538948/10000000 [1:10:28<33:55:17, 77.48it/s]

Episode 1292 completed | Reward: 1300.00 | Avg Reward: 885.60 | Frames: 538941 | Epsilon: 0.4611


Training:   5%|▌         | 539296/10000000 [1:10:32<31:57:02, 82.25it/s]

Episode 1293 completed | Reward: 880.00 | Avg Reward: 888.40 | Frames: 539282 | Epsilon: 0.4607


Training:   5%|▌         | 539715/10000000 [1:10:37<34:47:04, 75.55it/s]

Episode 1294 completed | Reward: 880.00 | Avg Reward: 889.80 | Frames: 539706 | Epsilon: 0.4603


Training:   5%|▌         | 540082/10000000 [1:10:42<33:49:01, 77.71it/s]

Episode 1295 completed | Reward: 1140.00 | Avg Reward: 892.80 | Frames: 540068 | Epsilon: 0.4599


Training:   5%|▌         | 540552/10000000 [1:10:48<31:48:23, 82.61it/s]

Episode 1296 completed | Reward: 1060.00 | Avg Reward: 892.60 | Frames: 540537 | Epsilon: 0.4595


Training:   5%|▌         | 541094/10000000 [1:10:55<34:01:17, 77.23it/s]

Episode 1297 completed | Reward: 1240.00 | Avg Reward: 898.60 | Frames: 541080 | Epsilon: 0.4589


Training:   5%|▌         | 541517/10000000 [1:11:00<47:17:32, 55.56it/s]

Episode 1298 completed | Reward: 900.00 | Avg Reward: 900.80 | Frames: 541513 | Epsilon: 0.4585

Memory usage: 1.11 GB


Training:   5%|▌         | 541893/10000000 [1:11:05<35:56:15, 73.11it/s]

Episode 1299 completed | Reward: 620.00 | Avg Reward: 895.20 | Frames: 541884 | Epsilon: 0.4581


Training:   5%|▌         | 542258/10000000 [1:11:09<35:05:37, 74.86it/s]

Episode 1300 completed | Reward: 620.00 | Avg Reward: 886.00 | Frames: 542249 | Epsilon: 0.4578


Training:   5%|▌         | 542632/10000000 [1:11:14<34:14:46, 76.71it/s]

Episode 1301 completed | Reward: 980.00 | Avg Reward: 882.40 | Frames: 542620 | Epsilon: 0.4574


Training:   5%|▌         | 543009/10000000 [1:11:19<37:06:18, 70.80it/s]

Episode 1302 completed | Reward: 760.00 | Avg Reward: 883.60 | Frames: 543001 | Epsilon: 0.4570


Training:   5%|▌         | 543354/10000000 [1:11:23<33:09:30, 79.22it/s]

Episode 1303 completed | Reward: 440.00 | Avg Reward: 879.60 | Frames: 543347 | Epsilon: 0.4567


Training:   5%|▌         | 543822/10000000 [1:11:29<33:58:51, 77.30it/s]

Episode 1304 completed | Reward: 780.00 | Avg Reward: 886.20 | Frames: 543808 | Epsilon: 0.4562


Training:   5%|▌         | 544173/10000000 [1:11:33<35:54:41, 73.14it/s]

Episode 1305 completed | Reward: 920.00 | Avg Reward: 888.60 | Frames: 544167 | Epsilon: 0.4558


Training:   5%|▌         | 544574/10000000 [1:11:38<34:59:07, 75.07it/s]

Episode 1306 completed | Reward: 640.00 | Avg Reward: 888.00 | Frames: 544565 | Epsilon: 0.4554


Training:   5%|▌         | 545010/10000000 [1:11:44<33:35:34, 78.18it/s]

Episode 1307 completed | Reward: 580.00 | Avg Reward: 869.80 | Frames: 544999 | Epsilon: 0.4550


Training:   5%|▌         | 545671/10000000 [1:11:52<34:48:08, 75.46it/s]

Episode 1308 completed | Reward: 620.00 | Avg Reward: 867.20 | Frames: 545662 | Epsilon: 0.4543


Training:   5%|▌         | 546025/10000000 [1:11:57<36:02:39, 72.86it/s]

Episode 1309 completed | Reward: 1240.00 | Avg Reward: 870.20 | Frames: 546014 | Epsilon: 0.4540


Training:   5%|▌         | 546429/10000000 [1:12:02<35:31:19, 73.93it/s]

Episode 1310 completed | Reward: 1240.00 | Avg Reward: 878.20 | Frames: 546416 | Epsilon: 0.4536


Training:   5%|▌         | 546821/10000000 [1:12:07<35:45:10, 73.45it/s]

Episode 1311 completed | Reward: 440.00 | Avg Reward: 880.20 | Frames: 546815 | Epsilon: 0.4532


Training:   5%|▌         | 547293/10000000 [1:12:13<38:06:16, 68.91it/s]

Episode 1312 completed | Reward: 1820.00 | Avg Reward: 889.00 | Frames: 547287 | Epsilon: 0.4527


Training:   5%|▌         | 547657/10000000 [1:12:17<37:14:28, 70.50it/s]

Episode 1313 completed | Reward: 1040.00 | Avg Reward: 888.40 | Frames: 547649 | Epsilon: 0.4524


Training:   5%|▌         | 548004/10000000 [1:12:22<32:54:44, 79.77it/s]

Episode 1314 completed | Reward: 900.00 | Avg Reward: 890.60 | Frames: 547988 | Epsilon: 0.4520


Training:   5%|▌         | 548465/10000000 [1:12:28<36:34:57, 71.77it/s]

Episode 1315 completed | Reward: 1180.00 | Avg Reward: 892.00 | Frames: 548453 | Epsilon: 0.4515


Training:   5%|▌         | 548856/10000000 [1:12:33<33:10:00, 79.16it/s]

Episode 1316 completed | Reward: 980.00 | Avg Reward: 891.40 | Frames: 548843 | Epsilon: 0.4512


Training:   5%|▌         | 549296/10000000 [1:12:38<32:08:32, 81.67it/s]

Episode 1317 completed | Reward: 560.00 | Avg Reward: 888.60 | Frames: 549282 | Epsilon: 0.4507


Training:   6%|▌         | 550098/10000000 [1:12:48<33:13:35, 79.00it/s]

Episode 1318 completed | Reward: 1400.00 | Avg Reward: 895.00 | Frames: 550087 | Epsilon: 0.4499


Training:   6%|▌         | 550511/10000000 [1:12:54<33:03:05, 79.42it/s]

Episode 1319 completed | Reward: 520.00 | Avg Reward: 896.00 | Frames: 550502 | Epsilon: 0.4495


Training:   6%|▌         | 551000/10000000 [1:13:00<33:53:13, 77.46it/s]

Episode 1320 completed | Reward: 1140.00 | Avg Reward: 897.00 | Frames: 550991 | Epsilon: 0.4490


Training:   6%|▌         | 551448/10000000 [1:13:05<32:42:41, 80.23it/s]

Episode 1321 completed | Reward: 1060.00 | Avg Reward: 895.60 | Frames: 551435 | Epsilon: 0.4486


Training:   6%|▌         | 551878/10000000 [1:13:11<34:15:45, 76.60it/s]

Episode 1322 completed | Reward: 680.00 | Avg Reward: 892.00 | Frames: 551878 | Epsilon: 0.4481

Memory usage: 1.11 GB


Training:   6%|▌         | 552285/10000000 [1:13:16<34:53:33, 75.21it/s]

Episode 1323 completed | Reward: 660.00 | Avg Reward: 887.40 | Frames: 552274 | Epsilon: 0.4477


Training:   6%|▌         | 552713/10000000 [1:13:22<36:52:47, 71.16it/s]

Episode 1324 completed | Reward: 1080.00 | Avg Reward: 881.80 | Frames: 552705 | Epsilon: 0.4473


Training:   6%|▌         | 553501/10000000 [1:13:32<35:59:08, 72.92it/s]

Episode 1325 completed | Reward: 1020.00 | Avg Reward: 886.00 | Frames: 553489 | Epsilon: 0.4465


Training:   6%|▌         | 553907/10000000 [1:13:37<33:18:20, 78.78it/s]

Episode 1326 completed | Reward: 640.00 | Avg Reward: 879.60 | Frames: 553894 | Epsilon: 0.4461


Training:   6%|▌         | 554429/10000000 [1:13:43<36:20:07, 72.21it/s]

Episode 1327 completed | Reward: 760.00 | Avg Reward: 881.40 | Frames: 554418 | Epsilon: 0.4456


Training:   6%|▌         | 554872/10000000 [1:13:49<34:00:11, 77.16it/s]

Episode 1328 completed | Reward: 1520.00 | Avg Reward: 889.20 | Frames: 554856 | Epsilon: 0.4451


Training:   6%|▌         | 555291/10000000 [1:13:55<34:02:10, 77.08it/s]

Episode 1329 completed | Reward: 640.00 | Avg Reward: 883.00 | Frames: 555276 | Epsilon: 0.4447


Training:   6%|▌         | 556045/10000000 [1:14:04<34:45:10, 75.48it/s]

Episode 1330 completed | Reward: 1000.00 | Avg Reward: 872.80 | Frames: 556032 | Epsilon: 0.4440


Training:   6%|▌         | 556513/10000000 [1:14:10<34:46:54, 75.42it/s]

Episode 1331 completed | Reward: 840.00 | Avg Reward: 868.80 | Frames: 556506 | Epsilon: 0.4435


Training:   6%|▌         | 556893/10000000 [1:14:15<36:35:44, 71.68it/s]

Episode 1332 completed | Reward: 1040.00 | Avg Reward: 868.80 | Frames: 556886 | Epsilon: 0.4431


Training:   6%|▌         | 557269/10000000 [1:14:20<35:18:16, 74.30it/s]

Episode 1333 completed | Reward: 680.00 | Avg Reward: 867.20 | Frames: 557260 | Epsilon: 0.4427


Training:   6%|▌         | 557677/10000000 [1:14:25<34:34:58, 75.84it/s]

Episode 1334 completed | Reward: 540.00 | Avg Reward: 867.20 | Frames: 557664 | Epsilon: 0.4423


Training:   6%|▌         | 558092/10000000 [1:14:30<33:33:29, 78.16it/s]

Episode 1335 completed | Reward: 720.00 | Avg Reward: 868.60 | Frames: 558082 | Epsilon: 0.4419


Training:   6%|▌         | 558453/10000000 [1:14:35<36:14:56, 72.35it/s]

Episode 1336 completed | Reward: 560.00 | Avg Reward: 858.00 | Frames: 558445 | Epsilon: 0.4416


Training:   6%|▌         | 559037/10000000 [1:14:42<34:22:06, 76.31it/s]

Episode 1337 completed | Reward: 1280.00 | Avg Reward: 867.40 | Frames: 559028 | Epsilon: 0.4410


Training:   6%|▌         | 559581/10000000 [1:14:49<34:54:24, 75.12it/s]

Episode 1338 completed | Reward: 1060.00 | Avg Reward: 870.60 | Frames: 559572 | Epsilon: 0.4404


Training:   6%|▌         | 560089/10000000 [1:14:56<35:31:11, 73.82it/s]

Episode 1339 completed | Reward: 1820.00 | Avg Reward: 882.80 | Frames: 560079 | Epsilon: 0.4399


Training:   6%|▌         | 560509/10000000 [1:15:01<35:59:57, 72.84it/s]

Episode 1340 completed | Reward: 400.00 | Avg Reward: 879.80 | Frames: 560503 | Epsilon: 0.4395


Training:   6%|▌         | 561262/10000000 [1:15:11<34:36:30, 75.76it/s]

Episode 1341 completed | Reward: 660.00 | Avg Reward: 883.00 | Frames: 561250 | Epsilon: 0.4387


Training:   6%|▌         | 561677/10000000 [1:15:16<35:13:45, 74.42it/s]

Episode 1342 completed | Reward: 1280.00 | Avg Reward: 888.60 | Frames: 561664 | Epsilon: 0.4383


Training:   6%|▌         | 562041/10000000 [1:15:21<33:58:29, 77.16it/s]

Episode 1343 completed | Reward: 900.00 | Avg Reward: 893.60 | Frames: 562041 | Epsilon: 0.4380

Memory usage: 1.11 GB


Training:   6%|▌         | 562405/10000000 [1:15:26<36:27:38, 71.90it/s]

Episode 1344 completed | Reward: 480.00 | Avg Reward: 892.80 | Frames: 562398 | Epsilon: 0.4376


Training:   6%|▌         | 562813/10000000 [1:15:31<35:02:17, 74.82it/s]

Episode 1345 completed | Reward: 660.00 | Avg Reward: 885.00 | Frames: 562804 | Epsilon: 0.4372


Training:   6%|▌         | 563161/10000000 [1:15:36<36:26:52, 71.92it/s]

Episode 1346 completed | Reward: 940.00 | Avg Reward: 885.00 | Frames: 563155 | Epsilon: 0.4368


Training:   6%|▌         | 563641/10000000 [1:15:42<36:14:39, 72.32it/s]

Episode 1347 completed | Reward: 1020.00 | Avg Reward: 880.40 | Frames: 563635 | Epsilon: 0.4364


Training:   6%|▌         | 564097/10000000 [1:15:48<36:16:04, 72.27it/s]

Episode 1348 completed | Reward: 1080.00 | Avg Reward: 884.40 | Frames: 564091 | Epsilon: 0.4359


Training:   6%|▌         | 564545/10000000 [1:15:53<35:21:53, 74.11it/s]

Episode 1349 completed | Reward: 900.00 | Avg Reward: 881.00 | Frames: 564539 | Epsilon: 0.4355


Training:   6%|▌         | 564965/10000000 [1:15:59<34:52:30, 75.15it/s]

Episode 1350 completed | Reward: 580.00 | Avg Reward: 882.00 | Frames: 564954 | Epsilon: 0.4350


Training:   6%|▌         | 565408/10000000 [1:16:05<33:29:01, 78.27it/s]

Episode 1351 completed | Reward: 720.00 | Avg Reward: 881.60 | Frames: 565393 | Epsilon: 0.4346


Training:   6%|▌         | 565945/10000000 [1:16:12<34:20:38, 76.30it/s]

Episode 1352 completed | Reward: 1360.00 | Avg Reward: 889.20 | Frames: 565934 | Epsilon: 0.4341


Training:   6%|▌         | 566385/10000000 [1:16:17<35:20:06, 74.16it/s]

Episode 1353 completed | Reward: 600.00 | Avg Reward: 884.40 | Frames: 566376 | Epsilon: 0.4336


Training:   6%|▌         | 566934/10000000 [1:16:24<34:08:39, 76.74it/s]

Episode 1354 completed | Reward: 820.00 | Avg Reward: 883.40 | Frames: 566923 | Epsilon: 0.4331


Training:   6%|▌         | 567393/10000000 [1:16:30<36:45:05, 71.29it/s]

Episode 1355 completed | Reward: 680.00 | Avg Reward: 881.00 | Frames: 567383 | Epsilon: 0.4326


Training:   6%|▌         | 567705/10000000 [1:16:34<35:27:45, 73.88it/s]

Episode 1356 completed | Reward: 540.00 | Avg Reward: 873.80 | Frames: 567692 | Epsilon: 0.4323


Training:   6%|▌         | 568128/10000000 [1:16:39<32:49:13, 79.83it/s]

Episode 1357 completed | Reward: 620.00 | Avg Reward: 876.20 | Frames: 568121 | Epsilon: 0.4319


Training:   6%|▌         | 568553/10000000 [1:16:45<37:14:19, 70.35it/s]

Episode 1358 completed | Reward: 1120.00 | Avg Reward: 880.00 | Frames: 568545 | Epsilon: 0.4315


Training:   6%|▌         | 568993/10000000 [1:16:51<36:27:15, 71.86it/s]

Episode 1359 completed | Reward: 760.00 | Avg Reward: 880.20 | Frames: 568987 | Epsilon: 0.4310


Training:   6%|▌         | 569425/10000000 [1:16:56<35:31:08, 73.75it/s]

Episode 1360 completed | Reward: 1000.00 | Avg Reward: 885.00 | Frames: 569417 | Epsilon: 0.4306


Training:   6%|▌         | 569837/10000000 [1:17:01<34:30:42, 75.90it/s]

Episode 1361 completed | Reward: 1000.00 | Avg Reward: 885.20 | Frames: 569825 | Epsilon: 0.4302


Training:   6%|▌         | 570361/10000000 [1:17:08<36:42:36, 71.35it/s]

Episode 1362 completed | Reward: 1340.00 | Avg Reward: 894.40 | Frames: 570348 | Epsilon: 0.4297


Training:   6%|▌         | 570721/10000000 [1:17:13<36:04:59, 72.59it/s]

Episode 1363 completed | Reward: 800.00 | Avg Reward: 893.80 | Frames: 570711 | Epsilon: 0.4293


Training:   6%|▌         | 571097/10000000 [1:17:18<35:34:33, 73.62it/s]

Episode 1364 completed | Reward: 640.00 | Avg Reward: 879.20 | Frames: 571087 | Epsilon: 0.4289


Training:   6%|▌         | 571633/10000000 [1:17:25<36:08:56, 72.45it/s]

Episode 1365 completed | Reward: 680.00 | Avg Reward: 870.60 | Frames: 571621 | Epsilon: 0.4284


Training:   6%|▌         | 571969/10000000 [1:17:29<34:37:50, 75.62it/s]

Episode 1366 completed | Reward: 540.00 | Avg Reward: 871.00 | Frames: 571959 | Epsilon: 0.4280


Training:   6%|▌         | 572495/10000000 [1:17:36<33:43:41, 77.64it/s]

Episode 1367 completed | Reward: 1980.00 | Avg Reward: 885.60 | Frames: 572495 | Epsilon: 0.4275

Memory usage: 1.11 GB


Training:   6%|▌         | 573025/10000000 [1:17:43<36:42:41, 71.33it/s]

Episode 1368 completed | Reward: 1420.00 | Avg Reward: 889.80 | Frames: 573018 | Epsilon: 0.4270


Training:   6%|▌         | 573417/10000000 [1:17:48<34:43:42, 75.40it/s]

Episode 1369 completed | Reward: 740.00 | Avg Reward: 891.20 | Frames: 573408 | Epsilon: 0.4266


Training:   6%|▌         | 574161/10000000 [1:17:58<36:23:24, 71.95it/s]

Episode 1370 completed | Reward: 1040.00 | Avg Reward: 892.20 | Frames: 574152 | Epsilon: 0.4258


Training:   6%|▌         | 574569/10000000 [1:18:03<36:44:08, 71.27it/s]

Episode 1371 completed | Reward: 800.00 | Avg Reward: 893.40 | Frames: 574563 | Epsilon: 0.4254


Training:   6%|▌         | 575025/10000000 [1:18:09<36:52:55, 70.98it/s]

Episode 1372 completed | Reward: 980.00 | Avg Reward: 899.00 | Frames: 575018 | Epsilon: 0.4250


Training:   6%|▌         | 575513/10000000 [1:18:16<35:13:58, 74.30it/s]

Episode 1373 completed | Reward: 1040.00 | Avg Reward: 885.00 | Frames: 575505 | Epsilon: 0.4245


Training:   6%|▌         | 575913/10000000 [1:18:21<35:55:33, 72.87it/s]

Episode 1374 completed | Reward: 2440.00 | Avg Reward: 898.80 | Frames: 575907 | Epsilon: 0.4241


Training:   6%|▌         | 576409/10000000 [1:18:27<35:16:49, 74.20it/s]

Episode 1375 completed | Reward: 700.00 | Avg Reward: 902.20 | Frames: 576396 | Epsilon: 0.4236


Training:   6%|▌         | 576777/10000000 [1:18:32<36:38:31, 71.44it/s]

Episode 1376 completed | Reward: 1120.00 | Avg Reward: 903.20 | Frames: 576768 | Epsilon: 0.4232


Training:   6%|▌         | 577169/10000000 [1:18:37<35:09:40, 74.44it/s]

Episode 1377 completed | Reward: 520.00 | Avg Reward: 902.20 | Frames: 577158 | Epsilon: 0.4228


Training:   6%|▌         | 577657/10000000 [1:18:44<36:31:44, 71.65it/s]

Episode 1378 completed | Reward: 1340.00 | Avg Reward: 906.80 | Frames: 577647 | Epsilon: 0.4224


Training:   6%|▌         | 578081/10000000 [1:18:49<36:38:28, 71.43it/s]

Episode 1379 completed | Reward: 1180.00 | Avg Reward: 908.80 | Frames: 578073 | Epsilon: 0.4219


Training:   6%|▌         | 578521/10000000 [1:18:55<37:40:54, 69.45it/s]

Episode 1380 completed | Reward: 680.00 | Avg Reward: 910.40 | Frames: 578514 | Epsilon: 0.4215


Training:   6%|▌         | 578945/10000000 [1:19:01<36:04:43, 72.53it/s]

Episode 1381 completed | Reward: 920.00 | Avg Reward: 912.00 | Frames: 578937 | Epsilon: 0.4211


Training:   6%|▌         | 579625/10000000 [1:19:10<36:21:29, 71.97it/s]

Episode 1382 completed | Reward: 960.00 | Avg Reward: 911.40 | Frames: 579612 | Epsilon: 0.4204


Training:   6%|▌         | 580089/10000000 [1:19:16<36:26:43, 71.80it/s]

Episode 1383 completed | Reward: 1240.00 | Avg Reward: 915.40 | Frames: 580079 | Epsilon: 0.4199


Training:   6%|▌         | 580433/10000000 [1:19:20<37:30:57, 69.74it/s]

Episode 1384 completed | Reward: 720.00 | Avg Reward: 919.20 | Frames: 580426 | Epsilon: 0.4196


Training:   6%|▌         | 580841/10000000 [1:19:26<36:10:55, 72.31it/s]

Episode 1385 completed | Reward: 580.00 | Avg Reward: 918.20 | Frames: 580832 | Epsilon: 0.4192


Training:   6%|▌         | 581225/10000000 [1:19:31<35:51:47, 72.95it/s]

Episode 1386 completed | Reward: 1000.00 | Avg Reward: 923.20 | Frames: 581217 | Epsilon: 0.4188


Training:   6%|▌         | 581857/10000000 [1:19:39<36:08:32, 72.38it/s]

Episode 1387 completed | Reward: 580.00 | Avg Reward: 924.00 | Frames: 581848 | Epsilon: 0.4182


Training:   6%|▌         | 582249/10000000 [1:19:44<36:32:54, 71.58it/s]

Episode 1388 completed | Reward: 820.00 | Avg Reward: 925.40 | Frames: 582236 | Epsilon: 0.4178


Training:   6%|▌         | 582705/10000000 [1:19:51<47:32:11, 55.03it/s]

Episode 1389 completed | Reward: 1300.00 | Avg Reward: 927.00 | Frames: 582703 | Epsilon: 0.4173

Memory usage: 1.11 GB


Training:   6%|▌         | 583769/10000000 [1:20:05<35:08:55, 74.42it/s]

Episode 1390 completed | Reward: 680.00 | Avg Reward: 926.40 | Frames: 583759 | Epsilon: 0.4162


Training:   6%|▌         | 584145/10000000 [1:20:10<37:00:15, 70.68it/s]

Episode 1391 completed | Reward: 1000.00 | Avg Reward: 923.20 | Frames: 584139 | Epsilon: 0.4159


Training:   6%|▌         | 584641/10000000 [1:20:16<36:12:25, 72.23it/s]

Episode 1392 completed | Reward: 620.00 | Avg Reward: 916.40 | Frames: 584632 | Epsilon: 0.4154


Training:   6%|▌         | 585057/10000000 [1:20:22<37:16:20, 70.17it/s]

Episode 1393 completed | Reward: 860.00 | Avg Reward: 916.20 | Frames: 585048 | Epsilon: 0.4150


Training:   6%|▌         | 585481/10000000 [1:20:27<36:27:28, 71.73it/s]

Episode 1394 completed | Reward: 620.00 | Avg Reward: 913.60 | Frames: 585474 | Epsilon: 0.4145


Training:   6%|▌         | 586017/10000000 [1:20:35<35:52:49, 72.88it/s]

Episode 1395 completed | Reward: 1120.00 | Avg Reward: 913.40 | Frames: 586004 | Epsilon: 0.4140


Training:   6%|▌         | 586401/10000000 [1:20:40<37:34:04, 69.60it/s]

Episode 1396 completed | Reward: 1340.00 | Avg Reward: 916.20 | Frames: 586394 | Epsilon: 0.4136


Training:   6%|▌         | 586729/10000000 [1:20:44<35:49:54, 72.97it/s]

Episode 1397 completed | Reward: 1000.00 | Avg Reward: 913.80 | Frames: 586720 | Epsilon: 0.4133


Training:   6%|▌         | 587241/10000000 [1:20:51<38:08:53, 68.54it/s]

Episode 1398 completed | Reward: 800.00 | Avg Reward: 912.80 | Frames: 587235 | Epsilon: 0.4128


Training:   6%|▌         | 587673/10000000 [1:20:57<38:35:16, 67.76it/s]

Episode 1399 completed | Reward: 1400.00 | Avg Reward: 920.60 | Frames: 587665 | Epsilon: 0.4123


Training:   6%|▌         | 588777/10000000 [1:21:11<35:52:27, 72.87it/s]

Episode 1400 completed | Reward: 700.00 | Avg Reward: 921.40 | Frames: 588768 | Epsilon: 0.4112


Training:   6%|▌         | 589289/10000000 [1:21:18<36:41:07, 71.26it/s]

Episode 1401 completed | Reward: 1080.00 | Avg Reward: 922.40 | Frames: 589283 | Epsilon: 0.4107


Training:   6%|▌         | 590321/10000000 [1:21:32<36:55:23, 70.79it/s]

Episode 1402 completed | Reward: 840.00 | Avg Reward: 923.20 | Frames: 590309 | Epsilon: 0.4097


Training:   6%|▌         | 590793/10000000 [1:21:38<36:44:06, 71.15it/s]

Episode 1403 completed | Reward: 2080.00 | Avg Reward: 939.60 | Frames: 590780 | Epsilon: 0.4092


Training:   6%|▌         | 591113/10000000 [1:21:43<35:53:50, 72.81it/s]

Episode 1404 completed | Reward: 740.00 | Avg Reward: 939.20 | Frames: 591101 | Epsilon: 0.4089


Training:   6%|▌         | 591617/10000000 [1:21:49<37:00:48, 70.61it/s]

Episode 1405 completed | Reward: 1780.00 | Avg Reward: 947.80 | Frames: 591609 | Epsilon: 0.4084


Training:   6%|▌         | 592089/10000000 [1:21:56<36:22:54, 71.83it/s]

Episode 1406 completed | Reward: 880.00 | Avg Reward: 950.20 | Frames: 592082 | Epsilon: 0.4079


Training:   6%|▌         | 592561/10000000 [1:22:02<37:40:01, 69.38it/s]

Episode 1407 completed | Reward: 500.00 | Avg Reward: 949.40 | Frames: 592555 | Epsilon: 0.4074


Training:   6%|▌         | 593033/10000000 [1:22:09<48:49:52, 53.51it/s]

Episode 1408 completed | Reward: 960.00 | Avg Reward: 952.80 | Frames: 593031 | Epsilon: 0.4070

Memory usage: 1.11 GB


Training:   6%|▌         | 593481/10000000 [1:22:15<38:19:13, 68.19it/s]

Episode 1409 completed | Reward: 2040.00 | Avg Reward: 960.80 | Frames: 593473 | Epsilon: 0.4065


Training:   6%|▌         | 593841/10000000 [1:22:19<36:48:01, 71.00it/s]

Episode 1410 completed | Reward: 1240.00 | Avg Reward: 960.80 | Frames: 593832 | Epsilon: 0.4062


Training:   6%|▌         | 594249/10000000 [1:22:25<38:17:33, 68.23it/s]

Episode 1411 completed | Reward: 220.00 | Avg Reward: 958.60 | Frames: 594243 | Epsilon: 0.4058


Training:   6%|▌         | 594649/10000000 [1:22:30<36:17:56, 71.97it/s]

Episode 1412 completed | Reward: 600.00 | Avg Reward: 946.40 | Frames: 594638 | Epsilon: 0.4054


Training:   6%|▌         | 595065/10000000 [1:22:36<37:27:25, 69.75it/s]

Episode 1413 completed | Reward: 1080.00 | Avg Reward: 946.80 | Frames: 595059 | Epsilon: 0.4049


Training:   6%|▌         | 595545/10000000 [1:22:42<36:28:57, 71.61it/s]

Episode 1414 completed | Reward: 680.00 | Avg Reward: 944.60 | Frames: 595534 | Epsilon: 0.4045


Training:   6%|▌         | 595929/10000000 [1:22:48<36:10:40, 72.21it/s]

Episode 1415 completed | Reward: 760.00 | Avg Reward: 940.40 | Frames: 595920 | Epsilon: 0.4041


Training:   6%|▌         | 597113/10000000 [1:23:04<36:42:52, 71.14it/s]

Episode 1416 completed | Reward: 1720.00 | Avg Reward: 947.80 | Frames: 597107 | Epsilon: 0.4029


Training:   6%|▌         | 598001/10000000 [1:23:15<37:06:19, 70.39it/s]

Episode 1417 completed | Reward: 1000.00 | Avg Reward: 952.20 | Frames: 597993 | Epsilon: 0.4020


Training:   6%|▌         | 598409/10000000 [1:23:21<36:58:10, 70.64it/s]

Episode 1418 completed | Reward: 620.00 | Avg Reward: 944.40 | Frames: 598398 | Epsilon: 0.4016


Training:   6%|▌         | 598753/10000000 [1:23:26<37:25:32, 69.78it/s]

Episode 1419 completed | Reward: 580.00 | Avg Reward: 945.00 | Frames: 598743 | Epsilon: 0.4013


Training:   6%|▌         | 599257/10000000 [1:23:32<36:40:35, 71.20it/s]

Episode 1420 completed | Reward: 1400.00 | Avg Reward: 947.60 | Frames: 599250 | Epsilon: 0.4008


Training:   6%|▌         | 599945/10000000 [1:23:42<37:38:32, 69.37it/s]

Episode 1421 completed | Reward: 1440.00 | Avg Reward: 951.40 | Frames: 599934 | Epsilon: 0.4001


Training:   6%|▌         | 600505/10000000 [1:23:49<37:04:51, 70.41it/s]

Episode 1422 completed | Reward: 1020.00 | Avg Reward: 954.80 | Frames: 600492 | Epsilon: 0.3995


Training:   6%|▌         | 601001/10000000 [1:23:56<36:25:50, 71.67it/s]

Episode 1423 completed | Reward: 700.00 | Avg Reward: 955.20 | Frames: 600992 | Epsilon: 0.3990


Training:   6%|▌         | 601489/10000000 [1:24:03<36:31:29, 71.48it/s]

Episode 1424 completed | Reward: 1380.00 | Avg Reward: 958.20 | Frames: 601481 | Epsilon: 0.3985


Training:   6%|▌         | 602041/10000000 [1:24:10<36:47:46, 70.95it/s]

Episode 1425 completed | Reward: 1440.00 | Avg Reward: 962.40 | Frames: 602028 | Epsilon: 0.3980


Training:   6%|▌         | 602489/10000000 [1:24:16<35:54:57, 72.68it/s]

Episode 1426 completed | Reward: 660.00 | Avg Reward: 962.60 | Frames: 602476 | Epsilon: 0.3975


Training:   6%|▌         | 602953/10000000 [1:24:23<37:31:36, 69.56it/s]

Episode 1427 completed | Reward: 920.00 | Avg Reward: 964.20 | Frames: 602943 | Epsilon: 0.3971


Training:   6%|▌         | 603446/10000000 [1:24:29<35:01:04, 74.54it/s]

Episode 1428 completed | Reward: 1240.00 | Avg Reward: 961.40 | Frames: 603446 | Epsilon: 0.3966

Memory usage: 1.11 GB


Training:   6%|▌         | 603897/10000000 [1:24:36<36:08:50, 72.21it/s]

Episode 1429 completed | Reward: 760.00 | Avg Reward: 962.60 | Frames: 603887 | Epsilon: 0.3961


Training:   6%|▌         | 604337/10000000 [1:24:42<37:18:30, 69.95it/s]

Episode 1430 completed | Reward: 940.00 | Avg Reward: 962.00 | Frames: 604325 | Epsilon: 0.3957


Training:   6%|▌         | 604873/10000000 [1:24:49<36:03:18, 72.38it/s]

Episode 1431 completed | Reward: 2420.00 | Avg Reward: 977.80 | Frames: 604860 | Epsilon: 0.3951


Training:   6%|▌         | 605281/10000000 [1:24:55<37:05:08, 70.37it/s]

Episode 1432 completed | Reward: 840.00 | Avg Reward: 975.80 | Frames: 605272 | Epsilon: 0.3947


Training:   6%|▌         | 605929/10000000 [1:25:03<37:06:29, 70.32it/s]

Episode 1433 completed | Reward: 1260.00 | Avg Reward: 981.60 | Frames: 605919 | Epsilon: 0.3941


Training:   6%|▌         | 606345/10000000 [1:25:09<37:08:19, 70.26it/s]

Episode 1434 completed | Reward: 580.00 | Avg Reward: 982.00 | Frames: 606337 | Epsilon: 0.3937


Training:   6%|▌         | 606865/10000000 [1:25:16<37:00:48, 70.49it/s]

Episode 1435 completed | Reward: 1440.00 | Avg Reward: 989.20 | Frames: 606852 | Epsilon: 0.3931


Training:   6%|▌         | 607209/10000000 [1:25:21<37:19:39, 69.90it/s]

Episode 1436 completed | Reward: 680.00 | Avg Reward: 990.40 | Frames: 607199 | Epsilon: 0.3928


Training:   6%|▌         | 608201/10000000 [1:25:35<37:49:03, 68.98it/s]

Episode 1437 completed | Reward: 1840.00 | Avg Reward: 996.00 | Frames: 608195 | Epsilon: 0.3918


Training:   6%|▌         | 609153/10000000 [1:25:48<37:16:38, 69.98it/s]

Episode 1438 completed | Reward: 1580.00 | Avg Reward: 1001.20 | Frames: 609141 | Epsilon: 0.3909


Training:   6%|▌         | 609897/10000000 [1:25:58<37:48:11, 69.00it/s]

Episode 1439 completed | Reward: 360.00 | Avg Reward: 986.60 | Frames: 609887 | Epsilon: 0.3901


Training:   6%|▌         | 610649/10000000 [1:26:08<38:43:43, 67.34it/s]

Episode 1440 completed | Reward: 1360.00 | Avg Reward: 996.20 | Frames: 610643 | Epsilon: 0.3894


Training:   6%|▌         | 611209/10000000 [1:26:16<36:51:19, 70.76it/s]

Episode 1441 completed | Reward: 1540.00 | Avg Reward: 1005.00 | Frames: 611200 | Epsilon: 0.3888


Training:   6%|▌         | 611889/10000000 [1:26:25<37:50:55, 68.90it/s]

Episode 1442 completed | Reward: 160.00 | Avg Reward: 993.80 | Frames: 611881 | Epsilon: 0.3881


Training:   6%|▌         | 612345/10000000 [1:26:32<37:05:39, 70.30it/s]

Episode 1443 completed | Reward: 780.00 | Avg Reward: 992.60 | Frames: 612332 | Epsilon: 0.3877


Training:   6%|▌         | 612833/10000000 [1:26:38<37:58:54, 68.65it/s]

Episode 1444 completed | Reward: 1040.00 | Avg Reward: 998.20 | Frames: 612822 | Epsilon: 0.3872


Training:   6%|▌         | 613225/10000000 [1:26:44<38:13:19, 68.22it/s]

Episode 1445 completed | Reward: 740.00 | Avg Reward: 999.00 | Frames: 613212 | Epsilon: 0.3868


Training:   6%|▌         | 613690/10000000 [1:26:50<35:29:46, 73.45it/s]

Episode 1446 completed | Reward: 800.00 | Avg Reward: 997.60 | Frames: 613690 | Epsilon: 0.3863

Memory usage: 1.11 GB


Training:   6%|▌         | 614065/10000000 [1:26:55<37:17:25, 69.92it/s]

Episode 1447 completed | Reward: 880.00 | Avg Reward: 996.20 | Frames: 614055 | Epsilon: 0.3859


Training:   6%|▌         | 614993/10000000 [1:27:08<38:32:39, 67.64it/s]

Episode 1448 completed | Reward: 700.00 | Avg Reward: 992.40 | Frames: 614986 | Epsilon: 0.3850


Training:   6%|▌         | 615465/10000000 [1:27:15<37:16:12, 69.94it/s]

Episode 1449 completed | Reward: 880.00 | Avg Reward: 992.20 | Frames: 615452 | Epsilon: 0.3845


Training:   6%|▌         | 615897/10000000 [1:27:21<37:12:29, 70.06it/s]

Episode 1450 completed | Reward: 1160.00 | Avg Reward: 998.00 | Frames: 615884 | Epsilon: 0.3841


Training:   6%|▌         | 616193/10000000 [1:27:25<36:48:42, 70.81it/s]

Episode 1451 completed | Reward: 700.00 | Avg Reward: 997.80 | Frames: 616183 | Epsilon: 0.3838


Training:   6%|▌         | 617385/10000000 [1:27:41<39:11:43, 66.49it/s]

Episode 1452 completed | Reward: 760.00 | Avg Reward: 991.80 | Frames: 617377 | Epsilon: 0.3826


Training:   6%|▌         | 617841/10000000 [1:27:47<37:15:06, 69.96it/s]

Episode 1453 completed | Reward: 500.00 | Avg Reward: 990.80 | Frames: 617828 | Epsilon: 0.3822


Training:   6%|▌         | 618257/10000000 [1:27:53<37:58:11, 68.63it/s]

Episode 1454 completed | Reward: 700.00 | Avg Reward: 989.60 | Frames: 618249 | Epsilon: 0.3818


Training:   6%|▌         | 618705/10000000 [1:27:59<37:01:17, 70.39it/s]

Episode 1455 completed | Reward: 480.00 | Avg Reward: 987.60 | Frames: 618695 | Epsilon: 0.3813


Training:   6%|▌         | 619505/10000000 [1:28:10<37:56:21, 68.68it/s]

Episode 1456 completed | Reward: 760.00 | Avg Reward: 989.80 | Frames: 619492 | Epsilon: 0.3805


Training:   6%|▌         | 619961/10000000 [1:28:16<37:32:43, 69.40it/s]

Episode 1457 completed | Reward: 1100.00 | Avg Reward: 994.60 | Frames: 619953 | Epsilon: 0.3800


Training:   6%|▌         | 620353/10000000 [1:28:22<37:43:19, 69.07it/s]

Episode 1458 completed | Reward: 420.00 | Avg Reward: 987.60 | Frames: 620341 | Epsilon: 0.3797


Training:   6%|▌         | 620689/10000000 [1:28:26<38:20:20, 67.96it/s]

Episode 1459 completed | Reward: 580.00 | Avg Reward: 985.80 | Frames: 620681 | Epsilon: 0.3793


Training:   6%|▌         | 621177/10000000 [1:28:33<36:49:44, 70.74it/s]

Episode 1460 completed | Reward: 1640.00 | Avg Reward: 992.20 | Frames: 621165 | Epsilon: 0.3788


Training:   6%|▌         | 621625/10000000 [1:28:39<36:58:46, 70.45it/s]

Episode 1461 completed | Reward: 1300.00 | Avg Reward: 995.20 | Frames: 621616 | Epsilon: 0.3784


Training:   6%|▌         | 622009/10000000 [1:28:44<38:14:07, 68.13it/s]

Episode 1462 completed | Reward: 900.00 | Avg Reward: 990.80 | Frames: 622001 | Epsilon: 0.3780


Training:   6%|▌         | 622377/10000000 [1:28:49<38:09:06, 68.28it/s]

Episode 1463 completed | Reward: 620.00 | Avg Reward: 989.00 | Frames: 622365 | Epsilon: 0.3776


Training:   6%|▌         | 622865/10000000 [1:28:56<37:50:05, 68.85it/s]

Episode 1464 completed | Reward: 1360.00 | Avg Reward: 996.20 | Frames: 622855 | Epsilon: 0.3771


Training:   6%|▌         | 623313/10000000 [1:29:02<36:38:30, 71.08it/s]

Episode 1465 completed | Reward: 760.00 | Avg Reward: 997.00 | Frames: 623303 | Epsilon: 0.3767


Training:   6%|▌         | 623794/10000000 [1:29:09<35:51:22, 72.64it/s]

Episode 1466 completed | Reward: 1060.00 | Avg Reward: 1002.20 | Frames: 623794 | Epsilon: 0.3762

Memory usage: 1.11 GB


Training:   6%|▌         | 624625/10000000 [1:29:21<37:17:00, 69.85it/s]

Episode 1467 completed | Reward: 940.00 | Avg Reward: 991.80 | Frames: 624618 | Epsilon: 0.3754


Training:   6%|▋         | 625065/10000000 [1:29:27<36:20:35, 71.65it/s]

Episode 1468 completed | Reward: 780.00 | Avg Reward: 985.40 | Frames: 625054 | Epsilon: 0.3749


Training:   6%|▋         | 625425/10000000 [1:29:32<37:22:38, 69.67it/s]

Episode 1469 completed | Reward: 920.00 | Avg Reward: 987.20 | Frames: 625413 | Epsilon: 0.3746


Training:   6%|▋         | 625785/10000000 [1:29:37<37:17:45, 69.82it/s]

Episode 1470 completed | Reward: 1420.00 | Avg Reward: 991.00 | Frames: 625772 | Epsilon: 0.3742


Training:   6%|▋         | 626193/10000000 [1:29:42<37:04:32, 70.23it/s]

Episode 1471 completed | Reward: 580.00 | Avg Reward: 988.80 | Frames: 626185 | Epsilon: 0.3738


Training:   6%|▋         | 626681/10000000 [1:29:49<36:37:07, 71.10it/s]

Episode 1472 completed | Reward: 840.00 | Avg Reward: 987.40 | Frames: 626668 | Epsilon: 0.3733


Training:   6%|▋         | 627129/10000000 [1:29:55<37:57:06, 68.60it/s]

Episode 1473 completed | Reward: 700.00 | Avg Reward: 984.00 | Frames: 627121 | Epsilon: 0.3729


Training:   6%|▋         | 627665/10000000 [1:30:03<37:56:51, 68.61it/s]

Episode 1474 completed | Reward: 2340.00 | Avg Reward: 983.00 | Frames: 627652 | Epsilon: 0.3723


Training:   6%|▋         | 628065/10000000 [1:30:08<37:26:55, 69.52it/s]

Episode 1475 completed | Reward: 720.00 | Avg Reward: 983.20 | Frames: 628055 | Epsilon: 0.3719


Training:   6%|▋         | 628641/10000000 [1:30:16<37:35:17, 69.25it/s]

Episode 1476 completed | Reward: 620.00 | Avg Reward: 978.20 | Frames: 628630 | Epsilon: 0.3714


Training:   6%|▋         | 629185/10000000 [1:30:24<37:41:13, 69.07it/s]

Episode 1477 completed | Reward: 2120.00 | Avg Reward: 994.20 | Frames: 629179 | Epsilon: 0.3708


Training:   6%|▋         | 629721/10000000 [1:30:31<37:19:41, 69.73it/s]

Episode 1478 completed | Reward: 1960.00 | Avg Reward: 1000.40 | Frames: 629710 | Epsilon: 0.3703


Training:   6%|▋         | 630417/10000000 [1:30:41<38:48:51, 67.05it/s]

Episode 1479 completed | Reward: 1420.00 | Avg Reward: 1002.80 | Frames: 630410 | Epsilon: 0.3696


Training:   6%|▋         | 630777/10000000 [1:30:46<38:17:38, 67.96it/s]

Episode 1480 completed | Reward: 1160.00 | Avg Reward: 1007.60 | Frames: 630764 | Epsilon: 0.3692


Training:   6%|▋         | 631337/10000000 [1:30:54<37:46:32, 68.89it/s]

Episode 1481 completed | Reward: 1040.00 | Avg Reward: 1008.80 | Frames: 631327 | Epsilon: 0.3687


Training:   6%|▋         | 631825/10000000 [1:31:00<38:04:03, 68.36it/s]

Episode 1482 completed | Reward: 1220.00 | Avg Reward: 1011.40 | Frames: 631816 | Epsilon: 0.3682


Training:   6%|▋         | 632617/10000000 [1:31:12<40:26:11, 64.35it/s]

Episode 1483 completed | Reward: 480.00 | Avg Reward: 1003.80 | Frames: 632614 | Epsilon: 0.3674


Training:   6%|▋         | 633233/10000000 [1:31:20<37:27:24, 69.46it/s]

Episode 1484 completed | Reward: 700.00 | Avg Reward: 1003.60 | Frames: 633220 | Epsilon: 0.3668


Training:   6%|▋         | 633737/10000000 [1:31:27<38:46:27, 67.10it/s]

Episode 1485 completed | Reward: 840.00 | Avg Reward: 1006.20 | Frames: 633729 | Epsilon: 0.3663


Training:   6%|▋         | 634256/10000000 [1:31:35<48:13:05, 53.95it/s]

Episode 1486 completed | Reward: 1380.00 | Avg Reward: 1010.00 | Frames: 634254 | Epsilon: 0.3657

Memory usage: 1.11 GB


Training:   6%|▋         | 634797/10000000 [1:31:42<38:58:21, 66.75it/s]

Episode 1487 completed | Reward: 2440.00 | Avg Reward: 1028.60 | Frames: 634790 | Epsilon: 0.3652


Training:   6%|▋         | 635197/10000000 [1:31:48<40:15:16, 64.62it/s]

Episode 1488 completed | Reward: 680.00 | Avg Reward: 1027.20 | Frames: 635192 | Epsilon: 0.3648


Training:   6%|▋         | 635605/10000000 [1:31:54<39:27:55, 65.91it/s]

Episode 1489 completed | Reward: 780.00 | Avg Reward: 1022.00 | Frames: 635598 | Epsilon: 0.3644


Training:   6%|▋         | 636021/10000000 [1:32:00<38:37:52, 67.33it/s]

Episode 1490 completed | Reward: 640.00 | Avg Reward: 1021.60 | Frames: 636011 | Epsilon: 0.3640


Training:   6%|▋         | 636389/10000000 [1:32:05<38:17:43, 67.92it/s]

Episode 1491 completed | Reward: 580.00 | Avg Reward: 1017.40 | Frames: 636381 | Epsilon: 0.3636


Training:   6%|▋         | 636933/10000000 [1:32:12<38:02:24, 68.37it/s]

Episode 1492 completed | Reward: 1440.00 | Avg Reward: 1025.60 | Frames: 636921 | Epsilon: 0.3631


Training:   6%|▋         | 637349/10000000 [1:32:18<37:56:27, 68.55it/s]

Episode 1493 completed | Reward: 860.00 | Avg Reward: 1025.60 | Frames: 637339 | Epsilon: 0.3627


Training:   6%|▋         | 637829/10000000 [1:32:25<38:05:50, 68.26it/s]

Episode 1494 completed | Reward: 500.00 | Avg Reward: 1024.40 | Frames: 637820 | Epsilon: 0.3622


Training:   6%|▋         | 638589/10000000 [1:32:36<38:44:37, 67.12it/s]

Episode 1495 completed | Reward: 1480.00 | Avg Reward: 1028.00 | Frames: 638582 | Epsilon: 0.3614


Training:   6%|▋         | 639061/10000000 [1:32:43<38:12:37, 68.05it/s]

Episode 1496 completed | Reward: 920.00 | Avg Reward: 1023.80 | Frames: 639050 | Epsilon: 0.3609


Training:   6%|▋         | 640133/10000000 [1:32:58<38:39:45, 67.25it/s]

Episode 1497 completed | Reward: 1420.00 | Avg Reward: 1028.00 | Frames: 640120 | Epsilon: 0.3599


Training:   6%|▋         | 640757/10000000 [1:33:07<39:17:29, 66.17it/s]

Episode 1498 completed | Reward: 1160.00 | Avg Reward: 1031.60 | Frames: 640750 | Epsilon: 0.3592


Training:   6%|▋         | 641181/10000000 [1:33:13<37:07:23, 70.03it/s]

Episode 1499 completed | Reward: 660.00 | Avg Reward: 1024.20 | Frames: 641172 | Epsilon: 0.3588


Training:   6%|▋         | 641701/10000000 [1:33:20<36:58:22, 70.31it/s]

Episode 1500 completed | Reward: 1740.00 | Avg Reward: 1034.60 | Frames: 641689 | Epsilon: 0.3583


Training:   6%|▋         | 642101/10000000 [1:33:26<39:05:46, 66.49it/s]

Episode 1501 completed | Reward: 880.00 | Avg Reward: 1032.60 | Frames: 642095 | Epsilon: 0.3579


Training:   6%|▋         | 642517/10000000 [1:33:32<37:51:17, 68.66it/s]

Episode 1502 completed | Reward: 1220.00 | Avg Reward: 1036.40 | Frames: 642511 | Epsilon: 0.3575


Training:   6%|▋         | 642989/10000000 [1:33:38<38:20:56, 67.78it/s]

Episode 1503 completed | Reward: 980.00 | Avg Reward: 1025.40 | Frames: 642982 | Epsilon: 0.3570


Training:   6%|▋         | 643477/10000000 [1:33:45<38:41:42, 67.17it/s]

Episode 1504 completed | Reward: 1180.00 | Avg Reward: 1029.80 | Frames: 643465 | Epsilon: 0.3565


Training:   6%|▋         | 643869/10000000 [1:33:51<38:06:34, 68.20it/s]

Episode 1505 completed | Reward: 800.00 | Avg Reward: 1020.00 | Frames: 643857 | Epsilon: 0.3561


Training:   6%|▋         | 644846/10000000 [1:34:05<36:50:45, 70.53it/s]

Episode 1506 completed | Reward: 1240.00 | Avg Reward: 1023.60 | Frames: 644846 | Epsilon: 0.3552

Memory usage: 1.11 GB


Training:   6%|▋         | 645277/10000000 [1:34:11<38:57:11, 66.71it/s]

Episode 1507 completed | Reward: 780.00 | Avg Reward: 1026.40 | Frames: 645269 | Epsilon: 0.3547


Training:   6%|▋         | 645853/10000000 [1:34:19<37:50:23, 68.67it/s]

Episode 1508 completed | Reward: 1060.00 | Avg Reward: 1027.40 | Frames: 645845 | Epsilon: 0.3542


Training:   6%|▋         | 646341/10000000 [1:34:26<37:50:23, 68.66it/s]

Episode 1509 completed | Reward: 720.00 | Avg Reward: 1014.20 | Frames: 646331 | Epsilon: 0.3537


Training:   6%|▋         | 646757/10000000 [1:34:32<38:34:31, 67.35it/s]

Episode 1510 completed | Reward: 560.00 | Avg Reward: 1007.40 | Frames: 646751 | Epsilon: 0.3532


Training:   6%|▋         | 647157/10000000 [1:34:38<38:47:56, 66.96it/s]

Episode 1511 completed | Reward: 880.00 | Avg Reward: 1014.00 | Frames: 647151 | Epsilon: 0.3528


Training:   6%|▋         | 647565/10000000 [1:34:43<38:57:46, 66.68it/s]

Episode 1512 completed | Reward: 560.00 | Avg Reward: 1013.60 | Frames: 647556 | Epsilon: 0.3524


Training:   6%|▋         | 648029/10000000 [1:34:50<40:42:03, 63.83it/s]

Episode 1513 completed | Reward: 1100.00 | Avg Reward: 1013.80 | Frames: 648024 | Epsilon: 0.3520


Training:   6%|▋         | 648477/10000000 [1:34:56<38:24:22, 67.64it/s]

Episode 1514 completed | Reward: 1020.00 | Avg Reward: 1017.20 | Frames: 648466 | Epsilon: 0.3515


Training:   6%|▋         | 649445/10000000 [1:35:10<38:57:51, 66.66it/s]

Episode 1515 completed | Reward: 720.00 | Avg Reward: 1016.80 | Frames: 649433 | Epsilon: 0.3506


Training:   6%|▋         | 649813/10000000 [1:35:16<39:45:38, 65.32it/s]

Episode 1516 completed | Reward: 1080.00 | Avg Reward: 1010.40 | Frames: 649807 | Epsilon: 0.3502


Training:   7%|▋         | 650253/10000000 [1:35:22<39:43:02, 65.39it/s]

Episode 1517 completed | Reward: 900.00 | Avg Reward: 1009.40 | Frames: 650245 | Epsilon: 0.3498


Training:   7%|▋         | 650773/10000000 [1:35:29<39:49:55, 65.20it/s]

Episode 1518 completed | Reward: 920.00 | Avg Reward: 1012.40 | Frames: 650768 | Epsilon: 0.3492


Training:   7%|▋         | 651317/10000000 [1:35:37<39:02:33, 66.51it/s]

Episode 1519 completed | Reward: 1300.00 | Avg Reward: 1019.60 | Frames: 651310 | Epsilon: 0.3487


Training:   7%|▋         | 651789/10000000 [1:35:44<39:33:57, 65.63it/s]

Episode 1520 completed | Reward: 1200.00 | Avg Reward: 1017.60 | Frames: 651781 | Epsilon: 0.3482


Training:   7%|▋         | 652229/10000000 [1:35:50<38:33:33, 67.34it/s]

Episode 1521 completed | Reward: 2160.00 | Avg Reward: 1024.80 | Frames: 652220 | Epsilon: 0.3478


Training:   7%|▋         | 652997/10000000 [1:36:01<39:50:30, 65.17it/s]

Episode 1522 completed | Reward: 1100.00 | Avg Reward: 1025.60 | Frames: 652990 | Epsilon: 0.3470


Training:   7%|▋         | 653373/10000000 [1:36:07<38:45:14, 66.99it/s]

Episode 1523 completed | Reward: 740.00 | Avg Reward: 1026.00 | Frames: 653363 | Epsilon: 0.3466


Training:   7%|▋         | 653749/10000000 [1:36:12<40:14:50, 64.51it/s]

Episode 1524 completed | Reward: 1040.00 | Avg Reward: 1022.60 | Frames: 653743 | Epsilon: 0.3463


Training:   7%|▋         | 654741/10000000 [1:36:26<38:11:17, 67.98it/s]

Episode 1525 completed | Reward: 1080.00 | Avg Reward: 1019.00 | Frames: 654734 | Epsilon: 0.3453


Training:   7%|▋         | 655228/10000000 [1:36:34<47:21:48, 54.81it/s]

Episode 1526 completed | Reward: 1100.00 | Avg Reward: 1023.40 | Frames: 655225 | Epsilon: 0.3448

Memory usage: 1.12 GB


Training:   7%|▋         | 655593/10000000 [1:36:39<41:23:57, 62.70it/s]

Episode 1527 completed | Reward: 740.00 | Avg Reward: 1021.60 | Frames: 655589 | Epsilon: 0.3444


Training:   7%|▋         | 656103/10000000 [1:36:46<38:55:08, 66.69it/s]

Episode 1528 completed | Reward: 720.00 | Avg Reward: 1016.40 | Frames: 656095 | Epsilon: 0.3439


Training:   7%|▋         | 656917/10000000 [1:36:58<39:08:55, 66.29it/s]

Episode 1529 completed | Reward: 1040.00 | Avg Reward: 1019.20 | Frames: 656905 | Epsilon: 0.3431


Training:   7%|▋         | 657379/10000000 [1:37:05<39:28:11, 65.75it/s]

Episode 1530 completed | Reward: 940.00 | Avg Reward: 1019.20 | Frames: 657370 | Epsilon: 0.3426


Training:   7%|▋         | 657785/10000000 [1:37:11<39:32:37, 65.63it/s]

Episode 1531 completed | Reward: 680.00 | Avg Reward: 1001.80 | Frames: 657776 | Epsilon: 0.3422


Training:   7%|▋         | 658233/10000000 [1:37:17<38:46:03, 66.94it/s]

Episode 1532 completed | Reward: 760.00 | Avg Reward: 1001.00 | Frames: 658222 | Epsilon: 0.3418


Training:   7%|▋         | 658737/10000000 [1:37:25<38:48:49, 66.85it/s]

Episode 1533 completed | Reward: 900.00 | Avg Reward: 997.40 | Frames: 658729 | Epsilon: 0.3413


Training:   7%|▋         | 659089/10000000 [1:37:30<39:53:19, 65.05it/s]

Episode 1534 completed | Reward: 780.00 | Avg Reward: 999.40 | Frames: 659082 | Epsilon: 0.3409


Training:   7%|▋         | 659545/10000000 [1:37:36<39:22:57, 65.88it/s]

Episode 1535 completed | Reward: 1560.00 | Avg Reward: 1000.60 | Frames: 659536 | Epsilon: 0.3405


Training:   7%|▋         | 659953/10000000 [1:37:42<40:36:16, 63.90it/s]

Episode 1536 completed | Reward: 180.00 | Avg Reward: 995.60 | Frames: 659950 | Epsilon: 0.3400


Training:   7%|▋         | 660465/10000000 [1:37:50<39:16:50, 66.05it/s]

Episode 1537 completed | Reward: 1900.00 | Avg Reward: 996.20 | Frames: 660453 | Epsilon: 0.3395


Training:   7%|▋         | 660833/10000000 [1:37:55<40:27:59, 64.11it/s]

Episode 1538 completed | Reward: 740.00 | Avg Reward: 987.80 | Frames: 660827 | Epsilon: 0.3392


Training:   7%|▋         | 661241/10000000 [1:38:01<38:54:53, 66.66it/s]

Episode 1539 completed | Reward: 1000.00 | Avg Reward: 994.20 | Frames: 661229 | Epsilon: 0.3388


Training:   7%|▋         | 661936/10000000 [1:38:11<37:51:18, 68.52it/s]

Episode 1540 completed | Reward: 1280.00 | Avg Reward: 993.40 | Frames: 661929 | Epsilon: 0.3381


Training:   7%|▋         | 662341/10000000 [1:38:17<39:03:51, 66.40it/s]

Episode 1541 completed | Reward: 900.00 | Avg Reward: 987.00 | Frames: 662330 | Epsilon: 0.3377


Training:   7%|▋         | 662789/10000000 [1:38:24<39:05:42, 66.34it/s]

Episode 1542 completed | Reward: 1020.00 | Avg Reward: 995.60 | Frames: 662779 | Epsilon: 0.3372


Training:   7%|▋         | 663149/10000000 [1:38:29<39:10:41, 66.20it/s]

Episode 1543 completed | Reward: 1000.00 | Avg Reward: 997.80 | Frames: 663141 | Epsilon: 0.3369


Training:   7%|▋         | 663821/10000000 [1:38:39<38:58:10, 66.55it/s]

Episode 1544 completed | Reward: 1420.00 | Avg Reward: 1001.60 | Frames: 663810 | Epsilon: 0.3362


Training:   7%|▋         | 664140/10000000 [1:38:43<37:17:55, 69.53it/s]

Episode 1545 completed | Reward: 420.00 | Avg Reward: 998.40 | Frames: 664133 | Epsilon: 0.3359


Training:   7%|▋         | 664489/10000000 [1:38:48<39:44:27, 65.25it/s]

Episode 1546 completed | Reward: 620.00 | Avg Reward: 996.60 | Frames: 664481 | Epsilon: 0.3355


Training:   7%|▋         | 664977/10000000 [1:38:55<40:24:38, 64.17it/s]

Episode 1547 completed | Reward: 1780.00 | Avg Reward: 1005.60 | Frames: 664974 | Epsilon: 0.3350


Training:   7%|▋         | 665472/10000000 [1:39:03<48:51:20, 53.07it/s]

Episode 1548 completed | Reward: 820.00 | Avg Reward: 1006.80 | Frames: 665470 | Epsilon: 0.3345

Memory usage: 1.12 GB


Training:   7%|▋         | 665941/10000000 [1:39:10<40:18:08, 64.33it/s]

Episode 1549 completed | Reward: 1040.00 | Avg Reward: 1008.40 | Frames: 665933 | Epsilon: 0.3341


Training:   7%|▋         | 666365/10000000 [1:39:16<39:53:58, 64.98it/s]

Episode 1550 completed | Reward: 540.00 | Avg Reward: 1002.20 | Frames: 666359 | Epsilon: 0.3336


Training:   7%|▋         | 666805/10000000 [1:39:22<41:06:29, 63.07it/s]

Episode 1551 completed | Reward: 1240.00 | Avg Reward: 1007.60 | Frames: 666798 | Epsilon: 0.3332


Training:   7%|▋         | 667421/10000000 [1:39:31<39:18:19, 65.95it/s]

Episode 1552 completed | Reward: 1820.00 | Avg Reward: 1018.20 | Frames: 667411 | Epsilon: 0.3326


Training:   7%|▋         | 667797/10000000 [1:39:37<40:35:55, 63.85it/s]

Episode 1553 completed | Reward: 640.00 | Avg Reward: 1019.60 | Frames: 667795 | Epsilon: 0.3322


Training:   7%|▋         | 668253/10000000 [1:39:44<40:29:39, 64.01it/s]

Episode 1554 completed | Reward: 880.00 | Avg Reward: 1021.40 | Frames: 668246 | Epsilon: 0.3318


Training:   7%|▋         | 668733/10000000 [1:39:51<39:09:03, 66.21it/s]

Episode 1555 completed | Reward: 1380.00 | Avg Reward: 1030.40 | Frames: 668727 | Epsilon: 0.3313


Training:   7%|▋         | 669141/10000000 [1:39:57<39:46:52, 65.15it/s]

Episode 1556 completed | Reward: 880.00 | Avg Reward: 1031.60 | Frames: 669134 | Epsilon: 0.3309


Training:   7%|▋         | 669499/10000000 [1:40:02<39:28:12, 65.66it/s]

Episode 1557 completed | Reward: 840.00 | Avg Reward: 1029.00 | Frames: 669491 | Epsilon: 0.3305


Training:   7%|▋         | 669953/10000000 [1:40:08<38:53:42, 66.63it/s]

Episode 1558 completed | Reward: 840.00 | Avg Reward: 1033.20 | Frames: 669941 | Epsilon: 0.3301


Training:   7%|▋         | 670865/10000000 [1:40:22<39:07:49, 66.23it/s]

Episode 1559 completed | Reward: 620.00 | Avg Reward: 1033.60 | Frames: 670852 | Epsilon: 0.3291


Training:   7%|▋         | 671273/10000000 [1:40:28<39:33:26, 65.51it/s]

Episode 1560 completed | Reward: 720.00 | Avg Reward: 1024.40 | Frames: 671262 | Epsilon: 0.3287


Training:   7%|▋         | 671937/10000000 [1:40:37<39:05:07, 66.29it/s]

Episode 1561 completed | Reward: 700.00 | Avg Reward: 1018.40 | Frames: 671925 | Epsilon: 0.3281


Training:   7%|▋         | 672425/10000000 [1:40:45<38:42:59, 66.92it/s]

Episode 1562 completed | Reward: 1120.00 | Avg Reward: 1020.60 | Frames: 672413 | Epsilon: 0.3276


Training:   7%|▋         | 672849/10000000 [1:40:51<40:33:38, 63.88it/s]

Episode 1563 completed | Reward: 740.00 | Avg Reward: 1021.80 | Frames: 672847 | Epsilon: 0.3272


Training:   7%|▋         | 673193/10000000 [1:40:56<38:53:06, 66.63it/s]

Episode 1564 completed | Reward: 1080.00 | Avg Reward: 1019.00 | Frames: 673186 | Epsilon: 0.3268


Training:   7%|▋         | 673753/10000000 [1:41:04<38:28:53, 67.32it/s]

Episode 1565 completed | Reward: 1680.00 | Avg Reward: 1028.20 | Frames: 673746 | Epsilon: 0.3263


Training:   7%|▋         | 674185/10000000 [1:41:10<40:55:17, 63.30it/s]

Episode 1566 completed | Reward: 440.00 | Avg Reward: 1022.00 | Frames: 674183 | Epsilon: 0.3258


Training:   7%|▋         | 674617/10000000 [1:41:17<39:27:45, 65.64it/s]

Episode 1567 completed | Reward: 1200.00 | Avg Reward: 1024.60 | Frames: 674607 | Epsilon: 0.3254


Training:   7%|▋         | 675168/10000000 [1:41:25<37:54:40, 68.32it/s]

Episode 1568 completed | Reward: 1100.00 | Avg Reward: 1027.80 | Frames: 675161 | Epsilon: 0.3248


Training:   7%|▋         | 675660/10000000 [1:41:32<47:16:48, 54.78it/s]

Episode 1569 completed | Reward: 1140.00 | Avg Reward: 1030.00 | Frames: 675657 | Epsilon: 0.3243

Memory usage: 1.12 GB


Training:   7%|▋         | 676521/10000000 [1:41:45<41:35:55, 62.26it/s]

Episode 1570 completed | Reward: 180.00 | Avg Reward: 1017.60 | Frames: 676518 | Epsilon: 0.3235


Training:   7%|▋         | 676905/10000000 [1:41:51<39:44:12, 65.17it/s]

Episode 1571 completed | Reward: 1040.00 | Avg Reward: 1022.20 | Frames: 676901 | Epsilon: 0.3231


Training:   7%|▋         | 677441/10000000 [1:41:58<40:04:04, 64.63it/s]

Episode 1572 completed | Reward: 1100.00 | Avg Reward: 1024.80 | Frames: 677437 | Epsilon: 0.3226


Training:   7%|▋         | 678063/10000000 [1:42:08<39:39:14, 65.30it/s]

Episode 1573 completed | Reward: 740.00 | Avg Reward: 1025.20 | Frames: 678055 | Epsilon: 0.3219


Training:   7%|▋         | 678685/10000000 [1:42:17<39:39:13, 65.30it/s]

Episode 1574 completed | Reward: 460.00 | Avg Reward: 1006.40 | Frames: 678676 | Epsilon: 0.3213


Training:   7%|▋         | 679173/10000000 [1:42:24<40:59:34, 63.16it/s]

Episode 1575 completed | Reward: 780.00 | Avg Reward: 1007.00 | Frames: 679168 | Epsilon: 0.3208


Training:   7%|▋         | 679581/10000000 [1:42:30<41:18:47, 62.67it/s]

Episode 1576 completed | Reward: 800.00 | Avg Reward: 1008.80 | Frames: 679576 | Epsilon: 0.3204


Training:   7%|▋         | 680501/10000000 [1:42:44<39:41:42, 65.22it/s]

Episode 1577 completed | Reward: 1220.00 | Avg Reward: 999.80 | Frames: 680492 | Epsilon: 0.3195


Training:   7%|▋         | 680805/10000000 [1:42:48<41:09:07, 62.90it/s]

Episode 1578 completed | Reward: 760.00 | Avg Reward: 987.80 | Frames: 680797 | Epsilon: 0.3192


Training:   7%|▋         | 681165/10000000 [1:42:54<41:14:04, 62.78it/s]

Episode 1579 completed | Reward: 1040.00 | Avg Reward: 984.00 | Frames: 681158 | Epsilon: 0.3188


Training:   7%|▋         | 681725/10000000 [1:43:02<41:19:57, 62.62it/s]

Episode 1580 completed | Reward: 580.00 | Avg Reward: 978.20 | Frames: 681721 | Epsilon: 0.3183


Training:   7%|▋         | 682347/10000000 [1:43:11<39:41:46, 65.20it/s]

Episode 1581 completed | Reward: 1340.00 | Avg Reward: 981.20 | Frames: 682337 | Epsilon: 0.3177


Training:   7%|▋         | 682969/10000000 [1:43:20<39:34:59, 65.38it/s]

Episode 1582 completed | Reward: 1760.00 | Avg Reward: 986.60 | Frames: 682957 | Epsilon: 0.3170


Training:   7%|▋         | 683449/10000000 [1:43:28<39:14:44, 65.94it/s]

Episode 1583 completed | Reward: 2240.00 | Avg Reward: 1004.20 | Frames: 683437 | Epsilon: 0.3166


Training:   7%|▋         | 683937/10000000 [1:43:35<41:08:07, 62.91it/s]

Episode 1584 completed | Reward: 1540.00 | Avg Reward: 1012.60 | Frames: 683931 | Epsilon: 0.3161


Training:   7%|▋         | 684529/10000000 [1:43:44<40:55:13, 63.24it/s]

Episode 1585 completed | Reward: 1840.00 | Avg Reward: 1022.60 | Frames: 684521 | Epsilon: 0.3155


Training:   7%|▋         | 684953/10000000 [1:43:50<40:23:03, 64.07it/s]

Episode 1586 completed | Reward: 900.00 | Avg Reward: 1017.80 | Frames: 684945 | Epsilon: 0.3151


Training:   7%|▋         | 685425/10000000 [1:43:57<38:45:30, 66.76it/s]

Episode 1587 completed | Reward: 1020.00 | Avg Reward: 1003.60 | Frames: 685417 | Epsilon: 0.3146


Training:   7%|▋         | 686010/10000000 [1:44:06<38:21:35, 67.45it/s]

Episode 1588 completed | Reward: 1140.00 | Avg Reward: 1008.20 | Frames: 686010 | Epsilon: 0.3140

Memory usage: 1.12 GB


Training:   7%|▋         | 686445/10000000 [1:44:12<41:11:57, 62.79it/s]

Episode 1589 completed | Reward: 1020.00 | Avg Reward: 1010.60 | Frames: 686438 | Epsilon: 0.3136


Training:   7%|▋         | 686861/10000000 [1:44:18<40:12:50, 64.33it/s]

Episode 1590 completed | Reward: 1000.00 | Avg Reward: 1014.20 | Frames: 686852 | Epsilon: 0.3131


Training:   7%|▋         | 687219/10000000 [1:44:24<39:59:36, 64.68it/s]

Episode 1591 completed | Reward: 1340.00 | Avg Reward: 1021.80 | Frames: 687208 | Epsilon: 0.3128


Training:   7%|▋         | 687721/10000000 [1:44:31<39:15:08, 65.90it/s]

Episode 1592 completed | Reward: 1160.00 | Avg Reward: 1019.00 | Frames: 687708 | Epsilon: 0.3123


Training:   7%|▋         | 688113/10000000 [1:44:37<39:53:25, 64.84it/s]

Episode 1593 completed | Reward: 660.00 | Avg Reward: 1017.00 | Frames: 688103 | Epsilon: 0.3119


Training:   7%|▋         | 689361/10000000 [1:44:56<39:08:20, 66.08it/s]

Episode 1594 completed | Reward: 1360.00 | Avg Reward: 1025.60 | Frames: 689349 | Epsilon: 0.3107


Training:   7%|▋         | 689745/10000000 [1:45:01<39:11:11, 66.00it/s]

Episode 1595 completed | Reward: 1260.00 | Avg Reward: 1023.40 | Frames: 689735 | Epsilon: 0.3103


Training:   7%|▋         | 690185/10000000 [1:45:08<41:47:41, 61.87it/s]

Episode 1596 completed | Reward: 580.00 | Avg Reward: 1020.00 | Frames: 690182 | Epsilon: 0.3098


Training:   7%|▋         | 690697/10000000 [1:45:15<40:04:05, 64.54it/s]

Episode 1597 completed | Reward: 780.00 | Avg Reward: 1013.60 | Frames: 690691 | Epsilon: 0.3093


Training:   7%|▋         | 691217/10000000 [1:45:23<40:05:07, 64.51it/s]

Episode 1598 completed | Reward: 1160.00 | Avg Reward: 1013.60 | Frames: 691204 | Epsilon: 0.3088


Training:   7%|▋         | 691649/10000000 [1:45:30<41:56:09, 61.66it/s]

Episode 1599 completed | Reward: 980.00 | Avg Reward: 1016.80 | Frames: 691643 | Epsilon: 0.3084


Training:   7%|▋         | 692209/10000000 [1:45:38<41:11:57, 62.76it/s]

Episode 1600 completed | Reward: 880.00 | Avg Reward: 1008.20 | Frames: 692204 | Epsilon: 0.3078


Training:   7%|▋         | 693089/10000000 [1:45:52<40:36:27, 63.66it/s]

Episode 1601 completed | Reward: 780.00 | Avg Reward: 1007.20 | Frames: 693080 | Epsilon: 0.3069


Training:   7%|▋         | 693553/10000000 [1:45:59<41:33:28, 62.21it/s]

Episode 1602 completed | Reward: 1120.00 | Avg Reward: 1006.20 | Frames: 693547 | Epsilon: 0.3065


Training:   7%|▋         | 694009/10000000 [1:46:05<39:57:59, 64.68it/s]

Episode 1603 completed | Reward: 1100.00 | Avg Reward: 1007.40 | Frames: 693998 | Epsilon: 0.3060


Training:   7%|▋         | 694345/10000000 [1:46:10<40:56:05, 63.15it/s]

Episode 1604 completed | Reward: 680.00 | Avg Reward: 1002.40 | Frames: 694342 | Epsilon: 0.3057


Training:   7%|▋         | 694841/10000000 [1:46:18<40:16:37, 64.17it/s]

Episode 1605 completed | Reward: 680.00 | Avg Reward: 1001.20 | Frames: 694832 | Epsilon: 0.3052


Training:   7%|▋         | 695289/10000000 [1:46:25<42:01:09, 61.51it/s]

Episode 1606 completed | Reward: 540.00 | Avg Reward: 994.20 | Frames: 695285 | Epsilon: 0.3047


Training:   7%|▋         | 695753/10000000 [1:46:32<40:54:10, 63.19it/s]

Episode 1607 completed | Reward: 1080.00 | Avg Reward: 997.20 | Frames: 695746 | Epsilon: 0.3043


Training:   7%|▋         | 696112/10000000 [1:46:37<50:23:09, 51.29it/s]

Episode 1608 completed | Reward: 460.00 | Avg Reward: 991.20 | Frames: 696111 | Epsilon: 0.3039

Memory usage: 1.12 GB


Training:   7%|▋         | 696581/10000000 [1:46:44<39:18:52, 65.73it/s]

Episode 1609 completed | Reward: 1020.00 | Avg Reward: 994.20 | Frames: 696569 | Epsilon: 0.3034


Training:   7%|▋         | 697085/10000000 [1:46:52<40:07:18, 64.41it/s]

Episode 1610 completed | Reward: 500.00 | Avg Reward: 993.60 | Frames: 697080 | Epsilon: 0.3029


Training:   7%|▋         | 697531/10000000 [1:46:59<40:07:18, 64.40it/s]

Episode 1611 completed | Reward: 1140.00 | Avg Reward: 996.20 | Frames: 697521 | Epsilon: 0.3025


Training:   7%|▋         | 697913/10000000 [1:47:04<41:33:21, 62.18it/s]

Episode 1612 completed | Reward: 900.00 | Avg Reward: 999.60 | Frames: 697907 | Epsilon: 0.3021


Training:   7%|▋         | 698361/10000000 [1:47:11<41:29:51, 62.26it/s]

Episode 1613 completed | Reward: 1220.00 | Avg Reward: 1000.80 | Frames: 698354 | Epsilon: 0.3016


Training:   7%|▋         | 698729/10000000 [1:47:17<39:58:04, 64.64it/s]

Episode 1614 completed | Reward: 1580.00 | Avg Reward: 1006.40 | Frames: 698716 | Epsilon: 0.3013


Training:   7%|▋         | 699625/10000000 [1:47:30<40:07:42, 64.38it/s]

Episode 1615 completed | Reward: 1000.00 | Avg Reward: 1009.20 | Frames: 699613 | Epsilon: 0.3004


Training:   7%|▋         | 700081/10000000 [1:47:37<40:21:33, 64.01it/s]

Episode 1616 completed | Reward: 1060.00 | Avg Reward: 1009.00 | Frames: 700073 | Epsilon: 0.2999


Training:   7%|▋         | 700881/10000000 [1:47:49<41:43:04, 61.92it/s]

Episode 1617 completed | Reward: 860.00 | Avg Reward: 1008.60 | Frames: 700873 | Epsilon: 0.2991


Training:   7%|▋         | 701232/10000000 [1:47:55<38:55:56, 66.35it/s]

Episode 1618 completed | Reward: 1440.00 | Avg Reward: 1013.80 | Frames: 701227 | Epsilon: 0.2988


Training:   7%|▋         | 701733/10000000 [1:48:02<41:47:10, 61.81it/s]

Episode 1619 completed | Reward: 1020.00 | Avg Reward: 1011.00 | Frames: 701730 | Epsilon: 0.2983


Training:   7%|▋         | 702141/10000000 [1:48:09<41:47:27, 61.80it/s]

Episode 1620 completed | Reward: 1080.00 | Avg Reward: 1009.80 | Frames: 702133 | Epsilon: 0.2979


Training:   7%|▋         | 702685/10000000 [1:48:17<40:53:42, 63.15it/s]

Episode 1621 completed | Reward: 1060.00 | Avg Reward: 998.80 | Frames: 702679 | Epsilon: 0.2973


Training:   7%|▋         | 703093/10000000 [1:48:23<40:57:47, 63.04it/s]

Episode 1622 completed | Reward: 480.00 | Avg Reward: 992.60 | Frames: 703083 | Epsilon: 0.2969


Training:   7%|▋         | 703517/10000000 [1:48:29<40:37:16, 63.57it/s]

Episode 1623 completed | Reward: 580.00 | Avg Reward: 991.00 | Frames: 703510 | Epsilon: 0.2965


Training:   7%|▋         | 704245/10000000 [1:48:41<41:37:08, 62.04it/s]

Episode 1624 completed | Reward: 660.00 | Avg Reward: 987.20 | Frames: 704237 | Epsilon: 0.2958


Training:   7%|▋         | 704757/10000000 [1:48:48<41:36:35, 62.05it/s]

Episode 1625 completed | Reward: 2220.00 | Avg Reward: 998.60 | Frames: 704749 | Epsilon: 0.2953


Training:   7%|▋         | 705165/10000000 [1:48:55<40:21:46, 63.97it/s]

Episode 1626 completed | Reward: 540.00 | Avg Reward: 993.00 | Frames: 705156 | Epsilon: 0.2948


Training:   7%|▋         | 705613/10000000 [1:49:01<41:09:47, 62.72it/s]

Episode 1627 completed | Reward: 280.00 | Avg Reward: 988.40 | Frames: 705601 | Epsilon: 0.2944


Training:   7%|▋         | 705925/10000000 [1:49:06<41:40:45, 61.94it/s]

Episode 1628 completed | Reward: 540.00 | Avg Reward: 986.60 | Frames: 705919 | Epsilon: 0.2941


Training:   7%|▋         | 706388/10000000 [1:49:13<48:39:26, 53.06it/s]

Episode 1629 completed | Reward: 920.00 | Avg Reward: 985.40 | Frames: 706384 | Epsilon: 0.2936

Memory usage: 1.12 GB


Training:   7%|▋         | 707313/10000000 [1:49:27<41:47:40, 61.76it/s]

Episode 1630 completed | Reward: 920.00 | Avg Reward: 985.20 | Frames: 707309 | Epsilon: 0.2927


Training:   7%|▋         | 707729/10000000 [1:49:34<42:17:28, 61.03it/s]

Episode 1631 completed | Reward: 800.00 | Avg Reward: 986.40 | Frames: 707722 | Epsilon: 0.2923


Training:   7%|▋         | 708217/10000000 [1:49:41<40:34:46, 63.60it/s]

Episode 1632 completed | Reward: 1600.00 | Avg Reward: 994.80 | Frames: 708210 | Epsilon: 0.2918


Training:   7%|▋         | 708609/10000000 [1:49:47<41:29:27, 62.20it/s]

Episode 1633 completed | Reward: 1040.00 | Avg Reward: 996.20 | Frames: 708600 | Epsilon: 0.2914


Training:   7%|▋         | 708961/10000000 [1:49:53<42:16:29, 61.05it/s]

Episode 1634 completed | Reward: 480.00 | Avg Reward: 993.20 | Frames: 708954 | Epsilon: 0.2910


Training:   7%|▋         | 709425/10000000 [1:50:00<40:59:36, 62.95it/s]

Episode 1635 completed | Reward: 560.00 | Avg Reward: 983.20 | Frames: 709419 | Epsilon: 0.2906


Training:   7%|▋         | 709801/10000000 [1:50:06<40:42:08, 63.40it/s]

Episode 1636 completed | Reward: 860.00 | Avg Reward: 990.00 | Frames: 709795 | Epsilon: 0.2902


Training:   7%|▋         | 710161/10000000 [1:50:11<41:23:51, 62.33it/s]

Episode 1637 completed | Reward: 1500.00 | Avg Reward: 986.00 | Frames: 710158 | Epsilon: 0.2898


Training:   7%|▋         | 710649/10000000 [1:50:19<40:54:52, 63.07it/s]

Episode 1638 completed | Reward: 900.00 | Avg Reward: 987.60 | Frames: 710639 | Epsilon: 0.2894


Training:   7%|▋         | 711257/10000000 [1:50:28<41:11:26, 62.64it/s]

Episode 1639 completed | Reward: 1600.00 | Avg Reward: 993.60 | Frames: 711250 | Epsilon: 0.2887


Training:   7%|▋         | 711672/10000000 [1:50:34<39:18:06, 65.65it/s]

Episode 1640 completed | Reward: 400.00 | Avg Reward: 984.80 | Frames: 711667 | Epsilon: 0.2883


Training:   7%|▋         | 712101/10000000 [1:50:41<40:37:57, 63.50it/s]

Episode 1641 completed | Reward: 860.00 | Avg Reward: 984.40 | Frames: 712091 | Epsilon: 0.2879


Training:   7%|▋         | 712677/10000000 [1:50:50<42:11:46, 61.14it/s]

Episode 1642 completed | Reward: 880.00 | Avg Reward: 983.00 | Frames: 712670 | Epsilon: 0.2873


Training:   7%|▋         | 713029/10000000 [1:50:55<40:50:30, 63.16it/s]

Episode 1643 completed | Reward: 660.00 | Avg Reward: 979.60 | Frames: 713018 | Epsilon: 0.2870


Training:   7%|▋         | 713517/10000000 [1:51:03<41:51:39, 61.62it/s]

Episode 1644 completed | Reward: 1060.00 | Avg Reward: 976.00 | Frames: 713515 | Epsilon: 0.2865


Training:   7%|▋         | 713901/10000000 [1:51:09<41:55:32, 61.52it/s]

Episode 1645 completed | Reward: 580.00 | Avg Reward: 977.60 | Frames: 713892 | Epsilon: 0.2861


Training:   7%|▋         | 714277/10000000 [1:51:15<42:11:27, 61.14it/s]

Episode 1646 completed | Reward: 1200.00 | Avg Reward: 983.40 | Frames: 714275 | Epsilon: 0.2857


Training:   7%|▋         | 714955/10000000 [1:51:25<41:55:40, 61.51it/s]

Episode 1647 completed | Reward: 1080.00 | Avg Reward: 976.40 | Frames: 714944 | Epsilon: 0.2851


Training:   7%|▋         | 715465/10000000 [1:51:33<41:44:53, 61.78it/s]

Episode 1648 completed | Reward: 1500.00 | Avg Reward: 983.20 | Frames: 715456 | Epsilon: 0.2845


Training:   7%|▋         | 715921/10000000 [1:51:40<40:38:32, 63.45it/s]

Episode 1649 completed | Reward: 1120.00 | Avg Reward: 984.00 | Frames: 715911 | Epsilon: 0.2841


Training:   7%|▋         | 716361/10000000 [1:51:47<40:35:50, 63.52it/s]

Episode 1650 completed | Reward: 920.00 | Avg Reward: 987.80 | Frames: 716350 | Epsilon: 0.2836


Training:   7%|▋         | 716705/10000000 [1:51:52<52:10:46, 49.42it/s]

Episode 1651 completed | Reward: 800.00 | Avg Reward: 983.40 | Frames: 716704 | Epsilon: 0.2833

Memory usage: 1.12 GB


Training:   7%|▋         | 717201/10000000 [1:52:00<43:00:29, 59.96it/s]

Episode 1652 completed | Reward: 840.00 | Avg Reward: 973.60 | Frames: 717193 | Epsilon: 0.2828


Training:   7%|▋         | 718065/10000000 [1:52:13<42:51:08, 60.17it/s]

Episode 1653 completed | Reward: 1980.00 | Avg Reward: 987.00 | Frames: 718063 | Epsilon: 0.2819


Training:   7%|▋         | 718473/10000000 [1:52:19<41:15:42, 62.48it/s]

Episode 1654 completed | Reward: 1400.00 | Avg Reward: 992.20 | Frames: 718465 | Epsilon: 0.2815


Training:   7%|▋         | 719001/10000000 [1:52:28<41:49:42, 61.63it/s]

Episode 1655 completed | Reward: 1720.00 | Avg Reward: 995.60 | Frames: 718999 | Epsilon: 0.2810


Training:   7%|▋         | 719401/10000000 [1:52:34<42:51:00, 60.16it/s]

Episode 1656 completed | Reward: 940.00 | Avg Reward: 996.20 | Frames: 719395 | Epsilon: 0.2806


Training:   7%|▋         | 719785/10000000 [1:52:40<42:04:48, 61.26it/s]

Episode 1657 completed | Reward: 1220.00 | Avg Reward: 1000.00 | Frames: 719776 | Epsilon: 0.2802


Training:   7%|▋         | 720225/10000000 [1:52:47<41:24:56, 62.24it/s]

Episode 1658 completed | Reward: 1020.00 | Avg Reward: 1001.80 | Frames: 720218 | Epsilon: 0.2798


Training:   7%|▋         | 720697/10000000 [1:52:54<42:24:41, 60.78it/s]

Episode 1659 completed | Reward: 880.00 | Avg Reward: 1004.40 | Frames: 720694 | Epsilon: 0.2793


Training:   7%|▋         | 721049/10000000 [1:53:00<42:44:03, 60.31it/s]

Episode 1660 completed | Reward: 840.00 | Avg Reward: 1005.60 | Frames: 721043 | Epsilon: 0.2790


Training:   7%|▋         | 721593/10000000 [1:53:08<41:13:58, 62.51it/s]

Episode 1661 completed | Reward: 2400.00 | Avg Reward: 1022.60 | Frames: 721587 | Epsilon: 0.2784


Training:   7%|▋         | 722049/10000000 [1:53:15<41:04:39, 62.74it/s]

Episode 1662 completed | Reward: 1080.00 | Avg Reward: 1022.20 | Frames: 722043 | Epsilon: 0.2780


Training:   7%|▋         | 722545/10000000 [1:53:23<41:12:58, 62.53it/s]

Episode 1663 completed | Reward: 1280.00 | Avg Reward: 1027.60 | Frames: 722540 | Epsilon: 0.2775


Training:   7%|▋         | 722969/10000000 [1:53:29<41:28:04, 62.14it/s]

Episode 1664 completed | Reward: 520.00 | Avg Reward: 1022.00 | Frames: 722962 | Epsilon: 0.2770


Training:   7%|▋         | 723391/10000000 [1:53:36<42:01:22, 61.32it/s]

Episode 1665 completed | Reward: 700.00 | Avg Reward: 1012.20 | Frames: 723381 | Epsilon: 0.2766


Training:   7%|▋         | 723797/10000000 [1:53:42<41:08:12, 62.64it/s]

Episode 1666 completed | Reward: 600.00 | Avg Reward: 1013.80 | Frames: 723786 | Epsilon: 0.2762


Training:   7%|▋         | 724197/10000000 [1:53:49<43:19:17, 59.48it/s]

Episode 1667 completed | Reward: 900.00 | Avg Reward: 1010.80 | Frames: 724195 | Epsilon: 0.2758


Training:   7%|▋         | 724701/10000000 [1:53:56<42:00:46, 61.33it/s]

Episode 1668 completed | Reward: 960.00 | Avg Reward: 1009.40 | Frames: 724697 | Epsilon: 0.2753


Training:   7%|▋         | 725044/10000000 [1:54:02<39:42:31, 64.88it/s]

Episode 1669 completed | Reward: 580.00 | Avg Reward: 1003.80 | Frames: 725037 | Epsilon: 0.2750


Training:   7%|▋         | 725569/10000000 [1:54:10<42:39:41, 60.39it/s]

Episode 1670 completed | Reward: 660.00 | Avg Reward: 1008.60 | Frames: 725564 | Epsilon: 0.2744


Training:   7%|▋         | 726033/10000000 [1:54:17<42:33:57, 60.52it/s]

Episode 1671 completed | Reward: 700.00 | Avg Reward: 1005.20 | Frames: 726028 | Epsilon: 0.2740


Training:   7%|▋         | 726383/10000000 [1:54:23<40:46:02, 63.19it/s]

Episode 1672 completed | Reward: 940.00 | Avg Reward: 1003.60 | Frames: 726372 | Epsilon: 0.2736


Training:   7%|▋         | 726821/10000000 [1:54:30<39:30:23, 65.20it/s]

Episode 1673 completed | Reward: 900.00 | Avg Reward: 1005.20 | Frames: 726821 | Epsilon: 0.2732

Memory usage: 1.12 GB


Training:   7%|▋         | 727241/10000000 [1:54:36<42:49:44, 60.14it/s]

Episode 1674 completed | Reward: 860.00 | Avg Reward: 1009.20 | Frames: 727237 | Epsilon: 0.2728


Training:   7%|▋         | 728385/10000000 [1:54:54<42:20:22, 60.83it/s]

Episode 1675 completed | Reward: 1060.00 | Avg Reward: 1012.00 | Frames: 728378 | Epsilon: 0.2716


Training:   7%|▋         | 728769/10000000 [1:55:00<41:44:46, 61.69it/s]

Episode 1676 completed | Reward: 1640.00 | Avg Reward: 1020.40 | Frames: 728763 | Epsilon: 0.2712


Training:   7%|▋         | 729257/10000000 [1:55:08<42:40:30, 60.34it/s]

Episode 1677 completed | Reward: 1100.00 | Avg Reward: 1019.20 | Frames: 729255 | Epsilon: 0.2707


Training:   7%|▋         | 729745/10000000 [1:55:16<42:15:59, 60.92it/s]

Episode 1678 completed | Reward: 600.00 | Avg Reward: 1017.60 | Frames: 729737 | Epsilon: 0.2703


Training:   7%|▋         | 730361/10000000 [1:55:25<41:38:36, 61.83it/s]

Episode 1679 completed | Reward: 960.00 | Avg Reward: 1016.80 | Frames: 730356 | Epsilon: 0.2696


Training:   7%|▋         | 730761/10000000 [1:55:31<41:56:53, 61.38it/s]

Episode 1680 completed | Reward: 600.00 | Avg Reward: 1017.00 | Frames: 730755 | Epsilon: 0.2692


Training:   7%|▋         | 731377/10000000 [1:55:41<42:36:33, 60.42it/s]

Episode 1681 completed | Reward: 680.00 | Avg Reward: 1010.40 | Frames: 731370 | Epsilon: 0.2686


Training:   7%|▋         | 731889/10000000 [1:55:49<42:30:27, 60.57it/s]

Episode 1682 completed | Reward: 660.00 | Avg Reward: 999.40 | Frames: 731881 | Epsilon: 0.2681


Training:   7%|▋         | 733121/10000000 [1:56:08<42:07:28, 61.11it/s]

Episode 1683 completed | Reward: 1920.00 | Avg Reward: 996.20 | Frames: 733113 | Epsilon: 0.2669


Training:   7%|▋         | 733615/10000000 [1:56:16<41:41:56, 61.73it/s]

Episode 1684 completed | Reward: 700.00 | Avg Reward: 987.80 | Frames: 733607 | Epsilon: 0.2664


Training:   7%|▋         | 734124/10000000 [1:56:24<39:16:04, 65.55it/s]

Episode 1685 completed | Reward: 2080.00 | Avg Reward: 990.20 | Frames: 734117 | Epsilon: 0.2659


Training:   7%|▋         | 734577/10000000 [1:56:31<41:50:52, 61.50it/s]

Episode 1686 completed | Reward: 980.00 | Avg Reward: 991.00 | Frames: 734569 | Epsilon: 0.2654


Training:   7%|▋         | 735057/10000000 [1:56:39<41:34:38, 61.90it/s]

Episode 1687 completed | Reward: 1240.00 | Avg Reward: 993.20 | Frames: 735050 | Epsilon: 0.2650


Training:   7%|▋         | 735575/10000000 [1:56:47<42:04:41, 61.16it/s]

Episode 1688 completed | Reward: 840.00 | Avg Reward: 990.20 | Frames: 735564 | Epsilon: 0.2644


Training:   7%|▋         | 735957/10000000 [1:56:53<43:30:45, 59.14it/s]

Episode 1689 completed | Reward: 1360.00 | Avg Reward: 993.60 | Frames: 735952 | Epsilon: 0.2640


Training:   7%|▋         | 736749/10000000 [1:57:06<41:24:10, 62.15it/s]

Episode 1690 completed | Reward: 1440.00 | Avg Reward: 998.00 | Frames: 736738 | Epsilon: 0.2633


Training:   7%|▋         | 737245/10000000 [1:57:14<40:26:55, 63.61it/s]

Episode 1691 completed | Reward: 1120.00 | Avg Reward: 995.80 | Frames: 737245 | Epsilon: 0.2628

Memory usage: 1.12 GB


Training:   7%|▋         | 737841/10000000 [1:57:23<43:23:18, 59.30it/s]

Episode 1692 completed | Reward: 860.00 | Avg Reward: 992.80 | Frames: 737836 | Epsilon: 0.2622


Training:   7%|▋         | 738209/10000000 [1:57:29<42:05:05, 61.13it/s]

Episode 1693 completed | Reward: 1060.00 | Avg Reward: 996.80 | Frames: 738200 | Epsilon: 0.2618


Training:   7%|▋         | 738592/10000000 [1:57:35<40:30:02, 63.52it/s]

Episode 1694 completed | Reward: 820.00 | Avg Reward: 991.40 | Frames: 738587 | Epsilon: 0.2614


Training:   7%|▋         | 738981/10000000 [1:57:41<41:57:04, 61.32it/s]

Episode 1695 completed | Reward: 1580.00 | Avg Reward: 994.60 | Frames: 738972 | Epsilon: 0.2610


Training:   7%|▋         | 739629/10000000 [1:57:51<42:01:52, 61.20it/s]

Episode 1696 completed | Reward: 1240.00 | Avg Reward: 1001.20 | Frames: 739627 | Epsilon: 0.2604


Training:   7%|▋         | 741093/10000000 [1:58:14<42:22:15, 60.70it/s]

Episode 1697 completed | Reward: 2300.00 | Avg Reward: 1016.40 | Frames: 741086 | Epsilon: 0.2589


Training:   7%|▋         | 741541/10000000 [1:58:21<41:49:36, 61.49it/s]

Episode 1698 completed | Reward: 1240.00 | Avg Reward: 1017.20 | Frames: 741533 | Epsilon: 0.2585


Training:   7%|▋         | 742085/10000000 [1:58:30<42:56:20, 59.89it/s]

Episode 1699 completed | Reward: 1040.00 | Avg Reward: 1017.80 | Frames: 742083 | Epsilon: 0.2579


Training:   7%|▋         | 742645/10000000 [1:58:39<41:52:57, 61.40it/s]

Episode 1700 completed | Reward: 920.00 | Avg Reward: 1018.20 | Frames: 742640 | Epsilon: 0.2574


Training:   7%|▋         | 743085/10000000 [1:58:46<42:46:31, 60.11it/s]

Episode 1701 completed | Reward: 1360.00 | Avg Reward: 1024.00 | Frames: 743080 | Epsilon: 0.2569


Training:   7%|▋         | 744036/10000000 [1:59:01<41:28:17, 62.00it/s]

Episode 1702 completed | Reward: 2160.00 | Avg Reward: 1034.40 | Frames: 744029 | Epsilon: 0.2560


Training:   7%|▋         | 745033/10000000 [1:59:17<41:36:14, 61.79it/s]

Episode 1703 completed | Reward: 2500.00 | Avg Reward: 1048.40 | Frames: 745024 | Epsilon: 0.2550


Training:   7%|▋         | 745528/10000000 [1:59:25<41:30:05, 61.94it/s]

Episode 1704 completed | Reward: 1200.00 | Avg Reward: 1053.60 | Frames: 745522 | Epsilon: 0.2545


Training:   7%|▋         | 746053/10000000 [1:59:33<44:02:41, 58.36it/s]

Episode 1705 completed | Reward: 1200.00 | Avg Reward: 1058.80 | Frames: 746050 | Epsilon: 0.2540


Training:   7%|▋         | 746483/10000000 [1:59:40<42:09:58, 60.96it/s]

Episode 1706 completed | Reward: 140.00 | Avg Reward: 1054.80 | Frames: 746472 | Epsilon: 0.2535


Training:   7%|▋         | 746841/10000000 [1:59:46<42:07:07, 61.03it/s]

Episode 1707 completed | Reward: 1440.00 | Avg Reward: 1058.40 | Frames: 746833 | Epsilon: 0.2532


Training:   7%|▋         | 747331/10000000 [1:59:54<41:05:10, 62.56it/s]

Episode 1708 completed | Reward: 1040.00 | Avg Reward: 1064.20 | Frames: 747331 | Epsilon: 0.2527

Memory usage: 1.12 GB


Training:   7%|▋         | 747725/10000000 [2:00:00<43:55:07, 58.52it/s]

Episode 1709 completed | Reward: 1140.00 | Avg Reward: 1065.40 | Frames: 747717 | Epsilon: 0.2523


Training:   7%|▋         | 748149/10000000 [2:00:07<41:26:10, 62.02it/s]

Episode 1710 completed | Reward: 1060.00 | Avg Reward: 1071.00 | Frames: 748139 | Epsilon: 0.2519


Training:   7%|▋         | 748957/10000000 [2:00:20<42:47:01, 60.06it/s]

Episode 1711 completed | Reward: 1280.00 | Avg Reward: 1072.40 | Frames: 748954 | Epsilon: 0.2510


Training:   7%|▋         | 749915/10000000 [2:00:35<41:27:14, 61.98it/s]

Episode 1712 completed | Reward: 1540.00 | Avg Reward: 1078.80 | Frames: 749906 | Epsilon: 0.2501


Training:   8%|▊         | 750009/10000000 [2:00:54<1211:16:44,  2.12it/s]


Evaluation at frame 750000: 878.00
Episode 1713 completed | Reward: 80.00 | Avg Reward: 1067.40 | Frames: 750001 | Epsilon: 0.2500


Training:   8%|▊         | 750401/10000000 [2:01:00<43:33:43, 58.98it/s]

Episode 1714 completed | Reward: 500.00 | Avg Reward: 1056.60 | Frames: 750396 | Epsilon: 0.2496


Training:   8%|▊         | 750905/10000000 [2:01:08<43:01:07, 59.72it/s]

Episode 1715 completed | Reward: 840.00 | Avg Reward: 1055.00 | Frames: 750897 | Epsilon: 0.2491


Training:   8%|▊         | 751431/10000000 [2:01:17<42:40:42, 60.20it/s]

Episode 1716 completed | Reward: 360.00 | Avg Reward: 1048.00 | Frames: 751421 | Epsilon: 0.2486


Training:   8%|▊         | 751925/10000000 [2:01:25<44:19:20, 57.96it/s]

Episode 1717 completed | Reward: 1280.00 | Avg Reward: 1052.20 | Frames: 751923 | Epsilon: 0.2481


Training:   8%|▊         | 752413/10000000 [2:01:33<42:56:44, 59.81it/s]

Episode 1718 completed | Reward: 700.00 | Avg Reward: 1044.80 | Frames: 752407 | Epsilon: 0.2476


Training:   8%|▊         | 752868/10000000 [2:01:40<40:53:07, 62.83it/s]

Episode 1719 completed | Reward: 1480.00 | Avg Reward: 1049.40 | Frames: 752863 | Epsilon: 0.2471


Training:   8%|▊         | 753215/10000000 [2:01:46<41:36:26, 61.73it/s]

Episode 1720 completed | Reward: 680.00 | Avg Reward: 1045.40 | Frames: 753205 | Epsilon: 0.2468


Training:   8%|▊         | 753692/10000000 [2:01:53<40:10:00, 63.94it/s]

Episode 1721 completed | Reward: 1440.00 | Avg Reward: 1049.20 | Frames: 753687 | Epsilon: 0.2463


Training:   8%|▊         | 754113/10000000 [2:02:00<43:12:35, 59.44it/s]

Episode 1722 completed | Reward: 1160.00 | Avg Reward: 1056.00 | Frames: 754108 | Epsilon: 0.2459


Training:   8%|▊         | 754585/10000000 [2:02:08<42:47:00, 60.03it/s]

Episode 1723 completed | Reward: 860.00 | Avg Reward: 1058.80 | Frames: 754581 | Epsilon: 0.2454


Training:   8%|▊         | 755169/10000000 [2:02:17<43:11:35, 59.45it/s]

Episode 1724 completed | Reward: 800.00 | Avg Reward: 1060.20 | Frames: 755163 | Epsilon: 0.2448


Training:   8%|▊         | 755673/10000000 [2:02:25<43:48:35, 58.61it/s]

Episode 1725 completed | Reward: 920.00 | Avg Reward: 1047.20 | Frames: 755666 | Epsilon: 0.2443


Training:   8%|▊         | 756096/10000000 [2:02:32<40:36:22, 63.24it/s]

Episode 1726 completed | Reward: 380.00 | Avg Reward: 1045.60 | Frames: 756089 | Epsilon: 0.2439


Training:   8%|▊         | 756523/10000000 [2:02:39<42:17:46, 60.71it/s]

Episode 1727 completed | Reward: 1940.00 | Avg Reward: 1062.20 | Frames: 756515 | Epsilon: 0.2435


Training:   8%|▊         | 756921/10000000 [2:02:45<43:28:03, 59.07it/s]

Episode 1728 completed | Reward: 680.00 | Avg Reward: 1063.60 | Frames: 756917 | Epsilon: 0.2431


Training:   8%|▊         | 757303/10000000 [2:02:51<42:06:20, 60.98it/s]

Episode 1729 completed | Reward: 1140.00 | Avg Reward: 1065.80 | Frames: 757292 | Epsilon: 0.2427


Training:   8%|▊         | 757775/10000000 [2:02:59<41:18:35, 62.15it/s]

Episode 1730 completed | Reward: 1120.00 | Avg Reward: 1067.80 | Frames: 757775 | Epsilon: 0.2422

Memory usage: 1.12 GB


Training:   8%|▊         | 758345/10000000 [2:03:08<41:46:45, 61.44it/s]

Episode 1731 completed | Reward: 660.00 | Avg Reward: 1066.40 | Frames: 758335 | Epsilon: 0.2417


Training:   8%|▊         | 758865/10000000 [2:03:17<42:24:10, 60.54it/s]

Episode 1732 completed | Reward: 1240.00 | Avg Reward: 1062.80 | Frames: 758860 | Epsilon: 0.2411


Training:   8%|▊         | 759529/10000000 [2:03:27<43:43:47, 58.70it/s]

Episode 1733 completed | Reward: 1300.00 | Avg Reward: 1065.40 | Frames: 759522 | Epsilon: 0.2405


Training:   8%|▊         | 760225/10000000 [2:03:39<43:09:49, 59.46it/s]

Episode 1734 completed | Reward: 1440.00 | Avg Reward: 1075.00 | Frames: 760223 | Epsilon: 0.2398


Training:   8%|▊         | 760713/10000000 [2:03:47<42:39:29, 60.16it/s]

Episode 1735 completed | Reward: 1860.00 | Avg Reward: 1088.00 | Frames: 760706 | Epsilon: 0.2393


Training:   8%|▊         | 761217/10000000 [2:03:55<42:51:46, 59.87it/s]

Episode 1736 completed | Reward: 1200.00 | Avg Reward: 1091.40 | Frames: 761212 | Epsilon: 0.2388


Training:   8%|▊         | 761577/10000000 [2:04:01<43:03:22, 59.60it/s]

Episode 1737 completed | Reward: 680.00 | Avg Reward: 1083.20 | Frames: 761574 | Epsilon: 0.2384


Training:   8%|▊         | 762017/10000000 [2:04:08<44:10:20, 58.09it/s]

Episode 1738 completed | Reward: 540.00 | Avg Reward: 1079.60 | Frames: 762009 | Epsilon: 0.2380


Training:   8%|▊         | 762401/10000000 [2:04:14<44:07:56, 58.14it/s]

Episode 1739 completed | Reward: 880.00 | Avg Reward: 1072.40 | Frames: 762394 | Epsilon: 0.2376


Training:   8%|▊         | 762817/10000000 [2:04:21<43:37:37, 58.81it/s]

Episode 1740 completed | Reward: 960.00 | Avg Reward: 1078.00 | Frames: 762813 | Epsilon: 0.2372


Training:   8%|▊         | 763297/10000000 [2:04:29<43:08:41, 59.47it/s]

Episode 1741 completed | Reward: 1200.00 | Avg Reward: 1081.40 | Frames: 763290 | Epsilon: 0.2367


Training:   8%|▊         | 763809/10000000 [2:04:37<44:13:57, 58.00it/s]

Episode 1742 completed | Reward: 780.00 | Avg Reward: 1080.40 | Frames: 763806 | Epsilon: 0.2362


Training:   8%|▊         | 764216/10000000 [2:04:44<40:09:16, 63.89it/s]

Episode 1743 completed | Reward: 780.00 | Avg Reward: 1081.60 | Frames: 764209 | Epsilon: 0.2358


Training:   8%|▊         | 764669/10000000 [2:04:51<43:43:35, 58.67it/s]

Episode 1744 completed | Reward: 760.00 | Avg Reward: 1078.60 | Frames: 764662 | Epsilon: 0.2353


Training:   8%|▊         | 765173/10000000 [2:04:59<43:26:10, 59.06it/s]

Episode 1745 completed | Reward: 1400.00 | Avg Reward: 1086.80 | Frames: 765165 | Epsilon: 0.2348


Training:   8%|▊         | 765603/10000000 [2:05:06<42:21:02, 60.57it/s]

Episode 1746 completed | Reward: 880.00 | Avg Reward: 1083.60 | Frames: 765595 | Epsilon: 0.2344


Training:   8%|▊         | 766017/10000000 [2:05:13<43:01:07, 59.62it/s]

Episode 1747 completed | Reward: 360.00 | Avg Reward: 1076.40 | Frames: 766009 | Epsilon: 0.2340


Training:   8%|▊         | 766767/10000000 [2:05:25<42:42:49, 60.05it/s]

Episode 1748 completed | Reward: 1920.00 | Avg Reward: 1080.60 | Frames: 766757 | Epsilon: 0.2332


Training:   8%|▊         | 767356/10000000 [2:05:35<40:40:31, 63.05it/s]

Episode 1749 completed | Reward: 900.00 | Avg Reward: 1078.40 | Frames: 767349 | Epsilon: 0.2327


Training:   8%|▊         | 767752/10000000 [2:05:41<41:06:20, 62.39it/s]

Episode 1750 completed | Reward: 1060.00 | Avg Reward: 1079.80 | Frames: 767747 | Epsilon: 0.2323


Training:   8%|▊         | 768204/10000000 [2:05:49<51:58:09, 49.34it/s]

Episode 1751 completed | Reward: 820.00 | Avg Reward: 1080.00 | Frames: 768202 | Epsilon: 0.2318

Memory usage: 1.12 GB


Training:   8%|▊         | 769021/10000000 [2:06:02<47:18:55, 54.19it/s]

Episode 1752 completed | Reward: 1300.00 | Avg Reward: 1084.60 | Frames: 769014 | Epsilon: 0.2310


Training:   8%|▊         | 769509/10000000 [2:06:10<43:19:54, 59.17it/s]

Episode 1753 completed | Reward: 740.00 | Avg Reward: 1072.20 | Frames: 769507 | Epsilon: 0.2305


Training:   8%|▊         | 770061/10000000 [2:06:19<43:23:20, 59.09it/s]

Episode 1754 completed | Reward: 520.00 | Avg Reward: 1063.40 | Frames: 770056 | Epsilon: 0.2299


Training:   8%|▊         | 770501/10000000 [2:06:27<45:00:45, 56.96it/s]

Episode 1755 completed | Reward: 460.00 | Avg Reward: 1050.80 | Frames: 770498 | Epsilon: 0.2295


Training:   8%|▊         | 770997/10000000 [2:06:35<44:18:00, 57.87it/s]

Episode 1756 completed | Reward: 1160.00 | Avg Reward: 1053.00 | Frames: 770989 | Epsilon: 0.2290


Training:   8%|▊         | 771869/10000000 [2:06:49<43:21:42, 59.12it/s]

Episode 1757 completed | Reward: 1300.00 | Avg Reward: 1053.80 | Frames: 771861 | Epsilon: 0.2281


Training:   8%|▊         | 772389/10000000 [2:06:58<43:26:00, 59.02it/s]

Episode 1758 completed | Reward: 1360.00 | Avg Reward: 1057.20 | Frames: 772380 | Epsilon: 0.2276


Training:   8%|▊         | 772756/10000000 [2:07:04<42:02:35, 60.96it/s]

Episode 1759 completed | Reward: 1140.00 | Avg Reward: 1059.80 | Frames: 772749 | Epsilon: 0.2273


Training:   8%|▊         | 773248/10000000 [2:07:12<40:48:39, 62.80it/s]

Episode 1760 completed | Reward: 1600.00 | Avg Reward: 1067.40 | Frames: 773241 | Epsilon: 0.2268


Training:   8%|▊         | 773677/10000000 [2:07:19<43:49:16, 58.48it/s]

Episode 1761 completed | Reward: 2340.00 | Avg Reward: 1066.80 | Frames: 773675 | Epsilon: 0.2263


Training:   8%|▊         | 774133/10000000 [2:07:26<44:54:51, 57.06it/s]

Episode 1762 completed | Reward: 1140.00 | Avg Reward: 1067.40 | Frames: 774131 | Epsilon: 0.2259


Training:   8%|▊         | 774813/10000000 [2:07:38<44:12:32, 57.96it/s]

Episode 1763 completed | Reward: 1820.00 | Avg Reward: 1072.80 | Frames: 774809 | Epsilon: 0.2252


Training:   8%|▊         | 775244/10000000 [2:07:45<41:08:12, 62.29it/s]

Episode 1764 completed | Reward: 480.00 | Avg Reward: 1072.40 | Frames: 775238 | Epsilon: 0.2248


Training:   8%|▊         | 776137/10000000 [2:07:59<43:56:15, 58.31it/s]

Episode 1765 completed | Reward: 1640.00 | Avg Reward: 1081.80 | Frames: 776130 | Epsilon: 0.2239


Training:   8%|▊         | 776649/10000000 [2:08:08<43:54:00, 58.36it/s]

Episode 1766 completed | Reward: 1640.00 | Avg Reward: 1092.20 | Frames: 776641 | Epsilon: 0.2234


Training:   8%|▊         | 777160/10000000 [2:08:16<42:04:00, 60.90it/s]

Episode 1767 completed | Reward: 880.00 | Avg Reward: 1092.00 | Frames: 777154 | Epsilon: 0.2228


Training:   8%|▊         | 777661/10000000 [2:08:24<44:28:06, 57.61it/s]

Episode 1768 completed | Reward: 840.00 | Avg Reward: 1090.80 | Frames: 777654 | Epsilon: 0.2223


Training:   8%|▊         | 778647/10000000 [2:08:41<41:41:03, 61.45it/s]

Episode 1769 completed | Reward: 1160.00 | Avg Reward: 1096.60 | Frames: 778647 | Epsilon: 0.2214

Memory usage: 1.12 GB


Training:   8%|▊         | 779145/10000000 [2:08:49<43:29:44, 58.89it/s]

Episode 1770 completed | Reward: 1160.00 | Avg Reward: 1101.60 | Frames: 779137 | Epsilon: 0.2209


Training:   8%|▊         | 779569/10000000 [2:08:56<44:23:52, 57.69it/s]

Episode 1771 completed | Reward: 1020.00 | Avg Reward: 1104.80 | Frames: 779566 | Epsilon: 0.2204


Training:   8%|▊         | 780073/10000000 [2:09:05<43:37:30, 58.71it/s]

Episode 1772 completed | Reward: 1160.00 | Avg Reward: 1107.00 | Frames: 780068 | Epsilon: 0.2199


Training:   8%|▊         | 780889/10000000 [2:09:18<43:32:56, 58.80it/s]

Episode 1773 completed | Reward: 440.00 | Avg Reward: 1102.40 | Frames: 780880 | Epsilon: 0.2191


Training:   8%|▊         | 781367/10000000 [2:09:26<42:55:09, 59.66it/s]

Episode 1774 completed | Reward: 780.00 | Avg Reward: 1101.60 | Frames: 781357 | Epsilon: 0.2186


Training:   8%|▊         | 781941/10000000 [2:09:35<44:35:30, 57.42it/s]

Episode 1775 completed | Reward: 1380.00 | Avg Reward: 1104.80 | Frames: 781938 | Epsilon: 0.2181


Training:   8%|▊         | 782933/10000000 [2:09:52<44:35:55, 57.41it/s]

Episode 1776 completed | Reward: 2640.00 | Avg Reward: 1114.80 | Frames: 782929 | Epsilon: 0.2171


Training:   8%|▊         | 783685/10000000 [2:10:04<43:37:11, 58.69it/s]

Episode 1777 completed | Reward: 1060.00 | Avg Reward: 1114.40 | Frames: 783683 | Epsilon: 0.2163


Training:   8%|▊         | 784188/10000000 [2:10:13<41:01:49, 62.39it/s]

Episode 1778 completed | Reward: 1120.00 | Avg Reward: 1119.60 | Frames: 784181 | Epsilon: 0.2158


Training:   8%|▊         | 784585/10000000 [2:10:19<42:58:36, 59.56it/s]

Episode 1779 completed | Reward: 1040.00 | Avg Reward: 1120.40 | Frames: 784576 | Epsilon: 0.2154


Training:   8%|▊         | 785265/10000000 [2:10:31<44:30:49, 57.50it/s]

Episode 1780 completed | Reward: 1720.00 | Avg Reward: 1131.60 | Frames: 785257 | Epsilon: 0.2147


Training:   8%|▊         | 786233/10000000 [2:10:47<44:12:44, 57.89it/s]

Episode 1781 completed | Reward: 1520.00 | Avg Reward: 1140.00 | Frames: 786228 | Epsilon: 0.2138


Training:   8%|▊         | 786633/10000000 [2:10:53<44:52:48, 57.02it/s]

Episode 1782 completed | Reward: 680.00 | Avg Reward: 1140.20 | Frames: 786628 | Epsilon: 0.2134


Training:   8%|▊         | 787177/10000000 [2:11:02<45:09:34, 56.67it/s]

Episode 1783 completed | Reward: 1280.00 | Avg Reward: 1133.80 | Frames: 787170 | Epsilon: 0.2128


Training:   8%|▊         | 787657/10000000 [2:11:11<44:25:32, 57.60it/s]

Episode 1784 completed | Reward: 900.00 | Avg Reward: 1135.80 | Frames: 787653 | Epsilon: 0.2123


Training:   8%|▊         | 788105/10000000 [2:11:18<44:56:32, 56.94it/s]

Episode 1785 completed | Reward: 940.00 | Avg Reward: 1124.40 | Frames: 788096 | Epsilon: 0.2119


Training:   8%|▊         | 788575/10000000 [2:11:26<44:44:54, 57.18it/s]

Episode 1786 completed | Reward: 1360.00 | Avg Reward: 1128.20 | Frames: 788567 | Epsilon: 0.2114


Training:   8%|▊         | 789084/10000000 [2:11:35<50:57:18, 50.21it/s]

Episode 1787 completed | Reward: 1140.00 | Avg Reward: 1127.20 | Frames: 789080 | Epsilon: 0.2109

Memory usage: 1.12 GB


Training:   8%|▊         | 789583/10000000 [2:11:43<42:48:26, 59.77it/s]

Episode 1788 completed | Reward: 960.00 | Avg Reward: 1128.40 | Frames: 789575 | Epsilon: 0.2104


Training:   8%|▊         | 790197/10000000 [2:11:53<43:31:58, 58.77it/s]

Episode 1789 completed | Reward: 860.00 | Avg Reward: 1123.40 | Frames: 790189 | Epsilon: 0.2098


Training:   8%|▊         | 790713/10000000 [2:12:02<48:05:46, 53.19it/s]

Episode 1790 completed | Reward: 560.00 | Avg Reward: 1114.60 | Frames: 790705 | Epsilon: 0.2093


Training:   8%|▊         | 791185/10000000 [2:12:10<45:06:07, 56.72it/s]

Episode 1791 completed | Reward: 760.00 | Avg Reward: 1111.00 | Frames: 791183 | Epsilon: 0.2088


Training:   8%|▊         | 791664/10000000 [2:12:18<40:49:58, 62.64it/s]

Episode 1792 completed | Reward: 960.00 | Avg Reward: 1112.00 | Frames: 791657 | Epsilon: 0.2083


Training:   8%|▊         | 792125/10000000 [2:12:25<44:33:30, 57.40it/s]

Episode 1793 completed | Reward: 980.00 | Avg Reward: 1111.20 | Frames: 792117 | Epsilon: 0.2079


Training:   8%|▊         | 792501/10000000 [2:12:32<45:06:51, 56.69it/s]

Episode 1794 completed | Reward: 940.00 | Avg Reward: 1112.40 | Frames: 792497 | Epsilon: 0.2075


Training:   8%|▊         | 793045/10000000 [2:12:41<44:24:14, 57.60it/s]

Episode 1795 completed | Reward: 1560.00 | Avg Reward: 1112.20 | Frames: 793041 | Epsilon: 0.2070


Training:   8%|▊         | 793789/10000000 [2:12:53<44:08:33, 57.93it/s]

Episode 1796 completed | Reward: 2180.00 | Avg Reward: 1121.60 | Frames: 793781 | Epsilon: 0.2062


Training:   8%|▊         | 794269/10000000 [2:13:01<44:15:36, 57.78it/s]

Episode 1797 completed | Reward: 1080.00 | Avg Reward: 1109.40 | Frames: 794265 | Epsilon: 0.2057


Training:   8%|▊         | 794755/10000000 [2:13:10<43:04:25, 59.36it/s]

Episode 1798 completed | Reward: 2520.00 | Avg Reward: 1122.20 | Frames: 794744 | Epsilon: 0.2053


Training:   8%|▊         | 795313/10000000 [2:13:19<44:20:57, 57.65it/s]

Episode 1799 completed | Reward: 2160.00 | Avg Reward: 1133.40 | Frames: 795310 | Epsilon: 0.2047


Training:   8%|▊         | 795833/10000000 [2:13:28<44:33:38, 57.38it/s]

Episode 1800 completed | Reward: 2100.00 | Avg Reward: 1145.20 | Frames: 795829 | Epsilon: 0.2042


Training:   8%|▊         | 796489/10000000 [2:13:39<44:43:11, 57.17it/s]

Episode 1801 completed | Reward: 1500.00 | Avg Reward: 1146.60 | Frames: 796483 | Epsilon: 0.2035


Training:   8%|▊         | 796913/10000000 [2:13:46<45:12:33, 56.55it/s]

Episode 1802 completed | Reward: 940.00 | Avg Reward: 1134.40 | Frames: 796908 | Epsilon: 0.2031


Training:   8%|▊         | 797337/10000000 [2:13:53<45:13:24, 56.53it/s]

Episode 1803 completed | Reward: 1640.00 | Avg Reward: 1125.80 | Frames: 797334 | Epsilon: 0.2027


Training:   8%|▊         | 797841/10000000 [2:14:02<45:23:41, 56.31it/s]

Episode 1804 completed | Reward: 1720.00 | Avg Reward: 1131.00 | Frames: 797839 | Epsilon: 0.2022


Training:   8%|▊         | 798425/10000000 [2:14:11<44:17:00, 57.72it/s]

Episode 1805 completed | Reward: 1560.00 | Avg Reward: 1134.60 | Frames: 798420 | Epsilon: 0.2016


Training:   8%|▊         | 798937/10000000 [2:14:20<44:28:12, 57.47it/s]

Episode 1806 completed | Reward: 1460.00 | Avg Reward: 1147.80 | Frames: 798928 | Epsilon: 0.2011


Training:   8%|▊         | 799433/10000000 [2:14:29<54:59:46, 46.47it/s]

Episode 1807 completed | Reward: 1340.00 | Avg Reward: 1146.80 | Frames: 799432 | Epsilon: 0.2006

Memory usage: 1.12 GB


Training:   8%|▊         | 799799/10000000 [2:14:35<43:15:42, 59.07it/s]

Episode 1808 completed | Reward: 960.00 | Avg Reward: 1146.00 | Frames: 799788 | Epsilon: 0.2002


Training:   8%|▊         | 800325/10000000 [2:14:44<43:46:53, 58.37it/s]

Episode 1809 completed | Reward: 920.00 | Avg Reward: 1143.80 | Frames: 800318 | Epsilon: 0.1997


Training:   8%|▊         | 800723/10000000 [2:14:50<43:49:03, 58.32it/s]

Episode 1810 completed | Reward: 300.00 | Avg Reward: 1136.20 | Frames: 800717 | Epsilon: 0.1993


Training:   8%|▊         | 801263/10000000 [2:14:59<43:25:19, 58.85it/s]

Episode 1811 completed | Reward: 1460.00 | Avg Reward: 1138.00 | Frames: 801254 | Epsilon: 0.1987


Training:   8%|▊         | 801811/10000000 [2:15:09<44:05:04, 57.96it/s]

Episode 1812 completed | Reward: 1740.00 | Avg Reward: 1140.00 | Frames: 801805 | Epsilon: 0.1982


Training:   8%|▊         | 802153/10000000 [2:15:15<46:08:36, 55.37it/s]

Episode 1813 completed | Reward: 1240.00 | Avg Reward: 1151.60 | Frames: 802150 | Epsilon: 0.1978


Training:   8%|▊         | 802665/10000000 [2:15:23<44:32:18, 57.36it/s]

Episode 1814 completed | Reward: 740.00 | Avg Reward: 1154.00 | Frames: 802656 | Epsilon: 0.1973


Training:   8%|▊         | 803063/10000000 [2:15:30<44:17:27, 57.68it/s]

Episode 1815 completed | Reward: 880.00 | Avg Reward: 1154.40 | Frames: 803058 | Epsilon: 0.1969


Training:   8%|▊         | 803484/10000000 [2:15:37<42:08:35, 60.62it/s]

Episode 1816 completed | Reward: 240.00 | Avg Reward: 1153.20 | Frames: 803478 | Epsilon: 0.1965


Training:   8%|▊         | 803969/10000000 [2:15:45<43:57:48, 58.10it/s]

Episode 1817 completed | Reward: 760.00 | Avg Reward: 1148.00 | Frames: 803961 | Epsilon: 0.1960


Training:   8%|▊         | 804505/10000000 [2:15:55<43:56:18, 58.13it/s]

Episode 1818 completed | Reward: 700.00 | Avg Reward: 1148.00 | Frames: 804496 | Epsilon: 0.1955


Training:   8%|▊         | 804975/10000000 [2:16:03<44:10:02, 57.83it/s]

Episode 1819 completed | Reward: 800.00 | Avg Reward: 1141.20 | Frames: 804970 | Epsilon: 0.1950


Training:   8%|▊         | 805493/10000000 [2:16:11<44:03:23, 57.97it/s]

Episode 1820 completed | Reward: 1240.00 | Avg Reward: 1146.80 | Frames: 805484 | Epsilon: 0.1945


Training:   8%|▊         | 806027/10000000 [2:16:20<43:35:56, 58.58it/s]

Episode 1821 completed | Reward: 880.00 | Avg Reward: 1141.20 | Frames: 806022 | Epsilon: 0.1940


Training:   8%|▊         | 806953/10000000 [2:16:36<45:15:02, 56.43it/s]

Episode 1822 completed | Reward: 1540.00 | Avg Reward: 1145.00 | Frames: 806944 | Epsilon: 0.1931


Training:   8%|▊         | 807439/10000000 [2:16:44<43:31:12, 58.67it/s]

Episode 1823 completed | Reward: 740.00 | Avg Reward: 1143.80 | Frames: 807433 | Epsilon: 0.1926


Training:   8%|▊         | 807941/10000000 [2:16:53<44:49:13, 56.97it/s]

Episode 1824 completed | Reward: 540.00 | Avg Reward: 1141.20 | Frames: 807933 | Epsilon: 0.1921


Training:   8%|▊         | 808349/10000000 [2:17:00<44:49:25, 56.96it/s]

Episode 1825 completed | Reward: 620.00 | Avg Reward: 1138.20 | Frames: 808341 | Epsilon: 0.1917


Training:   8%|▊         | 808973/10000000 [2:17:10<45:41:53, 55.87it/s]

Episode 1826 completed | Reward: 880.00 | Avg Reward: 1143.20 | Frames: 808964 | Epsilon: 0.1910


Training:   8%|▊         | 809413/10000000 [2:17:18<45:10:48, 56.51it/s]

Episode 1827 completed | Reward: 760.00 | Avg Reward: 1131.40 | Frames: 809410 | Epsilon: 0.1906


Training:   8%|▊         | 809875/10000000 [2:17:26<53:41:52, 47.54it/s]

Episode 1828 completed | Reward: 880.00 | Avg Reward: 1133.40 | Frames: 809873 | Epsilon: 0.1901

Memory usage: 1.12 GB


Training:   8%|▊         | 810817/10000000 [2:17:42<44:46:42, 57.00it/s]

Episode 1829 completed | Reward: 1980.00 | Avg Reward: 1141.80 | Frames: 810811 | Epsilon: 0.1892


Training:   8%|▊         | 811361/10000000 [2:17:51<43:59:35, 58.02it/s]

Episode 1830 completed | Reward: 820.00 | Avg Reward: 1138.80 | Frames: 811352 | Epsilon: 0.1886


Training:   8%|▊         | 812055/10000000 [2:18:03<44:12:23, 57.73it/s]

Episode 1831 completed | Reward: 1460.00 | Avg Reward: 1146.80 | Frames: 812049 | Epsilon: 0.1880


Training:   8%|▊         | 812533/10000000 [2:18:11<45:59:41, 55.49it/s]

Episode 1832 completed | Reward: 1240.00 | Avg Reward: 1146.80 | Frames: 812527 | Epsilon: 0.1875


Training:   8%|▊         | 814013/10000000 [2:18:36<44:55:00, 56.81it/s]

Episode 1833 completed | Reward: 440.00 | Avg Reward: 1138.20 | Frames: 814006 | Epsilon: 0.1860


Training:   8%|▊         | 814377/10000000 [2:18:43<47:00:18, 54.28it/s]

Episode 1834 completed | Reward: 820.00 | Avg Reward: 1132.00 | Frames: 814369 | Epsilon: 0.1856


Training:   8%|▊         | 814831/10000000 [2:18:50<44:42:54, 57.06it/s]

Episode 1835 completed | Reward: 800.00 | Avg Reward: 1121.40 | Frames: 814827 | Epsilon: 0.1852


Training:   8%|▊         | 815253/10000000 [2:18:58<44:18:43, 57.58it/s]

Episode 1836 completed | Reward: 1160.00 | Avg Reward: 1121.00 | Frames: 815248 | Epsilon: 0.1848


Training:   8%|▊         | 815773/10000000 [2:19:07<44:56:16, 56.77it/s]

Episode 1837 completed | Reward: 1180.00 | Avg Reward: 1126.00 | Frames: 815765 | Epsilon: 0.1842


Training:   8%|▊         | 816453/10000000 [2:19:18<45:46:04, 55.74it/s]

Episode 1838 completed | Reward: 4760.00 | Avg Reward: 1168.20 | Frames: 816444 | Epsilon: 0.1836


Training:   8%|▊         | 816891/10000000 [2:19:26<45:22:56, 56.21it/s]

Episode 1839 completed | Reward: 540.00 | Avg Reward: 1164.80 | Frames: 816886 | Epsilon: 0.1831


Training:   8%|▊         | 817551/10000000 [2:19:37<44:06:02, 57.84it/s]

Episode 1840 completed | Reward: 1500.00 | Avg Reward: 1170.20 | Frames: 817543 | Epsilon: 0.1825


Training:   8%|▊         | 818468/10000000 [2:19:53<42:01:24, 60.69it/s]

Episode 1841 completed | Reward: 2060.00 | Avg Reward: 1178.80 | Frames: 818462 | Epsilon: 0.1815


Training:   8%|▊         | 818969/10000000 [2:20:02<45:28:07, 56.09it/s]

Episode 1842 completed | Reward: 920.00 | Avg Reward: 1180.20 | Frames: 818962 | Epsilon: 0.1810


Training:   8%|▊         | 819559/10000000 [2:20:12<43:36:04, 58.49it/s]

Episode 1843 completed | Reward: 1220.00 | Avg Reward: 1184.60 | Frames: 819553 | Epsilon: 0.1804


Training:   8%|▊         | 820259/10000000 [2:20:24<54:45:20, 46.57it/s]

Episode 1844 completed | Reward: 2000.00 | Avg Reward: 1197.00 | Frames: 820257 | Epsilon: 0.1797

Memory usage: 1.12 GB


Training:   8%|▊         | 821145/10000000 [2:20:39<46:19:26, 55.04it/s]

Episode 1845 completed | Reward: 460.00 | Avg Reward: 1187.60 | Frames: 821136 | Epsilon: 0.1789


Training:   8%|▊         | 821657/10000000 [2:20:48<44:52:44, 56.81it/s]

Episode 1846 completed | Reward: 820.00 | Avg Reward: 1187.00 | Frames: 821650 | Epsilon: 0.1784


Training:   8%|▊         | 822569/10000000 [2:21:04<45:01:39, 56.62it/s]

Episode 1847 completed | Reward: 2680.00 | Avg Reward: 1210.20 | Frames: 822562 | Epsilon: 0.1774


Training:   8%|▊         | 823105/10000000 [2:21:13<44:40:16, 57.06it/s]

Episode 1848 completed | Reward: 1640.00 | Avg Reward: 1207.40 | Frames: 823101 | Epsilon: 0.1769


Training:   8%|▊         | 823789/10000000 [2:21:25<46:55:17, 54.32it/s]

Episode 1849 completed | Reward: 1500.00 | Avg Reward: 1213.40 | Frames: 823782 | Epsilon: 0.1762


Training:   8%|▊         | 824275/10000000 [2:21:33<44:36:41, 57.13it/s]

Episode 1850 completed | Reward: 1020.00 | Avg Reward: 1213.00 | Frames: 824267 | Epsilon: 0.1757


Training:   8%|▊         | 824777/10000000 [2:21:42<45:06:08, 56.51it/s]

Episode 1851 completed | Reward: 640.00 | Avg Reward: 1211.20 | Frames: 824774 | Epsilon: 0.1752


Training:   8%|▊         | 825601/10000000 [2:21:56<45:12:48, 56.36it/s]

Episode 1852 completed | Reward: 1100.00 | Avg Reward: 1209.20 | Frames: 825594 | Epsilon: 0.1744


Training:   8%|▊         | 825961/10000000 [2:22:02<46:15:51, 55.08it/s]

Episode 1853 completed | Reward: 1120.00 | Avg Reward: 1213.00 | Frames: 825957 | Epsilon: 0.1740


Training:   8%|▊         | 826769/10000000 [2:22:16<45:29:41, 56.01it/s]

Episode 1854 completed | Reward: 1080.00 | Avg Reward: 1218.60 | Frames: 826763 | Epsilon: 0.1732


Training:   8%|▊         | 827545/10000000 [2:22:30<44:56:09, 56.70it/s]

Episode 1855 completed | Reward: 1280.00 | Avg Reward: 1226.80 | Frames: 827536 | Epsilon: 0.1725


Training:   8%|▊         | 828285/10000000 [2:22:43<47:05:15, 54.11it/s]

Episode 1856 completed | Reward: 860.00 | Avg Reward: 1223.80 | Frames: 828278 | Epsilon: 0.1717


Training:   8%|▊         | 829261/10000000 [2:22:59<45:04:59, 56.51it/s]

Episode 1857 completed | Reward: 1760.00 | Avg Reward: 1228.40 | Frames: 829256 | Epsilon: 0.1707


Training:   8%|▊         | 829769/10000000 [2:23:08<47:09:29, 54.02it/s]

Episode 1858 completed | Reward: 1160.00 | Avg Reward: 1226.40 | Frames: 829761 | Epsilon: 0.1702


Training:   8%|▊         | 830089/10000000 [2:23:14<45:56:49, 55.44it/s]

Episode 1859 completed | Reward: 1100.00 | Avg Reward: 1226.00 | Frames: 830080 | Epsilon: 0.1699


Training:   8%|▊         | 831419/10000000 [2:23:37<44:22:32, 57.39it/s]

Episode 1860 completed | Reward: 2000.00 | Avg Reward: 1230.00 | Frames: 831419 | Epsilon: 0.1686

Memory usage: 1.12 GB


Training:   8%|▊         | 832083/10000000 [2:23:48<44:42:22, 56.96it/s]

Episode 1861 completed | Reward: 1160.00 | Avg Reward: 1218.20 | Frames: 832078 | Epsilon: 0.1679


Training:   8%|▊         | 832641/10000000 [2:23:58<45:26:17, 56.04it/s]

Episode 1862 completed | Reward: 1280.00 | Avg Reward: 1219.60 | Frames: 832639 | Epsilon: 0.1674


Training:   8%|▊         | 833329/10000000 [2:24:10<45:44:40, 55.66it/s]

Episode 1863 completed | Reward: 900.00 | Avg Reward: 1210.40 | Frames: 833323 | Epsilon: 0.1667


Training:   8%|▊         | 833893/10000000 [2:24:20<47:54:04, 53.15it/s]

Episode 1864 completed | Reward: 640.00 | Avg Reward: 1212.00 | Frames: 833886 | Epsilon: 0.1661


Training:   8%|▊         | 834347/10000000 [2:24:28<45:15:08, 56.26it/s]

Episode 1865 completed | Reward: 600.00 | Avg Reward: 1201.60 | Frames: 834341 | Epsilon: 0.1657


Training:   8%|▊         | 834833/10000000 [2:24:36<46:09:40, 55.15it/s]

Episode 1866 completed | Reward: 1040.00 | Avg Reward: 1195.60 | Frames: 834829 | Epsilon: 0.1652


Training:   8%|▊         | 836249/10000000 [2:25:01<46:11:04, 55.12it/s]

Episode 1867 completed | Reward: 2700.00 | Avg Reward: 1213.80 | Frames: 836244 | Epsilon: 0.1638


Training:   8%|▊         | 836701/10000000 [2:25:09<48:38:37, 52.33it/s]

Episode 1868 completed | Reward: 960.00 | Avg Reward: 1215.00 | Frames: 836693 | Epsilon: 0.1633


Training:   8%|▊         | 837317/10000000 [2:25:20<46:00:43, 55.32it/s]

Episode 1869 completed | Reward: 1000.00 | Avg Reward: 1213.40 | Frames: 837314 | Epsilon: 0.1627


Training:   8%|▊         | 837893/10000000 [2:25:30<45:45:30, 55.62it/s]

Episode 1870 completed | Reward: 1080.00 | Avg Reward: 1212.60 | Frames: 837886 | Epsilon: 0.1621


Training:   8%|▊         | 838396/10000000 [2:25:39<42:16:58, 60.19it/s]

Episode 1871 completed | Reward: 1100.00 | Avg Reward: 1213.40 | Frames: 838389 | Epsilon: 0.1616


Training:   8%|▊         | 838969/10000000 [2:25:49<45:30:04, 55.93it/s]

Episode 1872 completed | Reward: 1760.00 | Avg Reward: 1219.40 | Frames: 838966 | Epsilon: 0.1610


Training:   8%|▊         | 839969/10000000 [2:26:06<45:42:13, 55.67it/s]

Episode 1873 completed | Reward: 1140.00 | Avg Reward: 1226.40 | Frames: 839960 | Epsilon: 0.1600


Training:   8%|▊         | 840545/10000000 [2:26:16<47:02:42, 54.08it/s]

Episode 1874 completed | Reward: 600.00 | Avg Reward: 1224.60 | Frames: 840541 | Epsilon: 0.1595


Training:   8%|▊         | 841759/10000000 [2:26:38<56:51:45, 44.74it/s]

Episode 1875 completed | Reward: 1780.00 | Avg Reward: 1228.60 | Frames: 841758 | Epsilon: 0.1582

Memory usage: 1.12 GB


Training:   8%|▊         | 842451/10000000 [2:26:50<44:50:43, 56.72it/s]

Episode 1876 completed | Reward: 940.00 | Avg Reward: 1211.60 | Frames: 842446 | Epsilon: 0.1576


Training:   8%|▊         | 842849/10000000 [2:26:57<46:03:45, 55.22it/s]

Episode 1877 completed | Reward: 760.00 | Avg Reward: 1208.60 | Frames: 842841 | Epsilon: 0.1572


Training:   8%|▊         | 843601/10000000 [2:27:10<46:16:34, 54.96it/s]

Episode 1878 completed | Reward: 1180.00 | Avg Reward: 1209.20 | Frames: 843595 | Epsilon: 0.1564


Training:   8%|▊         | 844801/10000000 [2:27:31<47:00:22, 54.10it/s]

Episode 1879 completed | Reward: 2120.00 | Avg Reward: 1220.00 | Frames: 844797 | Epsilon: 0.1552


Training:   8%|▊         | 845393/10000000 [2:27:42<46:41:38, 54.46it/s]

Episode 1880 completed | Reward: 1340.00 | Avg Reward: 1216.20 | Frames: 845388 | Epsilon: 0.1546


Training:   8%|▊         | 845765/10000000 [2:27:48<47:46:03, 53.23it/s]

Episode 1881 completed | Reward: 860.00 | Avg Reward: 1209.60 | Frames: 845758 | Epsilon: 0.1542


Training:   8%|▊         | 846597/10000000 [2:28:03<46:52:15, 54.25it/s]

Episode 1882 completed | Reward: 960.00 | Avg Reward: 1212.40 | Frames: 846588 | Epsilon: 0.1534


Training:   8%|▊         | 846995/10000000 [2:28:10<46:07:31, 55.12it/s]

Episode 1883 completed | Reward: 480.00 | Avg Reward: 1204.40 | Frames: 846991 | Epsilon: 0.1530


Training:   8%|▊         | 847585/10000000 [2:28:20<47:18:11, 53.75it/s]

Episode 1884 completed | Reward: 980.00 | Avg Reward: 1205.20 | Frames: 847583 | Epsilon: 0.1524


Training:   8%|▊         | 848985/10000000 [2:28:45<46:35:39, 54.56it/s]

Episode 1885 completed | Reward: 2780.00 | Avg Reward: 1223.60 | Frames: 848980 | Epsilon: 0.1510


Training:   8%|▊         | 849487/10000000 [2:28:54<45:04:09, 56.40it/s]

Episode 1886 completed | Reward: 420.00 | Avg Reward: 1214.20 | Frames: 849482 | Epsilon: 0.1505


Training:   8%|▊         | 849969/10000000 [2:29:02<48:47:25, 52.09it/s]

Episode 1887 completed | Reward: 460.00 | Avg Reward: 1207.40 | Frames: 849961 | Epsilon: 0.1500


Training:   9%|▊         | 851529/10000000 [2:29:30<46:19:10, 54.86it/s]

Episode 1888 completed | Reward: 1340.00 | Avg Reward: 1211.20 | Frames: 851520 | Epsilon: 0.1485


Training:   9%|▊         | 851985/10000000 [2:29:38<45:37:28, 55.70it/s]

Episode 1889 completed | Reward: 1140.00 | Avg Reward: 1214.00 | Frames: 851985 | Epsilon: 0.1480

Memory usage: 1.12 GB


Training:   9%|▊         | 852637/10000000 [2:29:50<46:48:25, 54.29it/s]

Episode 1890 completed | Reward: 1340.00 | Avg Reward: 1221.80 | Frames: 852628 | Epsilon: 0.1474


Training:   9%|▊         | 853173/10000000 [2:30:00<45:49:50, 55.44it/s]

Episode 1891 completed | Reward: 1440.00 | Avg Reward: 1228.60 | Frames: 853171 | Epsilon: 0.1468


Training:   9%|▊         | 853737/10000000 [2:30:10<48:10:44, 52.73it/s]

Episode 1892 completed | Reward: 1540.00 | Avg Reward: 1234.40 | Frames: 853730 | Epsilon: 0.1463


Training:   9%|▊         | 854335/10000000 [2:30:20<44:58:21, 56.49it/s]

Episode 1893 completed | Reward: 1320.00 | Avg Reward: 1237.80 | Frames: 854329 | Epsilon: 0.1457


Training:   9%|▊         | 854697/10000000 [2:30:27<48:07:23, 52.79it/s]

Episode 1894 completed | Reward: 480.00 | Avg Reward: 1233.20 | Frames: 854690 | Epsilon: 0.1453


Training:   9%|▊         | 855213/10000000 [2:30:36<49:06:59, 51.72it/s]

Episode 1895 completed | Reward: 540.00 | Avg Reward: 1223.00 | Frames: 855204 | Epsilon: 0.1448


Training:   9%|▊         | 856893/10000000 [2:31:06<47:36:51, 53.34it/s]

Episode 1896 completed | Reward: 1760.00 | Avg Reward: 1218.80 | Frames: 856888 | Epsilon: 0.1431


Training:   9%|▊         | 857914/10000000 [2:31:24<47:49:28, 53.10it/s]

Episode 1897 completed | Reward: 2340.00 | Avg Reward: 1231.40 | Frames: 857907 | Epsilon: 0.1421


Training:   9%|▊         | 858605/10000000 [2:31:37<49:05:49, 51.72it/s]

Episode 1898 completed | Reward: 1420.00 | Avg Reward: 1220.40 | Frames: 858597 | Epsilon: 0.1414


Training:   9%|▊         | 859251/10000000 [2:31:48<44:49:00, 56.65it/s]

Episode 1899 completed | Reward: 1560.00 | Avg Reward: 1214.40 | Frames: 859245 | Epsilon: 0.1408


Training:   9%|▊         | 860121/10000000 [2:32:04<48:22:06, 52.49it/s]

Episode 1900 completed | Reward: 1420.00 | Avg Reward: 1207.60 | Frames: 860116 | Epsilon: 0.1399


Training:   9%|▊         | 860625/10000000 [2:32:13<47:13:02, 53.77it/s]

Episode 1901 completed | Reward: 1520.00 | Avg Reward: 1207.80 | Frames: 860623 | Epsilon: 0.1394


Training:   9%|▊         | 861117/10000000 [2:32:22<48:23:53, 52.45it/s]

Episode 1902 completed | Reward: 680.00 | Avg Reward: 1205.20 | Frames: 861110 | Epsilon: 0.1389


Training:   9%|▊         | 862619/10000000 [2:32:49<56:41:13, 44.78it/s]

Episode 1903 completed | Reward: 1820.00 | Avg Reward: 1207.00 | Frames: 862618 | Epsilon: 0.1374

Memory usage: 1.12 GB


Training:   9%|▊         | 863375/10000000 [2:33:03<45:39:44, 55.58it/s]

Episode 1904 completed | Reward: 1120.00 | Avg Reward: 1201.00 | Frames: 863370 | Epsilon: 0.1366


Training:   9%|▊         | 864075/10000000 [2:33:15<45:45:45, 55.45it/s]

Episode 1905 completed | Reward: 1700.00 | Avg Reward: 1202.40 | Frames: 864070 | Epsilon: 0.1359


Training:   9%|▊         | 864497/10000000 [2:33:23<47:01:50, 53.96it/s]

Episode 1906 completed | Reward: 520.00 | Avg Reward: 1193.00 | Frames: 864493 | Epsilon: 0.1355


Training:   9%|▊         | 865471/10000000 [2:33:40<44:59:56, 56.39it/s]

Episode 1907 completed | Reward: 820.00 | Avg Reward: 1187.80 | Frames: 865467 | Epsilon: 0.1345


Training:   9%|▊         | 866770/10000000 [2:34:04<49:12:02, 51.56it/s]

Episode 1908 completed | Reward: 1820.00 | Avg Reward: 1196.40 | Frames: 866763 | Epsilon: 0.1332


Training:   9%|▊         | 867189/10000000 [2:34:11<48:46:28, 52.01it/s]

Episode 1909 completed | Reward: 780.00 | Avg Reward: 1195.00 | Frames: 867181 | Epsilon: 0.1328


Training:   9%|▊         | 867601/10000000 [2:34:19<49:30:59, 51.23it/s]

Episode 1910 completed | Reward: 500.00 | Avg Reward: 1197.00 | Frames: 867592 | Epsilon: 0.1324


Training:   9%|▊         | 868177/10000000 [2:34:29<46:52:29, 54.11it/s]

Episode 1911 completed | Reward: 760.00 | Avg Reward: 1190.00 | Frames: 868170 | Epsilon: 0.1318


Training:   9%|▊         | 868633/10000000 [2:34:37<48:31:58, 52.26it/s]

Episode 1912 completed | Reward: 1680.00 | Avg Reward: 1189.40 | Frames: 868624 | Epsilon: 0.1314


Training:   9%|▊         | 869837/10000000 [2:34:59<49:48:11, 50.92it/s]

Episode 1913 completed | Reward: 1260.00 | Avg Reward: 1189.60 | Frames: 869830 | Epsilon: 0.1302


Training:   9%|▊         | 870405/10000000 [2:35:10<47:54:31, 52.93it/s]

Episode 1914 completed | Reward: 1060.00 | Avg Reward: 1192.80 | Frames: 870403 | Epsilon: 0.1296


Training:   9%|▊         | 871483/10000000 [2:35:29<46:02:28, 55.07it/s]

Episode 1915 completed | Reward: 1280.00 | Avg Reward: 1196.80 | Frames: 871479 | Epsilon: 0.1285


Training:   9%|▊         | 871977/10000000 [2:35:38<47:43:13, 53.13it/s]

Episode 1916 completed | Reward: 2240.00 | Avg Reward: 1216.80 | Frames: 871970 | Epsilon: 0.1280


Training:   9%|▊         | 872401/10000000 [2:35:46<46:57:02, 54.00it/s]

Episode 1917 completed | Reward: 1840.00 | Avg Reward: 1227.60 | Frames: 872392 | Epsilon: 0.1276


Training:   9%|▊         | 872946/10000000 [2:35:56<46:52:36, 54.08it/s]

Episode 1918 completed | Reward: 1160.00 | Avg Reward: 1232.20 | Frames: 872946 | Epsilon: 0.1271

Memory usage: 1.12 GB


Training:   9%|▊         | 874269/10000000 [2:36:20<47:02:13, 53.89it/s]

Episode 1919 completed | Reward: 980.00 | Avg Reward: 1234.00 | Frames: 874266 | Epsilon: 0.1257


Training:   9%|▊         | 874909/10000000 [2:36:32<47:14:39, 53.65it/s]

Episode 1920 completed | Reward: 940.00 | Avg Reward: 1231.00 | Frames: 874907 | Epsilon: 0.1251


Training:   9%|▉         | 875540/10000000 [2:36:44<43:08:21, 58.75it/s]

Episode 1921 completed | Reward: 1600.00 | Avg Reward: 1238.20 | Frames: 875533 | Epsilon: 0.1245


Training:   9%|▉         | 875933/10000000 [2:36:51<50:57:53, 49.73it/s]

Episode 1922 completed | Reward: 1180.00 | Avg Reward: 1234.60 | Frames: 875925 | Epsilon: 0.1241


Training:   9%|▉         | 876651/10000000 [2:37:04<46:26:13, 54.57it/s]

Episode 1923 completed | Reward: 1420.00 | Avg Reward: 1241.40 | Frames: 876646 | Epsilon: 0.1234


Training:   9%|▉         | 878006/10000000 [2:37:29<47:44:44, 53.07it/s]

Episode 1924 completed | Reward: 3080.00 | Avg Reward: 1266.80 | Frames: 877999 | Epsilon: 0.1220


Training:   9%|▉         | 878941/10000000 [2:37:46<47:48:43, 52.99it/s]

Episode 1925 completed | Reward: 1280.00 | Avg Reward: 1273.40 | Frames: 878939 | Epsilon: 0.1211


Training:   9%|▉         | 879493/10000000 [2:37:56<47:22:44, 53.47it/s]

Episode 1926 completed | Reward: 1080.00 | Avg Reward: 1275.40 | Frames: 879490 | Epsilon: 0.1205


Training:   9%|▉         | 880161/10000000 [2:38:08<49:29:48, 51.18it/s]

Episode 1927 completed | Reward: 820.00 | Avg Reward: 1276.00 | Frames: 880153 | Epsilon: 0.1198


Training:   9%|▉         | 880889/10000000 [2:38:21<48:29:05, 52.24it/s]

Episode 1928 completed | Reward: 1320.00 | Avg Reward: 1280.40 | Frames: 880884 | Epsilon: 0.1191


Training:   9%|▉         | 881254/10000000 [2:38:28<49:58:33, 50.68it/s]

Episode 1929 completed | Reward: 1700.00 | Avg Reward: 1277.60 | Frames: 881247 | Epsilon: 0.1188


Training:   9%|▉         | 881825/10000000 [2:38:39<51:15:00, 49.42it/s]

Episode 1930 completed | Reward: 1600.00 | Avg Reward: 1285.40 | Frames: 881818 | Epsilon: 0.1182


Training:   9%|▉         | 882400/10000000 [2:38:49<43:36:41, 58.07it/s]

Episode 1931 completed | Reward: 800.00 | Avg Reward: 1278.80 | Frames: 882394 | Epsilon: 0.1176


Training:   9%|▉         | 882819/10000000 [2:38:57<46:21:04, 54.64it/s]

Episode 1932 completed | Reward: 980.00 | Avg Reward: 1276.20 | Frames: 882814 | Epsilon: 0.1172


Training:   9%|▉         | 883527/10000000 [2:39:10<57:55:07, 43.72it/s]

Episode 1933 completed | Reward: 1280.00 | Avg Reward: 1284.60 | Frames: 883525 | Epsilon: 0.1165

Memory usage: 1.12 GB


Training:   9%|▉         | 884083/10000000 [2:39:20<46:06:56, 54.91it/s]

Episode 1934 completed | Reward: 1580.00 | Avg Reward: 1292.20 | Frames: 884079 | Epsilon: 0.1159


Training:   9%|▉         | 884597/10000000 [2:39:30<50:26:53, 50.19it/s]

Episode 1935 completed | Reward: 880.00 | Avg Reward: 1293.00 | Frames: 884588 | Epsilon: 0.1154


Training:   9%|▉         | 885181/10000000 [2:39:41<48:17:15, 52.43it/s]

Episode 1936 completed | Reward: 1120.00 | Avg Reward: 1292.60 | Frames: 885172 | Epsilon: 0.1148


Training:   9%|▉         | 886405/10000000 [2:40:03<48:39:39, 52.02it/s]

Episode 1937 completed | Reward: 1600.00 | Avg Reward: 1296.80 | Frames: 886403 | Epsilon: 0.1136


Training:   9%|▉         | 887185/10000000 [2:40:17<49:04:42, 51.58it/s]

Episode 1938 completed | Reward: 1600.00 | Avg Reward: 1265.20 | Frames: 887177 | Epsilon: 0.1128


Training:   9%|▉         | 887713/10000000 [2:40:27<48:03:36, 52.67it/s]

Episode 1939 completed | Reward: 2380.00 | Avg Reward: 1283.60 | Frames: 887711 | Epsilon: 0.1123


Training:   9%|▉         | 888085/10000000 [2:40:34<49:12:48, 51.43it/s]

Episode 1940 completed | Reward: 680.00 | Avg Reward: 1275.40 | Frames: 888078 | Epsilon: 0.1119


Training:   9%|▉         | 888499/10000000 [2:40:42<47:43:24, 53.03it/s]

Episode 1941 completed | Reward: 1140.00 | Avg Reward: 1266.20 | Frames: 888493 | Epsilon: 0.1115


Training:   9%|▉         | 889407/10000000 [2:40:58<47:15:24, 53.55it/s]

Episode 1942 completed | Reward: 1000.00 | Avg Reward: 1267.00 | Frames: 889403 | Epsilon: 0.1106


Training:   9%|▉         | 890210/10000000 [2:41:13<48:05:20, 52.62it/s]

Episode 1943 completed | Reward: 760.00 | Avg Reward: 1262.40 | Frames: 890203 | Epsilon: 0.1098


Training:   9%|▉         | 890694/10000000 [2:41:22<47:43:26, 53.02it/s]

Episode 1944 completed | Reward: 920.00 | Avg Reward: 1251.60 | Frames: 890685 | Epsilon: 0.1093


Training:   9%|▉         | 891217/10000000 [2:41:32<49:16:07, 51.36it/s]

Episode 1945 completed | Reward: 680.00 | Avg Reward: 1253.80 | Frames: 891210 | Epsilon: 0.1088


Training:   9%|▉         | 891695/10000000 [2:41:41<45:36:40, 55.47it/s]

Episode 1946 completed | Reward: 940.00 | Avg Reward: 1255.00 | Frames: 891690 | Epsilon: 0.1083


Training:   9%|▉         | 893125/10000000 [2:42:07<47:29:00, 53.28it/s]

Episode 1947 completed | Reward: 1960.00 | Avg Reward: 1247.80 | Frames: 893121 | Epsilon: 0.1069


Training:   9%|▉         | 893659/10000000 [2:42:17<58:08:52, 43.50it/s]

Episode 1948 completed | Reward: 940.00 | Avg Reward: 1240.80 | Frames: 893658 | Epsilon: 0.1063

Memory usage: 1.12 GB


Training:   9%|▉         | 895129/10000000 [2:42:44<48:22:37, 52.28it/s]

Episode 1949 completed | Reward: 1860.00 | Avg Reward: 1244.40 | Frames: 895122 | Epsilon: 0.1049


Training:   9%|▉         | 895877/10000000 [2:42:58<50:56:26, 49.64it/s]

Episode 1950 completed | Reward: 720.00 | Avg Reward: 1241.40 | Frames: 895870 | Epsilon: 0.1041


Training:   9%|▉         | 897277/10000000 [2:43:24<48:42:24, 51.91it/s]

Episode 1951 completed | Reward: 1380.00 | Avg Reward: 1248.80 | Frames: 897268 | Epsilon: 0.1027


Training:   9%|▉         | 897813/10000000 [2:43:34<47:30:00, 53.23it/s]

Episode 1952 completed | Reward: 700.00 | Avg Reward: 1244.80 | Frames: 897805 | Epsilon: 0.1022


Training:   9%|▉         | 898348/10000000 [2:43:44<43:32:51, 58.06it/s]

Episode 1953 completed | Reward: 1120.00 | Avg Reward: 1244.80 | Frames: 898343 | Epsilon: 0.1017


Training:   9%|▉         | 898935/10000000 [2:43:55<46:27:46, 54.41it/s]

Episode 1954 completed | Reward: 960.00 | Avg Reward: 1243.60 | Frames: 898931 | Epsilon: 0.1011


Training:   9%|▉         | 899541/10000000 [2:44:06<48:17:37, 52.34it/s]

Episode 1955 completed | Reward: 860.00 | Avg Reward: 1239.40 | Frames: 899532 | Epsilon: 0.1005


Training:   9%|▉         | 900035/10000000 [2:44:15<45:45:57, 55.23it/s]

Episode 1956 completed | Reward: 820.00 | Avg Reward: 1239.00 | Frames: 900030 | Epsilon: 0.1000


Training:   9%|▉         | 901391/10000000 [2:44:40<46:40:14, 54.15it/s]

Episode 1957 completed | Reward: 2220.00 | Avg Reward: 1243.60 | Frames: 901385 | Epsilon: 0.1000


Training:   9%|▉         | 901842/10000000 [2:44:49<48:10:49, 52.45it/s]

Episode 1958 completed | Reward: 1060.00 | Avg Reward: 1242.60 | Frames: 901834 | Epsilon: 0.1000


Training:   9%|▉         | 902343/10000000 [2:44:58<45:30:04, 55.54it/s]

Episode 1959 completed | Reward: 680.00 | Avg Reward: 1238.40 | Frames: 902339 | Epsilon: 0.1000


Training:   9%|▉         | 904006/10000000 [2:45:29<46:55:36, 53.84it/s]

Episode 1960 completed | Reward: 1840.00 | Avg Reward: 1236.80 | Frames: 904006 | Epsilon: 0.1000

Memory usage: 1.12 GB


Training:   9%|▉         | 904454/10000000 [2:45:37<48:18:22, 52.30it/s]

Episode 1961 completed | Reward: 380.00 | Avg Reward: 1229.00 | Frames: 904445 | Epsilon: 0.1000


Training:   9%|▉         | 904947/10000000 [2:45:47<45:53:06, 55.06it/s]

Episode 1962 completed | Reward: 740.00 | Avg Reward: 1223.60 | Frames: 904942 | Epsilon: 0.1000


Training:   9%|▉         | 905545/10000000 [2:45:58<48:45:16, 51.82it/s]

Episode 1963 completed | Reward: 1520.00 | Avg Reward: 1229.80 | Frames: 905536 | Epsilon: 0.1000


Training:   9%|▉         | 906233/10000000 [2:46:11<48:23:03, 52.21it/s]

Episode 1964 completed | Reward: 1700.00 | Avg Reward: 1240.40 | Frames: 906231 | Epsilon: 0.1000


Training:   9%|▉         | 906757/10000000 [2:46:20<50:21:18, 50.16it/s]

Episode 1965 completed | Reward: 660.00 | Avg Reward: 1241.00 | Frames: 906750 | Epsilon: 0.1000


Training:   9%|▉         | 907841/10000000 [2:46:41<50:19:22, 50.19it/s]

Episode 1966 completed | Reward: 2360.00 | Avg Reward: 1254.20 | Frames: 907832 | Epsilon: 0.1000


Training:   9%|▉         | 908345/10000000 [2:46:50<49:35:50, 50.92it/s]

Episode 1967 completed | Reward: 720.00 | Avg Reward: 1234.40 | Frames: 908336 | Epsilon: 0.1000


Training:   9%|▉         | 908848/10000000 [2:47:00<44:06:20, 57.26it/s]

Episode 1968 completed | Reward: 40.00 | Avg Reward: 1225.20 | Frames: 908842 | Epsilon: 0.1000


Training:   9%|▉         | 909781/10000000 [2:47:17<49:06:55, 51.41it/s]

Episode 1969 completed | Reward: 1420.00 | Avg Reward: 1229.40 | Frames: 909776 | Epsilon: 0.1000


Training:   9%|▉         | 910442/10000000 [2:47:30<49:12:19, 51.31it/s]

Episode 1970 completed | Reward: 1180.00 | Avg Reward: 1230.40 | Frames: 910435 | Epsilon: 0.1000


Training:   9%|▉         | 910951/10000000 [2:47:39<48:09:24, 52.43it/s]

Episode 1971 completed | Reward: 180.00 | Avg Reward: 1221.20 | Frames: 910947 | Epsilon: 0.1000


Training:   9%|▉         | 911461/10000000 [2:47:49<48:59:28, 51.53it/s]

Episode 1972 completed | Reward: 820.00 | Avg Reward: 1211.80 | Frames: 911456 | Epsilon: 0.1000


Training:   9%|▉         | 911893/10000000 [2:47:57<48:40:31, 51.86it/s]

Episode 1973 completed | Reward: 540.00 | Avg Reward: 1205.80 | Frames: 911884 | Epsilon: 0.1000


Training:   9%|▉         | 913313/10000000 [2:48:24<50:25:17, 50.06it/s]

Episode 1974 completed | Reward: 2220.00 | Avg Reward: 1222.00 | Frames: 913304 | Epsilon: 0.1000


Training:   9%|▉         | 913989/10000000 [2:48:36<51:37:09, 48.89it/s]

Episode 1975 completed | Reward: 1120.00 | Avg Reward: 1215.40 | Frames: 913980 | Epsilon: 0.1000


Training:   9%|▉         | 914741/10000000 [2:48:51<47:33:02, 53.07it/s]

Episode 1976 completed | Reward: 520.00 | Avg Reward: 1211.20 | Frames: 914741 | Epsilon: 0.1000

Memory usage: 1.12 GB


Training:   9%|▉         | 916165/10000000 [2:49:17<51:16:57, 49.20it/s]

Episode 1977 completed | Reward: 740.00 | Avg Reward: 1211.00 | Frames: 916157 | Epsilon: 0.1000


Training:   9%|▉         | 917425/10000000 [2:49:41<51:07:11, 49.35it/s]

Episode 1978 completed | Reward: 400.00 | Avg Reward: 1203.20 | Frames: 917417 | Epsilon: 0.1000


Training:   9%|▉         | 918383/10000000 [2:49:59<46:56:30, 53.74it/s]

Episode 1979 completed | Reward: 700.00 | Avg Reward: 1189.00 | Frames: 918377 | Epsilon: 0.1000


Training:   9%|▉         | 919721/10000000 [2:50:24<50:19:33, 50.12it/s]

Episode 1980 completed | Reward: 700.00 | Avg Reward: 1182.60 | Frames: 919713 | Epsilon: 0.1000


Training:   9%|▉         | 920225/10000000 [2:50:34<48:25:00, 52.09it/s]

Episode 1981 completed | Reward: 1140.00 | Avg Reward: 1185.40 | Frames: 920219 | Epsilon: 0.1000


Training:   9%|▉         | 920783/10000000 [2:50:44<47:38:08, 52.94it/s]

Episode 1982 completed | Reward: 720.00 | Avg Reward: 1183.00 | Frames: 920777 | Epsilon: 0.1000


Training:   9%|▉         | 921497/10000000 [2:50:58<51:01:55, 49.42it/s]

Episode 1983 completed | Reward: 1720.00 | Avg Reward: 1195.40 | Frames: 921488 | Epsilon: 0.1000


Training:   9%|▉         | 922057/10000000 [2:51:08<48:59:10, 51.48it/s]

Episode 1984 completed | Reward: 1060.00 | Avg Reward: 1196.20 | Frames: 922054 | Epsilon: 0.1000


Training:   9%|▉         | 922673/10000000 [2:51:20<48:59:13, 51.47it/s]

Episode 1985 completed | Reward: 1100.00 | Avg Reward: 1179.40 | Frames: 922670 | Epsilon: 0.1000


Training:   9%|▉         | 923643/10000000 [2:51:38<47:10:51, 53.44it/s]

Episode 1986 completed | Reward: 1180.00 | Avg Reward: 1187.00 | Frames: 923637 | Epsilon: 0.1000


Training:   9%|▉         | 924873/10000000 [2:52:02<60:13:49, 41.85it/s]

Episode 1987 completed | Reward: 1420.00 | Avg Reward: 1196.60 | Frames: 924872 | Epsilon: 0.1000

Memory usage: 1.12 GB


Training:   9%|▉         | 925631/10000000 [2:52:16<47:36:40, 52.94it/s]

Episode 1988 completed | Reward: 1120.00 | Avg Reward: 1194.40 | Frames: 925625 | Epsilon: 0.1000


Training:   9%|▉         | 926229/10000000 [2:52:28<49:10:14, 51.26it/s]

Episode 1989 completed | Reward: 980.00 | Avg Reward: 1192.80 | Frames: 926220 | Epsilon: 0.1000


Training:   9%|▉         | 927331/10000000 [2:52:49<47:07:52, 53.47it/s]

Episode 1990 completed | Reward: 1680.00 | Avg Reward: 1196.20 | Frames: 927325 | Epsilon: 0.1000


Training:   9%|▉         | 927877/10000000 [2:52:59<50:28:42, 49.92it/s]

Episode 1991 completed | Reward: 740.00 | Avg Reward: 1189.20 | Frames: 927870 | Epsilon: 0.1000


Training:   9%|▉         | 929451/10000000 [2:53:29<47:05:33, 53.50it/s]

Episode 1992 completed | Reward: 1340.00 | Avg Reward: 1187.20 | Frames: 929446 | Epsilon: 0.1000


Training:   9%|▉         | 930743/10000000 [2:53:54<48:03:59, 52.41it/s]

Episode 1993 completed | Reward: 980.00 | Avg Reward: 1183.80 | Frames: 930739 | Epsilon: 0.1000


Training:   9%|▉         | 931531/10000000 [2:54:09<47:30:54, 53.01it/s]

Episode 1994 completed | Reward: 160.00 | Avg Reward: 1180.60 | Frames: 931526 | Epsilon: 0.1000


Training:   9%|▉         | 932111/10000000 [2:54:20<47:56:27, 52.54it/s]

Episode 1995 completed | Reward: 880.00 | Avg Reward: 1184.00 | Frames: 932105 | Epsilon: 0.1000


Training:   9%|▉         | 932697/10000000 [2:54:31<51:38:35, 48.77it/s]

Episode 1996 completed | Reward: 620.00 | Avg Reward: 1172.60 | Frames: 932688 | Epsilon: 0.1000


Training:   9%|▉         | 933359/10000000 [2:54:44<47:27:52, 53.06it/s]

Episode 1997 completed | Reward: 820.00 | Avg Reward: 1157.40 | Frames: 933353 | Epsilon: 0.1000


Training:   9%|▉         | 934089/10000000 [2:54:58<51:44:40, 48.67it/s]

Episode 1998 completed | Reward: 980.00 | Avg Reward: 1153.00 | Frames: 934082 | Epsilon: 0.1000


Training:   9%|▉         | 934535/10000000 [2:55:06<46:31:43, 54.12it/s]

Episode 1999 completed | Reward: 760.00 | Avg Reward: 1145.00 | Frames: 934531 | Epsilon: 0.1000


Training:   9%|▉         | 935252/10000000 [2:55:20<56:09:12, 44.84it/s]

Episode 2000 completed | Reward: 1400.00 | Avg Reward: 1144.80 | Frames: 935251 | Epsilon: 0.1000

Memory usage: 1.12 GB


Training:   9%|▉         | 936361/10000000 [2:55:42<48:53:43, 51.49it/s]

Episode 2001 completed | Reward: 1360.00 | Avg Reward: 1143.20 | Frames: 936352 | Epsilon: 0.1000


Training:   9%|▉         | 937045/10000000 [2:55:55<51:35:13, 48.80it/s]

Episode 2002 completed | Reward: 580.00 | Avg Reward: 1142.20 | Frames: 937038 | Epsilon: 0.1000


Training:   9%|▉         | 938485/10000000 [2:56:22<49:48:32, 50.53it/s]

Episode 2003 completed | Reward: 2180.00 | Avg Reward: 1145.80 | Frames: 938480 | Epsilon: 0.1000


Training:   9%|▉         | 939699/10000000 [2:56:45<48:19:16, 52.08it/s]

Episode 2004 completed | Reward: 1920.00 | Avg Reward: 1153.80 | Frames: 939693 | Epsilon: 0.1000


Training:   9%|▉         | 940289/10000000 [2:56:57<50:36:55, 49.72it/s]

Episode 2005 completed | Reward: 620.00 | Avg Reward: 1143.00 | Frames: 940280 | Epsilon: 0.1000


Training:   9%|▉         | 941775/10000000 [2:57:26<48:24:02, 51.99it/s]

Episode 2006 completed | Reward: 480.00 | Avg Reward: 1142.60 | Frames: 941771 | Epsilon: 0.1000


Training:   9%|▉         | 942321/10000000 [2:57:36<52:43:24, 47.72it/s]

Episode 2007 completed | Reward: 480.00 | Avg Reward: 1139.20 | Frames: 942314 | Epsilon: 0.1000


Training:   9%|▉         | 943217/10000000 [2:57:54<50:58:26, 49.35it/s]

Episode 2008 completed | Reward: 980.00 | Avg Reward: 1130.80 | Frames: 943208 | Epsilon: 0.1000


Training:   9%|▉         | 943621/10000000 [2:58:02<51:52:43, 48.49it/s]

Episode 2009 completed | Reward: 800.00 | Avg Reward: 1131.00 | Frames: 943613 | Epsilon: 0.1000


Training:   9%|▉         | 945049/10000000 [2:58:29<51:35:22, 48.76it/s]

Episode 2010 completed | Reward: 980.00 | Avg Reward: 1135.80 | Frames: 945041 | Epsilon: 0.1000


Training:   9%|▉         | 945529/10000000 [2:58:39<62:35:26, 40.18it/s]

Episode 2011 completed | Reward: 780.00 | Avg Reward: 1136.00 | Frames: 945528 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:   9%|▉         | 946161/10000000 [2:58:51<49:45:51, 50.54it/s]

Episode 2012 completed | Reward: 1340.00 | Avg Reward: 1132.60 | Frames: 946159 | Epsilon: 0.1000


Training:   9%|▉         | 947257/10000000 [2:59:12<49:31:00, 50.78it/s]

Episode 2013 completed | Reward: 960.00 | Avg Reward: 1129.60 | Frames: 947250 | Epsilon: 0.1000


Training:   9%|▉         | 948359/10000000 [2:59:33<46:44:17, 53.80it/s]

Episode 2014 completed | Reward: 1520.00 | Avg Reward: 1134.20 | Frames: 948354 | Epsilon: 0.1000


Training:   9%|▉         | 948995/10000000 [2:59:46<49:11:04, 51.12it/s]

Episode 2015 completed | Reward: 440.00 | Avg Reward: 1125.80 | Frames: 948990 | Epsilon: 0.1000


Training:  10%|▉         | 950215/10000000 [3:00:09<46:52:16, 53.63it/s]

Episode 2016 completed | Reward: 1860.00 | Avg Reward: 1122.00 | Frames: 950211 | Epsilon: 0.1000


Training:  10%|▉         | 951436/10000000 [3:00:33<44:58:50, 55.88it/s]

Episode 2017 completed | Reward: 1860.00 | Avg Reward: 1122.20 | Frames: 951431 | Epsilon: 0.1000


Training:  10%|▉         | 952005/10000000 [3:00:44<52:22:45, 47.98it/s]

Episode 2018 completed | Reward: 1580.00 | Avg Reward: 1126.40 | Frames: 951998 | Epsilon: 0.1000


Training:  10%|▉         | 952477/10000000 [3:00:53<48:56:49, 51.35it/s]

Episode 2019 completed | Reward: 660.00 | Avg Reward: 1123.20 | Frames: 952468 | Epsilon: 0.1000


Training:  10%|▉         | 952971/10000000 [3:01:03<46:54:49, 53.57it/s]

Episode 2020 completed | Reward: 1480.00 | Avg Reward: 1128.60 | Frames: 952967 | Epsilon: 0.1000


Training:  10%|▉         | 953485/10000000 [3:01:13<51:15:14, 49.03it/s]

Episode 2021 completed | Reward: 2060.00 | Avg Reward: 1133.20 | Frames: 953477 | Epsilon: 0.1000


Training:  10%|▉         | 954091/10000000 [3:01:25<47:57:44, 52.39it/s]

Episode 2022 completed | Reward: 1080.00 | Avg Reward: 1132.20 | Frames: 954086 | Epsilon: 0.1000


Training:  10%|▉         | 955151/10000000 [3:01:45<47:12:54, 53.21it/s]

Episode 2023 completed | Reward: 1220.00 | Avg Reward: 1130.20 | Frames: 955145 | Epsilon: 0.1000


Training:  10%|▉         | 955675/10000000 [3:01:55<57:23:08, 43.78it/s]

Episode 2024 completed | Reward: 2820.00 | Avg Reward: 1127.60 | Frames: 955674 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|▉         | 956430/10000000 [3:02:10<50:01:51, 50.21it/s]

Episode 2025 completed | Reward: 880.00 | Avg Reward: 1123.60 | Frames: 956422 | Epsilon: 0.1000


Training:  10%|▉         | 956971/10000000 [3:02:21<47:17:27, 53.12it/s]

Episode 2026 completed | Reward: 1220.00 | Avg Reward: 1125.00 | Frames: 956966 | Epsilon: 0.1000


Training:  10%|▉         | 957674/10000000 [3:02:34<51:44:27, 48.54it/s]

Episode 2027 completed | Reward: 1240.00 | Avg Reward: 1129.20 | Frames: 957667 | Epsilon: 0.1000


Training:  10%|▉         | 958534/10000000 [3:02:51<51:01:48, 49.22it/s]

Episode 2028 completed | Reward: 900.00 | Avg Reward: 1125.00 | Frames: 958527 | Epsilon: 0.1000


Training:  10%|▉         | 959555/10000000 [3:03:11<48:52:09, 51.39it/s]

Episode 2029 completed | Reward: 940.00 | Avg Reward: 1117.40 | Frames: 959550 | Epsilon: 0.1000


Training:  10%|▉         | 960391/10000000 [3:03:27<47:05:39, 53.32it/s]

Episode 2030 completed | Reward: 1300.00 | Avg Reward: 1114.40 | Frames: 960385 | Epsilon: 0.1000


Training:  10%|▉         | 960962/10000000 [3:03:38<50:33:05, 49.67it/s]

Episode 2031 completed | Reward: 1520.00 | Avg Reward: 1121.60 | Frames: 960953 | Epsilon: 0.1000


Training:  10%|▉         | 962581/10000000 [3:04:10<51:51:01, 48.42it/s]

Episode 2032 completed | Reward: 1560.00 | Avg Reward: 1127.40 | Frames: 962572 | Epsilon: 0.1000


Training:  10%|▉         | 963194/10000000 [3:04:22<50:10:57, 50.02it/s]

Episode 2033 completed | Reward: 3120.00 | Avg Reward: 1145.80 | Frames: 963185 | Epsilon: 0.1000


Training:  10%|▉         | 963697/10000000 [3:04:32<49:51:11, 50.35it/s]

Episode 2034 completed | Reward: 1340.00 | Avg Reward: 1143.40 | Frames: 963690 | Epsilon: 0.1000


Training:  10%|▉         | 964365/10000000 [3:04:45<52:20:09, 47.96it/s]

Episode 2035 completed | Reward: 640.00 | Avg Reward: 1141.00 | Frames: 964358 | Epsilon: 0.1000


Training:  10%|▉         | 965235/10000000 [3:05:01<49:53:41, 50.30it/s]

Episode 2036 completed | Reward: 2580.00 | Avg Reward: 1155.60 | Frames: 965231 | Epsilon: 0.1000


Training:  10%|▉         | 966600/10000000 [3:05:28<58:49:53, 42.65it/s]

Episode 2037 completed | Reward: 1760.00 | Avg Reward: 1157.20 | Frames: 966599 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|▉         | 967154/10000000 [3:05:39<50:53:16, 49.31it/s]

Episode 2038 completed | Reward: 700.00 | Avg Reward: 1148.20 | Frames: 967147 | Epsilon: 0.1000


Training:  10%|▉         | 967837/10000000 [3:05:52<52:34:45, 47.72it/s]

Episode 2039 completed | Reward: 2040.00 | Avg Reward: 1144.80 | Frames: 967828 | Epsilon: 0.1000


Training:  10%|▉         | 968163/10000000 [3:05:59<46:53:03, 53.51it/s]

Episode 2040 completed | Reward: 980.00 | Avg Reward: 1147.80 | Frames: 968157 | Epsilon: 0.1000


Training:  10%|▉         | 968689/10000000 [3:06:09<48:36:20, 51.61it/s]

Episode 2041 completed | Reward: 2880.00 | Avg Reward: 1165.20 | Frames: 968680 | Epsilon: 0.1000


Training:  10%|▉         | 969326/10000000 [3:06:21<49:45:34, 50.41it/s]

Episode 2042 completed | Reward: 680.00 | Avg Reward: 1162.00 | Frames: 969319 | Epsilon: 0.1000


Training:  10%|▉         | 969769/10000000 [3:06:30<52:50:33, 47.47it/s]

Episode 2043 completed | Reward: 3840.00 | Avg Reward: 1192.80 | Frames: 969762 | Epsilon: 0.1000


Training:  10%|▉         | 970469/10000000 [3:06:44<52:38:39, 47.64it/s]

Episode 2044 completed | Reward: 2240.00 | Avg Reward: 1206.00 | Frames: 970461 | Epsilon: 0.1000


Training:  10%|▉         | 970993/10000000 [3:06:54<51:31:07, 48.68it/s]

Episode 2045 completed | Reward: 1780.00 | Avg Reward: 1217.00 | Frames: 970985 | Epsilon: 0.1000


Training:  10%|▉         | 971625/10000000 [3:07:06<50:03:10, 50.10it/s]

Episode 2046 completed | Reward: 1440.00 | Avg Reward: 1222.00 | Frames: 971616 | Epsilon: 0.1000


Training:  10%|▉         | 972113/10000000 [3:07:16<50:25:38, 49.73it/s]

Episode 2047 completed | Reward: 1260.00 | Avg Reward: 1215.00 | Frames: 972108 | Epsilon: 0.1000


Training:  10%|▉         | 972929/10000000 [3:07:32<50:25:30, 49.73it/s]

Episode 2048 completed | Reward: 740.00 | Avg Reward: 1213.00 | Frames: 972927 | Epsilon: 0.1000


Training:  10%|▉         | 973431/10000000 [3:07:41<48:57:35, 51.21it/s]

Episode 2049 completed | Reward: 1320.00 | Avg Reward: 1207.60 | Frames: 973427 | Epsilon: 0.1000


Training:  10%|▉         | 973841/10000000 [3:07:49<51:20:44, 48.83it/s]

Episode 2050 completed | Reward: 820.00 | Avg Reward: 1208.60 | Frames: 973833 | Epsilon: 0.1000


Training:  10%|▉         | 974685/10000000 [3:08:06<52:41:26, 47.58it/s]

Episode 2051 completed | Reward: 1840.00 | Avg Reward: 1213.20 | Frames: 974677 | Epsilon: 0.1000


Training:  10%|▉         | 975618/10000000 [3:08:24<50:21:56, 49.77it/s]

Episode 2052 completed | Reward: 1440.00 | Avg Reward: 1220.60 | Frames: 975611 | Epsilon: 0.1000


Training:  10%|▉         | 976129/10000000 [3:08:34<49:18:44, 50.83it/s]

Episode 2053 completed | Reward: 1760.00 | Avg Reward: 1227.00 | Frames: 976121 | Epsilon: 0.1000


Training:  10%|▉         | 976875/10000000 [3:08:49<48:15:06, 51.94it/s]

Episode 2054 completed | Reward: 2280.00 | Avg Reward: 1240.20 | Frames: 976875 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|▉         | 977290/10000000 [3:08:57<51:06:49, 49.03it/s]

Episode 2055 completed | Reward: 520.00 | Avg Reward: 1236.80 | Frames: 977283 | Epsilon: 0.1000


Training:  10%|▉         | 977989/10000000 [3:09:10<51:29:47, 48.67it/s]

Episode 2056 completed | Reward: 740.00 | Avg Reward: 1236.00 | Frames: 977981 | Epsilon: 0.1000


Training:  10%|▉         | 978601/10000000 [3:09:22<52:01:50, 48.16it/s]

Episode 2057 completed | Reward: 1060.00 | Avg Reward: 1224.40 | Frames: 978592 | Epsilon: 0.1000


Training:  10%|▉         | 979059/10000000 [3:09:31<47:53:31, 52.32it/s]

Episode 2058 completed | Reward: 880.00 | Avg Reward: 1222.60 | Frames: 979053 | Epsilon: 0.1000


Training:  10%|▉         | 979873/10000000 [3:09:47<50:58:42, 49.15it/s]

Episode 2059 completed | Reward: 1980.00 | Avg Reward: 1235.60 | Frames: 979864 | Epsilon: 0.1000


Training:  10%|▉         | 980417/10000000 [3:09:58<51:08:14, 48.99it/s]

Episode 2060 completed | Reward: 1820.00 | Avg Reward: 1235.40 | Frames: 980408 | Epsilon: 0.1000


Training:  10%|▉         | 981969/10000000 [3:10:28<50:08:43, 49.96it/s]

Episode 2061 completed | Reward: 3420.00 | Avg Reward: 1265.80 | Frames: 981963 | Epsilon: 0.1000


Training:  10%|▉         | 982494/10000000 [3:10:38<50:08:15, 49.96it/s]

Episode 2062 completed | Reward: 880.00 | Avg Reward: 1267.20 | Frames: 982487 | Epsilon: 0.1000


Training:  10%|▉         | 982929/10000000 [3:10:47<52:03:03, 48.12it/s]

Episode 2063 completed | Reward: 320.00 | Avg Reward: 1255.20 | Frames: 982922 | Epsilon: 0.1000


Training:  10%|▉         | 983557/10000000 [3:10:59<51:41:58, 48.44it/s]

Episode 2064 completed | Reward: 1000.00 | Avg Reward: 1248.20 | Frames: 983550 | Epsilon: 0.1000


Training:  10%|▉         | 985393/10000000 [3:11:35<52:13:31, 47.95it/s]

Episode 2065 completed | Reward: 3480.00 | Avg Reward: 1276.40 | Frames: 985385 | Epsilon: 0.1000


Training:  10%|▉         | 986423/10000000 [3:11:55<48:17:34, 51.85it/s]

Episode 2066 completed | Reward: 2200.00 | Avg Reward: 1274.80 | Frames: 986418 | Epsilon: 0.1000


Training:  10%|▉         | 986996/10000000 [3:12:06<58:54:31, 42.50it/s]

Episode 2067 completed | Reward: 760.00 | Avg Reward: 1275.20 | Frames: 986995 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|▉         | 987543/10000000 [3:12:17<49:08:36, 50.94it/s]

Episode 2068 completed | Reward: 620.00 | Avg Reward: 1281.00 | Frames: 987537 | Epsilon: 0.1000


Training:  10%|▉         | 988305/10000000 [3:12:32<52:59:30, 47.24it/s]

Episode 2069 completed | Reward: 2040.00 | Avg Reward: 1287.20 | Frames: 988297 | Epsilon: 0.1000


Training:  10%|▉         | 988949/10000000 [3:12:44<51:25:53, 48.67it/s]

Episode 2070 completed | Reward: 940.00 | Avg Reward: 1284.80 | Frames: 988941 | Epsilon: 0.1000


Training:  10%|▉         | 990081/10000000 [3:13:06<51:18:58, 48.77it/s]

Episode 2071 completed | Reward: 1540.00 | Avg Reward: 1298.40 | Frames: 990074 | Epsilon: 0.1000


Training:  10%|▉         | 990777/10000000 [3:13:20<50:41:53, 49.36it/s]

Episode 2072 completed | Reward: 660.00 | Avg Reward: 1296.80 | Frames: 990773 | Epsilon: 0.1000


Training:  10%|▉         | 992847/10000000 [3:14:00<48:00:46, 52.11it/s]

Episode 2073 completed | Reward: 2320.00 | Avg Reward: 1314.60 | Frames: 992841 | Epsilon: 0.1000


Training:  10%|▉         | 993435/10000000 [3:14:11<47:40:55, 52.47it/s]

Episode 2074 completed | Reward: 560.00 | Avg Reward: 1298.00 | Frames: 993430 | Epsilon: 0.1000


Training:  10%|▉         | 994830/10000000 [3:14:38<50:15:52, 49.77it/s]

Episode 2075 completed | Reward: 1760.00 | Avg Reward: 1304.40 | Frames: 994823 | Epsilon: 0.1000


Training:  10%|▉         | 995357/10000000 [3:14:49<49:46:56, 50.24it/s]

Episode 2076 completed | Reward: 440.00 | Avg Reward: 1303.60 | Frames: 995348 | Epsilon: 0.1000


Training:  10%|▉         | 996235/10000000 [3:15:06<47:38:39, 52.49it/s]

Episode 2077 completed | Reward: 1020.00 | Avg Reward: 1306.40 | Frames: 996231 | Epsilon: 0.1000


Training:  10%|▉         | 996897/10000000 [3:15:19<49:28:44, 50.54it/s]

Episode 2078 completed | Reward: 1320.00 | Avg Reward: 1315.60 | Frames: 996889 | Epsilon: 0.1000


Training:  10%|▉         | 997559/10000000 [3:15:32<59:13:08, 42.23it/s]

Episode 2079 completed | Reward: 1120.00 | Avg Reward: 1319.80 | Frames: 997556 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|▉         | 998253/10000000 [3:15:45<49:24:59, 50.60it/s]

Episode 2080 completed | Reward: 960.00 | Avg Reward: 1322.40 | Frames: 998250 | Epsilon: 0.1000


Training:  10%|▉         | 999307/10000000 [3:16:06<47:23:02, 52.76it/s]

Episode 2081 completed | Reward: 500.00 | Avg Reward: 1316.00 | Frames: 999303 | Epsilon: 0.1000


Training:  10%|▉         | 999993/10000000 [3:16:19<48:37:15, 51.42it/s]

Model saved to weights/CarnivalDeterministic-v4_dqn_1000000frames.pth


Training:  10%|█         | 1000000/10000000 [3:16:44<48:37:15, 51.42it/s]


Evaluation at frame 1000000: 1324.00


Training:  10%|█         | 1000005/10000000 [3:16:47<2173:03:31,  1.15it/s]

Model saved to weights/CarnivalDeterministic-v4_dqn_best.pth
Episode 2082 completed | Reward: 680.00 | Avg Reward: 1315.60 | Frames: 1000001 | Epsilon: 0.1000


Training:  10%|█         | 1000745/10000000 [3:17:01<51:19:28, 48.71it/s]

Episode 2083 completed | Reward: 480.00 | Avg Reward: 1303.20 | Frames: 1000737 | Epsilon: 0.1000


Training:  10%|█         | 1001173/10000000 [3:17:09<51:52:41, 48.18it/s]

Episode 2084 completed | Reward: 340.00 | Avg Reward: 1296.00 | Frames: 1001165 | Epsilon: 0.1000


Training:  10%|█         | 1001929/10000000 [3:17:24<51:58:55, 48.08it/s]

Episode 2085 completed | Reward: 540.00 | Avg Reward: 1290.40 | Frames: 1001922 | Epsilon: 0.1000


Training:  10%|█         | 1002661/10000000 [3:17:38<51:27:11, 48.57it/s]

Episode 2086 completed | Reward: 500.00 | Avg Reward: 1283.60 | Frames: 1002654 | Epsilon: 0.1000


Training:  10%|█         | 1003540/10000000 [3:17:56<45:07:06, 55.39it/s]

Episode 2087 completed | Reward: 1180.00 | Avg Reward: 1281.20 | Frames: 1003533 | Epsilon: 0.1000


Training:  10%|█         | 1004503/10000000 [3:18:14<48:32:06, 51.48it/s]

Episode 2088 completed | Reward: 1040.00 | Avg Reward: 1280.40 | Frames: 1004498 | Epsilon: 0.1000


Training:  10%|█         | 1005285/10000000 [3:18:30<49:33:40, 50.41it/s]

Episode 2089 completed | Reward: 840.00 | Avg Reward: 1279.00 | Frames: 1005281 | Epsilon: 0.1000


Training:  10%|█         | 1005845/10000000 [3:18:41<50:23:41, 49.58it/s]

Episode 2090 completed | Reward: 440.00 | Avg Reward: 1266.60 | Frames: 1005843 | Epsilon: 0.1000


Training:  10%|█         | 1006333/10000000 [3:18:50<49:56:21, 50.03it/s]

Episode 2091 completed | Reward: 900.00 | Avg Reward: 1268.20 | Frames: 1006331 | Epsilon: 0.1000


Training:  10%|█         | 1007205/10000000 [3:19:07<49:08:48, 50.83it/s]

Episode 2092 completed | Reward: 800.00 | Avg Reward: 1262.80 | Frames: 1007198 | Epsilon: 0.1000


Training:  10%|█         | 1008411/10000000 [3:19:31<59:34:43, 41.92it/s]

Episode 2093 completed | Reward: 1080.00 | Avg Reward: 1263.80 | Frames: 1008410 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|█         | 1008935/10000000 [3:19:41<47:47:29, 52.26it/s]

Episode 2094 completed | Reward: 720.00 | Avg Reward: 1269.40 | Frames: 1008930 | Epsilon: 0.1000


Training:  10%|█         | 1010009/10000000 [3:20:02<51:46:58, 48.22it/s]

Episode 2095 completed | Reward: 1680.00 | Avg Reward: 1277.40 | Frames: 1010001 | Epsilon: 0.1000


Training:  10%|█         | 1010673/10000000 [3:20:15<50:47:36, 49.16it/s]

Episode 2096 completed | Reward: 1040.00 | Avg Reward: 1281.60 | Frames: 1010664 | Epsilon: 0.1000


Training:  10%|█         | 1011103/10000000 [3:20:23<47:49:43, 52.21it/s]

Episode 2097 completed | Reward: 960.00 | Avg Reward: 1283.00 | Frames: 1011098 | Epsilon: 0.1000


Training:  10%|█         | 1011589/10000000 [3:20:33<49:37:34, 50.31it/s]

Episode 2098 completed | Reward: 2080.00 | Avg Reward: 1294.00 | Frames: 1011585 | Epsilon: 0.1000


Training:  10%|█         | 1012298/10000000 [3:20:46<50:18:24, 49.63it/s]

Episode 2099 completed | Reward: 1520.00 | Avg Reward: 1301.60 | Frames: 1012291 | Epsilon: 0.1000


Training:  10%|█         | 1013061/10000000 [3:21:01<52:49:04, 47.26it/s]

Episode 2100 completed | Reward: 1660.00 | Avg Reward: 1304.20 | Frames: 1013052 | Epsilon: 0.1000


Training:  10%|█         | 1013747/10000000 [3:21:15<48:34:20, 51.39it/s]

Episode 2101 completed | Reward: 1020.00 | Avg Reward: 1300.80 | Frames: 1013743 | Epsilon: 0.1000


Training:  10%|█         | 1014549/10000000 [3:21:30<51:25:48, 48.53it/s]

Episode 2102 completed | Reward: 680.00 | Avg Reward: 1301.80 | Frames: 1014542 | Epsilon: 0.1000


Training:  10%|█         | 1015307/10000000 [3:21:45<48:02:21, 51.95it/s]

Episode 2103 completed | Reward: 1400.00 | Avg Reward: 1294.00 | Frames: 1015301 | Epsilon: 0.1000


Training:  10%|█         | 1015991/10000000 [3:21:58<49:22:59, 50.53it/s]

Episode 2104 completed | Reward: 1080.00 | Avg Reward: 1285.60 | Frames: 1015986 | Epsilon: 0.1000


Training:  10%|█         | 1016538/10000000 [3:22:09<50:34:23, 49.34it/s]

Episode 2105 completed | Reward: 980.00 | Avg Reward: 1289.20 | Frames: 1016531 | Epsilon: 0.1000


Training:  10%|█         | 1017301/10000000 [3:22:24<51:08:42, 48.79it/s]

Episode 2106 completed | Reward: 660.00 | Avg Reward: 1291.00 | Frames: 1017293 | Epsilon: 0.1000


Training:  10%|█         | 1018275/10000000 [3:22:43<48:05:42, 51.87it/s]

Episode 2107 completed | Reward: 1720.00 | Avg Reward: 1303.40 | Frames: 1018270 | Epsilon: 0.1000


Training:  10%|█         | 1018785/10000000 [3:22:53<61:29:39, 40.57it/s]

Episode 2108 completed | Reward: 880.00 | Avg Reward: 1302.40 | Frames: 1018784 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|█         | 1019989/10000000 [3:23:16<51:14:01, 48.69it/s]

Episode 2109 completed | Reward: 1000.00 | Avg Reward: 1304.40 | Frames: 1019981 | Epsilon: 0.1000


Training:  10%|█         | 1020777/10000000 [3:23:32<51:23:11, 48.54it/s]

Episode 2110 completed | Reward: 1420.00 | Avg Reward: 1308.80 | Frames: 1020768 | Epsilon: 0.1000


Training:  10%|█         | 1021393/10000000 [3:23:44<49:16:36, 50.61it/s]

Episode 2111 completed | Reward: 760.00 | Avg Reward: 1308.60 | Frames: 1021386 | Epsilon: 0.1000


Training:  10%|█         | 1022037/10000000 [3:23:56<51:14:33, 48.67it/s]

Episode 2112 completed | Reward: 140.00 | Avg Reward: 1296.60 | Frames: 1022029 | Epsilon: 0.1000


Training:  10%|█         | 1023573/10000000 [3:24:26<50:24:58, 49.46it/s]

Episode 2113 completed | Reward: 1200.00 | Avg Reward: 1299.00 | Frames: 1023571 | Epsilon: 0.1000


Training:  10%|█         | 1024468/10000000 [3:24:43<44:52:06, 55.57it/s]

Episode 2114 completed | Reward: 1060.00 | Avg Reward: 1294.40 | Frames: 1024463 | Epsilon: 0.1000


Training:  10%|█         | 1025550/10000000 [3:25:04<49:44:46, 50.11it/s]

Episode 2115 completed | Reward: 2420.00 | Avg Reward: 1314.20 | Frames: 1025541 | Epsilon: 0.1000


Training:  10%|█         | 1026049/10000000 [3:25:14<51:20:08, 48.56it/s]

Episode 2116 completed | Reward: 1200.00 | Avg Reward: 1307.60 | Frames: 1026041 | Epsilon: 0.1000


Training:  10%|█         | 1026813/10000000 [3:25:29<52:08:24, 47.80it/s]

Episode 2117 completed | Reward: 1440.00 | Avg Reward: 1303.40 | Frames: 1026804 | Epsilon: 0.1000


Training:  10%|█         | 1027861/10000000 [3:25:49<49:17:36, 50.56it/s]

Episode 2118 completed | Reward: 1240.00 | Avg Reward: 1300.00 | Frames: 1027856 | Epsilon: 0.1000


Training:  10%|█         | 1028806/10000000 [3:26:08<48:50:33, 51.02it/s]

Episode 2119 completed | Reward: 2480.00 | Avg Reward: 1318.20 | Frames: 1028806 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|█         | 1029679/10000000 [3:26:25<47:36:56, 52.33it/s]

Episode 2120 completed | Reward: 1920.00 | Avg Reward: 1322.60 | Frames: 1029673 | Epsilon: 0.1000


Training:  10%|█         | 1030149/10000000 [3:26:34<49:40:09, 50.16it/s]

Episode 2121 completed | Reward: 1040.00 | Avg Reward: 1312.40 | Frames: 1030144 | Epsilon: 0.1000


Training:  10%|█         | 1030965/10000000 [3:26:50<50:22:47, 49.45it/s]

Episode 2122 completed | Reward: 580.00 | Avg Reward: 1307.40 | Frames: 1030956 | Epsilon: 0.1000


Training:  10%|█         | 1031309/10000000 [3:26:57<49:46:23, 50.05it/s]

Episode 2123 completed | Reward: 540.00 | Avg Reward: 1300.60 | Frames: 1031304 | Epsilon: 0.1000


Training:  10%|█         | 1032019/10000000 [3:27:11<47:22:41, 52.58it/s]

Episode 2124 completed | Reward: 1500.00 | Avg Reward: 1287.40 | Frames: 1032013 | Epsilon: 0.1000


Training:  10%|█         | 1033391/10000000 [3:27:37<47:01:29, 52.97it/s]

Episode 2125 completed | Reward: 2320.00 | Avg Reward: 1301.80 | Frames: 1033387 | Epsilon: 0.1000


Training:  10%|█         | 1034033/10000000 [3:27:50<51:03:42, 48.78it/s]

Episode 2126 completed | Reward: 920.00 | Avg Reward: 1298.80 | Frames: 1034026 | Epsilon: 0.1000


Training:  10%|█         | 1034897/10000000 [3:28:07<51:07:16, 48.71it/s]

Episode 2127 completed | Reward: 1660.00 | Avg Reward: 1303.00 | Frames: 1034888 | Epsilon: 0.1000


Training:  10%|█         | 1035374/10000000 [3:28:16<51:16:33, 48.56it/s]

Episode 2128 completed | Reward: 1600.00 | Avg Reward: 1310.00 | Frames: 1035367 | Epsilon: 0.1000


Training:  10%|█         | 1035939/10000000 [3:28:27<46:26:18, 53.62it/s]

Episode 2129 completed | Reward: 1140.00 | Avg Reward: 1312.00 | Frames: 1035935 | Epsilon: 0.1000


Training:  10%|█         | 1037077/10000000 [3:28:49<51:34:54, 48.27it/s]

Episode 2130 completed | Reward: 680.00 | Avg Reward: 1305.80 | Frames: 1037068 | Epsilon: 0.1000


Training:  10%|█         | 1037565/10000000 [3:28:59<49:35:38, 50.20it/s]

Episode 2131 completed | Reward: 1920.00 | Avg Reward: 1309.80 | Frames: 1037557 | Epsilon: 0.1000


Training:  10%|█         | 1038099/10000000 [3:29:09<46:55:39, 53.05it/s]

Episode 2132 completed | Reward: 1020.00 | Avg Reward: 1304.40 | Frames: 1038093 | Epsilon: 0.1000


Training:  10%|█         | 1038831/10000000 [3:29:24<58:16:04, 42.72it/s]

Episode 2133 completed | Reward: 1060.00 | Avg Reward: 1283.80 | Frames: 1038829 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|█         | 1039426/10000000 [3:29:35<49:25:27, 50.36it/s]

Episode 2134 completed | Reward: 540.00 | Avg Reward: 1275.80 | Frames: 1039419 | Epsilon: 0.1000


Training:  10%|█         | 1040321/10000000 [3:29:53<49:32:11, 50.24it/s]

Episode 2135 completed | Reward: 2320.00 | Avg Reward: 1292.60 | Frames: 1040312 | Epsilon: 0.1000


Training:  10%|█         | 1040825/10000000 [3:30:02<49:04:55, 50.70it/s]

Episode 2136 completed | Reward: 1180.00 | Avg Reward: 1278.60 | Frames: 1040816 | Epsilon: 0.1000


Training:  10%|█         | 1041561/10000000 [3:30:17<50:01:11, 49.75it/s]

Episode 2137 completed | Reward: 1580.00 | Avg Reward: 1276.80 | Frames: 1041552 | Epsilon: 0.1000


Training:  10%|█         | 1042142/10000000 [3:30:28<51:38:07, 48.19it/s]

Episode 2138 completed | Reward: 1280.00 | Avg Reward: 1282.60 | Frames: 1042135 | Epsilon: 0.1000


Training:  10%|█         | 1042851/10000000 [3:30:42<47:43:48, 52.13it/s]

Episode 2139 completed | Reward: 1720.00 | Avg Reward: 1279.40 | Frames: 1042845 | Epsilon: 0.1000


Training:  10%|█         | 1043447/10000000 [3:30:53<47:37:55, 52.23it/s]

Episode 2140 completed | Reward: 1200.00 | Avg Reward: 1281.60 | Frames: 1043443 | Epsilon: 0.1000


Training:  10%|█         | 1044093/10000000 [3:31:06<50:36:49, 49.15it/s]

Episode 2141 completed | Reward: 1000.00 | Avg Reward: 1262.80 | Frames: 1044084 | Epsilon: 0.1000


Training:  10%|█         | 1045035/10000000 [3:31:24<46:42:28, 53.26it/s]

Episode 2142 completed | Reward: 2240.00 | Avg Reward: 1278.40 | Frames: 1045029 | Epsilon: 0.1000


Training:  10%|█         | 1045473/10000000 [3:31:33<49:35:54, 50.15it/s]

Episode 2143 completed | Reward: 640.00 | Avg Reward: 1246.40 | Frames: 1045464 | Epsilon: 0.1000


Training:  10%|█         | 1045982/10000000 [3:31:43<50:05:32, 49.65it/s]

Episode 2144 completed | Reward: 920.00 | Avg Reward: 1233.20 | Frames: 1045975 | Epsilon: 0.1000


Training:  10%|█         | 1046725/10000000 [3:31:57<49:23:50, 50.35it/s]

Episode 2145 completed | Reward: 1320.00 | Avg Reward: 1228.60 | Frames: 1046716 | Epsilon: 0.1000


Training:  10%|█         | 1047609/10000000 [3:32:15<52:22:19, 47.48it/s]

Episode 2146 completed | Reward: 1220.00 | Avg Reward: 1226.40 | Frames: 1047602 | Epsilon: 0.1000


Training:  10%|█         | 1048134/10000000 [3:32:25<50:07:26, 49.61it/s]

Episode 2147 completed | Reward: 620.00 | Avg Reward: 1220.00 | Frames: 1048127 | Epsilon: 0.1000


Training:  10%|█         | 1048884/10000000 [3:32:40<55:26:50, 44.84it/s]

Episode 2148 completed | Reward: 1280.00 | Avg Reward: 1225.40 | Frames: 1048883 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  10%|█         | 1049527/10000000 [3:32:52<47:41:57, 52.12it/s]

Episode 2149 completed | Reward: 960.00 | Avg Reward: 1221.80 | Frames: 1049521 | Epsilon: 0.1000


Training:  11%|█         | 1050490/10000000 [3:33:11<51:11:34, 48.56it/s]

Episode 2150 completed | Reward: 1760.00 | Avg Reward: 1231.20 | Frames: 1050483 | Epsilon: 0.1000


Training:  11%|█         | 1051399/10000000 [3:33:29<47:48:45, 51.99it/s]

Episode 2151 completed | Reward: 1880.00 | Avg Reward: 1231.60 | Frames: 1051393 | Epsilon: 0.1000


Training:  11%|█         | 1052225/10000000 [3:33:45<52:38:42, 47.21it/s]

Episode 2152 completed | Reward: 1000.00 | Avg Reward: 1227.20 | Frames: 1052218 | Epsilon: 0.1000


Training:  11%|█         | 1052729/10000000 [3:33:55<49:33:21, 50.15it/s]

Episode 2153 completed | Reward: 680.00 | Avg Reward: 1216.40 | Frames: 1052724 | Epsilon: 0.1000


Training:  11%|█         | 1053263/10000000 [3:34:05<47:21:35, 52.48it/s]

Episode 2154 completed | Reward: 2000.00 | Avg Reward: 1213.60 | Frames: 1053258 | Epsilon: 0.1000


Training:  11%|█         | 1053777/10000000 [3:34:15<51:03:51, 48.67it/s]

Episode 2155 completed | Reward: 1660.00 | Avg Reward: 1225.00 | Frames: 1053769 | Epsilon: 0.1000


Training:  11%|█         | 1054373/10000000 [3:34:27<51:24:56, 48.33it/s]

Episode 2156 completed | Reward: 580.00 | Avg Reward: 1223.40 | Frames: 1054365 | Epsilon: 0.1000


Training:  11%|█         | 1055779/10000000 [3:34:54<46:49:55, 53.05it/s]

Episode 2157 completed | Reward: 2920.00 | Avg Reward: 1242.00 | Frames: 1055774 | Epsilon: 0.1000


Training:  11%|█         | 1056479/10000000 [3:35:08<47:18:54, 52.51it/s]

Episode 2158 completed | Reward: 1220.00 | Avg Reward: 1245.40 | Frames: 1056475 | Epsilon: 0.1000


Training:  11%|█         | 1057113/10000000 [3:35:20<51:07:13, 48.59it/s]

Episode 2159 completed | Reward: 1680.00 | Avg Reward: 1242.40 | Frames: 1057105 | Epsilon: 0.1000


Training:  11%|█         | 1057741/10000000 [3:35:32<52:30:53, 47.30it/s]

Episode 2160 completed | Reward: 500.00 | Avg Reward: 1229.20 | Frames: 1057732 | Epsilon: 0.1000


Training:  11%|█         | 1058951/10000000 [3:35:56<48:23:36, 51.32it/s]

Episode 2161 completed | Reward: 2880.00 | Avg Reward: 1223.80 | Frames: 1058951 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█         | 1060653/10000000 [3:36:29<52:12:51, 47.56it/s]

Episode 2162 completed | Reward: 3940.00 | Avg Reward: 1254.40 | Frames: 1060645 | Epsilon: 0.1000


Training:  11%|█         | 1061106/10000000 [3:36:38<51:02:22, 48.65it/s]

Episode 2163 completed | Reward: 940.00 | Avg Reward: 1260.60 | Frames: 1061099 | Epsilon: 0.1000


Training:  11%|█         | 1061783/10000000 [3:36:51<46:49:55, 53.02it/s]

Episode 2164 completed | Reward: 900.00 | Avg Reward: 1259.60 | Frames: 1061779 | Epsilon: 0.1000


Training:  11%|█         | 1062603/10000000 [3:37:07<47:09:08, 52.65it/s]

Episode 2165 completed | Reward: 1680.00 | Avg Reward: 1241.60 | Frames: 1062599 | Epsilon: 0.1000


Training:  11%|█         | 1063535/10000000 [3:37:25<47:00:32, 52.81it/s]

Episode 2166 completed | Reward: 960.00 | Avg Reward: 1229.20 | Frames: 1063530 | Epsilon: 0.1000


Training:  11%|█         | 1064459/10000000 [3:37:43<48:07:22, 51.58it/s]

Episode 2167 completed | Reward: 1120.00 | Avg Reward: 1232.80 | Frames: 1064455 | Epsilon: 0.1000


Training:  11%|█         | 1065014/10000000 [3:37:54<49:35:28, 50.05it/s]

Episode 2168 completed | Reward: 1440.00 | Avg Reward: 1241.00 | Frames: 1065006 | Epsilon: 0.1000


Training:  11%|█         | 1065651/10000000 [3:38:07<47:13:13, 52.56it/s]

Episode 2169 completed | Reward: 920.00 | Avg Reward: 1229.80 | Frames: 1065645 | Epsilon: 0.1000


Training:  11%|█         | 1066135/10000000 [3:38:16<48:38:16, 51.02it/s]

Episode 2170 completed | Reward: 980.00 | Avg Reward: 1230.20 | Frames: 1066130 | Epsilon: 0.1000


Training:  11%|█         | 1067859/10000000 [3:38:50<46:41:43, 53.13it/s]

Episode 2171 completed | Reward: 1160.00 | Avg Reward: 1226.40 | Frames: 1067855 | Epsilon: 0.1000


Training:  11%|█         | 1069250/10000000 [3:39:17<48:49:42, 50.81it/s]

Episode 2172 completed | Reward: 1660.00 | Avg Reward: 1236.40 | Frames: 1069250 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█         | 1070553/10000000 [3:39:42<51:29:35, 48.17it/s]

Episode 2173 completed | Reward: 2240.00 | Avg Reward: 1235.60 | Frames: 1070545 | Epsilon: 0.1000


Training:  11%|█         | 1071237/10000000 [3:39:56<52:36:39, 47.14it/s]

Episode 2174 completed | Reward: 1140.00 | Avg Reward: 1241.40 | Frames: 1071229 | Epsilon: 0.1000


Training:  11%|█         | 1071725/10000000 [3:40:06<50:02:58, 49.55it/s]

Episode 2175 completed | Reward: 900.00 | Avg Reward: 1232.80 | Frames: 1071716 | Epsilon: 0.1000


Training:  11%|█         | 1072435/10000000 [3:40:20<47:01:40, 52.73it/s]

Episode 2176 completed | Reward: 920.00 | Avg Reward: 1237.60 | Frames: 1072429 | Epsilon: 0.1000


Training:  11%|█         | 1072837/10000000 [3:40:27<51:16:23, 48.36it/s]

Episode 2177 completed | Reward: 620.00 | Avg Reward: 1233.60 | Frames: 1072828 | Epsilon: 0.1000


Training:  11%|█         | 1073531/10000000 [3:40:41<47:25:44, 52.28it/s]

Episode 2178 completed | Reward: 800.00 | Avg Reward: 1228.40 | Frames: 1073527 | Epsilon: 0.1000


Training:  11%|█         | 1073977/10000000 [3:40:50<50:50:59, 48.76it/s]

Episode 2179 completed | Reward: 780.00 | Avg Reward: 1225.00 | Frames: 1073968 | Epsilon: 0.1000


Training:  11%|█         | 1075279/10000000 [3:41:15<47:50:25, 51.82it/s]

Episode 2180 completed | Reward: 1980.00 | Avg Reward: 1235.20 | Frames: 1075273 | Epsilon: 0.1000


Training:  11%|█         | 1075759/10000000 [3:41:25<47:04:36, 52.66it/s]

Episode 2181 completed | Reward: 1380.00 | Avg Reward: 1244.00 | Frames: 1075753 | Epsilon: 0.1000


Training:  11%|█         | 1076377/10000000 [3:41:37<51:10:46, 48.43it/s]

Episode 2182 completed | Reward: 580.00 | Avg Reward: 1243.00 | Frames: 1076370 | Epsilon: 0.1000


Training:  11%|█         | 1077022/10000000 [3:41:50<49:27:48, 50.11it/s]

Episode 2183 completed | Reward: 1360.00 | Avg Reward: 1251.80 | Frames: 1077015 | Epsilon: 0.1000


Training:  11%|█         | 1077531/10000000 [3:41:59<46:58:34, 52.76it/s]

Episode 2184 completed | Reward: 1180.00 | Avg Reward: 1260.20 | Frames: 1077525 | Epsilon: 0.1000


Training:  11%|█         | 1077878/10000000 [3:42:06<49:46:24, 49.79it/s]

Episode 2185 completed | Reward: 1400.00 | Avg Reward: 1268.80 | Frames: 1077868 | Epsilon: 0.1000


Training:  11%|█         | 1078347/10000000 [3:42:16<47:33:06, 52.12it/s]

Episode 2186 completed | Reward: 1060.00 | Avg Reward: 1274.40 | Frames: 1078342 | Epsilon: 0.1000


Training:  11%|█         | 1079721/10000000 [3:42:43<49:06:36, 50.46it/s]

Episode 2187 completed | Reward: 960.00 | Avg Reward: 1272.20 | Frames: 1079721 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█         | 1080420/10000000 [3:42:56<44:47:29, 55.32it/s]

Episode 2188 completed | Reward: 1120.00 | Avg Reward: 1273.00 | Frames: 1080413 | Epsilon: 0.1000


Training:  11%|█         | 1080861/10000000 [3:43:05<52:03:47, 47.59it/s]

Episode 2189 completed | Reward: 1100.00 | Avg Reward: 1275.60 | Frames: 1080852 | Epsilon: 0.1000


Training:  11%|█         | 1081577/10000000 [3:43:19<50:56:44, 48.63it/s]

Episode 2190 completed | Reward: 480.00 | Avg Reward: 1276.00 | Frames: 1081569 | Epsilon: 0.1000


Training:  11%|█         | 1082079/10000000 [3:43:29<49:52:57, 49.66it/s]

Episode 2191 completed | Reward: 780.00 | Avg Reward: 1274.80 | Frames: 1082073 | Epsilon: 0.1000


Training:  11%|█         | 1082627/10000000 [3:43:39<47:52:36, 51.74it/s]

Episode 2192 completed | Reward: 1140.00 | Avg Reward: 1278.20 | Frames: 1082623 | Epsilon: 0.1000


Training:  11%|█         | 1083593/10000000 [3:43:58<50:26:49, 49.10it/s]

Episode 2193 completed | Reward: 2120.00 | Avg Reward: 1288.60 | Frames: 1083584 | Epsilon: 0.1000


Training:  11%|█         | 1084141/10000000 [3:44:09<51:14:41, 48.33it/s]

Episode 2194 completed | Reward: 720.00 | Avg Reward: 1288.60 | Frames: 1084132 | Epsilon: 0.1000


Training:  11%|█         | 1084577/10000000 [3:44:17<51:26:11, 48.15it/s]

Episode 2195 completed | Reward: 840.00 | Avg Reward: 1280.20 | Frames: 1084568 | Epsilon: 0.1000


Training:  11%|█         | 1085481/10000000 [3:44:35<52:36:29, 47.07it/s]

Episode 2196 completed | Reward: 1280.00 | Avg Reward: 1282.60 | Frames: 1085474 | Epsilon: 0.1000


Training:  11%|█         | 1086174/10000000 [3:44:49<50:29:55, 49.03it/s]

Episode 2197 completed | Reward: 1280.00 | Avg Reward: 1285.80 | Frames: 1086167 | Epsilon: 0.1000


Training:  11%|█         | 1086747/10000000 [3:45:00<47:44:34, 51.86it/s]

Episode 2198 completed | Reward: 1360.00 | Avg Reward: 1278.60 | Frames: 1086743 | Epsilon: 0.1000


Training:  11%|█         | 1087741/10000000 [3:45:19<50:53:12, 48.65it/s]

Episode 2199 completed | Reward: 2480.00 | Avg Reward: 1288.20 | Frames: 1087733 | Epsilon: 0.1000


Training:  11%|█         | 1088349/10000000 [3:45:31<49:15:00, 50.26it/s]

Episode 2200 completed | Reward: 980.00 | Avg Reward: 1281.40 | Frames: 1088340 | Epsilon: 0.1000


Training:  11%|█         | 1088857/10000000 [3:45:41<52:11:24, 47.43it/s]

Episode 2201 completed | Reward: 780.00 | Avg Reward: 1279.00 | Frames: 1088848 | Epsilon: 0.1000


Training:  11%|█         | 1089377/10000000 [3:45:51<49:27:17, 50.05it/s]

Episode 2202 completed | Reward: 1340.00 | Avg Reward: 1285.60 | Frames: 1089374 | Epsilon: 0.1000


Training:  11%|█         | 1090424/10000000 [3:46:12<55:36:36, 44.50it/s]

Episode 2203 completed | Reward: 3080.00 | Avg Reward: 1302.40 | Frames: 1090423 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█         | 1091395/10000000 [3:46:31<47:40:52, 51.90it/s]

Episode 2204 completed | Reward: 1500.00 | Avg Reward: 1306.60 | Frames: 1091389 | Epsilon: 0.1000


Training:  11%|█         | 1092145/10000000 [3:46:46<49:10:47, 50.31it/s]

Episode 2205 completed | Reward: 840.00 | Avg Reward: 1305.20 | Frames: 1092142 | Epsilon: 0.1000


Training:  11%|█         | 1092527/10000000 [3:46:53<48:30:52, 51.00it/s]

Episode 2206 completed | Reward: 820.00 | Avg Reward: 1306.80 | Frames: 1092521 | Epsilon: 0.1000


Training:  11%|█         | 1093035/10000000 [3:47:03<48:06:47, 51.42it/s]

Episode 2207 completed | Reward: 1040.00 | Avg Reward: 1300.00 | Frames: 1093031 | Epsilon: 0.1000


Training:  11%|█         | 1094490/10000000 [3:47:32<49:52:08, 49.61it/s]

Episode 2208 completed | Reward: 3860.00 | Avg Reward: 1329.80 | Frames: 1094483 | Epsilon: 0.1000


Training:  11%|█         | 1095790/10000000 [3:47:57<49:28:00, 50.00it/s]

Episode 2209 completed | Reward: 1560.00 | Avg Reward: 1335.40 | Frames: 1095780 | Epsilon: 0.1000


Training:  11%|█         | 1096242/10000000 [3:48:06<49:27:15, 50.01it/s]

Episode 2210 completed | Reward: 920.00 | Avg Reward: 1330.40 | Frames: 1096233 | Epsilon: 0.1000


Training:  11%|█         | 1096718/10000000 [3:48:15<49:47:13, 49.67it/s]

Episode 2211 completed | Reward: 800.00 | Avg Reward: 1330.80 | Frames: 1096711 | Epsilon: 0.1000


Training:  11%|█         | 1097323/10000000 [3:48:27<46:26:41, 53.25it/s]

Episode 2212 completed | Reward: 700.00 | Avg Reward: 1336.40 | Frames: 1097319 | Epsilon: 0.1000


Training:  11%|█         | 1098143/10000000 [3:48:43<46:49:51, 52.80it/s]

Episode 2213 completed | Reward: 1740.00 | Avg Reward: 1341.80 | Frames: 1098139 | Epsilon: 0.1000


Training:  11%|█         | 1098747/10000000 [3:48:55<48:31:29, 50.95it/s]

Episode 2214 completed | Reward: 1220.00 | Avg Reward: 1343.40 | Frames: 1098742 | Epsilon: 0.1000


Training:  11%|█         | 1099655/10000000 [3:49:13<47:44:54, 51.78it/s]

Episode 2215 completed | Reward: 1300.00 | Avg Reward: 1332.20 | Frames: 1099650 | Epsilon: 0.1000


Training:  11%|█         | 1100267/10000000 [3:49:25<46:39:08, 52.99it/s]

Episode 2216 completed | Reward: 2940.00 | Avg Reward: 1349.60 | Frames: 1100263 | Epsilon: 0.1000


Training:  11%|█         | 1100895/10000000 [3:49:37<59:25:43, 41.60it/s]

Episode 2217 completed | Reward: 860.00 | Avg Reward: 1343.80 | Frames: 1100892 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█         | 1101492/10000000 [3:49:49<46:02:45, 53.68it/s]

Episode 2218 completed | Reward: 520.00 | Avg Reward: 1336.60 | Frames: 1101484 | Epsilon: 0.1000


Training:  11%|█         | 1102422/10000000 [3:50:07<49:48:14, 49.63it/s]

Episode 2219 completed | Reward: 2280.00 | Avg Reward: 1334.60 | Frames: 1102415 | Epsilon: 0.1000


Training:  11%|█         | 1103081/10000000 [3:50:20<51:53:17, 47.63it/s]

Episode 2220 completed | Reward: 600.00 | Avg Reward: 1321.40 | Frames: 1103073 | Epsilon: 0.1000


Training:  11%|█         | 1103493/10000000 [3:50:28<51:42:05, 47.80it/s]

Episode 2221 completed | Reward: 500.00 | Avg Reward: 1316.00 | Frames: 1103486 | Epsilon: 0.1000


Training:  11%|█         | 1104058/10000000 [3:50:39<48:57:46, 50.47it/s]

Episode 2222 completed | Reward: 1840.00 | Avg Reward: 1328.60 | Frames: 1104050 | Epsilon: 0.1000


Training:  11%|█         | 1104661/10000000 [3:50:51<52:54:03, 46.71it/s]

Episode 2223 completed | Reward: 2120.00 | Avg Reward: 1344.40 | Frames: 1104654 | Epsilon: 0.1000


Training:  11%|█         | 1105890/10000000 [3:51:15<50:57:07, 48.49it/s]

Episode 2224 completed | Reward: 3420.00 | Avg Reward: 1363.60 | Frames: 1105883 | Epsilon: 0.1000


Training:  11%|█         | 1106463/10000000 [3:51:26<46:23:23, 53.25it/s]

Episode 2225 completed | Reward: 1360.00 | Avg Reward: 1354.00 | Frames: 1106457 | Epsilon: 0.1000


Training:  11%|█         | 1107541/10000000 [3:51:47<49:52:20, 49.53it/s]

Episode 2226 completed | Reward: 4820.00 | Avg Reward: 1393.00 | Frames: 1107532 | Epsilon: 0.1000


Training:  11%|█         | 1108378/10000000 [3:52:04<49:52:18, 49.52it/s]

Episode 2227 completed | Reward: 1400.00 | Avg Reward: 1390.40 | Frames: 1108371 | Epsilon: 0.1000


Training:  11%|█         | 1109193/10000000 [3:52:20<51:34:32, 47.88it/s]

Episode 2228 completed | Reward: 1620.00 | Avg Reward: 1390.60 | Frames: 1109184 | Epsilon: 0.1000


Training:  11%|█         | 1109614/10000000 [3:52:28<49:50:15, 49.55it/s]

Episode 2229 completed | Reward: 1140.00 | Avg Reward: 1390.60 | Frames: 1109607 | Epsilon: 0.1000


Training:  11%|█         | 1110123/10000000 [3:52:38<47:20:18, 52.17it/s]

Episode 2230 completed | Reward: 1640.00 | Avg Reward: 1400.20 | Frames: 1110119 | Epsilon: 0.1000


Training:  11%|█         | 1110669/10000000 [3:52:49<52:23:18, 47.13it/s]

Episode 2231 completed | Reward: 940.00 | Avg Reward: 1390.40 | Frames: 1110660 | Epsilon: 0.1000


Training:  11%|█         | 1111381/10000000 [3:53:03<47:55:05, 51.53it/s]

Episode 2232 completed | Reward: 3160.00 | Avg Reward: 1411.80 | Frames: 1111381 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█         | 1111911/10000000 [3:53:13<47:18:44, 52.18it/s]

Episode 2233 completed | Reward: 1580.00 | Avg Reward: 1417.00 | Frames: 1111906 | Epsilon: 0.1000


Training:  11%|█         | 1112501/10000000 [3:53:25<49:33:36, 49.81it/s]

Episode 2234 completed | Reward: 1220.00 | Avg Reward: 1423.80 | Frames: 1112493 | Epsilon: 0.1000


Training:  11%|█         | 1113202/10000000 [3:53:39<50:17:41, 49.08it/s]

Episode 2235 completed | Reward: 1400.00 | Avg Reward: 1414.60 | Frames: 1113195 | Epsilon: 0.1000


Training:  11%|█         | 1114061/10000000 [3:53:55<50:53:55, 48.49it/s]

Episode 2236 completed | Reward: 800.00 | Avg Reward: 1410.80 | Frames: 1114054 | Epsilon: 0.1000


Training:  11%|█         | 1114561/10000000 [3:54:05<50:45:02, 48.63it/s]

Episode 2237 completed | Reward: 540.00 | Avg Reward: 1400.40 | Frames: 1114553 | Epsilon: 0.1000


Training:  11%|█         | 1115151/10000000 [3:54:17<46:51:38, 52.67it/s]

Episode 2238 completed | Reward: 1280.00 | Avg Reward: 1400.40 | Frames: 1115146 | Epsilon: 0.1000


Training:  11%|█         | 1115778/10000000 [3:54:29<51:24:18, 48.01it/s]

Episode 2239 completed | Reward: 940.00 | Avg Reward: 1392.60 | Frames: 1115771 | Epsilon: 0.1000


Training:  11%|█         | 1116734/10000000 [3:54:48<49:57:46, 49.39it/s]

Episode 2240 completed | Reward: 1360.00 | Avg Reward: 1394.20 | Frames: 1116727 | Epsilon: 0.1000


Training:  11%|█         | 1117765/10000000 [3:55:08<49:20:36, 50.00it/s]

Episode 2241 completed | Reward: 2140.00 | Avg Reward: 1405.60 | Frames: 1117758 | Epsilon: 0.1000


Training:  11%|█         | 1118557/10000000 [3:55:23<49:10:51, 50.16it/s]

Episode 2242 completed | Reward: 1600.00 | Avg Reward: 1399.20 | Frames: 1118555 | Epsilon: 0.1000


Training:  11%|█         | 1119105/10000000 [3:55:34<51:17:57, 48.09it/s]

Episode 2243 completed | Reward: 1000.00 | Avg Reward: 1402.80 | Frames: 1119096 | Epsilon: 0.1000


Training:  11%|█         | 1119817/10000000 [3:55:48<49:18:30, 50.03it/s]

Episode 2244 completed | Reward: 1180.00 | Avg Reward: 1405.40 | Frames: 1119808 | Epsilon: 0.1000


Training:  11%|█         | 1120361/10000000 [3:55:59<48:50:52, 50.49it/s]

Episode 2245 completed | Reward: 1660.00 | Avg Reward: 1408.80 | Frames: 1120355 | Epsilon: 0.1000


Training:  11%|█         | 1121313/10000000 [3:56:17<49:39:25, 49.67it/s]

Episode 2246 completed | Reward: 1560.00 | Avg Reward: 1412.20 | Frames: 1121304 | Epsilon: 0.1000


Training:  11%|█         | 1122753/10000000 [3:56:46<47:56:09, 51.44it/s]

Episode 2247 completed | Reward: 2520.00 | Avg Reward: 1431.20 | Frames: 1122753 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█         | 1123153/10000000 [3:56:54<51:54:57, 47.50it/s]

Episode 2248 completed | Reward: 540.00 | Avg Reward: 1423.80 | Frames: 1123145 | Epsilon: 0.1000


Training:  11%|█         | 1123597/10000000 [3:57:02<51:56:18, 47.47it/s]

Episode 2249 completed | Reward: 980.00 | Avg Reward: 1424.00 | Frames: 1123590 | Epsilon: 0.1000


Training:  11%|█         | 1124293/10000000 [3:57:16<49:07:40, 50.18it/s]

Episode 2250 completed | Reward: 1240.00 | Avg Reward: 1418.80 | Frames: 1124289 | Epsilon: 0.1000


Training:  11%|█         | 1124747/10000000 [3:57:25<47:22:18, 52.04it/s]

Episode 2251 completed | Reward: 1140.00 | Avg Reward: 1411.40 | Frames: 1124742 | Epsilon: 0.1000


Training:  11%|█▏        | 1125309/10000000 [3:57:36<52:25:18, 47.03it/s]

Episode 2252 completed | Reward: 800.00 | Avg Reward: 1409.40 | Frames: 1125300 | Epsilon: 0.1000


Training:  11%|█▏        | 1126817/10000000 [3:58:05<50:46:53, 48.54it/s]

Episode 2253 completed | Reward: 3160.00 | Avg Reward: 1434.20 | Frames: 1126809 | Epsilon: 0.1000


Training:  11%|█▏        | 1127207/10000000 [3:58:13<49:05:52, 50.20it/s]

Episode 2254 completed | Reward: 1060.00 | Avg Reward: 1424.80 | Frames: 1127202 | Epsilon: 0.1000


Training:  11%|█▏        | 1127907/10000000 [3:58:27<46:12:24, 53.34it/s]

Episode 2255 completed | Reward: 1140.00 | Avg Reward: 1419.60 | Frames: 1127902 | Epsilon: 0.1000


Training:  11%|█▏        | 1128485/10000000 [3:58:38<50:53:57, 48.42it/s]

Episode 2256 completed | Reward: 1340.00 | Avg Reward: 1427.20 | Frames: 1128477 | Epsilon: 0.1000


Training:  11%|█▏        | 1128986/10000000 [3:58:48<49:07:36, 50.16it/s]

Episode 2257 completed | Reward: 2340.00 | Avg Reward: 1421.40 | Frames: 1128979 | Epsilon: 0.1000


Training:  11%|█▏        | 1129431/10000000 [3:58:57<47:00:41, 52.41it/s]

Episode 2258 completed | Reward: 1620.00 | Avg Reward: 1425.40 | Frames: 1129425 | Epsilon: 0.1000


Training:  11%|█▏        | 1129922/10000000 [3:59:06<52:32:53, 46.89it/s]

Episode 2259 completed | Reward: 1160.00 | Avg Reward: 1420.20 | Frames: 1129915 | Epsilon: 0.1000


Training:  11%|█▏        | 1130621/10000000 [3:59:20<51:24:41, 47.92it/s]

Episode 2260 completed | Reward: 1040.00 | Avg Reward: 1425.60 | Frames: 1130614 | Epsilon: 0.1000


Training:  11%|█▏        | 1131257/10000000 [3:59:32<50:57:52, 48.34it/s]

Episode 2261 completed | Reward: 1340.00 | Avg Reward: 1410.20 | Frames: 1131250 | Epsilon: 0.1000


Training:  11%|█▏        | 1131925/10000000 [3:59:46<50:52:41, 48.42it/s]

Episode 2262 completed | Reward: 1380.00 | Avg Reward: 1384.60 | Frames: 1131918 | Epsilon: 0.1000


Training:  11%|█▏        | 1132563/10000000 [3:59:58<47:13:05, 52.17it/s]

Episode 2263 completed | Reward: 1460.00 | Avg Reward: 1389.80 | Frames: 1132559 | Epsilon: 0.1000


Training:  11%|█▏        | 1133703/10000000 [4:00:21<59:32:14, 41.37it/s]

Episode 2264 completed | Reward: 960.00 | Avg Reward: 1390.40 | Frames: 1133701 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█▏        | 1135067/10000000 [4:00:47<49:05:44, 50.16it/s]

Episode 2265 completed | Reward: 2860.00 | Avg Reward: 1402.20 | Frames: 1135063 | Epsilon: 0.1000


Training:  11%|█▏        | 1135573/10000000 [4:00:57<51:15:12, 48.04it/s]

Episode 2266 completed | Reward: 920.00 | Avg Reward: 1401.80 | Frames: 1135566 | Epsilon: 0.1000


Training:  11%|█▏        | 1136803/10000000 [4:01:21<47:54:24, 51.39it/s]

Episode 2267 completed | Reward: 1980.00 | Avg Reward: 1410.40 | Frames: 1136798 | Epsilon: 0.1000


Training:  11%|█▏        | 1137305/10000000 [4:01:31<49:52:23, 49.36it/s]

Episode 2268 completed | Reward: 1220.00 | Avg Reward: 1408.20 | Frames: 1137296 | Epsilon: 0.1000


Training:  11%|█▏        | 1138045/10000000 [4:01:46<50:17:21, 48.95it/s]

Episode 2269 completed | Reward: 980.00 | Avg Reward: 1408.80 | Frames: 1138036 | Epsilon: 0.1000


Training:  11%|█▏        | 1138553/10000000 [4:01:56<52:48:39, 46.61it/s]

Episode 2270 completed | Reward: 1700.00 | Avg Reward: 1416.00 | Frames: 1138545 | Epsilon: 0.1000


Training:  11%|█▏        | 1139158/10000000 [4:02:08<49:48:52, 49.41it/s]

Episode 2271 completed | Reward: 1980.00 | Avg Reward: 1424.20 | Frames: 1139151 | Epsilon: 0.1000


Training:  11%|█▏        | 1139626/10000000 [4:02:17<49:56:56, 49.27it/s]

Episode 2272 completed | Reward: 740.00 | Avg Reward: 1415.00 | Frames: 1139619 | Epsilon: 0.1000


Training:  11%|█▏        | 1140725/10000000 [4:02:38<51:59:23, 47.33it/s]

Episode 2273 completed | Reward: 1880.00 | Avg Reward: 1411.40 | Frames: 1140716 | Epsilon: 0.1000


Training:  11%|█▏        | 1141209/10000000 [4:02:48<51:28:33, 47.80it/s]

Episode 2274 completed | Reward: 900.00 | Avg Reward: 1409.00 | Frames: 1141201 | Epsilon: 0.1000


Training:  11%|█▏        | 1142013/10000000 [4:03:04<50:35:48, 48.63it/s]

Episode 2275 completed | Reward: 2060.00 | Avg Reward: 1420.60 | Frames: 1142004 | Epsilon: 0.1000


Training:  11%|█▏        | 1143561/10000000 [4:03:34<51:24:17, 47.86it/s]

Episode 2276 completed | Reward: 2660.00 | Avg Reward: 1438.00 | Frames: 1143553 | Epsilon: 0.1000


Training:  11%|█▏        | 1144167/10000000 [4:03:46<60:01:32, 40.98it/s]

Episode 2277 completed | Reward: 1240.00 | Avg Reward: 1444.20 | Frames: 1144165 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  11%|█▏        | 1144725/10000000 [4:03:57<49:25:04, 49.78it/s]

Episode 2278 completed | Reward: 920.00 | Avg Reward: 1445.40 | Frames: 1144717 | Epsilon: 0.1000


Training:  11%|█▏        | 1146091/10000000 [4:04:24<46:47:50, 52.55it/s]

Episode 2279 completed | Reward: 640.00 | Avg Reward: 1444.00 | Frames: 1146087 | Epsilon: 0.1000


Training:  11%|█▏        | 1146605/10000000 [4:04:34<50:56:16, 48.28it/s]

Episode 2280 completed | Reward: 1060.00 | Avg Reward: 1434.80 | Frames: 1146597 | Epsilon: 0.1000


Training:  11%|█▏        | 1147843/10000000 [4:04:58<46:52:48, 52.45it/s]

Episode 2281 completed | Reward: 3160.00 | Avg Reward: 1452.60 | Frames: 1147837 | Epsilon: 0.1000


Training:  11%|█▏        | 1148831/10000000 [4:05:17<47:42:00, 51.54it/s]

Episode 2282 completed | Reward: 1000.00 | Avg Reward: 1456.80 | Frames: 1148827 | Epsilon: 0.1000


Training:  11%|█▏        | 1149469/10000000 [4:05:30<48:50:44, 50.33it/s]

Episode 2283 completed | Reward: 760.00 | Avg Reward: 1450.80 | Frames: 1149460 | Epsilon: 0.1000


Training:  11%|█▏        | 1149948/10000000 [4:05:39<44:10:21, 55.65it/s]

Episode 2284 completed | Reward: 1060.00 | Avg Reward: 1449.60 | Frames: 1149942 | Epsilon: 0.1000


Training:  12%|█▏        | 1150561/10000000 [4:05:51<49:08:50, 50.02it/s]

Episode 2285 completed | Reward: 2480.00 | Avg Reward: 1460.40 | Frames: 1150558 | Epsilon: 0.1000


Training:  12%|█▏        | 1151985/10000000 [4:06:19<51:08:07, 48.06it/s]

Episode 2286 completed | Reward: 3100.00 | Avg Reward: 1480.80 | Frames: 1151981 | Epsilon: 0.1000


Training:  12%|█▏        | 1152641/10000000 [4:06:32<49:36:28, 49.54it/s]

Episode 2287 completed | Reward: 1720.00 | Avg Reward: 1488.40 | Frames: 1152639 | Epsilon: 0.1000


Training:  12%|█▏        | 1153577/10000000 [4:06:50<48:26:14, 50.73it/s]

Episode 2288 completed | Reward: 2260.00 | Avg Reward: 1499.80 | Frames: 1153570 | Epsilon: 0.1000


Training:  12%|█▏        | 1154065/10000000 [4:07:00<48:32:12, 50.63it/s]

Episode 2289 completed | Reward: 1120.00 | Avg Reward: 1500.00 | Frames: 1154056 | Epsilon: 0.1000


Training:  12%|█▏        | 1154703/10000000 [4:07:12<58:27:01, 42.04it/s]

Episode 2290 completed | Reward: 2020.00 | Avg Reward: 1515.40 | Frames: 1154701 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1155267/10000000 [4:07:23<48:21:13, 50.81it/s]

Episode 2291 completed | Reward: 1140.00 | Avg Reward: 1519.00 | Frames: 1155263 | Epsilon: 0.1000


Training:  12%|█▏        | 1155865/10000000 [4:07:35<49:55:32, 49.21it/s]

Episode 2292 completed | Reward: 1100.00 | Avg Reward: 1518.60 | Frames: 1155856 | Epsilon: 0.1000


Training:  12%|█▏        | 1156271/10000000 [4:07:43<46:37:30, 52.69it/s]

Episode 2293 completed | Reward: 1140.00 | Avg Reward: 1508.80 | Frames: 1156267 | Epsilon: 0.1000


Training:  12%|█▏        | 1157017/10000000 [4:07:57<50:27:15, 48.69it/s]

Episode 2294 completed | Reward: 1100.00 | Avg Reward: 1512.60 | Frames: 1157010 | Epsilon: 0.1000


Training:  12%|█▏        | 1157615/10000000 [4:08:09<46:50:39, 52.43it/s]

Episode 2295 completed | Reward: 1180.00 | Avg Reward: 1516.00 | Frames: 1157610 | Epsilon: 0.1000


Training:  12%|█▏        | 1158083/10000000 [4:08:18<47:13:20, 52.01it/s]

Episode 2296 completed | Reward: 900.00 | Avg Reward: 1512.20 | Frames: 1158078 | Epsilon: 0.1000


Training:  12%|█▏        | 1158673/10000000 [4:08:30<48:51:18, 50.27it/s]

Episode 2297 completed | Reward: 480.00 | Avg Reward: 1504.20 | Frames: 1158667 | Epsilon: 0.1000


Training:  12%|█▏        | 1159239/10000000 [4:08:41<48:21:07, 50.79it/s]

Episode 2298 completed | Reward: 1460.00 | Avg Reward: 1505.20 | Frames: 1159233 | Epsilon: 0.1000


Training:  12%|█▏        | 1160781/10000000 [4:09:11<49:24:49, 49.69it/s]

Episode 2299 completed | Reward: 2900.00 | Avg Reward: 1509.40 | Frames: 1160779 | Epsilon: 0.1000


Training:  12%|█▏        | 1161329/10000000 [4:09:22<52:35:16, 46.69it/s]

Episode 2300 completed | Reward: 1300.00 | Avg Reward: 1512.60 | Frames: 1161321 | Epsilon: 0.1000


Training:  12%|█▏        | 1162037/10000000 [4:09:36<51:43:55, 47.46it/s]

Episode 2301 completed | Reward: 2000.00 | Avg Reward: 1524.80 | Frames: 1162029 | Epsilon: 0.1000


Training:  12%|█▏        | 1162650/10000000 [4:09:48<49:55:32, 49.17it/s]

Episode 2302 completed | Reward: 900.00 | Avg Reward: 1520.40 | Frames: 1162643 | Epsilon: 0.1000


Training:  12%|█▏        | 1163143/10000000 [4:09:57<47:38:24, 51.53it/s]

Episode 2303 completed | Reward: 1160.00 | Avg Reward: 1501.20 | Frames: 1163138 | Epsilon: 0.1000


Training:  12%|█▏        | 1164009/10000000 [4:10:14<50:52:03, 48.25it/s]

Episode 2304 completed | Reward: 1180.00 | Avg Reward: 1498.00 | Frames: 1164001 | Epsilon: 0.1000


Training:  12%|█▏        | 1164501/10000000 [4:10:24<50:44:51, 48.36it/s]

Episode 2305 completed | Reward: 1180.00 | Avg Reward: 1501.40 | Frames: 1164493 | Epsilon: 0.1000


Training:  12%|█▏        | 1164941/10000000 [4:10:33<48:17:49, 50.81it/s]

Episode 2306 completed | Reward: 3500.00 | Avg Reward: 1528.20 | Frames: 1164941 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1165695/10000000 [4:10:48<47:24:06, 51.77it/s]

Episode 2307 completed | Reward: 1940.00 | Avg Reward: 1537.20 | Frames: 1165689 | Epsilon: 0.1000


Training:  12%|█▏        | 1166209/10000000 [4:10:58<51:43:38, 47.44it/s]

Episode 2308 completed | Reward: 1420.00 | Avg Reward: 1512.80 | Frames: 1166202 | Epsilon: 0.1000


Training:  12%|█▏        | 1166871/10000000 [4:11:11<48:04:43, 51.03it/s]

Episode 2309 completed | Reward: 1540.00 | Avg Reward: 1512.60 | Frames: 1166867 | Epsilon: 0.1000


Training:  12%|█▏        | 1167309/10000000 [4:11:19<49:13:58, 49.84it/s]

Episode 2310 completed | Reward: 840.00 | Avg Reward: 1511.80 | Frames: 1167304 | Epsilon: 0.1000


Training:  12%|█▏        | 1167803/10000000 [4:11:29<48:16:10, 50.83it/s]

Episode 2311 completed | Reward: 1480.00 | Avg Reward: 1518.60 | Frames: 1167797 | Epsilon: 0.1000


Training:  12%|█▏        | 1168679/10000000 [4:11:46<47:14:14, 51.93it/s]

Episode 2312 completed | Reward: 2320.00 | Avg Reward: 1534.80 | Frames: 1168675 | Epsilon: 0.1000


Training:  12%|█▏        | 1169781/10000000 [4:12:08<49:36:57, 49.44it/s]

Episode 2313 completed | Reward: 1300.00 | Avg Reward: 1530.40 | Frames: 1169773 | Epsilon: 0.1000


Training:  12%|█▏        | 1171161/10000000 [4:12:35<50:27:41, 48.60it/s]

Episode 2314 completed | Reward: 3030.00 | Avg Reward: 1548.50 | Frames: 1171154 | Epsilon: 0.1000


Training:  12%|█▏        | 1171837/10000000 [4:12:48<51:32:54, 47.57it/s]

Episode 2315 completed | Reward: 2160.00 | Avg Reward: 1557.10 | Frames: 1171828 | Epsilon: 0.1000


Training:  12%|█▏        | 1172338/10000000 [4:12:58<49:12:15, 49.84it/s]

Episode 2316 completed | Reward: 960.00 | Avg Reward: 1537.30 | Frames: 1172328 | Epsilon: 0.1000


Training:  12%|█▏        | 1173093/10000000 [4:13:13<50:42:27, 48.35it/s]

Episode 2317 completed | Reward: 2320.00 | Avg Reward: 1551.90 | Frames: 1173084 | Epsilon: 0.1000


Training:  12%|█▏        | 1173557/10000000 [4:13:22<48:51:27, 50.18it/s]

Episode 2318 completed | Reward: 580.00 | Avg Reward: 1552.50 | Frames: 1173553 | Epsilon: 0.1000


Training:  12%|█▏        | 1174133/10000000 [4:13:34<49:48:22, 49.22it/s]

Episode 2319 completed | Reward: 840.00 | Avg Reward: 1538.10 | Frames: 1174124 | Epsilon: 0.1000


Training:  12%|█▏        | 1174755/10000000 [4:13:46<48:42:32, 50.33it/s]

Episode 2320 completed | Reward: 720.00 | Avg Reward: 1539.30 | Frames: 1174750 | Epsilon: 0.1000


Training:  12%|█▏        | 1175319/10000000 [4:13:57<58:57:54, 41.57it/s]

Episode 2321 completed | Reward: 1180.00 | Avg Reward: 1546.10 | Frames: 1175316 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1175906/10000000 [4:14:09<50:58:33, 48.08it/s]

Episode 2322 completed | Reward: 840.00 | Avg Reward: 1536.10 | Frames: 1175905 | Epsilon: 0.1000


Training:  12%|█▏        | 1176553/10000000 [4:14:22<49:40:55, 49.33it/s]

Episode 2323 completed | Reward: 1460.00 | Avg Reward: 1529.50 | Frames: 1176545 | Epsilon: 0.1000


Training:  12%|█▏        | 1177441/10000000 [4:14:39<49:19:40, 49.68it/s]

Episode 2324 completed | Reward: 2260.00 | Avg Reward: 1517.90 | Frames: 1177434 | Epsilon: 0.1000


Training:  12%|█▏        | 1178295/10000000 [4:14:56<46:40:22, 52.50it/s]

Episode 2325 completed | Reward: 1060.00 | Avg Reward: 1514.90 | Frames: 1178291 | Epsilon: 0.1000


Training:  12%|█▏        | 1179709/10000000 [4:15:24<48:58:36, 50.03it/s]

Episode 2326 completed | Reward: 2000.00 | Avg Reward: 1486.70 | Frames: 1179700 | Epsilon: 0.1000


Training:  12%|█▏        | 1180154/10000000 [4:15:32<49:16:57, 49.71it/s]

Episode 2327 completed | Reward: 940.00 | Avg Reward: 1482.10 | Frames: 1180147 | Epsilon: 0.1000


Training:  12%|█▏        | 1180641/10000000 [4:15:42<49:38:32, 49.35it/s]

Episode 2328 completed | Reward: 540.00 | Avg Reward: 1471.30 | Frames: 1180632 | Epsilon: 0.1000


Training:  12%|█▏        | 1181967/10000000 [4:16:08<47:11:08, 51.91it/s]

Episode 2329 completed | Reward: 2460.00 | Avg Reward: 1484.50 | Frames: 1181963 | Epsilon: 0.1000


Training:  12%|█▏        | 1182553/10000000 [4:16:19<51:37:38, 47.44it/s]

Episode 2330 completed | Reward: 1100.00 | Avg Reward: 1479.10 | Frames: 1182546 | Epsilon: 0.1000


Training:  12%|█▏        | 1183310/10000000 [4:16:34<49:30:19, 49.47it/s]

Episode 2331 completed | Reward: 1840.00 | Avg Reward: 1488.10 | Frames: 1183303 | Epsilon: 0.1000


Training:  12%|█▏        | 1183886/10000000 [4:16:46<50:39:46, 48.34it/s]

Episode 2332 completed | Reward: 1120.00 | Avg Reward: 1467.70 | Frames: 1183879 | Epsilon: 0.1000


Training:  12%|█▏        | 1184309/10000000 [4:16:54<50:09:10, 48.83it/s]

Episode 2333 completed | Reward: 720.00 | Avg Reward: 1459.10 | Frames: 1184300 | Epsilon: 0.1000


Training:  12%|█▏        | 1184993/10000000 [4:17:08<51:27:22, 47.59it/s]

Episode 2334 completed | Reward: 1140.00 | Avg Reward: 1458.30 | Frames: 1184986 | Epsilon: 0.1000


Training:  12%|█▏        | 1185618/10000000 [4:17:20<48:35:07, 50.39it/s]

Episode 2335 completed | Reward: 2640.00 | Avg Reward: 1470.70 | Frames: 1185618 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1186633/10000000 [4:17:40<51:14:47, 47.77it/s]

Episode 2336 completed | Reward: 2080.00 | Avg Reward: 1483.50 | Frames: 1186626 | Epsilon: 0.1000


Training:  12%|█▏        | 1187552/10000000 [4:17:58<44:32:09, 54.96it/s]

Episode 2337 completed | Reward: 1400.00 | Avg Reward: 1492.10 | Frames: 1187547 | Epsilon: 0.1000


Training:  12%|█▏        | 1188145/10000000 [4:18:10<50:48:42, 48.17it/s]

Episode 2338 completed | Reward: 1340.00 | Avg Reward: 1492.70 | Frames: 1188137 | Epsilon: 0.1000


Training:  12%|█▏        | 1188606/10000000 [4:18:19<50:24:11, 48.56it/s]

Episode 2339 completed | Reward: 360.00 | Avg Reward: 1486.90 | Frames: 1188599 | Epsilon: 0.1000


Training:  12%|█▏        | 1189625/10000000 [4:18:39<51:38:38, 47.39it/s]

Episode 2340 completed | Reward: 2920.00 | Avg Reward: 1502.50 | Frames: 1189618 | Epsilon: 0.1000


Training:  12%|█▏        | 1190657/10000000 [4:19:00<49:56:15, 49.00it/s]

Episode 2341 completed | Reward: 1240.00 | Avg Reward: 1493.50 | Frames: 1190648 | Epsilon: 0.1000


Training:  12%|█▏        | 1191441/10000000 [4:19:15<49:40:50, 49.25it/s]

Episode 2342 completed | Reward: 3020.00 | Avg Reward: 1507.70 | Frames: 1191432 | Epsilon: 0.1000


Training:  12%|█▏        | 1192974/10000000 [4:19:45<49:40:44, 49.24it/s]

Episode 2343 completed | Reward: 2880.00 | Avg Reward: 1526.50 | Frames: 1192969 | Epsilon: 0.1000


Training:  12%|█▏        | 1194485/10000000 [4:20:15<50:03:33, 48.86it/s]

Episode 2344 completed | Reward: 3560.00 | Avg Reward: 1550.30 | Frames: 1194476 | Epsilon: 0.1000


Training:  12%|█▏        | 1194925/10000000 [4:20:24<49:19:45, 49.58it/s]

Episode 2345 completed | Reward: 640.00 | Avg Reward: 1540.10 | Frames: 1194916 | Epsilon: 0.1000


Training:  12%|█▏        | 1195261/10000000 [4:20:30<49:25:46, 49.48it/s]

Episode 2346 completed | Reward: 800.00 | Avg Reward: 1532.50 | Frames: 1195256 | Epsilon: 0.1000


Training:  12%|█▏        | 1195727/10000000 [4:20:40<47:49:56, 51.13it/s]

Episode 2347 completed | Reward: 860.00 | Avg Reward: 1515.90 | Frames: 1195727 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1196279/10000000 [4:20:51<48:41:10, 50.23it/s]

Episode 2348 completed | Reward: 1140.00 | Avg Reward: 1521.90 | Frames: 1196275 | Epsilon: 0.1000


Training:  12%|█▏        | 1197273/10000000 [4:21:10<51:50:05, 47.17it/s]

Episode 2349 completed | Reward: 5140.00 | Avg Reward: 1563.50 | Frames: 1197265 | Epsilon: 0.1000


Training:  12%|█▏        | 1197695/10000000 [4:21:19<47:44:22, 51.22it/s]

Episode 2350 completed | Reward: 820.00 | Avg Reward: 1559.30 | Frames: 1197689 | Epsilon: 0.1000


Training:  12%|█▏        | 1198275/10000000 [4:21:30<48:22:35, 50.54it/s]

Episode 2351 completed | Reward: 2640.00 | Avg Reward: 1574.30 | Frames: 1198270 | Epsilon: 0.1000


Training:  12%|█▏        | 1198873/10000000 [4:21:42<50:12:20, 48.69it/s]

Episode 2352 completed | Reward: 1400.00 | Avg Reward: 1580.30 | Frames: 1198864 | Epsilon: 0.1000


Training:  12%|█▏        | 1199375/10000000 [4:21:52<48:27:35, 50.45it/s]

Episode 2353 completed | Reward: 500.00 | Avg Reward: 1553.70 | Frames: 1199369 | Epsilon: 0.1000


Training:  12%|█▏        | 1199813/10000000 [4:22:00<48:57:07, 49.94it/s]

Episode 2354 completed | Reward: 1640.00 | Avg Reward: 1559.50 | Frames: 1199810 | Epsilon: 0.1000


Training:  12%|█▏        | 1200387/10000000 [4:22:12<47:55:56, 51.00it/s]

Episode 2355 completed | Reward: 1120.00 | Avg Reward: 1559.30 | Frames: 1200382 | Epsilon: 0.1000


Training:  12%|█▏        | 1200951/10000000 [4:22:23<46:56:21, 52.07it/s]

Episode 2356 completed | Reward: 1740.00 | Avg Reward: 1563.30 | Frames: 1200946 | Epsilon: 0.1000


Training:  12%|█▏        | 1201595/10000000 [4:22:35<48:21:45, 50.53it/s]

Episode 2357 completed | Reward: 1780.00 | Avg Reward: 1557.70 | Frames: 1201590 | Epsilon: 0.1000


Training:  12%|█▏        | 1202137/10000000 [4:22:46<50:08:33, 48.74it/s]

Episode 2358 completed | Reward: 1220.00 | Avg Reward: 1553.70 | Frames: 1202135 | Epsilon: 0.1000


Training:  12%|█▏        | 1202583/10000000 [4:22:55<47:22:50, 51.58it/s]

Episode 2359 completed | Reward: 1700.00 | Avg Reward: 1559.10 | Frames: 1202579 | Epsilon: 0.1000


Training:  12%|█▏        | 1203245/10000000 [4:23:08<50:14:21, 48.64it/s]

Episode 2360 completed | Reward: 2040.00 | Avg Reward: 1569.10 | Frames: 1203236 | Epsilon: 0.1000


Training:  12%|█▏        | 1203833/10000000 [4:23:20<52:29:21, 46.55it/s]

Episode 2361 completed | Reward: 760.00 | Avg Reward: 1563.30 | Frames: 1203825 | Epsilon: 0.1000


Training:  12%|█▏        | 1204405/10000000 [4:23:31<50:47:34, 48.10it/s]

Episode 2362 completed | Reward: 1080.00 | Avg Reward: 1560.30 | Frames: 1204397 | Epsilon: 0.1000


Training:  12%|█▏        | 1204859/10000000 [4:23:40<47:02:50, 51.93it/s]

Episode 2363 completed | Reward: 1140.00 | Avg Reward: 1557.10 | Frames: 1204855 | Epsilon: 0.1000


Training:  12%|█▏        | 1206055/10000000 [4:24:04<58:16:32, 41.92it/s]

Episode 2364 completed | Reward: 5700.00 | Avg Reward: 1604.50 | Frames: 1206054 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1206631/10000000 [4:24:15<47:48:03, 51.10it/s]

Episode 2365 completed | Reward: 3720.00 | Avg Reward: 1613.10 | Frames: 1206625 | Epsilon: 0.1000


Training:  12%|█▏        | 1207212/10000000 [4:24:26<44:52:54, 54.42it/s]

Episode 2366 completed | Reward: 1100.00 | Avg Reward: 1614.90 | Frames: 1207207 | Epsilon: 0.1000


Training:  12%|█▏        | 1207758/10000000 [4:24:37<49:34:14, 49.27it/s]

Episode 2367 completed | Reward: 2120.00 | Avg Reward: 1616.30 | Frames: 1207751 | Epsilon: 0.1000


Training:  12%|█▏        | 1208331/10000000 [4:24:49<46:39:20, 52.34it/s]

Episode 2368 completed | Reward: 960.00 | Avg Reward: 1613.70 | Frames: 1208326 | Epsilon: 0.1000


Training:  12%|█▏        | 1209855/10000000 [4:25:19<46:59:35, 51.96it/s]

Episode 2369 completed | Reward: 4700.00 | Avg Reward: 1650.90 | Frames: 1209849 | Epsilon: 0.1000


Training:  12%|█▏        | 1211283/10000000 [4:25:47<46:44:57, 52.22it/s]

Episode 2370 completed | Reward: 2820.00 | Avg Reward: 1662.10 | Frames: 1211277 | Epsilon: 0.1000


Training:  12%|█▏        | 1212825/10000000 [4:26:18<49:32:25, 49.27it/s]

Episode 2371 completed | Reward: 3020.00 | Avg Reward: 1672.50 | Frames: 1212816 | Epsilon: 0.1000


Training:  12%|█▏        | 1213465/10000000 [4:26:30<49:29:08, 49.32it/s]

Episode 2372 completed | Reward: 740.00 | Avg Reward: 1672.50 | Frames: 1213456 | Epsilon: 0.1000


Training:  12%|█▏        | 1214241/10000000 [4:26:46<51:17:22, 47.58it/s]

Episode 2373 completed | Reward: 880.00 | Avg Reward: 1662.50 | Frames: 1214232 | Epsilon: 0.1000


Training:  12%|█▏        | 1214965/10000000 [4:27:00<50:44:48, 48.09it/s]

Episode 2374 completed | Reward: 2780.00 | Avg Reward: 1681.30 | Frames: 1214958 | Epsilon: 0.1000


Training:  12%|█▏        | 1216075/10000000 [4:27:22<58:40:31, 41.58it/s]

Episode 2375 completed | Reward: 920.00 | Avg Reward: 1669.90 | Frames: 1216072 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1216613/10000000 [4:27:33<51:46:24, 47.13it/s]

Episode 2376 completed | Reward: 1520.00 | Avg Reward: 1658.50 | Frames: 1216605 | Epsilon: 0.1000


Training:  12%|█▏        | 1217315/10000000 [4:27:47<47:02:05, 51.87it/s]

Episode 2377 completed | Reward: 1060.00 | Avg Reward: 1656.70 | Frames: 1217311 | Epsilon: 0.1000


Training:  12%|█▏        | 1217853/10000000 [4:27:58<53:42:53, 45.42it/s]

Episode 2378 completed | Reward: 1380.00 | Avg Reward: 1661.30 | Frames: 1217850 | Epsilon: 0.1000


Training:  12%|█▏        | 1219106/10000000 [4:28:22<49:45:47, 49.01it/s]

Episode 2379 completed | Reward: 1940.00 | Avg Reward: 1674.30 | Frames: 1219099 | Epsilon: 0.1000


Training:  12%|█▏        | 1219528/10000000 [4:28:31<47:17:44, 51.57it/s]

Episode 2380 completed | Reward: 1840.00 | Avg Reward: 1682.10 | Frames: 1219527 | Epsilon: 0.1000


Training:  12%|█▏        | 1220091/10000000 [4:28:42<47:31:01, 51.33it/s]

Episode 2381 completed | Reward: 1080.00 | Avg Reward: 1661.30 | Frames: 1220087 | Epsilon: 0.1000


Training:  12%|█▏        | 1221486/10000000 [4:29:10<51:15:20, 47.57it/s]

Episode 2382 completed | Reward: 1920.00 | Avg Reward: 1670.50 | Frames: 1221481 | Epsilon: 0.1000


Training:  12%|█▏        | 1222197/10000000 [4:29:24<50:20:48, 48.43it/s]

Episode 2383 completed | Reward: 1640.00 | Avg Reward: 1679.30 | Frames: 1222194 | Epsilon: 0.1000


Training:  12%|█▏        | 1222769/10000000 [4:29:35<51:40:44, 47.18it/s]

Episode 2384 completed | Reward: 1040.00 | Avg Reward: 1679.10 | Frames: 1222760 | Epsilon: 0.1000


Training:  12%|█▏        | 1223253/10000000 [4:29:45<52:12:01, 46.70it/s]

Episode 2385 completed | Reward: 1340.00 | Avg Reward: 1667.70 | Frames: 1223244 | Epsilon: 0.1000


Training:  12%|█▏        | 1224243/10000000 [4:30:04<49:47:35, 48.96it/s]

Episode 2386 completed | Reward: 2040.00 | Avg Reward: 1657.10 | Frames: 1224240 | Epsilon: 0.1000


Training:  12%|█▏        | 1225431/10000000 [4:30:28<47:58:34, 50.80it/s]

Episode 2387 completed | Reward: 1560.00 | Avg Reward: 1655.50 | Frames: 1225425 | Epsilon: 0.1000


Training:  12%|█▏        | 1226213/10000000 [4:30:43<61:03:42, 39.91it/s]

Episode 2388 completed | Reward: 1020.00 | Avg Reward: 1643.10 | Frames: 1226212 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1226797/10000000 [4:30:55<50:29:33, 48.26it/s]

Episode 2389 completed | Reward: 1720.00 | Avg Reward: 1649.10 | Frames: 1226788 | Epsilon: 0.1000


Training:  12%|█▏        | 1227793/10000000 [4:31:15<49:03:23, 49.67it/s]

Episode 2390 completed | Reward: 2220.00 | Avg Reward: 1651.10 | Frames: 1227784 | Epsilon: 0.1000


Training:  12%|█▏        | 1228932/10000000 [4:31:37<44:29:47, 54.75it/s]

Episode 2391 completed | Reward: 1800.00 | Avg Reward: 1657.70 | Frames: 1228927 | Epsilon: 0.1000


Training:  12%|█▏        | 1229921/10000000 [4:31:57<50:00:27, 48.72it/s]

Episode 2392 completed | Reward: 1920.00 | Avg Reward: 1665.90 | Frames: 1229912 | Epsilon: 0.1000


Training:  12%|█▏        | 1230855/10000000 [4:32:15<47:01:00, 51.81it/s]

Episode 2393 completed | Reward: 1460.00 | Avg Reward: 1669.10 | Frames: 1230850 | Epsilon: 0.1000


Training:  12%|█▏        | 1231755/10000000 [4:32:33<46:51:06, 51.99it/s]

Episode 2394 completed | Reward: 2860.00 | Avg Reward: 1686.70 | Frames: 1231749 | Epsilon: 0.1000


Training:  12%|█▏        | 1232717/10000000 [4:32:52<51:34:23, 47.22it/s]

Episode 2395 completed | Reward: 1860.00 | Avg Reward: 1693.50 | Frames: 1232709 | Epsilon: 0.1000


Training:  12%|█▏        | 1233482/10000000 [4:33:07<49:36:49, 49.08it/s]

Episode 2396 completed | Reward: 1100.00 | Avg Reward: 1695.50 | Frames: 1233475 | Epsilon: 0.1000


Training:  12%|█▏        | 1234199/10000000 [4:33:21<47:39:47, 51.09it/s]

Episode 2397 completed | Reward: 1260.00 | Avg Reward: 1703.30 | Frames: 1234195 | Epsilon: 0.1000


Training:  12%|█▏        | 1234883/10000000 [4:33:34<46:03:25, 52.86it/s]

Episode 2398 completed | Reward: 1780.00 | Avg Reward: 1706.50 | Frames: 1234878 | Epsilon: 0.1000


Training:  12%|█▏        | 1235453/10000000 [4:33:46<50:25:50, 48.28it/s]

Episode 2399 completed | Reward: 740.00 | Avg Reward: 1684.90 | Frames: 1235445 | Epsilon: 0.1000


Training:  12%|█▏        | 1236171/10000000 [4:34:00<48:10:03, 50.54it/s]

Episode 2400 completed | Reward: 1060.00 | Avg Reward: 1682.50 | Frames: 1236167 | Epsilon: 0.1000


Training:  12%|█▏        | 1236631/10000000 [4:34:09<59:35:34, 40.85it/s]

Episode 2401 completed | Reward: 1540.00 | Avg Reward: 1677.90 | Frames: 1236628 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1237563/10000000 [4:34:27<47:50:17, 50.88it/s]

Episode 2402 completed | Reward: 1320.00 | Avg Reward: 1682.10 | Frames: 1237558 | Epsilon: 0.1000


Training:  12%|█▏        | 1238305/10000000 [4:34:42<48:58:06, 49.70it/s]

Episode 2403 completed | Reward: 2440.00 | Avg Reward: 1694.90 | Frames: 1238296 | Epsilon: 0.1000


Training:  12%|█▏        | 1239217/10000000 [4:35:00<49:39:39, 49.00it/s]

Episode 2404 completed | Reward: 820.00 | Avg Reward: 1691.30 | Frames: 1239208 | Epsilon: 0.1000


Training:  12%|█▏        | 1240895/10000000 [4:35:33<46:42:34, 52.09it/s]

Episode 2405 completed | Reward: 3440.00 | Avg Reward: 1713.90 | Frames: 1240890 | Epsilon: 0.1000


Training:  12%|█▏        | 1241323/10000000 [4:35:41<47:37:33, 51.08it/s]

Episode 2406 completed | Reward: 420.00 | Avg Reward: 1683.10 | Frames: 1241319 | Epsilon: 0.1000


Training:  12%|█▏        | 1241901/10000000 [4:35:53<51:00:39, 47.69it/s]

Episode 2407 completed | Reward: 940.00 | Avg Reward: 1673.10 | Frames: 1241893 | Epsilon: 0.1000


Training:  12%|█▏        | 1242329/10000000 [4:36:01<50:36:48, 48.06it/s]

Episode 2408 completed | Reward: 900.00 | Avg Reward: 1667.90 | Frames: 1242320 | Epsilon: 0.1000


Training:  12%|█▏        | 1243293/10000000 [4:36:20<52:15:40, 46.54it/s]

Episode 2409 completed | Reward: 1680.00 | Avg Reward: 1669.30 | Frames: 1243284 | Epsilon: 0.1000


Training:  12%|█▏        | 1244603/10000000 [4:36:46<46:10:18, 52.67it/s]

Episode 2410 completed | Reward: 1520.00 | Avg Reward: 1676.10 | Frames: 1244599 | Epsilon: 0.1000


Training:  12%|█▏        | 1245639/10000000 [4:37:06<46:02:11, 52.82it/s]

Episode 2411 completed | Reward: 2450.00 | Avg Reward: 1685.80 | Frames: 1245633 | Epsilon: 0.1000


Training:  12%|█▏        | 1246534/10000000 [4:37:24<48:40:50, 49.95it/s]

Episode 2412 completed | Reward: 720.00 | Avg Reward: 1669.80 | Frames: 1246527 | Epsilon: 0.1000


Training:  12%|█▏        | 1247038/10000000 [4:37:34<47:34:34, 51.10it/s]

Episode 2413 completed | Reward: 1080.00 | Avg Reward: 1667.60 | Frames: 1247038 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  12%|█▏        | 1247901/10000000 [4:37:51<51:55:07, 46.83it/s]

Episode 2414 completed | Reward: 1340.00 | Avg Reward: 1650.70 | Frames: 1247892 | Epsilon: 0.1000


Training:  12%|█▏        | 1249145/10000000 [4:38:16<50:23:52, 48.23it/s]

Episode 2415 completed | Reward: 3290.00 | Avg Reward: 1662.00 | Frames: 1249137 | Epsilon: 0.1000


Training:  12%|█▏        | 1249582/10000000 [4:38:24<50:10:17, 48.45it/s]

Episode 2416 completed | Reward: 2040.00 | Avg Reward: 1672.80 | Frames: 1249575 | Epsilon: 0.1000


Training:  13%|█▎        | 1250005/10000000 [4:38:59<2335:25:46,  1.04it/s]


Evaluation at frame 1250000: 1474.00
Model saved to weights/CarnivalDeterministic-v4_dqn_best.pth
Episode 2417 completed | Reward: 1300.00 | Avg Reward: 1662.60 | Frames: 1250001 | Epsilon: 0.1000


Training:  13%|█▎        | 1250563/10000000 [4:39:10<48:28:50, 50.13it/s]

Episode 2418 completed | Reward: 840.00 | Avg Reward: 1665.20 | Frames: 1250559 | Epsilon: 0.1000


Training:  13%|█▎        | 1251301/10000000 [4:39:25<51:20:31, 47.33it/s]

Episode 2419 completed | Reward: 1000.00 | Avg Reward: 1666.80 | Frames: 1251294 | Epsilon: 0.1000


Training:  13%|█▎        | 1252219/10000000 [4:39:43<46:58:32, 51.73it/s]

Episode 2420 completed | Reward: 900.00 | Avg Reward: 1668.60 | Frames: 1252215 | Epsilon: 0.1000


Training:  13%|█▎        | 1253065/10000000 [4:40:00<51:30:18, 47.17it/s]

Episode 2421 completed | Reward: 920.00 | Avg Reward: 1666.00 | Frames: 1253057 | Epsilon: 0.1000


Training:  13%|█▎        | 1254167/10000000 [4:40:21<49:05:41, 49.48it/s]

Episode 2422 completed | Reward: 1540.00 | Avg Reward: 1673.00 | Frames: 1254163 | Epsilon: 0.1000


Training:  13%|█▎        | 1254869/10000000 [4:40:35<49:11:07, 49.39it/s]

Episode 2423 completed | Reward: 1180.00 | Avg Reward: 1670.20 | Frames: 1254860 | Epsilon: 0.1000


Training:  13%|█▎        | 1255581/10000000 [4:40:49<48:35:33, 49.99it/s]

Episode 2424 completed | Reward: 880.00 | Avg Reward: 1656.40 | Frames: 1255572 | Epsilon: 0.1000


Training:  13%|█▎        | 1257189/10000000 [4:41:21<60:02:41, 40.45it/s]

Episode 2425 completed | Reward: 3620.00 | Avg Reward: 1682.00 | Frames: 1257188 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1257611/10000000 [4:41:29<46:32:29, 52.18it/s]

Episode 2426 completed | Reward: 820.00 | Avg Reward: 1670.20 | Frames: 1257605 | Epsilon: 0.1000


Training:  13%|█▎        | 1258215/10000000 [4:41:41<48:55:10, 49.64it/s]

Episode 2427 completed | Reward: 4040.00 | Avg Reward: 1701.20 | Frames: 1258210 | Epsilon: 0.1000


Training:  13%|█▎        | 1258851/10000000 [4:41:54<47:29:10, 51.13it/s]

Episode 2428 completed | Reward: 1220.00 | Avg Reward: 1708.00 | Frames: 1258847 | Epsilon: 0.1000


Training:  13%|█▎        | 1259469/10000000 [4:42:06<50:20:15, 48.23it/s]

Episode 2429 completed | Reward: 1580.00 | Avg Reward: 1699.20 | Frames: 1259461 | Epsilon: 0.1000


Training:  13%|█▎        | 1260243/10000000 [4:42:21<46:20:28, 52.39it/s]

Episode 2430 completed | Reward: 2140.00 | Avg Reward: 1709.60 | Frames: 1260237 | Epsilon: 0.1000


Training:  13%|█▎        | 1260877/10000000 [4:42:34<51:24:08, 47.23it/s]

Episode 2431 completed | Reward: 2020.00 | Avg Reward: 1711.40 | Frames: 1260868 | Epsilon: 0.1000


Training:  13%|█▎        | 1261467/10000000 [4:42:45<46:44:49, 51.93it/s]

Episode 2432 completed | Reward: 280.00 | Avg Reward: 1703.00 | Frames: 1261462 | Epsilon: 0.1000


Training:  13%|█▎        | 1262105/10000000 [4:42:58<49:52:51, 48.66it/s]

Episode 2433 completed | Reward: 720.00 | Avg Reward: 1703.00 | Frames: 1262102 | Epsilon: 0.1000


Training:  13%|█▎        | 1262937/10000000 [4:43:14<48:43:39, 49.81it/s]

Episode 2434 completed | Reward: 1700.00 | Avg Reward: 1708.60 | Frames: 1262928 | Epsilon: 0.1000


Training:  13%|█▎        | 1263497/10000000 [4:43:25<50:50:04, 47.74it/s]

Episode 2435 completed | Reward: 1420.00 | Avg Reward: 1696.40 | Frames: 1263490 | Epsilon: 0.1000


Training:  13%|█▎        | 1264313/10000000 [4:43:41<49:10:24, 49.35it/s]

Episode 2436 completed | Reward: 960.00 | Avg Reward: 1685.20 | Frames: 1264304 | Epsilon: 0.1000


Training:  13%|█▎        | 1264774/10000000 [4:43:50<48:34:45, 49.95it/s]

Episode 2437 completed | Reward: 700.00 | Avg Reward: 1678.20 | Frames: 1264767 | Epsilon: 0.1000


Training:  13%|█▎        | 1265314/10000000 [4:44:01<48:43:13, 49.80it/s]

Episode 2438 completed | Reward: 3600.00 | Avg Reward: 1700.80 | Frames: 1265305 | Epsilon: 0.1000


Training:  13%|█▎        | 1267009/10000000 [4:44:34<49:11:36, 49.31it/s]

Episode 2439 completed | Reward: 3000.00 | Avg Reward: 1727.20 | Frames: 1267000 | Epsilon: 0.1000


Training:  13%|█▎        | 1268025/10000000 [4:44:54<59:51:55, 40.52it/s]

Episode 2440 completed | Reward: 3760.00 | Avg Reward: 1735.60 | Frames: 1268024 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1268551/10000000 [4:45:05<46:59:27, 51.61it/s]

Episode 2441 completed | Reward: 1080.00 | Avg Reward: 1734.00 | Frames: 1268547 | Epsilon: 0.1000


Training:  13%|█▎        | 1269841/10000000 [4:45:30<50:07:13, 48.38it/s]

Episode 2442 completed | Reward: 1680.00 | Avg Reward: 1720.60 | Frames: 1269834 | Epsilon: 0.1000


Training:  13%|█▎        | 1270310/10000000 [4:45:39<48:51:29, 49.63it/s]

Episode 2443 completed | Reward: 1480.00 | Avg Reward: 1706.60 | Frames: 1270303 | Epsilon: 0.1000


Training:  13%|█▎        | 1271293/10000000 [4:45:59<49:31:16, 48.96it/s]

Episode 2444 completed | Reward: 1420.00 | Avg Reward: 1685.20 | Frames: 1271284 | Epsilon: 0.1000


Training:  13%|█▎        | 1271883/10000000 [4:46:10<47:08:28, 51.43it/s]

Episode 2445 completed | Reward: 1980.00 | Avg Reward: 1698.60 | Frames: 1271878 | Epsilon: 0.1000


Training:  13%|█▎        | 1272623/10000000 [4:46:25<46:29:12, 52.15it/s]

Episode 2446 completed | Reward: 1660.00 | Avg Reward: 1707.20 | Frames: 1272618 | Epsilon: 0.1000


Training:  13%|█▎        | 1273337/10000000 [4:46:39<51:02:29, 47.49it/s]

Episode 2447 completed | Reward: 4080.00 | Avg Reward: 1739.40 | Frames: 1273328 | Epsilon: 0.1000


Training:  13%|█▎        | 1275109/10000000 [4:47:14<51:23:34, 47.16it/s]

Episode 2448 completed | Reward: 4320.00 | Avg Reward: 1771.20 | Frames: 1275101 | Epsilon: 0.1000


Training:  13%|█▎        | 1275661/10000000 [4:47:25<48:23:20, 50.08it/s]

Episode 2449 completed | Reward: 1060.00 | Avg Reward: 1730.40 | Frames: 1275653 | Epsilon: 0.1000


Training:  13%|█▎        | 1276098/10000000 [4:47:33<48:35:23, 49.87it/s]

Episode 2450 completed | Reward: 1060.00 | Avg Reward: 1732.80 | Frames: 1276091 | Epsilon: 0.1000


Training:  13%|█▎        | 1276845/10000000 [4:47:48<51:40:17, 46.89it/s]

Episode 2451 completed | Reward: 2540.00 | Avg Reward: 1731.80 | Frames: 1276837 | Epsilon: 0.1000


Training:  13%|█▎        | 1277577/10000000 [4:48:02<51:48:19, 46.77it/s]

Episode 2452 completed | Reward: 1100.00 | Avg Reward: 1728.80 | Frames: 1277570 | Epsilon: 0.1000


Training:  13%|█▎        | 1278377/10000000 [4:48:18<47:29:06, 51.02it/s]

Episode 2453 completed | Reward: 860.00 | Avg Reward: 1732.40 | Frames: 1278377 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1278969/10000000 [4:48:30<50:10:55, 48.27it/s]

Episode 2454 completed | Reward: 1140.00 | Avg Reward: 1727.40 | Frames: 1278962 | Epsilon: 0.1000


Training:  13%|█▎        | 1280511/10000000 [4:49:00<46:18:14, 52.31it/s]

Episode 2455 completed | Reward: 3960.00 | Avg Reward: 1755.80 | Frames: 1280506 | Epsilon: 0.1000


Training:  13%|█▎        | 1281121/10000000 [4:49:12<51:02:03, 47.46it/s]

Episode 2456 completed | Reward: 2240.00 | Avg Reward: 1760.80 | Frames: 1281112 | Epsilon: 0.1000


Training:  13%|█▎        | 1281726/10000000 [4:49:24<49:45:29, 48.67it/s]

Episode 2457 completed | Reward: 2200.00 | Avg Reward: 1765.00 | Frames: 1281719 | Epsilon: 0.1000


Training:  13%|█▎        | 1282189/10000000 [4:49:34<49:09:21, 49.26it/s]

Episode 2458 completed | Reward: 1340.00 | Avg Reward: 1766.20 | Frames: 1282184 | Epsilon: 0.1000


Training:  13%|█▎        | 1282849/10000000 [4:49:47<51:07:12, 47.37it/s]

Episode 2459 completed | Reward: 940.00 | Avg Reward: 1758.60 | Frames: 1282842 | Epsilon: 0.1000


Training:  13%|█▎        | 1283433/10000000 [4:49:58<49:00:16, 49.41it/s]

Episode 2460 completed | Reward: 920.00 | Avg Reward: 1747.40 | Frames: 1283428 | Epsilon: 0.1000


Training:  13%|█▎        | 1284015/10000000 [4:50:10<46:35:36, 51.96it/s]

Episode 2461 completed | Reward: 1480.00 | Avg Reward: 1754.60 | Frames: 1284010 | Epsilon: 0.1000


Training:  13%|█▎        | 1284553/10000000 [4:50:20<50:09:23, 48.27it/s]

Episode 2462 completed | Reward: 1300.00 | Avg Reward: 1756.80 | Frames: 1284546 | Epsilon: 0.1000


Training:  13%|█▎        | 1285201/10000000 [4:50:33<49:03:42, 49.34it/s]

Episode 2463 completed | Reward: 640.00 | Avg Reward: 1751.80 | Frames: 1285194 | Epsilon: 0.1000


Training:  13%|█▎        | 1285757/10000000 [4:50:44<52:41:12, 45.94it/s]

Episode 2464 completed | Reward: 1020.00 | Avg Reward: 1705.00 | Frames: 1285750 | Epsilon: 0.1000


Training:  13%|█▎        | 1286299/10000000 [4:50:55<46:51:42, 51.65it/s]

Episode 2465 completed | Reward: 1620.00 | Avg Reward: 1684.00 | Frames: 1286293 | Epsilon: 0.1000


Training:  13%|█▎        | 1287053/10000000 [4:51:09<50:04:16, 48.34it/s]

Episode 2466 completed | Reward: 2960.00 | Avg Reward: 1702.60 | Frames: 1287044 | Epsilon: 0.1000


Training:  13%|█▎        | 1287641/10000000 [4:51:21<50:45:29, 47.68it/s]

Episode 2467 completed | Reward: 1900.00 | Avg Reward: 1700.40 | Frames: 1287634 | Epsilon: 0.1000


Training:  13%|█▎        | 1288161/10000000 [4:51:31<49:42:17, 48.69it/s]

Episode 2468 completed | Reward: 640.00 | Avg Reward: 1697.20 | Frames: 1288152 | Epsilon: 0.1000


Training:  13%|█▎        | 1289712/10000000 [4:52:02<55:10:47, 43.85it/s]

Episode 2469 completed | Reward: 4460.00 | Avg Reward: 1694.80 | Frames: 1289711 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1290315/10000000 [4:52:14<47:07:19, 51.34it/s]

Episode 2470 completed | Reward: 1120.00 | Avg Reward: 1677.80 | Frames: 1290310 | Epsilon: 0.1000


Training:  13%|█▎        | 1291336/10000000 [4:52:34<44:13:00, 54.71it/s]

Episode 2471 completed | Reward: 2920.00 | Avg Reward: 1676.80 | Frames: 1291330 | Epsilon: 0.1000


Training:  13%|█▎        | 1291977/10000000 [4:52:47<51:01:55, 47.40it/s]

Episode 2472 completed | Reward: 460.00 | Avg Reward: 1674.00 | Frames: 1291970 | Epsilon: 0.1000


Training:  13%|█▎        | 1292423/10000000 [4:52:56<47:08:03, 51.32it/s]

Episode 2473 completed | Reward: 1320.00 | Avg Reward: 1678.40 | Frames: 1292418 | Epsilon: 0.1000


Training:  13%|█▎        | 1293201/10000000 [4:53:11<52:16:32, 46.27it/s]

Episode 2474 completed | Reward: 1800.00 | Avg Reward: 1668.60 | Frames: 1293194 | Epsilon: 0.1000


Training:  13%|█▎        | 1293941/10000000 [4:53:26<50:19:02, 48.06it/s]

Episode 2475 completed | Reward: 2260.00 | Avg Reward: 1682.00 | Frames: 1293932 | Epsilon: 0.1000


Training:  13%|█▎        | 1294451/10000000 [4:53:36<46:04:04, 52.49it/s]

Episode 2476 completed | Reward: 1580.00 | Avg Reward: 1682.60 | Frames: 1294447 | Epsilon: 0.1000


Training:  13%|█▎        | 1294831/10000000 [4:53:43<46:36:01, 51.89it/s]

Episode 2477 completed | Reward: 1320.00 | Avg Reward: 1685.20 | Frames: 1294826 | Epsilon: 0.1000


Training:  13%|█▎        | 1295434/10000000 [4:53:55<49:51:52, 48.49it/s]

Episode 2478 completed | Reward: 880.00 | Avg Reward: 1680.20 | Frames: 1295427 | Epsilon: 0.1000


Training:  13%|█▎        | 1296569/10000000 [4:54:17<48:56:34, 49.40it/s]

Episode 2479 completed | Reward: 3300.00 | Avg Reward: 1693.80 | Frames: 1296560 | Epsilon: 0.1000


Training:  13%|█▎        | 1297191/10000000 [4:54:30<46:52:09, 51.58it/s]

Episode 2480 completed | Reward: 860.00 | Avg Reward: 1684.00 | Frames: 1297185 | Epsilon: 0.1000


Training:  13%|█▎        | 1297674/10000000 [4:54:39<50:05:59, 48.25it/s]

Episode 2481 completed | Reward: 1300.00 | Avg Reward: 1686.20 | Frames: 1297667 | Epsilon: 0.1000


Training:  13%|█▎        | 1298949/10000000 [4:55:04<50:46:22, 47.60it/s]

Episode 2482 completed | Reward: 1720.00 | Avg Reward: 1684.20 | Frames: 1298940 | Epsilon: 0.1000


Training:  13%|█▎        | 1299627/10000000 [4:55:18<47:10:21, 51.23it/s]

Episode 2483 completed | Reward: 780.00 | Avg Reward: 1675.60 | Frames: 1299622 | Epsilon: 0.1000


Training:  13%|█▎        | 1300199/10000000 [4:55:29<58:53:57, 41.03it/s]

Episode 2484 completed | Reward: 1840.00 | Avg Reward: 1683.60 | Frames: 1300198 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1300665/10000000 [4:55:39<50:54:56, 47.46it/s]

Episode 2485 completed | Reward: 1980.00 | Avg Reward: 1690.00 | Frames: 1300656 | Epsilon: 0.1000


Training:  13%|█▎        | 1301149/10000000 [4:55:48<51:24:14, 47.01it/s]

Episode 2486 completed | Reward: 3180.00 | Avg Reward: 1701.40 | Frames: 1301141 | Epsilon: 0.1000


Training:  13%|█▎        | 1301761/10000000 [4:56:00<51:20:23, 47.06it/s]

Episode 2487 completed | Reward: 960.00 | Avg Reward: 1695.40 | Frames: 1301753 | Epsilon: 0.1000


Training:  13%|█▎        | 1302573/10000000 [4:56:16<49:59:48, 48.32it/s]

Episode 2488 completed | Reward: 2060.00 | Avg Reward: 1705.80 | Frames: 1302566 | Epsilon: 0.1000


Training:  13%|█▎        | 1302929/10000000 [4:56:23<51:47:00, 46.65it/s]

Episode 2489 completed | Reward: 640.00 | Avg Reward: 1695.00 | Frames: 1302920 | Epsilon: 0.1000


Training:  13%|█▎        | 1303487/10000000 [4:56:34<46:49:40, 51.59it/s]

Episode 2490 completed | Reward: 1500.00 | Avg Reward: 1687.80 | Frames: 1303481 | Epsilon: 0.1000


Training:  13%|█▎        | 1305001/10000000 [4:57:04<50:21:36, 47.96it/s]

Episode 2491 completed | Reward: 3000.00 | Avg Reward: 1699.80 | Frames: 1304992 | Epsilon: 0.1000


Training:  13%|█▎        | 1305718/10000000 [4:57:18<49:20:03, 48.95it/s]

Episode 2492 completed | Reward: 1080.00 | Avg Reward: 1691.40 | Frames: 1305711 | Epsilon: 0.1000


Training:  13%|█▎        | 1306100/10000000 [4:57:26<43:45:33, 55.19it/s]

Episode 2493 completed | Reward: 1380.00 | Avg Reward: 1690.60 | Frames: 1306093 | Epsilon: 0.1000


Training:  13%|█▎        | 1307373/10000000 [4:57:51<50:43:21, 47.60it/s]

Episode 2494 completed | Reward: 1980.00 | Avg Reward: 1681.80 | Frames: 1307365 | Epsilon: 0.1000


Training:  13%|█▎        | 1308929/10000000 [4:58:22<51:05:40, 47.25it/s]

Episode 2495 completed | Reward: 1960.00 | Avg Reward: 1682.80 | Frames: 1308920 | Epsilon: 0.1000


Training:  13%|█▎        | 1309413/10000000 [4:58:31<50:59:40, 47.34it/s]

Episode 2496 completed | Reward: 1680.00 | Avg Reward: 1688.60 | Frames: 1309406 | Epsilon: 0.1000


Training:  13%|█▎        | 1309813/10000000 [4:58:39<48:51:59, 49.40it/s]

Episode 2497 completed | Reward: 120.00 | Avg Reward: 1677.20 | Frames: 1309808 | Epsilon: 0.1000


Training:  13%|█▎        | 1310275/10000000 [4:58:49<58:19:04, 41.39it/s]

Episode 2498 completed | Reward: 920.00 | Avg Reward: 1668.60 | Frames: 1310272 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1310729/10000000 [4:58:58<49:00:38, 49.25it/s]

Episode 2499 completed | Reward: 860.00 | Avg Reward: 1669.80 | Frames: 1310722 | Epsilon: 0.1000


Training:  13%|█▎        | 1311345/10000000 [4:59:10<49:08:44, 49.11it/s]

Episode 2500 completed | Reward: 2500.00 | Avg Reward: 1684.20 | Frames: 1311343 | Epsilon: 0.1000


Training:  13%|█▎        | 1311761/10000000 [4:59:18<48:45:28, 49.50it/s]

Episode 2501 completed | Reward: 1060.00 | Avg Reward: 1679.40 | Frames: 1311752 | Epsilon: 0.1000


Training:  13%|█▎        | 1312454/10000000 [4:59:32<49:33:42, 48.69it/s]

Episode 2502 completed | Reward: 2480.00 | Avg Reward: 1691.00 | Frames: 1312447 | Epsilon: 0.1000


Training:  13%|█▎        | 1313277/10000000 [4:59:48<48:51:51, 49.38it/s]

Episode 2503 completed | Reward: 2600.00 | Avg Reward: 1692.60 | Frames: 1313268 | Epsilon: 0.1000


Training:  13%|█▎        | 1314045/10000000 [5:00:03<48:28:35, 49.77it/s]

Episode 2504 completed | Reward: 2100.00 | Avg Reward: 1705.40 | Frames: 1314038 | Epsilon: 0.1000


Training:  13%|█▎        | 1314674/10000000 [5:00:16<48:31:16, 49.72it/s]

Episode 2505 completed | Reward: 400.00 | Avg Reward: 1675.00 | Frames: 1314666 | Epsilon: 0.1000


Training:  13%|█▎        | 1315302/10000000 [5:00:28<49:21:10, 48.88it/s]

Episode 2506 completed | Reward: 2240.00 | Avg Reward: 1693.20 | Frames: 1315295 | Epsilon: 0.1000


Training:  13%|█▎        | 1315899/10000000 [5:00:40<46:27:15, 51.93it/s]

Episode 2507 completed | Reward: 780.00 | Avg Reward: 1691.60 | Frames: 1315893 | Epsilon: 0.1000


Training:  13%|█▎        | 1316637/10000000 [5:00:54<51:15:04, 47.06it/s]

Episode 2508 completed | Reward: 1460.00 | Avg Reward: 1697.20 | Frames: 1316628 | Epsilon: 0.1000


Training:  13%|█▎        | 1317386/10000000 [5:01:09<48:38:15, 49.59it/s]

Episode 2509 completed | Reward: 1100.00 | Avg Reward: 1691.40 | Frames: 1317379 | Epsilon: 0.1000


Training:  13%|█▎        | 1317910/10000000 [5:01:20<49:16:07, 48.95it/s]

Episode 2510 completed | Reward: 2040.00 | Avg Reward: 1696.60 | Frames: 1317903 | Epsilon: 0.1000


Training:  13%|█▎        | 1318521/10000000 [5:01:32<50:51:49, 47.41it/s]

Episode 2511 completed | Reward: 2080.00 | Avg Reward: 1692.90 | Frames: 1318514 | Epsilon: 0.1000


Training:  13%|█▎        | 1319383/10000000 [5:01:49<46:12:09, 52.19it/s]

Episode 2512 completed | Reward: 2200.00 | Avg Reward: 1707.70 | Frames: 1319378 | Epsilon: 0.1000


Training:  13%|█▎        | 1320229/10000000 [5:02:06<49:22:17, 48.83it/s]

Episode 2513 completed | Reward: 3060.00 | Avg Reward: 1727.50 | Frames: 1320220 | Epsilon: 0.1000


Training:  13%|█▎        | 1320795/10000000 [5:02:17<58:02:05, 41.54it/s]

Episode 2514 completed | Reward: 760.00 | Avg Reward: 1721.70 | Frames: 1320794 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1322073/10000000 [5:02:42<47:58:19, 50.25it/s]

Episode 2515 completed | Reward: 4080.00 | Avg Reward: 1729.60 | Frames: 1322068 | Epsilon: 0.1000


Training:  13%|█▎        | 1322657/10000000 [5:02:54<48:45:35, 49.43it/s]

Episode 2516 completed | Reward: 1220.00 | Avg Reward: 1721.40 | Frames: 1322652 | Epsilon: 0.1000


Training:  13%|█▎        | 1323422/10000000 [5:03:09<48:53:18, 49.30it/s]

Episode 2517 completed | Reward: 2000.00 | Avg Reward: 1728.40 | Frames: 1323415 | Epsilon: 0.1000


Training:  13%|█▎        | 1324735/10000000 [5:03:35<47:42:18, 50.51it/s]

Episode 2518 completed | Reward: 2780.00 | Avg Reward: 1747.80 | Frames: 1324729 | Epsilon: 0.1000


Training:  13%|█▎        | 1325265/10000000 [5:03:45<50:16:12, 47.93it/s]

Episode 2519 completed | Reward: 760.00 | Avg Reward: 1745.40 | Frames: 1325257 | Epsilon: 0.1000


Training:  13%|█▎        | 1326177/10000000 [5:04:03<48:30:37, 49.67it/s]

Episode 2520 completed | Reward: 3580.00 | Avg Reward: 1772.20 | Frames: 1326170 | Epsilon: 0.1000


Training:  13%|█▎        | 1326823/10000000 [5:04:16<46:33:27, 51.75it/s]

Episode 2521 completed | Reward: 1700.00 | Avg Reward: 1780.00 | Frames: 1326819 | Epsilon: 0.1000


Training:  13%|█▎        | 1327325/10000000 [5:04:26<48:24:18, 49.77it/s]

Episode 2522 completed | Reward: 760.00 | Avg Reward: 1772.20 | Frames: 1327317 | Epsilon: 0.1000


Training:  13%|█▎        | 1328539/10000000 [5:04:50<46:38:11, 51.65it/s]

Episode 2523 completed | Reward: 4140.00 | Avg Reward: 1801.80 | Frames: 1328535 | Epsilon: 0.1000


Training:  13%|█▎        | 1329734/10000000 [5:05:13<48:23:37, 49.77it/s]

Episode 2524 completed | Reward: 2620.00 | Avg Reward: 1819.20 | Frames: 1329727 | Epsilon: 0.1000


Training:  13%|█▎        | 1330313/10000000 [5:05:25<50:37:07, 47.58it/s]

Episode 2525 completed | Reward: 980.00 | Avg Reward: 1792.80 | Frames: 1330304 | Epsilon: 0.1000


Training:  13%|█▎        | 1331063/10000000 [5:05:40<58:30:36, 41.16it/s]

Episode 2526 completed | Reward: 2560.00 | Avg Reward: 1810.20 | Frames: 1331062 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1332033/10000000 [5:05:59<51:28:34, 46.77it/s]

Episode 2527 completed | Reward: 2600.00 | Avg Reward: 1795.80 | Frames: 1332026 | Epsilon: 0.1000


Training:  13%|█▎        | 1332957/10000000 [5:06:17<50:35:59, 47.58it/s]

Episode 2528 completed | Reward: 3160.00 | Avg Reward: 1815.20 | Frames: 1332948 | Epsilon: 0.1000


Training:  13%|█▎        | 1333545/10000000 [5:06:29<50:46:01, 47.42it/s]

Episode 2529 completed | Reward: 1340.00 | Avg Reward: 1812.80 | Frames: 1333536 | Epsilon: 0.1000


Training:  13%|█▎        | 1334105/10000000 [5:06:40<48:25:13, 49.71it/s]

Episode 2530 completed | Reward: 4340.00 | Avg Reward: 1834.80 | Frames: 1334102 | Epsilon: 0.1000


Training:  13%|█▎        | 1335059/10000000 [5:06:59<45:28:48, 52.92it/s]

Episode 2531 completed | Reward: 3460.00 | Avg Reward: 1849.20 | Frames: 1335053 | Epsilon: 0.1000


Training:  13%|█▎        | 1335631/10000000 [5:07:10<46:10:13, 52.13it/s]

Episode 2532 completed | Reward: 3840.00 | Avg Reward: 1884.80 | Frames: 1335625 | Epsilon: 0.1000


Training:  13%|█▎        | 1336101/10000000 [5:07:19<48:29:03, 49.64it/s]

Episode 2533 completed | Reward: 960.00 | Avg Reward: 1887.20 | Frames: 1336092 | Epsilon: 0.1000


Training:  13%|█▎        | 1336523/10000000 [5:07:27<46:36:47, 51.63it/s]

Episode 2534 completed | Reward: 1440.00 | Avg Reward: 1884.60 | Frames: 1336518 | Epsilon: 0.1000


Training:  13%|█▎        | 1337025/10000000 [5:07:37<48:29:45, 49.62it/s]

Episode 2535 completed | Reward: 2460.00 | Avg Reward: 1895.00 | Frames: 1337016 | Epsilon: 0.1000


Training:  13%|█▎        | 1338407/10000000 [5:08:05<47:06:22, 51.08it/s]

Episode 2536 completed | Reward: 4640.00 | Avg Reward: 1931.80 | Frames: 1338402 | Epsilon: 0.1000


Training:  13%|█▎        | 1338883/10000000 [5:08:14<47:17:18, 50.88it/s]

Episode 2537 completed | Reward: 1040.00 | Avg Reward: 1935.20 | Frames: 1338877 | Epsilon: 0.1000


Training:  13%|█▎        | 1340015/10000000 [5:08:36<46:46:15, 51.43it/s]

Episode 2538 completed | Reward: 1280.00 | Avg Reward: 1912.00 | Frames: 1340010 | Epsilon: 0.1000


Training:  13%|█▎        | 1340650/10000000 [5:08:49<49:28:00, 48.63it/s]

Episode 2539 completed | Reward: 980.00 | Avg Reward: 1891.80 | Frames: 1340643 | Epsilon: 0.1000


Training:  13%|█▎        | 1341311/10000000 [5:09:02<56:51:16, 42.30it/s]

Episode 2540 completed | Reward: 1540.00 | Avg Reward: 1869.60 | Frames: 1341308 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  13%|█▎        | 1341924/10000000 [5:09:14<44:05:52, 54.54it/s]

Episode 2541 completed | Reward: 660.00 | Avg Reward: 1865.40 | Frames: 1341919 | Epsilon: 0.1000


Training:  13%|█▎        | 1342969/10000000 [5:09:35<48:53:09, 49.19it/s]

Episode 2542 completed | Reward: 2720.00 | Avg Reward: 1875.80 | Frames: 1342963 | Epsilon: 0.1000


Training:  13%|█▎        | 1343287/10000000 [5:09:41<46:31:30, 51.68it/s]

Episode 2543 completed | Reward: 940.00 | Avg Reward: 1870.40 | Frames: 1343282 | Epsilon: 0.1000


Training:  13%|█▎        | 1344331/10000000 [5:10:02<48:04:48, 50.01it/s]

Episode 2544 completed | Reward: 2580.00 | Avg Reward: 1882.00 | Frames: 1344325 | Epsilon: 0.1000


Training:  13%|█▎        | 1344838/10000000 [5:10:12<49:20:26, 48.73it/s]

Episode 2545 completed | Reward: 4930.00 | Avg Reward: 1911.50 | Frames: 1344833 | Epsilon: 0.1000


Training:  13%|█▎        | 1345405/10000000 [5:10:23<48:57:53, 49.10it/s]

Episode 2546 completed | Reward: 1420.00 | Avg Reward: 1909.10 | Frames: 1345399 | Epsilon: 0.1000


Training:  13%|█▎        | 1345979/10000000 [5:10:34<47:06:48, 51.02it/s]

Episode 2547 completed | Reward: 1660.00 | Avg Reward: 1884.90 | Frames: 1345975 | Epsilon: 0.1000


Training:  13%|█▎        | 1346549/10000000 [5:10:46<50:22:37, 47.71it/s]

Episode 2548 completed | Reward: 540.00 | Avg Reward: 1847.10 | Frames: 1346540 | Epsilon: 0.1000


Training:  13%|█▎        | 1346930/10000000 [5:10:53<48:22:55, 49.68it/s]

Episode 2549 completed | Reward: 1640.00 | Avg Reward: 1852.90 | Frames: 1346923 | Epsilon: 0.1000


Training:  13%|█▎        | 1347783/10000000 [5:11:10<47:19:58, 50.78it/s]

Episode 2550 completed | Reward: 2040.00 | Avg Reward: 1862.70 | Frames: 1347777 | Epsilon: 0.1000


Training:  13%|█▎        | 1348203/10000000 [5:11:18<48:03:44, 50.00it/s]

Episode 2551 completed | Reward: 560.00 | Avg Reward: 1842.90 | Frames: 1348198 | Epsilon: 0.1000


Training:  13%|█▎        | 1348655/10000000 [5:11:27<46:03:12, 52.18it/s]

Episode 2552 completed | Reward: 900.00 | Avg Reward: 1840.90 | Frames: 1348651 | Epsilon: 0.1000


Training:  13%|█▎        | 1349385/10000000 [5:11:42<51:29:47, 46.66it/s]

Episode 2553 completed | Reward: 960.00 | Avg Reward: 1841.90 | Frames: 1349378 | Epsilon: 0.1000


Training:  13%|█▎        | 1349841/10000000 [5:11:51<48:18:30, 49.74it/s]

Episode 2554 completed | Reward: 1260.00 | Avg Reward: 1843.10 | Frames: 1349838 | Epsilon: 0.1000


Training:  14%|█▎        | 1350609/10000000 [5:12:06<48:13:56, 49.81it/s]

Episode 2555 completed | Reward: 2960.00 | Avg Reward: 1833.10 | Frames: 1350606 | Epsilon: 0.1000


Training:  14%|█▎        | 1352103/10000000 [5:12:36<59:31:01, 40.36it/s]

Episode 2556 completed | Reward: 4220.00 | Avg Reward: 1852.90 | Frames: 1352101 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▎        | 1352739/10000000 [5:12:48<47:22:58, 50.69it/s]

Episode 2557 completed | Reward: 1880.00 | Avg Reward: 1849.70 | Frames: 1352735 | Epsilon: 0.1000


Training:  14%|█▎        | 1353150/10000000 [5:12:57<48:13:05, 49.81it/s]

Episode 2558 completed | Reward: 800.00 | Avg Reward: 1844.30 | Frames: 1353142 | Epsilon: 0.1000


Training:  14%|█▎        | 1353557/10000000 [5:13:05<48:02:54, 49.99it/s]

Episode 2559 completed | Reward: 1120.00 | Avg Reward: 1846.10 | Frames: 1353549 | Epsilon: 0.1000


Training:  14%|█▎        | 1354445/10000000 [5:13:22<48:27:35, 49.56it/s]

Episode 2560 completed | Reward: 1120.00 | Avg Reward: 1848.10 | Frames: 1354442 | Epsilon: 0.1000


Training:  14%|█▎        | 1354929/10000000 [5:13:32<51:32:49, 46.59it/s]

Episode 2561 completed | Reward: 1140.00 | Avg Reward: 1844.70 | Frames: 1354922 | Epsilon: 0.1000


Training:  14%|█▎        | 1355511/10000000 [5:13:44<46:47:27, 51.32it/s]

Episode 2562 completed | Reward: 1040.00 | Avg Reward: 1842.10 | Frames: 1355505 | Epsilon: 0.1000


Training:  14%|█▎        | 1356489/10000000 [5:14:03<51:19:03, 46.79it/s]

Episode 2563 completed | Reward: 1280.00 | Avg Reward: 1848.50 | Frames: 1356482 | Epsilon: 0.1000


Training:  14%|█▎        | 1357119/10000000 [5:14:16<45:40:38, 52.56it/s]

Episode 2564 completed | Reward: 1000.00 | Avg Reward: 1848.30 | Frames: 1357115 | Epsilon: 0.1000


Training:  14%|█▎        | 1357765/10000000 [5:14:28<48:26:55, 49.55it/s]

Episode 2565 completed | Reward: 1380.00 | Avg Reward: 1845.90 | Frames: 1357759 | Epsilon: 0.1000


Training:  14%|█▎        | 1358293/10000000 [5:14:39<48:35:15, 49.41it/s]

Episode 2566 completed | Reward: 1240.00 | Avg Reward: 1828.70 | Frames: 1358284 | Epsilon: 0.1000


Training:  14%|█▎        | 1358749/10000000 [5:14:48<48:58:23, 49.01it/s]

Episode 2567 completed | Reward: 900.00 | Avg Reward: 1818.70 | Frames: 1358740 | Epsilon: 0.1000


Training:  14%|█▎        | 1359210/10000000 [5:14:57<48:42:16, 49.28it/s]

Episode 2568 completed | Reward: 1340.00 | Avg Reward: 1825.70 | Frames: 1359206 | Epsilon: 0.1000


Training:  14%|█▎        | 1360085/10000000 [5:15:15<49:41:14, 48.30it/s]

Episode 2569 completed | Reward: 2780.00 | Avg Reward: 1808.90 | Frames: 1360077 | Epsilon: 0.1000


Training:  14%|█▎        | 1360588/10000000 [5:15:24<44:04:15, 54.45it/s]

Episode 2570 completed | Reward: 1380.00 | Avg Reward: 1811.50 | Frames: 1360581 | Epsilon: 0.1000


Training:  14%|█▎        | 1360985/10000000 [5:15:32<48:18:45, 49.67it/s]

Episode 2571 completed | Reward: 940.00 | Avg Reward: 1791.70 | Frames: 1360983 | Epsilon: 0.1000


Training:  14%|█▎        | 1361382/10000000 [5:15:40<49:03:21, 48.92it/s]

Episode 2572 completed | Reward: 1460.00 | Avg Reward: 1801.70 | Frames: 1361375 | Epsilon: 0.1000


Training:  14%|█▎        | 1361985/10000000 [5:15:52<51:10:02, 46.89it/s]

Episode 2573 completed | Reward: 1880.00 | Avg Reward: 1807.30 | Frames: 1361977 | Epsilon: 0.1000


Training:  14%|█▎        | 1362551/10000000 [5:16:03<56:02:09, 42.82it/s]

Episode 2574 completed | Reward: 1680.00 | Avg Reward: 1806.10 | Frames: 1362548 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▎        | 1363050/10000000 [5:16:13<49:16:21, 48.69it/s]

Episode 2575 completed | Reward: 680.00 | Avg Reward: 1790.30 | Frames: 1363042 | Epsilon: 0.1000


Training:  14%|█▎        | 1363563/10000000 [5:16:23<47:07:04, 50.91it/s]

Episode 2576 completed | Reward: 1260.00 | Avg Reward: 1787.10 | Frames: 1363557 | Epsilon: 0.1000


Training:  14%|█▎        | 1364237/10000000 [5:16:37<51:16:33, 46.78it/s]

Episode 2577 completed | Reward: 580.00 | Avg Reward: 1779.70 | Frames: 1364229 | Epsilon: 0.1000


Training:  14%|█▎        | 1364597/10000000 [5:16:44<49:45:44, 48.20it/s]

Episode 2578 completed | Reward: 880.00 | Avg Reward: 1779.70 | Frames: 1364588 | Epsilon: 0.1000


Training:  14%|█▎        | 1365346/10000000 [5:16:59<50:11:27, 47.79it/s]

Episode 2579 completed | Reward: 6370.00 | Avg Reward: 1810.40 | Frames: 1365339 | Epsilon: 0.1000


Training:  14%|█▎        | 1365958/10000000 [5:17:11<50:09:30, 47.82it/s]

Episode 2580 completed | Reward: 420.00 | Avg Reward: 1806.00 | Frames: 1365951 | Epsilon: 0.1000


Training:  14%|█▎        | 1366677/10000000 [5:17:25<49:20:25, 48.60it/s]

Episode 2581 completed | Reward: 1120.00 | Avg Reward: 1804.20 | Frames: 1366668 | Epsilon: 0.1000


Training:  14%|█▎        | 1367634/10000000 [5:17:44<48:21:49, 49.58it/s]

Episode 2582 completed | Reward: 1860.00 | Avg Reward: 1805.60 | Frames: 1367627 | Epsilon: 0.1000


Training:  14%|█▎        | 1368145/10000000 [5:17:54<48:50:54, 49.09it/s]

Episode 2583 completed | Reward: 1400.00 | Avg Reward: 1811.80 | Frames: 1368136 | Epsilon: 0.1000


Training:  14%|█▎        | 1369825/10000000 [5:18:28<48:39:09, 49.27it/s]

Episode 2584 completed | Reward: 4500.00 | Avg Reward: 1838.40 | Frames: 1369816 | Epsilon: 0.1000


Training:  14%|█▎        | 1370407/10000000 [5:18:39<46:24:53, 51.65it/s]

Episode 2585 completed | Reward: 1560.00 | Avg Reward: 1834.20 | Frames: 1370403 | Epsilon: 0.1000


Training:  14%|█▎        | 1371411/10000000 [5:18:59<46:40:17, 51.36it/s]

Episode 2586 completed | Reward: 2560.00 | Avg Reward: 1828.00 | Frames: 1371405 | Epsilon: 0.1000


Training:  14%|█▎        | 1371943/10000000 [5:19:09<45:36:20, 52.55it/s]

Episode 2587 completed | Reward: 1360.00 | Avg Reward: 1832.00 | Frames: 1371938 | Epsilon: 0.1000


Training:  14%|█▎        | 1372449/10000000 [5:19:19<49:17:33, 48.62it/s]

Episode 2588 completed | Reward: 980.00 | Avg Reward: 1821.20 | Frames: 1372440 | Epsilon: 0.1000


Training:  14%|█▎        | 1372857/10000000 [5:19:28<58:32:14, 40.94it/s]

Episode 2589 completed | Reward: 1400.00 | Avg Reward: 1828.80 | Frames: 1372856 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▎        | 1373665/10000000 [5:19:43<48:08:37, 49.77it/s]

Episode 2590 completed | Reward: 1440.00 | Avg Reward: 1828.20 | Frames: 1373656 | Epsilon: 0.1000


Training:  14%|█▎        | 1374101/10000000 [5:19:52<50:13:34, 47.71it/s]

Episode 2591 completed | Reward: 1420.00 | Avg Reward: 1812.40 | Frames: 1374092 | Epsilon: 0.1000


Training:  14%|█▎        | 1374587/10000000 [5:20:02<45:57:03, 52.14it/s]

Episode 2592 completed | Reward: 1500.00 | Avg Reward: 1816.60 | Frames: 1374581 | Epsilon: 0.1000


Training:  14%|█▎        | 1374983/10000000 [5:20:10<45:53:28, 52.21it/s]

Episode 2593 completed | Reward: 680.00 | Avg Reward: 1809.60 | Frames: 1374978 | Epsilon: 0.1000


Training:  14%|█▍        | 1375587/10000000 [5:20:22<47:36:00, 50.33it/s]

Episode 2594 completed | Reward: 2400.00 | Avg Reward: 1813.80 | Frames: 1375582 | Epsilon: 0.1000


Training:  14%|█▍        | 1376241/10000000 [5:20:35<48:58:15, 48.92it/s]

Episode 2595 completed | Reward: 840.00 | Avg Reward: 1802.60 | Frames: 1376238 | Epsilon: 0.1000


Training:  14%|█▍        | 1376625/10000000 [5:20:42<48:30:56, 49.37it/s]

Episode 2596 completed | Reward: 1540.00 | Avg Reward: 1801.20 | Frames: 1376616 | Epsilon: 0.1000


Training:  14%|█▍        | 1377545/10000000 [5:21:00<49:35:06, 48.30it/s]

Episode 2597 completed | Reward: 1420.00 | Avg Reward: 1814.20 | Frames: 1377536 | Epsilon: 0.1000


Training:  14%|█▍        | 1378045/10000000 [5:21:10<51:33:56, 46.45it/s]

Episode 2598 completed | Reward: 560.00 | Avg Reward: 1810.60 | Frames: 1378038 | Epsilon: 0.1000


Training:  14%|█▍        | 1378545/10000000 [5:21:20<50:23:57, 47.52it/s]

Episode 2599 completed | Reward: 800.00 | Avg Reward: 1810.00 | Frames: 1378536 | Epsilon: 0.1000


Training:  14%|█▍        | 1379127/10000000 [5:21:32<46:24:12, 51.61it/s]

Episode 2600 completed | Reward: 1360.00 | Avg Reward: 1798.60 | Frames: 1379124 | Epsilon: 0.1000


Training:  14%|█▍        | 1379785/10000000 [5:21:45<50:09:22, 47.74it/s]

Episode 2601 completed | Reward: 2000.00 | Avg Reward: 1808.00 | Frames: 1379777 | Epsilon: 0.1000


Training:  14%|█▍        | 1380341/10000000 [5:21:56<50:22:28, 47.53it/s]

Episode 2602 completed | Reward: 2520.00 | Avg Reward: 1808.40 | Frames: 1380332 | Epsilon: 0.1000


Training:  14%|█▍        | 1380805/10000000 [5:22:05<48:48:46, 49.05it/s]

Episode 2603 completed | Reward: 960.00 | Avg Reward: 1792.00 | Frames: 1380800 | Epsilon: 0.1000


Training:  14%|█▍        | 1381509/10000000 [5:22:19<49:12:26, 48.65it/s]

Episode 2604 completed | Reward: 3480.00 | Avg Reward: 1805.80 | Frames: 1381500 | Epsilon: 0.1000


Training:  14%|█▍        | 1382065/10000000 [5:22:30<50:40:13, 47.24it/s]

Episode 2605 completed | Reward: 1560.00 | Avg Reward: 1817.40 | Frames: 1382058 | Epsilon: 0.1000


Training:  14%|█▍        | 1382613/10000000 [5:22:41<51:43:09, 46.28it/s]

Episode 2606 completed | Reward: 2520.00 | Avg Reward: 1820.20 | Frames: 1382605 | Epsilon: 0.1000


Training:  14%|█▍        | 1383599/10000000 [5:23:00<47:25:17, 50.47it/s]

Episode 2607 completed | Reward: 3540.00 | Avg Reward: 1847.80 | Frames: 1383599 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▍        | 1384181/10000000 [5:23:12<49:54:02, 47.96it/s]

Episode 2608 completed | Reward: 940.00 | Avg Reward: 1842.60 | Frames: 1384173 | Epsilon: 0.1000


Training:  14%|█▍        | 1384853/10000000 [5:23:25<51:25:35, 46.53it/s]

Episode 2609 completed | Reward: 1400.00 | Avg Reward: 1845.60 | Frames: 1384844 | Epsilon: 0.1000


Training:  14%|█▍        | 1386451/10000000 [5:23:57<46:30:53, 51.44it/s]

Episode 2610 completed | Reward: 4240.00 | Avg Reward: 1867.60 | Frames: 1386446 | Epsilon: 0.1000


Training:  14%|█▍        | 1386953/10000000 [5:24:07<48:57:07, 48.87it/s]

Episode 2611 completed | Reward: 1380.00 | Avg Reward: 1860.60 | Frames: 1386950 | Epsilon: 0.1000


Training:  14%|█▍        | 1387509/10000000 [5:24:18<51:12:18, 46.72it/s]

Episode 2612 completed | Reward: 2600.00 | Avg Reward: 1864.60 | Frames: 1387501 | Epsilon: 0.1000


Training:  14%|█▍        | 1388145/10000000 [5:24:31<50:03:12, 47.79it/s]

Episode 2613 completed | Reward: 2540.00 | Avg Reward: 1859.40 | Frames: 1388137 | Epsilon: 0.1000


Training:  14%|█▍        | 1389433/10000000 [5:24:56<49:41:04, 48.14it/s]

Episode 2614 completed | Reward: 5160.00 | Avg Reward: 1903.40 | Frames: 1389424 | Epsilon: 0.1000


Training:  14%|█▍        | 1390087/10000000 [5:25:09<45:05:13, 53.04it/s]

Episode 2615 completed | Reward: 1900.00 | Avg Reward: 1881.60 | Frames: 1390083 | Epsilon: 0.1000


Training:  14%|█▍        | 1390627/10000000 [5:25:20<47:46:24, 50.06it/s]

Episode 2616 completed | Reward: 840.00 | Avg Reward: 1877.80 | Frames: 1390626 | Epsilon: 0.1000


Training:  14%|█▍        | 1392121/10000000 [5:25:49<49:08:28, 48.66it/s]

Episode 2617 completed | Reward: 4160.00 | Avg Reward: 1899.40 | Frames: 1392116 | Epsilon: 0.1000


Training:  14%|█▍        | 1392757/10000000 [5:26:02<50:40:22, 47.18it/s]

Episode 2618 completed | Reward: 1000.00 | Avg Reward: 1881.60 | Frames: 1392748 | Epsilon: 0.1000


Training:  14%|█▍        | 1393301/10000000 [5:26:13<48:56:27, 48.85it/s]

Episode 2619 completed | Reward: 3280.00 | Avg Reward: 1906.80 | Frames: 1393299 | Epsilon: 0.1000


Training:  14%|█▍        | 1393891/10000000 [5:26:24<56:03:44, 42.64it/s]

Episode 2620 completed | Reward: 1220.00 | Avg Reward: 1883.20 | Frames: 1393889 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▍        | 1394437/10000000 [5:26:35<50:07:04, 47.70it/s]

Episode 2621 completed | Reward: 1920.00 | Avg Reward: 1885.40 | Frames: 1394430 | Epsilon: 0.1000


Training:  14%|█▍        | 1394897/10000000 [5:26:44<50:26:36, 47.39it/s]

Episode 2622 completed | Reward: 1140.00 | Avg Reward: 1889.20 | Frames: 1394890 | Epsilon: 0.1000


Training:  14%|█▍        | 1395383/10000000 [5:26:54<46:47:42, 51.08it/s]

Episode 2623 completed | Reward: 2840.00 | Avg Reward: 1876.20 | Frames: 1395377 | Epsilon: 0.1000


Training:  14%|█▍        | 1395866/10000000 [5:27:04<48:12:52, 49.57it/s]

Episode 2624 completed | Reward: 2020.00 | Avg Reward: 1870.20 | Frames: 1395858 | Epsilon: 0.1000


Training:  14%|█▍        | 1396614/10000000 [5:27:18<49:40:25, 48.11it/s]

Episode 2625 completed | Reward: 3190.00 | Avg Reward: 1892.30 | Frames: 1396607 | Epsilon: 0.1000


Training:  14%|█▍        | 1397210/10000000 [5:27:30<49:30:13, 48.27it/s]

Episode 2626 completed | Reward: 1640.00 | Avg Reward: 1883.10 | Frames: 1397203 | Epsilon: 0.1000


Training:  14%|█▍        | 1397741/10000000 [5:27:41<51:21:26, 46.53it/s]

Episode 2627 completed | Reward: 1880.00 | Avg Reward: 1875.90 | Frames: 1397733 | Epsilon: 0.1000


Training:  14%|█▍        | 1398457/10000000 [5:27:55<50:33:27, 47.26it/s]

Episode 2628 completed | Reward: 3680.00 | Avg Reward: 1881.10 | Frames: 1398450 | Epsilon: 0.1000


Training:  14%|█▍        | 1398961/10000000 [5:28:05<47:44:16, 50.05it/s]

Episode 2629 completed | Reward: 900.00 | Avg Reward: 1876.70 | Frames: 1398955 | Epsilon: 0.1000


Training:  14%|█▍        | 1399518/10000000 [5:28:16<48:38:29, 49.11it/s]

Episode 2630 completed | Reward: 2240.00 | Avg Reward: 1855.70 | Frames: 1399511 | Epsilon: 0.1000


Training:  14%|█▍        | 1400010/10000000 [5:28:26<48:22:25, 49.38it/s]

Episode 2631 completed | Reward: 1520.00 | Avg Reward: 1836.30 | Frames: 1400000 | Epsilon: 0.1000


Training:  14%|█▍        | 1400405/10000000 [5:28:34<51:43:48, 46.18it/s]

Episode 2632 completed | Reward: 820.00 | Avg Reward: 1806.10 | Frames: 1400396 | Epsilon: 0.1000


Training:  14%|█▍        | 1401089/10000000 [5:28:47<50:30:08, 47.30it/s]

Episode 2633 completed | Reward: 2160.00 | Avg Reward: 1818.10 | Frames: 1401081 | Epsilon: 0.1000


Training:  14%|█▍        | 1402157/10000000 [5:29:09<50:19:05, 47.46it/s]

Episode 2634 completed | Reward: 2280.00 | Avg Reward: 1826.50 | Frames: 1402148 | Epsilon: 0.1000


Training:  14%|█▍        | 1402861/10000000 [5:29:23<48:57:25, 48.78it/s]

Episode 2635 completed | Reward: 2640.00 | Avg Reward: 1828.30 | Frames: 1402856 | Epsilon: 0.1000


Training:  14%|█▍        | 1403433/10000000 [5:29:34<48:42:05, 49.03it/s]

Episode 2636 completed | Reward: 1020.00 | Avg Reward: 1792.10 | Frames: 1403424 | Epsilon: 0.1000


Training:  14%|█▍        | 1403823/10000000 [5:29:42<45:33:13, 52.42it/s]

Episode 2637 completed | Reward: 1080.00 | Avg Reward: 1792.50 | Frames: 1403819 | Epsilon: 0.1000


Training:  14%|█▍        | 1404349/10000000 [5:29:52<58:57:07, 40.50it/s]

Episode 2638 completed | Reward: 1120.00 | Avg Reward: 1790.90 | Frames: 1404348 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▍        | 1405245/10000000 [5:30:10<50:29:34, 47.28it/s]

Episode 2639 completed | Reward: 3180.00 | Avg Reward: 1812.90 | Frames: 1405237 | Epsilon: 0.1000


Training:  14%|█▍        | 1405835/10000000 [5:30:22<46:37:21, 51.20it/s]

Episode 2640 completed | Reward: 2580.00 | Avg Reward: 1823.30 | Frames: 1405834 | Epsilon: 0.1000


Training:  14%|█▍        | 1406278/10000000 [5:30:31<49:28:16, 48.25it/s]

Episode 2641 completed | Reward: 2980.00 | Avg Reward: 1846.50 | Frames: 1406271 | Epsilon: 0.1000


Training:  14%|█▍        | 1407081/10000000 [5:30:47<49:59:08, 47.75it/s]

Episode 2642 completed | Reward: 5220.00 | Avg Reward: 1871.50 | Frames: 1407072 | Epsilon: 0.1000


Training:  14%|█▍        | 1407693/10000000 [5:30:59<49:36:56, 48.10it/s]

Episode 2643 completed | Reward: 1620.00 | Avg Reward: 1878.30 | Frames: 1407684 | Epsilon: 0.1000


Training:  14%|█▍        | 1408355/10000000 [5:31:12<46:23:14, 51.45it/s]

Episode 2644 completed | Reward: 940.00 | Avg Reward: 1861.90 | Frames: 1408349 | Epsilon: 0.1000


Training:  14%|█▍        | 1408854/10000000 [5:31:22<49:03:18, 48.65it/s]

Episode 2645 completed | Reward: 1700.00 | Avg Reward: 1829.60 | Frames: 1408851 | Epsilon: 0.1000


Training:  14%|█▍        | 1409393/10000000 [5:31:33<51:15:06, 46.56it/s]

Episode 2646 completed | Reward: 2480.00 | Avg Reward: 1840.20 | Frames: 1409386 | Epsilon: 0.1000


Training:  14%|█▍        | 1410117/10000000 [5:31:47<50:29:53, 47.25it/s]

Episode 2647 completed | Reward: 800.00 | Avg Reward: 1831.60 | Frames: 1410108 | Epsilon: 0.1000


Training:  14%|█▍        | 1410634/10000000 [5:31:57<49:31:51, 48.17it/s]

Episode 2648 completed | Reward: 2720.00 | Avg Reward: 1853.40 | Frames: 1410627 | Epsilon: 0.1000


Training:  14%|█▍        | 1411117/10000000 [5:32:07<50:10:07, 47.56it/s]

Episode 2649 completed | Reward: 860.00 | Avg Reward: 1845.60 | Frames: 1411109 | Epsilon: 0.1000


Training:  14%|█▍        | 1411741/10000000 [5:32:20<47:55:34, 49.78it/s]

Episode 2650 completed | Reward: 2500.00 | Avg Reward: 1850.20 | Frames: 1411732 | Epsilon: 0.1000


Training:  14%|█▍        | 1412139/10000000 [5:32:28<48:52:45, 48.80it/s]

Episode 2651 completed | Reward: 2000.00 | Avg Reward: 1864.60 | Frames: 1412135 | Epsilon: 0.1000


Training:  14%|█▍        | 1412645/10000000 [5:32:38<50:16:22, 47.45it/s]

Episode 2652 completed | Reward: 2880.00 | Avg Reward: 1884.40 | Frames: 1412637 | Epsilon: 0.1000


Training:  14%|█▍        | 1413545/10000000 [5:32:55<49:40:42, 48.01it/s]

Episode 2653 completed | Reward: 4530.00 | Avg Reward: 1920.10 | Frames: 1413537 | Epsilon: 0.1000


Training:  14%|█▍        | 1414103/10000000 [5:33:07<46:48:40, 50.95it/s]

Episode 2654 completed | Reward: 1400.00 | Avg Reward: 1921.50 | Frames: 1414098 | Epsilon: 0.1000


Training:  14%|█▍        | 1415534/10000000 [5:33:35<47:13:41, 50.49it/s]

Episode 2655 completed | Reward: 3680.00 | Avg Reward: 1928.70 | Frames: 1415534 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▍        | 1416093/10000000 [5:33:46<49:58:18, 47.72it/s]

Episode 2656 completed | Reward: 2840.00 | Avg Reward: 1914.90 | Frames: 1416086 | Epsilon: 0.1000


Training:  14%|█▍        | 1416515/10000000 [5:33:55<46:32:27, 51.23it/s]

Episode 2657 completed | Reward: 960.00 | Avg Reward: 1905.70 | Frames: 1416509 | Epsilon: 0.1000


Training:  14%|█▍        | 1417279/10000000 [5:34:10<46:27:02, 51.33it/s]

Episode 2658 completed | Reward: 4850.00 | Avg Reward: 1946.20 | Frames: 1417274 | Epsilon: 0.1000


Training:  14%|█▍        | 1417986/10000000 [5:34:24<49:30:13, 48.16it/s]

Episode 2659 completed | Reward: 1820.00 | Avg Reward: 1953.20 | Frames: 1417984 | Epsilon: 0.1000


Training:  14%|█▍        | 1418470/10000000 [5:34:34<48:38:42, 49.00it/s]

Episode 2660 completed | Reward: 2380.00 | Avg Reward: 1965.80 | Frames: 1418463 | Epsilon: 0.1000


Training:  14%|█▍        | 1419109/10000000 [5:34:46<49:45:50, 47.90it/s]

Episode 2661 completed | Reward: 2180.00 | Avg Reward: 1976.20 | Frames: 1419100 | Epsilon: 0.1000


Training:  14%|█▍        | 1420354/10000000 [5:35:11<50:32:23, 47.16it/s]

Episode 2662 completed | Reward: 5120.00 | Avg Reward: 2017.00 | Frames: 1420350 | Epsilon: 0.1000


Training:  14%|█▍        | 1420933/10000000 [5:35:23<50:34:15, 47.12it/s]

Episode 2663 completed | Reward: 2520.00 | Avg Reward: 2029.40 | Frames: 1420924 | Epsilon: 0.1000


Training:  14%|█▍        | 1421405/10000000 [5:35:32<48:04:58, 49.56it/s]

Episode 2664 completed | Reward: 740.00 | Avg Reward: 2026.80 | Frames: 1421400 | Epsilon: 0.1000


Training:  14%|█▍        | 1422010/10000000 [5:35:44<48:49:05, 48.81it/s]

Episode 2665 completed | Reward: 1960.00 | Avg Reward: 2032.60 | Frames: 1422003 | Epsilon: 0.1000


Training:  14%|█▍        | 1422943/10000000 [5:36:02<45:39:09, 52.19it/s]

Episode 2666 completed | Reward: 3140.00 | Avg Reward: 2051.60 | Frames: 1422938 | Epsilon: 0.1000


Training:  14%|█▍        | 1423409/10000000 [5:36:12<51:33:12, 46.21it/s]

Episode 2667 completed | Reward: 1360.00 | Avg Reward: 2056.20 | Frames: 1423401 | Epsilon: 0.1000


Training:  14%|█▍        | 1423959/10000000 [5:36:23<47:49:13, 49.82it/s]

Episode 2668 completed | Reward: 3180.00 | Avg Reward: 2074.60 | Frames: 1423954 | Epsilon: 0.1000


Training:  14%|█▍        | 1424501/10000000 [5:36:33<48:38:15, 48.98it/s]

Episode 2669 completed | Reward: 1540.00 | Avg Reward: 2062.20 | Frames: 1424492 | Epsilon: 0.1000


Training:  14%|█▍        | 1424938/10000000 [5:36:42<49:12:16, 48.41it/s]

Episode 2670 completed | Reward: 860.00 | Avg Reward: 2057.00 | Frames: 1424931 | Epsilon: 0.1000


Training:  14%|█▍        | 1425583/10000000 [5:36:55<56:28:18, 42.18it/s]

Episode 2671 completed | Reward: 2520.00 | Avg Reward: 2072.80 | Frames: 1425582 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▍        | 1426185/10000000 [5:37:07<50:24:58, 47.24it/s]

Episode 2672 completed | Reward: 2820.00 | Avg Reward: 2086.40 | Frames: 1426177 | Epsilon: 0.1000


Training:  14%|█▍        | 1426711/10000000 [5:37:18<46:38:20, 51.06it/s]

Episode 2673 completed | Reward: 2240.00 | Avg Reward: 2090.00 | Frames: 1426705 | Epsilon: 0.1000


Training:  14%|█▍        | 1427299/10000000 [5:37:29<46:58:50, 50.69it/s]

Episode 2674 completed | Reward: 2640.00 | Avg Reward: 2099.60 | Frames: 1427297 | Epsilon: 0.1000


Training:  14%|█▍        | 1427936/10000000 [5:37:42<45:35:54, 52.22it/s]

Episode 2675 completed | Reward: 1120.00 | Avg Reward: 2104.00 | Frames: 1427928 | Epsilon: 0.1000


Training:  14%|█▍        | 1429227/10000000 [5:38:08<45:52:33, 51.90it/s]

Episode 2676 completed | Reward: 3460.00 | Avg Reward: 2126.00 | Frames: 1429223 | Epsilon: 0.1000


Training:  14%|█▍        | 1429929/10000000 [5:38:22<49:41:24, 47.91it/s]

Episode 2677 completed | Reward: 1220.00 | Avg Reward: 2132.40 | Frames: 1429925 | Epsilon: 0.1000


Training:  14%|█▍        | 1430502/10000000 [5:38:33<49:40:52, 47.91it/s]

Episode 2678 completed | Reward: 1040.00 | Avg Reward: 2134.00 | Frames: 1430497 | Epsilon: 0.1000


Training:  14%|█▍        | 1430963/10000000 [5:38:42<46:11:15, 51.54it/s]

Episode 2679 completed | Reward: 360.00 | Avg Reward: 2073.90 | Frames: 1430959 | Epsilon: 0.1000


Training:  14%|█▍        | 1431421/10000000 [5:38:51<50:52:41, 46.78it/s]

Episode 2680 completed | Reward: 1200.00 | Avg Reward: 2081.70 | Frames: 1431414 | Epsilon: 0.1000


Training:  14%|█▍        | 1432011/10000000 [5:39:03<44:43:42, 53.21it/s]

Episode 2681 completed | Reward: 2300.00 | Avg Reward: 2093.50 | Frames: 1432006 | Epsilon: 0.1000


Training:  14%|█▍        | 1432599/10000000 [5:39:15<45:28:22, 52.34it/s]

Episode 2682 completed | Reward: 2080.00 | Avg Reward: 2095.70 | Frames: 1432594 | Epsilon: 0.1000


Training:  14%|█▍        | 1433173/10000000 [5:39:26<48:38:03, 48.93it/s]

Episode 2683 completed | Reward: 2600.00 | Avg Reward: 2107.70 | Frames: 1433164 | Epsilon: 0.1000


Training:  14%|█▍        | 1433713/10000000 [5:39:37<50:08:18, 47.46it/s]

Episode 2684 completed | Reward: 1700.00 | Avg Reward: 2079.70 | Frames: 1433706 | Epsilon: 0.1000


Training:  14%|█▍        | 1434205/10000000 [5:39:47<51:04:51, 46.58it/s]

Episode 2685 completed | Reward: 2200.00 | Avg Reward: 2086.10 | Frames: 1434198 | Epsilon: 0.1000


Training:  14%|█▍        | 1434770/10000000 [5:39:58<49:11:55, 48.36it/s]

Episode 2686 completed | Reward: 1760.00 | Avg Reward: 2078.10 | Frames: 1434763 | Epsilon: 0.1000


Training:  14%|█▍        | 1435237/10000000 [5:40:07<50:05:16, 47.50it/s]

Episode 2687 completed | Reward: 1220.00 | Avg Reward: 2076.70 | Frames: 1435228 | Epsilon: 0.1000


Training:  14%|█▍        | 1436187/10000000 [5:40:26<48:20:05, 49.22it/s]

Episode 2688 completed | Reward: 3540.00 | Avg Reward: 2102.30 | Frames: 1436187 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▍        | 1437017/10000000 [5:40:43<50:24:45, 47.18it/s]

Episode 2689 completed | Reward: 2500.00 | Avg Reward: 2113.30 | Frames: 1437010 | Epsilon: 0.1000


Training:  14%|█▍        | 1437598/10000000 [5:40:54<48:13:52, 49.31it/s]

Episode 2690 completed | Reward: 900.00 | Avg Reward: 2107.90 | Frames: 1437595 | Epsilon: 0.1000


Training:  14%|█▍        | 1438101/10000000 [5:41:05<50:17:03, 47.30it/s]

Episode 2691 completed | Reward: 2060.00 | Avg Reward: 2114.30 | Frames: 1438100 | Epsilon: 0.1000


Training:  14%|█▍        | 1438602/10000000 [5:41:14<49:32:54, 48.00it/s]

Episode 2692 completed | Reward: 2060.00 | Avg Reward: 2119.90 | Frames: 1438592 | Epsilon: 0.1000


Training:  14%|█▍        | 1439125/10000000 [5:41:25<51:41:50, 46.00it/s]

Episode 2693 completed | Reward: 1000.00 | Avg Reward: 2123.10 | Frames: 1439118 | Epsilon: 0.1000


Training:  14%|█▍        | 1439713/10000000 [5:41:37<50:55:04, 46.70it/s]

Episode 2694 completed | Reward: 1340.00 | Avg Reward: 2112.50 | Frames: 1439704 | Epsilon: 0.1000


Training:  14%|█▍        | 1440369/10000000 [5:41:50<48:30:01, 49.02it/s]

Episode 2695 completed | Reward: 1800.00 | Avg Reward: 2122.10 | Frames: 1440367 | Epsilon: 0.1000


Training:  14%|█▍        | 1441703/10000000 [5:42:16<45:52:29, 51.82it/s]

Episode 2696 completed | Reward: 4200.00 | Avg Reward: 2148.70 | Frames: 1441698 | Epsilon: 0.1000


Training:  14%|█▍        | 1442373/10000000 [5:42:30<48:20:33, 49.17it/s]

Episode 2697 completed | Reward: 2800.00 | Avg Reward: 2162.50 | Frames: 1442364 | Epsilon: 0.1000


Training:  14%|█▍        | 1443467/10000000 [5:42:52<45:42:23, 52.00it/s]

Episode 2698 completed | Reward: 1580.00 | Avg Reward: 2172.70 | Frames: 1443461 | Epsilon: 0.1000


Training:  14%|█▍        | 1443837/10000000 [5:42:59<52:02:22, 45.67it/s]

Episode 2699 completed | Reward: 1260.00 | Avg Reward: 2177.30 | Frames: 1443830 | Epsilon: 0.1000


Training:  14%|█▍        | 1444473/10000000 [5:43:12<51:13:52, 46.39it/s]

Episode 2700 completed | Reward: 1740.00 | Avg Reward: 2181.10 | Frames: 1444466 | Epsilon: 0.1000


Training:  14%|█▍        | 1444953/10000000 [5:43:21<49:47:27, 47.73it/s]

Episode 2701 completed | Reward: 1140.00 | Avg Reward: 2172.50 | Frames: 1444944 | Epsilon: 0.1000


Training:  14%|█▍        | 1445415/10000000 [5:43:31<48:13:23, 49.28it/s]

Episode 2702 completed | Reward: 880.00 | Avg Reward: 2156.10 | Frames: 1445410 | Epsilon: 0.1000


Training:  14%|█▍        | 1445995/10000000 [5:43:42<47:56:07, 49.57it/s]

Episode 2703 completed | Reward: 1140.00 | Avg Reward: 2157.90 | Frames: 1445989 | Epsilon: 0.1000


Training:  14%|█▍        | 1446545/10000000 [5:43:53<59:12:55, 40.12it/s]

Episode 2704 completed | Reward: 2080.00 | Avg Reward: 2143.90 | Frames: 1446544 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  14%|█▍        | 1447037/10000000 [5:44:03<50:42:06, 46.86it/s]

Episode 2705 completed | Reward: 1940.00 | Avg Reward: 2147.70 | Frames: 1447030 | Epsilon: 0.1000


Training:  14%|█▍        | 1447537/10000000 [5:44:13<51:29:31, 46.14it/s]

Episode 2706 completed | Reward: 2380.00 | Avg Reward: 2146.30 | Frames: 1447529 | Epsilon: 0.1000


Training:  14%|█▍        | 1448313/10000000 [5:44:29<50:21:02, 47.18it/s]

Episode 2707 completed | Reward: 1620.00 | Avg Reward: 2127.10 | Frames: 1448306 | Epsilon: 0.1000


Training:  14%|█▍        | 1448845/10000000 [5:44:39<50:26:02, 47.10it/s]

Episode 2708 completed | Reward: 1360.00 | Avg Reward: 2131.30 | Frames: 1448836 | Epsilon: 0.1000


Training:  14%|█▍        | 1449329/10000000 [5:44:49<50:15:24, 47.26it/s]

Episode 2709 completed | Reward: 1500.00 | Avg Reward: 2132.30 | Frames: 1449321 | Epsilon: 0.1000


Training:  15%|█▍        | 1450193/10000000 [5:45:06<48:27:06, 49.02it/s]

Episode 2710 completed | Reward: 2180.00 | Avg Reward: 2111.70 | Frames: 1450184 | Epsilon: 0.1000


Training:  15%|█▍        | 1450678/10000000 [5:45:16<47:56:55, 49.53it/s]

Episode 2711 completed | Reward: 1700.00 | Avg Reward: 2114.90 | Frames: 1450675 | Epsilon: 0.1000


Training:  15%|█▍        | 1451225/10000000 [5:45:27<50:42:12, 46.83it/s]

Episode 2712 completed | Reward: 840.00 | Avg Reward: 2097.30 | Frames: 1451216 | Epsilon: 0.1000


Training:  15%|█▍        | 1451942/10000000 [5:45:41<48:29:27, 48.97it/s]

Episode 2713 completed | Reward: 2220.00 | Avg Reward: 2094.10 | Frames: 1451933 | Epsilon: 0.1000


Training:  15%|█▍        | 1452506/10000000 [5:45:53<48:28:26, 48.98it/s]

Episode 2714 completed | Reward: 1060.00 | Avg Reward: 2053.10 | Frames: 1452502 | Epsilon: 0.1000


Training:  15%|█▍        | 1453191/10000000 [5:46:06<47:00:32, 50.50it/s]

Episode 2715 completed | Reward: 3340.00 | Avg Reward: 2067.50 | Frames: 1453186 | Epsilon: 0.1000


Training:  15%|█▍        | 1454019/10000000 [5:46:23<46:42:38, 50.82it/s]

Episode 2716 completed | Reward: 3740.00 | Avg Reward: 2096.50 | Frames: 1454014 | Epsilon: 0.1000


Training:  15%|█▍        | 1454486/10000000 [5:46:32<49:34:44, 47.88it/s]

Episode 2717 completed | Reward: 1180.00 | Avg Reward: 2066.70 | Frames: 1454481 | Epsilon: 0.1000


Training:  15%|█▍        | 1454994/10000000 [5:46:43<47:58:31, 49.48it/s]

Episode 2718 completed | Reward: 1740.00 | Avg Reward: 2074.10 | Frames: 1454984 | Epsilon: 0.1000


Training:  15%|█▍        | 1455681/10000000 [5:46:56<49:38:14, 47.82it/s]

Episode 2719 completed | Reward: 1120.00 | Avg Reward: 2052.50 | Frames: 1455672 | Epsilon: 0.1000


Training:  15%|█▍        | 1456095/10000000 [5:47:05<46:46:21, 50.74it/s]

Episode 2720 completed | Reward: 1040.00 | Avg Reward: 2050.70 | Frames: 1456090 | Epsilon: 0.1000


Training:  15%|█▍        | 1456999/10000000 [5:47:23<46:47:35, 50.71it/s]

Episode 2721 completed | Reward: 4000.00 | Avg Reward: 2071.50 | Frames: 1456999 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▍        | 1457574/10000000 [5:47:34<49:13:15, 48.21it/s]

Episode 2722 completed | Reward: 2360.00 | Avg Reward: 2083.70 | Frames: 1457567 | Epsilon: 0.1000


Training:  15%|█▍        | 1458057/10000000 [5:47:44<49:29:23, 47.94it/s]

Episode 2723 completed | Reward: 3160.00 | Avg Reward: 2086.90 | Frames: 1458048 | Epsilon: 0.1000


Training:  15%|█▍        | 1458919/10000000 [5:48:01<45:51:55, 51.73it/s]

Episode 2724 completed | Reward: 2760.00 | Avg Reward: 2094.30 | Frames: 1458915 | Epsilon: 0.1000


Training:  15%|█▍        | 1459620/10000000 [5:48:15<43:23:50, 54.67it/s]

Episode 2725 completed | Reward: 1500.00 | Avg Reward: 2077.40 | Frames: 1459614 | Epsilon: 0.1000


Training:  15%|█▍        | 1460078/10000000 [5:48:24<50:10:56, 47.27it/s]

Episode 2726 completed | Reward: 2540.00 | Avg Reward: 2086.40 | Frames: 1460071 | Epsilon: 0.1000


Training:  15%|█▍        | 1461165/10000000 [5:48:46<50:43:45, 46.76it/s]

Episode 2727 completed | Reward: 3500.00 | Avg Reward: 2102.60 | Frames: 1461156 | Epsilon: 0.1000


Training:  15%|█▍        | 1461633/10000000 [5:48:55<49:31:26, 47.89it/s]

Episode 2728 completed | Reward: 920.00 | Avg Reward: 2075.00 | Frames: 1461624 | Epsilon: 0.1000


Training:  15%|█▍        | 1462089/10000000 [5:49:04<49:18:26, 48.10it/s]

Episode 2729 completed | Reward: 980.00 | Avg Reward: 2075.80 | Frames: 1462080 | Epsilon: 0.1000


Training:  15%|█▍        | 1463207/10000000 [5:49:27<45:30:22, 52.11it/s]

Episode 2730 completed | Reward: 1680.00 | Avg Reward: 2070.20 | Frames: 1463202 | Epsilon: 0.1000


Training:  15%|█▍        | 1463867/10000000 [5:49:40<45:50:51, 51.72it/s]

Episode 2731 completed | Reward: 2200.00 | Avg Reward: 2077.00 | Frames: 1463861 | Epsilon: 0.1000


Training:  15%|█▍        | 1465161/10000000 [5:50:06<48:55:27, 48.46it/s]

Episode 2732 completed | Reward: 3320.00 | Avg Reward: 2102.00 | Frames: 1465152 | Epsilon: 0.1000


Training:  15%|█▍        | 1465653/10000000 [5:50:15<50:15:26, 47.17it/s]

Episode 2733 completed | Reward: 3520.00 | Avg Reward: 2115.60 | Frames: 1465644 | Epsilon: 0.1000


Training:  15%|█▍        | 1466515/10000000 [5:50:33<45:43:16, 51.84it/s]

Episode 2734 completed | Reward: 3260.00 | Avg Reward: 2125.40 | Frames: 1466509 | Epsilon: 0.1000


Training:  15%|█▍        | 1466958/10000000 [5:50:41<49:05:48, 48.28it/s]

Episode 2735 completed | Reward: 1260.00 | Avg Reward: 2111.60 | Frames: 1466954 | Epsilon: 0.1000


Training:  15%|█▍        | 1467453/10000000 [5:50:51<47:27:59, 49.93it/s]

Episode 2736 completed | Reward: 1640.00 | Avg Reward: 2117.80 | Frames: 1467453 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▍        | 1467977/10000000 [5:51:02<48:09:46, 49.21it/s]

Episode 2737 completed | Reward: 1120.00 | Avg Reward: 2118.20 | Frames: 1467975 | Epsilon: 0.1000


Training:  15%|█▍        | 1468535/10000000 [5:51:13<47:49:12, 49.56it/s]

Episode 2738 completed | Reward: 1340.00 | Avg Reward: 2120.40 | Frames: 1468529 | Epsilon: 0.1000


Training:  15%|█▍        | 1469733/10000000 [5:51:37<47:58:45, 49.39it/s]

Episode 2739 completed | Reward: 2860.00 | Avg Reward: 2117.20 | Frames: 1469731 | Epsilon: 0.1000


Training:  15%|█▍        | 1470338/10000000 [5:51:49<49:14:19, 48.12it/s]

Episode 2740 completed | Reward: 1620.00 | Avg Reward: 2107.60 | Frames: 1470331 | Epsilon: 0.1000


Training:  15%|█▍        | 1470822/10000000 [5:51:58<48:41:08, 48.66it/s]

Episode 2741 completed | Reward: 1480.00 | Avg Reward: 2092.60 | Frames: 1470819 | Epsilon: 0.1000


Training:  15%|█▍        | 1471345/10000000 [5:52:09<51:23:20, 46.10it/s]

Episode 2742 completed | Reward: 2940.00 | Avg Reward: 2069.80 | Frames: 1471338 | Epsilon: 0.1000


Training:  15%|█▍        | 1471965/10000000 [5:52:21<47:18:52, 50.07it/s]

Episode 2743 completed | Reward: 2680.00 | Avg Reward: 2080.40 | Frames: 1471956 | Epsilon: 0.1000


Training:  15%|█▍        | 1473193/10000000 [5:52:46<49:29:26, 47.86it/s]

Episode 2744 completed | Reward: 5400.00 | Avg Reward: 2125.00 | Frames: 1473184 | Epsilon: 0.1000


Training:  15%|█▍        | 1473711/10000000 [5:52:56<47:31:24, 49.84it/s]

Episode 2745 completed | Reward: 4140.00 | Avg Reward: 2149.40 | Frames: 1473705 | Epsilon: 0.1000


Training:  15%|█▍        | 1474402/10000000 [5:53:10<48:32:14, 48.79it/s]

Episode 2746 completed | Reward: 2520.00 | Avg Reward: 2149.80 | Frames: 1474393 | Epsilon: 0.1000


Training:  15%|█▍        | 1474927/10000000 [5:53:20<45:37:56, 51.89it/s]

Episode 2747 completed | Reward: 4930.00 | Avg Reward: 2191.10 | Frames: 1474923 | Epsilon: 0.1000


Training:  15%|█▍        | 1475570/10000000 [5:53:33<47:45:25, 49.58it/s]

Episode 2748 completed | Reward: 1300.00 | Avg Reward: 2176.90 | Frames: 1475561 | Epsilon: 0.1000


Training:  15%|█▍        | 1476605/10000000 [5:53:53<49:52:22, 47.47it/s]

Episode 2749 completed | Reward: 4980.00 | Avg Reward: 2218.10 | Frames: 1476597 | Epsilon: 0.1000


Training:  15%|█▍        | 1477162/10000000 [5:54:05<50:15:02, 47.11it/s]

Episode 2750 completed | Reward: 1240.00 | Avg Reward: 2205.50 | Frames: 1477155 | Epsilon: 0.1000


Training:  15%|█▍        | 1477578/10000000 [5:54:13<46:51:29, 50.52it/s]

Episode 2751 completed | Reward: 1120.00 | Avg Reward: 2196.70 | Frames: 1477578 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▍        | 1478425/10000000 [5:54:30<49:15:58, 48.05it/s]

Episode 2752 completed | Reward: 2380.00 | Avg Reward: 2191.70 | Frames: 1478418 | Epsilon: 0.1000


Training:  15%|█▍        | 1478961/10000000 [5:54:41<48:52:58, 48.42it/s]

Episode 2753 completed | Reward: 840.00 | Avg Reward: 2154.80 | Frames: 1478952 | Epsilon: 0.1000


Training:  15%|█▍        | 1479510/10000000 [5:54:52<49:57:19, 47.38it/s]

Episode 2754 completed | Reward: 1420.00 | Avg Reward: 2155.00 | Frames: 1479503 | Epsilon: 0.1000


Training:  15%|█▍        | 1480178/10000000 [5:55:05<48:47:41, 48.50it/s]

Episode 2755 completed | Reward: 1080.00 | Avg Reward: 2129.00 | Frames: 1480171 | Epsilon: 0.1000


Training:  15%|█▍        | 1480838/10000000 [5:55:18<49:00:38, 48.28it/s]

Episode 2756 completed | Reward: 1880.00 | Avg Reward: 2119.40 | Frames: 1480831 | Epsilon: 0.1000


Training:  15%|█▍        | 1481442/10000000 [5:55:30<48:43:01, 48.57it/s]

Episode 2757 completed | Reward: 1540.00 | Avg Reward: 2125.20 | Frames: 1481435 | Epsilon: 0.1000


Training:  15%|█▍        | 1481981/10000000 [5:55:41<49:58:14, 47.35it/s]

Episode 2758 completed | Reward: 3640.00 | Avg Reward: 2113.10 | Frames: 1481972 | Epsilon: 0.1000


Training:  15%|█▍        | 1482641/10000000 [5:55:54<51:37:36, 45.83it/s]

Episode 2759 completed | Reward: 1560.00 | Avg Reward: 2110.50 | Frames: 1482634 | Epsilon: 0.1000


Training:  15%|█▍        | 1483766/10000000 [5:56:16<48:21:18, 48.92it/s]

Episode 2760 completed | Reward: 3120.00 | Avg Reward: 2117.90 | Frames: 1483759 | Epsilon: 0.1000


Training:  15%|█▍        | 1484274/10000000 [5:56:26<47:57:47, 49.32it/s]

Episode 2761 completed | Reward: 1860.00 | Avg Reward: 2114.70 | Frames: 1484265 | Epsilon: 0.1000


Training:  15%|█▍        | 1484721/10000000 [5:56:35<48:03:52, 49.21it/s]

Episode 2762 completed | Reward: 720.00 | Avg Reward: 2070.70 | Frames: 1484719 | Epsilon: 0.1000


Training:  15%|█▍        | 1485119/10000000 [5:56:43<46:44:52, 50.60it/s]

Episode 2763 completed | Reward: 2480.00 | Avg Reward: 2070.30 | Frames: 1485115 | Epsilon: 0.1000


Training:  15%|█▍        | 1485693/10000000 [5:56:55<49:16:53, 47.99it/s]

Episode 2764 completed | Reward: 2280.00 | Avg Reward: 2085.70 | Frames: 1485691 | Epsilon: 0.1000


Training:  15%|█▍        | 1486234/10000000 [5:57:05<47:55:25, 49.35it/s]

Episode 2765 completed | Reward: 3900.00 | Avg Reward: 2105.10 | Frames: 1486226 | Epsilon: 0.1000


Training:  15%|█▍        | 1486751/10000000 [5:57:16<46:39:01, 50.69it/s]

Episode 2766 completed | Reward: 4440.00 | Avg Reward: 2118.10 | Frames: 1486747 | Epsilon: 0.1000


Training:  15%|█▍        | 1487234/10000000 [5:57:25<48:14:03, 49.02it/s]

Episode 2767 completed | Reward: 1300.00 | Avg Reward: 2117.50 | Frames: 1487227 | Epsilon: 0.1000


Training:  15%|█▍        | 1487986/10000000 [5:57:40<47:27:51, 49.82it/s]

Episode 2768 completed | Reward: 3940.00 | Avg Reward: 2125.10 | Frames: 1487986 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▍        | 1488421/10000000 [5:57:49<48:32:04, 48.71it/s]

Episode 2769 completed | Reward: 720.00 | Avg Reward: 2116.90 | Frames: 1488417 | Epsilon: 0.1000


Training:  15%|█▍        | 1489050/10000000 [5:58:01<48:37:18, 48.62it/s]

Episode 2770 completed | Reward: 3580.00 | Avg Reward: 2144.10 | Frames: 1489041 | Epsilon: 0.1000


Training:  15%|█▍        | 1489413/10000000 [5:58:09<50:13:26, 47.07it/s]

Episode 2771 completed | Reward: 3220.00 | Avg Reward: 2151.10 | Frames: 1489406 | Epsilon: 0.1000


Training:  15%|█▍        | 1489969/10000000 [5:58:20<50:01:19, 47.26it/s]

Episode 2772 completed | Reward: 3740.00 | Avg Reward: 2160.30 | Frames: 1489961 | Epsilon: 0.1000


Training:  15%|█▍        | 1490375/10000000 [5:58:28<45:37:46, 51.80it/s]

Episode 2773 completed | Reward: 1900.00 | Avg Reward: 2156.90 | Frames: 1490370 | Epsilon: 0.1000


Training:  15%|█▍        | 1490884/10000000 [5:58:38<43:43:59, 54.05it/s]

Episode 2774 completed | Reward: 2100.00 | Avg Reward: 2151.50 | Frames: 1490879 | Epsilon: 0.1000


Training:  15%|█▍        | 1491462/10000000 [5:58:50<49:47:22, 47.47it/s]

Episode 2775 completed | Reward: 760.00 | Avg Reward: 2147.90 | Frames: 1491458 | Epsilon: 0.1000


Training:  15%|█▍        | 1492371/10000000 [5:59:08<48:01:41, 49.21it/s]

Episode 2776 completed | Reward: 4100.00 | Avg Reward: 2154.30 | Frames: 1492368 | Epsilon: 0.1000


Training:  15%|█▍        | 1492862/10000000 [5:59:18<48:05:01, 49.15it/s]

Episode 2777 completed | Reward: 3740.00 | Avg Reward: 2179.50 | Frames: 1492855 | Epsilon: 0.1000


Training:  15%|█▍        | 1493361/10000000 [5:59:28<50:02:43, 47.22it/s]

Episode 2778 completed | Reward: 3300.00 | Avg Reward: 2202.10 | Frames: 1493352 | Epsilon: 0.1000


Training:  15%|█▍        | 1493783/10000000 [5:59:36<46:06:01, 51.25it/s]

Episode 2779 completed | Reward: 2880.00 | Avg Reward: 2227.30 | Frames: 1493777 | Epsilon: 0.1000


Training:  15%|█▍        | 1494409/10000000 [5:59:48<49:07:03, 48.10it/s]

Episode 2780 completed | Reward: 3360.00 | Avg Reward: 2248.90 | Frames: 1494402 | Epsilon: 0.1000


Training:  15%|█▍        | 1494909/10000000 [5:59:58<51:21:51, 46.00it/s]

Episode 2781 completed | Reward: 680.00 | Avg Reward: 2232.70 | Frames: 1494901 | Epsilon: 0.1000


Training:  15%|█▍        | 1495411/10000000 [6:00:08<45:41:55, 51.69it/s]

Episode 2782 completed | Reward: 3860.00 | Avg Reward: 2250.50 | Frames: 1495405 | Epsilon: 0.1000


Training:  15%|█▍        | 1496039/10000000 [6:00:21<46:32:31, 50.75it/s]

Episode 2783 completed | Reward: 4280.00 | Avg Reward: 2267.30 | Frames: 1496034 | Epsilon: 0.1000


Training:  15%|█▍        | 1496571/10000000 [6:00:32<47:35:26, 49.63it/s]

Episode 2784 completed | Reward: 2720.00 | Avg Reward: 2277.50 | Frames: 1496565 | Epsilon: 0.1000


Training:  15%|█▍        | 1497109/10000000 [6:00:42<51:03:33, 46.26it/s]

Episode 2785 completed | Reward: 1140.00 | Avg Reward: 2266.90 | Frames: 1497100 | Epsilon: 0.1000


Training:  15%|█▍        | 1498799/10000000 [6:01:16<56:02:37, 42.14it/s]

Episode 2786 completed | Reward: 4300.00 | Avg Reward: 2292.30 | Frames: 1498797 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▍        | 1499261/10000000 [6:01:25<48:15:06, 48.94it/s]

Episode 2787 completed | Reward: 3360.00 | Avg Reward: 2313.70 | Frames: 1499259 | Epsilon: 0.1000


Training:  15%|█▍        | 1499997/10000000 [6:01:40<47:23:24, 49.82it/s]

Model saved to weights/CarnivalDeterministic-v4_dqn_1500000frames.pth


Training:  15%|█▌        | 1500002/10000000 [6:01:51<1097:19:01,  2.15it/s]


Evaluation at frame 1500000: 2898.00
Model saved to weights/CarnivalDeterministic-v4_dqn_best.pth
Episode 2788 completed | Reward: 3120.00 | Avg Reward: 2309.50 | Frames: 1500001 | Epsilon: 0.1000


Training:  15%|█▌        | 1500583/10000000 [6:02:02<45:56:51, 51.38it/s]

Episode 2789 completed | Reward: 3320.00 | Avg Reward: 2317.70 | Frames: 1500576 | Epsilon: 0.1000


Training:  15%|█▌        | 1500969/10000000 [6:02:10<51:00:54, 46.28it/s]

Episode 2790 completed | Reward: 3140.00 | Avg Reward: 2340.10 | Frames: 1500962 | Epsilon: 0.1000


Training:  15%|█▌        | 1501534/10000000 [6:02:21<48:43:30, 48.45it/s]

Episode 2791 completed | Reward: 2120.00 | Avg Reward: 2340.70 | Frames: 1501527 | Epsilon: 0.1000


Training:  15%|█▌        | 1502053/10000000 [6:02:31<50:44:28, 46.52it/s]

Episode 2792 completed | Reward: 2920.00 | Avg Reward: 2349.30 | Frames: 1502046 | Epsilon: 0.1000


Training:  15%|█▌        | 1502555/10000000 [6:02:41<45:40:20, 51.68it/s]

Episode 2793 completed | Reward: 1480.00 | Avg Reward: 2354.10 | Frames: 1502550 | Epsilon: 0.1000


Training:  15%|█▌        | 1503105/10000000 [6:02:52<47:36:31, 49.58it/s]

Episode 2794 completed | Reward: 2320.00 | Avg Reward: 2363.90 | Frames: 1503099 | Epsilon: 0.1000


Training:  15%|█▌        | 1503719/10000000 [6:03:05<45:45:21, 51.58it/s]

Episode 2795 completed | Reward: 2580.00 | Avg Reward: 2371.70 | Frames: 1503715 | Epsilon: 0.1000


Training:  15%|█▌        | 1504204/10000000 [6:03:14<44:00:38, 53.62it/s]

Episode 2796 completed | Reward: 2680.00 | Avg Reward: 2356.50 | Frames: 1504198 | Epsilon: 0.1000


Training:  15%|█▌        | 1504817/10000000 [6:03:26<48:00:19, 49.16it/s]

Episode 2797 completed | Reward: 3960.00 | Avg Reward: 2368.10 | Frames: 1504814 | Epsilon: 0.1000


Training:  15%|█▌        | 1505177/10000000 [6:03:34<50:31:48, 46.70it/s]

Episode 2798 completed | Reward: 2140.00 | Avg Reward: 2373.70 | Frames: 1505176 | Epsilon: 0.1000


Training:  15%|█▌        | 1505673/10000000 [6:03:43<47:34:25, 49.60it/s]

Episode 2799 completed | Reward: 4640.00 | Avg Reward: 2407.50 | Frames: 1505664 | Epsilon: 0.1000


Training:  15%|█▌        | 1506279/10000000 [6:03:56<46:04:30, 51.21it/s]

Episode 2800 completed | Reward: 420.00 | Avg Reward: 2394.30 | Frames: 1506275 | Epsilon: 0.1000


Training:  15%|█▌        | 1506834/10000000 [6:04:07<47:37:16, 49.54it/s]

Episode 2801 completed | Reward: 2440.00 | Avg Reward: 2407.30 | Frames: 1506831 | Epsilon: 0.1000


Training:  15%|█▌        | 1507405/10000000 [6:04:18<49:19:29, 47.83it/s]

Episode 2802 completed | Reward: 3840.00 | Avg Reward: 2436.90 | Frames: 1507397 | Epsilon: 0.1000


Training:  15%|█▌        | 1508178/10000000 [6:04:33<48:50:48, 48.29it/s]

Episode 2803 completed | Reward: 4040.00 | Avg Reward: 2465.90 | Frames: 1508173 | Epsilon: 0.1000


Training:  15%|█▌        | 1508705/10000000 [6:04:44<49:09:08, 47.99it/s]

Episode 2804 completed | Reward: 2140.00 | Avg Reward: 2466.50 | Frames: 1508701 | Epsilon: 0.1000


Training:  15%|█▌        | 1509289/10000000 [6:04:56<46:22:19, 50.86it/s]

Episode 2805 completed | Reward: 3680.00 | Avg Reward: 2483.90 | Frames: 1509289 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▌        | 1509813/10000000 [6:05:06<49:10:38, 47.96it/s]

Episode 2806 completed | Reward: 1080.00 | Avg Reward: 2470.90 | Frames: 1509808 | Epsilon: 0.1000


Training:  15%|█▌        | 1510667/10000000 [6:05:23<45:18:23, 52.05it/s]

Episode 2807 completed | Reward: 3560.00 | Avg Reward: 2490.30 | Frames: 1510661 | Epsilon: 0.1000


Training:  15%|█▌        | 1511269/10000000 [6:05:35<49:12:02, 47.93it/s]

Episode 2808 completed | Reward: 2560.00 | Avg Reward: 2502.30 | Frames: 1511262 | Epsilon: 0.1000


Training:  15%|█▌        | 1511859/10000000 [6:05:47<45:44:44, 51.54it/s]

Episode 2809 completed | Reward: 1140.00 | Avg Reward: 2498.70 | Frames: 1511854 | Epsilon: 0.1000


Training:  15%|█▌        | 1512577/10000000 [6:06:01<48:31:11, 48.59it/s]

Episode 2810 completed | Reward: 4500.00 | Avg Reward: 2521.90 | Frames: 1512568 | Epsilon: 0.1000


Training:  15%|█▌        | 1513097/10000000 [6:06:12<47:50:25, 49.28it/s]

Episode 2811 completed | Reward: 680.00 | Avg Reward: 2511.70 | Frames: 1513088 | Epsilon: 0.1000


Training:  15%|█▌        | 1514502/10000000 [6:06:40<48:27:22, 48.64it/s]

Episode 2812 completed | Reward: 3200.00 | Avg Reward: 2535.30 | Frames: 1514497 | Epsilon: 0.1000


Training:  15%|█▌        | 1515002/10000000 [6:06:50<48:20:12, 48.76it/s]

Episode 2813 completed | Reward: 3690.00 | Avg Reward: 2550.00 | Frames: 1514999 | Epsilon: 0.1000


Training:  15%|█▌        | 1515529/10000000 [6:07:00<49:06:47, 47.99it/s]

Episode 2814 completed | Reward: 1060.00 | Avg Reward: 2550.00 | Frames: 1515520 | Epsilon: 0.1000


Training:  15%|█▌        | 1516103/10000000 [6:07:12<45:52:14, 51.38it/s]

Episode 2815 completed | Reward: 740.00 | Avg Reward: 2524.00 | Frames: 1516098 | Epsilon: 0.1000


Training:  15%|█▌        | 1516494/10000000 [6:07:20<49:50:18, 47.28it/s]

Episode 2816 completed | Reward: 3120.00 | Avg Reward: 2517.80 | Frames: 1516487 | Epsilon: 0.1000


Training:  15%|█▌        | 1516985/10000000 [6:07:30<51:00:42, 46.19it/s]

Episode 2817 completed | Reward: 940.00 | Avg Reward: 2515.40 | Frames: 1516978 | Epsilon: 0.1000


Training:  15%|█▌        | 1517422/10000000 [6:07:38<48:52:13, 48.21it/s]

Episode 2818 completed | Reward: 3590.00 | Avg Reward: 2533.90 | Frames: 1517415 | Epsilon: 0.1000


Training:  15%|█▌        | 1517850/10000000 [6:07:47<49:33:53, 47.54it/s]

Episode 2819 completed | Reward: 1100.00 | Avg Reward: 2533.70 | Frames: 1517843 | Epsilon: 0.1000


Training:  15%|█▌        | 1518405/10000000 [6:07:58<47:53:53, 49.19it/s]

Episode 2820 completed | Reward: 3900.00 | Avg Reward: 2562.30 | Frames: 1518402 | Epsilon: 0.1000


Training:  15%|█▌        | 1518905/10000000 [6:08:08<50:11:28, 46.94it/s]

Episode 2821 completed | Reward: 4100.00 | Avg Reward: 2563.30 | Frames: 1518896 | Epsilon: 0.1000


Training:  15%|█▌        | 1519297/10000000 [6:08:16<57:55:24, 40.67it/s]

Episode 2822 completed | Reward: 2540.00 | Avg Reward: 2565.10 | Frames: 1519296 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▌        | 1519854/10000000 [6:08:27<49:06:54, 47.96it/s]

Episode 2823 completed | Reward: 920.00 | Avg Reward: 2542.70 | Frames: 1519847 | Epsilon: 0.1000


Training:  15%|█▌        | 1520354/10000000 [6:08:37<49:14:32, 47.83it/s]

Episode 2824 completed | Reward: 2920.00 | Avg Reward: 2544.30 | Frames: 1520349 | Epsilon: 0.1000


Training:  15%|█▌        | 1520765/10000000 [6:08:46<49:39:32, 47.43it/s]

Episode 2825 completed | Reward: 2340.00 | Avg Reward: 2552.70 | Frames: 1520756 | Epsilon: 0.1000


Training:  15%|█▌        | 1521203/10000000 [6:08:54<45:54:01, 51.31it/s]

Episode 2826 completed | Reward: 2660.00 | Avg Reward: 2553.90 | Frames: 1521199 | Epsilon: 0.1000


Training:  15%|█▌        | 1521653/10000000 [6:09:03<49:31:29, 47.55it/s]

Episode 2827 completed | Reward: 2680.00 | Avg Reward: 2545.70 | Frames: 1521645 | Epsilon: 0.1000


Training:  15%|█▌        | 1522109/10000000 [6:09:12<47:33:32, 49.52it/s]

Episode 2828 completed | Reward: 2540.00 | Avg Reward: 2561.90 | Frames: 1522100 | Epsilon: 0.1000


Training:  15%|█▌        | 1522593/10000000 [6:09:22<49:45:53, 47.32it/s]

Episode 2829 completed | Reward: 1400.00 | Avg Reward: 2566.10 | Frames: 1522584 | Epsilon: 0.1000


Training:  15%|█▌        | 1523069/10000000 [6:09:32<51:18:19, 45.90it/s]

Episode 2830 completed | Reward: 3320.00 | Avg Reward: 2582.50 | Frames: 1523062 | Epsilon: 0.1000


Training:  15%|█▌        | 1523537/10000000 [6:09:41<50:36:41, 46.52it/s]

Episode 2831 completed | Reward: 2280.00 | Avg Reward: 2583.30 | Frames: 1523528 | Epsilon: 0.1000


Training:  15%|█▌        | 1524086/10000000 [6:09:52<47:59:21, 49.06it/s]

Episode 2832 completed | Reward: 3400.00 | Avg Reward: 2584.10 | Frames: 1524079 | Epsilon: 0.1000


Training:  15%|█▌        | 1524570/10000000 [6:10:02<48:10:18, 48.87it/s]

Episode 2833 completed | Reward: 4190.00 | Avg Reward: 2590.80 | Frames: 1524565 | Epsilon: 0.1000


Training:  15%|█▌        | 1525054/10000000 [6:10:11<47:30:33, 49.55it/s]

Episode 2834 completed | Reward: 4480.00 | Avg Reward: 2603.00 | Frames: 1525045 | Epsilon: 0.1000


Training:  15%|█▌        | 1525481/10000000 [6:10:20<49:48:55, 47.26it/s]

Episode 2835 completed | Reward: 2880.00 | Avg Reward: 2619.20 | Frames: 1525473 | Epsilon: 0.1000


Training:  15%|█▌        | 1525903/10000000 [6:10:28<45:05:27, 52.20it/s]

Episode 2836 completed | Reward: 2740.00 | Avg Reward: 2630.20 | Frames: 1525899 | Epsilon: 0.1000


Training:  15%|█▌        | 1526635/10000000 [6:10:43<47:10:11, 49.90it/s]

Episode 2837 completed | Reward: 3360.00 | Avg Reward: 2652.60 | Frames: 1526629 | Epsilon: 0.1000


Training:  15%|█▌        | 1527495/10000000 [6:11:00<45:23:33, 51.85it/s]

Episode 2838 completed | Reward: 980.00 | Avg Reward: 2649.00 | Frames: 1527489 | Epsilon: 0.1000


Training:  15%|█▌        | 1527978/10000000 [6:11:10<48:48:05, 48.22it/s]

Episode 2839 completed | Reward: 4140.00 | Avg Reward: 2661.80 | Frames: 1527971 | Epsilon: 0.1000


Training:  15%|█▌        | 1528485/10000000 [6:11:20<49:45:14, 47.30it/s]

Episode 2840 completed | Reward: 2220.00 | Avg Reward: 2667.80 | Frames: 1528477 | Epsilon: 0.1000


Training:  15%|█▌        | 1528963/10000000 [6:11:30<45:43:54, 51.45it/s]

Episode 2841 completed | Reward: 1080.00 | Avg Reward: 2663.80 | Frames: 1528959 | Epsilon: 0.1000


Training:  15%|█▌        | 1530090/10000000 [6:11:52<46:54:32, 50.16it/s]

Episode 2842 completed | Reward: 2780.00 | Avg Reward: 2662.20 | Frames: 1530090 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▌        | 1530730/10000000 [6:12:05<47:54:37, 49.10it/s]

Episode 2843 completed | Reward: 2080.00 | Avg Reward: 2656.20 | Frames: 1530723 | Epsilon: 0.1000


Training:  15%|█▌        | 1531134/10000000 [6:12:13<48:18:59, 48.69it/s]

Episode 2844 completed | Reward: 1940.00 | Avg Reward: 2621.60 | Frames: 1531130 | Epsilon: 0.1000


Training:  15%|█▌        | 1531745/10000000 [6:12:26<51:22:20, 45.79it/s]

Episode 2845 completed | Reward: 2740.00 | Avg Reward: 2607.60 | Frames: 1531736 | Epsilon: 0.1000


Training:  15%|█▌        | 1532430/10000000 [6:12:39<47:42:06, 49.31it/s]

Episode 2846 completed | Reward: 3330.00 | Avg Reward: 2615.70 | Frames: 1532423 | Epsilon: 0.1000


Training:  15%|█▌        | 1532962/10000000 [6:12:50<48:27:24, 48.54it/s]

Episode 2847 completed | Reward: 2580.00 | Avg Reward: 2592.20 | Frames: 1532957 | Epsilon: 0.1000


Training:  15%|█▌        | 1533373/10000000 [6:12:58<50:03:28, 46.98it/s]

Episode 2848 completed | Reward: 3640.00 | Avg Reward: 2615.60 | Frames: 1533364 | Epsilon: 0.1000


Training:  15%|█▌        | 1533893/10000000 [6:13:09<48:15:57, 48.72it/s]

Episode 2849 completed | Reward: 1020.00 | Avg Reward: 2576.00 | Frames: 1533888 | Epsilon: 0.1000


Training:  15%|█▌        | 1534481/10000000 [6:13:20<48:49:34, 48.16it/s]

Episode 2850 completed | Reward: 4440.00 | Avg Reward: 2608.00 | Frames: 1534472 | Epsilon: 0.1000


Training:  15%|█▌        | 1535002/10000000 [6:13:31<48:57:08, 48.03it/s]

Episode 2851 completed | Reward: 4790.00 | Avg Reward: 2644.70 | Frames: 1535000 | Epsilon: 0.1000


Training:  15%|█▌        | 1535513/10000000 [6:13:41<49:35:36, 47.41it/s]

Episode 2852 completed | Reward: 2680.00 | Avg Reward: 2647.70 | Frames: 1535504 | Epsilon: 0.1000


Training:  15%|█▌        | 1536221/10000000 [6:13:56<49:16:19, 47.72it/s]

Episode 2853 completed | Reward: 1940.00 | Avg Reward: 2658.70 | Frames: 1536212 | Epsilon: 0.1000


Training:  15%|█▌        | 1536597/10000000 [6:14:03<47:52:56, 49.10it/s]

Episode 2854 completed | Reward: 1300.00 | Avg Reward: 2657.50 | Frames: 1536588 | Epsilon: 0.1000


Training:  15%|█▌        | 1537353/10000000 [6:14:18<49:30:45, 47.48it/s]

Episode 2855 completed | Reward: 4990.00 | Avg Reward: 2696.60 | Frames: 1537344 | Epsilon: 0.1000


Training:  15%|█▌        | 1537873/10000000 [6:14:29<48:39:50, 48.30it/s]

Episode 2856 completed | Reward: 2140.00 | Avg Reward: 2699.20 | Frames: 1537865 | Epsilon: 0.1000


Training:  15%|█▌        | 1538333/10000000 [6:14:38<51:17:40, 45.82it/s]

Episode 2857 completed | Reward: 1940.00 | Avg Reward: 2703.20 | Frames: 1538325 | Epsilon: 0.1000


Training:  15%|█▌        | 1539433/10000000 [6:15:00<51:05:06, 46.00it/s]

Episode 2858 completed | Reward: 3480.00 | Avg Reward: 2701.60 | Frames: 1539425 | Epsilon: 0.1000


Training:  15%|█▌        | 1540185/10000000 [6:15:15<58:36:58, 40.09it/s]

Episode 2859 completed | Reward: 2720.00 | Avg Reward: 2713.20 | Frames: 1540184 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  15%|█▌        | 1540689/10000000 [6:15:26<50:22:12, 46.65it/s]

Episode 2860 completed | Reward: 2820.00 | Avg Reward: 2710.20 | Frames: 1540682 | Epsilon: 0.1000


Training:  15%|█▌        | 1541711/10000000 [6:15:46<46:35:39, 50.43it/s]

Episode 2861 completed | Reward: 5400.00 | Avg Reward: 2745.60 | Frames: 1541706 | Epsilon: 0.1000


Training:  15%|█▌        | 1542242/10000000 [6:15:57<48:39:46, 48.28it/s]

Episode 2862 completed | Reward: 920.00 | Avg Reward: 2747.60 | Frames: 1542237 | Epsilon: 0.1000


Training:  15%|█▌        | 1542623/10000000 [6:16:04<47:27:50, 49.50it/s]

Episode 2863 completed | Reward: 740.00 | Avg Reward: 2730.20 | Frames: 1542619 | Epsilon: 0.1000


Training:  15%|█▌        | 1543205/10000000 [6:16:16<49:34:20, 47.39it/s]

Episode 2864 completed | Reward: 2440.00 | Avg Reward: 2731.80 | Frames: 1543197 | Epsilon: 0.1000


Training:  15%|█▌        | 1543637/10000000 [6:16:25<48:26:29, 48.49it/s]

Episode 2865 completed | Reward: 1600.00 | Avg Reward: 2708.80 | Frames: 1543628 | Epsilon: 0.1000


Training:  15%|█▌        | 1544146/10000000 [6:16:35<48:08:34, 48.79it/s]

Episode 2866 completed | Reward: 1960.00 | Avg Reward: 2684.00 | Frames: 1544138 | Epsilon: 0.1000


Training:  15%|█▌        | 1544622/10000000 [6:16:45<49:35:51, 47.36it/s]

Episode 2867 completed | Reward: 600.00 | Avg Reward: 2677.00 | Frames: 1544619 | Epsilon: 0.1000


Training:  15%|█▌        | 1545085/10000000 [6:16:54<49:01:50, 47.90it/s]

Episode 2868 completed | Reward: 1560.00 | Avg Reward: 2653.20 | Frames: 1545076 | Epsilon: 0.1000


Training:  15%|█▌        | 1545593/10000000 [6:17:04<49:37:46, 47.32it/s]

Episode 2869 completed | Reward: 1020.00 | Avg Reward: 2656.20 | Frames: 1545585 | Epsilon: 0.1000


Training:  15%|█▌        | 1546089/10000000 [6:17:14<49:13:33, 47.70it/s]

Episode 2870 completed | Reward: 4440.00 | Avg Reward: 2664.80 | Frames: 1546080 | Epsilon: 0.1000


Training:  15%|█▌        | 1546601/10000000 [6:17:24<49:50:31, 47.11it/s]

Episode 2871 completed | Reward: 2680.00 | Avg Reward: 2659.40 | Frames: 1546592 | Epsilon: 0.1000


Training:  15%|█▌        | 1546919/10000000 [6:17:31<45:32:18, 51.56it/s]

Episode 2872 completed | Reward: 1340.00 | Avg Reward: 2635.40 | Frames: 1546914 | Epsilon: 0.1000


Training:  15%|█▌        | 1547493/10000000 [6:17:42<47:20:44, 49.59it/s]

Episode 2873 completed | Reward: 3180.00 | Avg Reward: 2648.20 | Frames: 1547484 | Epsilon: 0.1000


Training:  15%|█▌        | 1548017/10000000 [6:17:53<49:06:48, 47.80it/s]

Episode 2874 completed | Reward: 4040.00 | Avg Reward: 2667.60 | Frames: 1548008 | Epsilon: 0.1000


Training:  15%|█▌        | 1548569/10000000 [6:18:04<48:18:18, 48.60it/s]

Episode 2875 completed | Reward: 1100.00 | Avg Reward: 2671.00 | Frames: 1548561 | Epsilon: 0.1000


Training:  15%|█▌        | 1549367/10000000 [6:18:20<47:02:29, 49.90it/s]

Episode 2876 completed | Reward: 3920.00 | Avg Reward: 2669.20 | Frames: 1549362 | Epsilon: 0.1000


Training:  15%|█▌        | 1549946/10000000 [6:18:32<47:54:41, 48.99it/s]

Episode 2877 completed | Reward: 3080.00 | Avg Reward: 2662.60 | Frames: 1549942 | Epsilon: 0.1000


Training:  16%|█▌        | 1550679/10000000 [6:18:46<55:56:04, 41.96it/s]

Episode 2878 completed | Reward: 2820.00 | Avg Reward: 2657.80 | Frames: 1550676 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1551258/10000000 [6:18:58<47:59:59, 48.89it/s]

Episode 2879 completed | Reward: 1520.00 | Avg Reward: 2644.20 | Frames: 1551252 | Epsilon: 0.1000


Training:  16%|█▌        | 1551807/10000000 [6:19:09<46:45:46, 50.18it/s]

Episode 2880 completed | Reward: 2800.00 | Avg Reward: 2638.60 | Frames: 1551803 | Epsilon: 0.1000


Training:  16%|█▌        | 1552357/10000000 [6:19:20<48:54:42, 47.98it/s]

Episode 2881 completed | Reward: 4750.00 | Avg Reward: 2679.30 | Frames: 1552348 | Epsilon: 0.1000


Training:  16%|█▌        | 1552867/10000000 [6:19:30<46:12:12, 50.78it/s]

Episode 2882 completed | Reward: 1240.00 | Avg Reward: 2653.10 | Frames: 1552865 | Epsilon: 0.1000


Training:  16%|█▌        | 1553469/10000000 [6:19:43<50:19:32, 46.62it/s]

Episode 2883 completed | Reward: 2460.00 | Avg Reward: 2634.90 | Frames: 1553462 | Epsilon: 0.1000


Training:  16%|█▌        | 1554039/10000000 [6:19:54<44:47:44, 52.37it/s]

Episode 2884 completed | Reward: 3500.00 | Avg Reward: 2642.70 | Frames: 1554034 | Epsilon: 0.1000


Training:  16%|█▌        | 1554762/10000000 [6:20:09<50:23:03, 46.56it/s]

Episode 2885 completed | Reward: 3080.00 | Avg Reward: 2662.10 | Frames: 1554755 | Epsilon: 0.1000


Training:  16%|█▌        | 1555245/10000000 [6:20:18<48:52:31, 47.99it/s]

Episode 2886 completed | Reward: 4840.00 | Avg Reward: 2667.50 | Frames: 1555237 | Epsilon: 0.1000


Training:  16%|█▌        | 1556178/10000000 [6:20:37<48:25:21, 48.44it/s]

Episode 2887 completed | Reward: 3920.00 | Avg Reward: 2673.10 | Frames: 1556174 | Epsilon: 0.1000


Training:  16%|█▌        | 1556862/10000000 [6:20:51<49:11:29, 47.68it/s]

Episode 2888 completed | Reward: 2560.00 | Avg Reward: 2667.50 | Frames: 1556855 | Epsilon: 0.1000


Training:  16%|█▌        | 1557401/10000000 [6:21:02<50:18:56, 46.61it/s]

Episode 2889 completed | Reward: 1660.00 | Avg Reward: 2650.90 | Frames: 1557394 | Epsilon: 0.1000


Training:  16%|█▌        | 1557939/10000000 [6:21:12<45:08:09, 51.95it/s]

Episode 2890 completed | Reward: 1660.00 | Avg Reward: 2636.10 | Frames: 1557935 | Epsilon: 0.1000


Training:  16%|█▌        | 1558389/10000000 [6:21:21<49:19:46, 47.54it/s]

Episode 2891 completed | Reward: 1060.00 | Avg Reward: 2625.50 | Frames: 1558382 | Epsilon: 0.1000


Training:  16%|█▌        | 1558826/10000000 [6:21:30<48:15:42, 48.58it/s]

Episode 2892 completed | Reward: 1440.00 | Avg Reward: 2610.70 | Frames: 1558819 | Epsilon: 0.1000


Training:  16%|█▌        | 1559614/10000000 [6:21:46<48:49:59, 48.01it/s]

Episode 2893 completed | Reward: 3700.00 | Avg Reward: 2632.90 | Frames: 1559607 | Epsilon: 0.1000


Training:  16%|█▌        | 1560107/10000000 [6:21:56<47:23:07, 49.48it/s]

Episode 2894 completed | Reward: 3740.00 | Avg Reward: 2647.10 | Frames: 1560102 | Epsilon: 0.1000


Training:  16%|█▌        | 1560591/10000000 [6:22:05<44:40:00, 52.48it/s]

Episode 2895 completed | Reward: 1980.00 | Avg Reward: 2641.10 | Frames: 1560587 | Epsilon: 0.1000


Training:  16%|█▌        | 1561099/10000000 [6:22:16<57:47:33, 40.56it/s]

Episode 2896 completed | Reward: 3040.00 | Avg Reward: 2644.70 | Frames: 1561098 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1561833/10000000 [6:22:31<47:56:05, 48.90it/s]

Episode 2897 completed | Reward: 3650.00 | Avg Reward: 2641.60 | Frames: 1561824 | Epsilon: 0.1000


Training:  16%|█▌        | 1562713/10000000 [6:22:48<48:57:20, 47.87it/s]

Episode 2898 completed | Reward: 3880.00 | Avg Reward: 2659.00 | Frames: 1562704 | Epsilon: 0.1000


Training:  16%|█▌        | 1563229/10000000 [6:22:59<49:35:15, 47.26it/s]

Episode 2899 completed | Reward: 2820.00 | Avg Reward: 2640.80 | Frames: 1563221 | Epsilon: 0.1000


Training:  16%|█▌        | 1563905/10000000 [6:23:12<49:18:50, 47.52it/s]

Episode 2900 completed | Reward: 1700.00 | Avg Reward: 2653.60 | Frames: 1563896 | Epsilon: 0.1000


Training:  16%|█▌        | 1564319/10000000 [6:23:21<45:02:35, 52.02it/s]

Episode 2901 completed | Reward: 740.00 | Avg Reward: 2636.60 | Frames: 1564313 | Epsilon: 0.1000


Training:  16%|█▌        | 1564959/10000000 [6:23:34<46:06:07, 50.82it/s]

Episode 2902 completed | Reward: 1780.00 | Avg Reward: 2616.00 | Frames: 1564955 | Epsilon: 0.1000


Training:  16%|█▌        | 1565634/10000000 [6:23:47<48:42:38, 48.10it/s]

Episode 2903 completed | Reward: 2500.00 | Avg Reward: 2600.60 | Frames: 1565630 | Epsilon: 0.1000


Training:  16%|█▌        | 1566253/10000000 [6:24:00<47:45:51, 49.05it/s]

Episode 2904 completed | Reward: 1060.00 | Avg Reward: 2589.80 | Frames: 1566250 | Epsilon: 0.1000


Training:  16%|█▌        | 1566729/10000000 [6:24:09<50:50:45, 46.07it/s]

Episode 2905 completed | Reward: 580.00 | Avg Reward: 2558.80 | Frames: 1566721 | Epsilon: 0.1000


Training:  16%|█▌        | 1567207/10000000 [6:24:19<46:46:43, 50.08it/s]

Episode 2906 completed | Reward: 2080.00 | Avg Reward: 2568.80 | Frames: 1567203 | Epsilon: 0.1000


Training:  16%|█▌        | 1567805/10000000 [6:24:31<48:37:33, 48.17it/s]

Episode 2907 completed | Reward: 2540.00 | Avg Reward: 2558.60 | Frames: 1567796 | Epsilon: 0.1000


Training:  16%|█▌        | 1568357/10000000 [6:24:42<50:08:27, 46.71it/s]

Episode 2908 completed | Reward: 3740.00 | Avg Reward: 2570.40 | Frames: 1568349 | Epsilon: 0.1000


Training:  16%|█▌        | 1568982/10000000 [6:24:55<48:57:18, 47.84it/s]

Episode 2909 completed | Reward: 3280.00 | Avg Reward: 2591.80 | Frames: 1568972 | Epsilon: 0.1000


Training:  16%|█▌        | 1569469/10000000 [6:25:05<49:30:59, 47.29it/s]

Episode 2910 completed | Reward: 3440.00 | Avg Reward: 2581.20 | Frames: 1569462 | Epsilon: 0.1000


Training:  16%|█▌        | 1570221/10000000 [6:25:20<48:38:21, 48.14it/s]

Episode 2911 completed | Reward: 3080.00 | Avg Reward: 2605.20 | Frames: 1570216 | Epsilon: 0.1000


Training:  16%|█▌        | 1570723/10000000 [6:25:30<46:22:05, 50.50it/s]

Episode 2912 completed | Reward: 2380.00 | Avg Reward: 2597.00 | Frames: 1570717 | Epsilon: 0.1000


Training:  16%|█▌        | 1571152/10000000 [6:25:39<54:43:16, 42.79it/s]

Episode 2913 completed | Reward: 1340.00 | Avg Reward: 2573.50 | Frames: 1571151 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1571806/10000000 [6:25:52<49:19:36, 47.46it/s]

Episode 2914 completed | Reward: 2740.00 | Avg Reward: 2590.30 | Frames: 1571799 | Epsilon: 0.1000


Training:  16%|█▌        | 1572266/10000000 [6:26:01<47:45:42, 49.01it/s]

Episode 2915 completed | Reward: 2620.00 | Avg Reward: 2609.10 | Frames: 1572261 | Epsilon: 0.1000


Training:  16%|█▌        | 1573475/10000000 [6:26:25<45:51:35, 51.04it/s]

Episode 2916 completed | Reward: 4980.00 | Avg Reward: 2627.70 | Frames: 1573470 | Epsilon: 0.1000


Training:  16%|█▌        | 1573849/10000000 [6:26:33<49:07:22, 47.65it/s]

Episode 2917 completed | Reward: 1600.00 | Avg Reward: 2634.30 | Frames: 1573842 | Epsilon: 0.1000


Training:  16%|█▌        | 1574313/10000000 [6:26:42<47:11:31, 49.59it/s]

Episode 2918 completed | Reward: 2700.00 | Avg Reward: 2625.40 | Frames: 1574310 | Epsilon: 0.1000


Training:  16%|█▌        | 1574839/10000000 [6:26:53<46:28:49, 50.35it/s]

Episode 2919 completed | Reward: 1440.00 | Avg Reward: 2628.80 | Frames: 1574832 | Epsilon: 0.1000


Training:  16%|█▌        | 1575833/10000000 [6:27:13<51:13:43, 45.68it/s]

Episode 2920 completed | Reward: 6300.00 | Avg Reward: 2652.80 | Frames: 1575826 | Epsilon: 0.1000


Training:  16%|█▌        | 1576289/10000000 [6:27:22<48:12:41, 48.53it/s]

Episode 2921 completed | Reward: 3880.00 | Avg Reward: 2650.60 | Frames: 1576287 | Epsilon: 0.1000


Training:  16%|█▌        | 1576761/10000000 [6:27:31<48:40:44, 48.07it/s]

Episode 2922 completed | Reward: 1480.00 | Avg Reward: 2640.00 | Frames: 1576752 | Epsilon: 0.1000


Training:  16%|█▌        | 1577270/10000000 [6:27:42<47:28:17, 49.29it/s]

Episode 2923 completed | Reward: 1180.00 | Avg Reward: 2642.60 | Frames: 1577260 | Epsilon: 0.1000


Training:  16%|█▌        | 1577812/10000000 [6:27:53<44:13:38, 52.90it/s]

Episode 2924 completed | Reward: 5210.00 | Avg Reward: 2665.50 | Frames: 1577804 | Epsilon: 0.1000


Training:  16%|█▌        | 1578342/10000000 [6:28:03<48:40:26, 48.06it/s]

Episode 2925 completed | Reward: 2180.00 | Avg Reward: 2663.90 | Frames: 1578335 | Epsilon: 0.1000


Training:  16%|█▌        | 1578906/10000000 [6:28:15<47:14:46, 49.51it/s]

Episode 2926 completed | Reward: 2440.00 | Avg Reward: 2661.70 | Frames: 1578903 | Epsilon: 0.1000


Training:  16%|█▌        | 1579481/10000000 [6:28:26<50:11:50, 46.60it/s]

Episode 2927 completed | Reward: 2660.00 | Avg Reward: 2661.50 | Frames: 1579473 | Epsilon: 0.1000


Training:  16%|█▌        | 1579983/10000000 [6:28:36<46:02:10, 50.81it/s]

Episode 2928 completed | Reward: 2920.00 | Avg Reward: 2665.30 | Frames: 1579977 | Epsilon: 0.1000


Training:  16%|█▌        | 1580562/10000000 [6:28:48<47:31:59, 49.20it/s]

Episode 2929 completed | Reward: 3940.00 | Avg Reward: 2690.70 | Frames: 1580554 | Epsilon: 0.1000


Training:  16%|█▌        | 1581298/10000000 [6:29:03<47:20:58, 49.39it/s]

Episode 2930 completed | Reward: 3200.00 | Avg Reward: 2689.50 | Frames: 1581298 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1581882/10000000 [6:29:15<48:12:50, 48.50it/s]

Episode 2931 completed | Reward: 800.00 | Avg Reward: 2674.70 | Frames: 1581878 | Epsilon: 0.1000


Training:  16%|█▌        | 1582705/10000000 [6:29:31<49:35:18, 47.15it/s]

Episode 2932 completed | Reward: 2160.00 | Avg Reward: 2662.30 | Frames: 1582698 | Epsilon: 0.1000


Training:  16%|█▌        | 1583302/10000000 [6:29:43<48:19:21, 48.38it/s]

Episode 2933 completed | Reward: 3320.00 | Avg Reward: 2653.60 | Frames: 1583295 | Epsilon: 0.1000


Training:  16%|█▌        | 1583802/10000000 [6:29:53<48:10:25, 48.53it/s]

Episode 2934 completed | Reward: 1720.00 | Avg Reward: 2626.00 | Frames: 1583797 | Epsilon: 0.1000


Training:  16%|█▌        | 1584481/10000000 [6:30:07<48:36:54, 48.08it/s]

Episode 2935 completed | Reward: 4240.00 | Avg Reward: 2639.60 | Frames: 1584477 | Epsilon: 0.1000


Training:  16%|█▌        | 1584961/10000000 [6:30:17<47:39:28, 49.05it/s]

Episode 2936 completed | Reward: 760.00 | Avg Reward: 2619.80 | Frames: 1584952 | Epsilon: 0.1000


Training:  16%|█▌        | 1585389/10000000 [6:30:25<49:59:17, 46.76it/s]

Episode 2937 completed | Reward: 3180.00 | Avg Reward: 2618.00 | Frames: 1585380 | Epsilon: 0.1000


Training:  16%|█▌        | 1585874/10000000 [6:30:35<48:02:42, 48.65it/s]

Episode 2938 completed | Reward: 880.00 | Avg Reward: 2617.00 | Frames: 1585867 | Epsilon: 0.1000


Training:  16%|█▌        | 1586366/10000000 [6:30:45<50:09:14, 46.60it/s]

Episode 2939 completed | Reward: 3300.00 | Avg Reward: 2608.60 | Frames: 1586359 | Epsilon: 0.1000


Training:  16%|█▌        | 1587354/10000000 [6:31:05<48:35:24, 48.09it/s]

Episode 2940 completed | Reward: 3940.00 | Avg Reward: 2625.80 | Frames: 1587347 | Epsilon: 0.1000


Training:  16%|█▌        | 1587797/10000000 [6:31:14<50:25:28, 46.34it/s]

Episode 2941 completed | Reward: 1340.00 | Avg Reward: 2628.40 | Frames: 1587789 | Epsilon: 0.1000


Training:  16%|█▌        | 1588225/10000000 [6:31:23<49:56:20, 46.79it/s]

Episode 2942 completed | Reward: 560.00 | Avg Reward: 2606.20 | Frames: 1588216 | Epsilon: 0.1000


Training:  16%|█▌        | 1589038/10000000 [6:31:39<49:10:01, 47.52it/s]

Episode 2943 completed | Reward: 3560.00 | Avg Reward: 2621.00 | Frames: 1589034 | Epsilon: 0.1000


Training:  16%|█▌        | 1589570/10000000 [6:31:50<49:07:03, 47.56it/s]

Episode 2944 completed | Reward: 760.00 | Avg Reward: 2609.20 | Frames: 1589563 | Epsilon: 0.1000


Training:  16%|█▌        | 1590029/10000000 [6:31:59<47:31:29, 49.16it/s]

Episode 2945 completed | Reward: 3100.00 | Avg Reward: 2612.80 | Frames: 1590020 | Epsilon: 0.1000


Training:  16%|█▌        | 1590794/10000000 [6:32:14<49:48:31, 46.90it/s]

Episode 2946 completed | Reward: 2720.00 | Avg Reward: 2606.70 | Frames: 1590787 | Epsilon: 0.1000


Training:  16%|█▌        | 1591657/10000000 [6:32:32<46:23:05, 50.35it/s]

Episode 2947 completed | Reward: 1880.00 | Avg Reward: 2599.70 | Frames: 1591657 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1592058/10000000 [6:32:40<49:16:43, 47.39it/s]

Episode 2948 completed | Reward: 1440.00 | Avg Reward: 2577.70 | Frames: 1592055 | Epsilon: 0.1000


Training:  16%|█▌        | 1592661/10000000 [6:32:52<50:07:54, 46.58it/s]

Episode 2949 completed | Reward: 4340.00 | Avg Reward: 2610.90 | Frames: 1592652 | Epsilon: 0.1000


Training:  16%|█▌        | 1593157/10000000 [6:33:02<50:08:59, 46.57it/s]

Episode 2950 completed | Reward: 2820.00 | Avg Reward: 2594.70 | Frames: 1593148 | Epsilon: 0.1000


Training:  16%|█▌        | 1593601/10000000 [6:33:11<48:50:56, 47.80it/s]

Episode 2951 completed | Reward: 1540.00 | Avg Reward: 2562.20 | Frames: 1593592 | Epsilon: 0.1000


Training:  16%|█▌        | 1594091/10000000 [6:33:21<45:52:46, 50.89it/s]

Episode 2952 completed | Reward: 960.00 | Avg Reward: 2545.00 | Frames: 1594085 | Epsilon: 0.1000


Training:  16%|█▌        | 1594455/10000000 [6:33:28<45:46:58, 51.00it/s]

Episode 2953 completed | Reward: 860.00 | Avg Reward: 2534.20 | Frames: 1594449 | Epsilon: 0.1000


Training:  16%|█▌        | 1594941/10000000 [6:33:38<48:36:50, 48.03it/s]

Episode 2954 completed | Reward: 2620.00 | Avg Reward: 2547.40 | Frames: 1594932 | Epsilon: 0.1000


Training:  16%|█▌        | 1595413/10000000 [6:33:47<47:39:26, 48.99it/s]

Episode 2955 completed | Reward: 1720.00 | Avg Reward: 2514.70 | Frames: 1595404 | Epsilon: 0.1000


Training:  16%|█▌        | 1596117/10000000 [6:34:02<48:17:39, 48.34it/s]

Episode 2956 completed | Reward: 3820.00 | Avg Reward: 2531.50 | Frames: 1596113 | Epsilon: 0.1000


Training:  16%|█▌        | 1596546/10000000 [6:34:10<49:16:16, 47.38it/s]

Episode 2957 completed | Reward: 520.00 | Avg Reward: 2517.30 | Frames: 1596539 | Epsilon: 0.1000


Training:  16%|█▌        | 1597009/10000000 [6:34:20<48:22:34, 48.25it/s]

Episode 2958 completed | Reward: 1960.00 | Avg Reward: 2502.10 | Frames: 1597000 | Epsilon: 0.1000


Training:  16%|█▌        | 1597586/10000000 [6:34:31<48:49:57, 47.80it/s]

Episode 2959 completed | Reward: 1760.00 | Avg Reward: 2492.50 | Frames: 1597582 | Epsilon: 0.1000


Training:  16%|█▌        | 1598545/10000000 [6:34:51<49:38:44, 47.01it/s]

Episode 2960 completed | Reward: 3660.00 | Avg Reward: 2500.90 | Frames: 1598538 | Epsilon: 0.1000


Training:  16%|█▌        | 1599375/10000000 [6:35:07<46:16:07, 50.43it/s]

Episode 2961 completed | Reward: 4230.00 | Avg Reward: 2489.20 | Frames: 1599371 | Epsilon: 0.1000


Training:  16%|█▌        | 1599785/10000000 [6:35:16<50:03:10, 46.62it/s]

Episode 2962 completed | Reward: 640.00 | Avg Reward: 2486.40 | Frames: 1599776 | Epsilon: 0.1000


Training:  16%|█▌        | 1600282/10000000 [6:35:26<48:33:34, 48.05it/s]

Episode 2963 completed | Reward: 1640.00 | Avg Reward: 2495.40 | Frames: 1600275 | Epsilon: 0.1000


Training:  16%|█▌        | 1600953/10000000 [6:35:39<48:45:00, 47.86it/s]

Episode 2964 completed | Reward: 2600.00 | Avg Reward: 2497.00 | Frames: 1600944 | Epsilon: 0.1000


Training:  16%|█▌        | 1601537/10000000 [6:35:51<48:41:19, 47.91it/s]

Episode 2965 completed | Reward: 4260.00 | Avg Reward: 2523.60 | Frames: 1601528 | Epsilon: 0.1000


Training:  16%|█▌        | 1602017/10000000 [6:36:01<46:15:50, 50.42it/s]

Episode 2966 completed | Reward: 1480.00 | Avg Reward: 2518.80 | Frames: 1602017 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1602451/10000000 [6:36:09<44:56:48, 51.90it/s]

Episode 2967 completed | Reward: 1440.00 | Avg Reward: 2527.20 | Frames: 1602445 | Epsilon: 0.1000


Training:  16%|█▌        | 1602886/10000000 [6:36:18<47:50:33, 48.75it/s]

Episode 2968 completed | Reward: 3490.00 | Avg Reward: 2546.50 | Frames: 1602879 | Epsilon: 0.1000


Training:  16%|█▌        | 1604405/10000000 [6:36:49<49:36:45, 47.01it/s]

Episode 2969 completed | Reward: 3280.00 | Avg Reward: 2569.10 | Frames: 1604397 | Epsilon: 0.1000


Training:  16%|█▌        | 1604857/10000000 [6:36:58<50:03:54, 46.58it/s]

Episode 2970 completed | Reward: 660.00 | Avg Reward: 2531.30 | Frames: 1604848 | Epsilon: 0.1000


Training:  16%|█▌        | 1605373/10000000 [6:37:08<48:52:35, 47.71it/s]

Episode 2971 completed | Reward: 1720.00 | Avg Reward: 2521.70 | Frames: 1605364 | Epsilon: 0.1000


Training:  16%|█▌        | 1605926/10000000 [6:37:19<49:53:22, 46.74it/s]

Episode 2972 completed | Reward: 900.00 | Avg Reward: 2517.30 | Frames: 1605919 | Epsilon: 0.1000


Training:  16%|█▌        | 1606449/10000000 [6:37:30<49:43:28, 46.89it/s]

Episode 2973 completed | Reward: 1240.00 | Avg Reward: 2497.90 | Frames: 1606440 | Epsilon: 0.1000


Training:  16%|█▌        | 1606894/10000000 [6:37:39<48:35:57, 47.97it/s]

Episode 2974 completed | Reward: 1280.00 | Avg Reward: 2470.30 | Frames: 1606889 | Epsilon: 0.1000


Training:  16%|█▌        | 1608077/10000000 [6:38:03<48:44:10, 47.83it/s]

Episode 2975 completed | Reward: 2040.00 | Avg Reward: 2479.70 | Frames: 1608068 | Epsilon: 0.1000


Training:  16%|█▌        | 1608853/10000000 [6:38:18<48:00:52, 48.55it/s]

Episode 2976 completed | Reward: 4640.00 | Avg Reward: 2486.90 | Frames: 1608851 | Epsilon: 0.1000


Training:  16%|█▌        | 1609234/10000000 [6:38:26<47:54:28, 48.65it/s]

Episode 2977 completed | Reward: 1580.00 | Avg Reward: 2471.90 | Frames: 1609227 | Epsilon: 0.1000


Training:  16%|█▌        | 1609746/10000000 [6:38:36<48:48:59, 47.74it/s]

Episode 2978 completed | Reward: 2740.00 | Avg Reward: 2471.10 | Frames: 1609743 | Epsilon: 0.1000


Training:  16%|█▌        | 1610271/10000000 [6:38:47<45:50:37, 50.84it/s]

Episode 2979 completed | Reward: 2780.00 | Avg Reward: 2483.70 | Frames: 1610265 | Epsilon: 0.1000


Training:  16%|█▌        | 1610650/10000000 [6:38:54<49:07:31, 47.44it/s]

Episode 2980 completed | Reward: 2100.00 | Avg Reward: 2476.70 | Frames: 1610647 | Epsilon: 0.1000


Training:  16%|█▌        | 1611191/10000000 [6:39:05<44:56:08, 51.86it/s]

Episode 2981 completed | Reward: 3360.00 | Avg Reward: 2462.80 | Frames: 1611185 | Epsilon: 0.1000


Training:  16%|█▌        | 1611579/10000000 [6:39:13<45:02:33, 51.73it/s]

Episode 2982 completed | Reward: 1340.00 | Avg Reward: 2463.80 | Frames: 1611575 | Epsilon: 0.1000


Training:  16%|█▌        | 1612015/10000000 [6:39:22<45:45:08, 50.93it/s]

Episode 2983 completed | Reward: 4000.00 | Avg Reward: 2479.20 | Frames: 1612009 | Epsilon: 0.1000


Training:  16%|█▌        | 1612455/10000000 [6:39:31<46:53:58, 49.68it/s]

Episode 2984 completed | Reward: 4740.00 | Avg Reward: 2491.60 | Frames: 1612455 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1612969/10000000 [6:39:41<47:21:03, 49.20it/s]

Episode 2985 completed | Reward: 2640.00 | Avg Reward: 2487.20 | Frames: 1612963 | Epsilon: 0.1000


Training:  16%|█▌        | 1613486/10000000 [6:39:52<48:04:13, 48.46it/s]

Episode 2986 completed | Reward: 2740.00 | Avg Reward: 2466.20 | Frames: 1613481 | Epsilon: 0.1000


Training:  16%|█▌        | 1614545/10000000 [6:40:13<49:34:23, 46.99it/s]

Episode 2987 completed | Reward: 4960.00 | Avg Reward: 2476.60 | Frames: 1614538 | Epsilon: 0.1000


Training:  16%|█▌        | 1614925/10000000 [6:40:21<49:14:10, 47.31it/s]

Episode 2988 completed | Reward: 1760.00 | Avg Reward: 2468.60 | Frames: 1614916 | Epsilon: 0.1000


Training:  16%|█▌        | 1615473/10000000 [6:40:32<50:38:38, 45.99it/s]

Episode 2989 completed | Reward: 1060.00 | Avg Reward: 2462.60 | Frames: 1615466 | Epsilon: 0.1000


Training:  16%|█▌        | 1616014/10000000 [6:40:43<48:15:19, 48.26it/s]

Episode 2990 completed | Reward: 2320.00 | Avg Reward: 2469.20 | Frames: 1616011 | Epsilon: 0.1000


Training:  16%|█▌        | 1616546/10000000 [6:40:53<48:12:54, 48.30it/s]

Episode 2991 completed | Reward: 2920.00 | Avg Reward: 2487.80 | Frames: 1616539 | Epsilon: 0.1000


Training:  16%|█▌        | 1617055/10000000 [6:41:04<45:13:58, 51.48it/s]

Episode 2992 completed | Reward: 580.00 | Avg Reward: 2479.20 | Frames: 1617049 | Epsilon: 0.1000


Training:  16%|█▌        | 1617483/10000000 [6:41:12<45:02:47, 51.69it/s]

Episode 2993 completed | Reward: 2620.00 | Avg Reward: 2468.40 | Frames: 1617478 | Epsilon: 0.1000


Training:  16%|█▌        | 1618082/10000000 [6:41:24<49:11:31, 47.33it/s]

Episode 2994 completed | Reward: 4820.00 | Avg Reward: 2479.20 | Frames: 1618075 | Epsilon: 0.1000


Training:  16%|█▌        | 1618545/10000000 [6:41:34<51:24:53, 45.28it/s]

Episode 2995 completed | Reward: 3160.00 | Avg Reward: 2491.00 | Frames: 1618536 | Epsilon: 0.1000


Training:  16%|█▌        | 1619189/10000000 [6:41:47<49:04:52, 47.43it/s]

Episode 2996 completed | Reward: 2580.00 | Avg Reward: 2486.40 | Frames: 1619181 | Epsilon: 0.1000


Training:  16%|█▌        | 1619721/10000000 [6:41:57<47:37:57, 48.87it/s]

Episode 2997 completed | Reward: 2380.00 | Avg Reward: 2473.70 | Frames: 1619716 | Epsilon: 0.1000


Training:  16%|█▌        | 1620246/10000000 [6:42:08<48:55:05, 47.58it/s]

Episode 2998 completed | Reward: 1720.00 | Avg Reward: 2452.10 | Frames: 1620239 | Epsilon: 0.1000


Training:  16%|█▌        | 1620837/10000000 [6:42:20<50:47:34, 45.82it/s]

Episode 2999 completed | Reward: 1840.00 | Avg Reward: 2442.30 | Frames: 1620829 | Epsilon: 0.1000


Training:  16%|█▌        | 1621477/10000000 [6:42:33<48:37:38, 47.86it/s]

Episode 3000 completed | Reward: 3520.00 | Avg Reward: 2460.50 | Frames: 1621468 | Epsilon: 0.1000


Training:  16%|█▌        | 1622129/10000000 [6:42:46<49:33:08, 46.96it/s]

Episode 3001 completed | Reward: 3850.00 | Avg Reward: 2491.60 | Frames: 1622120 | Epsilon: 0.1000


Training:  16%|█▌        | 1622550/10000000 [6:42:55<59:42:12, 38.98it/s]

Episode 3002 completed | Reward: 740.00 | Avg Reward: 2481.20 | Frames: 1622549 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▌        | 1623186/10000000 [6:43:08<48:00:41, 48.47it/s]

Episode 3003 completed | Reward: 2260.00 | Avg Reward: 2478.80 | Frames: 1623181 | Epsilon: 0.1000


Training:  16%|█▌        | 1623710/10000000 [6:43:18<49:16:09, 47.23it/s]

Episode 3004 completed | Reward: 900.00 | Avg Reward: 2477.20 | Frames: 1623706 | Epsilon: 0.1000


Training:  16%|█▌        | 1624185/10000000 [6:43:28<50:38:58, 45.94it/s]

Episode 3005 completed | Reward: 1780.00 | Avg Reward: 2489.20 | Frames: 1624177 | Epsilon: 0.1000


Training:  16%|█▌        | 1624834/10000000 [6:43:41<48:28:18, 48.00it/s]

Episode 3006 completed | Reward: 3760.00 | Avg Reward: 2506.00 | Frames: 1624828 | Epsilon: 0.1000


Training:  16%|█▋        | 1625374/10000000 [6:43:52<48:23:25, 48.07it/s]

Episode 3007 completed | Reward: 1960.00 | Avg Reward: 2500.20 | Frames: 1625366 | Epsilon: 0.1000


Training:  16%|█▋        | 1625909/10000000 [6:44:03<47:39:27, 48.81it/s]

Episode 3008 completed | Reward: 2160.00 | Avg Reward: 2484.40 | Frames: 1625906 | Epsilon: 0.1000


Training:  16%|█▋        | 1626562/10000000 [6:44:16<48:07:12, 48.34it/s]

Episode 3009 completed | Reward: 2020.00 | Avg Reward: 2471.80 | Frames: 1626557 | Epsilon: 0.1000


Training:  16%|█▋        | 1627082/10000000 [6:44:26<48:49:28, 47.64it/s]

Episode 3010 completed | Reward: 2100.00 | Avg Reward: 2458.40 | Frames: 1627075 | Epsilon: 0.1000


Training:  16%|█▋        | 1627438/10000000 [6:44:34<48:33:50, 47.89it/s]

Episode 3011 completed | Reward: 3320.00 | Avg Reward: 2460.80 | Frames: 1627433 | Epsilon: 0.1000


Training:  16%|█▋        | 1627946/10000000 [6:44:44<48:02:28, 48.41it/s]

Episode 3012 completed | Reward: 680.00 | Avg Reward: 2443.80 | Frames: 1627941 | Epsilon: 0.1000


Training:  16%|█▋        | 1628533/10000000 [6:44:56<48:18:10, 48.14it/s]

Episode 3013 completed | Reward: 1440.00 | Avg Reward: 2444.80 | Frames: 1628524 | Epsilon: 0.1000


Training:  16%|█▋        | 1628937/10000000 [6:45:04<50:35:36, 45.96it/s]

Episode 3014 completed | Reward: 2740.00 | Avg Reward: 2444.80 | Frames: 1628930 | Epsilon: 0.1000


Training:  16%|█▋        | 1629454/10000000 [6:45:15<48:23:45, 48.04it/s]

Episode 3015 completed | Reward: 2840.00 | Avg Reward: 2447.00 | Frames: 1629449 | Epsilon: 0.1000


Training:  16%|█▋        | 1629890/10000000 [6:45:23<48:21:23, 48.08it/s]

Episode 3016 completed | Reward: 1360.00 | Avg Reward: 2410.80 | Frames: 1629880 | Epsilon: 0.1000


Training:  16%|█▋        | 1630366/10000000 [6:45:33<47:21:28, 49.09it/s]

Episode 3017 completed | Reward: 2920.00 | Avg Reward: 2424.00 | Frames: 1630363 | Epsilon: 0.1000


Training:  16%|█▋        | 1630851/10000000 [6:45:43<45:27:37, 51.14it/s]

Episode 3018 completed | Reward: 1260.00 | Avg Reward: 2409.60 | Frames: 1630846 | Epsilon: 0.1000


Training:  16%|█▋        | 1631352/10000000 [6:45:53<44:37:16, 52.10it/s]

Episode 3019 completed | Reward: 1820.00 | Avg Reward: 2413.40 | Frames: 1631344 | Epsilon: 0.1000


Training:  16%|█▋        | 1631848/10000000 [6:46:03<42:56:09, 54.14it/s]

Episode 3020 completed | Reward: 3300.00 | Avg Reward: 2383.40 | Frames: 1631843 | Epsilon: 0.1000


Training:  16%|█▋        | 1632282/10000000 [6:46:12<47:42:53, 48.71it/s]

Episode 3021 completed | Reward: 3340.00 | Avg Reward: 2378.00 | Frames: 1632279 | Epsilon: 0.1000


Training:  16%|█▋        | 1632954/10000000 [6:46:25<46:17:26, 50.21it/s]

Episode 3022 completed | Reward: 4460.00 | Avg Reward: 2407.80 | Frames: 1632954 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▋        | 1633405/10000000 [6:46:34<51:04:41, 45.50it/s]

Episode 3023 completed | Reward: 3280.00 | Avg Reward: 2428.80 | Frames: 1633398 | Epsilon: 0.1000


Training:  16%|█▋        | 1634013/10000000 [6:46:47<48:54:10, 47.52it/s]

Episode 3024 completed | Reward: 4480.00 | Avg Reward: 2421.50 | Frames: 1634012 | Epsilon: 0.1000


Training:  16%|█▋        | 1634498/10000000 [6:46:56<47:26:06, 48.99it/s]

Episode 3025 completed | Reward: 3620.00 | Avg Reward: 2435.90 | Frames: 1634494 | Epsilon: 0.1000


Training:  16%|█▋        | 1635097/10000000 [6:47:09<49:19:22, 47.11it/s]

Episode 3026 completed | Reward: 3700.00 | Avg Reward: 2448.50 | Frames: 1635092 | Epsilon: 0.1000


Training:  16%|█▋        | 1635631/10000000 [6:47:19<46:26:36, 50.03it/s]

Episode 3027 completed | Reward: 880.00 | Avg Reward: 2430.70 | Frames: 1635625 | Epsilon: 0.1000


Training:  16%|█▋        | 1636246/10000000 [6:47:32<48:14:15, 48.16it/s]

Episode 3028 completed | Reward: 1920.00 | Avg Reward: 2420.70 | Frames: 1636243 | Epsilon: 0.1000


Training:  16%|█▋        | 1636786/10000000 [6:47:43<49:05:36, 47.32it/s]

Episode 3029 completed | Reward: 1580.00 | Avg Reward: 2397.10 | Frames: 1636779 | Epsilon: 0.1000


Training:  16%|█▋        | 1637511/10000000 [6:47:57<46:02:58, 50.44it/s]

Episode 3030 completed | Reward: 3650.00 | Avg Reward: 2401.60 | Frames: 1637506 | Epsilon: 0.1000


Training:  16%|█▋        | 1638237/10000000 [6:48:12<48:33:04, 47.84it/s]

Episode 3031 completed | Reward: 3680.00 | Avg Reward: 2430.40 | Frames: 1638228 | Epsilon: 0.1000


Training:  16%|█▋        | 1638859/10000000 [6:48:24<45:27:42, 51.09it/s]

Episode 3032 completed | Reward: 1140.00 | Avg Reward: 2420.20 | Frames: 1638854 | Epsilon: 0.1000


Training:  16%|█▋        | 1639397/10000000 [6:48:35<49:11:39, 47.21it/s]

Episode 3033 completed | Reward: 3880.00 | Avg Reward: 2425.80 | Frames: 1639389 | Epsilon: 0.1000


Training:  16%|█▋        | 1639809/10000000 [6:48:44<48:52:31, 47.51it/s]

Episode 3034 completed | Reward: 2480.00 | Avg Reward: 2433.40 | Frames: 1639800 | Epsilon: 0.1000


Training:  16%|█▋        | 1640279/10000000 [6:48:53<44:45:07, 51.89it/s]

Episode 3035 completed | Reward: 3660.00 | Avg Reward: 2427.60 | Frames: 1640275 | Epsilon: 0.1000


Training:  16%|█▋        | 1640761/10000000 [6:49:03<50:13:56, 46.23it/s]

Episode 3036 completed | Reward: 2680.00 | Avg Reward: 2446.80 | Frames: 1640752 | Epsilon: 0.1000


Training:  16%|█▋        | 1641237/10000000 [6:49:13<49:42:06, 46.72it/s]

Episode 3037 completed | Reward: 1980.00 | Avg Reward: 2434.80 | Frames: 1641228 | Epsilon: 0.1000


Training:  16%|█▋        | 1641853/10000000 [6:49:25<47:39:34, 48.71it/s]

Episode 3038 completed | Reward: 3140.00 | Avg Reward: 2457.40 | Frames: 1641844 | Epsilon: 0.1000


Training:  16%|█▋        | 1642338/10000000 [6:49:35<47:45:41, 48.61it/s]

Episode 3039 completed | Reward: 680.00 | Avg Reward: 2431.20 | Frames: 1642331 | Epsilon: 0.1000


Training:  16%|█▋        | 1642850/10000000 [6:49:45<48:59:04, 47.39it/s]

Episode 3040 completed | Reward: 2080.00 | Avg Reward: 2412.60 | Frames: 1642842 | Epsilon: 0.1000


Training:  16%|█▋        | 1643278/10000000 [6:49:54<58:35:33, 39.62it/s]

Episode 3041 completed | Reward: 1180.00 | Avg Reward: 2411.00 | Frames: 1643277 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  16%|█▋        | 1643673/10000000 [6:50:02<49:26:56, 46.94it/s]

Episode 3042 completed | Reward: 740.00 | Avg Reward: 2412.80 | Frames: 1643665 | Epsilon: 0.1000


Training:  16%|█▋        | 1644122/10000000 [6:50:11<48:12:50, 48.14it/s]

Episode 3043 completed | Reward: 1280.00 | Avg Reward: 2390.00 | Frames: 1644117 | Epsilon: 0.1000


Training:  16%|█▋        | 1644726/10000000 [6:50:24<47:34:31, 48.78it/s]

Episode 3044 completed | Reward: 3240.00 | Avg Reward: 2414.80 | Frames: 1644723 | Epsilon: 0.1000


Training:  16%|█▋        | 1645321/10000000 [6:50:36<49:44:47, 46.65it/s]

Episode 3045 completed | Reward: 2340.00 | Avg Reward: 2407.20 | Frames: 1645314 | Epsilon: 0.1000


Training:  16%|█▋        | 1645822/10000000 [6:50:46<47:13:58, 49.13it/s]

Episode 3046 completed | Reward: 3080.00 | Avg Reward: 2410.80 | Frames: 1645819 | Epsilon: 0.1000


Training:  16%|█▋        | 1646453/10000000 [6:50:58<48:43:57, 47.62it/s]

Episode 3047 completed | Reward: 3280.00 | Avg Reward: 2424.80 | Frames: 1646449 | Epsilon: 0.1000


Training:  16%|█▋        | 1647033/10000000 [6:51:10<50:04:21, 46.34it/s]

Episode 3048 completed | Reward: 2140.00 | Avg Reward: 2431.80 | Frames: 1647026 | Epsilon: 0.1000


Training:  16%|█▋        | 1647630/10000000 [6:51:22<48:38:16, 47.70it/s]

Episode 3049 completed | Reward: 4540.00 | Avg Reward: 2433.80 | Frames: 1647623 | Epsilon: 0.1000


Training:  16%|█▋        | 1648038/10000000 [6:51:31<48:48:16, 47.54it/s]

Episode 3050 completed | Reward: 1940.00 | Avg Reward: 2425.00 | Frames: 1648031 | Epsilon: 0.1000


Training:  16%|█▋        | 1648450/10000000 [6:51:39<48:29:12, 47.85it/s]

Episode 3051 completed | Reward: 1120.00 | Avg Reward: 2420.80 | Frames: 1648443 | Epsilon: 0.1000


Training:  16%|█▋        | 1648850/10000000 [6:51:47<50:02:35, 46.36it/s]

Episode 3052 completed | Reward: 1140.00 | Avg Reward: 2422.60 | Frames: 1648847 | Epsilon: 0.1000


Training:  16%|█▋        | 1649583/10000000 [6:52:02<44:31:34, 52.09it/s]

Episode 3053 completed | Reward: 4120.00 | Avg Reward: 2455.20 | Frames: 1649578 | Epsilon: 0.1000


Training:  17%|█▋        | 1650205/10000000 [6:52:15<48:10:46, 48.14it/s]

Episode 3054 completed | Reward: 2940.00 | Avg Reward: 2458.40 | Frames: 1650196 | Epsilon: 0.1000


Training:  17%|█▋        | 1650681/10000000 [6:52:24<49:09:08, 47.19it/s]

Episode 3055 completed | Reward: 880.00 | Avg Reward: 2450.00 | Frames: 1650672 | Epsilon: 0.1000


Training:  17%|█▋        | 1651325/10000000 [6:52:37<49:36:30, 46.75it/s]

Episode 3056 completed | Reward: 2360.00 | Avg Reward: 2435.40 | Frames: 1651317 | Epsilon: 0.1000


Training:  17%|█▋        | 1651898/10000000 [6:52:49<47:31:06, 48.80it/s]

Episode 3057 completed | Reward: 1640.00 | Avg Reward: 2446.60 | Frames: 1651891 | Epsilon: 0.1000


Training:  17%|█▋        | 1652555/10000000 [6:53:02<45:23:43, 51.08it/s]

Episode 3058 completed | Reward: 2860.00 | Avg Reward: 2455.60 | Frames: 1652550 | Epsilon: 0.1000


Training:  17%|█▋        | 1653190/10000000 [6:53:15<48:24:07, 47.90it/s]

Episode 3059 completed | Reward: 2140.00 | Avg Reward: 2459.40 | Frames: 1653183 | Epsilon: 0.1000


Training:  17%|█▋        | 1653771/10000000 [6:53:27<58:19:23, 39.75it/s]

Episode 3060 completed | Reward: 3840.00 | Avg Reward: 2461.20 | Frames: 1653770 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1654151/10000000 [6:53:34<48:06:27, 48.19it/s]

Episode 3061 completed | Reward: 1340.00 | Avg Reward: 2432.30 | Frames: 1654150 | Epsilon: 0.1000


Training:  17%|█▋        | 1654677/10000000 [6:53:45<47:45:12, 48.54it/s]

Episode 3062 completed | Reward: 2540.00 | Avg Reward: 2451.30 | Frames: 1654675 | Epsilon: 0.1000


Training:  17%|█▋        | 1655170/10000000 [6:53:55<49:08:16, 47.17it/s]

Episode 3063 completed | Reward: 3690.00 | Avg Reward: 2471.80 | Frames: 1655167 | Epsilon: 0.1000


Training:  17%|█▋        | 1655759/10000000 [6:54:07<45:26:15, 51.01it/s]

Episode 3064 completed | Reward: 4360.00 | Avg Reward: 2489.40 | Frames: 1655755 | Epsilon: 0.1000


Training:  17%|█▋        | 1656281/10000000 [6:54:18<50:32:09, 45.86it/s]

Episode 3065 completed | Reward: 1020.00 | Avg Reward: 2457.00 | Frames: 1656280 | Epsilon: 0.1000


Training:  17%|█▋        | 1656842/10000000 [6:54:29<47:58:26, 48.31it/s]

Episode 3066 completed | Reward: 2680.00 | Avg Reward: 2469.00 | Frames: 1656833 | Epsilon: 0.1000


Training:  17%|█▋        | 1657302/10000000 [6:54:38<48:53:14, 47.40it/s]

Episode 3067 completed | Reward: 3120.00 | Avg Reward: 2485.80 | Frames: 1657295 | Epsilon: 0.1000


Training:  17%|█▋        | 1657641/10000000 [6:54:45<49:51:58, 46.47it/s]

Episode 3068 completed | Reward: 1320.00 | Avg Reward: 2464.10 | Frames: 1657634 | Epsilon: 0.1000


Training:  17%|█▋        | 1658089/10000000 [6:54:54<48:52:32, 47.41it/s]

Episode 3069 completed | Reward: 820.00 | Avg Reward: 2439.50 | Frames: 1658080 | Epsilon: 0.1000


Training:  17%|█▋        | 1658741/10000000 [6:55:07<48:42:00, 47.58it/s]

Episode 3070 completed | Reward: 2210.00 | Avg Reward: 2455.00 | Frames: 1658732 | Epsilon: 0.1000


Training:  17%|█▋        | 1659161/10000000 [6:55:16<49:52:11, 46.46it/s]

Episode 3071 completed | Reward: 940.00 | Avg Reward: 2447.20 | Frames: 1659152 | Epsilon: 0.1000


Training:  17%|█▋        | 1659784/10000000 [6:55:28<42:30:31, 54.50it/s]

Episode 3072 completed | Reward: 4680.00 | Avg Reward: 2485.00 | Frames: 1659777 | Epsilon: 0.1000


Training:  17%|█▋        | 1660249/10000000 [6:55:38<49:16:29, 47.01it/s]

Episode 3073 completed | Reward: 1320.00 | Avg Reward: 2485.80 | Frames: 1660241 | Epsilon: 0.1000


Training:  17%|█▋        | 1660806/10000000 [6:55:49<48:27:16, 47.81it/s]

Episode 3074 completed | Reward: 1320.00 | Avg Reward: 2486.20 | Frames: 1660803 | Epsilon: 0.1000


Training:  17%|█▋        | 1661539/10000000 [6:56:04<44:39:29, 51.87it/s]

Episode 3075 completed | Reward: 2980.00 | Avg Reward: 2495.60 | Frames: 1661534 | Epsilon: 0.1000


Training:  17%|█▋        | 1662045/10000000 [6:56:14<49:39:18, 46.64it/s]

Episode 3076 completed | Reward: 880.00 | Avg Reward: 2458.00 | Frames: 1662038 | Epsilon: 0.1000


Training:  17%|█▋        | 1662566/10000000 [6:56:25<47:50:36, 48.41it/s]

Episode 3077 completed | Reward: 980.00 | Avg Reward: 2452.00 | Frames: 1662559 | Epsilon: 0.1000


Training:  17%|█▋        | 1663541/10000000 [6:56:44<48:29:53, 47.75it/s]

Episode 3078 completed | Reward: 3600.00 | Avg Reward: 2460.60 | Frames: 1663532 | Epsilon: 0.1000


Training:  17%|█▋        | 1664234/10000000 [6:56:58<58:06:46, 39.84it/s]

Episode 3079 completed | Reward: 2980.00 | Avg Reward: 2462.60 | Frames: 1664232 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1664708/10000000 [6:57:08<43:02:23, 53.80it/s]

Episode 3080 completed | Reward: 1860.00 | Avg Reward: 2460.20 | Frames: 1664702 | Epsilon: 0.1000


Training:  17%|█▋        | 1665734/10000000 [6:57:29<47:21:40, 48.88it/s]

Episode 3081 completed | Reward: 2200.00 | Avg Reward: 2448.60 | Frames: 1665730 | Epsilon: 0.1000


Training:  17%|█▋        | 1666273/10000000 [6:57:40<48:54:19, 47.33it/s]

Episode 3082 completed | Reward: 4440.00 | Avg Reward: 2479.60 | Frames: 1666265 | Epsilon: 0.1000


Training:  17%|█▋        | 1666773/10000000 [6:57:50<50:13:07, 46.09it/s]

Episode 3083 completed | Reward: 2520.00 | Avg Reward: 2464.80 | Frames: 1666765 | Epsilon: 0.1000


Training:  17%|█▋        | 1667250/10000000 [6:57:59<47:21:16, 48.88it/s]

Episode 3084 completed | Reward: 3580.00 | Avg Reward: 2453.20 | Frames: 1667247 | Epsilon: 0.1000


Training:  17%|█▋        | 1667777/10000000 [6:58:10<48:55:53, 47.30it/s]

Episode 3085 completed | Reward: 2100.00 | Avg Reward: 2447.80 | Frames: 1667770 | Epsilon: 0.1000


Training:  17%|█▋        | 1668310/10000000 [6:58:21<49:47:39, 46.48it/s]

Episode 3086 completed | Reward: 1440.00 | Avg Reward: 2434.80 | Frames: 1668303 | Epsilon: 0.1000


Training:  17%|█▋        | 1668899/10000000 [6:58:33<44:23:44, 52.13it/s]

Episode 3087 completed | Reward: 3720.00 | Avg Reward: 2422.40 | Frames: 1668893 | Epsilon: 0.1000


Training:  17%|█▋        | 1669517/10000000 [6:58:45<49:12:21, 47.03it/s]

Episode 3088 completed | Reward: 2540.00 | Avg Reward: 2430.20 | Frames: 1669509 | Epsilon: 0.1000


Training:  17%|█▋        | 1670074/10000000 [6:58:57<49:00:22, 47.22it/s]

Episode 3089 completed | Reward: 1920.00 | Avg Reward: 2438.80 | Frames: 1670067 | Epsilon: 0.1000


Training:  17%|█▋        | 1670652/10000000 [6:59:08<45:29:19, 50.86it/s]

Episode 3090 completed | Reward: 1740.00 | Avg Reward: 2433.00 | Frames: 1670644 | Epsilon: 0.1000


Training:  17%|█▋        | 1671190/10000000 [6:59:19<47:30:16, 48.70it/s]

Episode 3091 completed | Reward: 3140.00 | Avg Reward: 2435.20 | Frames: 1671185 | Epsilon: 0.1000


Training:  17%|█▋        | 1671757/10000000 [6:59:31<48:16:25, 47.92it/s]

Episode 3092 completed | Reward: 1700.00 | Avg Reward: 2446.40 | Frames: 1671748 | Epsilon: 0.1000


Training:  17%|█▋        | 1672233/10000000 [6:59:40<50:02:29, 46.23it/s]

Episode 3093 completed | Reward: 2220.00 | Avg Reward: 2442.40 | Frames: 1672226 | Epsilon: 0.1000


Training:  17%|█▋        | 1672730/10000000 [6:59:50<46:56:01, 49.29it/s]

Episode 3094 completed | Reward: 3340.00 | Avg Reward: 2427.60 | Frames: 1672725 | Epsilon: 0.1000


Training:  17%|█▋        | 1673205/10000000 [7:00:00<48:01:15, 48.17it/s]

Episode 3095 completed | Reward: 3990.00 | Avg Reward: 2435.90 | Frames: 1673196 | Epsilon: 0.1000


Training:  17%|█▋        | 1673724/10000000 [7:00:11<42:50:21, 53.99it/s]

Episode 3096 completed | Reward: 3320.00 | Avg Reward: 2443.30 | Frames: 1673719 | Epsilon: 0.1000


Training:  17%|█▋        | 1674313/10000000 [7:00:23<45:46:48, 50.52it/s]

Episode 3097 completed | Reward: 1480.00 | Avg Reward: 2434.30 | Frames: 1674313 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1674770/10000000 [7:00:32<48:12:01, 47.98it/s]

Episode 3098 completed | Reward: 3900.00 | Avg Reward: 2456.10 | Frames: 1674763 | Epsilon: 0.1000


Training:  17%|█▋        | 1675270/10000000 [7:00:42<46:50:46, 49.36it/s]

Episode 3099 completed | Reward: 4020.00 | Avg Reward: 2477.90 | Frames: 1675267 | Epsilon: 0.1000


Training:  17%|█▋        | 1675644/10000000 [7:00:49<43:40:02, 52.95it/s]

Episode 3100 completed | Reward: 1360.00 | Avg Reward: 2456.30 | Frames: 1675639 | Epsilon: 0.1000


Training:  17%|█▋        | 1676031/10000000 [7:00:57<45:59:42, 50.27it/s]

Episode 3101 completed | Reward: 3590.00 | Avg Reward: 2453.70 | Frames: 1676027 | Epsilon: 0.1000


Training:  17%|█▋        | 1676458/10000000 [7:01:06<47:30:10, 48.67it/s]

Episode 3102 completed | Reward: 800.00 | Avg Reward: 2454.30 | Frames: 1676451 | Epsilon: 0.1000


Training:  17%|█▋        | 1676950/10000000 [7:01:16<47:43:45, 48.44it/s]

Episode 3103 completed | Reward: 4280.00 | Avg Reward: 2474.50 | Frames: 1676947 | Epsilon: 0.1000


Training:  17%|█▋        | 1677337/10000000 [7:01:24<49:40:38, 46.54it/s]

Episode 3104 completed | Reward: 3840.00 | Avg Reward: 2503.90 | Frames: 1677329 | Epsilon: 0.1000


Training:  17%|█▋        | 1677718/10000000 [7:01:31<47:13:48, 48.95it/s]

Episode 3105 completed | Reward: 1080.00 | Avg Reward: 2496.90 | Frames: 1677714 | Epsilon: 0.1000


Training:  17%|█▋        | 1678423/10000000 [7:01:46<46:43:23, 49.47it/s]

Episode 3106 completed | Reward: 1060.00 | Avg Reward: 2469.90 | Frames: 1678418 | Epsilon: 0.1000


Training:  17%|█▋        | 1678946/10000000 [7:01:56<46:42:42, 49.48it/s]

Episode 3107 completed | Reward: 1680.00 | Avg Reward: 2467.10 | Frames: 1678942 | Epsilon: 0.1000


Training:  17%|█▋        | 1679635/10000000 [7:02:10<45:58:39, 50.27it/s]

Episode 3108 completed | Reward: 3280.00 | Avg Reward: 2478.30 | Frames: 1679629 | Epsilon: 0.1000


Training:  17%|█▋        | 1680287/10000000 [7:02:23<45:44:16, 50.53it/s]

Episode 3109 completed | Reward: 4280.00 | Avg Reward: 2500.90 | Frames: 1680281 | Epsilon: 0.1000


Training:  17%|█▋        | 1680803/10000000 [7:02:34<47:08:17, 49.02it/s]

Episode 3110 completed | Reward: 800.00 | Avg Reward: 2487.90 | Frames: 1680802 | Epsilon: 0.1000


Training:  17%|█▋        | 1681253/10000000 [7:02:43<50:27:44, 45.79it/s]

Episode 3111 completed | Reward: 4840.00 | Avg Reward: 2503.10 | Frames: 1681246 | Epsilon: 0.1000


Training:  17%|█▋        | 1681685/10000000 [7:02:52<49:08:11, 47.03it/s]

Episode 3112 completed | Reward: 540.00 | Avg Reward: 2501.70 | Frames: 1681676 | Epsilon: 0.1000


Training:  17%|█▋        | 1682066/10000000 [7:02:59<48:18:30, 47.83it/s]

Episode 3113 completed | Reward: 820.00 | Avg Reward: 2495.50 | Frames: 1682059 | Epsilon: 0.1000


Training:  17%|█▋        | 1682597/10000000 [7:03:10<48:34:12, 47.57it/s]

Episode 3114 completed | Reward: 2300.00 | Avg Reward: 2491.10 | Frames: 1682588 | Epsilon: 0.1000


Training:  17%|█▋        | 1683097/10000000 [7:03:20<48:50:02, 47.31it/s]

Episode 3115 completed | Reward: 2900.00 | Avg Reward: 2491.70 | Frames: 1683088 | Epsilon: 0.1000


Training:  17%|█▋        | 1683549/10000000 [7:03:29<49:20:05, 46.83it/s]

Episode 3116 completed | Reward: 3080.00 | Avg Reward: 2508.90 | Frames: 1683541 | Epsilon: 0.1000


Training:  17%|█▋        | 1684392/10000000 [7:03:46<55:30:39, 41.61it/s]

Episode 3117 completed | Reward: 1640.00 | Avg Reward: 2496.10 | Frames: 1684391 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1684810/10000000 [7:03:55<48:41:58, 47.43it/s]

Episode 3118 completed | Reward: 3140.00 | Avg Reward: 2514.90 | Frames: 1684803 | Epsilon: 0.1000


Training:  17%|█▋        | 1685389/10000000 [7:04:07<50:23:37, 45.83it/s]

Episode 3119 completed | Reward: 3840.00 | Avg Reward: 2535.10 | Frames: 1685382 | Epsilon: 0.1000


Training:  17%|█▋        | 1686050/10000000 [7:04:20<47:52:09, 48.24it/s]

Episode 3120 completed | Reward: 3080.00 | Avg Reward: 2532.90 | Frames: 1686043 | Epsilon: 0.1000


Training:  17%|█▋        | 1686429/10000000 [7:04:28<48:33:30, 47.56it/s]

Episode 3121 completed | Reward: 2840.00 | Avg Reward: 2527.90 | Frames: 1686420 | Epsilon: 0.1000


Training:  17%|█▋        | 1686977/10000000 [7:04:39<49:08:55, 46.98it/s]

Episode 3122 completed | Reward: 1300.00 | Avg Reward: 2496.30 | Frames: 1686969 | Epsilon: 0.1000


Training:  17%|█▋        | 1687465/10000000 [7:04:48<47:34:27, 48.54it/s]

Episode 3123 completed | Reward: 3520.00 | Avg Reward: 2498.70 | Frames: 1687456 | Epsilon: 0.1000


Training:  17%|█▋        | 1687894/10000000 [7:04:57<47:02:05, 49.09it/s]

Episode 3124 completed | Reward: 3490.00 | Avg Reward: 2488.80 | Frames: 1687889 | Epsilon: 0.1000


Training:  17%|█▋        | 1688379/10000000 [7:05:07<47:11:24, 48.93it/s]

Episode 3125 completed | Reward: 4060.00 | Avg Reward: 2493.20 | Frames: 1688378 | Epsilon: 0.1000


Training:  17%|█▋        | 1688862/10000000 [7:05:17<48:40:37, 47.43it/s]

Episode 3126 completed | Reward: 2000.00 | Avg Reward: 2476.20 | Frames: 1688855 | Epsilon: 0.1000


Training:  17%|█▋        | 1689537/10000000 [7:05:30<48:19:23, 47.77it/s]

Episode 3127 completed | Reward: 2760.00 | Avg Reward: 2495.00 | Frames: 1689530 | Epsilon: 0.1000


Training:  17%|█▋        | 1689934/10000000 [7:05:38<46:54:32, 49.21it/s]

Episode 3128 completed | Reward: 2480.00 | Avg Reward: 2500.60 | Frames: 1689929 | Epsilon: 0.1000


Training:  17%|█▋        | 1690345/10000000 [7:05:47<49:48:00, 46.35it/s]

Episode 3129 completed | Reward: 2540.00 | Avg Reward: 2510.20 | Frames: 1690337 | Epsilon: 0.1000


Training:  17%|█▋        | 1690789/10000000 [7:05:56<49:00:03, 47.10it/s]

Episode 3130 completed | Reward: 3490.00 | Avg Reward: 2508.60 | Frames: 1690782 | Epsilon: 0.1000


Training:  17%|█▋        | 1691290/10000000 [7:06:06<49:20:16, 46.78it/s]

Episode 3131 completed | Reward: 2200.00 | Avg Reward: 2493.80 | Frames: 1691283 | Epsilon: 0.1000


Training:  17%|█▋        | 1691782/10000000 [7:06:16<47:32:03, 48.55it/s]

Episode 3132 completed | Reward: 2680.00 | Avg Reward: 2509.20 | Frames: 1691775 | Epsilon: 0.1000


Training:  17%|█▋        | 1692529/10000000 [7:06:31<49:18:06, 46.81it/s]

Episode 3133 completed | Reward: 2100.00 | Avg Reward: 2491.40 | Frames: 1692520 | Epsilon: 0.1000


Training:  17%|█▋        | 1693241/10000000 [7:06:45<48:19:44, 47.74it/s]

Episode 3134 completed | Reward: 2740.00 | Avg Reward: 2494.00 | Frames: 1693232 | Epsilon: 0.1000


Training:  17%|█▋        | 1693826/10000000 [7:06:57<49:04:32, 47.01it/s]

Episode 3135 completed | Reward: 2720.00 | Avg Reward: 2484.60 | Frames: 1693822 | Epsilon: 0.1000


Training:  17%|█▋        | 1694278/10000000 [7:07:06<47:15:28, 48.82it/s]

Episode 3136 completed | Reward: 1360.00 | Avg Reward: 2471.40 | Frames: 1694274 | Epsilon: 0.1000


Training:  17%|█▋        | 1695051/10000000 [7:07:22<55:46:33, 41.36it/s]

Episode 3137 completed | Reward: 3760.00 | Avg Reward: 2489.20 | Frames: 1695048 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1695461/10000000 [7:07:30<48:38:53, 47.42it/s]

Episode 3138 completed | Reward: 2680.00 | Avg Reward: 2484.60 | Frames: 1695452 | Epsilon: 0.1000


Training:  17%|█▋        | 1696042/10000000 [7:07:42<47:16:59, 48.78it/s]

Episode 3139 completed | Reward: 2600.00 | Avg Reward: 2503.80 | Frames: 1696038 | Epsilon: 0.1000


Training:  17%|█▋        | 1696534/10000000 [7:07:52<47:29:18, 48.57it/s]

Episode 3140 completed | Reward: 3740.00 | Avg Reward: 2520.40 | Frames: 1696529 | Epsilon: 0.1000


Training:  17%|█▋        | 1697010/10000000 [7:08:02<46:54:14, 49.17it/s]

Episode 3141 completed | Reward: 3880.00 | Avg Reward: 2547.40 | Frames: 1697007 | Epsilon: 0.1000


Training:  17%|█▋        | 1697529/10000000 [7:08:12<47:14:06, 48.82it/s]

Episode 3142 completed | Reward: 3380.00 | Avg Reward: 2573.80 | Frames: 1697522 | Epsilon: 0.1000


Training:  17%|█▋        | 1697962/10000000 [7:08:21<46:41:25, 49.39it/s]

Episode 3143 completed | Reward: 1940.00 | Avg Reward: 2580.40 | Frames: 1697958 | Epsilon: 0.1000


Training:  17%|█▋        | 1698437/10000000 [7:08:30<49:14:24, 46.83it/s]

Episode 3144 completed | Reward: 2040.00 | Avg Reward: 2568.40 | Frames: 1698428 | Epsilon: 0.1000


Training:  17%|█▋        | 1698905/10000000 [7:08:40<50:03:50, 46.06it/s]

Episode 3145 completed | Reward: 3480.00 | Avg Reward: 2579.80 | Frames: 1698897 | Epsilon: 0.1000


Training:  17%|█▋        | 1699397/10000000 [7:08:50<48:54:29, 47.14it/s]

Episode 3146 completed | Reward: 3240.00 | Avg Reward: 2581.40 | Frames: 1699390 | Epsilon: 0.1000


Training:  17%|█▋        | 1700057/10000000 [7:09:03<50:17:07, 45.85it/s]

Episode 3147 completed | Reward: 3090.00 | Avg Reward: 2579.50 | Frames: 1700049 | Epsilon: 0.1000


Training:  17%|█▋        | 1700526/10000000 [7:09:12<46:38:51, 49.42it/s]

Episode 3148 completed | Reward: 1220.00 | Avg Reward: 2570.30 | Frames: 1700522 | Epsilon: 0.1000


Training:  17%|█▋        | 1701066/10000000 [7:09:23<47:07:54, 48.91it/s]

Episode 3149 completed | Reward: 2380.00 | Avg Reward: 2548.70 | Frames: 1701059 | Epsilon: 0.1000


Training:  17%|█▋        | 1701598/10000000 [7:09:34<47:26:31, 48.59it/s]

Episode 3150 completed | Reward: 3800.00 | Avg Reward: 2567.30 | Frames: 1701591 | Epsilon: 0.1000


Training:  17%|█▋        | 1702093/10000000 [7:09:44<48:40:45, 47.35it/s]

Episode 3151 completed | Reward: 2540.00 | Avg Reward: 2581.50 | Frames: 1702085 | Epsilon: 0.1000


Training:  17%|█▋        | 1702601/10000000 [7:09:54<49:42:07, 46.37it/s]

Episode 3152 completed | Reward: 2040.00 | Avg Reward: 2590.50 | Frames: 1702593 | Epsilon: 0.1000


Training:  17%|█▋        | 1703230/10000000 [7:10:07<48:39:13, 47.37it/s]

Episode 3153 completed | Reward: 3720.00 | Avg Reward: 2586.50 | Frames: 1703227 | Epsilon: 0.1000


Training:  17%|█▋        | 1703766/10000000 [7:10:18<49:48:02, 46.27it/s]

Episode 3154 completed | Reward: 1360.00 | Avg Reward: 2570.70 | Frames: 1703759 | Epsilon: 0.1000


Training:  17%|█▋        | 1704222/10000000 [7:10:27<49:03:02, 46.98it/s]

Episode 3155 completed | Reward: 920.00 | Avg Reward: 2571.10 | Frames: 1704217 | Epsilon: 0.1000


Training:  17%|█▋        | 1704730/10000000 [7:10:38<49:35:46, 46.46it/s]

Episode 3156 completed | Reward: 3490.00 | Avg Reward: 2582.40 | Frames: 1704728 | Epsilon: 0.1000


Training:  17%|█▋        | 1705315/10000000 [7:10:50<46:18:30, 49.75it/s]

Episode 3157 completed | Reward: 4300.00 | Avg Reward: 2609.00 | Frames: 1705315 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1705793/10000000 [7:10:59<49:20:02, 46.70it/s]

Episode 3158 completed | Reward: 3360.00 | Avg Reward: 2614.00 | Frames: 1705785 | Epsilon: 0.1000


Training:  17%|█▋        | 1706197/10000000 [7:11:08<48:15:25, 47.74it/s]

Episode 3159 completed | Reward: 3900.00 | Avg Reward: 2631.60 | Frames: 1706188 | Epsilon: 0.1000


Training:  17%|█▋        | 1706746/10000000 [7:11:19<47:02:53, 48.96it/s]

Episode 3160 completed | Reward: 3440.00 | Avg Reward: 2627.60 | Frames: 1706736 | Epsilon: 0.1000


Training:  17%|█▋        | 1707486/10000000 [7:11:34<46:11:18, 49.87it/s]

Episode 3161 completed | Reward: 3160.00 | Avg Reward: 2645.80 | Frames: 1707482 | Epsilon: 0.1000


Training:  17%|█▋        | 1708038/10000000 [7:11:45<50:35:08, 45.53it/s]

Episode 3162 completed | Reward: 2680.00 | Avg Reward: 2647.20 | Frames: 1708033 | Epsilon: 0.1000


Training:  17%|█▋        | 1708613/10000000 [7:11:56<49:12:20, 46.81it/s]

Episode 3163 completed | Reward: 2180.00 | Avg Reward: 2632.10 | Frames: 1708606 | Epsilon: 0.1000


Training:  17%|█▋        | 1709673/10000000 [7:12:18<49:45:12, 46.29it/s]

Episode 3164 completed | Reward: 3560.00 | Avg Reward: 2624.10 | Frames: 1709666 | Epsilon: 0.1000


Training:  17%|█▋        | 1710210/10000000 [7:12:29<47:34:44, 48.40it/s]

Episode 3165 completed | Reward: 3970.00 | Avg Reward: 2653.60 | Frames: 1710207 | Epsilon: 0.1000


Training:  17%|█▋        | 1710630/10000000 [7:12:37<48:17:20, 47.68it/s]

Episode 3166 completed | Reward: 2060.00 | Avg Reward: 2647.40 | Frames: 1710623 | Epsilon: 0.1000


Training:  17%|█▋        | 1711141/10000000 [7:12:48<47:56:01, 48.03it/s]

Episode 3167 completed | Reward: 3200.00 | Avg Reward: 2648.20 | Frames: 1711132 | Epsilon: 0.1000


Training:  17%|█▋        | 1712001/10000000 [7:13:05<50:21:59, 45.71it/s]

Episode 3168 completed | Reward: 3340.00 | Avg Reward: 2668.40 | Frames: 1711994 | Epsilon: 0.1000


Training:  17%|█▋        | 1712453/10000000 [7:13:14<49:47:26, 46.24it/s]

Episode 3169 completed | Reward: 840.00 | Avg Reward: 2668.60 | Frames: 1712444 | Epsilon: 0.1000


Training:  17%|█▋        | 1712898/10000000 [7:13:23<48:39:18, 47.31it/s]

Episode 3170 completed | Reward: 1800.00 | Avg Reward: 2664.50 | Frames: 1712891 | Epsilon: 0.1000


Training:  17%|█▋        | 1713425/10000000 [7:13:34<48:43:09, 47.25it/s]

Episode 3171 completed | Reward: 3340.00 | Avg Reward: 2688.50 | Frames: 1713417 | Epsilon: 0.1000


Training:  17%|█▋        | 1713945/10000000 [7:13:44<47:01:48, 48.94it/s]

Episode 3172 completed | Reward: 2700.00 | Avg Reward: 2668.70 | Frames: 1713941 | Epsilon: 0.1000


Training:  17%|█▋        | 1714666/10000000 [7:13:59<48:25:57, 47.52it/s]

Episode 3173 completed | Reward: 1240.00 | Avg Reward: 2667.90 | Frames: 1714659 | Epsilon: 0.1000


Training:  17%|█▋        | 1715086/10000000 [7:14:07<47:30:49, 48.44it/s]

Episode 3174 completed | Reward: 2600.00 | Avg Reward: 2680.70 | Frames: 1715083 | Epsilon: 0.1000


Training:  17%|█▋        | 1715580/10000000 [7:14:17<54:27:38, 42.25it/s]

Episode 3175 completed | Reward: 2700.00 | Avg Reward: 2677.90 | Frames: 1715579 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1715949/10000000 [7:14:25<49:41:24, 46.31it/s]

Episode 3176 completed | Reward: 2520.00 | Avg Reward: 2694.30 | Frames: 1715942 | Epsilon: 0.1000


Training:  17%|█▋        | 1716297/10000000 [7:14:32<47:49:57, 48.11it/s]

Episode 3177 completed | Reward: 2480.00 | Avg Reward: 2709.30 | Frames: 1716288 | Epsilon: 0.1000


Training:  17%|█▋        | 1716758/10000000 [7:14:41<49:48:41, 46.19it/s]

Episode 3178 completed | Reward: 800.00 | Avg Reward: 2681.30 | Frames: 1716757 | Epsilon: 0.1000


Training:  17%|█▋        | 1717469/10000000 [7:14:56<49:16:10, 46.70it/s]

Episode 3179 completed | Reward: 3600.00 | Avg Reward: 2687.50 | Frames: 1717461 | Epsilon: 0.1000


Training:  17%|█▋        | 1718138/10000000 [7:15:09<48:04:50, 47.85it/s]

Episode 3180 completed | Reward: 3400.00 | Avg Reward: 2702.90 | Frames: 1718131 | Epsilon: 0.1000


Training:  17%|█▋        | 1718541/10000000 [7:15:17<49:00:09, 46.94it/s]

Episode 3181 completed | Reward: 1860.00 | Avg Reward: 2699.50 | Frames: 1718532 | Epsilon: 0.1000


Training:  17%|█▋        | 1718978/10000000 [7:15:26<48:45:41, 47.17it/s]

Episode 3182 completed | Reward: 3590.00 | Avg Reward: 2691.00 | Frames: 1718976 | Epsilon: 0.1000


Training:  17%|█▋        | 1719446/10000000 [7:15:36<47:23:23, 48.54it/s]

Episode 3183 completed | Reward: 2160.00 | Avg Reward: 2687.40 | Frames: 1719436 | Epsilon: 0.1000


Training:  17%|█▋        | 1719805/10000000 [7:15:43<48:48:54, 47.12it/s]

Episode 3184 completed | Reward: 4190.00 | Avg Reward: 2693.50 | Frames: 1719797 | Epsilon: 0.1000


Training:  17%|█▋        | 1720234/10000000 [7:15:52<48:17:06, 47.63it/s]

Episode 3185 completed | Reward: 2380.00 | Avg Reward: 2696.30 | Frames: 1720227 | Epsilon: 0.1000


Training:  17%|█▋        | 1720630/10000000 [7:16:00<48:26:25, 47.48it/s]

Episode 3186 completed | Reward: 3340.00 | Avg Reward: 2715.30 | Frames: 1720623 | Epsilon: 0.1000


Training:  17%|█▋        | 1721034/10000000 [7:16:08<46:39:10, 49.29it/s]

Episode 3187 completed | Reward: 2940.00 | Avg Reward: 2707.50 | Frames: 1721029 | Epsilon: 0.1000


Training:  17%|█▋        | 1721507/10000000 [7:16:18<44:38:59, 51.50it/s]

Episode 3188 completed | Reward: 3960.00 | Avg Reward: 2721.70 | Frames: 1721502 | Epsilon: 0.1000


Training:  17%|█▋        | 1722085/10000000 [7:16:29<48:44:35, 47.17it/s]

Episode 3189 completed | Reward: 2860.00 | Avg Reward: 2731.10 | Frames: 1722077 | Epsilon: 0.1000


Training:  17%|█▋        | 1722585/10000000 [7:16:40<49:10:26, 46.76it/s]

Episode 3190 completed | Reward: 2360.00 | Avg Reward: 2737.30 | Frames: 1722584 | Epsilon: 0.1000


Training:  17%|█▋        | 1723154/10000000 [7:16:51<47:46:24, 48.13it/s]

Episode 3191 completed | Reward: 3780.00 | Avg Reward: 2743.70 | Frames: 1723147 | Epsilon: 0.1000


Training:  17%|█▋        | 1723885/10000000 [7:17:06<49:39:02, 46.30it/s]

Episode 3192 completed | Reward: 4040.00 | Avg Reward: 2767.10 | Frames: 1723876 | Epsilon: 0.1000


Training:  17%|█▋        | 1724282/10000000 [7:17:14<47:30:12, 48.39it/s]

Episode 3193 completed | Reward: 2220.00 | Avg Reward: 2767.10 | Frames: 1724277 | Epsilon: 0.1000


Training:  17%|█▋        | 1724754/10000000 [7:17:24<47:28:11, 48.42it/s]

Episode 3194 completed | Reward: 3700.00 | Avg Reward: 2770.70 | Frames: 1724752 | Epsilon: 0.1000


Training:  17%|█▋        | 1725158/10000000 [7:17:32<48:12:27, 47.68it/s]

Episode 3195 completed | Reward: 540.00 | Avg Reward: 2736.20 | Frames: 1725155 | Epsilon: 0.1000


Training:  17%|█▋        | 1725653/10000000 [7:17:42<61:09:46, 37.58it/s]

Episode 3196 completed | Reward: 3340.00 | Avg Reward: 2736.40 | Frames: 1725652 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1726158/10000000 [7:17:52<46:55:54, 48.97it/s]

Episode 3197 completed | Reward: 940.00 | Avg Reward: 2731.00 | Frames: 1726154 | Epsilon: 0.1000


Training:  17%|█▋        | 1726737/10000000 [7:18:04<48:25:40, 47.45it/s]

Episode 3198 completed | Reward: 520.00 | Avg Reward: 2697.20 | Frames: 1726728 | Epsilon: 0.1000


Training:  17%|█▋        | 1727294/10000000 [7:18:15<47:15:11, 48.63it/s]

Episode 3199 completed | Reward: 2800.00 | Avg Reward: 2685.00 | Frames: 1727286 | Epsilon: 0.1000


Training:  17%|█▋        | 1727714/10000000 [7:18:24<48:42:09, 47.18it/s]

Episode 3200 completed | Reward: 580.00 | Avg Reward: 2677.20 | Frames: 1727707 | Epsilon: 0.1000


Training:  17%|█▋        | 1728293/10000000 [7:18:36<49:23:43, 46.52it/s]

Episode 3201 completed | Reward: 3020.00 | Avg Reward: 2671.50 | Frames: 1728284 | Epsilon: 0.1000


Training:  17%|█▋        | 1728906/10000000 [7:18:48<47:05:07, 48.79it/s]

Episode 3202 completed | Reward: 4280.00 | Avg Reward: 2706.30 | Frames: 1728901 | Epsilon: 0.1000


Training:  17%|█▋        | 1729265/10000000 [7:18:56<47:18:56, 48.56it/s]

Episode 3203 completed | Reward: 5290.00 | Avg Reward: 2716.40 | Frames: 1729256 | Epsilon: 0.1000


Training:  17%|█▋        | 1729798/10000000 [7:19:06<47:51:16, 48.01it/s]

Episode 3204 completed | Reward: 1420.00 | Avg Reward: 2692.20 | Frames: 1729793 | Epsilon: 0.1000


Training:  17%|█▋        | 1730373/10000000 [7:19:18<47:19:41, 48.54it/s]

Episode 3205 completed | Reward: 1420.00 | Avg Reward: 2695.60 | Frames: 1730364 | Epsilon: 0.1000


Training:  17%|█▋        | 1730941/10000000 [7:19:30<47:30:57, 48.34it/s]

Episode 3206 completed | Reward: 4180.00 | Avg Reward: 2726.80 | Frames: 1730932 | Epsilon: 0.1000


Training:  17%|█▋        | 1731388/10000000 [7:19:39<43:12:50, 53.15it/s]

Episode 3207 completed | Reward: 2080.00 | Avg Reward: 2730.80 | Frames: 1731380 | Epsilon: 0.1000


Training:  17%|█▋        | 1731841/10000000 [7:19:48<47:06:43, 48.75it/s]

Episode 3208 completed | Reward: 2080.00 | Avg Reward: 2718.80 | Frames: 1731837 | Epsilon: 0.1000


Training:  17%|█▋        | 1732281/10000000 [7:19:57<47:23:38, 48.46it/s]

Episode 3209 completed | Reward: 2260.00 | Avg Reward: 2698.60 | Frames: 1732272 | Epsilon: 0.1000


Training:  17%|█▋        | 1732662/10000000 [7:20:05<48:49:26, 47.04it/s]

Episode 3210 completed | Reward: 1700.00 | Avg Reward: 2707.60 | Frames: 1732654 | Epsilon: 0.1000


Training:  17%|█▋        | 1733261/10000000 [7:20:17<48:06:03, 47.74it/s]

Episode 3211 completed | Reward: 2900.00 | Avg Reward: 2688.20 | Frames: 1733252 | Epsilon: 0.1000


Training:  17%|█▋        | 1733749/10000000 [7:20:27<47:40:35, 48.16it/s]

Episode 3212 completed | Reward: 1520.00 | Avg Reward: 2698.00 | Frames: 1733747 | Epsilon: 0.1000


Training:  17%|█▋        | 1734330/10000000 [7:20:39<47:46:19, 48.06it/s]

Episode 3213 completed | Reward: 2040.00 | Avg Reward: 2710.20 | Frames: 1734326 | Epsilon: 0.1000


Training:  17%|█▋        | 1734745/10000000 [7:20:47<48:05:19, 47.74it/s]

Episode 3214 completed | Reward: 1980.00 | Avg Reward: 2707.00 | Frames: 1734741 | Epsilon: 0.1000


Training:  17%|█▋        | 1735325/10000000 [7:20:59<49:44:08, 46.16it/s]

Episode 3215 completed | Reward: 4780.00 | Avg Reward: 2725.80 | Frames: 1735318 | Epsilon: 0.1000


Training:  17%|█▋        | 1735805/10000000 [7:21:09<59:19:54, 38.69it/s]

Episode 3216 completed | Reward: 1140.00 | Avg Reward: 2706.40 | Frames: 1735804 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1736325/10000000 [7:21:19<47:51:30, 47.96it/s]

Episode 3217 completed | Reward: 1860.00 | Avg Reward: 2708.60 | Frames: 1736316 | Epsilon: 0.1000


Training:  17%|█▋        | 1736914/10000000 [7:21:31<46:57:53, 48.87it/s]

Episode 3218 completed | Reward: 4940.00 | Avg Reward: 2726.60 | Frames: 1736910 | Epsilon: 0.1000


Training:  17%|█▋        | 1737318/10000000 [7:21:39<46:58:09, 48.87it/s]

Episode 3219 completed | Reward: 2520.00 | Avg Reward: 2713.40 | Frames: 1737309 | Epsilon: 0.1000


Training:  17%|█▋        | 1737702/10000000 [7:21:47<47:48:01, 48.01it/s]

Episode 3220 completed | Reward: 1600.00 | Avg Reward: 2698.60 | Frames: 1737698 | Epsilon: 0.1000


Training:  17%|█▋        | 1738145/10000000 [7:21:56<49:39:09, 46.22it/s]

Episode 3221 completed | Reward: 640.00 | Avg Reward: 2676.60 | Frames: 1738136 | Epsilon: 0.1000


Training:  17%|█▋        | 1738650/10000000 [7:22:06<48:09:56, 47.64it/s]

Episode 3222 completed | Reward: 2540.00 | Avg Reward: 2689.00 | Frames: 1738644 | Epsilon: 0.1000


Training:  17%|█▋        | 1739156/10000000 [7:22:17<44:02:52, 52.10it/s]

Episode 3223 completed | Reward: 2240.00 | Avg Reward: 2676.20 | Frames: 1739155 | Epsilon: 0.1000


Training:  17%|█▋        | 1739705/10000000 [7:22:28<48:23:24, 47.42it/s]

Episode 3224 completed | Reward: 2340.00 | Avg Reward: 2664.70 | Frames: 1739696 | Epsilon: 0.1000


Training:  17%|█▋        | 1740198/10000000 [7:22:38<49:19:54, 46.51it/s]

Episode 3225 completed | Reward: 1520.00 | Avg Reward: 2639.30 | Frames: 1740197 | Epsilon: 0.1000


Training:  17%|█▋        | 1740656/10000000 [7:22:47<44:59:57, 50.98it/s]

Episode 3226 completed | Reward: 3240.00 | Avg Reward: 2651.70 | Frames: 1740648 | Epsilon: 0.1000


Training:  17%|█▋        | 1741161/10000000 [7:22:58<51:21:29, 44.67it/s]

Episode 3227 completed | Reward: 1180.00 | Avg Reward: 2635.90 | Frames: 1741154 | Epsilon: 0.1000


Training:  17%|█▋        | 1741641/10000000 [7:23:07<49:03:44, 46.76it/s]

Episode 3228 completed | Reward: 2460.00 | Avg Reward: 2635.70 | Frames: 1741634 | Epsilon: 0.1000


Training:  17%|█▋        | 1742066/10000000 [7:23:16<50:47:08, 45.17it/s]

Episode 3229 completed | Reward: 2820.00 | Avg Reward: 2638.50 | Frames: 1742060 | Epsilon: 0.1000


Training:  17%|█▋        | 1742550/10000000 [7:23:26<47:55:05, 47.87it/s]

Episode 3230 completed | Reward: 3910.00 | Avg Reward: 2642.70 | Frames: 1742543 | Epsilon: 0.1000


Training:  17%|█▋        | 1743022/10000000 [7:23:35<47:21:11, 48.44it/s]

Episode 3231 completed | Reward: 3280.00 | Avg Reward: 2653.50 | Frames: 1743017 | Epsilon: 0.1000


Training:  17%|█▋        | 1743749/10000000 [7:23:50<49:48:43, 46.04it/s]

Episode 3232 completed | Reward: 4100.00 | Avg Reward: 2667.70 | Frames: 1743742 | Epsilon: 0.1000


Training:  17%|█▋        | 1744353/10000000 [7:24:02<48:35:35, 47.19it/s]

Episode 3233 completed | Reward: 2240.00 | Avg Reward: 2669.10 | Frames: 1744346 | Epsilon: 0.1000


Training:  17%|█▋        | 1744801/10000000 [7:24:11<48:21:14, 47.42it/s]

Episode 3234 completed | Reward: 460.00 | Avg Reward: 2646.30 | Frames: 1744793 | Epsilon: 0.1000


Training:  17%|█▋        | 1745302/10000000 [7:24:22<47:46:17, 48.00it/s]

Episode 3235 completed | Reward: 1840.00 | Avg Reward: 2637.50 | Frames: 1745297 | Epsilon: 0.1000


Training:  17%|█▋        | 1745722/10000000 [7:24:30<47:01:17, 48.76it/s]

Episode 3236 completed | Reward: 3620.00 | Avg Reward: 2660.10 | Frames: 1745719 | Epsilon: 0.1000


Training:  17%|█▋        | 1746643/10000000 [7:24:49<48:04:11, 47.69it/s]

Episode 3237 completed | Reward: 4200.00 | Avg Reward: 2664.50 | Frames: 1746643 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  17%|█▋        | 1747097/10000000 [7:24:58<49:24:26, 46.40it/s]

Episode 3238 completed | Reward: 1240.00 | Avg Reward: 2650.10 | Frames: 1747090 | Epsilon: 0.1000


Training:  17%|█▋        | 1747733/10000000 [7:25:11<49:13:45, 46.56it/s]

Episode 3239 completed | Reward: 3820.00 | Avg Reward: 2662.30 | Frames: 1747726 | Epsilon: 0.1000


Training:  17%|█▋        | 1748233/10000000 [7:25:21<48:11:31, 47.56it/s]

Episode 3240 completed | Reward: 1860.00 | Avg Reward: 2643.50 | Frames: 1748225 | Epsilon: 0.1000


Training:  17%|█▋        | 1748845/10000000 [7:25:34<48:53:43, 46.88it/s]

Episode 3241 completed | Reward: 4460.00 | Avg Reward: 2649.30 | Frames: 1748837 | Epsilon: 0.1000


Training:  17%|█▋        | 1749486/10000000 [7:25:47<48:13:10, 47.53it/s]

Episode 3242 completed | Reward: 2940.00 | Avg Reward: 2644.90 | Frames: 1749482 | Epsilon: 0.1000


Training:  17%|█▋        | 1749966/10000000 [7:25:56<48:49:11, 46.94it/s]

Episode 3243 completed | Reward: 3100.00 | Avg Reward: 2656.50 | Frames: 1749963 | Epsilon: 0.1000


Training:  18%|█▊        | 1750009/10000000 [7:26:09<849:57:51,  2.70it/s] 


Evaluation at frame 1750000: 2730.00
Episode 3244 completed | Reward: 60.00 | Avg Reward: 2636.70 | Frames: 1750001 | Epsilon: 0.1000


Training:  18%|█▊        | 1750929/10000000 [7:26:28<47:44:51, 47.99it/s]

Episode 3245 completed | Reward: 3220.00 | Avg Reward: 2634.10 | Frames: 1750920 | Epsilon: 0.1000


Training:  18%|█▊        | 1751794/10000000 [7:26:45<48:16:50, 47.46it/s]

Episode 3246 completed | Reward: 2680.00 | Avg Reward: 2628.50 | Frames: 1751789 | Epsilon: 0.1000


Training:  18%|█▊        | 1752182/10000000 [7:26:53<48:30:53, 47.22it/s]

Episode 3247 completed | Reward: 1520.00 | Avg Reward: 2612.80 | Frames: 1752175 | Epsilon: 0.1000


Training:  18%|█▊        | 1753146/10000000 [7:27:13<47:20:23, 48.39it/s]

Episode 3248 completed | Reward: 2260.00 | Avg Reward: 2623.20 | Frames: 1753141 | Epsilon: 0.1000


Training:  18%|█▊        | 1754013/10000000 [7:27:30<49:05:19, 46.66it/s]

Episode 3249 completed | Reward: 3470.00 | Avg Reward: 2634.10 | Frames: 1754004 | Epsilon: 0.1000


Training:  18%|█▊        | 1754594/10000000 [7:27:42<47:48:21, 47.91it/s]

Episode 3250 completed | Reward: 1200.00 | Avg Reward: 2608.10 | Frames: 1754589 | Epsilon: 0.1000


Training:  18%|█▊        | 1755310/10000000 [7:27:57<46:12:20, 49.57it/s]

Episode 3251 completed | Reward: 3980.00 | Avg Reward: 2622.50 | Frames: 1755305 | Epsilon: 0.1000


Training:  18%|█▊        | 1755682/10000000 [7:28:04<48:08:56, 47.56it/s]

Episode 3252 completed | Reward: 1060.00 | Avg Reward: 2612.70 | Frames: 1755678 | Epsilon: 0.1000


Training:  18%|█▊        | 1756210/10000000 [7:28:15<46:34:09, 49.17it/s]

Episode 3253 completed | Reward: 2960.00 | Avg Reward: 2605.10 | Frames: 1756207 | Epsilon: 0.1000


Training:  18%|█▊        | 1756610/10000000 [7:28:23<47:26:05, 48.27it/s]

Episode 3254 completed | Reward: 1440.00 | Avg Reward: 2605.90 | Frames: 1756603 | Epsilon: 0.1000


Training:  18%|█▊        | 1757144/10000000 [7:28:34<53:09:24, 43.07it/s]

Episode 3255 completed | Reward: 1060.00 | Avg Reward: 2607.30 | Frames: 1757143 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1757637/10000000 [7:28:44<49:55:55, 45.85it/s]

Episode 3256 completed | Reward: 2300.00 | Avg Reward: 2595.40 | Frames: 1757629 | Epsilon: 0.1000


Training:  18%|█▊        | 1758301/10000000 [7:28:58<49:03:52, 46.66it/s]

Episode 3257 completed | Reward: 2280.00 | Avg Reward: 2575.20 | Frames: 1758294 | Epsilon: 0.1000


Training:  18%|█▊        | 1758742/10000000 [7:29:07<50:15:29, 45.55it/s]

Episode 3258 completed | Reward: 1040.00 | Avg Reward: 2552.00 | Frames: 1758738 | Epsilon: 0.1000


Training:  18%|█▊        | 1759336/10000000 [7:29:19<45:38:09, 50.16it/s]

Episode 3259 completed | Reward: 3200.00 | Avg Reward: 2545.00 | Frames: 1759328 | Epsilon: 0.1000


Training:  18%|█▊        | 1759714/10000000 [7:29:26<46:42:47, 49.00it/s]

Episode 3260 completed | Reward: 2740.00 | Avg Reward: 2538.00 | Frames: 1759707 | Epsilon: 0.1000


Training:  18%|█▊        | 1760169/10000000 [7:29:36<48:01:07, 47.67it/s]

Episode 3261 completed | Reward: 1500.00 | Avg Reward: 2521.40 | Frames: 1760160 | Epsilon: 0.1000


Training:  18%|█▊        | 1760786/10000000 [7:29:48<48:47:10, 46.91it/s]

Episode 3262 completed | Reward: 3260.00 | Avg Reward: 2527.20 | Frames: 1760779 | Epsilon: 0.1000


Training:  18%|█▊        | 1761538/10000000 [7:30:04<47:42:04, 47.98it/s]

Episode 3263 completed | Reward: 2460.00 | Avg Reward: 2530.00 | Frames: 1761534 | Epsilon: 0.1000


Training:  18%|█▊        | 1762238/10000000 [7:30:18<49:34:18, 46.16it/s]

Episode 3264 completed | Reward: 1780.00 | Avg Reward: 2512.20 | Frames: 1762233 | Epsilon: 0.1000


Training:  18%|█▊        | 1762850/10000000 [7:30:30<49:26:30, 46.28it/s]

Episode 3265 completed | Reward: 1800.00 | Avg Reward: 2490.50 | Frames: 1762843 | Epsilon: 0.1000


Training:  18%|█▊        | 1763378/10000000 [7:30:41<48:11:35, 47.47it/s]

Episode 3266 completed | Reward: 2420.00 | Avg Reward: 2494.10 | Frames: 1763375 | Epsilon: 0.1000


Training:  18%|█▊        | 1763874/10000000 [7:30:51<46:44:52, 48.94it/s]

Episode 3267 completed | Reward: 3200.00 | Avg Reward: 2494.10 | Frames: 1763866 | Epsilon: 0.1000


Training:  18%|█▊        | 1764433/10000000 [7:31:02<49:59:39, 45.76it/s]

Episode 3268 completed | Reward: 2120.00 | Avg Reward: 2481.90 | Frames: 1764426 | Epsilon: 0.1000


Training:  18%|█▊        | 1764954/10000000 [7:31:13<49:08:08, 46.56it/s]

Episode 3269 completed | Reward: 3280.00 | Avg Reward: 2506.30 | Frames: 1764949 | Epsilon: 0.1000


Training:  18%|█▊        | 1765641/10000000 [7:31:27<49:03:34, 46.62it/s]

Episode 3270 completed | Reward: 2480.00 | Avg Reward: 2513.10 | Frames: 1765632 | Epsilon: 0.1000


Training:  18%|█▊        | 1766129/10000000 [7:31:37<48:04:09, 47.58it/s]

Episode 3271 completed | Reward: 2180.00 | Avg Reward: 2501.50 | Frames: 1766120 | Epsilon: 0.1000


Training:  18%|█▊        | 1766873/10000000 [7:31:52<48:21:11, 47.30it/s]

Episode 3272 completed | Reward: 4320.00 | Avg Reward: 2517.70 | Frames: 1766864 | Epsilon: 0.1000


Training:  18%|█▊        | 1767391/10000000 [7:32:03<57:02:56, 40.09it/s]

Episode 3273 completed | Reward: 1280.00 | Avg Reward: 2518.10 | Frames: 1767390 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1767922/10000000 [7:32:13<47:48:15, 47.83it/s]

Episode 3274 completed | Reward: 5040.00 | Avg Reward: 2542.50 | Frames: 1767917 | Epsilon: 0.1000


Training:  18%|█▊        | 1768754/10000000 [7:32:30<47:36:57, 48.02it/s]

Episode 3275 completed | Reward: 5040.00 | Avg Reward: 2565.90 | Frames: 1768747 | Epsilon: 0.1000


Training:  18%|█▊        | 1769273/10000000 [7:32:41<48:51:49, 46.79it/s]

Episode 3276 completed | Reward: 3880.00 | Avg Reward: 2579.50 | Frames: 1769266 | Epsilon: 0.1000


Training:  18%|█▊        | 1769932/10000000 [7:32:54<45:01:22, 50.78it/s]

Episode 3277 completed | Reward: 2620.00 | Avg Reward: 2580.90 | Frames: 1769924 | Epsilon: 0.1000


Training:  18%|█▊        | 1770729/10000000 [7:33:11<47:57:04, 47.67it/s]

Episode 3278 completed | Reward: 3060.00 | Avg Reward: 2603.50 | Frames: 1770720 | Epsilon: 0.1000


Training:  18%|█▊        | 1771230/10000000 [7:33:21<47:09:46, 48.47it/s]

Episode 3279 completed | Reward: 2360.00 | Avg Reward: 2591.10 | Frames: 1771223 | Epsilon: 0.1000


Training:  18%|█▊        | 1771638/10000000 [7:33:29<47:14:58, 48.37it/s]

Episode 3280 completed | Reward: 3340.00 | Avg Reward: 2590.50 | Frames: 1771630 | Epsilon: 0.1000


Training:  18%|█▊        | 1772073/10000000 [7:33:38<48:26:36, 47.18it/s]

Episode 3281 completed | Reward: 3120.00 | Avg Reward: 2603.10 | Frames: 1772065 | Epsilon: 0.1000


Training:  18%|█▊        | 1772766/10000000 [7:33:52<46:24:25, 49.25it/s]

Episode 3282 completed | Reward: 3160.00 | Avg Reward: 2598.80 | Frames: 1772763 | Epsilon: 0.1000


Training:  18%|█▊        | 1773258/10000000 [7:34:02<48:42:27, 46.92it/s]

Episode 3283 completed | Reward: 2540.00 | Avg Reward: 2602.60 | Frames: 1773254 | Epsilon: 0.1000


Training:  18%|█▊        | 1773650/10000000 [7:34:10<47:20:50, 48.26it/s]

Episode 3284 completed | Reward: 3680.00 | Avg Reward: 2597.50 | Frames: 1773645 | Epsilon: 0.1000


Training:  18%|█▊        | 1774130/10000000 [7:34:20<48:10:51, 47.42it/s]

Episode 3285 completed | Reward: 1360.00 | Avg Reward: 2587.30 | Frames: 1774125 | Epsilon: 0.1000


Training:  18%|█▊        | 1774637/10000000 [7:34:30<48:33:40, 47.05it/s]

Episode 3286 completed | Reward: 2400.00 | Avg Reward: 2577.90 | Frames: 1774628 | Epsilon: 0.1000


Training:  18%|█▊        | 1775297/10000000 [7:34:44<50:20:45, 45.38it/s]

Episode 3287 completed | Reward: 1420.00 | Avg Reward: 2562.70 | Frames: 1775289 | Epsilon: 0.1000


Training:  18%|█▊        | 1776017/10000000 [7:34:58<48:31:44, 47.07it/s]

Episode 3288 completed | Reward: 2940.00 | Avg Reward: 2552.50 | Frames: 1776009 | Epsilon: 0.1000


Training:  18%|█▊        | 1776402/10000000 [7:35:06<47:42:46, 47.88it/s]

Episode 3289 completed | Reward: 2940.00 | Avg Reward: 2553.30 | Frames: 1776400 | Epsilon: 0.1000


Training:  18%|█▊        | 1776869/10000000 [7:35:16<49:26:00, 46.21it/s]

Episode 3290 completed | Reward: 4480.00 | Avg Reward: 2574.50 | Frames: 1776862 | Epsilon: 0.1000


Training:  18%|█▊        | 1777341/10000000 [7:35:25<47:40:24, 47.91it/s]

Episode 3291 completed | Reward: 1420.00 | Avg Reward: 2550.90 | Frames: 1777338 | Epsilon: 0.1000


Training:  18%|█▊        | 1777918/10000000 [7:35:37<58:46:23, 38.86it/s]

Episode 3292 completed | Reward: 2920.00 | Avg Reward: 2539.70 | Frames: 1777917 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1778614/10000000 [7:35:52<49:25:17, 46.21it/s]

Episode 3293 completed | Reward: 4140.00 | Avg Reward: 2558.90 | Frames: 1778607 | Epsilon: 0.1000


Training:  18%|█▊        | 1779133/10000000 [7:36:02<49:04:30, 46.53it/s]

Episode 3294 completed | Reward: 660.00 | Avg Reward: 2528.50 | Frames: 1779125 | Epsilon: 0.1000


Training:  18%|█▊        | 1779626/10000000 [7:36:13<49:08:32, 46.47it/s]

Episode 3295 completed | Reward: 2040.00 | Avg Reward: 2543.50 | Frames: 1779623 | Epsilon: 0.1000


Training:  18%|█▊        | 1780065/10000000 [7:36:22<47:06:48, 48.46it/s]

Episode 3296 completed | Reward: 2120.00 | Avg Reward: 2531.30 | Frames: 1780058 | Epsilon: 0.1000


Training:  18%|█▊        | 1780569/10000000 [7:36:32<49:30:09, 46.12it/s]

Episode 3297 completed | Reward: 3640.00 | Avg Reward: 2558.30 | Frames: 1780562 | Epsilon: 0.1000


Training:  18%|█▊        | 1781101/10000000 [7:36:43<49:31:02, 46.11it/s]

Episode 3298 completed | Reward: 1340.00 | Avg Reward: 2566.50 | Frames: 1781093 | Epsilon: 0.1000


Training:  18%|█▊        | 1781550/10000000 [7:36:52<46:21:41, 49.24it/s]

Episode 3299 completed | Reward: 1580.00 | Avg Reward: 2554.30 | Frames: 1781546 | Epsilon: 0.1000


Training:  18%|█▊        | 1782015/10000000 [7:37:01<45:56:04, 49.70it/s]

Episode 3300 completed | Reward: 2440.00 | Avg Reward: 2572.90 | Frames: 1782009 | Epsilon: 0.1000


Training:  18%|█▊        | 1782494/10000000 [7:37:11<48:41:17, 46.88it/s]

Episode 3301 completed | Reward: 1200.00 | Avg Reward: 2554.70 | Frames: 1782487 | Epsilon: 0.1000


Training:  18%|█▊        | 1782963/10000000 [7:37:20<45:09:53, 50.54it/s]

Episode 3302 completed | Reward: 3360.00 | Avg Reward: 2545.50 | Frames: 1782958 | Epsilon: 0.1000


Training:  18%|█▊        | 1783541/10000000 [7:37:32<50:06:31, 45.55it/s]

Episode 3303 completed | Reward: 2860.00 | Avg Reward: 2521.20 | Frames: 1783534 | Epsilon: 0.1000


Training:  18%|█▊        | 1783955/10000000 [7:37:40<43:44:37, 52.17it/s]

Episode 3304 completed | Reward: 2220.00 | Avg Reward: 2529.20 | Frames: 1783950 | Epsilon: 0.1000


Training:  18%|█▊        | 1784451/10000000 [7:37:50<44:15:05, 51.57it/s]

Episode 3305 completed | Reward: 2440.00 | Avg Reward: 2539.40 | Frames: 1784445 | Epsilon: 0.1000


Training:  18%|█▊        | 1785023/10000000 [7:38:02<45:29:14, 50.17it/s]

Episode 3306 completed | Reward: 2140.00 | Avg Reward: 2519.00 | Frames: 1785019 | Epsilon: 0.1000


Training:  18%|█▊        | 1785459/10000000 [7:38:10<45:45:35, 49.87it/s]

Episode 3307 completed | Reward: 2140.00 | Avg Reward: 2519.60 | Frames: 1785454 | Epsilon: 0.1000


Training:  18%|█▊        | 1785901/10000000 [7:38:19<49:28:36, 46.12it/s]

Episode 3308 completed | Reward: 1680.00 | Avg Reward: 2515.60 | Frames: 1785893 | Epsilon: 0.1000


Training:  18%|█▊        | 1786377/10000000 [7:38:29<48:43:45, 46.82it/s]

Episode 3309 completed | Reward: 2590.00 | Avg Reward: 2518.90 | Frames: 1786370 | Epsilon: 0.1000


Training:  18%|█▊        | 1786785/10000000 [7:38:37<49:27:05, 46.13it/s]

Episode 3310 completed | Reward: 1260.00 | Avg Reward: 2514.50 | Frames: 1786778 | Epsilon: 0.1000


Training:  18%|█▊        | 1787345/10000000 [7:38:48<48:26:20, 47.10it/s]

Episode 3311 completed | Reward: 3440.00 | Avg Reward: 2519.90 | Frames: 1787337 | Epsilon: 0.1000


Training:  18%|█▊        | 1787749/10000000 [7:38:57<48:05:33, 47.43it/s]

Episode 3312 completed | Reward: 1560.00 | Avg Reward: 2520.30 | Frames: 1787741 | Epsilon: 0.1000


Training:  18%|█▊        | 1788337/10000000 [7:39:09<58:19:41, 39.11it/s]

Episode 3313 completed | Reward: 1020.00 | Avg Reward: 2510.10 | Frames: 1788336 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1788869/10000000 [7:39:19<48:05:56, 47.42it/s]

Episode 3314 completed | Reward: 2640.00 | Avg Reward: 2516.70 | Frames: 1788861 | Epsilon: 0.1000


Training:  18%|█▊        | 1789402/10000000 [7:39:30<48:09:14, 47.36it/s]

Episode 3315 completed | Reward: 3140.00 | Avg Reward: 2500.30 | Frames: 1789395 | Epsilon: 0.1000


Training:  18%|█▊        | 1790081/10000000 [7:39:43<48:38:38, 46.88it/s]

Episode 3316 completed | Reward: 3940.00 | Avg Reward: 2528.30 | Frames: 1790073 | Epsilon: 0.1000


Training:  18%|█▊        | 1790757/10000000 [7:39:57<48:30:46, 47.00it/s]

Episode 3317 completed | Reward: 3350.00 | Avg Reward: 2543.20 | Frames: 1790749 | Epsilon: 0.1000


Training:  18%|█▊        | 1791455/10000000 [7:40:11<46:01:30, 49.54it/s]

Episode 3318 completed | Reward: 600.00 | Avg Reward: 2499.80 | Frames: 1791454 | Epsilon: 0.1000


Training:  18%|█▊        | 1791987/10000000 [7:40:22<44:48:52, 50.88it/s]

Episode 3319 completed | Reward: 4260.00 | Avg Reward: 2517.20 | Frames: 1791983 | Epsilon: 0.1000


Training:  18%|█▊        | 1792514/10000000 [7:40:33<47:54:03, 47.60it/s]

Episode 3320 completed | Reward: 3940.00 | Avg Reward: 2540.60 | Frames: 1792507 | Epsilon: 0.1000


Training:  18%|█▊        | 1793129/10000000 [7:40:45<49:40:35, 45.89it/s]

Episode 3321 completed | Reward: 3240.00 | Avg Reward: 2566.60 | Frames: 1793128 | Epsilon: 0.1000


Training:  18%|█▊        | 1793650/10000000 [7:40:56<48:55:11, 46.60it/s]

Episode 3322 completed | Reward: 2500.00 | Avg Reward: 2566.20 | Frames: 1793643 | Epsilon: 0.1000


Training:  18%|█▊        | 1794369/10000000 [7:41:10<48:07:17, 47.37it/s]

Episode 3323 completed | Reward: 2820.00 | Avg Reward: 2572.00 | Frames: 1794361 | Epsilon: 0.1000


Training:  18%|█▊        | 1795085/10000000 [7:41:25<48:35:13, 46.91it/s]

Episode 3324 completed | Reward: 3460.00 | Avg Reward: 2583.20 | Frames: 1795076 | Epsilon: 0.1000


Training:  18%|█▊        | 1795701/10000000 [7:41:37<49:53:32, 45.68it/s]

Episode 3325 completed | Reward: 3940.00 | Avg Reward: 2607.40 | Frames: 1795694 | Epsilon: 0.1000


Training:  18%|█▊        | 1796326/10000000 [7:41:50<47:02:27, 48.44it/s]

Episode 3326 completed | Reward: 1560.00 | Avg Reward: 2590.60 | Frames: 1796323 | Epsilon: 0.1000


Training:  18%|█▊        | 1796825/10000000 [7:42:00<47:49:57, 47.64it/s]

Episode 3327 completed | Reward: 2140.00 | Avg Reward: 2600.20 | Frames: 1796816 | Epsilon: 0.1000


Training:  18%|█▊        | 1797638/10000000 [7:42:16<46:25:50, 49.07it/s]

Episode 3328 completed | Reward: 2900.00 | Avg Reward: 2604.60 | Frames: 1797628 | Epsilon: 0.1000


Training:  18%|█▊        | 1798171/10000000 [7:42:27<44:35:56, 51.08it/s]

Episode 3329 completed | Reward: 3540.00 | Avg Reward: 2611.80 | Frames: 1798165 | Epsilon: 0.1000


Training:  18%|█▊        | 1798591/10000000 [7:42:35<53:14:38, 42.79it/s]

Episode 3330 completed | Reward: 1500.00 | Avg Reward: 2587.70 | Frames: 1798589 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1799109/10000000 [7:42:46<46:48:21, 48.67it/s]

Episode 3331 completed | Reward: 2580.00 | Avg Reward: 2580.70 | Frames: 1799100 | Epsilon: 0.1000


Training:  18%|█▊        | 1799689/10000000 [7:42:57<47:27:14, 48.00it/s]

Episode 3332 completed | Reward: 3080.00 | Avg Reward: 2570.50 | Frames: 1799682 | Epsilon: 0.1000


Training:  18%|█▊        | 1800381/10000000 [7:43:11<48:43:11, 46.75it/s]

Episode 3333 completed | Reward: 3940.00 | Avg Reward: 2587.50 | Frames: 1800374 | Epsilon: 0.1000


Training:  18%|█▊        | 1800883/10000000 [7:43:21<43:38:17, 52.19it/s]

Episode 3334 completed | Reward: 2520.00 | Avg Reward: 2608.10 | Frames: 1800879 | Epsilon: 0.1000


Training:  18%|█▊        | 1801606/10000000 [7:43:35<47:54:10, 47.54it/s]

Episode 3335 completed | Reward: 2020.00 | Avg Reward: 2609.90 | Frames: 1801599 | Epsilon: 0.1000


Training:  18%|█▊        | 1802077/10000000 [7:43:45<46:56:16, 48.52it/s]

Episode 3336 completed | Reward: 2940.00 | Avg Reward: 2603.10 | Frames: 1802068 | Epsilon: 0.1000


Training:  18%|█▊        | 1802779/10000000 [7:43:59<44:29:09, 51.18it/s]

Episode 3337 completed | Reward: 2920.00 | Avg Reward: 2590.30 | Frames: 1802775 | Epsilon: 0.1000


Training:  18%|█▊        | 1803803/10000000 [7:44:19<45:30:19, 50.03it/s]

Episode 3338 completed | Reward: 3160.00 | Avg Reward: 2609.50 | Frames: 1803796 | Epsilon: 0.1000


Training:  18%|█▊        | 1804181/10000000 [7:44:27<47:35:00, 47.84it/s]

Episode 3339 completed | Reward: 1120.00 | Avg Reward: 2582.50 | Frames: 1804172 | Epsilon: 0.1000


Training:  18%|█▊        | 1804577/10000000 [7:44:35<51:00:51, 44.62it/s]

Episode 3340 completed | Reward: 2540.00 | Avg Reward: 2589.30 | Frames: 1804571 | Epsilon: 0.1000


Training:  18%|█▊        | 1805157/10000000 [7:44:46<49:13:45, 46.24it/s]

Episode 3341 completed | Reward: 3140.00 | Avg Reward: 2576.10 | Frames: 1805149 | Epsilon: 0.1000


Training:  18%|█▊        | 1805641/10000000 [7:44:56<49:14:39, 46.22it/s]

Episode 3342 completed | Reward: 1200.00 | Avg Reward: 2558.70 | Frames: 1805634 | Epsilon: 0.1000


Training:  18%|█▊        | 1806207/10000000 [7:45:07<45:29:24, 50.03it/s]

Episode 3343 completed | Reward: 3000.00 | Avg Reward: 2557.70 | Frames: 1806201 | Epsilon: 0.1000


Training:  18%|█▊        | 1807113/10000000 [7:45:26<46:38:11, 48.80it/s]

Episode 3344 completed | Reward: 5140.00 | Avg Reward: 2608.50 | Frames: 1807104 | Epsilon: 0.1000


Training:  18%|█▊        | 1807609/10000000 [7:45:35<47:48:57, 47.59it/s]

Episode 3345 completed | Reward: 2120.00 | Avg Reward: 2597.50 | Frames: 1807600 | Epsilon: 0.1000


Training:  18%|█▊        | 1808134/10000000 [7:45:46<47:04:57, 48.33it/s]

Episode 3346 completed | Reward: 1040.00 | Avg Reward: 2581.10 | Frames: 1808124 | Epsilon: 0.1000


Training:  18%|█▊        | 1808631/10000000 [7:45:56<45:15:29, 50.28it/s]

Episode 3347 completed | Reward: 1700.00 | Avg Reward: 2582.90 | Frames: 1808631 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1809087/10000000 [7:46:05<44:59:04, 50.58it/s]

Episode 3348 completed | Reward: 3460.00 | Avg Reward: 2594.90 | Frames: 1809081 | Epsilon: 0.1000


Training:  18%|█▊        | 1809571/10000000 [7:46:15<44:19:05, 51.34it/s]

Episode 3349 completed | Reward: 3640.00 | Avg Reward: 2596.60 | Frames: 1809567 | Epsilon: 0.1000


Training:  18%|█▊        | 1809953/10000000 [7:46:22<46:35:39, 48.83it/s]

Episode 3350 completed | Reward: 580.00 | Avg Reward: 2590.40 | Frames: 1809944 | Epsilon: 0.1000


Training:  18%|█▊        | 1810445/10000000 [7:46:32<49:05:14, 46.34it/s]

Episode 3351 completed | Reward: 1040.00 | Avg Reward: 2561.00 | Frames: 1810438 | Epsilon: 0.1000


Training:  18%|█▊        | 1811498/10000000 [7:46:53<45:57:44, 49.49it/s]

Episode 3352 completed | Reward: 4720.00 | Avg Reward: 2597.60 | Frames: 1811490 | Epsilon: 0.1000


Training:  18%|█▊        | 1811913/10000000 [7:47:01<46:16:09, 49.16it/s]

Episode 3353 completed | Reward: 1540.00 | Avg Reward: 2583.40 | Frames: 1811904 | Epsilon: 0.1000


Training:  18%|█▊        | 1812382/10000000 [7:47:11<47:15:52, 48.12it/s]

Episode 3354 completed | Reward: 1020.00 | Avg Reward: 2579.20 | Frames: 1812375 | Epsilon: 0.1000


Training:  18%|█▊        | 1812943/10000000 [7:47:22<45:34:56, 49.89it/s]

Episode 3355 completed | Reward: 3340.00 | Avg Reward: 2602.00 | Frames: 1812940 | Epsilon: 0.1000


Training:  18%|█▊        | 1813532/10000000 [7:47:34<41:30:34, 54.78it/s]

Episode 3356 completed | Reward: 2240.00 | Avg Reward: 2601.40 | Frames: 1813526 | Epsilon: 0.1000


Training:  18%|█▊        | 1814021/10000000 [7:47:44<50:01:22, 45.46it/s]

Episode 3357 completed | Reward: 3000.00 | Avg Reward: 2608.60 | Frames: 1814014 | Epsilon: 0.1000


Training:  18%|█▊        | 1814483/10000000 [7:47:53<44:45:35, 50.80it/s]

Episode 3358 completed | Reward: 3060.00 | Avg Reward: 2628.80 | Frames: 1814479 | Epsilon: 0.1000


Training:  18%|█▊        | 1814956/10000000 [7:48:02<42:34:42, 53.40it/s]

Episode 3359 completed | Reward: 1580.00 | Avg Reward: 2612.60 | Frames: 1814951 | Epsilon: 0.1000


Training:  18%|█▊        | 1815474/10000000 [7:48:13<46:34:49, 48.81it/s]

Episode 3360 completed | Reward: 1680.00 | Avg Reward: 2602.00 | Frames: 1815467 | Epsilon: 0.1000


Training:  18%|█▊        | 1816365/10000000 [7:48:30<47:07:37, 48.24it/s]

Episode 3361 completed | Reward: 5100.00 | Avg Reward: 2638.00 | Frames: 1816356 | Epsilon: 0.1000


Training:  18%|█▊        | 1816937/10000000 [7:48:42<47:22:16, 47.98it/s]

Episode 3362 completed | Reward: 2740.00 | Avg Reward: 2632.80 | Frames: 1816928 | Epsilon: 0.1000


Training:  18%|█▊        | 1817345/10000000 [7:48:50<46:02:19, 49.37it/s]

Episode 3363 completed | Reward: 1340.00 | Avg Reward: 2621.60 | Frames: 1817336 | Epsilon: 0.1000


Training:  18%|█▊        | 1817838/10000000 [7:49:00<46:18:42, 49.08it/s]

Episode 3364 completed | Reward: 1220.00 | Avg Reward: 2616.00 | Frames: 1817831 | Epsilon: 0.1000


Training:  18%|█▊        | 1818237/10000000 [7:49:08<46:10:35, 49.22it/s]

Episode 3365 completed | Reward: 1040.00 | Avg Reward: 2608.40 | Frames: 1818228 | Epsilon: 0.1000


Training:  18%|█▊        | 1818714/10000000 [7:49:17<57:03:34, 39.83it/s]

Episode 3366 completed | Reward: 860.00 | Avg Reward: 2592.80 | Frames: 1818712 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1819153/10000000 [7:49:26<46:16:32, 49.11it/s]

Episode 3367 completed | Reward: 3220.00 | Avg Reward: 2593.00 | Frames: 1819144 | Epsilon: 0.1000


Training:  18%|█▊        | 1819589/10000000 [7:49:35<48:15:44, 47.08it/s]

Episode 3368 completed | Reward: 1420.00 | Avg Reward: 2586.00 | Frames: 1819581 | Epsilon: 0.1000


Training:  18%|█▊        | 1820318/10000000 [7:49:50<48:07:35, 47.21it/s]

Episode 3369 completed | Reward: 4170.00 | Avg Reward: 2594.90 | Frames: 1820313 | Epsilon: 0.1000


Training:  18%|█▊        | 1821149/10000000 [7:50:06<49:29:37, 45.90it/s]

Episode 3370 completed | Reward: 4360.00 | Avg Reward: 2613.70 | Frames: 1821142 | Epsilon: 0.1000


Training:  18%|█▊        | 1821534/10000000 [7:50:14<47:01:46, 48.31it/s]

Episode 3371 completed | Reward: 1180.00 | Avg Reward: 2603.70 | Frames: 1821531 | Epsilon: 0.1000


Training:  18%|█▊        | 1822058/10000000 [7:50:25<46:18:34, 49.05it/s]

Episode 3372 completed | Reward: 4710.00 | Avg Reward: 2607.60 | Frames: 1822054 | Epsilon: 0.1000


Training:  18%|█▊        | 1823243/10000000 [7:50:48<44:43:28, 50.78it/s]

Episode 3373 completed | Reward: 4060.00 | Avg Reward: 2635.40 | Frames: 1823237 | Epsilon: 0.1000


Training:  18%|█▊        | 1823797/10000000 [7:50:59<46:20:20, 49.01it/s]

Episode 3374 completed | Reward: 2740.00 | Avg Reward: 2612.40 | Frames: 1823788 | Epsilon: 0.1000


Training:  18%|█▊        | 1824218/10000000 [7:51:08<48:05:09, 47.23it/s]

Episode 3375 completed | Reward: 3080.00 | Avg Reward: 2592.80 | Frames: 1824213 | Epsilon: 0.1000


Training:  18%|█▊        | 1824895/10000000 [7:51:22<45:37:59, 49.76it/s]

Episode 3376 completed | Reward: 4860.00 | Avg Reward: 2602.60 | Frames: 1824889 | Epsilon: 0.1000


Training:  18%|█▊        | 1825594/10000000 [7:51:36<46:40:21, 48.65it/s]

Episode 3377 completed | Reward: 2840.00 | Avg Reward: 2604.80 | Frames: 1825591 | Epsilon: 0.1000


Training:  18%|█▊        | 1825979/10000000 [7:51:44<47:55:43, 47.37it/s]

Episode 3378 completed | Reward: 860.00 | Avg Reward: 2582.80 | Frames: 1825973 | Epsilon: 0.1000


Training:  18%|█▊        | 1826917/10000000 [7:52:03<47:12:39, 48.09it/s]

Episode 3379 completed | Reward: 5220.00 | Avg Reward: 2611.40 | Frames: 1826908 | Epsilon: 0.1000


Training:  18%|█▊        | 1827373/10000000 [7:52:12<46:24:09, 48.92it/s]

Episode 3380 completed | Reward: 1000.00 | Avg Reward: 2588.00 | Frames: 1827364 | Epsilon: 0.1000


Training:  18%|█▊        | 1827849/10000000 [7:52:21<48:12:29, 47.09it/s]

Episode 3381 completed | Reward: 980.00 | Avg Reward: 2566.60 | Frames: 1827842 | Epsilon: 0.1000


Training:  18%|█▊        | 1828887/10000000 [7:52:42<53:56:42, 42.08it/s]

Episode 3382 completed | Reward: 2220.00 | Avg Reward: 2557.20 | Frames: 1828885 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1829453/10000000 [7:52:53<45:58:55, 49.36it/s]

Episode 3383 completed | Reward: 1380.00 | Avg Reward: 2545.60 | Frames: 1829444 | Epsilon: 0.1000


Training:  18%|█▊        | 1829783/10000000 [7:53:00<43:13:33, 52.50it/s]

Episode 3384 completed | Reward: 1140.00 | Avg Reward: 2520.20 | Frames: 1829779 | Epsilon: 0.1000


Training:  18%|█▊        | 1830259/10000000 [7:53:10<44:17:36, 51.23it/s]

Episode 3385 completed | Reward: 620.00 | Avg Reward: 2512.80 | Frames: 1830253 | Epsilon: 0.1000


Training:  18%|█▊        | 1830913/10000000 [7:53:23<46:16:14, 49.04it/s]

Episode 3386 completed | Reward: 1240.00 | Avg Reward: 2501.20 | Frames: 1830911 | Epsilon: 0.1000


Training:  18%|█▊        | 1831693/10000000 [7:53:38<47:28:01, 47.80it/s]

Episode 3387 completed | Reward: 4160.00 | Avg Reward: 2528.60 | Frames: 1831684 | Epsilon: 0.1000


Training:  18%|█▊        | 1832328/10000000 [7:53:51<42:11:06, 53.78it/s]

Episode 3388 completed | Reward: 2720.00 | Avg Reward: 2526.40 | Frames: 1832320 | Epsilon: 0.1000


Training:  18%|█▊        | 1832754/10000000 [7:54:00<46:45:47, 48.51it/s]

Episode 3389 completed | Reward: 2240.00 | Avg Reward: 2519.40 | Frames: 1832751 | Epsilon: 0.1000


Training:  18%|█▊        | 1833480/10000000 [7:54:14<43:28:38, 52.18it/s]

Episode 3390 completed | Reward: 2760.00 | Avg Reward: 2502.20 | Frames: 1833472 | Epsilon: 0.1000


Training:  18%|█▊        | 1834161/10000000 [7:54:28<49:10:31, 46.13it/s]

Episode 3391 completed | Reward: 4020.00 | Avg Reward: 2528.20 | Frames: 1834153 | Epsilon: 0.1000


Training:  18%|█▊        | 1834891/10000000 [7:54:42<44:49:59, 50.59it/s]

Episode 3392 completed | Reward: 2460.00 | Avg Reward: 2523.60 | Frames: 1834886 | Epsilon: 0.1000


Training:  18%|█▊        | 1835393/10000000 [7:54:53<46:54:43, 48.34it/s]

Episode 3393 completed | Reward: 1220.00 | Avg Reward: 2494.40 | Frames: 1835390 | Epsilon: 0.1000


Training:  18%|█▊        | 1835893/10000000 [7:55:03<47:18:29, 47.94it/s]

Episode 3394 completed | Reward: 680.00 | Avg Reward: 2494.60 | Frames: 1835884 | Epsilon: 0.1000


Training:  18%|█▊        | 1836359/10000000 [7:55:12<43:44:01, 51.85it/s]

Episode 3395 completed | Reward: 1080.00 | Avg Reward: 2485.00 | Frames: 1836354 | Epsilon: 0.1000


Training:  18%|█▊        | 1836974/10000000 [7:55:24<46:17:24, 48.98it/s]

Episode 3396 completed | Reward: 1960.00 | Avg Reward: 2483.40 | Frames: 1836970 | Epsilon: 0.1000


Training:  18%|█▊        | 1837491/10000000 [7:55:35<44:28:23, 50.98it/s]

Episode 3397 completed | Reward: 1220.00 | Avg Reward: 2459.20 | Frames: 1837487 | Epsilon: 0.1000


Training:  18%|█▊        | 1838433/10000000 [7:55:53<49:19:28, 45.96it/s]

Episode 3398 completed | Reward: 3460.00 | Avg Reward: 2480.40 | Frames: 1838425 | Epsilon: 0.1000


Training:  18%|█▊        | 1838831/10000000 [7:56:01<44:31:57, 50.91it/s]

Episode 3399 completed | Reward: 2340.00 | Avg Reward: 2488.00 | Frames: 1838825 | Epsilon: 0.1000


Training:  18%|█▊        | 1839529/10000000 [7:56:16<55:50:06, 40.60it/s]

Episode 3400 completed | Reward: 3020.00 | Avg Reward: 2493.80 | Frames: 1839528 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  18%|█▊        | 1840245/10000000 [7:56:30<48:49:23, 46.42it/s]

Episode 3401 completed | Reward: 1140.00 | Avg Reward: 2493.20 | Frames: 1840238 | Epsilon: 0.1000


Training:  18%|█▊        | 1840897/10000000 [7:56:43<47:31:16, 47.69it/s]

Episode 3402 completed | Reward: 2040.00 | Avg Reward: 2480.00 | Frames: 1840888 | Epsilon: 0.1000


Training:  18%|█▊        | 1841627/10000000 [7:56:58<44:35:39, 50.82it/s]

Episode 3403 completed | Reward: 4040.00 | Avg Reward: 2491.80 | Frames: 1841623 | Epsilon: 0.1000


Training:  18%|█▊        | 1842097/10000000 [7:57:07<47:48:20, 47.40it/s]

Episode 3404 completed | Reward: 920.00 | Avg Reward: 2478.80 | Frames: 1842088 | Epsilon: 0.1000


Training:  18%|█▊        | 1842638/10000000 [7:57:18<46:17:06, 48.96it/s]

Episode 3405 completed | Reward: 1100.00 | Avg Reward: 2465.40 | Frames: 1842628 | Epsilon: 0.1000


Training:  18%|█▊        | 1843234/10000000 [7:57:30<47:13:00, 47.99it/s]

Episode 3406 completed | Reward: 2540.00 | Avg Reward: 2469.40 | Frames: 1843231 | Epsilon: 0.1000


Training:  18%|█▊        | 1843814/10000000 [7:57:42<47:49:27, 47.37it/s]

Episode 3407 completed | Reward: 3080.00 | Avg Reward: 2478.80 | Frames: 1843807 | Epsilon: 0.1000


Training:  18%|█▊        | 1844406/10000000 [7:57:53<46:53:26, 48.31it/s]

Episode 3408 completed | Reward: 2920.00 | Avg Reward: 2491.20 | Frames: 1844401 | Epsilon: 0.1000


Training:  18%|█▊        | 1845234/10000000 [7:58:10<46:06:21, 49.13it/s]

Episode 3409 completed | Reward: 2640.00 | Avg Reward: 2491.70 | Frames: 1845227 | Epsilon: 0.1000


Training:  18%|█▊        | 1845661/10000000 [7:58:19<48:31:29, 46.68it/s]

Episode 3410 completed | Reward: 4580.00 | Avg Reward: 2524.90 | Frames: 1845653 | Epsilon: 0.1000


Training:  18%|█▊        | 1846149/10000000 [7:58:28<47:23:55, 47.79it/s]

Episode 3411 completed | Reward: 3100.00 | Avg Reward: 2521.50 | Frames: 1846140 | Epsilon: 0.1000


Training:  18%|█▊        | 1846749/10000000 [7:58:40<46:26:16, 48.77it/s]

Episode 3412 completed | Reward: 1140.00 | Avg Reward: 2517.30 | Frames: 1846740 | Epsilon: 0.1000


Training:  18%|█▊        | 1847275/10000000 [7:58:51<45:31:57, 49.74it/s]

Episode 3413 completed | Reward: 3140.00 | Avg Reward: 2538.50 | Frames: 1847271 | Epsilon: 0.1000


Training:  18%|█▊        | 1848030/10000000 [7:59:06<48:16:32, 46.91it/s]

Episode 3414 completed | Reward: 1660.00 | Avg Reward: 2528.70 | Frames: 1848026 | Epsilon: 0.1000


Training:  18%|█▊        | 1848474/10000000 [7:59:15<47:05:34, 48.08it/s]

Episode 3415 completed | Reward: 860.00 | Avg Reward: 2505.90 | Frames: 1848469 | Epsilon: 0.1000


Training:  18%|█▊        | 1849014/10000000 [7:59:26<47:26:17, 47.73it/s]

Episode 3416 completed | Reward: 1580.00 | Avg Reward: 2482.30 | Frames: 1849008 | Epsilon: 0.1000


Training:  18%|█▊        | 1849412/10000000 [7:59:34<42:53:59, 52.78it/s]

Episode 3417 completed | Reward: 1280.00 | Avg Reward: 2461.60 | Frames: 1849404 | Epsilon: 0.1000


Training:  18%|█▊        | 1849982/10000000 [7:59:46<56:19:55, 40.19it/s]

Episode 3418 completed | Reward: 1640.00 | Avg Reward: 2472.00 | Frames: 1849980 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▊        | 1850454/10000000 [7:59:55<48:01:02, 47.14it/s]

Episode 3419 completed | Reward: 1760.00 | Avg Reward: 2447.00 | Frames: 1850449 | Epsilon: 0.1000


Training:  19%|█▊        | 1851009/10000000 [8:00:07<47:15:49, 47.89it/s]

Episode 3420 completed | Reward: 1880.00 | Avg Reward: 2426.40 | Frames: 1851000 | Epsilon: 0.1000


Training:  19%|█▊        | 1851647/10000000 [8:00:19<43:42:26, 51.79it/s]

Episode 3421 completed | Reward: 3460.00 | Avg Reward: 2428.60 | Frames: 1851643 | Epsilon: 0.1000


Training:  19%|█▊        | 1852105/10000000 [8:00:29<49:43:05, 45.52it/s]

Episode 3422 completed | Reward: 680.00 | Avg Reward: 2410.40 | Frames: 1852098 | Epsilon: 0.1000


Training:  19%|█▊        | 1853301/10000000 [8:00:53<48:48:46, 46.36it/s]

Episode 3423 completed | Reward: 4660.00 | Avg Reward: 2428.80 | Frames: 1853292 | Epsilon: 0.1000


Training:  19%|█▊        | 1853787/10000000 [8:01:02<43:48:22, 51.66it/s]

Episode 3424 completed | Reward: 1120.00 | Avg Reward: 2405.40 | Frames: 1853782 | Epsilon: 0.1000


Training:  19%|█▊        | 1854303/10000000 [8:01:13<43:07:12, 52.47it/s]

Episode 3425 completed | Reward: 4750.00 | Avg Reward: 2413.50 | Frames: 1854298 | Epsilon: 0.1000


Training:  19%|█▊        | 1855093/10000000 [8:01:29<46:56:41, 48.19it/s]

Episode 3426 completed | Reward: 3860.00 | Avg Reward: 2436.50 | Frames: 1855092 | Epsilon: 0.1000


Training:  19%|█▊        | 1855629/10000000 [8:01:39<48:16:08, 46.87it/s]

Episode 3427 completed | Reward: 2140.00 | Avg Reward: 2436.50 | Frames: 1855621 | Epsilon: 0.1000


Training:  19%|█▊        | 1856089/10000000 [8:01:49<47:45:18, 47.37it/s]

Episode 3428 completed | Reward: 600.00 | Avg Reward: 2413.50 | Frames: 1856081 | Epsilon: 0.1000


Training:  19%|█▊        | 1856599/10000000 [8:01:59<44:49:27, 50.46it/s]

Episode 3429 completed | Reward: 1740.00 | Avg Reward: 2395.50 | Frames: 1856595 | Epsilon: 0.1000


Training:  19%|█▊        | 1857235/10000000 [8:02:11<44:48:50, 50.47it/s]

Episode 3430 completed | Reward: 1000.00 | Avg Reward: 2390.50 | Frames: 1857231 | Epsilon: 0.1000


Training:  19%|█▊        | 1857710/10000000 [8:02:21<46:40:47, 48.45it/s]

Episode 3431 completed | Reward: 2000.00 | Avg Reward: 2384.70 | Frames: 1857707 | Epsilon: 0.1000


Training:  19%|█▊        | 1858366/10000000 [8:02:34<46:38:56, 48.48it/s]

Episode 3432 completed | Reward: 5620.00 | Avg Reward: 2410.10 | Frames: 1858363 | Epsilon: 0.1000


Training:  19%|█▊        | 1858953/10000000 [8:02:46<49:06:57, 46.04it/s]

Episode 3433 completed | Reward: 4680.00 | Avg Reward: 2417.50 | Frames: 1858946 | Epsilon: 0.1000


Training:  19%|█▊        | 1859273/10000000 [8:02:52<47:22:34, 47.73it/s]

Episode 3434 completed | Reward: 3890.00 | Avg Reward: 2431.20 | Frames: 1859264 | Epsilon: 0.1000


Training:  19%|█▊        | 1859756/10000000 [8:03:02<43:42:12, 51.74it/s]

Episode 3435 completed | Reward: 1200.00 | Avg Reward: 2423.00 | Frames: 1859755 | Epsilon: 0.1000


Training:  19%|█▊        | 1860129/10000000 [8:03:10<46:11:12, 48.96it/s]

Episode 3436 completed | Reward: 3320.00 | Avg Reward: 2426.80 | Frames: 1860129 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▊        | 1860654/10000000 [8:03:21<50:34:03, 44.71it/s]

Episode 3437 completed | Reward: 1760.00 | Avg Reward: 2415.20 | Frames: 1860649 | Epsilon: 0.1000


Training:  19%|█▊        | 1861037/10000000 [8:03:28<46:31:45, 48.59it/s]

Episode 3438 completed | Reward: 400.00 | Avg Reward: 2387.60 | Frames: 1861028 | Epsilon: 0.1000


Training:  19%|█▊        | 1861714/10000000 [8:03:42<46:53:56, 48.20it/s]

Episode 3439 completed | Reward: 1560.00 | Avg Reward: 2392.00 | Frames: 1861710 | Epsilon: 0.1000


Training:  19%|█▊        | 1862421/10000000 [8:03:56<53:21:59, 42.36it/s]

Episode 3440 completed | Reward: 3660.00 | Avg Reward: 2403.20 | Frames: 1862412 | Epsilon: 0.1000


Training:  19%|█▊        | 1863029/10000000 [8:04:09<48:49:12, 46.30it/s]

Episode 3441 completed | Reward: 3160.00 | Avg Reward: 2403.40 | Frames: 1863021 | Epsilon: 0.1000


Training:  19%|█▊        | 1863531/10000000 [8:04:19<45:51:59, 49.28it/s]

Episode 3442 completed | Reward: 1640.00 | Avg Reward: 2407.80 | Frames: 1863527 | Epsilon: 0.1000


Training:  19%|█▊        | 1864209/10000000 [8:04:33<48:53:53, 46.22it/s]

Episode 3443 completed | Reward: 1040.00 | Avg Reward: 2388.20 | Frames: 1864201 | Epsilon: 0.1000


Training:  19%|█▊        | 1864682/10000000 [8:04:42<48:04:36, 47.00it/s]

Episode 3444 completed | Reward: 900.00 | Avg Reward: 2345.80 | Frames: 1864675 | Epsilon: 0.1000


Training:  19%|█▊        | 1865382/10000000 [8:04:56<49:02:13, 46.08it/s]

Episode 3445 completed | Reward: 2540.00 | Avg Reward: 2350.00 | Frames: 1865375 | Epsilon: 0.1000


Training:  19%|█▊        | 1866314/10000000 [8:05:15<46:29:38, 48.59it/s]

Episode 3446 completed | Reward: 4830.00 | Avg Reward: 2387.90 | Frames: 1866311 | Epsilon: 0.1000


Training:  19%|█▊        | 1866730/10000000 [8:05:24<48:54:43, 46.19it/s]

Episode 3447 completed | Reward: 460.00 | Avg Reward: 2375.50 | Frames: 1866723 | Epsilon: 0.1000


Training:  19%|█▊        | 1867329/10000000 [8:05:36<49:18:48, 45.81it/s]

Episode 3448 completed | Reward: 980.00 | Avg Reward: 2350.70 | Frames: 1867322 | Epsilon: 0.1000


Training:  19%|█▊        | 1867926/10000000 [8:05:48<48:11:59, 46.87it/s]

Episode 3449 completed | Reward: 1440.00 | Avg Reward: 2328.70 | Frames: 1867921 | Epsilon: 0.1000


Training:  19%|█▊        | 1868602/10000000 [8:06:02<46:17:36, 48.79it/s]

Episode 3450 completed | Reward: 1300.00 | Avg Reward: 2335.90 | Frames: 1868599 | Epsilon: 0.1000


Training:  19%|█▊        | 1869021/10000000 [8:06:10<48:27:39, 46.61it/s]

Episode 3451 completed | Reward: 940.00 | Avg Reward: 2334.90 | Frames: 1869014 | Epsilon: 0.1000


Training:  19%|█▊        | 1869521/10000000 [8:06:20<47:18:30, 47.74it/s]

Episode 3452 completed | Reward: 1080.00 | Avg Reward: 2298.50 | Frames: 1869512 | Epsilon: 0.1000


Training:  19%|█▊        | 1870146/10000000 [8:06:33<60:38:02, 37.24it/s]

Episode 3453 completed | Reward: 3540.00 | Avg Reward: 2318.50 | Frames: 1870145 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▊        | 1870638/10000000 [8:06:43<47:11:58, 47.84it/s]

Episode 3454 completed | Reward: 860.00 | Avg Reward: 2316.90 | Frames: 1870637 | Epsilon: 0.1000


Training:  19%|█▊        | 1871610/10000000 [8:07:03<47:01:21, 48.02it/s]

Episode 3455 completed | Reward: 4580.00 | Avg Reward: 2329.30 | Frames: 1871603 | Epsilon: 0.1000


Training:  19%|█▊        | 1872280/10000000 [8:07:17<43:37:00, 51.76it/s]

Episode 3456 completed | Reward: 2400.00 | Avg Reward: 2330.90 | Frames: 1872272 | Epsilon: 0.1000


Training:  19%|█▊        | 1872607/10000000 [8:07:23<44:10:43, 51.10it/s]

Episode 3457 completed | Reward: 1060.00 | Avg Reward: 2311.50 | Frames: 1872601 | Epsilon: 0.1000


Training:  19%|█▊        | 1873110/10000000 [8:07:33<45:20:52, 49.78it/s]

Episode 3458 completed | Reward: 640.00 | Avg Reward: 2287.30 | Frames: 1873107 | Epsilon: 0.1000


Training:  19%|█▊        | 1873814/10000000 [8:07:48<47:54:05, 47.12it/s]

Episode 3459 completed | Reward: 2760.00 | Avg Reward: 2299.10 | Frames: 1873807 | Epsilon: 0.1000


Training:  19%|█▊        | 1874325/10000000 [8:07:58<47:11:57, 47.82it/s]

Episode 3460 completed | Reward: 2100.00 | Avg Reward: 2303.30 | Frames: 1874321 | Epsilon: 0.1000


Training:  19%|█▊        | 1874833/10000000 [8:08:08<49:26:43, 45.65it/s]

Episode 3461 completed | Reward: 520.00 | Avg Reward: 2257.50 | Frames: 1874826 | Epsilon: 0.1000


Training:  19%|█▉        | 1875421/10000000 [8:08:20<47:35:17, 47.42it/s]

Episode 3462 completed | Reward: 3580.00 | Avg Reward: 2265.90 | Frames: 1875413 | Epsilon: 0.1000


Training:  19%|█▉        | 1875842/10000000 [8:08:28<45:38:20, 49.45it/s]

Episode 3463 completed | Reward: 1240.00 | Avg Reward: 2264.90 | Frames: 1875832 | Epsilon: 0.1000


Training:  19%|█▉        | 1876383/10000000 [8:08:39<43:49:31, 51.49it/s]

Episode 3464 completed | Reward: 2080.00 | Avg Reward: 2273.50 | Frames: 1876379 | Epsilon: 0.1000


Training:  19%|█▉        | 1876949/10000000 [8:08:51<45:25:39, 49.67it/s]

Episode 3465 completed | Reward: 1720.00 | Avg Reward: 2280.30 | Frames: 1876946 | Epsilon: 0.1000


Training:  19%|█▉        | 1877999/10000000 [8:09:12<43:51:03, 51.45it/s]

Episode 3466 completed | Reward: 1380.00 | Avg Reward: 2285.50 | Frames: 1877994 | Epsilon: 0.1000


Training:  19%|█▉        | 1878581/10000000 [8:09:23<47:54:18, 47.09it/s]

Episode 3467 completed | Reward: 2040.00 | Avg Reward: 2273.70 | Frames: 1878580 | Epsilon: 0.1000


Training:  19%|█▉        | 1879154/10000000 [8:09:35<46:23:48, 48.62it/s]

Episode 3468 completed | Reward: 560.00 | Avg Reward: 2265.10 | Frames: 1879151 | Epsilon: 0.1000


Training:  19%|█▉        | 1879869/10000000 [8:09:50<47:42:35, 47.28it/s]

Episode 3469 completed | Reward: 2000.00 | Avg Reward: 2243.40 | Frames: 1879862 | Epsilon: 0.1000


Training:  19%|█▉        | 1880454/10000000 [8:10:02<57:34:22, 39.18it/s]

Episode 3470 completed | Reward: 2260.00 | Avg Reward: 2222.40 | Frames: 1880452 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▉        | 1881345/10000000 [8:10:19<48:52:48, 46.14it/s]

Episode 3471 completed | Reward: 1140.00 | Avg Reward: 2222.00 | Frames: 1881337 | Epsilon: 0.1000


Training:  19%|█▉        | 1881845/10000000 [8:10:30<48:35:47, 46.40it/s]

Episode 3472 completed | Reward: 820.00 | Avg Reward: 2183.10 | Frames: 1881836 | Epsilon: 0.1000


Training:  19%|█▉        | 1882594/10000000 [8:10:45<47:40:10, 47.30it/s]

Episode 3473 completed | Reward: 1140.00 | Avg Reward: 2153.90 | Frames: 1882587 | Epsilon: 0.1000


Training:  19%|█▉        | 1883343/10000000 [8:11:00<45:41:42, 49.34it/s]

Episode 3474 completed | Reward: 2260.00 | Avg Reward: 2149.10 | Frames: 1883338 | Epsilon: 0.1000


Training:  19%|█▉        | 1884025/10000000 [8:11:13<47:59:55, 46.97it/s]

Episode 3475 completed | Reward: 1960.00 | Avg Reward: 2137.90 | Frames: 1884016 | Epsilon: 0.1000


Training:  19%|█▉        | 1884613/10000000 [8:11:25<45:57:17, 49.05it/s]

Episode 3476 completed | Reward: 940.00 | Avg Reward: 2098.70 | Frames: 1884611 | Epsilon: 0.1000


Training:  19%|█▉        | 1885062/10000000 [8:11:34<47:48:06, 47.16it/s]

Episode 3477 completed | Reward: 980.00 | Avg Reward: 2080.10 | Frames: 1885059 | Epsilon: 0.1000


Training:  19%|█▉        | 1885645/10000000 [8:11:46<48:33:25, 46.42it/s]

Episode 3478 completed | Reward: 2220.00 | Avg Reward: 2093.70 | Frames: 1885637 | Epsilon: 0.1000


Training:  19%|█▉        | 1886397/10000000 [8:12:01<47:31:29, 47.42it/s]

Episode 3479 completed | Reward: 4060.00 | Avg Reward: 2082.10 | Frames: 1886388 | Epsilon: 0.1000


Training:  19%|█▉        | 1886906/10000000 [8:12:11<46:06:20, 48.88it/s]

Episode 3480 completed | Reward: 1000.00 | Avg Reward: 2082.10 | Frames: 1886899 | Epsilon: 0.1000


Training:  19%|█▉        | 1887434/10000000 [8:12:22<48:32:14, 46.43it/s]

Episode 3481 completed | Reward: 3240.00 | Avg Reward: 2104.70 | Frames: 1887431 | Epsilon: 0.1000


Training:  19%|█▉        | 1887974/10000000 [8:12:33<46:02:33, 48.94it/s]

Episode 3482 completed | Reward: 1000.00 | Avg Reward: 2092.50 | Frames: 1887965 | Epsilon: 0.1000


Training:  19%|█▉        | 1888544/10000000 [8:12:44<42:10:58, 53.41it/s]

Episode 3483 completed | Reward: 1820.00 | Avg Reward: 2096.90 | Frames: 1888537 | Epsilon: 0.1000


Training:  19%|█▉        | 1889043/10000000 [8:12:54<44:10:25, 51.00it/s]

Episode 3484 completed | Reward: 1920.00 | Avg Reward: 2104.70 | Frames: 1889040 | Epsilon: 0.1000


Training:  19%|█▉        | 1889665/10000000 [8:13:07<46:30:09, 48.45it/s]

Episode 3485 completed | Reward: 2240.00 | Avg Reward: 2120.90 | Frames: 1889656 | Epsilon: 0.1000


Training:  19%|█▉        | 1890294/10000000 [8:13:19<46:15:56, 48.69it/s]

Episode 3486 completed | Reward: 980.00 | Avg Reward: 2118.30 | Frames: 1890287 | Epsilon: 0.1000


Training:  19%|█▉        | 1891213/10000000 [8:13:38<55:33:26, 40.54it/s]

Episode 3487 completed | Reward: 4480.00 | Avg Reward: 2121.50 | Frames: 1891212 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▉        | 1891785/10000000 [8:13:49<46:46:00, 48.16it/s]

Episode 3488 completed | Reward: 2280.00 | Avg Reward: 2117.10 | Frames: 1891782 | Epsilon: 0.1000


Training:  19%|█▉        | 1892310/10000000 [8:14:00<47:04:44, 47.84it/s]

Episode 3489 completed | Reward: 2320.00 | Avg Reward: 2117.90 | Frames: 1892308 | Epsilon: 0.1000


Training:  19%|█▉        | 1893427/10000000 [8:14:22<43:36:20, 51.64it/s]

Episode 3490 completed | Reward: 5670.00 | Avg Reward: 2147.00 | Frames: 1893421 | Epsilon: 0.1000


Training:  19%|█▉        | 1894026/10000000 [8:14:35<45:04:57, 49.95it/s]

Episode 3491 completed | Reward: 3360.00 | Avg Reward: 2140.40 | Frames: 1894023 | Epsilon: 0.1000


Training:  19%|█▉        | 1894970/10000000 [8:14:54<47:21:13, 47.54it/s]

Episode 3492 completed | Reward: 3460.00 | Avg Reward: 2150.40 | Frames: 1894968 | Epsilon: 0.1000


Training:  19%|█▉        | 1895437/10000000 [8:15:03<49:41:24, 45.31it/s]

Episode 3493 completed | Reward: 2000.00 | Avg Reward: 2158.20 | Frames: 1895430 | Epsilon: 0.1000


Training:  19%|█▉        | 1895801/10000000 [8:15:11<47:44:12, 47.16it/s]

Episode 3494 completed | Reward: 2260.00 | Avg Reward: 2174.00 | Frames: 1895793 | Epsilon: 0.1000


Training:  19%|█▉        | 1896301/10000000 [8:15:21<48:27:57, 46.45it/s]

Episode 3495 completed | Reward: 2000.00 | Avg Reward: 2183.20 | Frames: 1896293 | Epsilon: 0.1000


Training:  19%|█▉        | 1896922/10000000 [8:15:33<46:28:47, 48.43it/s]

Episode 3496 completed | Reward: 3140.00 | Avg Reward: 2195.00 | Frames: 1896918 | Epsilon: 0.1000


Training:  19%|█▉        | 1897410/10000000 [8:15:43<46:46:36, 48.12it/s]

Episode 3497 completed | Reward: 2120.00 | Avg Reward: 2204.00 | Frames: 1897406 | Epsilon: 0.1000


Training:  19%|█▉        | 1897977/10000000 [8:15:55<48:05:45, 46.79it/s]

Episode 3498 completed | Reward: 3300.00 | Avg Reward: 2202.40 | Frames: 1897970 | Epsilon: 0.1000


Training:  19%|█▉        | 1898490/10000000 [8:16:05<48:26:10, 46.46it/s]

Episode 3499 completed | Reward: 4140.00 | Avg Reward: 2220.40 | Frames: 1898485 | Epsilon: 0.1000


Training:  19%|█▉        | 1898857/10000000 [8:16:13<47:18:10, 47.57it/s]

Episode 3500 completed | Reward: 1480.00 | Avg Reward: 2205.00 | Frames: 1898855 | Epsilon: 0.1000


Training:  19%|█▉        | 1899366/10000000 [8:16:23<46:33:47, 48.33it/s]

Episode 3501 completed | Reward: 680.00 | Avg Reward: 2200.40 | Frames: 1899362 | Epsilon: 0.1000


Training:  19%|█▉        | 1900017/10000000 [8:16:36<46:22:37, 48.52it/s]

Episode 3502 completed | Reward: 2740.00 | Avg Reward: 2207.40 | Frames: 1900013 | Epsilon: 0.1000


Training:  19%|█▉        | 1900534/10000000 [8:16:47<47:44:41, 47.12it/s]

Episode 3503 completed | Reward: 1440.00 | Avg Reward: 2181.40 | Frames: 1900531 | Epsilon: 0.1000


Training:  19%|█▉        | 1901230/10000000 [8:17:01<55:57:54, 40.20it/s]

Episode 3504 completed | Reward: 3780.00 | Avg Reward: 2210.00 | Frames: 1901229 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▉        | 1901843/10000000 [8:17:13<43:15:41, 52.00it/s]

Episode 3505 completed | Reward: 460.00 | Avg Reward: 2203.60 | Frames: 1901839 | Epsilon: 0.1000


Training:  19%|█▉        | 1902353/10000000 [8:17:24<47:08:55, 47.71it/s]

Episode 3506 completed | Reward: 900.00 | Avg Reward: 2187.20 | Frames: 1902344 | Epsilon: 0.1000


Training:  19%|█▉        | 1903199/10000000 [8:17:41<43:26:59, 51.76it/s]

Episode 3507 completed | Reward: 2140.00 | Avg Reward: 2177.80 | Frames: 1903195 | Epsilon: 0.1000


Training:  19%|█▉        | 1904046/10000000 [8:17:58<46:47:07, 48.07it/s]

Episode 3508 completed | Reward: 1320.00 | Avg Reward: 2161.80 | Frames: 1904041 | Epsilon: 0.1000


Training:  19%|█▉        | 1904630/10000000 [8:18:10<45:58:29, 48.91it/s]

Episode 3509 completed | Reward: 2820.00 | Avg Reward: 2163.60 | Frames: 1904626 | Epsilon: 0.1000


Training:  19%|█▉        | 1905113/10000000 [8:18:20<49:23:20, 45.53it/s]

Episode 3510 completed | Reward: 860.00 | Avg Reward: 2126.40 | Frames: 1905106 | Epsilon: 0.1000


Training:  19%|█▉        | 1905599/10000000 [8:18:29<45:06:46, 49.84it/s]

Episode 3511 completed | Reward: 3260.00 | Avg Reward: 2128.00 | Frames: 1905594 | Epsilon: 0.1000


Training:  19%|█▉        | 1906038/10000000 [8:18:38<46:20:56, 48.51it/s]

Episode 3512 completed | Reward: 1360.00 | Avg Reward: 2130.20 | Frames: 1906033 | Epsilon: 0.1000


Training:  19%|█▉        | 1906644/10000000 [8:18:51<41:28:15, 54.21it/s]

Episode 3513 completed | Reward: 1920.00 | Avg Reward: 2118.00 | Frames: 1906639 | Epsilon: 0.1000


Training:  19%|█▉        | 1907237/10000000 [8:19:03<47:24:17, 47.42it/s]

Episode 3514 completed | Reward: 1120.00 | Avg Reward: 2112.60 | Frames: 1907229 | Epsilon: 0.1000


Training:  19%|█▉        | 1907682/10000000 [8:19:12<46:26:54, 48.39it/s]

Episode 3515 completed | Reward: 1180.00 | Avg Reward: 2115.80 | Frames: 1907674 | Epsilon: 0.1000


Training:  19%|█▉        | 1908046/10000000 [8:19:19<46:29:35, 48.35it/s]

Episode 3516 completed | Reward: 2040.00 | Avg Reward: 2120.40 | Frames: 1908042 | Epsilon: 0.1000


Training:  19%|█▉        | 1909525/10000000 [8:19:49<47:50:48, 46.97it/s]

Episode 3517 completed | Reward: 3940.00 | Avg Reward: 2147.00 | Frames: 1909523 | Epsilon: 0.1000


Training:  19%|█▉        | 1910275/10000000 [8:20:04<42:39:44, 52.67it/s]

Episode 3518 completed | Reward: 2300.00 | Avg Reward: 2153.60 | Frames: 1910271 | Epsilon: 0.1000


Training:  19%|█▉        | 1910789/10000000 [8:20:14<48:07:52, 46.68it/s]

Episode 3519 completed | Reward: 2840.00 | Avg Reward: 2164.40 | Frames: 1910782 | Epsilon: 0.1000


Training:  19%|█▉        | 1911362/10000000 [8:20:26<44:55:01, 50.02it/s]

Episode 3520 completed | Reward: 1540.00 | Avg Reward: 2161.00 | Frames: 1911362 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▉        | 1911873/10000000 [8:20:36<46:06:45, 48.72it/s]

Episode 3521 completed | Reward: 2540.00 | Avg Reward: 2151.80 | Frames: 1911864 | Epsilon: 0.1000


Training:  19%|█▉        | 1912288/10000000 [8:20:44<41:23:09, 54.28it/s]

Episode 3522 completed | Reward: 1680.00 | Avg Reward: 2161.80 | Frames: 1912282 | Epsilon: 0.1000


Training:  19%|█▉        | 1912945/10000000 [8:20:58<47:18:12, 47.49it/s]

Episode 3523 completed | Reward: 2760.00 | Avg Reward: 2142.80 | Frames: 1912939 | Epsilon: 0.1000


Training:  19%|█▉        | 1913261/10000000 [8:21:04<47:34:09, 47.22it/s]

Episode 3524 completed | Reward: 740.00 | Avg Reward: 2139.00 | Frames: 1913252 | Epsilon: 0.1000


Training:  19%|█▉        | 1913889/10000000 [8:21:17<48:31:31, 46.29it/s]

Episode 3525 completed | Reward: 4360.00 | Avg Reward: 2135.10 | Frames: 1913881 | Epsilon: 0.1000


Training:  19%|█▉        | 1914689/10000000 [8:21:33<48:03:06, 46.74it/s]

Episode 3526 completed | Reward: 3980.00 | Avg Reward: 2136.30 | Frames: 1914680 | Epsilon: 0.1000


Training:  19%|█▉        | 1915407/10000000 [8:21:47<45:57:54, 48.86it/s]

Episode 3527 completed | Reward: 3740.00 | Avg Reward: 2152.30 | Frames: 1915404 | Epsilon: 0.1000


Training:  19%|█▉        | 1915862/10000000 [8:21:57<46:44:04, 48.05it/s]

Episode 3528 completed | Reward: 2960.00 | Avg Reward: 2175.90 | Frames: 1915859 | Epsilon: 0.1000


Training:  19%|█▉        | 1916434/10000000 [8:22:08<47:11:11, 47.59it/s]

Episode 3529 completed | Reward: 3920.00 | Avg Reward: 2197.70 | Frames: 1916429 | Epsilon: 0.1000


Training:  19%|█▉        | 1917287/10000000 [8:22:25<45:19:37, 49.53it/s]

Episode 3530 completed | Reward: 3560.00 | Avg Reward: 2223.30 | Frames: 1917283 | Epsilon: 0.1000


Training:  19%|█▉        | 1917749/10000000 [8:22:35<46:56:43, 47.82it/s]

Episode 3531 completed | Reward: 880.00 | Avg Reward: 2212.10 | Frames: 1917740 | Epsilon: 0.1000


Training:  19%|█▉        | 1918235/10000000 [8:22:44<43:39:54, 51.41it/s]

Episode 3532 completed | Reward: 1960.00 | Avg Reward: 2175.50 | Frames: 1918231 | Epsilon: 0.1000


Training:  19%|█▉        | 1918702/10000000 [8:22:54<45:17:41, 49.56it/s]

Episode 3533 completed | Reward: 3640.00 | Avg Reward: 2165.10 | Frames: 1918697 | Epsilon: 0.1000


Training:  19%|█▉        | 1919893/10000000 [8:23:18<49:12:56, 45.60it/s]

Episode 3534 completed | Reward: 3520.00 | Avg Reward: 2161.40 | Frames: 1919885 | Epsilon: 0.1000


Training:  19%|█▉        | 1920329/10000000 [8:23:27<46:36:02, 48.16it/s]

Episode 3535 completed | Reward: 2840.00 | Avg Reward: 2177.80 | Frames: 1920320 | Epsilon: 0.1000


Training:  19%|█▉        | 1920881/10000000 [8:23:38<45:06:33, 49.75it/s]

Episode 3536 completed | Reward: 3100.00 | Avg Reward: 2175.60 | Frames: 1920872 | Epsilon: 0.1000


Training:  19%|█▉        | 1921411/10000000 [8:23:48<45:34:48, 49.23it/s]

Episode 3537 completed | Reward: 4060.00 | Avg Reward: 2198.60 | Frames: 1921411 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▉        | 1921899/10000000 [8:23:58<44:26:38, 50.49it/s]

Episode 3538 completed | Reward: 1980.00 | Avg Reward: 2214.40 | Frames: 1921894 | Epsilon: 0.1000


Training:  19%|█▉        | 1922353/10000000 [8:24:07<48:23:38, 46.36it/s]

Episode 3539 completed | Reward: 4290.00 | Avg Reward: 2241.70 | Frames: 1922346 | Epsilon: 0.1000


Training:  19%|█▉        | 1922823/10000000 [8:24:17<43:11:26, 51.95it/s]

Episode 3540 completed | Reward: 2940.00 | Avg Reward: 2234.50 | Frames: 1922818 | Epsilon: 0.1000


Training:  19%|█▉        | 1923203/10000000 [8:24:24<43:10:38, 51.96it/s]

Episode 3541 completed | Reward: 4340.00 | Avg Reward: 2246.30 | Frames: 1923198 | Epsilon: 0.1000


Training:  19%|█▉        | 1923841/10000000 [8:24:37<47:00:15, 47.73it/s]

Episode 3542 completed | Reward: 1580.00 | Avg Reward: 2245.70 | Frames: 1923839 | Epsilon: 0.1000


Training:  19%|█▉        | 1924279/10000000 [8:24:46<44:17:52, 50.64it/s]

Episode 3543 completed | Reward: 1880.00 | Avg Reward: 2254.10 | Frames: 1924275 | Epsilon: 0.1000


Training:  19%|█▉        | 1924905/10000000 [8:24:58<46:05:34, 48.66it/s]

Episode 3544 completed | Reward: 1760.00 | Avg Reward: 2262.70 | Frames: 1924896 | Epsilon: 0.1000


Training:  19%|█▉        | 1926184/10000000 [8:25:24<42:48:34, 52.39it/s]

Episode 3545 completed | Reward: 3560.00 | Avg Reward: 2272.90 | Frames: 1926176 | Epsilon: 0.1000


Training:  19%|█▉        | 1926689/10000000 [8:25:34<47:54:35, 46.81it/s]

Episode 3546 completed | Reward: 1780.00 | Avg Reward: 2242.40 | Frames: 1926680 | Epsilon: 0.1000


Training:  19%|█▉        | 1927310/10000000 [8:25:46<46:32:01, 48.19it/s]

Episode 3547 completed | Reward: 3980.00 | Avg Reward: 2277.60 | Frames: 1927306 | Epsilon: 0.1000


Training:  19%|█▉        | 1927982/10000000 [8:26:00<47:08:57, 47.56it/s]

Episode 3548 completed | Reward: 4640.00 | Avg Reward: 2314.20 | Frames: 1927978 | Epsilon: 0.1000


Training:  19%|█▉        | 1928665/10000000 [8:26:14<50:07:21, 44.73it/s]

Episode 3549 completed | Reward: 4560.00 | Avg Reward: 2345.40 | Frames: 1928657 | Epsilon: 0.1000


Training:  19%|█▉        | 1929017/10000000 [8:26:21<47:42:08, 47.00it/s]

Episode 3550 completed | Reward: 740.00 | Avg Reward: 2339.80 | Frames: 1929015 | Epsilon: 0.1000


Training:  19%|█▉        | 1929519/10000000 [8:26:31<44:29:31, 50.39it/s]

Episode 3551 completed | Reward: 1600.00 | Avg Reward: 2346.40 | Frames: 1929518 | Epsilon: 0.1000


Training:  19%|█▉        | 1930029/10000000 [8:26:42<46:33:56, 48.14it/s]

Episode 3552 completed | Reward: 780.00 | Avg Reward: 2343.40 | Frames: 1930024 | Epsilon: 0.1000


Training:  19%|█▉        | 1930594/10000000 [8:26:53<45:19:20, 49.46it/s]

Episode 3553 completed | Reward: 1360.00 | Avg Reward: 2321.60 | Frames: 1930590 | Epsilon: 0.1000


Training:  19%|█▉        | 1931180/10000000 [8:27:05<45:17:15, 49.49it/s]

Episode 3554 completed | Reward: 1320.00 | Avg Reward: 2326.20 | Frames: 1931173 | Epsilon: 0.1000


Training:  19%|█▉        | 1931615/10000000 [8:27:14<54:34:54, 41.06it/s]

Episode 3555 completed | Reward: 1940.00 | Avg Reward: 2299.80 | Frames: 1931614 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▉        | 1932172/10000000 [8:27:25<42:03:26, 53.29it/s]

Episode 3556 completed | Reward: 3640.00 | Avg Reward: 2312.20 | Frames: 1932164 | Epsilon: 0.1000


Training:  19%|█▉        | 1932791/10000000 [8:27:38<46:17:19, 48.41it/s]

Episode 3557 completed | Reward: 1620.00 | Avg Reward: 2317.80 | Frames: 1932784 | Epsilon: 0.1000


Training:  19%|█▉        | 1933798/10000000 [8:27:58<46:50:38, 47.83it/s]

Episode 3558 completed | Reward: 4900.00 | Avg Reward: 2360.40 | Frames: 1933791 | Epsilon: 0.1000


Training:  19%|█▉        | 1934317/10000000 [8:28:08<47:05:20, 47.58it/s]

Episode 3559 completed | Reward: 3560.00 | Avg Reward: 2368.40 | Frames: 1934308 | Epsilon: 0.1000


Training:  19%|█▉        | 1934718/10000000 [8:28:16<45:20:45, 49.41it/s]

Episode 3560 completed | Reward: 2140.00 | Avg Reward: 2368.80 | Frames: 1934713 | Epsilon: 0.1000


Training:  19%|█▉        | 1935534/10000000 [8:28:33<48:55:14, 45.79it/s]

Episode 3561 completed | Reward: 3700.00 | Avg Reward: 2400.60 | Frames: 1935528 | Epsilon: 0.1000


Training:  19%|█▉        | 1936013/10000000 [8:28:43<48:30:14, 46.18it/s]

Episode 3562 completed | Reward: 860.00 | Avg Reward: 2373.40 | Frames: 1936004 | Epsilon: 0.1000


Training:  19%|█▉        | 1936709/10000000 [8:28:57<48:01:25, 46.64it/s]

Episode 3563 completed | Reward: 3540.00 | Avg Reward: 2396.40 | Frames: 1936700 | Epsilon: 0.1000


Training:  19%|█▉        | 1937166/10000000 [8:29:06<47:32:13, 47.11it/s]

Episode 3564 completed | Reward: 2800.00 | Avg Reward: 2403.60 | Frames: 1937159 | Epsilon: 0.1000


Training:  19%|█▉        | 1937754/10000000 [8:29:18<45:43:40, 48.97it/s]

Episode 3565 completed | Reward: 2400.00 | Avg Reward: 2410.40 | Frames: 1937747 | Epsilon: 0.1000


Training:  19%|█▉        | 1938323/10000000 [8:29:29<44:34:45, 50.23it/s]

Episode 3566 completed | Reward: 2520.00 | Avg Reward: 2421.80 | Frames: 1938318 | Epsilon: 0.1000


Training:  19%|█▉        | 1938798/10000000 [8:29:39<47:55:06, 46.73it/s]

Episode 3567 completed | Reward: 3600.00 | Avg Reward: 2437.40 | Frames: 1938791 | Epsilon: 0.1000


Training:  19%|█▉        | 1939477/10000000 [8:29:52<46:01:09, 48.65it/s]

Episode 3568 completed | Reward: 3140.00 | Avg Reward: 2463.20 | Frames: 1939468 | Epsilon: 0.1000


Training:  19%|█▉        | 1940138/10000000 [8:30:06<45:33:31, 49.14it/s]

Episode 3569 completed | Reward: 3100.00 | Avg Reward: 2474.20 | Frames: 1940132 | Epsilon: 0.1000


Training:  19%|█▉        | 1940751/10000000 [8:30:18<43:09:01, 51.88it/s]

Episode 3570 completed | Reward: 2400.00 | Avg Reward: 2475.60 | Frames: 1940747 | Epsilon: 0.1000


Training:  19%|█▉        | 1941303/10000000 [8:30:29<44:58:54, 49.77it/s]

Episode 3571 completed | Reward: 1060.00 | Avg Reward: 2474.80 | Frames: 1941297 | Epsilon: 0.1000


Training:  19%|█▉        | 1941940/10000000 [8:30:42<51:39:25, 43.33it/s]

Episode 3572 completed | Reward: 2880.00 | Avg Reward: 2495.40 | Frames: 1941939 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  19%|█▉        | 1942745/10000000 [8:30:58<48:07:19, 46.51it/s]

Episode 3573 completed | Reward: 4110.00 | Avg Reward: 2525.10 | Frames: 1942737 | Epsilon: 0.1000


Training:  19%|█▉        | 1943334/10000000 [8:31:10<45:47:11, 48.88it/s]

Episode 3574 completed | Reward: 1760.00 | Avg Reward: 2520.10 | Frames: 1943331 | Epsilon: 0.1000


Training:  19%|█▉        | 1943849/10000000 [8:31:21<47:32:09, 47.08it/s]

Episode 3575 completed | Reward: 2920.00 | Avg Reward: 2529.70 | Frames: 1943844 | Epsilon: 0.1000


Training:  19%|█▉        | 1944253/10000000 [8:31:29<48:42:35, 45.94it/s]

Episode 3576 completed | Reward: 1040.00 | Avg Reward: 2530.70 | Frames: 1944246 | Epsilon: 0.1000


Training:  19%|█▉        | 1944745/10000000 [8:31:39<47:32:09, 47.07it/s]

Episode 3577 completed | Reward: 2360.00 | Avg Reward: 2544.50 | Frames: 1944737 | Epsilon: 0.1000


Training:  19%|█▉        | 1945453/10000000 [8:31:53<47:36:17, 47.00it/s]

Episode 3578 completed | Reward: 3900.00 | Avg Reward: 2561.30 | Frames: 1945452 | Epsilon: 0.1000


Training:  19%|█▉        | 1946128/10000000 [8:32:07<41:39:52, 53.70it/s]

Episode 3579 completed | Reward: 1840.00 | Avg Reward: 2539.10 | Frames: 1946122 | Epsilon: 0.1000


Training:  19%|█▉        | 1946519/10000000 [8:32:14<42:29:09, 52.65it/s]

Episode 3580 completed | Reward: 660.00 | Avg Reward: 2535.70 | Frames: 1946513 | Epsilon: 0.1000


Training:  19%|█▉        | 1947210/10000000 [8:32:29<46:47:12, 47.81it/s]

Episode 3581 completed | Reward: 2800.00 | Avg Reward: 2531.30 | Frames: 1947206 | Epsilon: 0.1000


Training:  19%|█▉        | 1947734/10000000 [8:32:39<45:58:56, 48.64it/s]

Episode 3582 completed | Reward: 3740.00 | Avg Reward: 2558.70 | Frames: 1947727 | Epsilon: 0.1000


Training:  19%|█▉        | 1948313/10000000 [8:32:51<47:04:54, 47.50it/s]

Episode 3583 completed | Reward: 3440.00 | Avg Reward: 2574.90 | Frames: 1948306 | Epsilon: 0.1000


Training:  19%|█▉        | 1948881/10000000 [8:33:02<48:01:58, 46.56it/s]

Episode 3584 completed | Reward: 3260.00 | Avg Reward: 2588.30 | Frames: 1948872 | Epsilon: 0.1000


Training:  19%|█▉        | 1949374/10000000 [8:33:12<46:37:46, 47.96it/s]

Episode 3585 completed | Reward: 1980.00 | Avg Reward: 2585.70 | Frames: 1949370 | Epsilon: 0.1000


Training:  20%|█▉        | 1950077/10000000 [8:33:27<47:00:31, 47.57it/s]

Episode 3586 completed | Reward: 3060.00 | Avg Reward: 2606.50 | Frames: 1950068 | Epsilon: 0.1000


Training:  20%|█▉        | 1950538/10000000 [8:33:36<47:09:44, 47.41it/s]

Episode 3587 completed | Reward: 4630.00 | Avg Reward: 2608.00 | Frames: 1950531 | Epsilon: 0.1000


Training:  20%|█▉        | 1951289/10000000 [8:33:51<47:30:09, 47.07it/s]

Episode 3588 completed | Reward: 3040.00 | Avg Reward: 2615.60 | Frames: 1951280 | Epsilon: 0.1000


Training:  20%|█▉        | 1951959/10000000 [8:34:05<54:44:37, 40.84it/s]

Episode 3589 completed | Reward: 3060.00 | Avg Reward: 2623.00 | Frames: 1951958 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|█▉        | 1952438/10000000 [8:34:15<45:51:17, 48.75it/s]

Episode 3590 completed | Reward: 1980.00 | Avg Reward: 2586.10 | Frames: 1952435 | Epsilon: 0.1000


Training:  20%|█▉        | 1953777/10000000 [8:34:42<46:56:34, 47.61it/s]

Episode 3591 completed | Reward: 4200.00 | Avg Reward: 2594.50 | Frames: 1953768 | Epsilon: 0.1000


Training:  20%|█▉        | 1954374/10000000 [8:34:54<46:35:21, 47.97it/s]

Episode 3592 completed | Reward: 2040.00 | Avg Reward: 2580.30 | Frames: 1954367 | Epsilon: 0.1000


Training:  20%|█▉        | 1954993/10000000 [8:35:07<49:14:09, 45.39it/s]

Episode 3593 completed | Reward: 3140.00 | Avg Reward: 2591.70 | Frames: 1954986 | Epsilon: 0.1000


Training:  20%|█▉        | 1955342/10000000 [8:35:14<46:07:32, 48.45it/s]

Episode 3594 completed | Reward: 3280.00 | Avg Reward: 2601.90 | Frames: 1955335 | Epsilon: 0.1000


Training:  20%|█▉        | 1955755/10000000 [8:35:22<44:13:59, 50.52it/s]

Episode 3595 completed | Reward: 2680.00 | Avg Reward: 2608.70 | Frames: 1955751 | Epsilon: 0.1000


Training:  20%|█▉        | 1956322/10000000 [8:35:33<48:40:36, 45.90it/s]

Episode 3596 completed | Reward: 2900.00 | Avg Reward: 2606.30 | Frames: 1956319 | Epsilon: 0.1000


Training:  20%|█▉        | 1956901/10000000 [8:35:45<48:20:11, 46.22it/s]

Episode 3597 completed | Reward: 3240.00 | Avg Reward: 2617.50 | Frames: 1956892 | Epsilon: 0.1000


Training:  20%|█▉        | 1957353/10000000 [8:35:54<47:57:04, 46.59it/s]

Episode 3598 completed | Reward: 2500.00 | Avg Reward: 2609.50 | Frames: 1957344 | Epsilon: 0.1000


Training:  20%|█▉        | 1957866/10000000 [8:36:05<46:58:51, 47.55it/s]

Episode 3599 completed | Reward: 3280.00 | Avg Reward: 2600.90 | Frames: 1957861 | Epsilon: 0.1000


Training:  20%|█▉        | 1958281/10000000 [8:36:13<47:50:02, 46.70it/s]

Episode 3600 completed | Reward: 1900.00 | Avg Reward: 2605.10 | Frames: 1958272 | Epsilon: 0.1000


Training:  20%|█▉        | 1958741/10000000 [8:36:22<46:30:03, 48.04it/s]

Episode 3601 completed | Reward: 3960.00 | Avg Reward: 2637.90 | Frames: 1958732 | Epsilon: 0.1000


Training:  20%|█▉        | 1959272/10000000 [8:36:33<44:53:41, 49.75it/s]

Episode 3602 completed | Reward: 3980.00 | Avg Reward: 2650.30 | Frames: 1959271 | Epsilon: 0.1000


Training:  20%|█▉        | 1959767/10000000 [8:36:43<43:23:04, 51.48it/s]

Episode 3603 completed | Reward: 1480.00 | Avg Reward: 2650.70 | Frames: 1959762 | Epsilon: 0.1000


Training:  20%|█▉        | 1960325/10000000 [8:36:54<46:03:27, 48.49it/s]

Episode 3604 completed | Reward: 4220.00 | Avg Reward: 2655.10 | Frames: 1960318 | Epsilon: 0.1000


Training:  20%|█▉        | 1960961/10000000 [8:37:07<47:03:08, 47.46it/s]

Episode 3605 completed | Reward: 3280.00 | Avg Reward: 2683.30 | Frames: 1960952 | Epsilon: 0.1000


Training:  20%|█▉        | 1961669/10000000 [8:37:22<47:34:16, 46.94it/s]

Episode 3606 completed | Reward: 2440.00 | Avg Reward: 2698.70 | Frames: 1961661 | Epsilon: 0.1000


Training:  20%|█▉        | 1962079/10000000 [8:37:30<52:05:27, 42.86it/s]

Episode 3607 completed | Reward: 5760.00 | Avg Reward: 2734.90 | Frames: 1962076 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|█▉        | 1962553/10000000 [8:37:39<48:13:35, 46.29it/s]

Episode 3608 completed | Reward: 3490.00 | Avg Reward: 2756.60 | Frames: 1962546 | Epsilon: 0.1000


Training:  20%|█▉        | 1963110/10000000 [8:37:51<46:18:13, 48.21it/s]

Episode 3609 completed | Reward: 3440.00 | Avg Reward: 2762.80 | Frames: 1963103 | Epsilon: 0.1000


Training:  20%|█▉        | 1963639/10000000 [8:38:01<42:43:13, 52.25it/s]

Episode 3610 completed | Reward: 1780.00 | Avg Reward: 2772.00 | Frames: 1963633 | Epsilon: 0.1000


Training:  20%|█▉        | 1964217/10000000 [8:38:13<48:18:30, 46.21it/s]

Episode 3611 completed | Reward: 4840.00 | Avg Reward: 2787.80 | Frames: 1964209 | Epsilon: 0.1000


Training:  20%|█▉        | 1964674/10000000 [8:38:22<45:50:59, 48.68it/s]

Episode 3612 completed | Reward: 1880.00 | Avg Reward: 2793.00 | Frames: 1964665 | Epsilon: 0.1000


Training:  20%|█▉        | 1965461/10000000 [8:38:38<47:40:05, 46.82it/s]

Episode 3613 completed | Reward: 3240.00 | Avg Reward: 2806.20 | Frames: 1965454 | Epsilon: 0.1000


Training:  20%|█▉        | 1965937/10000000 [8:38:47<48:36:28, 45.91it/s]

Episode 3614 completed | Reward: 3140.00 | Avg Reward: 2826.40 | Frames: 1965930 | Epsilon: 0.1000


Training:  20%|█▉        | 1966486/10000000 [8:38:58<47:35:12, 46.89it/s]

Episode 3615 completed | Reward: 2720.00 | Avg Reward: 2841.80 | Frames: 1966484 | Epsilon: 0.1000


Training:  20%|█▉        | 1966979/10000000 [8:39:08<44:24:10, 50.25it/s]

Episode 3616 completed | Reward: 1020.00 | Avg Reward: 2831.60 | Frames: 1966974 | Epsilon: 0.1000


Training:  20%|█▉        | 1967495/10000000 [8:39:19<43:41:32, 51.07it/s]

Episode 3617 completed | Reward: 820.00 | Avg Reward: 2800.40 | Frames: 1967491 | Epsilon: 0.1000


Training:  20%|█▉        | 1967906/10000000 [8:39:27<47:00:24, 47.46it/s]

Episode 3618 completed | Reward: 1840.00 | Avg Reward: 2795.80 | Frames: 1967900 | Epsilon: 0.1000


Training:  20%|█▉        | 1968393/10000000 [8:39:37<46:12:19, 48.28it/s]

Episode 3619 completed | Reward: 3950.00 | Avg Reward: 2806.90 | Frames: 1968384 | Epsilon: 0.1000


Training:  20%|█▉        | 1968751/10000000 [8:39:44<44:04:05, 50.62it/s]

Episode 3620 completed | Reward: 2360.00 | Avg Reward: 2815.10 | Frames: 1968746 | Epsilon: 0.1000


Training:  20%|█▉        | 1969185/10000000 [8:39:53<47:02:40, 47.42it/s]

Episode 3621 completed | Reward: 1640.00 | Avg Reward: 2806.10 | Frames: 1969176 | Epsilon: 0.1000


Training:  20%|█▉        | 1970235/10000000 [8:40:14<43:30:35, 51.26it/s]

Episode 3622 completed | Reward: 3460.00 | Avg Reward: 2823.90 | Frames: 1970229 | Epsilon: 0.1000


Training:  20%|█▉        | 1970953/10000000 [8:40:28<47:35:50, 46.86it/s]

Episode 3623 completed | Reward: 1220.00 | Avg Reward: 2808.50 | Frames: 1970945 | Epsilon: 0.1000


Training:  20%|█▉        | 1971693/10000000 [8:40:43<46:01:05, 48.46it/s]

Episode 3624 completed | Reward: 860.00 | Avg Reward: 2809.70 | Frames: 1971684 | Epsilon: 0.1000


Training:  20%|█▉        | 1972265/10000000 [8:40:55<56:47:32, 39.26it/s]

Episode 3625 completed | Reward: 2820.00 | Avg Reward: 2794.30 | Frames: 1972264 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|█▉        | 1972902/10000000 [8:41:08<46:56:12, 47.51it/s]

Episode 3626 completed | Reward: 1320.00 | Avg Reward: 2767.70 | Frames: 1972897 | Epsilon: 0.1000


Training:  20%|█▉        | 1973373/10000000 [8:41:17<46:36:24, 47.84it/s]

Episode 3627 completed | Reward: 1640.00 | Avg Reward: 2746.70 | Frames: 1973364 | Epsilon: 0.1000


Training:  20%|█▉        | 1973886/10000000 [8:41:28<46:19:53, 48.12it/s]

Episode 3628 completed | Reward: 1940.00 | Avg Reward: 2736.50 | Frames: 1973879 | Epsilon: 0.1000


Training:  20%|█▉        | 1974353/10000000 [8:41:37<47:12:07, 47.23it/s]

Episode 3629 completed | Reward: 2740.00 | Avg Reward: 2724.70 | Frames: 1974346 | Epsilon: 0.1000


Training:  20%|█▉        | 1974897/10000000 [8:41:48<48:20:12, 46.12it/s]

Episode 3630 completed | Reward: 740.00 | Avg Reward: 2696.50 | Frames: 1974890 | Epsilon: 0.1000


Training:  20%|█▉        | 1975342/10000000 [8:41:57<45:16:01, 49.24it/s]

Episode 3631 completed | Reward: 2020.00 | Avg Reward: 2707.90 | Frames: 1975338 | Epsilon: 0.1000


Training:  20%|█▉        | 1975833/10000000 [8:42:07<46:43:08, 47.71it/s]

Episode 3632 completed | Reward: 620.00 | Avg Reward: 2694.50 | Frames: 1975824 | Epsilon: 0.1000


Training:  20%|█▉        | 1976378/10000000 [8:42:18<45:41:56, 48.77it/s]

Episode 3633 completed | Reward: 2280.00 | Avg Reward: 2680.90 | Frames: 1976369 | Epsilon: 0.1000


Training:  20%|█▉        | 1976849/10000000 [8:42:27<48:06:43, 46.32it/s]

Episode 3634 completed | Reward: 3420.00 | Avg Reward: 2679.90 | Frames: 1976842 | Epsilon: 0.1000


Training:  20%|█▉        | 1977334/10000000 [8:42:37<45:49:28, 48.63it/s]

Episode 3635 completed | Reward: 1460.00 | Avg Reward: 2666.10 | Frames: 1977330 | Epsilon: 0.1000


Training:  20%|█▉        | 1977865/10000000 [8:42:48<48:25:25, 46.02it/s]

Episode 3636 completed | Reward: 2180.00 | Avg Reward: 2656.90 | Frames: 1977856 | Epsilon: 0.1000


Training:  20%|█▉        | 1978425/10000000 [8:42:59<46:57:33, 47.45it/s]

Episode 3637 completed | Reward: 4360.00 | Avg Reward: 2659.90 | Frames: 1978417 | Epsilon: 0.1000


Training:  20%|█▉        | 1978898/10000000 [8:43:08<47:04:33, 47.33it/s]

Episode 3638 completed | Reward: 2040.00 | Avg Reward: 2660.50 | Frames: 1978895 | Epsilon: 0.1000


Training:  20%|█▉        | 1979446/10000000 [8:43:20<46:46:09, 47.64it/s]

Episode 3639 completed | Reward: 1780.00 | Avg Reward: 2635.40 | Frames: 1979439 | Epsilon: 0.1000


Training:  20%|█▉        | 1979935/10000000 [8:43:29<44:05:10, 50.53it/s]

Episode 3640 completed | Reward: 960.00 | Avg Reward: 2615.60 | Frames: 1979931 | Epsilon: 0.1000


Training:  20%|█▉        | 1980438/10000000 [8:43:40<44:55:24, 49.59it/s]

Episode 3641 completed | Reward: 3640.00 | Avg Reward: 2608.60 | Frames: 1980433 | Epsilon: 0.1000


Training:  20%|█▉        | 1981030/10000000 [8:43:51<46:11:37, 48.22it/s]

Episode 3642 completed | Reward: 1960.00 | Avg Reward: 2612.40 | Frames: 1981021 | Epsilon: 0.1000


Training:  20%|█▉        | 1981755/10000000 [8:44:06<45:08:25, 49.34it/s]

Episode 3643 completed | Reward: 3760.00 | Avg Reward: 2631.20 | Frames: 1981748 | Epsilon: 0.1000


Training:  20%|█▉        | 1982486/10000000 [8:44:21<55:45:31, 39.94it/s]

Episode 3644 completed | Reward: 3640.00 | Avg Reward: 2650.00 | Frames: 1982485 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|█▉        | 1983125/10000000 [8:44:34<48:05:26, 46.31it/s]

Episode 3645 completed | Reward: 1000.00 | Avg Reward: 2624.40 | Frames: 1983116 | Epsilon: 0.1000


Training:  20%|█▉        | 1983589/10000000 [8:44:43<46:21:33, 48.03it/s]

Episode 3646 completed | Reward: 3890.00 | Avg Reward: 2645.50 | Frames: 1983587 | Epsilon: 0.1000


Training:  20%|█▉        | 1984325/10000000 [8:44:58<46:48:21, 47.57it/s]

Episode 3647 completed | Reward: 4220.00 | Avg Reward: 2647.90 | Frames: 1984316 | Epsilon: 0.1000


Training:  20%|█▉        | 1984902/10000000 [8:45:10<48:11:41, 46.20it/s]

Episode 3648 completed | Reward: 1880.00 | Avg Reward: 2620.30 | Frames: 1984900 | Epsilon: 0.1000


Training:  20%|█▉        | 1985551/10000000 [8:45:23<42:48:01, 52.01it/s]

Episode 3649 completed | Reward: 1860.00 | Avg Reward: 2593.30 | Frames: 1985546 | Epsilon: 0.1000


Training:  20%|█▉        | 1986070/10000000 [8:45:33<47:26:34, 46.92it/s]

Episode 3650 completed | Reward: 2140.00 | Avg Reward: 2607.30 | Frames: 1986067 | Epsilon: 0.1000


Training:  20%|█▉        | 1986622/10000000 [8:45:44<46:49:24, 47.54it/s]

Episode 3651 completed | Reward: 1680.00 | Avg Reward: 2608.10 | Frames: 1986615 | Epsilon: 0.1000


Training:  20%|█▉        | 1987129/10000000 [8:45:55<46:48:00, 47.56it/s]

Episode 3652 completed | Reward: 2440.00 | Avg Reward: 2624.70 | Frames: 1987121 | Epsilon: 0.1000


Training:  20%|█▉        | 1987619/10000000 [8:46:04<43:14:09, 51.48it/s]

Episode 3653 completed | Reward: 5210.00 | Avg Reward: 2663.20 | Frames: 1987613 | Epsilon: 0.1000


Training:  20%|█▉        | 1988305/10000000 [8:46:18<46:44:42, 47.61it/s]

Episode 3654 completed | Reward: 3240.00 | Avg Reward: 2682.40 | Frames: 1988301 | Epsilon: 0.1000


Training:  20%|█▉        | 1988870/10000000 [8:46:30<46:26:29, 47.92it/s]

Episode 3655 completed | Reward: 4420.00 | Avg Reward: 2707.20 | Frames: 1988867 | Epsilon: 0.1000


Training:  20%|█▉        | 1989365/10000000 [8:46:40<47:36:57, 46.73it/s]

Episode 3656 completed | Reward: 3040.00 | Avg Reward: 2701.20 | Frames: 1989356 | Epsilon: 0.1000


Training:  20%|█▉        | 1989990/10000000 [8:46:52<45:07:16, 49.31it/s]

Episode 3657 completed | Reward: 2220.00 | Avg Reward: 2707.20 | Frames: 1989986 | Epsilon: 0.1000


Training:  20%|█▉        | 1990700/10000000 [8:47:07<41:10:30, 54.03it/s]

Episode 3658 completed | Reward: 4720.00 | Avg Reward: 2705.40 | Frames: 1990695 | Epsilon: 0.1000


Training:  20%|█▉        | 1991305/10000000 [8:47:19<46:55:04, 47.42it/s]

Episode 3659 completed | Reward: 4200.00 | Avg Reward: 2711.80 | Frames: 1991298 | Epsilon: 0.1000


Training:  20%|█▉        | 1991841/10000000 [8:47:30<45:38:55, 48.73it/s]

Episode 3660 completed | Reward: 2860.00 | Avg Reward: 2719.00 | Frames: 1991832 | Epsilon: 0.1000


Training:  20%|█▉        | 1992205/10000000 [8:47:37<48:18:39, 46.04it/s]

Episode 3661 completed | Reward: 1300.00 | Avg Reward: 2695.00 | Frames: 1992196 | Epsilon: 0.1000


Training:  20%|█▉        | 1992739/10000000 [8:47:48<53:35:06, 41.51it/s]

Episode 3662 completed | Reward: 2700.00 | Avg Reward: 2713.40 | Frames: 1992737 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|█▉        | 1993261/10000000 [8:47:58<47:11:55, 47.12it/s]

Episode 3663 completed | Reward: 3240.00 | Avg Reward: 2710.40 | Frames: 1993252 | Epsilon: 0.1000


Training:  20%|█▉        | 1994714/10000000 [8:48:27<46:14:56, 48.08it/s]

Episode 3664 completed | Reward: 5120.00 | Avg Reward: 2733.60 | Frames: 1994709 | Epsilon: 0.1000


Training:  20%|█▉        | 1995586/10000000 [8:48:45<48:05:15, 46.24it/s]

Episode 3665 completed | Reward: 3440.00 | Avg Reward: 2744.00 | Frames: 1995583 | Epsilon: 0.1000


Training:  20%|█▉        | 1996141/10000000 [8:48:56<50:26:47, 44.07it/s]

Episode 3666 completed | Reward: 2180.00 | Avg Reward: 2740.60 | Frames: 1996139 | Epsilon: 0.1000


Training:  20%|█▉        | 1996619/10000000 [8:49:06<43:08:38, 51.53it/s]

Episode 3667 completed | Reward: 4940.00 | Avg Reward: 2754.00 | Frames: 1996615 | Epsilon: 0.1000


Training:  20%|█▉        | 1997133/10000000 [8:49:16<49:09:57, 45.21it/s]

Episode 3668 completed | Reward: 1420.00 | Avg Reward: 2736.80 | Frames: 1997126 | Epsilon: 0.1000


Training:  20%|█▉        | 1997725/10000000 [8:49:28<46:42:07, 47.60it/s]

Episode 3669 completed | Reward: 1600.00 | Avg Reward: 2721.80 | Frames: 1997717 | Epsilon: 0.1000


Training:  20%|█▉        | 1998138/10000000 [8:49:37<45:38:35, 48.70it/s]

Episode 3670 completed | Reward: 1680.00 | Avg Reward: 2714.60 | Frames: 1998134 | Epsilon: 0.1000


Training:  20%|█▉        | 1998693/10000000 [8:49:48<47:26:49, 46.84it/s]

Episode 3671 completed | Reward: 3790.00 | Avg Reward: 2741.90 | Frames: 1998684 | Epsilon: 0.1000


Training:  20%|█▉        | 1999177/10000000 [8:49:58<48:06:32, 46.20it/s]

Episode 3672 completed | Reward: 1540.00 | Avg Reward: 2728.50 | Frames: 1999168 | Epsilon: 0.1000


Training:  20%|█▉        | 1999993/10000000 [8:50:14<44:01:33, 50.48it/s]

Model saved to weights/CarnivalDeterministic-v4_dqn_2000000frames.pth


Training:  20%|██        | 2000007/10000000 [8:50:29<945:39:43,  2.35it/s] 


Evaluation at frame 2000000: 632.00
Episode 3673 completed | Reward: 3640.00 | Avg Reward: 2723.80 | Frames: 2000001 | Epsilon: 0.1000


Training:  20%|██        | 2001043/10000000 [8:50:50<42:46:07, 51.95it/s]

Episode 3674 completed | Reward: 3960.00 | Avg Reward: 2745.80 | Frames: 2001039 | Epsilon: 0.1000


Training:  20%|██        | 2001652/10000000 [8:51:02<42:14:58, 52.59it/s]

Episode 3675 completed | Reward: 2120.00 | Avg Reward: 2737.80 | Frames: 2001646 | Epsilon: 0.1000


Training:  20%|██        | 2002177/10000000 [8:51:13<48:10:57, 46.11it/s]

Episode 3676 completed | Reward: 2240.00 | Avg Reward: 2749.80 | Frames: 2002169 | Epsilon: 0.1000


Training:  20%|██        | 2002633/10000000 [8:51:22<47:12:24, 47.06it/s]

Episode 3677 completed | Reward: 3340.00 | Avg Reward: 2759.60 | Frames: 2002631 | Epsilon: 0.1000


Training:  20%|██        | 2003157/10000000 [8:51:32<55:04:03, 40.34it/s]

Episode 3678 completed | Reward: 580.00 | Avg Reward: 2726.40 | Frames: 2003156 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|██        | 2003675/10000000 [8:51:43<44:23:07, 50.04it/s]

Episode 3679 completed | Reward: 1320.00 | Avg Reward: 2721.20 | Frames: 2003669 | Epsilon: 0.1000


Training:  20%|██        | 2004190/10000000 [8:51:53<45:57:35, 48.33it/s]

Episode 3680 completed | Reward: 1080.00 | Avg Reward: 2725.40 | Frames: 2004183 | Epsilon: 0.1000


Training:  20%|██        | 2004710/10000000 [8:52:04<46:48:01, 47.46it/s]

Episode 3681 completed | Reward: 1620.00 | Avg Reward: 2713.60 | Frames: 2004703 | Epsilon: 0.1000


Training:  20%|██        | 2005125/10000000 [8:52:12<46:39:09, 47.60it/s]

Episode 3682 completed | Reward: 1180.00 | Avg Reward: 2688.00 | Frames: 2005116 | Epsilon: 0.1000


Training:  20%|██        | 2005529/10000000 [8:52:20<47:48:40, 46.45it/s]

Episode 3683 completed | Reward: 3890.00 | Avg Reward: 2692.50 | Frames: 2005520 | Epsilon: 0.1000


Training:  20%|██        | 2006161/10000000 [8:52:33<48:35:03, 45.70it/s]

Episode 3684 completed | Reward: 3360.00 | Avg Reward: 2693.50 | Frames: 2006152 | Epsilon: 0.1000


Training:  20%|██        | 2006533/10000000 [8:52:40<45:01:10, 49.32it/s]

Episode 3685 completed | Reward: 3540.00 | Avg Reward: 2709.10 | Frames: 2006525 | Epsilon: 0.1000


Training:  20%|██        | 2007037/10000000 [8:52:51<47:07:10, 47.12it/s]

Episode 3686 completed | Reward: 2040.00 | Avg Reward: 2698.90 | Frames: 2007029 | Epsilon: 0.1000


Training:  20%|██        | 2007559/10000000 [8:53:01<43:28:21, 51.07it/s]

Episode 3687 completed | Reward: 1160.00 | Avg Reward: 2664.20 | Frames: 2007555 | Epsilon: 0.1000


Training:  20%|██        | 2008666/10000000 [8:53:23<47:27:14, 46.78it/s]

Episode 3688 completed | Reward: 3100.00 | Avg Reward: 2664.80 | Frames: 2008662 | Epsilon: 0.1000


Training:  20%|██        | 2009166/10000000 [8:53:33<45:59:29, 48.26it/s]

Episode 3689 completed | Reward: 3200.00 | Avg Reward: 2666.20 | Frames: 2009159 | Epsilon: 0.1000


Training:  20%|██        | 2009549/10000000 [8:53:41<47:34:07, 46.66it/s]

Episode 3690 completed | Reward: 1840.00 | Avg Reward: 2664.80 | Frames: 2009541 | Epsilon: 0.1000


Training:  20%|██        | 2010137/10000000 [8:53:53<45:50:54, 48.41it/s]

Episode 3691 completed | Reward: 940.00 | Avg Reward: 2632.20 | Frames: 2010128 | Epsilon: 0.1000


Training:  20%|██        | 2010639/10000000 [8:54:03<44:02:59, 50.38it/s]

Episode 3692 completed | Reward: 1740.00 | Avg Reward: 2629.20 | Frames: 2010637 | Epsilon: 0.1000


Training:  20%|██        | 2011168/10000000 [8:54:14<42:59:25, 51.62it/s]

Episode 3693 completed | Reward: 1680.00 | Avg Reward: 2614.60 | Frames: 2011159 | Epsilon: 0.1000


Training:  20%|██        | 2011561/10000000 [8:54:22<48:30:04, 45.75it/s]

Episode 3694 completed | Reward: 960.00 | Avg Reward: 2591.40 | Frames: 2011552 | Epsilon: 0.1000


Training:  20%|██        | 2012478/10000000 [8:54:40<48:59:53, 45.28it/s]

Episode 3695 completed | Reward: 3180.00 | Avg Reward: 2596.40 | Frames: 2012474 | Epsilon: 0.1000


Training:  20%|██        | 2012946/10000000 [8:54:50<46:02:59, 48.18it/s]

Episode 3696 completed | Reward: 860.00 | Avg Reward: 2576.00 | Frames: 2012939 | Epsilon: 0.1000


Training:  20%|██        | 2013477/10000000 [8:55:01<43:56:27, 50.49it/s]

Episode 3697 completed | Reward: 2420.00 | Avg Reward: 2567.80 | Frames: 2013477 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|██        | 2014413/10000000 [8:55:20<46:48:41, 47.39it/s]

Episode 3698 completed | Reward: 3800.00 | Avg Reward: 2580.80 | Frames: 2014404 | Epsilon: 0.1000


Training:  20%|██        | 2014994/10000000 [8:55:31<46:03:01, 48.17it/s]

Episode 3699 completed | Reward: 4000.00 | Avg Reward: 2588.00 | Frames: 2014988 | Epsilon: 0.1000


Training:  20%|██        | 2015345/10000000 [8:55:38<47:55:15, 46.28it/s]

Episode 3700 completed | Reward: 3290.00 | Avg Reward: 2601.90 | Frames: 2015336 | Epsilon: 0.1000


Training:  20%|██        | 2016250/10000000 [8:55:57<47:44:12, 46.46it/s]

Episode 3701 completed | Reward: 3740.00 | Avg Reward: 2599.70 | Frames: 2016246 | Epsilon: 0.1000


Training:  20%|██        | 2016821/10000000 [8:56:08<46:07:16, 48.08it/s]

Episode 3702 completed | Reward: 1080.00 | Avg Reward: 2570.70 | Frames: 2016820 | Epsilon: 0.1000


Training:  20%|██        | 2017517/10000000 [8:56:22<47:59:46, 46.20it/s]

Episode 3703 completed | Reward: 1460.00 | Avg Reward: 2570.50 | Frames: 2017508 | Epsilon: 0.1000


Training:  20%|██        | 2018302/10000000 [8:56:38<45:17:31, 48.95it/s]

Episode 3704 completed | Reward: 2400.00 | Avg Reward: 2552.30 | Frames: 2018295 | Epsilon: 0.1000


Training:  20%|██        | 2018730/10000000 [8:56:47<45:48:49, 48.39it/s]

Episode 3705 completed | Reward: 1340.00 | Avg Reward: 2532.90 | Frames: 2018723 | Epsilon: 0.1000


Training:  20%|██        | 2019117/10000000 [8:56:54<47:43:00, 46.46it/s]

Episode 3706 completed | Reward: 3340.00 | Avg Reward: 2541.90 | Frames: 2019110 | Epsilon: 0.1000


Training:  20%|██        | 2019666/10000000 [8:57:05<45:30:19, 48.71it/s]

Episode 3707 completed | Reward: 3160.00 | Avg Reward: 2515.90 | Frames: 2019659 | Epsilon: 0.1000


Training:  20%|██        | 2020415/10000000 [8:57:20<44:16:49, 50.06it/s]

Episode 3708 completed | Reward: 2680.00 | Avg Reward: 2507.80 | Frames: 2020409 | Epsilon: 0.1000


Training:  20%|██        | 2021034/10000000 [8:57:33<45:13:06, 49.01it/s]

Episode 3709 completed | Reward: 3740.00 | Avg Reward: 2510.80 | Frames: 2021029 | Epsilon: 0.1000


Training:  20%|██        | 2021758/10000000 [8:57:47<47:25:21, 46.73it/s]

Episode 3710 completed | Reward: 3080.00 | Avg Reward: 2523.80 | Frames: 2021749 | Epsilon: 0.1000


Training:  20%|██        | 2022229/10000000 [8:57:57<45:28:27, 48.73it/s]

Episode 3711 completed | Reward: 3340.00 | Avg Reward: 2508.80 | Frames: 2022220 | Epsilon: 0.1000


Training:  20%|██        | 2022593/10000000 [8:58:04<45:45:16, 48.43it/s]

Episode 3712 completed | Reward: 1940.00 | Avg Reward: 2509.40 | Frames: 2022589 | Epsilon: 0.1000


Training:  20%|██        | 2023145/10000000 [8:58:15<46:07:54, 48.03it/s]

Episode 3713 completed | Reward: 4660.00 | Avg Reward: 2523.60 | Frames: 2023136 | Epsilon: 0.1000


Training:  20%|██        | 2023529/10000000 [8:58:23<44:22:50, 49.92it/s]

Episode 3714 completed | Reward: 1460.00 | Avg Reward: 2506.80 | Frames: 2023529 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|██        | 2024189/10000000 [8:58:37<47:02:58, 47.09it/s]

Episode 3715 completed | Reward: 3220.00 | Avg Reward: 2511.80 | Frames: 2024180 | Epsilon: 0.1000


Training:  20%|██        | 2024706/10000000 [8:58:47<45:32:50, 48.64it/s]

Episode 3716 completed | Reward: 2900.00 | Avg Reward: 2530.60 | Frames: 2024701 | Epsilon: 0.1000


Training:  20%|██        | 2025366/10000000 [8:59:00<46:35:49, 47.54it/s]

Episode 3717 completed | Reward: 2620.00 | Avg Reward: 2548.60 | Frames: 2025359 | Epsilon: 0.1000


Training:  20%|██        | 2025777/10000000 [8:59:09<45:32:27, 48.64it/s]

Episode 3718 completed | Reward: 1280.00 | Avg Reward: 2543.00 | Frames: 2025771 | Epsilon: 0.1000


Training:  20%|██        | 2026225/10000000 [8:59:18<48:10:06, 45.98it/s]

Episode 3719 completed | Reward: 1240.00 | Avg Reward: 2515.90 | Frames: 2026218 | Epsilon: 0.1000


Training:  20%|██        | 2026705/10000000 [8:59:27<48:06:26, 46.04it/s]

Episode 3720 completed | Reward: 2640.00 | Avg Reward: 2518.70 | Frames: 2026696 | Epsilon: 0.1000


Training:  20%|██        | 2027186/10000000 [8:59:37<44:40:57, 49.56it/s]

Episode 3721 completed | Reward: 3220.00 | Avg Reward: 2534.50 | Frames: 2027183 | Epsilon: 0.1000


Training:  20%|██        | 2027880/10000000 [8:59:51<42:31:09, 52.08it/s]

Episode 3722 completed | Reward: 2740.00 | Avg Reward: 2527.30 | Frames: 2027872 | Epsilon: 0.1000


Training:  20%|██        | 2028313/10000000 [9:00:00<45:04:05, 49.13it/s]

Episode 3723 completed | Reward: 2480.00 | Avg Reward: 2539.90 | Frames: 2028307 | Epsilon: 0.1000


Training:  20%|██        | 2028985/10000000 [9:00:13<47:00:13, 47.11it/s]

Episode 3724 completed | Reward: 1900.00 | Avg Reward: 2550.30 | Frames: 2028984 | Epsilon: 0.1000


Training:  20%|██        | 2029603/10000000 [9:00:26<43:23:04, 51.03it/s]

Episode 3725 completed | Reward: 2540.00 | Avg Reward: 2547.50 | Frames: 2029598 | Epsilon: 0.1000


Training:  20%|██        | 2030081/10000000 [9:00:36<47:12:09, 46.90it/s]

Episode 3726 completed | Reward: 1160.00 | Avg Reward: 2545.90 | Frames: 2030072 | Epsilon: 0.1000


Training:  20%|██        | 2030610/10000000 [9:00:46<48:25:53, 45.71it/s]

Episode 3727 completed | Reward: 3060.00 | Avg Reward: 2560.10 | Frames: 2030603 | Epsilon: 0.1000


Training:  20%|██        | 2031270/10000000 [9:00:59<46:40:26, 47.43it/s]

Episode 3728 completed | Reward: 2040.00 | Avg Reward: 2561.10 | Frames: 2031265 | Epsilon: 0.1000


Training:  20%|██        | 2031646/10000000 [9:01:07<46:44:31, 47.35it/s]

Episode 3729 completed | Reward: 1360.00 | Avg Reward: 2547.30 | Frames: 2031639 | Epsilon: 0.1000


Training:  20%|██        | 2032194/10000000 [9:01:18<45:45:39, 48.37it/s]

Episode 3730 completed | Reward: 1540.00 | Avg Reward: 2555.30 | Frames: 2032185 | Epsilon: 0.1000


Training:  20%|██        | 2032962/10000000 [9:01:34<47:09:07, 46.93it/s]

Episode 3731 completed | Reward: 2960.00 | Avg Reward: 2564.70 | Frames: 2032958 | Epsilon: 0.1000


Training:  20%|██        | 2033419/10000000 [9:01:43<44:18:37, 49.94it/s]

Episode 3732 completed | Reward: 3280.00 | Avg Reward: 2591.30 | Frames: 2033414 | Epsilon: 0.1000


Training:  20%|██        | 2033896/10000000 [9:01:52<50:45:58, 43.59it/s]

Episode 3733 completed | Reward: 3880.00 | Avg Reward: 2607.30 | Frames: 2033895 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|██        | 2034533/10000000 [9:02:05<47:00:13, 47.07it/s]

Episode 3734 completed | Reward: 1180.00 | Avg Reward: 2584.90 | Frames: 2034526 | Epsilon: 0.1000


Training:  20%|██        | 2035011/10000000 [9:02:15<43:55:30, 50.37it/s]

Episode 3735 completed | Reward: 1100.00 | Avg Reward: 2581.30 | Frames: 2035004 | Epsilon: 0.1000


Training:  20%|██        | 2035636/10000000 [9:02:28<41:45:01, 52.99it/s]

Episode 3736 completed | Reward: 1760.00 | Avg Reward: 2577.10 | Frames: 2035631 | Epsilon: 0.1000


Training:  20%|██        | 2036089/10000000 [9:02:37<45:34:26, 48.54it/s]

Episode 3737 completed | Reward: 3940.00 | Avg Reward: 2572.90 | Frames: 2036080 | Epsilon: 0.1000


Training:  20%|██        | 2036698/10000000 [9:02:49<45:36:55, 48.49it/s]

Episode 3738 completed | Reward: 1220.00 | Avg Reward: 2564.70 | Frames: 2036695 | Epsilon: 0.1000


Training:  20%|██        | 2037391/10000000 [9:03:03<43:13:53, 51.16it/s]

Episode 3739 completed | Reward: 4540.00 | Avg Reward: 2592.30 | Frames: 2037386 | Epsilon: 0.1000


Training:  20%|██        | 2037863/10000000 [9:03:12<42:56:44, 51.50it/s]

Episode 3740 completed | Reward: 1620.00 | Avg Reward: 2598.90 | Frames: 2037857 | Epsilon: 0.1000


Training:  20%|██        | 2038498/10000000 [9:03:25<47:24:06, 46.65it/s]

Episode 3741 completed | Reward: 1580.00 | Avg Reward: 2578.30 | Frames: 2038491 | Epsilon: 0.1000


Training:  20%|██        | 2039157/10000000 [9:03:38<46:19:39, 47.73it/s]

Episode 3742 completed | Reward: 1740.00 | Avg Reward: 2576.10 | Frames: 2039150 | Epsilon: 0.1000


Training:  20%|██        | 2039734/10000000 [9:03:50<46:36:16, 47.45it/s]

Episode 3743 completed | Reward: 3220.00 | Avg Reward: 2570.70 | Frames: 2039727 | Epsilon: 0.1000


Training:  20%|██        | 2040481/10000000 [9:04:05<46:43:24, 47.32it/s]

Episode 3744 completed | Reward: 3080.00 | Avg Reward: 2565.10 | Frames: 2040474 | Epsilon: 0.1000


Training:  20%|██        | 2041022/10000000 [9:04:16<48:00:44, 46.05it/s]

Episode 3745 completed | Reward: 980.00 | Avg Reward: 2564.90 | Frames: 2041016 | Epsilon: 0.1000


Training:  20%|██        | 2041389/10000000 [9:04:23<47:14:17, 46.80it/s]

Episode 3746 completed | Reward: 1100.00 | Avg Reward: 2537.00 | Frames: 2041381 | Epsilon: 0.1000


Training:  20%|██        | 2041922/10000000 [9:04:34<46:15:32, 47.79it/s]

Episode 3747 completed | Reward: 3780.00 | Avg Reward: 2532.60 | Frames: 2041917 | Epsilon: 0.1000


Training:  20%|██        | 2042567/10000000 [9:04:47<43:45:45, 50.51it/s]

Episode 3748 completed | Reward: 1040.00 | Avg Reward: 2524.20 | Frames: 2042560 | Epsilon: 0.1000


Training:  20%|██        | 2043370/10000000 [9:05:03<45:53:39, 48.16it/s]

Episode 3749 completed | Reward: 4040.00 | Avg Reward: 2546.00 | Frames: 2043363 | Epsilon: 0.1000


Training:  20%|██        | 2043915/10000000 [9:05:14<51:38:04, 42.80it/s]

Episode 3750 completed | Reward: 900.00 | Avg Reward: 2533.60 | Frames: 2043914 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  20%|██        | 2044483/10000000 [9:05:26<44:27:06, 49.71it/s]

Episode 3751 completed | Reward: 2180.00 | Avg Reward: 2538.60 | Frames: 2044478 | Epsilon: 0.1000


Training:  20%|██        | 2044987/10000000 [9:05:36<46:27:13, 47.57it/s]

Episode 3752 completed | Reward: 2760.00 | Avg Reward: 2541.80 | Frames: 2044981 | Epsilon: 0.1000


Training:  20%|██        | 2045598/10000000 [9:05:48<48:00:40, 46.02it/s]

Episode 3753 completed | Reward: 2560.00 | Avg Reward: 2515.30 | Frames: 2045588 | Epsilon: 0.1000


Training:  20%|██        | 2046238/10000000 [9:06:01<47:24:07, 46.61it/s]

Episode 3754 completed | Reward: 3120.00 | Avg Reward: 2514.10 | Frames: 2046231 | Epsilon: 0.1000


Training:  20%|██        | 2046798/10000000 [9:06:12<46:12:26, 47.81it/s]

Episode 3755 completed | Reward: 2100.00 | Avg Reward: 2490.90 | Frames: 2046789 | Epsilon: 0.1000


Training:  20%|██        | 2047293/10000000 [9:06:22<46:36:10, 47.40it/s]

Episode 3756 completed | Reward: 1980.00 | Avg Reward: 2480.30 | Frames: 2047286 | Epsilon: 0.1000


Training:  20%|██        | 2047797/10000000 [9:06:32<44:45:24, 49.35it/s]

Episode 3757 completed | Reward: 560.00 | Avg Reward: 2463.70 | Frames: 2047788 | Epsilon: 0.1000


Training:  20%|██        | 2048298/10000000 [9:06:42<45:32:41, 48.50it/s]

Episode 3758 completed | Reward: 2060.00 | Avg Reward: 2437.10 | Frames: 2048294 | Epsilon: 0.1000


Training:  20%|██        | 2048863/10000000 [9:06:54<43:56:29, 50.26it/s]

Episode 3759 completed | Reward: 2480.00 | Avg Reward: 2419.90 | Frames: 2048858 | Epsilon: 0.1000


Training:  20%|██        | 2049441/10000000 [9:07:05<45:10:58, 48.88it/s]

Episode 3760 completed | Reward: 2580.00 | Avg Reward: 2417.10 | Frames: 2049439 | Epsilon: 0.1000


Training:  20%|██        | 2049935/10000000 [9:07:15<43:57:21, 50.24it/s]

Episode 3761 completed | Reward: 1740.00 | Avg Reward: 2421.50 | Frames: 2049929 | Epsilon: 0.1000


Training:  21%|██        | 2050424/10000000 [9:07:25<41:47:45, 52.83it/s]

Episode 3762 completed | Reward: 1680.00 | Avg Reward: 2411.30 | Frames: 2050416 | Epsilon: 0.1000


Training:  21%|██        | 2050925/10000000 [9:07:35<48:08:23, 45.87it/s]

Episode 3763 completed | Reward: 3920.00 | Avg Reward: 2418.10 | Frames: 2050918 | Epsilon: 0.1000


Training:  21%|██        | 2051778/10000000 [9:07:53<46:57:11, 47.02it/s]

Episode 3764 completed | Reward: 1660.00 | Avg Reward: 2383.50 | Frames: 2051771 | Epsilon: 0.1000


Training:  21%|██        | 2052361/10000000 [9:08:04<46:22:43, 47.60it/s]

Episode 3765 completed | Reward: 2320.00 | Avg Reward: 2372.30 | Frames: 2052352 | Epsilon: 0.1000


Training:  21%|██        | 2052957/10000000 [9:08:16<46:56:33, 47.03it/s]

Episode 3766 completed | Reward: 2280.00 | Avg Reward: 2373.30 | Frames: 2052948 | Epsilon: 0.1000


Training:  21%|██        | 2053455/10000000 [9:08:26<42:37:05, 51.79it/s]

Episode 3767 completed | Reward: 1720.00 | Avg Reward: 2341.10 | Frames: 2053449 | Epsilon: 0.1000


Training:  21%|██        | 2053975/10000000 [9:08:37<53:06:56, 41.56it/s]

Episode 3768 completed | Reward: 2890.00 | Avg Reward: 2355.80 | Frames: 2053972 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  21%|██        | 2054341/10000000 [9:08:44<47:06:02, 46.86it/s]

Episode 3769 completed | Reward: 1860.00 | Avg Reward: 2358.40 | Frames: 2054334 | Epsilon: 0.1000


Training:  21%|██        | 2054845/10000000 [9:08:54<46:50:27, 47.12it/s]

Episode 3770 completed | Reward: 920.00 | Avg Reward: 2350.80 | Frames: 2054838 | Epsilon: 0.1000


Training:  21%|██        | 2055367/10000000 [9:09:05<42:48:59, 51.54it/s]

Episode 3771 completed | Reward: 2500.00 | Avg Reward: 2337.90 | Frames: 2055364 | Epsilon: 0.1000


Training:  21%|██        | 2055854/10000000 [9:09:15<46:51:25, 47.09it/s]

Episode 3772 completed | Reward: 3730.00 | Avg Reward: 2359.80 | Frames: 2055847 | Epsilon: 0.1000


Training:  21%|██        | 2056417/10000000 [9:09:26<46:09:38, 47.80it/s]

Episode 3773 completed | Reward: 3940.00 | Avg Reward: 2362.80 | Frames: 2056408 | Epsilon: 0.1000


Training:  21%|██        | 2056897/10000000 [9:09:36<47:24:48, 46.54it/s]

Episode 3774 completed | Reward: 4560.00 | Avg Reward: 2368.80 | Frames: 2056890 | Epsilon: 0.1000


Training:  21%|██        | 2057434/10000000 [9:09:47<47:55:02, 46.04it/s]

Episode 3775 completed | Reward: 1740.00 | Avg Reward: 2365.00 | Frames: 2057428 | Epsilon: 0.1000


Training:  21%|██        | 2058074/10000000 [9:10:00<46:49:47, 47.11it/s]

Episode 3776 completed | Reward: 2440.00 | Avg Reward: 2367.00 | Frames: 2058066 | Epsilon: 0.1000


Training:  21%|██        | 2058642/10000000 [9:10:11<45:29:20, 48.49it/s]

Episode 3777 completed | Reward: 3780.00 | Avg Reward: 2371.40 | Frames: 2058639 | Epsilon: 0.1000


Training:  21%|██        | 2059318/10000000 [9:10:25<45:01:20, 48.99it/s]

Episode 3778 completed | Reward: 2960.00 | Avg Reward: 2395.20 | Frames: 2059314 | Epsilon: 0.1000


Training:  21%|██        | 2059810/10000000 [9:10:35<47:44:27, 46.20it/s]

Episode 3779 completed | Reward: 1600.00 | Avg Reward: 2398.00 | Frames: 2059805 | Epsilon: 0.1000


Training:  21%|██        | 2060285/10000000 [9:10:44<46:22:23, 47.56it/s]

Episode 3780 completed | Reward: 2160.00 | Avg Reward: 2408.80 | Frames: 2060276 | Epsilon: 0.1000


Training:  21%|██        | 2060718/10000000 [9:10:53<44:49:59, 49.19it/s]

Episode 3781 completed | Reward: 900.00 | Avg Reward: 2401.60 | Frames: 2060714 | Epsilon: 0.1000


Training:  21%|██        | 2061262/10000000 [9:11:04<46:10:26, 47.76it/s]

Episode 3782 completed | Reward: 3340.00 | Avg Reward: 2423.20 | Frames: 2061257 | Epsilon: 0.1000


Training:  21%|██        | 2061697/10000000 [9:11:13<44:49:41, 49.19it/s]

Episode 3783 completed | Reward: 2190.00 | Avg Reward: 2406.20 | Frames: 2061692 | Epsilon: 0.1000


Training:  21%|██        | 2062174/10000000 [9:11:23<45:16:06, 48.71it/s]

Episode 3784 completed | Reward: 640.00 | Avg Reward: 2379.00 | Frames: 2062169 | Epsilon: 0.1000


Training:  21%|██        | 2062703/10000000 [9:11:33<43:11:11, 51.05it/s]

Episode 3785 completed | Reward: 2080.00 | Avg Reward: 2364.40 | Frames: 2062699 | Epsilon: 0.1000


Training:  21%|██        | 2063233/10000000 [9:11:44<46:56:52, 46.96it/s]

Episode 3786 completed | Reward: 2140.00 | Avg Reward: 2365.40 | Frames: 2063224 | Epsilon: 0.1000


Training:  21%|██        | 2063762/10000000 [9:11:55<47:11:30, 46.71it/s]

Episode 3787 completed | Reward: 1160.00 | Avg Reward: 2365.40 | Frames: 2063759 | Epsilon: 0.1000


Training:  21%|██        | 2064321/10000000 [9:12:06<54:19:20, 40.58it/s]

Episode 3788 completed | Reward: 1640.00 | Avg Reward: 2350.80 | Frames: 2064320 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  21%|██        | 2064807/10000000 [9:12:16<43:48:43, 50.31it/s]

Episode 3789 completed | Reward: 1540.00 | Avg Reward: 2334.20 | Frames: 2064802 | Epsilon: 0.1000


Training:  21%|██        | 2065445/10000000 [9:12:29<46:46:21, 47.12it/s]

Episode 3790 completed | Reward: 3660.00 | Avg Reward: 2352.40 | Frames: 2065437 | Epsilon: 0.1000


Training:  21%|██        | 2065941/10000000 [9:12:39<45:24:15, 48.54it/s]

Episode 3791 completed | Reward: 2560.00 | Avg Reward: 2368.60 | Frames: 2065932 | Epsilon: 0.1000


Training:  21%|██        | 2066460/10000000 [9:12:49<42:29:55, 51.85it/s]

Episode 3792 completed | Reward: 2500.00 | Avg Reward: 2376.20 | Frames: 2066459 | Epsilon: 0.1000


Training:  21%|██        | 2067217/10000000 [9:13:05<45:51:35, 48.05it/s]

Episode 3793 completed | Reward: 1980.00 | Avg Reward: 2379.20 | Frames: 2067208 | Epsilon: 0.1000


Training:  21%|██        | 2067770/10000000 [9:13:16<46:14:45, 47.65it/s]

Episode 3794 completed | Reward: 2160.00 | Avg Reward: 2391.20 | Frames: 2067763 | Epsilon: 0.1000


Training:  21%|██        | 2068169/10000000 [9:13:24<47:44:41, 46.15it/s]

Episode 3795 completed | Reward: 940.00 | Avg Reward: 2368.80 | Frames: 2068161 | Epsilon: 0.1000


Training:  21%|██        | 2068678/10000000 [9:13:34<46:04:04, 47.82it/s]

Episode 3796 completed | Reward: 1520.00 | Avg Reward: 2375.40 | Frames: 2068673 | Epsilon: 0.1000


Training:  21%|██        | 2069409/10000000 [9:13:49<46:22:48, 47.50it/s]

Episode 3797 completed | Reward: 3580.00 | Avg Reward: 2387.00 | Frames: 2069400 | Epsilon: 0.1000


Training:  21%|██        | 2069949/10000000 [9:14:00<47:35:11, 46.29it/s]

Episode 3798 completed | Reward: 1920.00 | Avg Reward: 2368.20 | Frames: 2069940 | Epsilon: 0.1000


Training:  21%|██        | 2070458/10000000 [9:14:10<46:01:28, 47.86it/s]

Episode 3799 completed | Reward: 2080.00 | Avg Reward: 2349.00 | Frames: 2070455 | Epsilon: 0.1000


Training:  21%|██        | 2070974/10000000 [9:14:21<46:40:15, 47.19it/s]

Episode 3800 completed | Reward: 1920.00 | Avg Reward: 2335.30 | Frames: 2070967 | Epsilon: 0.1000


Training:  21%|██        | 2071677/10000000 [9:14:35<48:27:28, 45.45it/s]

Episode 3801 completed | Reward: 980.00 | Avg Reward: 2307.70 | Frames: 2071670 | Epsilon: 0.1000


Training:  21%|██        | 2072098/10000000 [9:14:43<48:01:17, 45.86it/s]

Episode 3802 completed | Reward: 1840.00 | Avg Reward: 2315.30 | Frames: 2072091 | Epsilon: 0.1000


Training:  21%|██        | 2072614/10000000 [9:14:54<47:46:50, 46.09it/s]

Episode 3803 completed | Reward: 1740.00 | Avg Reward: 2318.10 | Frames: 2072605 | Epsilon: 0.1000


Training:  21%|██        | 2073149/10000000 [9:15:05<47:11:25, 46.66it/s]

Episode 3804 completed | Reward: 860.00 | Avg Reward: 2302.70 | Frames: 2073140 | Epsilon: 0.1000


Training:  21%|██        | 2073654/10000000 [9:15:15<45:09:44, 48.75it/s]

Episode 3805 completed | Reward: 1980.00 | Avg Reward: 2309.10 | Frames: 2073651 | Epsilon: 0.1000


Training:  21%|██        | 2074317/10000000 [9:15:28<45:39:37, 48.22it/s]

Episode 3806 completed | Reward: 760.00 | Avg Reward: 2283.30 | Frames: 2074308 | Epsilon: 0.1000


Training:  21%|██        | 2074907/10000000 [9:15:40<53:09:13, 41.42it/s]

Episode 3807 completed | Reward: 2240.00 | Avg Reward: 2274.10 | Frames: 2074904 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  21%|██        | 2075595/10000000 [9:15:54<43:09:40, 51.00it/s]

Episode 3808 completed | Reward: 2080.00 | Avg Reward: 2268.10 | Frames: 2075589 | Epsilon: 0.1000


Training:  21%|██        | 2076131/10000000 [9:16:05<42:22:28, 51.94it/s]

Episode 3809 completed | Reward: 3990.00 | Avg Reward: 2270.60 | Frames: 2076127 | Epsilon: 0.1000


Training:  21%|██        | 2076477/10000000 [9:16:12<47:26:37, 46.39it/s]

Episode 3810 completed | Reward: 2780.00 | Avg Reward: 2267.60 | Frames: 2076469 | Epsilon: 0.1000


Training:  21%|██        | 2076921/10000000 [9:16:21<48:40:52, 45.21it/s]

Episode 3811 completed | Reward: 1940.00 | Avg Reward: 2253.60 | Frames: 2076914 | Epsilon: 0.1000


Training:  21%|██        | 2077291/10000000 [9:16:29<43:08:12, 51.02it/s]

Episode 3812 completed | Reward: 1340.00 | Avg Reward: 2247.60 | Frames: 2077284 | Epsilon: 0.1000


Training:  21%|██        | 2077809/10000000 [9:16:39<45:31:42, 48.33it/s]

Episode 3813 completed | Reward: 2540.00 | Avg Reward: 2226.40 | Frames: 2077800 | Epsilon: 0.1000


Training:  21%|██        | 2078449/10000000 [9:16:52<47:13:38, 46.59it/s]

Episode 3814 completed | Reward: 1920.00 | Avg Reward: 2231.00 | Frames: 2078440 | Epsilon: 0.1000


Training:  21%|██        | 2078917/10000000 [9:17:02<48:44:05, 45.15it/s]

Episode 3815 completed | Reward: 2480.00 | Avg Reward: 2223.60 | Frames: 2078909 | Epsilon: 0.1000


Training:  21%|██        | 2079609/10000000 [9:17:15<46:51:40, 46.95it/s]

Episode 3816 completed | Reward: 2360.00 | Avg Reward: 2218.20 | Frames: 2079600 | Epsilon: 0.1000


Training:  21%|██        | 2080090/10000000 [9:17:25<45:56:20, 47.89it/s]

Episode 3817 completed | Reward: 2420.00 | Avg Reward: 2216.20 | Frames: 2080087 | Epsilon: 0.1000


Training:  21%|██        | 2080674/10000000 [9:17:37<45:00:36, 48.87it/s]

Episode 3818 completed | Reward: 860.00 | Avg Reward: 2212.00 | Frames: 2080669 | Epsilon: 0.1000


Training:  21%|██        | 2081302/10000000 [9:17:50<44:38:18, 49.28it/s]

Episode 3819 completed | Reward: 1260.00 | Avg Reward: 2212.20 | Frames: 2081298 | Epsilon: 0.1000


Training:  21%|██        | 2081905/10000000 [9:18:02<46:42:13, 47.09it/s]

Episode 3820 completed | Reward: 2520.00 | Avg Reward: 2211.00 | Frames: 2081897 | Epsilon: 0.1000


Training:  21%|██        | 2082583/10000000 [9:18:16<45:31:55, 48.30it/s]

Episode 3821 completed | Reward: 1840.00 | Avg Reward: 2197.20 | Frames: 2082576 | Epsilon: 0.1000


Training:  21%|██        | 2083389/10000000 [9:18:32<45:07:12, 48.74it/s]

Episode 3822 completed | Reward: 1200.00 | Avg Reward: 2181.80 | Frames: 2083383 | Epsilon: 0.1000


Training:  21%|██        | 2083829/10000000 [9:18:41<46:52:08, 46.92it/s]

Episode 3823 completed | Reward: 3200.00 | Avg Reward: 2189.00 | Frames: 2083820 | Epsilon: 0.1000


Training:  21%|██        | 2084238/10000000 [9:18:49<48:51:31, 45.00it/s]

Episode 3824 completed | Reward: 440.00 | Avg Reward: 2174.40 | Frames: 2084232 | Epsilon: 0.1000


Training:  21%|██        | 2085241/10000000 [9:19:09<53:53:01, 40.80it/s]

Episode 3825 completed | Reward: 5400.00 | Avg Reward: 2203.00 | Frames: 2085240 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  21%|██        | 2085990/10000000 [9:19:24<45:43:38, 48.07it/s]

Episode 3826 completed | Reward: 3400.00 | Avg Reward: 2225.40 | Frames: 2085988 | Epsilon: 0.1000


Training:  21%|██        | 2086433/10000000 [9:19:33<47:04:08, 46.70it/s]

Episode 3827 completed | Reward: 2260.00 | Avg Reward: 2217.40 | Frames: 2086424 | Epsilon: 0.1000


Training:  21%|██        | 2086881/10000000 [9:19:43<46:56:24, 46.83it/s]

Episode 3828 completed | Reward: 2880.00 | Avg Reward: 2225.80 | Frames: 2086873 | Epsilon: 0.1000


Training:  21%|██        | 2087581/10000000 [9:19:57<47:08:34, 46.62it/s]

Episode 3829 completed | Reward: 1080.00 | Avg Reward: 2223.00 | Frames: 2087574 | Epsilon: 0.1000


Training:  21%|██        | 2088145/10000000 [9:20:08<45:20:10, 48.48it/s]

Episode 3830 completed | Reward: 4060.00 | Avg Reward: 2248.20 | Frames: 2088142 | Epsilon: 0.1000


Training:  21%|██        | 2088669/10000000 [9:20:19<47:10:23, 46.59it/s]

Episode 3831 completed | Reward: 3480.00 | Avg Reward: 2253.40 | Frames: 2088668 | Epsilon: 0.1000


Training:  21%|██        | 2089190/10000000 [9:20:30<45:05:34, 48.73it/s]

Episode 3832 completed | Reward: 2040.00 | Avg Reward: 2241.00 | Frames: 2089187 | Epsilon: 0.1000


Training:  21%|██        | 2089750/10000000 [9:20:41<46:27:00, 47.30it/s]

Episode 3833 completed | Reward: 900.00 | Avg Reward: 2211.20 | Frames: 2089743 | Epsilon: 0.1000


Training:  21%|██        | 2090318/10000000 [9:20:53<45:25:03, 48.38it/s]

Episode 3834 completed | Reward: 2560.00 | Avg Reward: 2225.00 | Frames: 2090311 | Epsilon: 0.1000


Training:  21%|██        | 2090856/10000000 [9:21:04<43:18:30, 50.73it/s]

Episode 3835 completed | Reward: 2940.00 | Avg Reward: 2243.40 | Frames: 2090848 | Epsilon: 0.1000


Training:  21%|██        | 2091514/10000000 [9:21:17<45:32:35, 48.24it/s]

Episode 3836 completed | Reward: 3440.00 | Avg Reward: 2260.20 | Frames: 2091507 | Epsilon: 0.1000


Training:  21%|██        | 2091973/10000000 [9:21:26<45:49:51, 47.93it/s]

Episode 3837 completed | Reward: 1820.00 | Avg Reward: 2239.00 | Frames: 2091964 | Epsilon: 0.1000


Training:  21%|██        | 2092718/10000000 [9:21:41<45:23:22, 48.39it/s]

Episode 3838 completed | Reward: 4560.00 | Avg Reward: 2272.40 | Frames: 2092710 | Epsilon: 0.1000


Training:  21%|██        | 2093110/10000000 [9:21:49<44:49:45, 48.99it/s]

Episode 3839 completed | Reward: 1540.00 | Avg Reward: 2242.40 | Frames: 2093105 | Epsilon: 0.1000


Training:  21%|██        | 2093897/10000000 [9:22:05<46:57:24, 46.77it/s]

Episode 3840 completed | Reward: 1140.00 | Avg Reward: 2237.60 | Frames: 2093889 | Epsilon: 0.1000


Training:  21%|██        | 2094330/10000000 [9:22:14<45:48:07, 47.95it/s]

Episode 3841 completed | Reward: 1720.00 | Avg Reward: 2239.00 | Frames: 2094327 | Epsilon: 0.1000


Training:  21%|██        | 2094785/10000000 [9:22:23<47:27:21, 46.27it/s]

Episode 3842 completed | Reward: 3660.00 | Avg Reward: 2258.20 | Frames: 2094778 | Epsilon: 0.1000


Training:  21%|██        | 2095137/10000000 [9:22:30<46:35:49, 47.12it/s]

Episode 3843 completed | Reward: 740.00 | Avg Reward: 2233.40 | Frames: 2095129 | Epsilon: 0.1000


Training:  21%|██        | 2095631/10000000 [9:22:41<44:55:05, 48.88it/s]

Episode 3844 completed | Reward: 1860.00 | Avg Reward: 2221.20 | Frames: 2095631 | Epsilon: 0.1000

Memory usage: 1.13 GB


Training:  21%|██        | 2096358/10000000 [9:22:55<45:12:41, 48.56it/s]

Episode 3845 completed | Reward: 1640.00 | Avg Reward: 2227.80 | Frames: 2096348 | Epsilon: 0.1000


Training:  21%|██        | 2096861/10000000 [9:23:06<45:05:28, 48.69it/s]

Episode 3846 completed | Reward: 1220.00 | Avg Reward: 2229.00 | Frames: 2096855 | Epsilon: 0.1000


Training:  21%|██        | 2097374/10000000 [9:23:16<46:01:01, 47.70it/s]

Episode 3847 completed | Reward: 1960.00 | Avg Reward: 2210.80 | Frames: 2097371 | Epsilon: 0.1000


Training:  21%|██        | 2098017/10000000 [9:23:29<45:44:40, 47.98it/s]

Episode 3848 completed | Reward: 2400.00 | Avg Reward: 2224.40 | Frames: 2098008 | Epsilon: 0.1000


Training:  21%|██        | 2098541/10000000 [9:23:40<47:14:46, 46.46it/s]

Episode 3849 completed | Reward: 2990.00 | Avg Reward: 2213.90 | Frames: 2098534 | Epsilon: 0.1000


Training:  21%|██        | 2099123/10000000 [9:23:52<42:03:08, 52.19it/s]

Episode 3850 completed | Reward: 1780.00 | Avg Reward: 2222.70 | Frames: 2099118 | Epsilon: 0.1000


Training:  21%|██        | 2099626/10000000 [9:24:02<44:38:57, 49.15it/s]

Episode 3851 completed | Reward: 3300.00 | Avg Reward: 2233.90 | Frames: 2099623 | Epsilon: 0.1000


Training:  21%|██        | 2100177/10000000 [9:24:13<46:48:52, 46.87it/s]

Episode 3852 completed | Reward: 1700.00 | Avg Reward: 2223.30 | Frames: 2100168 | Epsilon: 0.1000


Training:  21%|██        | 2100709/10000000 [9:24:24<46:49:14, 46.86it/s]

Episode 3853 completed | Reward: 1260.00 | Avg Reward: 2210.30 | Frames: 2100701 | Epsilon: 0.1000


Training:  21%|██        | 2100742/10000000 [9:24:24<45:27:01, 48.28it/s]