In [None]:
'''
!pip install "numpy<2.0.0"
'''

In [1]:
import gymnasium as gym
from gymnasium import envs
import numpy as np
from collections import deque
import random
import matplotlib.pyplot as plt
import cv2 
import time
import warnings
import ale_py
import multiprocessing as mp
from datetime import datetime
import os
import pandas as pd
from keras.models import Sequential, clone_model
from keras.layers import Conv2D, Flatten, Dense
from keras.optimizers import Adam
import matplotlib.pyplot as plt
from collections import deque
import random
import math



# Please experiment with the numbers, we want to make it to the end, and I know it'll take a while so please do not randomly choose

In [None]:
# --- Hyperparameters ---
EPISODES = 5000                
BATCH_SIZE = 32                
MEMORY_SIZE = 100000           
GAMMA = 0.99                   
EPSILON_START = 1.0            
EPSILON_MIN = 0.01             
EPSILON_DECAY = 0.995          
LEARNING_RATE = 0.00025        
UPDATE_TARGET_FREQ = 1000      
FRAME_STACK = 4                
SAVE_FREQ = 50                 

# --- Prioritized Experience Replay (PER) ---
class PrioritizedReplayBuffer:
    def __init__(self, max_size):
        self.max_size = max_size
        self.buffer = deque(maxlen=max_size)
        self.priorities = deque(maxlen=max_size)
    
    def add(self, state, action, reward, next_state, done):
        max_priority = max(self.priorities) if self.buffer else 1.0
        self.buffer.append((state, action, reward, next_state, done))
        self.priorities.append(max_priority)
    
    def sample(self, batch_size, alpha=0.6):
        priorities = np.array(self.priorities)
        probs = priorities ** alpha
        probs /= probs.sum()
        
        indices = np.random.choice(len(self.buffer), batch_size, p=probs)
        samples = [self.buffer[i] for i in indices]
        weights = (len(self.buffer) * probs[indices]) ** (-0.4)
        weights /= weights.max()
        
        states = np.array([x[0] for x in samples])
        actions = np.array([x[1] for x in samples])
        rewards = np.array([x[2] for x in samples])
        next_states = np.array([x[3] for x in samples])
        dones = np.array([x[4] for x in samples])
        
        return states, actions, rewards, next_states, dones, indices, weights
    
    def update_priorities(self, indices, errors, offset=0.01):
        for i, error in zip(indices, errors):
            self.priorities[i] = abs(error) + offset

# --- Environment Setup ---
env = gym.make("ALE/Frogger-v5", render_mode="rgb_array")
state_shape = (84, 84, FRAME_STACK)
action_size = env.action_space.n

# --- Model Definition ---
def build_model():
    model = Sequential([
        Conv2D(32, (8, 8), strides=4, activation="relu", input_shape=state_shape),
        Conv2D(64, (4, 4), strides=2, activation="relu"),
        Conv2D(64, (3, 3), strides=1, activation="relu"),
        Flatten(),
        Dense(512, activation="relu"),
        Dense(action_size, activation="linear")
    ])
    model.compile(loss="mse", optimizer=Adam(learning_rate=LEARNING_RATE))
    return model

# --- Frame Stacking ---
class FrameStacker:
    def __init__(self):
        self.frames = deque(maxlen=FRAME_STACK)
    
    def reset(self, state):
        for _ in range(FRAME_STACK):
            self.frames.append(state)
        return np.stack(self.frames, axis=-1)
    
    def append(self, state):
        self.frames.append(state)
        return np.stack(self.frames, axis=-1)

# --- Preprocessing ---
def preprocess_state(state):
    state = np.mean(state, axis=2)  
    state = state[34:194, :]        
    state = state[::2, ::2]         
    state = np.pad(state, ((2,2),(2,2)), mode='constant')
    return state / 255.0

# --- Training Functions ---
def train_dqn():
    model = build_model()
    target_model = clone_model(model)  
    target_model.set_weights(model.get_weights())
    
    memory = PrioritizedReplayBuffer(MEMORY_SIZE)  
    frame_stacker = FrameStacker()
    epsilon = EPSILON_START
    rewards_history = []
    start_time = time.time()
    global_step = 0

    for episode in range(1, EPISODES + 1):
        state, _ = env.reset()
        state = preprocess_state(state)
        state = frame_stacker.reset(state)
        total_reward = 0
        done = False

        while not done:
            global_step += 1

            if np.random.rand() <= epsilon:
                action = env.action_space.sample()
            else:
                q_values = model.predict(np.expand_dims(state, axis=0), verbose=0)
                action = np.argmax(q_values[0])

            next_state, reward, done, _, _ = env.step(action)
            next_state = preprocess_state(next_state)
            next_state = frame_stacker.append(next_state)
            memory.add(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

            if global_step % 4 == 0 and len(memory.buffer) >= BATCH_SIZE:
                states, actions, rewards, next_states, dones, indices, weights = memory.sample(BATCH_SIZE)
                
                next_q_values = target_model.predict(next_states, verbose=0)
                best_actions = np.argmax(model.predict(next_states, verbose=0), axis=1)
                target_q = rewards + GAMMA * next_q_values[np.arange(BATCH_SIZE), best_actions] * (1 - dones)
                
                current_q = model.predict(states, verbose=0)
                td_errors = target_q - current_q[np.arange(BATCH_SIZE), actions]
                memory.update_priorities(indices, td_errors)
                
                target = current_q.copy()
                target[np.arange(BATCH_SIZE), actions] = target_q
                model.fit(states, target, sample_weight=weights, epochs=1, verbose=0)

            if global_step % UPDATE_TARGET_FREQ == 0:
                target_model.set_weights(model.get_weights())

        if epsilon > EPSILON_MIN:
            epsilon *= EPSILON_DECAY

        rewards_history.append(total_reward)

        elapsed_time = (time.time() - start_time) / 60  # in minutes
        remaining_time = (elapsed_time / episode) * (EPISODES - episode)
        print(f"Episode: {episode}/{EPISODES}, Reward: {total_reward}, Epsilon: {epsilon:.2f}, Time Elapsed: {elapsed_time:.2f} mins, Remaining: {remaining_time:.2f} mins")

        if episode % SAVE_FREQ == 0:
            model.save_weights(f"frogger_weights_ep{episode}.weights.h5")
            print(f"Saved weights at episode {episode}")

    plt.plot(rewards_history)
    plt.xlabel("Episode")
    plt.ylabel("Reward")
    plt.title("Frogger DQN Training (PER + Double DQN + Frame Stack)")
    plt.savefig("frogger_training_enhanced.png")
    plt.show()

    return model

# --- Run Training ---
if __name__ == "__main__":
    model = train_dqn()
    model.save_weights("frogger_final_weights_enhanced.weights.h5")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Episode: 1/5000, Reward: 11.0, Epsilon: 0.99, Time Elapsed: 0.35 mins, Remaining: 1739.96 mins
Episode: 2/5000, Reward: 8.0, Epsilon: 0.99, Time Elapsed: 0.69 mins, Remaining: 1716.45 mins
Episode: 3/5000, Reward: 10.0, Epsilon: 0.99, Time Elapsed: 1.03 mins, Remaining: 1717.78 mins
Episode: 4/5000, Reward: 8.0, Epsilon: 0.98, Time Elapsed: 1.32 mins, Remaining: 1648.72 mins
Episode: 5/5000, Reward: 10.0, Epsilon: 0.98, Time Elapsed: 1.71 mins, Remaining: 1710.22 mins
Episode: 6/5000, Reward: 8.0, Epsilon: 0.97, Time Elapsed: 2.05 mins, Remaining: 1707.77 mins
Episode: 7/5000, Reward: 11.0, Epsilon: 0.97, Time Elapsed: 2.41 mins, Remaining: 1720.89 mins
Episode: 8/5000, Reward: 10.0, Epsilon: 0.96, Time Elapsed: 2.72 mins, Remaining: 1695.07 mins
Episode: 9/5000, Reward: 10.0, Epsilon: 0.96, Time Elapsed: 3.08 mins, Remaining: 1710.03 mins
Episode: 10/5000, Reward: 12.0, Epsilon: 0.95, Time Elapsed: 3.43 mins, Remaining: 1712.74 mins
Episode: 11/5000, Reward: 10.0, Epsilon: 0.95, Time 

Episode: 86/5000, Reward: 9.0, Epsilon: 0.65, Time Elapsed: 34.94 mins, Remaining: 1996.58 mins
Episode: 87/5000, Reward: 8.0, Epsilon: 0.65, Time Elapsed: 35.36 mins, Remaining: 1996.67 mins
Episode: 88/5000, Reward: 11.0, Epsilon: 0.64, Time Elapsed: 35.89 mins, Remaining: 2003.29 mins
Episode: 89/5000, Reward: 15.0, Epsilon: 0.64, Time Elapsed: 36.41 mins, Remaining: 2009.28 mins
Episode: 90/5000, Reward: 13.0, Epsilon: 0.64, Time Elapsed: 36.99 mins, Remaining: 2017.83 mins
Episode: 91/5000, Reward: 21.0, Epsilon: 0.63, Time Elapsed: 37.63 mins, Remaining: 2030.02 mins
Episode: 92/5000, Reward: 10.0, Epsilon: 0.63, Time Elapsed: 38.30 mins, Remaining: 2043.08 mins
Episode: 93/5000, Reward: 14.0, Epsilon: 0.63, Time Elapsed: 39.07 mins, Remaining: 2061.49 mins
Episode: 94/5000, Reward: 15.0, Epsilon: 0.62, Time Elapsed: 39.76 mins, Remaining: 2075.15 mins
Episode: 95/5000, Reward: 14.0, Epsilon: 0.62, Time Elapsed: 40.50 mins, Remaining: 2091.32 mins
Episode: 96/5000, Reward: 10.0, 

Episode: 170/5000, Reward: 8.0, Epsilon: 0.43, Time Elapsed: 81.70 mins, Remaining: 2321.10 mins
Episode: 171/5000, Reward: 11.0, Epsilon: 0.42, Time Elapsed: 82.26 mins, Remaining: 2323.05 mins
Episode: 172/5000, Reward: 16.0, Epsilon: 0.42, Time Elapsed: 82.89 mins, Remaining: 2326.60 mins
Episode: 173/5000, Reward: 18.0, Epsilon: 0.42, Time Elapsed: 83.51 mins, Remaining: 2330.07 mins
Episode: 174/5000, Reward: 21.0, Epsilon: 0.42, Time Elapsed: 84.14 mins, Remaining: 2333.71 mins
Episode: 175/5000, Reward: 15.0, Epsilon: 0.42, Time Elapsed: 84.76 mins, Remaining: 2336.88 mins
Episode: 176/5000, Reward: 21.0, Epsilon: 0.41, Time Elapsed: 85.38 mins, Remaining: 2340.32 mins
Episode: 177/5000, Reward: 23.0, Epsilon: 0.41, Time Elapsed: 86.11 mins, Remaining: 2346.36 mins
Episode: 178/5000, Reward: 17.0, Epsilon: 0.41, Time Elapsed: 86.91 mins, Remaining: 2354.42 mins
Episode: 179/5000, Reward: 10.0, Epsilon: 0.41, Time Elapsed: 87.63 mins, Remaining: 2360.20 mins
Episode: 180/5000, Re

Episode: 253/5000, Reward: 24.0, Epsilon: 0.28, Time Elapsed: 140.36 mins, Remaining: 2633.46 mins
Episode: 254/5000, Reward: 18.0, Epsilon: 0.28, Time Elapsed: 141.15 mins, Remaining: 2637.39 mins
Episode: 255/5000, Reward: 20.0, Epsilon: 0.28, Time Elapsed: 141.90 mins, Remaining: 2640.37 mins
Episode: 256/5000, Reward: 22.0, Epsilon: 0.28, Time Elapsed: 142.62 mins, Remaining: 2642.85 mins
Episode: 257/5000, Reward: 14.0, Epsilon: 0.28, Time Elapsed: 143.37 mins, Remaining: 2646.01 mins
Episode: 258/5000, Reward: 22.0, Epsilon: 0.27, Time Elapsed: 144.18 mins, Remaining: 2649.92 mins
Episode: 259/5000, Reward: 18.0, Epsilon: 0.27, Time Elapsed: 144.94 mins, Remaining: 2653.09 mins
Episode: 260/5000, Reward: 21.0, Epsilon: 0.27, Time Elapsed: 145.90 mins, Remaining: 2659.84 mins
Episode: 261/5000, Reward: 10.0, Epsilon: 0.27, Time Elapsed: 146.51 mins, Remaining: 2660.27 mins
Episode: 262/5000, Reward: 24.0, Epsilon: 0.27, Time Elapsed: 147.40 mins, Remaining: 2665.64 mins
Episode: 2

Episode: 336/5000, Reward: 18.0, Epsilon: 0.19, Time Elapsed: 216.80 mins, Remaining: 3009.35 mins
Episode: 337/5000, Reward: 19.0, Epsilon: 0.18, Time Elapsed: 217.94 mins, Remaining: 3015.52 mins
Episode: 338/5000, Reward: 27.0, Epsilon: 0.18, Time Elapsed: 219.34 mins, Remaining: 3025.31 mins
Episode: 339/5000, Reward: 29.0, Epsilon: 0.18, Time Elapsed: 220.63 mins, Remaining: 3033.54 mins
Episode: 340/5000, Reward: 19.0, Epsilon: 0.18, Time Elapsed: 221.44 mins, Remaining: 3035.07 mins
Episode: 341/5000, Reward: 22.0, Epsilon: 0.18, Time Elapsed: 222.58 mins, Remaining: 3041.06 mins
Episode: 342/5000, Reward: 31.0, Epsilon: 0.18, Time Elapsed: 223.83 mins, Remaining: 3048.54 mins
Episode: 343/5000, Reward: 20.0, Epsilon: 0.18, Time Elapsed: 224.87 mins, Remaining: 3053.18 mins
Episode: 344/5000, Reward: 29.0, Epsilon: 0.18, Time Elapsed: 226.21 mins, Remaining: 3061.77 mins
Episode: 345/5000, Reward: 21.0, Epsilon: 0.18, Time Elapsed: 227.10 mins, Remaining: 3064.21 mins
Episode: 3

Episode: 419/5000, Reward: 17.0, Epsilon: 0.12, Time Elapsed: 301.29 mins, Remaining: 3294.04 mins
Episode: 420/5000, Reward: 22.0, Epsilon: 0.12, Time Elapsed: 302.44 mins, Remaining: 3298.00 mins
Episode: 421/5000, Reward: 18.0, Epsilon: 0.12, Time Elapsed: 303.39 mins, Remaining: 3299.85 mins
Episode: 422/5000, Reward: 18.0, Epsilon: 0.12, Time Elapsed: 304.19 mins, Remaining: 3299.97 mins
Episode: 423/5000, Reward: 28.0, Epsilon: 0.12, Time Elapsed: 305.13 mins, Remaining: 3301.60 mins
Episode: 424/5000, Reward: 27.0, Epsilon: 0.12, Time Elapsed: 306.14 mins, Remaining: 3303.99 mins
Episode: 425/5000, Reward: 29.0, Epsilon: 0.12, Time Elapsed: 307.37 mins, Remaining: 3308.72 mins
Episode: 426/5000, Reward: 28.0, Epsilon: 0.12, Time Elapsed: 308.78 mins, Remaining: 3315.40 mins
Episode: 427/5000, Reward: 19.0, Epsilon: 0.12, Time Elapsed: 309.93 mins, Remaining: 3319.20 mins
Episode: 428/5000, Reward: 23.0, Epsilon: 0.12, Time Elapsed: 311.66 mins, Remaining: 3329.25 mins
Episode: 4

Episode: 502/5000, Reward: 19.0, Epsilon: 0.08, Time Elapsed: 394.66 mins, Remaining: 3536.19 mins
Episode: 503/5000, Reward: 27.0, Epsilon: 0.08, Time Elapsed: 395.61 mins, Remaining: 3536.89 mins
Episode: 504/5000, Reward: 22.0, Epsilon: 0.08, Time Elapsed: 396.50 mins, Remaining: 3536.99 mins
Episode: 505/5000, Reward: 27.0, Epsilon: 0.08, Time Elapsed: 397.96 mins, Remaining: 3542.27 mins
Episode: 506/5000, Reward: 34.0, Epsilon: 0.08, Time Elapsed: 399.32 mins, Remaining: 3546.57 mins
Episode: 507/5000, Reward: 27.0, Epsilon: 0.08, Time Elapsed: 400.28 mins, Remaining: 3547.26 mins
Episode: 508/5000, Reward: 17.0, Epsilon: 0.08, Time Elapsed: 401.25 mins, Remaining: 3548.10 mins
Episode: 509/5000, Reward: 21.0, Epsilon: 0.08, Time Elapsed: 402.09 mins, Remaining: 3547.71 mins
Episode: 510/5000, Reward: 24.0, Epsilon: 0.08, Time Elapsed: 402.96 mins, Remaining: 3547.65 mins
Episode: 511/5000, Reward: 25.0, Epsilon: 0.08, Time Elapsed: 404.58 mins, Remaining: 3554.09 mins
Episode: 5

Episode: 585/5000, Reward: 21.0, Epsilon: 0.05, Time Elapsed: 489.65 mins, Remaining: 3695.42 mins
Episode: 586/5000, Reward: 33.0, Epsilon: 0.05, Time Elapsed: 490.78 mins, Remaining: 3696.76 mins
Episode: 587/5000, Reward: 25.0, Epsilon: 0.05, Time Elapsed: 491.67 mins, Remaining: 3696.29 mins
Episode: 588/5000, Reward: 33.0, Epsilon: 0.05, Time Elapsed: 492.87 mins, Remaining: 3698.24 mins
Episode: 589/5000, Reward: 33.0, Epsilon: 0.05, Time Elapsed: 493.99 mins, Remaining: 3699.49 mins
Episode: 590/5000, Reward: 37.0, Epsilon: 0.05, Time Elapsed: 495.34 mins, Remaining: 3702.49 mins
Episode: 591/5000, Reward: 33.0, Epsilon: 0.05, Time Elapsed: 496.69 mins, Remaining: 3705.41 mins
Episode: 592/5000, Reward: 30.0, Epsilon: 0.05, Time Elapsed: 497.75 mins, Remaining: 3706.20 mins
Episode: 593/5000, Reward: 30.0, Epsilon: 0.05, Time Elapsed: 498.63 mins, Remaining: 3705.68 mins
Episode: 594/5000, Reward: 23.0, Epsilon: 0.05, Time Elapsed: 499.97 mins, Remaining: 3708.51 mins
Episode: 5

Episode: 668/5000, Reward: 26.0, Epsilon: 0.04, Time Elapsed: 585.13 mins, Remaining: 3794.60 mins
Episode: 669/5000, Reward: 34.0, Epsilon: 0.03, Time Elapsed: 586.39 mins, Remaining: 3796.19 mins
Episode: 670/5000, Reward: 36.0, Epsilon: 0.03, Time Elapsed: 587.98 mins, Remaining: 3799.91 mins
Episode: 671/5000, Reward: 31.0, Epsilon: 0.03, Time Elapsed: 588.94 mins, Remaining: 3799.60 mins
Episode: 672/5000, Reward: 33.0, Epsilon: 0.03, Time Elapsed: 590.16 mins, Remaining: 3800.92 mins
Episode: 673/5000, Reward: 38.0, Epsilon: 0.03, Time Elapsed: 591.32 mins, Remaining: 3801.84 mins
Episode: 674/5000, Reward: 32.0, Epsilon: 0.03, Time Elapsed: 592.36 mins, Remaining: 3802.02 mins
Episode: 675/5000, Reward: 29.0, Epsilon: 0.03, Time Elapsed: 593.37 mins, Remaining: 3801.99 mins
Episode: 676/5000, Reward: 30.0, Epsilon: 0.03, Time Elapsed: 594.58 mins, Remaining: 3803.20 mins
Episode: 677/5000, Reward: 26.0, Epsilon: 0.03, Time Elapsed: 595.71 mins, Remaining: 3803.92 mins
Episode: 6

Episode: 751/5000, Reward: 24.0, Epsilon: 0.02, Time Elapsed: 690.18 mins, Remaining: 3904.90 mins
Episode: 752/5000, Reward: 30.0, Epsilon: 0.02, Time Elapsed: 691.28 mins, Remaining: 3904.98 mins
Episode: 753/5000, Reward: 35.0, Epsilon: 0.02, Time Elapsed: 692.55 mins, Remaining: 3906.05 mins
Episode: 754/5000, Reward: 33.0, Epsilon: 0.02, Time Elapsed: 693.62 mins, Remaining: 3905.97 mins
Episode: 755/5000, Reward: 23.0, Epsilon: 0.02, Time Elapsed: 694.46 mins, Remaining: 3904.63 mins
Episode: 756/5000, Reward: 34.0, Epsilon: 0.02, Time Elapsed: 696.28 mins, Remaining: 3908.73 mins
Episode: 757/5000, Reward: 28.0, Epsilon: 0.02, Time Elapsed: 697.64 mins, Remaining: 3910.29 mins
Episode: 758/5000, Reward: 30.0, Epsilon: 0.02, Time Elapsed: 699.14 mins, Remaining: 3912.60 mins
Episode: 759/5000, Reward: 34.0, Epsilon: 0.02, Time Elapsed: 700.68 mins, Remaining: 3915.16 mins
Episode: 760/5000, Reward: 28.0, Epsilon: 0.02, Time Elapsed: 701.67 mins, Remaining: 3914.57 mins
Episode: 7

Episode: 834/5000, Reward: 33.0, Epsilon: 0.02, Time Elapsed: 800.72 mins, Remaining: 3999.76 mins
Episode: 835/5000, Reward: 35.0, Epsilon: 0.02, Time Elapsed: 802.15 mins, Remaining: 4001.17 mins
Episode: 836/5000, Reward: 21.0, Epsilon: 0.02, Time Elapsed: 803.01 mins, Remaining: 3999.70 mins
Episode: 837/5000, Reward: 38.0, Epsilon: 0.02, Time Elapsed: 804.45 mins, Remaining: 4001.08 mins
Episode: 838/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 806.48 mins, Remaining: 4005.44 mins
Episode: 839/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 807.70 mins, Remaining: 4005.79 mins
Episode: 840/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 809.93 mins, Remaining: 4011.07 mins
Episode: 841/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 811.30 mins, Remaining: 4012.10 mins
Episode: 842/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 812.61 mins, Remaining: 4012.86 mins
Episode: 843/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 814.05 mins, Remaining: 4014.26 mins
Episode: 8

Episode: 917/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 906.74 mins, Remaining: 4037.32 mins
Episode: 918/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 907.71 mins, Remaining: 4036.26 mins
Episode: 919/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 909.32 mins, Remaining: 4038.02 mins
Episode: 920/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 910.55 mins, Remaining: 4038.09 mins
Episode: 921/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 911.92 mins, Remaining: 4038.80 mins
Episode: 922/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 913.65 mins, Remaining: 4041.09 mins
Episode: 923/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 914.98 mins, Remaining: 4041.60 mins
Episode: 924/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 916.29 mins, Remaining: 4041.99 mins
Episode: 925/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 917.42 mins, Remaining: 4041.61 mins
Episode: 926/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 918.86 mins, Remaining: 4042.60 mins
Episode: 9

Episode: 1000/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 1014.34 mins, Remaining: 4057.37 mins
Saved weights at episode 1000
Episode: 1001/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 1015.95 mins, Remaining: 4058.71 mins
Episode: 1002/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 1017.11 mins, Remaining: 4058.28 mins
Episode: 1003/5000, Reward: 27.0, Epsilon: 0.01, Time Elapsed: 1018.57 mins, Remaining: 4059.05 mins
Episode: 1004/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1020.84 mins, Remaining: 4063.02 mins
Episode: 1005/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1022.94 mins, Remaining: 4066.30 mins
Episode: 1006/5000, Reward: 25.0, Epsilon: 0.01, Time Elapsed: 1024.16 mins, Remaining: 4066.10 mins
Episode: 1007/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 1025.10 mins, Remaining: 4064.76 mins
Episode: 1008/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 1026.41 mins, Remaining: 4064.89 mins
Episode: 1009/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed

Episode: 1081/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1124.24 mins, Remaining: 4075.75 mins
Episode: 1082/5000, Reward: 21.0, Epsilon: 0.01, Time Elapsed: 1125.85 mins, Remaining: 4076.79 mins
Episode: 1083/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1127.35 mins, Remaining: 4077.41 mins
Episode: 1084/5000, Reward: 42.0, Epsilon: 0.01, Time Elapsed: 1128.56 mins, Remaining: 4076.96 mins
Episode: 1085/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1130.30 mins, Remaining: 4078.44 mins
Episode: 1086/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1132.23 mins, Remaining: 4080.61 mins
Episode: 1087/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 1134.04 mins, Remaining: 4082.35 mins
Episode: 1088/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 1135.34 mins, Remaining: 4082.20 mins
Episode: 1089/5000, Reward: 40.0, Epsilon: 0.01, Time Elapsed: 1137.51 mins, Remaining: 4085.23 mins
Episode: 1090/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 1138.90 mins, Remaining: 408

Episode: 1162/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 1240.13 mins, Remaining: 4096.04 mins
Episode: 1163/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 1241.06 mins, Remaining: 4094.55 mins
Episode: 1164/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1242.01 mins, Remaining: 4093.09 mins
Episode: 1165/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1243.31 mins, Remaining: 4092.77 mins
Episode: 1166/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1244.80 mins, Remaining: 4093.11 mins
Episode: 1167/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 1245.99 mins, Remaining: 4092.45 mins
Episode: 1168/5000, Reward: 29.0, Epsilon: 0.01, Time Elapsed: 1247.13 mins, Remaining: 4091.62 mins
Episode: 1169/5000, Reward: 28.0, Epsilon: 0.01, Time Elapsed: 1248.72 mins, Remaining: 4092.25 mins
Episode: 1170/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 1250.45 mins, Remaining: 4093.34 mins
Episode: 1171/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 1251.92 mins, Remaining: 409

Episode: 1243/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 1351.02 mins, Remaining: 4083.48 mins
Episode: 1244/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1352.26 mins, Remaining: 4082.87 mins
Episode: 1245/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 1353.42 mins, Remaining: 4082.00 mins
Episode: 1246/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 1355.12 mins, Remaining: 4082.77 mins
Episode: 1247/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 1356.83 mins, Remaining: 4083.54 mins
Episode: 1248/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 1358.17 mins, Remaining: 4083.20 mins
Episode: 1249/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 1359.55 mins, Remaining: 4083.01 mins
Episode: 1250/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 1361.92 mins, Remaining: 4085.77 mins
Saved weights at episode 1250
Episode: 1251/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1363.64 mins, Remaining: 4086.57 mins
Episode: 1252/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed

Episode: 1324/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1483.75 mins, Remaining: 4119.54 mins
Episode: 1325/5000, Reward: 23.0, Epsilon: 0.01, Time Elapsed: 1486.34 mins, Remaining: 4122.49 mins
Episode: 1326/5000, Reward: 40.0, Epsilon: 0.01, Time Elapsed: 1488.60 mins, Remaining: 4124.52 mins
Episode: 1327/5000, Reward: 28.0, Epsilon: 0.01, Time Elapsed: 1490.29 mins, Remaining: 4124.96 mins
Episode: 1328/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 1491.83 mins, Remaining: 4124.99 mins
Episode: 1329/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 1493.49 mins, Remaining: 4125.37 mins
Episode: 1330/5000, Reward: 42.0, Epsilon: 0.01, Time Elapsed: 1495.20 mins, Remaining: 4125.86 mins
Episode: 1331/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 1497.76 mins, Remaining: 4128.69 mins
Episode: 1332/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 1500.60 mins, Remaining: 4132.27 mins
Episode: 1333/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 1503.82 mins, Remaining: 413

Episode: 1405/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 1629.27 mins, Remaining: 4168.85 mins
Episode: 1406/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1631.22 mins, Remaining: 4169.70 mins
Episode: 1407/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1632.59 mins, Remaining: 4169.08 mins
Episode: 1408/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1634.54 mins, Remaining: 4169.94 mins
Episode: 1409/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1636.34 mins, Remaining: 4170.39 mins
Episode: 1410/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 1638.68 mins, Remaining: 4172.24 mins
Episode: 1411/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 1640.23 mins, Remaining: 4172.08 mins
Episode: 1412/5000, Reward: 29.0, Epsilon: 0.01, Time Elapsed: 1641.45 mins, Remaining: 4171.06 mins
Episode: 1413/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1643.86 mins, Remaining: 4173.06 mins
Episode: 1414/5000, Reward: 28.0, Epsilon: 0.01, Time Elapsed: 1645.37 mins, Remaining: 417

Episode: 1486/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1779.54 mins, Remaining: 4208.15 mins
Episode: 1487/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 1781.43 mins, Remaining: 4208.58 mins
Episode: 1488/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 1783.10 mins, Remaining: 4208.51 mins
Episode: 1489/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 1784.94 mins, Remaining: 4208.82 mins
Episode: 1490/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1787.05 mins, Remaining: 4209.76 mins
Episode: 1491/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 1788.27 mins, Remaining: 4208.61 mins
Episode: 1492/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 1789.68 mins, Remaining: 4207.90 mins
Episode: 1493/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 1791.62 mins, Remaining: 4208.44 mins
Episode: 1494/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1794.73 mins, Remaining: 4211.72 mins
Episode: 1495/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 1796.47 mins, Remaining: 421

Episode: 1567/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1946.48 mins, Remaining: 4264.37 mins
Episode: 1568/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1948.18 mins, Remaining: 4264.13 mins
Episode: 1569/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 1950.01 mins, Remaining: 4264.17 mins
Episode: 1570/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1952.12 mins, Remaining: 4264.82 mins
Episode: 1571/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 1954.31 mins, Remaining: 4265.65 mins
Episode: 1572/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 1957.61 mins, Remaining: 4268.88 mins
Episode: 1573/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 1958.94 mins, Remaining: 4267.83 mins
Episode: 1574/5000, Reward: 40.0, Epsilon: 0.01, Time Elapsed: 1961.56 mins, Remaining: 4269.56 mins
Episode: 1575/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 1963.51 mins, Remaining: 4269.85 mins
Episode: 1576/5000, Reward: 40.0, Epsilon: 0.01, Time Elapsed: 1965.54 mins, Remaining: 427

Episode: 1648/5000, Reward: 27.0, Epsilon: 0.01, Time Elapsed: 2122.34 mins, Remaining: 4316.80 mins
Episode: 1649/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 2125.64 mins, Remaining: 4319.59 mins
Episode: 1650/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 2128.38 mins, Remaining: 4321.25 mins
Saved weights at episode 1650
Episode: 1651/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 2130.50 mins, Remaining: 4321.65 mins
Episode: 1652/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2132.27 mins, Remaining: 4321.34 mins
Episode: 1653/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 2134.03 mins, Remaining: 4320.99 mins
Episode: 1654/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 2135.48 mins, Remaining: 4320.03 mins
Episode: 1655/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 2137.37 mins, Remaining: 4319.95 mins
Episode: 1656/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 2139.33 mins, Remaining: 4320.00 mins
Episode: 1657/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed

Episode: 1729/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 2293.52 mins, Remaining: 4338.99 mins
Episode: 1730/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 2295.27 mins, Remaining: 4338.46 mins
Episode: 1731/5000, Reward: 28.0, Epsilon: 0.01, Time Elapsed: 2296.91 mins, Remaining: 4337.72 mins
Episode: 1732/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 2300.11 mins, Remaining: 4339.92 mins
Episode: 1733/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 2301.98 mins, Remaining: 4339.63 mins
Episode: 1734/5000, Reward: 42.0, Epsilon: 0.01, Time Elapsed: 2304.15 mins, Remaining: 4339.88 mins
Episode: 1735/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 2306.13 mins, Remaining: 4339.79 mins
Episode: 1736/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 2308.01 mins, Remaining: 4339.48 mins
Episode: 1737/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 2310.10 mins, Remaining: 4339.58 mins
Episode: 1738/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 2312.66 mins, Remaining: 434

Episode: 1810/5000, Reward: 20.0, Epsilon: 0.01, Time Elapsed: 2476.93 mins, Remaining: 4365.42 mins
Episode: 1811/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 2480.71 mins, Remaining: 4368.29 mins
Episode: 1812/5000, Reward: 89.0, Epsilon: 0.01, Time Elapsed: 2482.49 mins, Remaining: 4367.65 mins
Episode: 1813/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 2484.23 mins, Remaining: 4366.93 mins
Episode: 1814/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 2486.50 mins, Remaining: 4367.14 mins
Episode: 1815/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2489.07 mins, Remaining: 4367.88 mins
Episode: 1816/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2490.78 mins, Remaining: 4367.10 mins
Episode: 1817/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2494.48 mins, Remaining: 4369.80 mins
Episode: 1818/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 2496.11 mins, Remaining: 4368.89 mins
Episode: 1819/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 2499.69 mins, Remaining: 437

Episode: 1891/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 2659.33 mins, Remaining: 4372.21 mins
Episode: 1892/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 2663.02 mins, Remaining: 4374.56 mins
Episode: 1893/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 2665.05 mins, Remaining: 4374.17 mins
Episode: 1894/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 2667.66 mins, Remaining: 4374.74 mins
Episode: 1895/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2669.71 mins, Remaining: 4374.37 mins
Episode: 1896/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 2671.59 mins, Remaining: 4373.75 mins
Episode: 1897/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 2674.01 mins, Remaining: 4373.99 mins
Episode: 1898/5000, Reward: 38.0, Epsilon: 0.01, Time Elapsed: 2677.50 mins, Remaining: 4375.98 mins
Episode: 1899/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 2679.55 mins, Remaining: 4375.62 mins
Episode: 1900/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 2681.68 mins, Remaining: 437

Episode: 1972/5000, Reward: 28.0, Epsilon: 0.01, Time Elapsed: 2841.45 mins, Remaining: 4363.03 mins
Episode: 1973/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 2844.03 mins, Remaining: 4363.35 mins
Episode: 1974/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2848.35 mins, Remaining: 4366.32 mins
Episode: 1975/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2852.30 mins, Remaining: 4368.71 mins
Episode: 1976/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 2853.91 mins, Remaining: 4367.53 mins
Episode: 1977/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 2856.33 mins, Remaining: 4367.57 mins
Episode: 1978/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 2858.41 mins, Remaining: 4367.09 mins
Episode: 1979/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 2861.66 mins, Remaining: 4368.40 mins
Episode: 1980/5000, Reward: 27.0, Epsilon: 0.01, Time Elapsed: 2864.85 mins, Remaining: 4369.61 mins
Episode: 1981/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 2866.65 mins, Remaining: 436

Episode: 2053/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 3024.98 mins, Remaining: 4342.24 mins
Episode: 2054/5000, Reward: 40.0, Epsilon: 0.01, Time Elapsed: 3027.18 mins, Remaining: 4341.80 mins
Episode: 2055/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 3029.68 mins, Remaining: 4341.81 mins
Episode: 2056/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 3031.87 mins, Remaining: 4341.35 mins
Episode: 2057/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 3034.89 mins, Remaining: 4342.08 mins
Episode: 2058/5000, Reward: 43.0, Epsilon: 0.01, Time Elapsed: 3037.09 mins, Remaining: 4341.65 mins
Episode: 2059/5000, Reward: 29.0, Epsilon: 0.01, Time Elapsed: 3038.88 mins, Remaining: 4340.63 mins
Episode: 2060/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 3041.10 mins, Remaining: 4340.21 mins
Episode: 2061/5000, Reward: 29.0, Epsilon: 0.01, Time Elapsed: 3043.07 mins, Remaining: 4339.43 mins
Episode: 2062/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 3046.00 mins, Remaining: 434

Episode: 2134/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 3222.79 mins, Remaining: 4328.26 mins
Episode: 2135/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 3224.49 mins, Remaining: 4327.01 mins
Episode: 2136/5000, Reward: 42.0, Epsilon: 0.01, Time Elapsed: 3226.96 mins, Remaining: 4326.78 mins
Episode: 2137/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 3229.64 mins, Remaining: 4326.84 mins
Episode: 2138/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 3232.72 mins, Remaining: 4327.42 mins
Episode: 2139/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 3236.64 mins, Remaining: 4329.14 mins
Episode: 2140/5000, Reward: 25.0, Epsilon: 0.01, Time Elapsed: 3239.83 mins, Remaining: 4329.86 mins
Episode: 2141/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 3241.34 mins, Remaining: 4328.34 mins
Episode: 2142/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 3243.55 mins, Remaining: 4327.76 mins
Episode: 2143/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 3245.54 mins, Remaining: 432

Episode: 2215/5000, Reward: 28.0, Epsilon: 0.01, Time Elapsed: 3422.00 mins, Remaining: 4302.61 mins
Episode: 2216/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 3426.34 mins, Remaining: 4304.58 mins
Episode: 2217/5000, Reward: 40.0, Epsilon: 0.01, Time Elapsed: 3428.99 mins, Remaining: 4304.41 mins
Episode: 2218/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 3431.13 mins, Remaining: 4303.61 mins
Episode: 2219/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 3433.44 mins, Remaining: 4303.02 mins
Episode: 2220/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 3435.31 mins, Remaining: 4301.87 mins
Episode: 2221/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 3438.23 mins, Remaining: 4302.05 mins
Episode: 2222/5000, Reward: 95.0, Epsilon: 0.01, Time Elapsed: 3440.24 mins, Remaining: 4301.07 mins
Episode: 2223/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 3444.20 mins, Remaining: 4302.53 mins
Episode: 2224/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 3446.39 mins, Remaining: 430

Episode: 2296/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 3628.62 mins, Remaining: 4273.43 mins
Episode: 2297/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 3630.84 mins, Remaining: 4272.59 mins
Episode: 2298/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 3632.76 mins, Remaining: 4271.42 mins
Episode: 2299/5000, Reward: 93.0, Epsilon: 0.01, Time Elapsed: 3634.40 mins, Remaining: 4269.91 mins
Episode: 2300/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 3636.19 mins, Remaining: 4268.57 mins
Saved weights at episode 2300
Episode: 2301/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 3638.38 mins, Remaining: 4267.71 mins
Episode: 2302/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 3640.32 mins, Remaining: 4266.54 mins
Episode: 2303/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 3642.69 mins, Remaining: 4265.88 mins
Episode: 2304/5000, Reward: 40.0, Epsilon: 0.01, Time Elapsed: 3646.61 mins, Remaining: 4267.04 mins
Episode: 2305/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed

Episode: 2377/5000, Reward: 39.0, Epsilon: 0.01, Time Elapsed: 3828.29 mins, Remaining: 4224.49 mins
Episode: 2378/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 3830.25 mins, Remaining: 4223.26 mins
Episode: 2379/5000, Reward: 28.0, Epsilon: 0.01, Time Elapsed: 3832.14 mins, Remaining: 4221.96 mins
Episode: 2380/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 3835.27 mins, Remaining: 4222.02 mins
Episode: 2381/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 3837.52 mins, Remaining: 4221.11 mins
Episode: 2382/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 3839.82 mins, Remaining: 4220.26 mins
Episode: 2383/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 3842.08 mins, Remaining: 4219.35 mins
Episode: 2384/5000, Reward: 29.0, Epsilon: 0.01, Time Elapsed: 3844.87 mins, Remaining: 4219.04 mins
Episode: 2385/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 3846.47 mins, Remaining: 4217.41 mins
Episode: 2386/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 3849.52 mins, Remaining: 421

Episode: 2458/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 4022.14 mins, Remaining: 4159.60 mins
Episode: 2459/5000, Reward: 97.0, Epsilon: 0.01, Time Elapsed: 4024.63 mins, Remaining: 4158.83 mins
Episode: 2460/5000, Reward: 89.0, Epsilon: 0.01, Time Elapsed: 4026.50 mins, Remaining: 4157.44 mins
Episode: 2461/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 4028.70 mins, Remaining: 4156.39 mins
Episode: 2462/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 4031.16 mins, Remaining: 4155.60 mins
Episode: 2463/5000, Reward: 23.0, Epsilon: 0.01, Time Elapsed: 4032.76 mins, Remaining: 4153.92 mins
Episode: 2464/5000, Reward: 33.0, Epsilon: 0.01, Time Elapsed: 4035.59 mins, Remaining: 4153.51 mins
Episode: 2465/5000, Reward: 99.0, Epsilon: 0.01, Time Elapsed: 4039.15 mins, Remaining: 4153.85 mins
Episode: 2466/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 4041.69 mins, Remaining: 4153.14 mins
Episode: 2467/5000, Reward: 106.0, Epsilon: 0.01, Time Elapsed: 4044.58 mins, Remaining: 41

Episode: 2539/5000, Reward: 98.0, Epsilon: 0.01, Time Elapsed: 4224.13 mins, Remaining: 4094.37 mins
Episode: 2540/5000, Reward: 29.0, Epsilon: 0.01, Time Elapsed: 4226.97 mins, Remaining: 4093.84 mins
Episode: 2541/5000, Reward: 36.0, Epsilon: 0.01, Time Elapsed: 4229.35 mins, Remaining: 4092.87 mins
Episode: 2542/5000, Reward: 107.0, Epsilon: 0.01, Time Elapsed: 4232.03 mins, Remaining: 4092.18 mins
Episode: 2543/5000, Reward: 89.0, Epsilon: 0.01, Time Elapsed: 4234.02 mins, Remaining: 4090.84 mins
Episode: 2544/5000, Reward: 19.0, Epsilon: 0.01, Time Elapsed: 4235.61 mins, Remaining: 4089.10 mins
Episode: 2545/5000, Reward: 30.0, Epsilon: 0.01, Time Elapsed: 4238.25 mins, Remaining: 4088.37 mins
Episode: 2546/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 4241.17 mins, Remaining: 4087.92 mins
Episode: 2547/5000, Reward: 26.0, Epsilon: 0.01, Time Elapsed: 4244.00 mins, Remaining: 4087.37 mins
Episode: 2548/5000, Reward: 32.0, Epsilon: 0.01, Time Elapsed: 4247.02 mins, Remaining: 40

Episode: 2620/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 4428.67 mins, Remaining: 4022.99 mins
Episode: 2621/5000, Reward: 41.0, Epsilon: 0.01, Time Elapsed: 4431.03 mins, Remaining: 4021.90 mins
Episode: 2622/5000, Reward: 89.0, Epsilon: 0.01, Time Elapsed: 4433.09 mins, Remaining: 4020.55 mins
Episode: 2623/5000, Reward: 91.0, Epsilon: 0.01, Time Elapsed: 4435.36 mins, Remaining: 4019.39 mins
Episode: 2624/5000, Reward: 97.0, Epsilon: 0.01, Time Elapsed: 4437.58 mins, Remaining: 4018.17 mins
Episode: 2625/5000, Reward: 35.0, Epsilon: 0.01, Time Elapsed: 4439.96 mins, Remaining: 4017.11 mins
Episode: 2626/5000, Reward: 96.0, Epsilon: 0.01, Time Elapsed: 4443.05 mins, Remaining: 4016.68 mins
Episode: 2627/5000, Reward: 82.0, Epsilon: 0.01, Time Elapsed: 4445.10 mins, Remaining: 4015.31 mins
Episode: 2628/5000, Reward: 101.0, Epsilon: 0.01, Time Elapsed: 4447.74 mins, Remaining: 4014.47 mins
Episode: 2629/5000, Reward: 37.0, Epsilon: 0.01, Time Elapsed: 4450.31 mins, Remaining: 40

Episode: 2701/5000, Reward: 34.0, Epsilon: 0.01, Time Elapsed: 4639.81 mins, Remaining: 3949.25 mins
Episode: 2702/5000, Reward: 97.0, Epsilon: 0.01, Time Elapsed: 4642.66 mins, Remaining: 3948.50 mins
Episode: 2703/5000, Reward: 31.0, Epsilon: 0.01, Time Elapsed: 4645.05 mins, Remaining: 3947.35 mins
Episode: 2704/5000, Reward: 25.0, Epsilon: 0.01, Time Elapsed: 4646.97 mins, Remaining: 3945.80 mins
