In [1]:
import keras
from keras import layers
import gymnasium as gym
from gymnasium.wrappers.frame_stack import FrameStack
from gymnasium.wrappers.atari_preprocessing import AtariPreprocessing
import numpy as np
import tensorflow as tf
import ale_py


In [2]:
import logging
from gymnasium.wrappers import RecordEpisodeStatistics
import json

In [3]:

# Register the environments provided by the imported `ale_py` package with
# Gymnasium, so the Atari environment id used by gym.make() below can be resolved.
gym.register_envs(ale_py)


In [4]:
# Create the raw Space Invaders environment. It is wrapped further below
# (AtariPreprocessing, FrameStack, RecordVideo, RecordEpisodeStatistics).
# render_mode="rgb_array" is requested here; presumably it is what lets the
# RecordVideo wrapper capture frames — TODO confirm against the Gymnasium docs.
env = gym.make('SpaceInvadersNoFrameskip-v4', render_mode="rgb_array")

In [5]:

# --- Exploration schedule (epsilon-greedy) ---------------------------------
epsilon_max = 0.5                      # exploration rate at the start of this run
epsilon_min = 0.1                      # floor the rate decays towards
epsilon = epsilon_max                  # current exploration rate
epsilon_interval = epsilon_max - epsilon_min

# --- Learning / run-length settings ----------------------------------------
gamma = 0.99                           # discount factor
batch_size = 32                        # transitions sampled per gradient step
max_steps_per_episode = 10_000
max_episodes = 0                       # the training loop only enforces this when > 0
max_frames = 1e7


def trigger(t):
    """Return True for every 50th episode index (0, 50, 100, ...)."""
    return t % 50 == 0


# --- Environment wrappers, applied innermost first -------------------------
env = AtariPreprocessing(env)
env = FrameStack(env, 4)
env = gym.wrappers.RecordVideo(
    env,
    video_folder="./Outputs/videos",
    name_prefix='training',
    episode_trigger=trigger,
    disable_logger=False,
)
env = RecordEpisodeStatistics(env)

# Size of the Q-network output layer and of the random-action range.
# NOTE(review): hard-coded rather than read from env.action_space — confirm it
# matches the action set this environment is meant to use.
num_actions = 4

  logger.warn(


In [6]:
# Load a previously saved Q-network checkpoint.
# safe_mode=False disables Keras' protection against unsafe lambda
# deserialization, so only load checkpoint files you trust.
# NOTE(review): `model` is reassigned from create_q_model() in a later cell and
# the same file is loaded again into `saved_model` in the next cell, so this
# load looks redundant — confirm before removing.
model = keras.models.load_model("breakout_qmodel_7764.keras", safe_mode = False)


In [7]:

# Checkpoint that serves as the weight source: later cells copy its layer
# weights into the freshly built `model` and `model_target`.
# safe_mode=False disables Keras' protection against unsafe lambda
# deserialization, so only load checkpoint files you trust.
saved_model = keras.models.load_model("breakout_qmodel_7764.keras", safe_mode = False)


In [46]:
def create_q_model():
    """Build the convolutional Q-network used for both the online and target models.

    The network takes a batch of stacked frames shaped ``(4, 84, 84)``
    (frames first), moves the frame axis last so the convolutions see
    ``(84, 84, 4)``, and ends in a linear layer with one output per action.
    The number of outputs is read from the module-level ``num_actions``.

    Returns:
        keras.Sequential: the (uncompiled) Q-network.
    """
    return keras.Sequential(
        [
            # Declare the input explicitly. Passing `input_shape=` to the first
            # layer of a Sequential model is deprecated in Keras 3 and emits a
            # UserWarning from the layer constructor.
            keras.Input(shape=(4, 84, 84)),
            # (frames, H, W) -> (H, W, frames). Permute indexes dimensions from 1
            # and excludes the batch axis, so (2, 3, 1) is the same operation as
            # transpose(tensor, [0, 2, 3, 1]). Unlike a Lambda wrapping a Python
            # lambda, it serializes without needing safe_mode=False on reload.
            layers.Permute((2, 3, 1)),
            layers.Conv2D(32, kernel_size=8, strides=4, activation="leaky_relu"),
            layers.Conv2D(64, kernel_size=4, strides=2, activation="leaky_relu"),
            layers.Conv2D(64, kernel_size=3, strides=1, activation="leaky_relu"),
            layers.Flatten(),
            layers.Dense(512, activation="leaky_relu"),
            # Linear head: one Q-value per action.
            layers.Dense(num_actions, activation="linear"),
        ]
    )
# Online network (trained every few frames) and target network (periodically
# synced copy used to compute the bootstrap targets).
model = create_q_model()
model_target = create_q_model()
optimizer = keras.optimizers.Adam(learning_rate=0.00025, clipnorm=1.0)

# Experience replay buffers: parallel lists, one entry per environment step.
action_history = []
state_history = []
state_next_history = []
rewards_history = []
done_history = []
# Rewards of the most recent episodes (the training loop keeps the last 100).
episode_reward_history = []
running_reward = 0
# Resume counters: training continues from the checkpoint saved at this
# episode / frame rather than from zero.
episode_count = 7764
frame_count = 4930000

# Number of frames to take random action and observe output
epsilon_random_frames = 10000
# Number of frames left in the 1,000,000-frame exploration (epsilon decay) window.
# The training loop subtracts epsilon_interval / epsilon_greedy_frames from
# epsilon every frame, so this value must stay positive: when resuming past
# the window, the raw difference is negative and epsilon would *increase* each
# frame. Clamping to 1 makes epsilon drop to epsilon_min on the first step.
epsilon_greedy_frames = max(1000000.0 - frame_count, 1.0)
# Maximum replay length
# Note: The Deepmind paper suggests 1000000 however this causes memory issues
max_memory_length = 10000
# Train the model after 4 actions
update_after_actions = 4
# How often to update the target network
update_target_network = 10000
# Using huber loss for stability
loss_function = keras.losses.Huber()

# Restore the checkpoint weights into the online network. Weights are copied
# positionally for the whole model: matching on auto-generated layer names
# silently skips every layer whose name differs between the checkpoint and the
# freshly built model, whereas set_weights raises if the shapes do not line up.
model.set_weights(saved_model.get_weights())

  super().__init__(**kwargs)


In [47]:
# Restore the checkpoint weights into the target network. Copy the whole
# weight list positionally instead of matching layers by auto-generated name:
# a name mismatch would silently leave the target network randomly
# initialised, whereas set_weights raises if the shapes do not line up.
model_target.set_weights(saved_model.get_weights())

In [48]:
# Accumulates one summary dict per target-network update; the training loop
# appends to it and dumps the whole list to a JSON file each time.
statistics = []

In [None]:
# Per-frame epsilon decrement. Guard against a non-positive
# epsilon_greedy_frames (e.g. when resuming with frame_count already past the
# exploration window): dividing by it would raise epsilon instead of decaying
# it, or divide by zero.
epsilon_step = epsilon_interval / max(epsilon_greedy_frames, 1.0)

while True:
    observation, info = env.reset()
    state = np.array(observation)
    episode_reward = 0

    for timestep in range(1, max_steps_per_episode):
        frame_count += 1

        # Use epsilon-greedy for exploration
        if frame_count < epsilon_random_frames or epsilon > np.random.rand(1)[0]:
            # Take random action
            action = np.random.choice(num_actions)
        else:
            try:
                # Predict action Q-values from the current environment state
                state_tensor = keras.ops.convert_to_tensor(state)
                state_tensor = keras.ops.expand_dims(state_tensor, 0)
                action_probs = model(state_tensor, training=False)
                # Take best action
                action = keras.ops.argmax(action_probs[0]).numpy()
            except Exception as exc:
                # Report shapes using `state`, which is always bound here
                # (`state_tensor` may not be if the conversion itself failed),
                # and chain the original error so its traceback is kept.
                raise RuntimeError(
                    f"Q-value prediction failed for state shape {np.shape(state)}, "
                    f"model input shape: {model.input_shape}"
                ) from exc

        # Decay probability of taking random action, never below epsilon_min
        epsilon = max(epsilon - epsilon_step, epsilon_min)

        # Apply the sampled action in our environment
        state_next, reward, terminated, truncated, info = env.step(action)
        state_next = np.array(state_next)
        # The episode is over on either signal; only `terminated` marks a true
        # terminal state for the Q-target, so that is what goes in the buffer.
        done = terminated or truncated

        episode_reward += reward

        # Save actions and states in replay buffer
        action_history.append(action)
        state_history.append(state)
        state_next_history.append(state_next)
        done_history.append(terminated)
        rewards_history.append(reward)
        state = state_next

        # Update every fourth frame and once batch size is over 32
        if frame_count % update_after_actions == 0 and len(done_history) > batch_size:
            # Get indices of samples for replay buffers (sampled with replacement)
            indices = np.random.choice(len(done_history), size=batch_size)

            # Using list comprehension to sample from replay buffer
            state_sample = np.array([state_history[i] for i in indices])
            state_next_sample = np.array(
                [state_next_history[i] for i in indices])
            rewards_sample = [rewards_history[i] for i in indices]
            action_sample = [action_history[i] for i in indices]
            done_sample = keras.ops.convert_to_tensor(
                [float(done_history[i]) for i in indices]
            )

            # Build the updated Q-values for the sampled future states
            # Use the target model for stability
            future_rewards = model_target.predict(state_next_sample, verbose=0)
            # Q value = reward + discount factor * expected future reward
            updated_q_values = rewards_sample + gamma * keras.ops.amax(
                future_rewards, axis=1
            )

            # If final frame set the last value to -1
            updated_q_values = updated_q_values * \
                (1 - done_sample) - done_sample

            # Create a mask so we only calculate loss on the updated Q-values
            masks = keras.ops.one_hot(action_sample, num_actions)

            with tf.GradientTape() as tape:
                # Train the model on the states and updated Q-values
                q_values = model(state_sample)

                # Apply the masks to the Q-values to get the Q-value for action taken
                q_action = keras.ops.sum(
                    keras.ops.multiply(q_values, masks), axis=1)
                # Calculate loss between new Q-value and old Q-value
                loss = loss_function(updated_q_values, q_action)

            # Backpropagation
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if frame_count % update_target_network == 0:
            # Update the target network with the online network's weights
            model_target.set_weights(model.get_weights())
            # np.max raises on an empty list, which happens if this fires
            # before the first episode has finished; report 0.0 in that case.
            best_score = float(np.max(episode_reward_history)) if episode_reward_history else 0.0
            # Log details
            print(f"best score of last 100: {best_score}, running reward: {running_reward:.2f} at episode {episode_count}, frame count {frame_count}")
            stat_dic = {'best_score': best_score, 'running_reward': float(running_reward),
                        'episode': episode_count, 'frame_count': frame_count}
            statistics.append(stat_dic)
            # Checkpoint the statistics and the online model alongside each other
            with open(f"./Outputs/statistics_{episode_count}.json", "w") as json_file:
                json.dump(statistics, json_file)
            model.save(f"./Outputs/breakout_qmodel_{episode_count}.keras")

        # Limit the state and reward history: drop the oldest transition
        if len(rewards_history) > max_memory_length:
            del rewards_history[:1]
            del state_history[:1]
            del state_next_history[:1]
            del action_history[:1]
            del done_history[:1]

        if done:
            break

    # Update running reward to check condition for solving
    episode_reward_history.append(episode_reward)
    if len(episode_reward_history) > 100:
        del episode_reward_history[:1]
    running_reward = np.mean(episode_reward_history)

    # Log the episode statistics from the step info. The 'episode' entry is
    # absent when the step loop ran out without the episode ending, hence .get().
    # Note: INFO records are only shown if logging is configured at INFO level.
    logging.info("Episode %s, %s", episode_count, info.get("episode"))

    episode_count += 1

    if running_reward > 1000:  # Condition to consider the task solved
        print("Solved at episode {}!".format(episode_count))
        break

    if (
        max_episodes > 0 and episode_count >= max_episodes
    ):  # Maximum number of episodes reached
        print("Stopped at episode {}!".format(episode_count))
        break
    if max_frames <= frame_count:  # Frame budget exhausted
        print(f"Stopped at frame {frame_count}!")
        break

  logger.warn(


MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-0.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-0.mp4



                                                                         

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-0.mp4
best score of last 100: 295.0, running reward: 134.47 at episode 635, frame count 350000
best score of last 100: 415.0, running reward: 148.78 at episode 653, frame count 360000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-50.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-50.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-50.mp4
best score of last 100: 415.0, running reward: 150.09 at episode 671, frame count 370000
best score of last 100: 425.0, running reward: 148.78 at episode 690, frame count 380000
best score of last 100: 425.0, running reward: 153.57 at episode 707, frame count 390000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-100.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-100.mp4
best score of last 100: 425.0, running reward: 153.20 at episode 724, frame count 400000
best score of last 100: 425.0, running reward: 151.10 at episode 745, frame count 410000
best score of last 100: 440.0, running reward: 154.80 at episode 762, frame count 420000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-150.mp4
best score of last 100: 620.0, running reward: 158.25 at episode 777, frame count 430000
best score of last 100: 620.0, running reward: 165.90 at episode 794, frame count 440000
best score of last 100: 620.0, running reward: 157.35 at episode 814, frame count 450000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-200.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-200.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-200.mp4
best score of last 100: 620.0, running reward: 157.45 at episode 833, frame count 460000
best score of last 100: 620.0, running reward: 162.05 at episode 849, frame count 470000
best score of last 100: 620.0, running reward: 169.45 at episode 866, frame count 480000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-250.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-250.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-250.mp4
best score of last 100: 545.0, running reward: 168.70 at episode 881, frame count 490000
best score of last 100: 545.0, running reward: 163.75 at episode 899, frame count 500000
best score of last 100: 635.0, running reward: 176.50 at episode 916, frame count 510000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-300.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-300.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-300.mp4
best score of last 100: 635.0, running reward: 175.75 at episode 934, frame count 520000
best score of last 100: 635.0, running reward: 170.25 at episode 952, frame count 530000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-350.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-350.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-350.mp4
best score of last 100: 635.0, running reward: 174.20 at episode 969, frame count 540000
best score of last 100: 635.0, running reward: 168.75 at episode 987, frame count 550000
best score of last 100: 635.0, running reward: 168.00 at episode 1004, frame count 560000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-400.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-400.mp4
best score of last 100: 485.0, running reward: 174.65 at episode 1019, frame count 570000
best score of last 100: 485.0, running reward: 179.60 at episode 1037, frame count 580000
best score of last 100: 485.0, running reward: 178.85 at episode 1054, frame count 590000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-450.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-450.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-450.mp4
best score of last 100: 485.0, running reward: 176.95 at episode 1068, frame count 600000
best score of last 100: 485.0, running reward: 173.45 at episode 1086, frame count 610000
best score of last 100: 595.0, running reward: 189.05 at episode 1100, frame count 620000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-500.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-500.mp4
best score of last 100: 595.0, running reward: 181.95 at episode 1118, frame count 630000
best score of last 100: 595.0, running reward: 187.85 at episode 1135, frame count 640000
best score of last 100: 595.0, running reward: 188.40 at episode 1151, frame count 650000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-550.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-550.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-550.mp4
best score of last 100: 610.0, running reward: 197.85 at episode 1168, frame count 660000
best score of last 100: 610.0, running reward: 207.85 at episode 1185, frame count 670000
best score of last 100: 610.0, running reward: 205.40 at episode 1199, frame count 680000
best score of last 100: 610.0, running reward: 204.20 at episode 1216, frame count 690000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-600.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-600.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-600.mp4
best score of last 100: 610.0, running reward: 209.70 at episode 1233, frame count 700000
best score of last 100: 610.0, running reward: 214.00 at episode 1248, frame count 710000
best score of last 100: 575.0, running reward: 203.60 at episode 1263, frame count 720000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-650.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-650.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-650.mp4
best score of last 100: 570.0, running reward: 209.15 at episode 1279, frame count 730000
best score of last 100: 570.0, running reward: 206.10 at episode 1295, frame count 740000
best score of last 100: 570.0, running reward: 195.15 at episode 1314, frame count 750000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-700.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-700.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-700.mp4
best score of last 100: 590.0, running reward: 201.70 at episode 1327, frame count 760000
best score of last 100: 590.0, running reward: 203.30 at episode 1346, frame count 770000
best score of last 100: 590.0, running reward: 196.60 at episode 1362, frame count 780000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-750.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-750.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-750.mp4
best score of last 100: 590.0, running reward: 182.60 at episode 1380, frame count 790000
best score of last 100: 635.0, running reward: 183.45 at episode 1397, frame count 800000
best score of last 100: 635.0, running reward: 196.65 at episode 1412, frame count 810000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-800.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-800.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-800.mp4
best score of last 100: 635.0, running reward: 193.30 at episode 1428, frame count 820000
best score of last 100: 635.0, running reward: 195.00 at episode 1444, frame count 830000
best score of last 100: 635.0, running reward: 195.80 at episode 1462, frame count 840000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-850.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-850.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-850.mp4
best score of last 100: 635.0, running reward: 201.85 at episode 1481, frame count 850000
best score of last 100: 560.0, running reward: 195.10 at episode 1496, frame count 860000
best score of last 100: 560.0, running reward: 192.60 at episode 1514, frame count 870000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-900.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-900.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-900.mp4
best score of last 100: 560.0, running reward: 181.70 at episode 1532, frame count 880000
best score of last 100: 560.0, running reward: 177.90 at episode 1548, frame count 890000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-950.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-950.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-950.mp4
best score of last 100: 560.0, running reward: 183.30 at episode 1567, frame count 900000
best score of last 100: 560.0, running reward: 175.15 at episode 1586, frame count 910000
best score of last 100: 560.0, running reward: 177.05 at episode 1603, frame count 920000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1000.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1000.mp4
best score of last 100: 560.0, running reward: 175.35 at episode 1619, frame count 930000
best score of last 100: 620.0, running reward: 180.65 at episode 1635, frame count 940000
best score of last 100: 620.0, running reward: 185.10 at episode 1651, frame count 950000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1050.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1050.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1050.mp4
best score of last 100: 620.0, running reward: 186.15 at episode 1669, frame count 960000
best score of last 100: 620.0, running reward: 190.05 at episode 1686, frame count 970000
best score of last 100: 620.0, running reward: 202.15 at episode 1703, frame count 980000
best score of last 100: 620.0, running reward: 216.95 at episode 1716, frame count 990000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1100.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1100.mp4
best score of last 100: 590.0, running reward: 210.45 at episode 1733, frame count 1000000
best score of last 100: 590.0, running reward: 209.10 at episode 1749, frame count 1010000
best score of last 100: 590.0, running reward: 210.70 at episode 1765, frame count 1020000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1150.mp4
best score of last 100: 590.0, running reward: 208.50 at episode 1781, frame count 1030000
best score of last 100: 590.0, running reward: 202.25 at episode 1798, frame count 1040000
best score of last 100: 525.0, running reward: 195.50 at episode 1814, frame count 1050000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1200.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1200.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1200.mp4
best score of last 100: 525.0, running reward: 194.35 at episode 1830, frame count 1060000
best score of last 100: 760.0, running reward: 195.40 at episode 1849, frame count 1070000
best score of last 100: 760.0, running reward: 198.25 at episode 1865, frame count 1080000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1250.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1250.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1250.mp4
best score of last 100: 760.0, running reward: 196.30 at episode 1883, frame count 1090000
best score of last 100: 760.0, running reward: 202.50 at episode 1896, frame count 1100000
best score of last 100: 760.0, running reward: 211.15 at episode 1912, frame count 1110000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1300.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1300.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1300.mp4
best score of last 100: 760.0, running reward: 210.50 at episode 1930, frame count 1120000
best score of last 100: 750.0, running reward: 208.85 at episode 1947, frame count 1130000
best score of last 100: 750.0, running reward: 215.25 at episode 1961, frame count 1140000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1350.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1350.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1350.mp4
best score of last 100: 750.0, running reward: 218.80 at episode 1977, frame count 1150000
best score of last 100: 750.0, running reward: 212.70 at episode 1992, frame count 1160000
best score of last 100: 750.0, running reward: 214.80 at episode 2007, frame count 1170000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1400.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1400.mp4
best score of last 100: 700.0, running reward: 211.95 at episode 2023, frame count 1180000
best score of last 100: 700.0, running reward: 205.50 at episode 2041, frame count 1190000
best score of last 100: 535.0, running reward: 194.60 at episode 2057, frame count 1200000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1450.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1450.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1450.mp4
best score of last 100: 535.0, running reward: 194.30 at episode 2073, frame count 1210000
best score of last 100: 535.0, running reward: 188.75 at episode 2089, frame count 1220000
best score of last 100: 520.0, running reward: 180.85 at episode 2105, frame count 1230000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1500.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1500.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1500.mp4
best score of last 100: 565.0, running reward: 191.50 at episode 2118, frame count 1240000
best score of last 100: 565.0, running reward: 196.60 at episode 2136, frame count 1250000
best score of last 100: 565.0, running reward: 200.95 at episode 2154, frame count 1260000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1550.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1550.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1550.mp4
best score of last 100: 565.0, running reward: 205.35 at episode 2171, frame count 1270000
best score of last 100: 565.0, running reward: 193.50 at episode 2190, frame count 1280000
best score of last 100: 565.0, running reward: 195.85 at episode 2206, frame count 1290000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1600.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1600.mp4



                                                                          

MoviePy - Done !




MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1600.mp4
best score of last 100: 545.0, running reward: 177.40 at episode 2225, frame count 1300000
best score of last 100: 545.0, running reward: 170.85 at episode 2244, frame count 1310000
best score of last 100: 545.0, running reward: 173.15 at episode 2259, frame count 1320000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1650.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1650.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1650.mp4
best score of last 100: 545.0, running reward: 176.80 at episode 2275, frame count 1330000
best score of last 100: 490.0, running reward: 174.15 at episode 2293, frame count 1340000
best score of last 100: 490.0, running reward: 174.20 at episode 2311, frame count 1350000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1700.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1700.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1700.mp4
best score of last 100: 445.0, running reward: 177.10 at episode 2328, frame count 1360000
best score of last 100: 460.0, running reward: 192.75 at episode 2346, frame count 1370000
best score of last 100: 460.0, running reward: 197.65 at episode 2362, frame count 1380000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1750.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1750.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1750.mp4
best score of last 100: 460.0, running reward: 197.25 at episode 2379, frame count 1390000
best score of last 100: 460.0, running reward: 199.60 at episode 2394, frame count 1400000
best score of last 100: 460.0, running reward: 204.15 at episode 2411, frame count 1410000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1800.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1800.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1800.mp4
best score of last 100: 490.0, running reward: 202.70 at episode 2429, frame count 1420000
best score of last 100: 530.0, running reward: 204.45 at episode 2443, frame count 1430000
best score of last 100: 530.0, running reward: 197.60 at episode 2460, frame count 1440000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1850.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1850.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1850.mp4
best score of last 100: 530.0, running reward: 199.35 at episode 2475, frame count 1450000
best score of last 100: 530.0, running reward: 208.50 at episode 2489, frame count 1460000
best score of last 100: 530.0, running reward: 210.70 at episode 2503, frame count 1470000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1900.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1900.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1900.mp4
best score of last 100: 530.0, running reward: 209.15 at episode 2519, frame count 1480000
best score of last 100: 530.0, running reward: 210.85 at episode 2536, frame count 1490000
best score of last 100: 790.0, running reward: 215.50 at episode 2550, frame count 1500000
best score of last 100: 790.0, running reward: 222.15 at episode 2564, frame count 1510000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-1950.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-1950.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-1950.mp4
best score of last 100: 790.0, running reward: 224.50 at episode 2580, frame count 1520000
best score of last 100: 790.0, running reward: 227.55 at episode 2594, frame count 1530000
best score of last 100: 790.0, running reward: 217.05 at episode 2612, frame count 1540000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2000.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2000.mp4
best score of last 100: 790.0, running reward: 215.85 at episode 2630, frame count 1550000
best score of last 100: 530.0, running reward: 209.35 at episode 2645, frame count 1560000
best score of last 100: 530.0, running reward: 206.95 at episode 2662, frame count 1570000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2050.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2050.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2050.mp4
best score of last 100: 475.0, running reward: 190.80 at episode 2680, frame count 1580000
best score of last 100: 515.0, running reward: 184.05 at episode 2698, frame count 1590000
best score of last 100: 515.0, running reward: 184.00 at episode 2715, frame count 1600000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2100.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2100.mp4
best score of last 100: 515.0, running reward: 191.25 at episode 2730, frame count 1610000
best score of last 100: 590.0, running reward: 191.20 at episode 2746, frame count 1620000
best score of last 100: 590.0, running reward: 189.20 at episode 2761, frame count 1630000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2150.mp4
best score of last 100: 590.0, running reward: 202.20 at episode 2775, frame count 1640000
best score of last 100: 590.0, running reward: 210.75 at episode 2791, frame count 1650000
best score of last 100: 590.0, running reward: 211.15 at episode 2808, frame count 1660000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2200.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2200.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2200.mp4
best score of last 100: 590.0, running reward: 216.00 at episode 2824, frame count 1670000
best score of last 100: 590.0, running reward: 214.85 at episode 2837, frame count 1680000
best score of last 100: 565.0, running reward: 219.80 at episode 2851, frame count 1690000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2250.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2250.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2250.mp4
best score of last 100: 620.0, running reward: 218.70 at episode 2868, frame count 1700000
best score of last 100: 620.0, running reward: 216.70 at episode 2883, frame count 1710000
best score of last 100: 620.0, running reward: 212.55 at episode 2899, frame count 1720000
best score of last 100: 655.0, running reward: 223.25 at episode 2913, frame count 1730000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2300.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2300.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2300.mp4
best score of last 100: 655.0, running reward: 224.45 at episode 2929, frame count 1740000
best score of last 100: 655.0, running reward: 223.60 at episode 2945, frame count 1750000
best score of last 100: 655.0, running reward: 222.95 at episode 2959, frame count 1760000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2350.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2350.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2350.mp4
best score of last 100: 655.0, running reward: 222.20 at episode 2975, frame count 1770000
best score of last 100: 655.0, running reward: 223.45 at episode 2991, frame count 1780000
best score of last 100: 655.0, running reward: 221.25 at episode 3004, frame count 1790000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2400.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2400.mp4
best score of last 100: 660.0, running reward: 220.75 at episode 3021, frame count 1800000
best score of last 100: 660.0, running reward: 227.30 at episode 3035, frame count 1810000
best score of last 100: 660.0, running reward: 215.85 at episode 3052, frame count 1820000
best score of last 100: 660.0, running reward: 221.25 at episode 3066, frame count 1830000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2450.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2450.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2450.mp4
best score of last 100: 660.0, running reward: 221.35 at episode 3081, frame count 1840000
best score of last 100: 660.0, running reward: 224.35 at episode 3097, frame count 1850000
best score of last 100: 645.0, running reward: 218.55 at episode 3113, frame count 1860000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2500.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2500.mp4
best score of last 100: 565.0, running reward: 211.20 at episode 3130, frame count 1870000
best score of last 100: 565.0, running reward: 212.20 at episode 3144, frame count 1880000
best score of last 100: 565.0, running reward: 211.10 at episode 3160, frame count 1890000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2550.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2550.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2550.mp4
best score of last 100: 685.0, running reward: 208.20 at episode 3175, frame count 1900000
best score of last 100: 685.0, running reward: 212.70 at episode 3189, frame count 1910000
best score of last 100: 685.0, running reward: 214.15 at episode 3207, frame count 1920000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2600.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2600.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2600.mp4
best score of last 100: 685.0, running reward: 210.65 at episode 3223, frame count 1930000
best score of last 100: 685.0, running reward: 208.65 at episode 3240, frame count 1940000
best score of last 100: 685.0, running reward: 207.70 at episode 3256, frame count 1950000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2650.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2650.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2650.mp4
best score of last 100: 685.0, running reward: 206.20 at episode 3272, frame count 1960000
best score of last 100: 545.0, running reward: 202.85 at episode 3287, frame count 1970000
best score of last 100: 545.0, running reward: 197.30 at episode 3301, frame count 1980000
best score of last 100: 545.0, running reward: 203.70 at episode 3314, frame count 1990000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2700.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2700.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2700.mp4
best score of last 100: 545.0, running reward: 208.50 at episode 3328, frame count 2000000
best score of last 100: 565.0, running reward: 216.55 at episode 3344, frame count 2010000
best score of last 100: 565.0, running reward: 211.20 at episode 3360, frame count 2020000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2750.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2750.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2750.mp4
best score of last 100: 565.0, running reward: 212.75 at episode 3375, frame count 2030000
best score of last 100: 565.0, running reward: 217.95 at episode 3390, frame count 2040000
best score of last 100: 565.0, running reward: 218.00 at episode 3404, frame count 2050000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2800.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2800.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2800.mp4
best score of last 100: 565.0, running reward: 210.35 at episode 3421, frame count 2060000
best score of last 100: 530.0, running reward: 205.80 at episode 3437, frame count 2070000
best score of last 100: 530.0, running reward: 212.70 at episode 3451, frame count 2080000
best score of last 100: 530.0, running reward: 220.15 at episode 3465, frame count 2090000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2850.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2850.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2850.mp4
best score of last 100: 530.0, running reward: 225.00 at episode 3479, frame count 2100000
best score of last 100: 530.0, running reward: 212.05 at episode 3497, frame count 2110000
best score of last 100: 495.0, running reward: 213.90 at episode 3512, frame count 2120000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2900.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2900.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2900.mp4
best score of last 100: 495.0, running reward: 225.00 at episode 3527, frame count 2130000
best score of last 100: 490.0, running reward: 214.35 at episode 3543, frame count 2140000
best score of last 100: 540.0, running reward: 225.05 at episode 3557, frame count 2150000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-2950.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-2950.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-2950.mp4
best score of last 100: 540.0, running reward: 216.30 at episode 3574, frame count 2160000
best score of last 100: 540.0, running reward: 217.05 at episode 3589, frame count 2170000
best score of last 100: 540.0, running reward: 216.45 at episode 3605, frame count 2180000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3000.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3000.mp4
best score of last 100: 540.0, running reward: 210.85 at episode 3621, frame count 2190000
best score of last 100: 555.0, running reward: 214.35 at episode 3637, frame count 2200000
best score of last 100: 555.0, running reward: 217.70 at episode 3649, frame count 2210000
best score of last 100: 640.0, running reward: 225.45 at episode 3663, frame count 2220000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3050.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3050.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3050.mp4
best score of last 100: 640.0, running reward: 233.00 at episode 3677, frame count 2230000
best score of last 100: 640.0, running reward: 241.60 at episode 3691, frame count 2240000
best score of last 100: 640.0, running reward: 253.25 at episode 3707, frame count 2250000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3100.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3100.mp4
best score of last 100: 640.0, running reward: 256.95 at episode 3722, frame count 2260000
best score of last 100: 640.0, running reward: 249.05 at episode 3739, frame count 2270000
best score of last 100: 640.0, running reward: 243.10 at episode 3754, frame count 2280000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3150.mp4
best score of last 100: 620.0, running reward: 232.45 at episode 3770, frame count 2290000
best score of last 100: 620.0, running reward: 231.30 at episode 3786, frame count 2300000
best score of last 100: 685.0, running reward: 226.40 at episode 3801, frame count 2310000
best score of last 100: 765.0, running reward: 230.40 at episode 3814, frame count 2320000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3200.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3200.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3200.mp4
best score of last 100: 765.0, running reward: 239.95 at episode 3827, frame count 2330000
best score of last 100: 765.0, running reward: 248.55 at episode 3840, frame count 2340000
best score of last 100: 765.0, running reward: 249.30 at episode 3856, frame count 2350000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3250.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3250.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3250.mp4
best score of last 100: 765.0, running reward: 253.45 at episode 3870, frame count 2360000
best score of last 100: 765.0, running reward: 254.05 at episode 3884, frame count 2370000
best score of last 100: 765.0, running reward: 252.45 at episode 3899, frame count 2380000
best score of last 100: 545.0, running reward: 241.90 at episode 3915, frame count 2390000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3300.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3300.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3300.mp4
best score of last 100: 545.0, running reward: 229.65 at episode 3932, frame count 2400000
best score of last 100: 545.0, running reward: 232.15 at episode 3946, frame count 2410000
best score of last 100: 545.0, running reward: 229.20 at episode 3962, frame count 2420000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3350.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3350.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3350.mp4
best score of last 100: 545.0, running reward: 221.05 at episode 3978, frame count 2430000
best score of last 100: 650.0, running reward: 219.70 at episode 3994, frame count 2440000
best score of last 100: 650.0, running reward: 216.15 at episode 4011, frame count 2450000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3400.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3400.mp4
best score of last 100: 650.0, running reward: 223.05 at episode 4028, frame count 2460000
best score of last 100: 650.0, running reward: 206.75 at episode 4044, frame count 2470000
best score of last 100: 650.0, running reward: 193.40 at episode 4061, frame count 2480000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3450.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3450.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3450.mp4
best score of last 100: 650.0, running reward: 207.75 at episode 4076, frame count 2490000
best score of last 100: 565.0, running reward: 196.55 at episode 4090, frame count 2500000
best score of last 100: 565.0, running reward: 200.20 at episode 4107, frame count 2510000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3500.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3500.mp4
best score of last 100: 565.0, running reward: 197.55 at episode 4123, frame count 2520000
best score of last 100: 490.0, running reward: 197.90 at episode 4140, frame count 2530000
best score of last 100: 490.0, running reward: 204.90 at episode 4156, frame count 2540000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3550.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3550.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3550.mp4
best score of last 100: 460.0, running reward: 199.00 at episode 4172, frame count 2550000
best score of last 100: 460.0, running reward: 201.15 at episode 4188, frame count 2560000
best score of last 100: 460.0, running reward: 209.25 at episode 4203, frame count 2570000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3600.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3600.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3600.mp4
best score of last 100: 460.0, running reward: 204.65 at episode 4218, frame count 2580000
best score of last 100: 460.0, running reward: 209.60 at episode 4234, frame count 2590000
best score of last 100: 460.0, running reward: 213.85 at episode 4249, frame count 2600000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3650.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3650.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3650.mp4
best score of last 100: 425.0, running reward: 197.70 at episode 4268, frame count 2610000
best score of last 100: 590.0, running reward: 211.70 at episode 4282, frame count 2620000
best score of last 100: 590.0, running reward: 213.65 at episode 4297, frame count 2630000
best score of last 100: 590.0, running reward: 217.40 at episode 4313, frame count 2640000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3700.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3700.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3700.mp4
best score of last 100: 590.0, running reward: 222.00 at episode 4328, frame count 2650000
best score of last 100: 590.0, running reward: 225.45 at episode 4342, frame count 2660000
best score of last 100: 590.0, running reward: 230.65 at episode 4358, frame count 2670000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3750.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3750.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3750.mp4
best score of last 100: 520.0, running reward: 228.40 at episode 4374, frame count 2680000
best score of last 100: 520.0, running reward: 227.95 at episode 4388, frame count 2690000
best score of last 100: 520.0, running reward: 221.25 at episode 4404, frame count 2700000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3800.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3800.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3800.mp4
best score of last 100: 520.0, running reward: 223.35 at episode 4417, frame count 2710000
best score of last 100: 485.0, running reward: 214.70 at episode 4434, frame count 2720000
best score of last 100: 485.0, running reward: 217.35 at episode 4448, frame count 2730000
best score of last 100: 510.0, running reward: 222.65 at episode 4463, frame count 2740000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3850.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3850.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3850.mp4
best score of last 100: 545.0, running reward: 231.40 at episode 4478, frame count 2750000
best score of last 100: 545.0, running reward: 228.70 at episode 4491, frame count 2760000
best score of last 100: 545.0, running reward: 226.05 at episode 4507, frame count 2770000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3900.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3900.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3900.mp4
best score of last 100: 545.0, running reward: 221.75 at episode 4522, frame count 2780000
best score of last 100: 545.0, running reward: 221.90 at episode 4538, frame count 2790000
best score of last 100: 545.0, running reward: 222.50 at episode 4552, frame count 2800000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-3950.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-3950.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-3950.mp4
best score of last 100: 545.0, running reward: 218.05 at episode 4568, frame count 2810000
best score of last 100: 545.0, running reward: 221.95 at episode 4582, frame count 2820000
best score of last 100: 545.0, running reward: 228.90 at episode 4597, frame count 2830000
best score of last 100: 545.0, running reward: 239.60 at episode 4612, frame count 2840000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4000.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4000.mp4
best score of last 100: 545.0, running reward: 239.85 at episode 4627, frame count 2850000
best score of last 100: 535.0, running reward: 228.55 at episode 4643, frame count 2860000
best score of last 100: 535.0, running reward: 227.85 at episode 4660, frame count 2870000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4050.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4050.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4050.mp4
best score of last 100: 535.0, running reward: 230.55 at episode 4673, frame count 2880000
best score of last 100: 535.0, running reward: 225.75 at episode 4689, frame count 2890000
best score of last 100: 510.0, running reward: 226.80 at episode 4703, frame count 2900000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4100.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4100.mp4
best score of last 100: 680.0, running reward: 224.95 at episode 4718, frame count 2910000
best score of last 100: 680.0, running reward: 231.60 at episode 4732, frame count 2920000
best score of last 100: 680.0, running reward: 232.20 at episode 4747, frame count 2930000
best score of last 100: 680.0, running reward: 240.00 at episode 4760, frame count 2940000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4150.mp4
best score of last 100: 680.0, running reward: 235.40 at episode 4777, frame count 2950000
best score of last 100: 680.0, running reward: 237.70 at episode 4792, frame count 2960000
best score of last 100: 680.0, running reward: 231.50 at episode 4806, frame count 2970000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4200.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4200.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4200.mp4
best score of last 100: 575.0, running reward: 228.40 at episode 4821, frame count 2980000
best score of last 100: 635.0, running reward: 223.95 at episode 4837, frame count 2990000
best score of last 100: 635.0, running reward: 221.00 at episode 4853, frame count 3000000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4250.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4250.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4250.mp4
best score of last 100: 635.0, running reward: 218.45 at episode 4869, frame count 3010000
best score of last 100: 635.0, running reward: 214.00 at episode 4885, frame count 3020000
best score of last 100: 650.0, running reward: 219.60 at episode 4899, frame count 3030000
best score of last 100: 780.0, running reward: 232.65 at episode 4913, frame count 3040000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4300.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4300.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4300.mp4
best score of last 100: 780.0, running reward: 230.95 at episode 4929, frame count 3050000
best score of last 100: 780.0, running reward: 241.00 at episode 4942, frame count 3060000
best score of last 100: 780.0, running reward: 251.65 at episode 4956, frame count 3070000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4350.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4350.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4350.mp4
best score of last 100: 780.0, running reward: 254.55 at episode 4970, frame count 3080000
best score of last 100: 780.0, running reward: 262.60 at episode 4984, frame count 3090000
best score of last 100: 780.0, running reward: 257.65 at episode 4998, frame count 3100000
best score of last 100: 565.0, running reward: 238.60 at episode 5013, frame count 3110000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4400.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4400.mp4
best score of last 100: 565.0, running reward: 234.95 at episode 5031, frame count 3120000
best score of last 100: 540.0, running reward: 222.80 at episode 5045, frame count 3130000
best score of last 100: 530.0, running reward: 199.50 at episode 5062, frame count 3140000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4450.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4450.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4450.mp4
best score of last 100: 580.0, running reward: 204.90 at episode 5076, frame count 3150000
best score of last 100: 580.0, running reward: 203.65 at episode 5091, frame count 3160000
best score of last 100: 580.0, running reward: 200.50 at episode 5107, frame count 3170000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4500.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4500.mp4
best score of last 100: 580.0, running reward: 209.85 at episode 5121, frame count 3180000
best score of last 100: 695.0, running reward: 213.35 at episode 5135, frame count 3190000
best score of last 100: 695.0, running reward: 216.80 at episode 5151, frame count 3200000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4550.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4550.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4550.mp4
best score of last 100: 695.0, running reward: 217.25 at episode 5168, frame count 3210000
best score of last 100: 695.0, running reward: 213.85 at episode 5183, frame count 3220000
best score of last 100: 695.0, running reward: 213.05 at episode 5197, frame count 3230000
best score of last 100: 695.0, running reward: 220.60 at episode 5214, frame count 3240000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4600.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4600.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4600.mp4
best score of last 100: 565.0, running reward: 210.95 at episode 5229, frame count 3250000
best score of last 100: 565.0, running reward: 209.85 at episode 5245, frame count 3260000
best score of last 100: 720.0, running reward: 216.25 at episode 5260, frame count 3270000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4650.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4650.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4650.mp4
best score of last 100: 720.0, running reward: 218.90 at episode 5276, frame count 3280000
best score of last 100: 720.0, running reward: 223.70 at episode 5290, frame count 3290000
best score of last 100: 720.0, running reward: 222.00 at episode 5305, frame count 3300000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4700.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4700.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4700.mp4
best score of last 100: 720.0, running reward: 215.25 at episode 5321, frame count 3310000
best score of last 100: 720.0, running reward: 222.60 at episode 5338, frame count 3320000
best score of last 100: 540.0, running reward: 217.75 at episode 5352, frame count 3330000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4750.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4750.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4750.mp4
best score of last 100: 540.0, running reward: 222.55 at episode 5370, frame count 3340000
best score of last 100: 625.0, running reward: 213.60 at episode 5384, frame count 3350000
best score of last 100: 625.0, running reward: 207.80 at episode 5398, frame count 3360000
best score of last 100: 625.0, running reward: 209.75 at episode 5414, frame count 3370000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4800.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4800.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4800.mp4
best score of last 100: 625.0, running reward: 217.85 at episode 5429, frame count 3380000
best score of last 100: 625.0, running reward: 220.05 at episode 5443, frame count 3390000
best score of last 100: 625.0, running reward: 221.20 at episode 5459, frame count 3400000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4850.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4850.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4850.mp4
best score of last 100: 625.0, running reward: 220.40 at episode 5476, frame count 3410000
best score of last 100: 490.0, running reward: 212.20 at episode 5491, frame count 3420000
best score of last 100: 490.0, running reward: 214.40 at episode 5505, frame count 3430000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4900.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4900.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4900.mp4
best score of last 100: 950.0, running reward: 227.55 at episode 5519, frame count 3440000
best score of last 100: 950.0, running reward: 224.20 at episode 5534, frame count 3450000
best score of last 100: 950.0, running reward: 222.40 at episode 5548, frame count 3460000
best score of last 100: 950.0, running reward: 219.90 at episode 5564, frame count 3470000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-4950.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-4950.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-4950.mp4
best score of last 100: 950.0, running reward: 222.90 at episode 5580, frame count 3480000
best score of last 100: 950.0, running reward: 221.55 at episode 5595, frame count 3490000
best score of last 100: 795.0, running reward: 212.85 at episode 5611, frame count 3500000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5000.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5000.mp4
best score of last 100: 795.0, running reward: 208.90 at episode 5627, frame count 3510000
best score of last 100: 795.0, running reward: 204.40 at episode 5643, frame count 3520000
best score of last 100: 795.0, running reward: 205.35 at episode 5658, frame count 3530000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5050.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5050.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5050.mp4
best score of last 100: 795.0, running reward: 205.15 at episode 5677, frame count 3540000
best score of last 100: 795.0, running reward: 207.20 at episode 5693, frame count 3550000
best score of last 100: 795.0, running reward: 206.95 at episode 5709, frame count 3560000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5100.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5100.mp4
best score of last 100: 575.0, running reward: 201.75 at episode 5725, frame count 3570000
best score of last 100: 575.0, running reward: 193.45 at episode 5742, frame count 3580000
best score of last 100: 575.0, running reward: 188.10 at episode 5756, frame count 3590000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5150.mp4
best score of last 100: 575.0, running reward: 189.05 at episode 5771, frame count 3600000
best score of last 100: 575.0, running reward: 191.05 at episode 5787, frame count 3610000
best score of last 100: 920.0, running reward: 197.25 at episode 5803, frame count 3620000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5200.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5200.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5200.mp4
best score of last 100: 920.0, running reward: 195.50 at episode 5819, frame count 3630000
best score of last 100: 920.0, running reward: 203.45 at episode 5834, frame count 3640000
best score of last 100: 920.0, running reward: 215.00 at episode 5847, frame count 3650000
best score of last 100: 920.0, running reward: 212.35 at episode 5863, frame count 3660000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5250.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5250.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5250.mp4
best score of last 100: 920.0, running reward: 212.45 at episode 5878, frame count 3670000
best score of last 100: 920.0, running reward: 211.75 at episode 5895, frame count 3680000
best score of last 100: 545.0, running reward: 206.55 at episode 5909, frame count 3690000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5300.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5300.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5300.mp4
best score of last 100: 550.0, running reward: 204.90 at episode 5926, frame count 3700000
best score of last 100: 565.0, running reward: 201.50 at episode 5940, frame count 3710000
best score of last 100: 695.0, running reward: 201.80 at episode 5956, frame count 3720000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5350.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5350.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5350.mp4
best score of last 100: 695.0, running reward: 204.15 at episode 5972, frame count 3730000
best score of last 100: 695.0, running reward: 210.30 at episode 5988, frame count 3740000
best score of last 100: 695.0, running reward: 209.05 at episode 6004, frame count 3750000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5400.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5400.mp4
best score of last 100: 695.0, running reward: 214.70 at episode 6020, frame count 3760000
best score of last 100: 695.0, running reward: 214.70 at episode 6036, frame count 3770000
best score of last 100: 590.0, running reward: 202.55 at episode 6053, frame count 3780000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5450.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5450.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5450.mp4
best score of last 100: 590.0, running reward: 201.75 at episode 6070, frame count 3790000
best score of last 100: 590.0, running reward: 205.20 at episode 6084, frame count 3800000
best score of last 100: 590.0, running reward: 215.25 at episode 6098, frame count 3810000
best score of last 100: 590.0, running reward: 211.70 at episode 6112, frame count 3820000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5500.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5500.mp4



                                                                            

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5500.mp4
best score of last 100: 590.0, running reward: 214.65 at episode 6125, frame count 3830000
best score of last 100: 575.0, running reward: 212.95 at episode 6142, frame count 3840000
best score of last 100: 575.0, running reward: 220.30 at episode 6156, frame count 3850000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5550.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5550.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5550.mp4
best score of last 100: 575.0, running reward: 219.40 at episode 6172, frame count 3860000
best score of last 100: 635.0, running reward: 224.90 at episode 6187, frame count 3870000
best score of last 100: 635.0, running reward: 219.60 at episode 6203, frame count 3880000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5600.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5600.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5600.mp4
best score of last 100: 635.0, running reward: 209.05 at episode 6218, frame count 3890000
best score of last 100: 635.0, running reward: 201.25 at episode 6234, frame count 3900000
best score of last 100: 635.0, running reward: 203.05 at episode 6248, frame count 3910000
best score of last 100: 685.0, running reward: 212.05 at episode 6262, frame count 3920000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5650.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5650.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5650.mp4
best score of last 100: 685.0, running reward: 206.95 at episode 6278, frame count 3930000
best score of last 100: 685.0, running reward: 202.10 at episode 6294, frame count 3940000
best score of last 100: 685.0, running reward: 204.20 at episode 6309, frame count 3950000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5700.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5700.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5700.mp4
best score of last 100: 685.0, running reward: 210.90 at episode 6325, frame count 3960000
best score of last 100: 685.0, running reward: 210.50 at episode 6340, frame count 3970000
best score of last 100: 685.0, running reward: 213.15 at episode 6355, frame count 3980000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5750.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5750.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5750.mp4
best score of last 100: 515.0, running reward: 207.15 at episode 6370, frame count 3990000
best score of last 100: 470.0, running reward: 204.05 at episode 6386, frame count 4000000
best score of last 100: 470.0, running reward: 204.20 at episode 6401, frame count 4010000
best score of last 100: 575.0, running reward: 217.40 at episode 6414, frame count 4020000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5800.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5800.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5800.mp4
best score of last 100: 575.0, running reward: 219.05 at episode 6429, frame count 4030000
best score of last 100: 575.0, running reward: 220.50 at episode 6443, frame count 4040000
best score of last 100: 575.0, running reward: 220.30 at episode 6458, frame count 4050000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5850.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5850.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5850.mp4
best score of last 100: 575.0, running reward: 213.70 at episode 6475, frame count 4060000
best score of last 100: 575.0, running reward: 223.45 at episode 6486, frame count 4070000
best score of last 100: 575.0, running reward: 229.15 at episode 6500, frame count 4080000
best score of last 100: 845.0, running reward: 224.15 at episode 6515, frame count 4090000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5900.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5900.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5900.mp4
best score of last 100: 845.0, running reward: 229.05 at episode 6528, frame count 4100000
best score of last 100: 845.0, running reward: 228.60 at episode 6543, frame count 4110000
best score of last 100: 845.0, running reward: 227.65 at episode 6559, frame count 4120000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-5950.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-5950.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-5950.mp4
best score of last 100: 845.0, running reward: 238.70 at episode 6574, frame count 4130000
best score of last 100: 845.0, running reward: 240.25 at episode 6588, frame count 4140000
best score of last 100: 510.0, running reward: 233.05 at episode 6603, frame count 4150000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6000.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6000.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6000.mp4
best score of last 100: 515.0, running reward: 232.00 at episode 6617, frame count 4160000
best score of last 100: 520.0, running reward: 224.45 at episode 6634, frame count 4170000
best score of last 100: 520.0, running reward: 226.45 at episode 6647, frame count 4180000
best score of last 100: 520.0, running reward: 227.90 at episode 6662, frame count 4190000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6050.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6050.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6050.mp4
best score of last 100: 520.0, running reward: 209.00 at episode 6680, frame count 4200000
best score of last 100: 520.0, running reward: 213.45 at episode 6692, frame count 4210000
best score of last 100: 520.0, running reward: 211.45 at episode 6708, frame count 4220000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6100.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6100.mp4
best score of last 100: 565.0, running reward: 215.75 at episode 6723, frame count 4230000
best score of last 100: 565.0, running reward: 220.05 at episode 6739, frame count 4240000
best score of last 100: 565.0, running reward: 219.25 at episode 6754, frame count 4250000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6150.mp4
best score of last 100: 565.0, running reward: 222.20 at episode 6770, frame count 4260000
best score of last 100: 1020.0, running reward: 227.95 at episode 6786, frame count 4270000
best score of last 100: 1020.0, running reward: 226.05 at episode 6801, frame count 4280000
best score of last 100: 1020.0, running reward: 223.70 at episode 6815, frame count 4290000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6200.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6200.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6200.mp4
best score of last 100: 1020.0, running reward: 225.10 at episode 6830, frame count 4300000
best score of last 100: 1020.0, running reward: 227.15 at episode 6842, frame count 4310000
best score of last 100: 1020.0, running reward: 231.80 at episode 6856, frame count 4320000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6250.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6250.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6250.mp4
best score of last 100: 580.0, running reward: 214.90 at episode 6875, frame count 4330000
best score of last 100: 580.0, running reward: 218.60 at episode 6888, frame count 4340000
best score of last 100: 580.0, running reward: 220.45 at episode 6902, frame count 4350000
best score of last 100: 580.0, running reward: 214.10 at episode 6916, frame count 4360000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6300.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6300.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6300.mp4
best score of last 100: 580.0, running reward: 203.40 at episode 6933, frame count 4370000
best score of last 100: 620.0, running reward: 206.20 at episode 6947, frame count 4380000
best score of last 100: 620.0, running reward: 206.75 at episode 6963, frame count 4390000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6350.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6350.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6350.mp4
best score of last 100: 620.0, running reward: 219.70 at episode 6977, frame count 4400000
best score of last 100: 620.0, running reward: 214.05 at episode 6992, frame count 4410000
best score of last 100: 620.0, running reward: 217.40 at episode 7007, frame count 4420000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6400.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6400.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6400.mp4
best score of last 100: 620.0, running reward: 229.05 at episode 7021, frame count 4430000
best score of last 100: 510.0, running reward: 235.10 at episode 7035, frame count 4440000
best score of last 100: 510.0, running reward: 225.25 at episode 7051, frame count 4450000
best score of last 100: 505.0, running reward: 218.55 at episode 7066, frame count 4460000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6450.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6450.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6450.mp4
best score of last 100: 505.0, running reward: 213.00 at episode 7082, frame count 4470000
best score of last 100: 505.0, running reward: 212.45 at episode 7097, frame count 4480000
best score of last 100: 505.0, running reward: 202.15 at episode 7113, frame count 4490000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6500.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6500.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6500.mp4
best score of last 100: 460.0, running reward: 193.90 at episode 7129, frame count 4500000
best score of last 100: 540.0, running reward: 200.40 at episode 7144, frame count 4510000
best score of last 100: 540.0, running reward: 203.50 at episode 7162, frame count 4520000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6550.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6550.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6550.mp4
best score of last 100: 595.0, running reward: 210.15 at episode 7176, frame count 4530000
best score of last 100: 595.0, running reward: 208.55 at episode 7193, frame count 4540000
best score of last 100: 595.0, running reward: 215.30 at episode 7207, frame count 4550000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6600.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6600.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6600.mp4
best score of last 100: 595.0, running reward: 221.55 at episode 7221, frame count 4560000
best score of last 100: 595.0, running reward: 219.05 at episode 7235, frame count 4570000
best score of last 100: 595.0, running reward: 226.80 at episode 7249, frame count 4580000
best score of last 100: 595.0, running reward: 238.45 at episode 7262, frame count 4590000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6650.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6650.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6650.mp4
best score of last 100: 640.0, running reward: 243.35 at episode 7275, frame count 4600000
best score of last 100: 640.0, running reward: 241.80 at episode 7291, frame count 4610000
best score of last 100: 640.0, running reward: 238.75 at episode 7305, frame count 4620000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6700.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6700.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6700.mp4
best score of last 100: 640.0, running reward: 241.55 at episode 7319, frame count 4630000
best score of last 100: 640.0, running reward: 233.75 at episode 7335, frame count 4640000
best score of last 100: 640.0, running reward: 229.60 at episode 7350, frame count 4650000
best score of last 100: 640.0, running reward: 219.85 at episode 7365, frame count 4660000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6750.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6750.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6750.mp4
best score of last 100: 515.0, running reward: 215.85 at episode 7380, frame count 4670000
best score of last 100: 515.0, running reward: 212.05 at episode 7396, frame count 4680000
best score of last 100: 515.0, running reward: 210.45 at episode 7411, frame count 4690000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6800.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6800.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6800.mp4
best score of last 100: 620.0, running reward: 212.80 at episode 7425, frame count 4700000
best score of last 100: 620.0, running reward: 216.35 at episode 7441, frame count 4710000
best score of last 100: 620.0, running reward: 216.70 at episode 7456, frame count 4720000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6850.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6850.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6850.mp4
best score of last 100: 620.0, running reward: 209.95 at episode 7471, frame count 4730000
best score of last 100: 620.0, running reward: 207.65 at episode 7486, frame count 4740000
best score of last 100: 620.0, running reward: 211.05 at episode 7502, frame count 4750000
best score of last 100: 580.0, running reward: 215.30 at episode 7516, frame count 4760000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6900.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6900.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6900.mp4
best score of last 100: 580.0, running reward: 206.60 at episode 7531, frame count 4770000
best score of last 100: 580.0, running reward: 211.80 at episode 7545, frame count 4780000
best score of last 100: 525.0, running reward: 205.20 at episode 7561, frame count 4790000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-6950.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-6950.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-6950.mp4
best score of last 100: 525.0, running reward: 214.65 at episode 7577, frame count 4800000
best score of last 100: 525.0, running reward: 219.30 at episode 7591, frame count 4810000
best score of last 100: 525.0, running reward: 210.25 at episode 7608, frame count 4820000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-7000.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-7000.mp4



                                                                           

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-7000.mp4
best score of last 100: 525.0, running reward: 207.90 at episode 7624, frame count 4830000
best score of last 100: 525.0, running reward: 202.80 at episode 7641, frame count 4840000
best score of last 100: 565.0, running reward: 213.85 at episode 7653, frame count 4850000
best score of last 100: 565.0, running reward: 227.05 at episode 7666, frame count 4860000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-7050.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-7050.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-7050.mp4
best score of last 100: 565.0, running reward: 232.25 at episode 7679, frame count 4870000
best score of last 100: 860.0, running reward: 236.05 at episode 7691, frame count 4880000
best score of last 100: 860.0, running reward: 247.85 at episode 7706, frame count 4890000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-7100.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-7100.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-7100.mp4
best score of last 100: 860.0, running reward: 249.15 at episode 7722, frame count 4900000
best score of last 100: 860.0, running reward: 254.65 at episode 7735, frame count 4910000
best score of last 100: 860.0, running reward: 259.60 at episode 7749, frame count 4920000
best score of last 100: 860.0, running reward: 250.05 at episode 7764, frame count 4930000
MoviePy - Building video d:\workdir\DeepQLab\Outputs\videos\training-episode-7150.mp4.
MoviePy - Writing video d:\workdir\DeepQLab\Outputs\videos\training-episode-7150.mp4



                                                                          

MoviePy - Done !
MoviePy - video ready d:\workdir\DeepQLab\Outputs\videos\training-episode-7150.mp4
