<a href="https://colab.research.google.com/github/Bryan-Az/RL-SARSA-Gym/blob/main/reinforcement_sarsa_gym.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Reinforcement Learning using SARSA in OpenAI Gym / Gymnasium

## Imports and Installs

In [1]:
!pip install gymnasium[atari] # for the gym library
!pip install gymnasium[accept-rom-license] #to add atari envs
!pip install ale-py # for atari envs
import gymnasium as gym
import numpy as np
import random
import time
from IPython.display import clear_output

Collecting gymnasium[atari]
  Downloading gymnasium-0.29.1-py3-none-any.whl.metadata (10 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium[atari])
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Collecting shimmy<1.0,>=0.1.0 (from shimmy[atari]<1.0,>=0.1.0; extra == "atari"->gymnasium[atari])
  Downloading Shimmy-0.2.1-py3-none-any.whl.metadata (2.3 kB)
Collecting ale-py~=0.8.1 (from shimmy[atari]<1.0,>=0.1.0; extra == "atari"->gymnasium[atari])
  Downloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.1 kB)
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Downloading Shimmy-0.2.1-py3-none-any.whl (25 kB)
Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ale_py-0.8.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m━━━━━

## Step 1: Creating the Environment

In [2]:
env_name = "MountainCar-v0"

  and should_run_async(code)


In [3]:
env = gym.make(env_name, render_mode="rgb_array")

## Step 2: Creating the Agent with SARSA Learning

These are the hyperparameters specific to the training within the environment and learning task. These are used within the Agent / Learner class and are similar to the training of a neural network.

In [132]:
#MAX_NUM_EPISODES = 1000
MAX_NUM_EPISODES = 25000 #100001
STEPS_PER_EPISODE = 200 #  This is specific to MountainCar. May change with env
EPSILON_MIN = 0.005
max_num_steps = MAX_NUM_EPISODES * STEPS_PER_EPISODE
EPSILON_DECAY = 500 * EPSILON_MIN / max_num_steps
ALPHA = 0.1  # Learning rate
GAMMA = 0.9 #1.0  # Discount factor
NUM_DISCRETE_BINS = 30  # Number of bins to Discretize each observation dim

In [144]:
class Q_Learner(object):
    def __init__(self, env):
        self.obs_shape = env.observation_space.shape
        self.obs_high = env.observation_space.high
        self.obs_low = env.observation_space.low
        # printing all the obs_ for debugging
        #print(self.obs_high, 'obs high')
        #print(self.obs_low, ' obs low')
        self.obs_bins = NUM_DISCRETE_BINS  # Number of bins to Discretize each observation dim
        self.bin_width = (self.obs_high - self.obs_low) / self.obs_bins
        self.action_shape = env.action_space.n
        # Create a multi-dimensional array (aka. Table) to represent the
        # Q-values
        self.Q = np.zeros((self.obs_bins + 1, self.obs_bins + 1,
                           self.action_shape))  # (51 x 51 x 3)
        self.alpha = ALPHA  # Learning rate
        self.gamma = GAMMA  # Discount factor
        self.epsilon = 0.9
        ### Unique to SARSA Method ###
        rewards = []
    ''' def discretize(self, obs):
        # Convert the observation to a tuple of integers using list comprehension
        # and unpack it using the * operator when indexing into self.Q
        return tuple([int(np.round((obs[i] - self.obs_low[i]) / self.bin_width[i])) for i in range(len(obs))])


    def get_action(self, obs):
        discretized_obs = self.discretize(obs)
        # Epsilon-Greedy action selection
        if self.epsilon > EPSILON_MIN:
            self.epsilon -= EPSILON_DECAY
        if np.random.random() > self.epsilon:
            return np.argmax(self.Q[discretized_obs])  # unpack the tuple
        else:  # Choose a random action
            return np.random.choice([a for a in range(self.action_shape)])
    '''
    def discretize(self, obs):
      #clipped_obs = np.clip(obs[0], self.obs_low, self.obs_high)
      #return tuple(((clipped_obs - self.obs_low) / self.bin_width).astype(int))
      #discretized_env = (self.obs_high - self.obs_low) / self.obs_bins
      #discretized_pos = int((clipped_obs[0] - self.obs_low[0]) / discretized_env[0])
      #discretized_vel = int((clipped_obs[1] - self.obs_low[1]) / discretized_env[1])
      #return discretized_pos, discretized_vel
      discrete_obs = (obs[0] - self.obs_low) / self.bin_width
      return tuple(np.clip(discrete_obs.astype(int), 0, self.obs_bins - 1))

    def get_action(self, obs):
        discretized_obs = self.discretize(obs)
        # Epsilon-Greedy action selection
        if self.epsilon > EPSILON_MIN:
            self.epsilon -= EPSILON_DECAY
        if np.random.random() > self.epsilon:
            return np.argmax(self.Q[discretized_obs])
        else:  # Choose a random action
            return np.random.choice([a for a in range(self.action_shape)])

    def learn(self, obs, action, reward, next_obs, next_action):
        '''
        This is the SARSA learning method that uses the get_action function
        to retrieve the next state-action
        as input to updating the Q learning matrix, versus the
        max Q value of the next state as in Q Learning.
        '''
        discretized_obs = self.discretize(obs)
        discretized_next_obs = self.discretize(next_obs)
        # change self.Q[discretized_next_obs][next_action] to np.max(self.Q[discretized_next_obs])
        # if want to use Q learning
        td_target = reward + self.gamma * self.Q[discretized_next_obs][next_action]
        td_error = td_target - self.Q[discretized_obs][action]
        self.Q[discretized_obs][action] += self.alpha * td_error

def train(agent, env):
    best_reward = -float('inf')
    for episode in range(MAX_NUM_EPISODES):
        done = False
        obs = env.reset()
        action = agent.get_action(obs)
        ### printing the obs for debugging
        #print(obs, ' obs')
        #print(type(obs), ' obs type')
        #print(obs[0][0], ' obs[0]')
        #print(obs[0][1], ' obs[1]')
        #print(obs[1], ' obs[1]')
        #print(type(obs[0]), ' obs[0] type')
        #print(type(obs[1]), ' obs[1] type')
        total_reward = 0.0
        while not done:
            next_obs, reward, terminated, truncated, info = env.step(action)
            # retrieving the next action necessary for the SARSA learning method
            next_action = agent.get_action(next_obs)
            done = terminated or truncated
            agent.learn(obs, action, reward, next_obs, next_action)
            obs = next_obs
            action = next_action
            total_reward += reward
        if total_reward > best_reward:
            best_reward = total_reward
        print("Episode#:{} reward:{} best_reward:{} eps:{}".format(episode,
                                     total_reward, best_reward, agent.epsilon))
    # Return the trained policy
    return np.argmax(agent.Q, axis=2)


def test(agent, env, policy):
    done = False
    obs = env.reset()
    total_reward = 0.0
    while not done:
        action = policy[agent.discretize(obs)]
        next_obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        obs = next_obs
        total_reward += reward
    return total_reward

  and should_run_async(code)


## Step 3: Instantiating & Training the Learner / Agent
In this stage, the agent is still learning its' preference / policy. In the testing phase, the outcome of its' preferences will be monitored and visualized.

In [145]:
agent = Q_Learner(env)
learned_policy = train(agent, env)

Moviepy - Building video /content/gym_monitor_output/rl-video-episode-1000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-1000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-1000.mp4
Episode#:0 reward:-200.0 best_reward:-200.0 eps:0.8998000000000165
Episode#:1 reward:-200.0 best_reward:-200.0 eps:0.8996000000000329
Episode#:2 reward:-200.0 best_reward:-200.0 eps:0.8994000000000494
Episode#:3 reward:-200.0 best_reward:-200.0 eps:0.8992000000000658
Episode#:4 reward:-200.0 best_reward:-200.0 eps:0.8990000000000823
Episode#:5 reward:-200.0 best_reward:-200.0 eps:0.8988000000000987
Episode#:6 reward:-200.0 best_reward:-200.0 eps:0.8986000000001152
Episode#:7 reward:-200.0 best_reward:-200.0 eps:0.8984000000001316
Episode#:8 reward:-200.0 best_reward:-200.0 eps:0.8982000000001481
Episode#:9 reward:-200.0 best_reward:-200.0 eps:0.8980000000001646
Episode#:10 reward:-200.0 best_reward:-200.0 eps:0.897800000000181
Episode#:11 reward:-200.0 best_reward:-200.0 eps:0.8976000000001975
Episode#:12 reward:-200.0 best_reward:-200.0 eps:0.8974000000002139
Episode#:13 reward:-200.0 best_rew



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-101000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-100000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-100000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-100000.mp4
Episode#:528 reward:-200.0 best_reward:-200.0 eps:0.7942000000087038
Episode#:529 reward:-200.0 best_reward:-200.0 eps:0.7940000000087203
Episode#:530 reward:-200.0 best_reward:-200.0 eps:0.7938000000087367
Episode#:531 reward:-200.0 best_reward:-200.0 eps:0.7936000000087532
Episode#:532 reward:-200.0 best_reward:-200.0 eps:0.7934000000087696
Episode#:533 reward:-200.0 best_reward:-200.0 eps:0.7932000000087861
Episode#:534 reward:-200.0 best_reward:-200.0 eps:0.7930000000088026
Episode#:535 reward:-200.0 best_reward:-200.0 eps:0.792800000008819
Episode#:536 reward:-200.0 best_reward:-200.0 eps:0.7926000000088355
Episode#:537 reward:-200.0 best_reward:-200.0 eps:0.7924000000088519
Episode#:538 reward:-200.0 best_reward:-200.0 eps:0.7922000000088684
Episode#:539 reward:-200.0 best_reward:-200.0 eps:0.7920000000088848
Episode#:540 reward:-200.0 best_reward:-200.0 eps:0.7918000000089013
Episode#:



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-2000.mp4
Episode#:1000 reward:-200.0 best_reward:-200.0 eps:0.6998000000164698
Episode#:1001 reward:-200.0 best_reward:-200.0 eps:0.6996000000164863
Episode#:1002 reward:-200.0 best_reward:-200.0 eps:0.6994000000165027
Episode#:1003 reward:-200.0 best_reward:-200.0 eps:0.6992000000165192
Episode#:1004 reward:-200.0 best_reward:-200.0 eps:0.6990000000165356
Episode#:1005 reward:-200.0 best_reward:-200.0 eps:0.6988000000165521
Episode#:1006 reward:-200.0 best_reward:-200.0 eps:0.6986000000165685
Episode#:1007 reward:-200.0 best_reward:-200.0 eps:0.698400000016585
Episode#:1008 reward:-200.0 best_reward:-200.0 eps:0.6982000000166014
Episode#:1009 reward:-200.0 best_reward:-200.0 eps:0.6980000000166179
Episode#:1010 reward:-200.0 best_reward:-200.0 eps:0.6978000000166343
Episode#:1011 reward:-200.0 best_reward:-200.0 eps:0.6976000000166508
Episode#:1012 reward:-200.0 best_reward:-200.0 eps:0.697400000016667



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-102000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-101000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-101000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-101000.mp4
Episode#:1528 reward:-200.0 best_reward:-200.0 eps:0.5942000000251572
Episode#:1529 reward:-200.0 best_reward:-200.0 eps:0.5940000000251736
Episode#:1530 reward:-200.0 best_reward:-200.0 eps:0.5938000000251901
Episode#:1531 reward:-200.0 best_reward:-200.0 eps:0.5936000000252065
Episode#:1532 reward:-200.0 best_reward:-200.0 eps:0.593400000025223
Episode#:1533 reward:-200.0 best_reward:-200.0 eps:0.5932000000252394
Episode#:1534 reward:-200.0 best_reward:-200.0 eps:0.5930000000252559
Episode#:1535 reward:-200.0 best_reward:-200.0 eps:0.5928000000252723
Episode#:1536 reward:-200.0 best_reward:-200.0 eps:0.5926000000252888
Episode#:1537 reward:-200.0 best_reward:-200.0 eps:0.5924000000253052
Episode#:1538 reward:-200.0 best_reward:-200.0 eps:0.5922000000253217
Episode#:1539 reward:-200.0 best_reward:-200.0 eps:0.5920000000253381
Episode#:1540 reward:-200.0 best_reward:-200.0 eps:0.5918000000253



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-3000.mp4
Episode#:2000 reward:-200.0 best_reward:-200.0 eps:0.4998000000329009
Episode#:2001 reward:-200.0 best_reward:-200.0 eps:0.4996000000328952
Episode#:2002 reward:-200.0 best_reward:-200.0 eps:0.4994000000328894
Episode#:2003 reward:-200.0 best_reward:-200.0 eps:0.4992000000328837
Episode#:2004 reward:-200.0 best_reward:-200.0 eps:0.4990000000328779
Episode#:2005 reward:-200.0 best_reward:-200.0 eps:0.49880000003287217
Episode#:2006 reward:-200.0 best_reward:-200.0 eps:0.4986000000328664
Episode#:2007 reward:-200.0 best_reward:-200.0 eps:0.49840000003286067
Episode#:2008 reward:-200.0 best_reward:-200.0 eps:0.4982000000328549
Episode#:2009 reward:-200.0 best_reward:-200.0 eps:0.49800000003284917
Episode#:2010 reward:-200.0 best_reward:-200.0 eps:0.4978000000328434
Episode#:2011 reward:-200.0 best_reward:-200.0 eps:0.49760000003283766
Episode#:2012 reward:-200.0 best_reward:-200.0 eps:0.4974000000



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-103000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-102000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-102000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-102000.mp4
Episode#:2528 reward:-200.0 best_reward:-200.0 eps:0.39420000002986433
Episode#:2529 reward:-200.0 best_reward:-200.0 eps:0.3940000000298586
Episode#:2530 reward:-200.0 best_reward:-200.0 eps:0.3938000000298528
Episode#:2531 reward:-200.0 best_reward:-200.0 eps:0.3936000000298471
Episode#:2532 reward:-200.0 best_reward:-200.0 eps:0.3934000000298413
Episode#:2533 reward:-200.0 best_reward:-200.0 eps:0.3932000000298356
Episode#:2534 reward:-200.0 best_reward:-200.0 eps:0.3930000000298298
Episode#:2535 reward:-200.0 best_reward:-200.0 eps:0.39280000002982407
Episode#:2536 reward:-200.0 best_reward:-200.0 eps:0.3926000000298183
Episode#:2537 reward:-200.0 best_reward:-200.0 eps:0.39240000002981257
Episode#:2538 reward:-200.0 best_reward:-200.0 eps:0.3922000000298068
Episode#:2539 reward:-200.0 best_reward:-200.0 eps:0.39200000002980107
Episode#:2540 reward:-200.0 best_reward:-200.0 eps:0.39180000



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-4000.mp4
Episode#:3000 reward:-200.0 best_reward:-200.0 eps:0.2998000000271498
Episode#:3001 reward:-200.0 best_reward:-200.0 eps:0.29960000002714404
Episode#:3002 reward:-200.0 best_reward:-200.0 eps:0.2994000000271383
Episode#:3003 reward:-200.0 best_reward:-200.0 eps:0.29920000002713254
Episode#:3004 reward:-200.0 best_reward:-200.0 eps:0.2990000000271268
Episode#:3005 reward:-200.0 best_reward:-200.0 eps:0.29880000002712104
Episode#:3006 reward:-200.0 best_reward:-200.0 eps:0.2986000000271153
Episode#:3007 reward:-200.0 best_reward:-200.0 eps:0.29840000002710954
Episode#:3008 reward:-200.0 best_reward:-200.0 eps:0.2982000000271038
Episode#:3009 reward:-200.0 best_reward:-200.0 eps:0.29800000002709803
Episode#:3010 reward:-200.0 best_reward:-200.0 eps:0.2978000000270923
Episode#:3011 reward:-200.0 best_reward:-200.0 eps:0.29760000002708653
Episode#:3012 reward:-200.0 best_reward:-200.0 eps:0.29740000



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-104000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-103000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-103000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-103000.mp4
Episode#:3528 reward:-200.0 best_reward:-200.0 eps:0.19420000002721072
Episode#:3529 reward:-200.0 best_reward:-200.0 eps:0.19400000002721607
Episode#:3530 reward:-200.0 best_reward:-200.0 eps:0.19380000002722142
Episode#:3531 reward:-200.0 best_reward:-200.0 eps:0.19360000002722677
Episode#:3532 reward:-200.0 best_reward:-200.0 eps:0.19340000002723212
Episode#:3533 reward:-200.0 best_reward:-200.0 eps:0.19320000002723747
Episode#:3534 reward:-200.0 best_reward:-200.0 eps:0.19300000002724282
Episode#:3535 reward:-200.0 best_reward:-200.0 eps:0.19280000002724818
Episode#:3536 reward:-200.0 best_reward:-200.0 eps:0.19260000002725353
Episode#:3537 reward:-200.0 best_reward:-200.0 eps:0.19240000002725888
Episode#:3538 reward:-200.0 best_reward:-200.0 eps:0.19220000002726423
Episode#:3539 reward:-200.0 best_reward:-200.0 eps:0.19200000002726958
Episode#:3540 reward:-200.0 best_reward:-200.0 eps:0.



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-5000.mp4
Episode#:4000 reward:-200.0 best_reward:-200.0 eps:0.099800000029037
Episode#:4001 reward:-200.0 best_reward:-200.0 eps:0.0996000000290368
Episode#:4002 reward:-200.0 best_reward:-200.0 eps:0.0994000000290366
Episode#:4003 reward:-200.0 best_reward:-200.0 eps:0.0992000000290364
Episode#:4004 reward:-200.0 best_reward:-200.0 eps:0.0990000000290362
Episode#:4005 reward:-200.0 best_reward:-200.0 eps:0.098800000029036
Episode#:4006 reward:-200.0 best_reward:-200.0 eps:0.0986000000290358
Episode#:4007 reward:-200.0 best_reward:-200.0 eps:0.0984000000290356
Episode#:4008 reward:-200.0 best_reward:-200.0 eps:0.0982000000290354
Episode#:4009 reward:-200.0 best_reward:-200.0 eps:0.0980000000290352
Episode#:4010 reward:-200.0 best_reward:-200.0 eps:0.097800000029035
Episode#:4011 reward:-200.0 best_reward:-200.0 eps:0.0976000000290348
Episode#:4012 reward:-200.0 best_reward:-200.0 eps:0.0974000000290346




Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-105000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-104000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-104000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-104000.mp4
Episode#:4528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:4540 reward:-200.0 best_reward:-



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-6000.mp4
Episode#:5000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5012 reward:-200.0 best_reward:-20



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-106000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-105000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-105000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-105000.mp4
Episode#:5528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:5540 reward:-200.0 best_reward:-



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-7000.mp4
Episode#:6000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6012 reward:-200.0 best_reward:-20



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-107000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-106000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-106000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-106000.mp4
Episode#:6528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:6540 reward:-200.0 best_reward:-



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-8000.mp4
Episode#:7000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7012 reward:-200.0 best_reward:-20



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-108000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-107000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-107000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-107000.mp4
Episode#:7528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:7540 reward:-200.0 best_reward:-



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-9000.mp4
Episode#:8000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8012 reward:-200.0 best_reward:-20



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-109000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-108000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-108000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-108000.mp4
Episode#:8528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:8540 reward:-200.0 best_reward:-



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-10000.mp4
Episode#:9000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9012 reward:-200.0 best_reward:-2



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-110000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-109000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-109000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-109000.mp4
Episode#:9528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:9540 reward:-200.0 best_reward:-



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-11000.mp4
Episode#:10000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-111000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-110000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-110000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-110000.mp4
Episode#:10528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:10540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-12000.mp4
Episode#:11000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-112000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-111000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-111000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-111000.mp4
Episode#:11528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:11540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-13000.mp4
Episode#:12000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-113000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-112000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-112000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-112000.mp4
Episode#:12528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:12540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-14000.mp4
Episode#:13000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-114000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-113000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-113000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-113000.mp4
Episode#:13528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:13540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-15000.mp4
Episode#:14000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-115000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-114000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-114000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-114000.mp4
Episode#:14528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:14540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-16000.mp4
Episode#:15000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-116000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-115000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-115000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-115000.mp4
Episode#:15528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:15540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-17000.mp4
Episode#:16000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-117000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-116000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-116000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-116000.mp4
Episode#:16528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:16540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-18000.mp4
Episode#:17000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-118000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-117000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-117000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-117000.mp4
Episode#:17528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:17540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-19000.mp4
Episode#:18000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-119000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-118000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-118000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-118000.mp4
Episode#:18528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:18540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-20000.mp4
Episode#:19000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-120000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-119000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-119000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-119000.mp4
Episode#:19528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:19540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-21000.mp4
Episode#:20000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-121000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-120000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-120000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-120000.mp4
Episode#:20528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:20540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-22000.mp4
Episode#:21000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-122000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-121000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-121000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-121000.mp4
Episode#:21528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:21540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-23000.mp4
Episode#:22000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-123000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-122000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-122000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-122000.mp4
Episode#:22528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:22540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-24000.mp4
Episode#:23000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-124000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-123000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-123000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-123000.mp4
Episode#:23528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:23540 reward:-200.0 



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-25000.mp4
Episode#:24000 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24001 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24002 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24003 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24004 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24005 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24006 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24007 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24008 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24009 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24010 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24011 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24012 reward:-200.0 b



Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-125000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-124000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-124000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-124000.mp4
Episode#:24528 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24529 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24530 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24531 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24532 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24533 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24534 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24535 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24536 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24537 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24538 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24539 reward:-200.0 best_reward:-200.0 eps:0.004999500028947066
Episode#:24540 reward:-200.0 

## Step 4: Evaluating the Training by Recording the Agent within the Environment

In [146]:
# Uses the Gym Monitor wrapper to evalaute the agent and record video
# only one video will be saved

# video of the final episode with the episode trigger
env = gym.wrappers.RecordVideo(
    env, "./gym_monitor_output", episode_trigger=lambda x: x == 0)

test(agent, env, learned_policy)

env.close()

  logger.warn(


Moviepy - Building video /content/gym_monitor_output/rl-video-episode-26000.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-26000.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-26000.mp4
Moviepy - Building video /content/gym_monitor_output/rl-video-episode-0.mp4.
Moviepy - Writing video /content/gym_monitor_output/rl-video-episode-0.mp4





Moviepy - Done !
Moviepy - video ready /content/gym_monitor_output/rl-video-episode-0.mp4
