# this is Lunar lander PPO training ( unit 1 of Deep RL course from Huggingface)
I focus on implementing the whole use case + also some deeper visualizations, and understanding how it works using plots and training env videos.

In [2]:
# Virtual display
from pyvirtualdisplay import Display

virtual_display = Display(visible=0, size=(1400, 900))
virtual_display.start()


<pyvirtualdisplay.display.Display at 0x739f4400d7e0>

In [3]:
import gymnasium

from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import (
    notebook_login,
)  # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import gymnasium as gym

# First, we create our environment called LunarLander-v2
env = gym.make("LunarLander-v2")

# Then we reset this environment
observation, info = env.reset()

for _ in range(20):
    # Take a random action
    action = env.action_space.sample()
    print("Action taken:", action)

    # Do this action in the environment and get
    # next_state, reward, terminated, truncated and info
    observation, reward, terminated, truncated, info = env.step(action)

    # If the game is terminated (in our case we land, crashed) or truncated (timeout)
    if terminated or truncated:
        # Reset the environment
        print("Environment is reset")
        observation, info = env.reset()

env.close()

Action taken: 3
Action taken: 0
Action taken: 0
Action taken: 0
Action taken: 1
Action taken: 0
Action taken: 3
Action taken: 1
Action taken: 2
Action taken: 1
Action taken: 0
Action taken: 2
Action taken: 1
Action taken: 1
Action taken: 0
Action taken: 1
Action taken: 1
Action taken: 3
Action taken: 1
Action taken: 0


# making the environment

In [6]:
# We create our environment with gym.make("<name_of_the_environment>")
env = gym.make("LunarLander-v2")
env.reset()
print("_____OBSERVATION SPACE_____ \n")
print("Observation Space Shape", env.observation_space.shape)
print("Sample observation", env.observation_space.sample())  # Get a random observation

_____OBSERVATION SPACE_____ 

Observation Space Shape (8,)
Sample observation [14.810741   6.9491935 -1.5702169  1.3278024  2.0203414  0.7272784
  0.9341923  0.2483236]


We see with Observation Space Shape (8,) that the observation is a vector of size 8, where each value contains different information about the lander:

    Horizontal pad coordinate (x)
    Vertical pad coordinate (y)
    Horizontal speed (x)
    Vertical speed (y)
    Angle
    Angular speed
    If the left leg contact point has touched the land (boolean)
    If the right leg contact point has touched the land (boolean)

In [7]:
print("\n _____ACTION SPACE_____ \n")
print("Action Space Shape", env.action_space.n)
print("Action Space Sample", env.action_space.sample())  # Take a random action


 _____ACTION SPACE_____ 

Action Space Shape 4
Action Space Sample 1


In [8]:
# Create the environment
env = make_vec_env("LunarLander-v2", n_envs=16)

n is the vectorized environments a method of stacking indepenndent environments

# Using Stable Baseline models

In [9]:
# Check the observation type before training
obs = env.reset()
print(type(obs), obs.shape)


<class 'numpy.ndarray'> (16, 8)


In [10]:
# We create our environment with gym.make("<name_of_the_environment>")
env = gym.make("LunarLander-v2")
env.reset()
print("_____OBSERVATION SPACE_____ \n")
print("Observation Space Shape", env.observation_space.shape)
print("Sample observation", env.observation_space.sample())  # Get a random observation

_____OBSERVATION SPACE_____ 

Observation Space Shape (8,)
Sample observation [ 6.7173262e+00 -5.4620487e+01 -4.5649557e+00 -4.3027511e+00
 -1.8213142e+00 -2.5220447e+00  7.5495332e-02  2.2063985e-02]


In [11]:
import numpy as np

In [12]:

print("Observation value:", obs)
print("Type:", type(obs))
print("Shape:", np.shape(obs))
print("Dtype:", np.array(obs).dtype)

Observation value: [[-2.14500423e-03  1.40913391e+00 -2.17281103e-01 -7.93854743e-02
   2.49231304e-03  4.92173918e-02  0.00000000e+00  0.00000000e+00]
 [-3.00216675e-03  1.40641594e+00 -3.04106593e-01 -2.00191215e-01
   3.48559511e-03  6.88846260e-02  0.00000000e+00  0.00000000e+00]
 [ 3.20568075e-03  1.41756237e+00  3.24688077e-01  2.95211017e-01
  -3.70782684e-03 -7.35466704e-02  0.00000000e+00  0.00000000e+00]
 [-6.33459073e-03  1.40862381e+00 -6.41641736e-01 -1.02074966e-01
   7.34698959e-03  1.45341411e-01  0.00000000e+00  0.00000000e+00]
 [ 1.32579799e-03  1.42057848e+00  1.34269208e-01  4.29254889e-01
  -1.52942550e-03 -3.04139610e-02  0.00000000e+00  0.00000000e+00]
 [-3.31020361e-04  1.41329515e+00 -3.35415304e-02  1.05551131e-01
   3.90312693e-04  7.59766530e-03  0.00000000e+00  0.00000000e+00]
 [-6.15463266e-03  1.40540481e+00 -6.23405516e-01 -2.45142415e-01
   7.13839335e-03  1.41210720e-01  0.00000000e+00  0.00000000e+00]
 [-7.79762259e-03  1.41289353e+00 -7.89830804e-01 

In [15]:

# Instantiate the agent
model = PPO('MlpPolicy', env, verbose=1)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [16]:
# SOLUTION
# We added some parameters to accelerate the training
model = PPO(
    policy="MlpPolicy",
    env=env,
    n_steps=1024,
    batch_size=64,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=1,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [17]:
# SOLUTION
# Train it for 1,000,000 timesteps
model.learn(total_timesteps=1000000)
# Save the model
model_name = "ppo-LunarLander-v2-mark-I"
model.save(model_name)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 97.2     |
|    ep_rew_mean     | -219     |
| time/              |          |
|    fps             | 1212     |
|    iterations      | 1        |
|    time_elapsed    | 0        |
|    total_timesteps | 1024     |
---------------------------------
-------------------------------------------
| rollout/                |               |
|    ep_len_mean          | 89.9          |
|    ep_rew_mean          | -160          |
| time/                   |               |
|    fps                  | 1047          |
|    iterations           | 2             |
|    time_elapsed         | 1             |
|    total_timesteps      | 2048          |
| train/                  |               |
|    approx_kl            | 0.00029156968 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.39         |
|    explained_variance   | -0.0006133318 |


Evaluation


In [11]:
from stable_baselines3.common.evaluation import evaluate_policy


In [12]:
# @title
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")



mean_reward=247.37 +/- 28.18308836823013


# pushing to huggingface hub

In [1]:
from stable_baselines3 import PPO

# Load the model from the saved checkpoint
model = PPO.load("ppo-LunarLander-v2.zip")


  th_object = th.load(file_content, map_location=device)


In [4]:
import gymnasium as gym
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor

# Set up the environment
env_id = "LunarLander-v2"
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])


In [6]:
model_name = "ppo-LunarLander-v2"

In [7]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

# PLACE the variables you've just defined two cells above
# Define the name of the environment
env_id = "LunarLander-v2"

# TODO: Define the model architecture we used
model_architecture = "PPO"

## Define a repo_id
## repo_id is the id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2
## CHANGE WITH YOUR REPO ID
repo_id = "honestlyanubhav/ppo-LunarLander-v2"  # Change with your repo id, you can't push with mine 😄

## Define the commit message
commit_message = "Upload PPO LunarLander-v2 trained agent"

# Create the evaluation env and set the render_mode="rgb_array"
eval_env = DummyVecEnv([lambda: Monitor(gym.make(env_id, render_mode="rgb_array"))])

# PLACE the package_to_hub function you've just filled here
package_to_hub(
    model=model,  # Our trained model
    model_name=model_name,  # The name of our trained model
    model_architecture=model_architecture,  # The model architecture we used: in our case PPO
    env_id=env_id,  # Name of the environment
    eval_env=eval_env,  # Evaluation Environment
    repo_id=repo_id,  # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name} for instance ThomasSimonini/ppo-LunarLander-v2
    commit_message=commit_message,
)

[38;5;4mℹ This function will save, evaluate, generate a video of your agent,
create a model card and push everything to the hub. It might take up to 1min.
This is a work in progress: if you encounter a bug, please open an issue.[0m
Saving video to /tmp/tmphvkcjx6u/-step-0-to-step-1000.mp4
MoviePy - Building video /tmp/tmphvkcjx6u/-step-0-to-step-1000.mp4.
MoviePy - Writing video /tmp/tmphvkcjx6u/-step-0-to-step-1000.mp4



ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
  built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
  configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enab

MoviePy - Done !
MoviePy - video ready /tmp/tmphvkcjx6u/-step-0-to-step-1000.mp4


frame= 1000 fps=0.0 q=-1.0 Lsize=     148kB time=00:00:19.94 bitrate=  60.6kbits/s speed=42.1x    
video:135kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 9.119665%
[libx264 @ 0x555eaa5eedc0] frame I:5     Avg QP: 9.51  size:  1848
[libx264 @ 0x555eaa5eedc0] frame P:279   Avg QP:21.51  size:   207
[libx264 @ 0x555eaa5eedc0] frame B:716   Avg QP:21.48  size:    99
[libx264 @ 0x555eaa5eedc0] consecutive B-frames:  3.3%  2.8%  2.7% 91.2%
[libx264 @ 0x555eaa5eedc0] mb I  I16..4: 85.1%  8.9%  6.0%
[libx264 @ 0x555eaa5eedc0] mb P  I16..4:  0.1%  0.3%  0.1%  P16..4:  1.7%  0.5%  0.1%  0.0%  0.0%    skip:97.0%
[libx264 @ 0x555eaa5eedc0] mb B  I16..4:  0.0%  0.0%  0.0%  B16..8:  2.4%  0.2%  0.0%  direct: 0.1%  skip:97.2%  L0:55.5% L1:43.8% BI: 0.7%
[libx264 @ 0x555eaa5eedc0] 8x8 transform intra:20.5% inter:16.4%
[libx264 @ 0x555eaa5eedc0] coded y,uvDC,uvAC intra: 6.6% 10.2% 9.1% inter: 0.2% 0.2% 0.2%
[libx264 @ 0x555eaa5eedc0] i16 v,h,dc,p: 87%  8%  5%  0%
[libx

[38;5;4mℹ Pushing repo honestlyanubhav/ppo-LunarLander-v2 to the Hugging Face
Hub[0m


policy.optimizer.pth:   0%|          | 0.00/88.5k [00:00<?, ?B/s]

replay.mp4:   0%|          | 0.00/151k [00:00<?, ?B/s]

Upload 5 LFS files:   0%|          | 0/5 [00:00<?, ?it/s]

ppo-LunarLander-v2.zip:   0%|          | 0.00/148k [00:00<?, ?B/s]

policy.pth:   0%|          | 0.00/43.8k [00:00<?, ?B/s]

pytorch_variables.pth:   0%|          | 0.00/864 [00:00<?, ?B/s]

[38;5;4mℹ Your model is pushed to the Hub. You can view your model here:
https://huggingface.co/honestlyanubhav/ppo-LunarLander-v2/tree/main/[0m


CommitInfo(commit_url='https://huggingface.co/honestlyanubhav/ppo-LunarLander-v2/commit/afbc82ecc229da94eb2ea8feafeffb621ad51e2a', commit_message='Upload PPO LunarLander-v2 trained agent', commit_description='', oid='afbc82ecc229da94eb2ea8feafeffb621ad51e2a', pr_url=None, repo_url=RepoUrl('https://huggingface.co/honestlyanubhav/ppo-LunarLander-v2', endpoint='https://huggingface.co', repo_type='model', repo_id='honestlyanubhav/ppo-LunarLander-v2'), pr_revision=None, pr_num=None)

# loading a model from hub

In [9]:
from huggingface_sb3 import load_from_hub

repo_id = "honestlyanubhav/ppo-LunarLander-v2" 
filename = "ppo-LunarLander-v2.zip"  # The model filename.zip

# When the model was trained on Python 3.8 the pickle protocol is 5
# But Python 3.6, 3.7 use protocol 4
# In order to get compatibility we need to:
# 1. Install pickle5 (we done it at the beginning of the colab)
# 2. Create a custom empty object we pass as parameter to PPO.load()
custom_objects = {
    "learning_rate": 0.0,
    "lr_schedule": lambda _: 0.0,
    "clip_range": lambda _: 0.0,
}

checkpoint = load_from_hub(repo_id, filename)
model = PPO.load(checkpoint, custom_objects=custom_objects, print_system_info=True)

ppo-LunarLander-v2.zip:   0%|          | 0.00/148k [00:00<?, ?B/s]

== CURRENT SYSTEM INFO ==
- OS: Linux-6.8.0-57-generic-x86_64-with-glibc2.35 # 59~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Mar 19 17:07:41 UTC 2
- Python: 3.10.16
- Stable-Baselines3: 2.0.0a5
- PyTorch: 2.4.0+cu121
- GPU Enabled: True
- Numpy: 2.2.4
- Cloudpickle: 3.0.0
- Gymnasium: 0.28.1

== SAVED MODEL SYSTEM INFO ==
- OS: Linux-6.8.0-57-generic-x86_64-with-glibc2.35 # 59~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Wed Mar 19 17:07:41 UTC 2
- Python: 3.10.16
- Stable-Baselines3: 2.0.0a5
- PyTorch: 2.4.0+cu121
- GPU Enabled: True
- Numpy: 2.2.4
- Cloudpickle: 3.0.0
- Gymnasium: 0.28.1



  th_object = th.load(file_content, map_location=device)


In [13]:
# @title
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")

mean_reward=264.96 +/- 16.65931607652144


# PART II : Visualizing Reinforcement Learning Training

My aim with part 2 of this is to create videos of training env which depict how the model has learnt


In [36]:
import os
import gymnasium as gym
import numpy as np
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback
import cv2
from gymnasium.wrappers.monitoring.video_recorder import VideoRecorder
import glob

# Create directories
os.makedirs("checkpoints", exist_ok=True)
os.makedirs("videos", exist_ok=True)
os.makedirs("videos/evaluation", exist_ok=True)


we use a checkpoint callback to save every nth env during training

In [37]:
class CheckpointCallback(BaseCallback):
    def __init__(self, save_freq, save_path, verbose=1):
        super(CheckpointCallback, self).__init__(verbose)
        self.save_freq = save_freq
        self.save_path = save_path
        self.checkpoint_count = 0
        
    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)
        print(f"Checkpoint callback initialized. Will save every {self.save_freq} steps to {self.save_path}")
    
    def _on_step(self):
        if self.n_calls % 10000 == 0:
            print(f"Checkpoint callback: {self.n_calls} steps completed")
            
        if self.n_calls % self.save_freq == 0:
            checkpoint_path = os.path.join(self.save_path, f"model_checkpoint_{self.n_calls}_steps")
            self.model.save(checkpoint_path)
            self.checkpoint_count += 1
            
            print(f"Saving model checkpoint {self.checkpoint_count} to {checkpoint_path}")
        
        return True



training the model

In [None]:
# Set up environment
env = make_vec_env("LunarLander-v2", n_envs=16)

# Create the PPO model
model = PPO(
    policy="MlpPolicy",
    env=env,
    n_steps=1024,
    batch_size=64,
    n_epochs=4,
    gamma=0.999,
    gae_lambda=0.98,
    ent_coef=0.01,
    verbose=1,
)

Using cuda device


In [45]:

# Create the checkpoint callback with more debugging
model_name = "ppo-LunarLander-v2-mark-IV"
checkpoint_callback = CheckpointCallback(save_freq=4000, save_path=f"checkpoints/{model_name}")



In [46]:
# When you run the training again, use:
model.learn(total_timesteps=1000000, callback=checkpoint_callback)

# Save the final model
model.save(model_name)

Checkpoint callback initialized. Will save every 4000 steps to checkpoints/ppo-LunarLander-v2-mark-IV
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 119      |
|    ep_rew_mean     | -54.8    |
| time/              |          |
|    fps             | 4973     |
|    iterations      | 1        |
|    time_elapsed    | 3        |
|    total_timesteps | 16384    |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 148          |
|    ep_rew_mean          | -25.5        |
| time/                   |              |
|    fps                  | 3219         |
|    iterations           | 2            |
|    time_elapsed         | 10           |
|    total_timesteps      | 32768        |
| train/                  |              |
|    approx_kl            | 0.0086493455 |
|    clip_fraction        | 0.0556       |
|    clip_range           | 0.2          |

In [47]:
# @title
eval_env = Monitor(gym.make("LunarLander-v2"))
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")


mean_reward=235.36 +/- 27.75813598603178


## making videos out of the thing
I asked claude to write a code to make videos out of the checkpoints
## and it did

In [49]:
# Import necessary libraries
import os
import gymnasium as gym
import torch
import numpy as np
import glob
from stable_baselines3 import PPO, DQN, A2C, SAC  # Include whichever algorithms you used
import imageio

# Function to generate video from a checkpoint
def create_video_from_checkpoint(checkpoint_path, video_name=None, episodes=3, steps_per_episode=1000):
    """
    Loads a trained model from checkpoint and creates a video of its performance
    
    Args:
        checkpoint_path: Path to the .zip checkpoint file
        video_name: Name for output video file (defaults to checkpoint name)
        episodes: Number of episodes to record
        steps_per_episode: Max steps per episode
    """
    # Create output directory
    os.makedirs("videos/markIV", exist_ok=True)
    
    # Set video name if not provided
    if video_name is None:
        video_name = os.path.basename(checkpoint_path).replace('.zip', '')
    
    video_path = f"videos/markIV/{video_name}.mp4"
    
    # Determine algorithm from filename (customize as needed)
    filename = checkpoint_path.lower()
    algo_class = PPO  # Default
    if 'dqn' in filename:
        algo_class = DQN
    elif 'a2c' in filename:
        algo_class = A2C
    elif 'sac' in filename:
        algo_class = SAC
    
    # Load the model
    try:
        model = algo_class.load(checkpoint_path)
        print(f"Successfully loaded: {checkpoint_path}")
    except Exception as e:
        print(f"Error loading model {checkpoint_path}: {e}")
        return
    
    # Initialize environment
    env = gym.make("LunarLander-v2", render_mode="rgb_array")
    
    # Collect frames across multiple episodes
    all_frames = []
    
    for ep in range(episodes):
        obs, _ = env.reset()
        frames = []
        
        for step in range(steps_per_episode):
            # Get action from model
            action, _ = model.predict(obs, deterministic=True)
            
            # Execute action in environment
            obs, reward, terminated, truncated, info = env.step(action)
            
            # Render and save frame
            frame = env.render()
            frames.append(frame)
            
            # End episode if done
            if terminated or truncated:
                break
        
        all_frames.extend(frames)
        print(f"Episode {ep+1} complete: {len(frames)} frames")
    
    # Save video
    if all_frames:
        imageio.mimsave(video_path, all_frames, fps=30)
        print(f"Video saved to {video_path}")
    else:
        print("No frames were collected")

# Process all checkpoints in a folder
def process_checkpoint_folder(folder_path="checkpoints"):
    """Process all checkpoint files in the specified folder"""
    checkpoint_files = glob.glob(f"{folder_path}/*.zip")
    
    if not checkpoint_files:
        print(f"No checkpoint files found in {folder_path}")
        return
    
    print(f"Found {len(checkpoint_files)} checkpoint files")
    for checkpoint in checkpoint_files:
        create_video_from_checkpoint(checkpoint)

# Example usage - run this cell to process a single checkpoint
# create_video_from_checkpoint("checkpoints/your_model.zip")

# Example usage - run this cell to process all checkpoints in a folder
# process_checkpoint_folder("checkpoints")

In [51]:
# This processes all checkpoints in the folder
process_checkpoint_folder("./checkpoints/ppo-LunarLander-v2-mark-IV")

Found 15 checkpoint files
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_24000_steps.zip


  th_object = th.load(file_content, map_location=device)


Episode 1 complete: 750 frames
Episode 2 complete: 721 frames




Episode 3 complete: 851 frames
Video saved to videos/markIV/model_checkpoint_24000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_8000_steps.zip
Episode 1 complete: 760 frames
Episode 2 complete: 342 frames




Episode 3 complete: 423 frames
Video saved to videos/markIV/model_checkpoint_8000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_60000_steps.zip
Episode 1 complete: 345 frames
Episode 2 complete: 317 frames




Episode 3 complete: 329 frames
Video saved to videos/markIV/model_checkpoint_60000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_16000_steps.zip
Episode 1 complete: 1000 frames
Episode 2 complete: 826 frames




Episode 3 complete: 685 frames
Video saved to videos/markIV/model_checkpoint_16000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_40000_steps.zip
Episode 1 complete: 379 frames
Episode 2 complete: 419 frames




Episode 3 complete: 403 frames
Video saved to videos/markIV/model_checkpoint_40000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_12000_steps.zip
Episode 1 complete: 1000 frames
Episode 2 complete: 930 frames




Episode 3 complete: 1000 frames
Video saved to videos/markIV/model_checkpoint_12000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_44000_steps.zip
Episode 1 complete: 378 frames
Episode 2 complete: 362 frames




Episode 3 complete: 411 frames
Video saved to videos/markIV/model_checkpoint_44000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_36000_steps.zip
Episode 1 complete: 465 frames
Episode 2 complete: 1000 frames




Episode 3 complete: 417 frames
Video saved to videos/markIV/model_checkpoint_36000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_4000_steps.zip
Episode 1 complete: 518 frames
Episode 2 complete: 391 frames




Episode 3 complete: 1000 frames
Video saved to videos/markIV/model_checkpoint_4000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_48000_steps.zip
Episode 1 complete: 381 frames
Episode 2 complete: 377 frames




Episode 3 complete: 403 frames
Video saved to videos/markIV/model_checkpoint_48000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_32000_steps.zip
Episode 1 complete: 443 frames
Episode 2 complete: 445 frames




Episode 3 complete: 486 frames
Video saved to videos/markIV/model_checkpoint_32000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_20000_steps.zip
Episode 1 complete: 1000 frames
Episode 2 complete: 1000 frames




Episode 3 complete: 1000 frames
Video saved to videos/markIV/model_checkpoint_20000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_56000_steps.zip
Episode 1 complete: 356 frames
Episode 2 complete: 352 frames




Episode 3 complete: 367 frames
Video saved to videos/markIV/model_checkpoint_56000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_52000_steps.zip
Episode 1 complete: 367 frames
Episode 2 complete: 373 frames




Episode 3 complete: 405 frames
Video saved to videos/markIV/model_checkpoint_52000_steps.mp4
Successfully loaded: ./checkpoints/ppo-LunarLander-v2-mark-IV/model_checkpoint_28000_steps.zip
Episode 1 complete: 1000 frames
Episode 2 complete: 888 frames




Episode 3 complete: 523 frames
Video saved to videos/markIV/model_checkpoint_28000_steps.mp4
