# Training Notebook

### Install Packages

In [1]:
# Install the notebook's dependencies into the environment of the running kernel
# (the %pip magic targets the kernel's environment, one package per call).
# torch and stable-baselines3 are imported by the training cell further down.
# NOTE(review): numpy, torchvision and opencv-python are not imported by any cell
# visible here — presumably they are needed by the local `rocket` module; confirm.
# NOTE(review): no versions are pinned; the recorded output of this cell shows
# stable-baselines3 2.2.1 and gymnasium 0.29.1 being downloaded.
%pip install numpy
%pip install torch
%pip install torchvision
%pip install opencv-python
%pip install stable-baselines3[extra]
%pip install gymnasium

Collecting stable-baselines3[extra]
  Downloading stable_baselines3-2.2.1-py3-none-any.whl (181 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.7/181.7 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting gymnasium<0.30,>=0.28.1 (from stable-baselines3[extra])
  Downloading gymnasium-0.29.1-py3-none-any.whl (953 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m953.9/953.9 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
Collecting shimmy[atari]~=1.3.0 (from stable-baselines3[extra])
  Downloading Shimmy-1.3.0-py3-none-any.whl (37 kB)
Collecting autorom[accept-rom-license]~=0.6.1 (from stable-baselines3[extra])
  Downloading AutoROM-0.6.1-py3-none-any.whl (9.4 kB)
Collecting AutoROM.accept-rom-license (from autorom[accept-rom-license]~=0.6.1->stable-baselines3[extra])
  Downloading AutoROM.accept-rom-license-0.6.1.tar.gz (434 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m434.7/434.7 kB[0m [31m39.6 MB/s[0m eta [

### Mount Google Drive and Move to the Project Directory

In [2]:
# Mount Google Drive at /content/drive and switch the working directory to the
# project folder stored on Drive, so that relative paths used by later cells
# resolve inside that folder.
# NOTE(review): `google.colab` is only importable on Google Colab, so this cell
# will fail in other Jupyter environments.
# NOTE(review): presumably the local `rocket` module imported later lives in
# this folder — confirm.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive/MyDrive/Codes/Python/Rocket-Lander

Mounted at /content/drive
/content/drive/MyDrive/Codes/Python/Rocket-Lander


### Train the Model

In [3]:
import os
import torch
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from rocket import Rocket

# Decide which device we want to run on: the GPU when PyTorch can see one,
# otherwise the CPU. The result is passed explicitly to PPO below.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if __name__ == '__main__':

    task = 'landing'
    # NOTE(review): not read anywhere in this cell; kept in case another cell uses it.
    max_m_episode = 800000
    max_steps = 800
    # Training budget for this run, in environment steps.
    total_timesteps = 100000  # Adjust the number of timesteps as needed

    # Create a vectorized environment: four copies of the Rocket task.
    env_fn = lambda: Rocket(task=task, max_steps=max_steps)
    env = make_vec_env(env_fn, n_envs=4)

    # One checkpoint location for both loading and saving. Previously the model
    # was looked up under 'Models' but written under 'models', so on a
    # case-sensitive filesystem a saved checkpoint was never found again and
    # every run silently restarted training from scratch.
    model_path = os.path.join('models', task + '_ppo')
    # Older checkpoints may still sit in the capitalised directory the loader
    # used to look in; fall back to it when nothing exists at the save location.
    legacy_model_path = os.path.join('Models', task + '_ppo')

    # Load the most recent checkpoint if there is one, otherwise start fresh.
    # model.save() appends ".zip", hence the suffix in the existence checks.
    existing_path = next(
        (path for path in (model_path, legacy_model_path)
         if os.path.exists(path + ".zip")),
        None,
    )
    if existing_path is not None:
        # The environment is not stored in the checkpoint, so attach it here.
        model = PPO.load(existing_path, env=env, device=device)
    else:
        model = PPO("MlpPolicy", env, verbose=1, device=device)

    # Train the model
    model.learn(total_timesteps=total_timesteps, progress_bar=True)

    # Save the model to the same location the next run will load from.
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    model.save(model_path)


Using cuda device


Output()

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 251      |
|    ep_rew_mean     | 4.68     |
| time/              |          |
|    fps             | 1749     |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 8192     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 251          |
|    ep_rew_mean          | 4.54         |
| time/                   |              |
|    fps                  | 1098         |
|    iterations           | 2            |
|    time_elapsed         | 14           |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0038497034 |
|    clip_fraction        | 0.0244       |
|    clip_range           | 0.2          |
|    entropy_loss         | -2.81        |
|    explained_variance   | 0.243        |
|    learning_r