<a href="https://colab.research.google.com/github/AICraftsLab/pygame-aliens/blob/main/aliens.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!apt install swig cmake

In [None]:
!pip install stable-baselines3
#swig
!pip install gymnasium
!pip install huggingface_sb3

Later in the notebook, we'll need to generate a replay video. To do so in Colab, **we need a virtual screen to be able to render the environment** (and thus record the frames).

Hence the following cell will install virtual screen libraries and create and run a virtual screen 🖥

In [None]:
!sudo apt-get update
!sudo apt-get install -y python3-opengl
!apt install ffmpeg
!apt install xvfb
!pip3 install pyvirtualdisplay

To make sure the newly installed libraries are used, **sometimes it's required to restart the notebook runtime**. The next cell will force the **runtime to crash, so you'll need to reconnect and run the code starting from here**. Thanks to this trick, **we will be able to run our virtual screen.**

In [None]:
import os
# Send signal 9 (SIGKILL) to this very process so the runtime crashes and restarts
this_process = os.getpid()
os.kill(this_process, 9)

In [None]:
# Virtual display
from pyvirtualdisplay import Display

# Configure a non-visible 1400x900 screen, then bring it up so rendering has a display
virtual_display = Display(size=(1400, 900), visible=0)
virtual_display.start()

In [None]:
!git clone https://github.com/AICraftsLab/pygame-aliens.git

In [None]:
import gymnasium

from huggingface_sb3 import load_from_hub, package_to_hub
from huggingface_hub import notebook_login # To log to our Hugging Face account to be able to upload models to the Hub.

from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

In [None]:
import os
import numpy as np
import gymnasium as gym
import aliens_env
from itertools import count

import torch
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize


# Save normalization stats callback
class SaveNormStatsCallback(BaseCallback):
    """Callback that writes the ``VecNormalize`` statistics to disk.

    Each time the callback is stepped, ``vec_env.save`` is called and the file
    ``save_dir/filename`` is overwritten with the current statistics.
    The log message suggests it is meant to be triggered when a new best model
    is found (presumably via ``EvalCallback(callback_on_new_best=...)``).

    :param vec_env: Normalizing wrapper whose ``save`` method is called.
    :param filename: Name of the file the statistics are written to.
    :param save_dir: Directory that receives the file. ``None`` or an empty
        string means the current working directory.
    :param verbose: A message is printed on every save when >= 1.
    """

    def __init__(self, vec_env: VecNormalize, filename: str = 'vecnormalize.pkl', save_dir: str = './', verbose: int = 1):
        super().__init__(verbose)
        self.vec_env = vec_env
        self.save_dir = save_dir
        self.filename = filename

    def _init_callback(self) -> None:
        """Create the output directory if one was given."""
        # A falsy save_dir (None or '') means "current directory": there is
        # nothing to create, and os.makedirs('') would raise.
        if self.save_dir:
            os.makedirs(self.save_dir, exist_ok=True)

    def _on_step(self) -> bool:
        """Save the normalization statistics and let training continue.

        :return: Always ``True``.
        """
        # Fall back to '' so that a None save_dir does not crash os.path.join
        save_path = os.path.join(self.save_dir or '', self.filename)
        self.vec_env.save(save_path)
        if self.verbose >= 1:
            print(f"Saving best model normalizing stats to {save_path}")

        return True


if __name__ == "__main__":
    # Training entry point: builds the vectorized environment, wraps it in
    # VecNormalize and trains (or resumes training of) a PPO agent, writing
    # periodic checkpoints plus a final snapshot into save_dir.
    env_id = 'Aliens'
    save_dir = 'run1'
    seed = None
    tensorboard_log = 'tensorboard_logs'

    # For resuming training: set is_new_training to False and point both paths
    # at a saved model and its matching VecNormalize statistics file.
    is_new_training = True
    model_file_path = None
    stats_file_path = None

    # Fail early with a clear message instead of an opaque error from the loaders
    if not is_new_training and (model_file_path is None or stats_file_path is None):
        raise ValueError(
            'model_file_path and stats_file_path must be set when is_new_training is False'
        )

    # Save_dir should not exist if is_new_training (FileExistsError otherwise),
    # so that a previous run is never overwritten by accident.
    os.makedirs(save_dir, exist_ok=not is_new_training)

    timesteps = 5e6
    num_cpu = 20  # Env nums
    vec_env_cls = DummyVecEnv

    vec_env = make_vec_env(env_id, n_envs=num_cpu, vec_env_cls=vec_env_cls, seed=seed)

    if is_new_training:
        vec_env = VecNormalize(vec_env, norm_obs=True, norm_reward=True)
    else:
        # Reuse the running statistics collected during the earlier run
        vec_env = VecNormalize.load(stats_file_path, vec_env)

    # NOTE: save_freq is counted in callback calls; every call advances all
    # num_cpu environments, so a checkpoint is written every
    # 10000 * num_cpu environment timesteps.
    checkpoint_callback = CheckpointCallback(
        save_freq=10000,
        save_path=save_dir,
        name_prefix="model",
        save_replay_buffer=False,
        save_vecnormalize=True,
        verbose=1,
    )

    if is_new_training:
        model = PPO("MlpPolicy", vec_env, verbose=1, tensorboard_log=tensorboard_log)
    else:
        model = PPO.load(model_file_path, vec_env, verbose=1, tensorboard_log=tensorboard_log)
        print('Loaded model:', model_file_path)

    # Keep the timestep counter running when resuming so logs and checkpoint
    # names continue from the previous run.
    reset_num_timesteps = is_new_training
    model.learn(total_timesteps=int(timesteps), callback=checkpoint_callback, reset_num_timesteps=reset_num_timesteps, tb_log_name=save_dir)

    # Persist the final state: the last periodic checkpoint can be older than
    # the end of training, and the model is useless without matching stats.
    final_model_path = os.path.join(save_dir, 'model_final')
    final_stats_path = os.path.join(save_dir, 'model_vecnormalize_final.pkl')
    model.save(final_model_path)
    vec_env.save(final_stats_path)
    print('Saved final model to', final_model_path)
    print('Saved final normalization stats to', final_stats_path)

    # Release the environments now that training is over
    vec_env.close()

    print('Training complete')
    print('Name:', save_dir)


In [None]:
import gymnasium as gym

# Build the LunarLander-v2 environment
env = gym.make("LunarLander-v2")

# Start the first episode
observation, info = env.reset()

for _ in range(20):
  # Pick a random action from the action space
  action = env.action_space.sample()
  print("Action taken:", action)

  # Apply the action; the step yields the next observation, the reward,
  # the terminated and truncated flags, and an info object
  observation, reward, terminated, truncated, info = env.step(action)

  # Nothing more to do while the episode is still running
  if not (terminated or truncated):
      continue

  # The episode ended (terminated) or was cut short (truncated): start a new one
  print("Environment is reset")
  observation, info = env.reset()

env.close()

In [None]:
# Log in to the Hugging Face account so that models can be uploaded to the Hub
notebook_login()
!git config --global credential.helper store

In [None]:
import gymnasium as gym

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.env_util import make_vec_env

from huggingface_sb3 import package_to_hub

# Settings used to publish the trained agent to the Hugging Face Hub.
# NOTE(review): the training cell of this notebook trains on 'Aliens' with a
# VecNormalize wrapper, while this cell evaluates on LunarLander-v2 without
# normalization - confirm that `model` really is a LunarLander agent before pushing.

# Define the name of the environment
env_id = "LunarLander-v2"

# Define the model architecture we used
model_architecture = "PPO"

# Name of the trained model; it was passed to package_to_hub below without
# being defined anywhere in the notebook, which raised a NameError.
model_name = "ppo-LunarLander-v2"

## Define a repo_id
## repo_id is the id of the model repository from the Hugging Face Hub
## (repo_id = {organization}/{repo_name}, for instance ThomasSimonini/ppo-LunarLander-v2)
## CHANGE WITH YOUR REPO ID
repo_id = "AbdulrazaqAS/ppo-LunarLander-v2" # Change with your repo id, you can't push with mine 😄

## Define the commit message
commit_message = "Upload PPO LunarLander-v2 trained agent"

# Create the evaluation env and set the render_mode="rgb_array"
eval_env = DummyVecEnv([lambda: gym.make(env_id, render_mode="rgb_array")])

# Push the model to the Hub together with the evaluation environment
package_to_hub(model=model, # Our trained model
               model_name=model_name, # The name of our trained model
               model_architecture=model_architecture, # The model architecture we used: in our case PPO
               env_id=env_id, # Name of the environment
               eval_env=eval_env, # Evaluation Environment
               repo_id=repo_id, # id of the model repository from the Hugging Face Hub (repo_id = {organization}/{repo_name}, for instance ThomasSimonini/ppo-LunarLander-v2)
               commit_message=commit_message)
