# Installing Libraries

In [2]:
# final import versions

!pip install torch
!pip install "stable-baselines3[extra]>=2.0.0a4"
!pip install "rl_zoo3"
# cells will automatically re-import modules when executing a new cell
# %load_ext autoreload
# %autoreload 2



# Imports

In [3]:
# to reload modules
import importlib

# import Policy Optimization Models
from stable_baselines3 import PPO
from stable_baselines3 import A2C

# import Vectorized Environments
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# import ALE for Atari Gameplay and gym
import ale_py
import gymnasium as gym
import numpy as np
from gymnasium import spaces
from gymnasium.wrappers import TransformReward

# helper function to evaluate how good a policy / model is
from stable_baselines3.common.evaluation import evaluate_policy

# importing callbacks
from stable_baselines3.common.callbacks import ProgressBarCallback, CheckpointCallback, CallbackList, EveryNTimesteps

# so we can record video of gameplay
from stable_baselines3.common.vec_env import VecVideoRecorder, DummyVecEnv

# imports so we can access file system
import sys
import os

# import custom helper functions
# add the Code directory to sys.path so the helper modules can be imported by name
sys.path.append(os.path.join(os.getcwd(), 'Code'))

from Code import policy_helper
from policy_helper import create_agent
from policy_helper import gen_video
from policy_helper import variant_checkpointCallback
from policy_helper import get_doge_all_boxing_env, get_hard_hitter_boxing_env

# Training Models

In [None]:
# Train (or resume training) a PPO agent on ALE/Boxing-v5 and save the result.
#
# Reference table: Atari PPO Open AI Parameters
# Horizon (T)                    128
# Adam stepsize                  2.5 × 10−4 × α
# Num. epochs                    3
# Minibatch size                 32 × 8
# Discount (γ)                   0.99
# GAE parameter (λ)              0.95
# Number of actors               8
# Clipping parameter:            0.1 × α
# VF coeff. c1                   1
# Entropy coeff. c2              0.01
#
# NOTE: param_dict below deviates from the table in two places:
# n_epochs is 4 (table: 3) and vf_coef is 0.5 (table: 1).

# this stores our hyper parameters for all atari games in general
param_dict = {
  "frame_stack": 4,             # n_stack handed to VecFrameStack
  "policy": 'CnnPolicy',        # policy argument handed to PPO
  "n_envs": 8,                  # number of parallel environments
  "n_steps": 128,               # n_steps handed to PPO
  "n_epochs": 4,                # n_epochs handed to PPO
  "batch_size": 256,            # batch_size handed to PPO (32 x 8)
  "n_timesteps": int(1e7),      # total training budget; learn() expects an int
  "learning_rate": 2.5e-4,      # learning_rate handed to PPO
  "clip_range": 0.1,            # clip_range handed to PPO
  "vf_coef": 0.5,               # vf_coef handed to PPO
  "ent_coef": 0.01,             # ent_coef handed to PPO
  }

# general environment
vec_env = make_atari_env("ALE/Boxing-v5", n_envs=param_dict["n_envs"], seed=0)
vec_env = VecFrameStack(vec_env, n_stack=param_dict["frame_stack"])


# getting the save and load paths
model_name = "normal_model"
use_checkpoint = True
model_save_path = "Saved_Models/" + model_name
tensor_board_name = "Tensorboard/" + model_name
checkpoint_dir = "./Checkpoints/" + model_name
checkpoint_path = model_name
if use_checkpoint:
    # e.g. Checkpoints/normal_model/normal_model_33744_steps.zip
    # The file name is derived from the checkpoint prefix so that renaming the
    # model does not leave a stale hard-coded "normal_model" behind.
    model_load_path = checkpoint_dir + "/" + checkpoint_path + "_33744_steps"
else:
    model_load_path = model_save_path

# defining our callbacks
# Save a checkpoint every 100000 environment steps. save_freq is divided by the
# number of environments (with a floor of 1) before being given to the callback.
checkpoint_callback = CheckpointCallback(
  save_freq=max(100000 // param_dict["n_envs"], 1),
  save_path=checkpoint_dir,
  name_prefix=checkpoint_path,
  save_replay_buffer=True,
  save_vecnormalize=True,
)

callback = CallbackList([checkpoint_callback])

# loading and training the model
vec_env.reset()
load = False
if load:
    # Attach the environment while loading. A freshly constructed model already
    # receives vec_env through its constructor, so no separate set_env() call
    # is made afterwards (that call wrapped the environment a second time).
    model = PPO.load(model_load_path, env=vec_env)
else:
    model = PPO(
        param_dict["policy"],
        vec_env,
        verbose=1,
        tensorboard_log=tensor_board_name,
        device="cuda",

        n_steps=param_dict["n_steps"],
        n_epochs=param_dict["n_epochs"],
        batch_size=param_dict["batch_size"],

        learning_rate=param_dict["learning_rate"],
        clip_range=param_dict["clip_range"],
        vf_coef=param_dict["vf_coef"],
        ent_coef=param_dict["ent_coef"],
    )

# training
# reset_num_timesteps=False keeps the step counter of a loaded model instead of
# restarting it from zero.
model.learn(
    total_timesteps=param_dict["n_timesteps"],
    callback=callback,
    progress_bar=True,
    reset_num_timesteps=False,
)
model.save(model_save_path)
# IMPORTANT: if you get this error: LiveError: Only one live display may be active at once
# then you need to refresh the ipynb notebook

A.L.E: Arcade Learning Environment (version 0.10.1+unknown)
[Powered by Stella]


Using cpu device
Wrapping the env in a VecTransposeImage.
Wrapping the env in a VecTransposeImage.
Logging to Tensorboard/normal_model/PPO_0


Output()

-----------------------------
| time/              |      |
|    fps             | 82   |
|    iterations      | 1    |
|    time_elapsed    | 12   |
|    total_timesteps | 1024 |
-----------------------------
------------------------------------------
| time/                   |              |
|    fps                  | 0            |
|    iterations           | 2            |
|    time_elapsed         | 39241        |
|    total_timesteps      | 2048         |
| train/                  |              |
|    approx_kl            | 0.0013827279 |
|    clip_fraction        | 0.0381       |
|    clip_range           | 0.1          |
|    entropy_loss         | -2.89        |
|    explained_variance   | -0.000926    |
|    learning_rate        | 0.00025      |
|    loss                 | 0.382        |
|    n_updates            | 4            |
|    policy_gradient_loss | -0.00316     |
|    value_loss           | 1.23         |
------------------------------------------
----------------

# Training Ensemble

In [None]:
# Train a PPO agent that uses the EnsembledActorCritic policy on
# ALE/Boxing-v5 and save it.
from ensembled_rl import EnsembledActorCritic

# Hyperparameters are defined first so the environment is built from the same
# values that are handed to PPO.
param_dict = {
  "frame_stack": 4,
  "policy": 'CnnPolicy',
  "n_envs": 8,
  "n_steps": 128,
  "n_epochs": 4,
  "batch_size": 256,
  "n_timesteps": int(5.8e5),    # learn() expects an integer step budget
  "learning_rate": 2.5e-4,
  "clip_range": 0.1,
  "vf_coef": 0.5,
  "ent_coef": 0.01,
  }

vec_env = make_atari_env("ALE/Boxing-v5", n_envs=param_dict["n_envs"], seed=0)
vec_env = VecFrameStack(vec_env, n_stack=param_dict["frame_stack"])

should_delete = ""
# specify a unique id if you want a unique save, instead of overwriting
unique_num = ""
unique_name = "ensemble_aggressive"
unique_id = unique_num + unique_name

special_parts = should_delete + unique_id

# FOLDERNAME is not defined anywhere in this notebook. Use it when an earlier
# setup cell provided it; otherwise fall back to the local Saved_Models
# directory instead of failing with a NameError on a fresh kernel.
# NOTE(review): the fallback assumes the base models are stored next to the
# notebook - confirm the intended location.
drive_folder = globals().get("FOLDERNAME")
if drive_folder is not None:
    root_dir = '/content/drive/My Drive/' + drive_folder + "/Saved_Models/"
else:
    root_dir = "Saved_Models/"
save_name = "general_atari_ppo_boxing" + special_parts
model_save_path = root_dir + save_name
model_load_path = root_dir + save_name + ".zip"

model = PPO(
    EnsembledActorCritic,
    vec_env,
    verbose=1,
    tensorboard_log="PPO-Ensemble",
    device="cpu",
    n_steps=param_dict["n_steps"],
    n_epochs=param_dict["n_epochs"],
    batch_size=param_dict["batch_size"],
    learning_rate=param_dict["learning_rate"],
    clip_range=param_dict["clip_range"],
    vf_coef=param_dict["vf_coef"],
    ent_coef=param_dict["ent_coef"],
    # Forwarded to EnsembledActorCritic; the two base_models entries are paths
    # to previously saved models under root_dir.
    policy_kwargs={
        'base_models': [
            root_dir + 'general_atari_ppo_boxing',
            root_dir + 'general_atari_ppo_boxingaggressive_pure'
        ],
        'ensemble_type': 'mlp',
        'action_size': 18,
    },
)

model.learn(
    total_timesteps=param_dict["n_timesteps"],
    progress_bar=True,
    reset_num_timesteps=False,
)
model.save(model_save_path)

In [None]:
# Emit a fixed marker line once execution reaches this final cell.
print("Done Running", end="\n")