Load necessary packages

In [1]:
# Toggle for running on Google Colab: when True, this cell installs the Atari
# dependencies into the runtime. The flag is also checked by the Drive-mount cell.
collab_setup_needed = False
if collab_setup_needed:
  # Shell escapes (IPython `!` syntax): install ale-py and the Gymnasium Atari extras.
  # NOTE(review): versions are not pinned, so installs may differ between runs.
  !pip install ale-py
  !pip install gymnasium[atari]
  !pip install gymnasium[accept-rom-license]
  # Run ale-import-roms on the ROM directory inside the installed ale_py package.
  # NOTE(review): the path hard-codes python3.10 — confirm it matches the runtime.
  !ale-import-roms /usr/local/lib/python3.10/dist-packages/ale_py/roms/

In [2]:
if collab_setup_needed:
    # Colab-only setup: mount Google Drive and switch the working directory to the
    # project folder so that relative paths (e.g. "Results/...") and the local
    # `MainRunFiles` package resolve against the Drive copy of the project.
    import os

    from google.colab import drive

    drive.mount('/content/drive')

    # Project location inside the mounted Drive.
    folder_path = '/content/drive/MyDrive/ColabNotebooks/BioInspired'
    os.chdir(folder_path)

In [3]:
from MainRunFiles.Preprocessing import MsPacmanReducedActionSpaceWrapper, ActionRepeatWrapper

In [4]:
import gymnasium as gym
import torch
# Release cached CUDA memory left over from earlier runs in this kernel.
torch.cuda.empty_cache()

# Pick the compute device once; everything downstream receives it via the DQN parameters.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Only announce the GPU case (nothing is printed when falling back to the CPU).
if device.type == "cuda":
    print("Using GPU")

# Enable autograd anomaly detection.
# NOTE(review): this is a debugging aid that adds overhead — confirm it is wanted for full runs.
torch.autograd.set_detect_anomaly(True)

Using GPU


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x1ca9223fbe0>

Configuration

In [5]:
# Feature switches for the agent; passed to DQNTrainer and used to derive the
# configuration name for saved results.
configuration_MsPacmanReduced = {
    "double_q_learning_bool": True,  # selects the "Double" vs "DQN" name prefix
    "icm_bool": True,                # presumably enables the ICM component — confirm in DQNTrainer
    "PER_bool": True,                # adds "PER" to the configuration name
    "DuelingQNetwork": True,         # adds "Dueling" to the configuration name
}

DQN Parameters

In [6]:
# Hyperparameters handed to DQNTrainer as a single dict.
DQN_params_MsPacmanReduced = {
    # --- optimisation ---
    "learning_rate": 1e-4,                 # learning rate
    "replay_buffer_size": 20000,           # capacity of the replay buffer
    "batch_size": 128,                     # training batch size
    "gamma": 0.99,                         # discount factor
    # --- exploration ---
    "epsilon": 1.0,                        # starting exploration rate
    "epsilon_decay": 0.99,                 # decay applied to the exploration rate
    "epsilon_min": 0.02,                   # floor for the exploration rate
    # --- target network ---
    "update_target_every": 500,            # steps between target-network updates
    # --- prioritised replay ---
    "alpha": 0.6,                          # prioritisation exponent
    "beta": 0.4,                           # importance-sampling exponent
    "beta_increment_per_sampling": 0.001,  # beta increment per sampling
    # --- hardware ---
    "device": device,                      # torch device (CPU or GPU)
    # --- ICM settings (semantics defined by the trainer — TODO confirm) ---
    "icm_learning_rate": 1e-4,
    "icm_update_every": 5,
    "eta": 0.1,
}


Display, save, and episode configuration

In [7]:
# Run-length and bookkeeping settings passed to DQNTrainer.
num_episodes = 500          # total number of training episodes
# NOTE(review): larger than num_episodes, so presumably rendering never triggers — confirm.
render_frequency = 551      # render interval
model_save_interval = 20    # model save interval

Saving functions

In [8]:
def generate_config_name(configuration):
    """Build the name used to label a run from its feature switches.

    The name starts with "Double" or "DQN" depending on
    ``configuration["double_q_learning_bool"]``, followed by "PER" and/or
    "Dueling" when ``configuration["PER_bool"]`` / ``configuration["DuelingQNetwork"]``
    are truthy. Parts are joined with a single underscore, e.g.
    "Double_PER_Dueling", "DQN_PER", "Double_Dueling", "DQN".

    Args:
        configuration: Mapping with the keys "double_q_learning_bool",
            "PER_bool" and "DuelingQNetwork".

    Returns:
        The configuration name as a string.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    parts = ["Double" if configuration["double_q_learning_bool"] else "DQN"]

    # Each optional feature contributes exactly one part; joining with "_" keeps
    # the separator consistent for every combination (the Dueling-only case
    # must not produce a doubled underscore).
    if configuration["PER_bool"]:
        parts.append("PER")
    if configuration["DuelingQNetwork"]:
        parts.append("Dueling")

    return "_".join(parts)

def write_rewards(config_name, rewards, base_dir="Results/Pacman"):
    """Write one reward per line to ``<base_dir>/<config_name>/Rewards.txt``.

    The target directory is created if it does not exist yet, so the first
    call for a new configuration does not fail. An existing file is overwritten.

    Args:
        config_name: Sub-directory name for this configuration.
        rewards: Iterable of rewards; each item is written via ``str()``.
        base_dir: Root directory for the results. Defaults to "Results/Pacman".
    """
    import os  # local import: this cell must not rely on imports made in other cells

    target_dir = os.path.join(base_dir, config_name)
    os.makedirs(target_dir, exist_ok=True)

    with open(os.path.join(target_dir, "Rewards.txt"), "w") as file:
        file.writelines(str(reward) + "\n" for reward in rewards)

Create Pacman environment

In [9]:
# Observation shape handed to the trainer; it has to agree with the pre-processing
# (presumably channels x height x width — TODO confirm against the preprocessing code).
input_shape = (1, 80, 80)  # depends on the pre-processing


# Base MsPacman environment, created without the full action space.
env_MsPacman = gym.make('ALE/MsPacman-v5', full_action_space=False)
# Wrap in two stages: first the reduced action space, then action repetition on top.
wrapped_env_MsPacmanReduced_semi = MsPacmanReducedActionSpaceWrapper(env_MsPacman)
wrapped_env_MsPacmanReduced = ActionRepeatWrapper(wrapped_env_MsPacmanReduced_semi)
# Number of actions is read from the fully wrapped environment.
num_actions_MsPacmanReduced = wrapped_env_MsPacmanReduced.action_space.n
# Name derived from the feature switches; passed to the trainer as config_name.
config_name_MsPacmanReduced = generate_config_name(configuration_MsPacmanReduced)

Create Trainer object

In [10]:
from MainRunFiles.TrainingLoop import DQNTrainer

# Whether the trainer should plot (forwarded as-is below).
plot_bool = True

# Build the trainer. The first six arguments are positional, so their order
# must stay as-is; everything else is passed by keyword.
train_MsPacmanReduced = DQNTrainer(
    input_shape,                                   # input shape
    num_actions_MsPacmanReduced,                   # number of actions
    DQN_params_MsPacmanReduced,                    # DQN parameters
    wrapped_env_MsPacmanReduced,                   # environment
    num_episodes,                                  # number of episodes
    render_frequency,                              # render frequency
    file_path="Results",                           # file path
    environment_name='MsPacman-Reduced',           # environment name
    Wrapper=MsPacmanReducedActionSpaceWrapper,     # wrapper class (not an instance)
    model_save_interval=model_save_interval,       # model save interval
    configuration=configuration_MsPacmanReduced,   # feature switches
    config_name=config_name_MsPacmanReduced,       # configuration name
    plot_bool=plot_bool,                           # plotting enabled
    pnn_bool=False,                                # PNN disabled
)

PNN bool: False


Load Weights

In [11]:
#train_MsPacmanReduced.load_pretrained_weights_transfer_learning(
    #'Results/Pacman_epsilon_two/Double_PER_Dueling/Episode_280_Agent.pt',
    #'Results/Pacman_epsilon_two/Double_PER_Dueling/Episode_280_Target.pt',
    #'Results/Pacman_epsilon_two/Double_PER_Dueling/Episode_280_ReplayBuffer.pkl',
    #'Results/Pacman_epsilon_two/Double_PER_Dueling/Episode_280_ICM.pt',
    #freeze_feature_extractor=True
#)

Train Agent

In [12]:
# Start training via the trainer's own loop. This is long-running; the recorded
# output below shows a run that was interrupted manually after the first episode.
train_MsPacmanReduced.training_loop()

Episode 1/500, Total Reward: 700.0, Steps in Episode: 165, Exploration Rate: 0.682554595010387


KeyboardInterrupt: 