<a href="https://colab.research.google.com/github/Khaarl/ViZDOOM-PPO/blob/STAGING/ViZDOOM_PPO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive base folder: /content/drive/MyDrive/ViZDoom-PPO


In [5]:
# CELL 2: Install Dependencies

!apt-get update
!apt-get install -y build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
    nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
    libopenal-dev timidity libwildmidi-dev unzip ffmpeg

!pip install vizdoom
!pip install stable-baselines3[extra]

Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:6 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading

In [6]:
# CELL 3: Download Scenario Config and Handle WAD File

import os
import shutil

# Define local paths for scenario and storage
LOCAL_SCENARIO_PATH = "/content/scenarios/deathmatch.cfg"
LOCAL_STORAGE_PATH = "/content/scenarios/training_data"
LOCAL_MODEL_PATH = "/content/scenarios/training_data/models"
LOCAL_LOG_PATH = "/content/scenarios/training_data/logs"
LOCAL_TENSORBOARD_PATH = "/content/scenarios/training_data/tensorboard"
LOCAL_WAD_PATH = "/content/scenarios/freedoom2.wad"

# Create local directories
os.makedirs(LOCAL_STORAGE_PATH, exist_ok=True)
os.makedirs(LOCAL_MODEL_PATH, exist_ok=True)
os.makedirs(LOCAL_LOG_PATH, exist_ok=True)
os.makedirs(LOCAL_TENSORBOARD_PATH, exist_ok=True)
print("Created local directories.")

# Download freedoom2.wad if it doesn't exist
if not os.path.exists(LOCAL_WAD_PATH):
    !wget https://github.com/freedoom/freedoom/releases/download/v0.13.0/freedoom2.wad -O $LOCAL_WAD_PATH
    print(f"Downloaded freedoom2.wad to {LOCAL_WAD_PATH}")
else:
    print(f"Using existing freedoom2.wad at {LOCAL_WAD_PATH}")

# Download deathmatch.cfg if it doesn't exist
if not os.path.exists(LOCAL_SCENARIO_PATH):
    !wget https://raw.githubusercontent.com/mwydmuch/ViZDoom/master/scenarios/deathmatch.cfg -P /content/scenarios/
    print(f"Downloaded deathmatch.cfg to {LOCAL_SCENARIO_PATH}")
else:
    print(f"Using existing deathmatch.cfg at {LOCAL_SCENARIO_PATH}")

Created local directories.
Using existing freedoom2.wad at /content/scenarios/freedoom2.wad
Using existing deathmatch.cfg at /content/scenarios/deathmatch.cfg


In [9]:
# CELL 4: Define ViZDoom Environment with Reward Shaping (Corrected)

from vizdoom import *
import numpy as np
import gymnasium as gym
from gymnasium import spaces

# Define local path for WAD file here to ensure it's accessible
LOCAL_WAD_PATH = "/content/scenarios/freedoom2.wad"  # This should be freedoom2.wad

class VizdoomEnv(gym.Env):
    def __init__(self, scenario_path, frame_skip=4):
        super(VizdoomEnv, self).__init__()
        self.game = DoomGame()
        self.game.load_config(scenario_path)
        self.game.set_doom_game_path(LOCAL_WAD_PATH)  # Use freedoom2.wad here
        self.game.set_window_visible(False)
        self.game.set_mode(Mode.PLAYER)
        self.game.set_screen_format(ScreenFormat.GRAY8)
        self.game.set_screen_resolution(ScreenResolution.RES_640X480)
        self.game.init()

        self.frame_skip = frame_skip
        self.action_space = spaces.Discrete(self.game.get_available_buttons_size())
        self.observation_space = spaces.Box(low=0, high=255, shape=(self.game.get_screen_height(), self.game.get_screen_width(), 1), dtype=np.uint8)

        # Initialize variables for reward shaping
        self.previous_game_variables = None

    def step(self, action):
        buttons = np.zeros(self.game.get_available_buttons_size())
        buttons[action] = 1

        reward = self.game.make_action(buttons.tolist(), self.frame_skip)
        done = self.game.is_episode_finished()

        state = self.game.get_state().screen_buffer if not done else np.zeros(self.observation_space.shape, dtype=np.uint8)
        state = np.expand_dims(state, axis=-1)

        shaped_reward = reward + self._shape_reward()

        return state, shaped_reward, done, False, {}

    def _shape_reward(self):
      # Get current game variables
      current_game_vars = self.game.get_state().game_variables if self.game.get_state() else None

      # Initialize reward
      reward = 0

      # If this is the first step or the episode is over, set previous variables and return 0 reward
      if current_game_vars is None or self.previous_game_variables is None:
          self.previous_game_variables = current_game_vars
          return reward

      # Calculate reward based on changes in game variables
      reward += (current_game_vars[0] - self.previous_game_variables[0]) * 100.0  # Reward for kills
      reward -= (self.previous_game_variables[2] - current_game_vars[2]) * 0.1  # Penalty for ammo used
      reward -= (self.previous_game_variables[1] - current_game_vars[1])  # Penalty for health loss
      reward += 0.1  # Encourage survival

      # Reward for getting closer to enemies
      min_dist_now = self._get_closest_enemy_distance()
      if hasattr(self, 'min_dist_prev'):
          if min_dist_now < self.min_dist_prev and min_dist_now < 500:
              reward += 0.05
          elif min_dist_now > self.min_dist_prev and self.min_dist_prev < 500:
              reward -= 0.05
      self.min_dist_prev = min_dist_now

      # Update previous game variables for the next step
      self.previous_game_variables = current_game_vars

      return reward

    def _get_closest_enemy_distance(self):
      min_dist = float('inf')
      current_game_vars = self.game.get_state().game_variables if self.game.get_state() else None

      if current_game_vars is None:
          return min_dist

      px, py = current_game_vars[3], current_game_vars[4]

      for obj in self.game.get_state().objects:
          if obj.is_enemy():
              dist = ((px - obj.position_x)**2 + (py - obj.position_y)**2)**0.5
              min_dist = min(min_dist, dist)

      return min_dist

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        state = np.expand_dims(state, axis=-1)
        self.previous_game_variables = None # Reset for reward shaping
        self.min_dist_prev = float('inf')
        return state, {}

    def close(self):
        self.game.close()

In [10]:
# CELL 5: Train PPO Agent with User Input, Loading, and Saving

from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback
import os
import logging

# Configure logging
logging.basicConfig(filename=os.path.join(LOCAL_LOG_PATH, 'training.log'), level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Function to ask for user input with validation
def get_user_input(prompt, type_=None, min_=None, max_=None, range_=None):
    if min_ is not None and max_ is not None and max_ < min_:
        raise ValueError("min_ must be less than or equal to max_.")
    while True:
        val = input(prompt)
        if type_ is not None:
            try:
                val = type_(val)
            except ValueError:
                print(f"Input must be of type {type_.__name__}.")
                continue
        if min_ is not None and val < min_:
            print(f"Input must be greater than or equal to {min_}.")
        elif max_ is not None and val > max_:
            print(f"Input must be less than or equal to {max_}.")
        elif range_ is not None and val not in range_:
            if isinstance(range_, range):
                template = f"Input must be between {range_.start} and {range_.stop-1}."
            else:
                template = f"Input must be {', '.join(map(str, range_))}."
            print(template)
        else:
            return val

# Get user input for the number of training timesteps
num_episodes = get_user_input("Enter the number of training timesteps: ", type_=int, min_=1)
logging.info(f"Number of training timesteps: {num_episodes}")

# Prompt user to load a pre-trained model
load_pretrained = get_user_input("Do you want to load a pre-trained model? (yes/no): ", type_=str, range_=["yes", "no"])
logging.info(f"Load pre-trained model: {load_pretrained}")

# Initialize model and env to None
model = None
env = None

if load_pretrained == "yes":
    while True:
        pretrained_model_path = get_user_input("Enter the path to the pre-trained model: ")
        if os.path.exists(pretrained_model_path):
            try:
                # Load the model
                model = PPO.load(pretrained_model_path)
                logging.info(f"Successfully loaded model from: {pretrained_model_path}")

                # Create a new environment and attach it to the loaded model
                env = VizdoomEnv(LOCAL_SCENARIO_PATH)
                env = Monitor(env, LOCAL_LOG_PATH)
                model.set_env(env)
                break  # Exit the loop after successfully loading the model and setting the environment
            except Exception as e:
                logging.error(f"Error loading model: {e}")
                print(f"Error loading model: {e}")
                print("Please enter a valid path.")
        else:
            logging.warning(f"Model path does not exist: {pretrained_model_path}")
            print("Model path does not exist. Please enter a valid path.")
else:
    # Create a new environment and model
    try:
        env = VizdoomEnv(LOCAL_SCENARIO_PATH)
        env = Monitor(env, LOCAL_LOG_PATH)
        model = PPO("CnnPolicy", env, verbose=1, tensorboard_log=LOCAL_TENSORBOARD_PATH)
        logging.info("Successfully created new environment and PPO model.")
    except Exception as e:
        logging.error(f"Error creating environment or PPO model: {e}")
        print(f"Error creating environment or PPO model: {e}")
        exit()  # Exit if environment or model creation fails

# Check if model and env were successfully created
if model is None or env is None:
    logging.error("Model or environment was not created successfully.")
    print("Model or environment was not created successfully.")
    exit()

# Define checkpoint callback
checkpoint_callback = CheckpointCallback(
    save_freq=max(10000, num_episodes // 10),
    save_path=LOCAL_MODEL_PATH,
    name_prefix="ppo_deathmatch"
)

# Train the agent
try:
    model.learn(total_timesteps=num_episodes, callback=checkpoint_callback)
    logging.info("Model training completed.")
except Exception as e:
    logging.error(f"Error during model training: {e}")
    print(f"Error during model training: {e}")
    if env:
        env.close()
    exit()

# Save the final model locally
final_model_local_path = os.path.join(LOCAL_MODEL_PATH, "ppo_deathmatch_final")
try:
    model.save(final_model_local_path)
    logging.info(f"Final model saved locally to: {final_model_local_path}")
except Exception as e:
    logging.error(f"Error saving the final model locally: {e}")
    print(f"Error saving the final model locally: {e}")

# Close the environment only if it has been initialized
if env:
    env.close()
logging.info("Environment closed.")

print("Training process completed.")

Enter the number of training timesteps: 10
Do you want to load a pre-trained model? (yes/no): no


ERROR:root:Error creating environment or PPO model: File "/content/scenarios/deathmatch.wad" does not exist.
ERROR:root:Model or environment was not created successfully.
ERROR:root:Error during model training: 'NoneType' object has no attribute 'learn'
ERROR:root:Error saving the final model locally: 'NoneType' object has no attribute 'save'


Error creating environment or PPO model: File "/content/scenarios/deathmatch.wad" does not exist.
Model or environment was not created successfully.
Error during model training: 'NoneType' object has no attribute 'learn'
Error saving the final model locally: 'NoneType' object has no attribute 'save'
Training process completed.
