<a href="https://colab.research.google.com/github/Khaarl/ViZDOOM-PPO/blob/STAGING/ViZDOOM_PPO.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# CELL 1: Mount Drive and Setup Base Paths
import os

from google.colab import drive

# Attach the user's Google Drive to the Colab filesystem.
drive.mount('/content/drive')

# Drive folder used as the base location for this project.
GDRIVE_BASE = "/content/drive/MyDrive/ViZDoom-PPO"
print(f"Google Drive base folder: {GDRIVE_BASE}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Drive base folder: /content/drive/MyDrive/ViZDoom-PPO


In [5]:
# CELL 2: Install Dependencies
!apt-get update
!apt-get install -y build-essential zlib1g-dev libsdl2-dev libjpeg-dev \
    nasm tar libbz2-dev libgtk2.0-dev cmake git libfluidsynth-dev libgme-dev \
    libopenal-dev timidity libwildmidi-dev unzip ffmpeg

!pip install vizdoom
!pip install stable-baselines3[extra]

Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:3 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:6 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Reading package lists... Done
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
Reading package lists... Done
Building dependency tree... Done
Reading

In [None]:
# CELL 4: Create ViZDoom Environment


In [6]:
# CELL 3: Download Scenario Config and Handle WAD File
import os
import requests
from pathlib import Path
import hashlib

# Working directories, all rooted at the directory the kernel was started in.
BASE_DIR = Path.cwd()
SCENARIOS_DIR, MODELS_DIR, LOGS_DIR = (
    BASE_DIR / folder for folder in ("scenarios", "models", "logs")
)

# Make sure every working directory exists (no-op when already present).
for dir_path in (SCENARIOS_DIR, MODELS_DIR, LOGS_DIR):
    dir_path.mkdir(exist_ok=True)

# Files to fetch, keyed by local file name. "checksum" is an optional SHA256
# hex digest; None means no checksum is recorded for that file.
SCENARIO_FILES = {
    filename: {
        "url": "https://raw.githubusercontent.com/Farama-Foundation/ViZDoom/master/scenarios/" + filename,
        "checksum": None,  # Add checksum if available
    }
    for filename in ("deathmatch.cfg", "deathmatch.wad")
}

def download_file(url: str, dest_path: Path) -> bool:
    """Download ``url`` to ``dest_path`` unless that file already exists.

    The body is streamed into a sibling ``<name>.part`` file and only renamed
    to ``dest_path`` once the transfer has finished, so an interrupted download
    can never leave a truncated file that a later run would mistake for a
    complete one.

    Args:
        url: HTTP(S) location of the file.
        dest_path: Where the file should end up on disk.

    Returns:
        True if ``dest_path`` already existed or was downloaded successfully,
        False if the download failed (the error is printed, not raised).
    """
    if dest_path.exists():
        print(f"File already exists: {dest_path.name}")
        return True

    partial_path = dest_path.with_name(dest_path.name + ".part")
    try:
        # The timeout stops a stalled connection from hanging the cell forever;
        # the context manager releases the streamed connection in every case.
        with requests.get(url, stream=True, timeout=30) as response:
            response.raise_for_status()

            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Publish the file only after the whole body has been written.
        partial_path.replace(dest_path)
        print(f"Downloaded {dest_path.name} successfully")
        return True
    except Exception as e:
        print(f"Error downloading {dest_path.name}: {e}")
        # Drop whatever was written so the next run starts from scratch.
        if partial_path.exists():
            partial_path.unlink()
        return False

# Export paths for other cells. Defined before downloading so the names exist
# even when this cell stops with an error below.
SCENARIO_PATH = str(SCENARIOS_DIR / "deathmatch.cfg")
WAD_PATH = str(SCENARIOS_DIR / "deathmatch.wad")

# Download files, remembering every file that could not be fetched.
failed_downloads = []
for filename, file_info in SCENARIO_FILES.items():
    dest_path = SCENARIOS_DIR / filename
    if not download_file(file_info["url"], dest_path):
        failed_downloads.append(filename)
        continue

    if file_info["checksum"]:
        # Verify checksum if provided, hashing in 1 MiB pieces so the whole
        # file never has to sit in memory at once.
        sha256 = hashlib.sha256()
        with open(dest_path, "rb") as f:
            for piece in iter(lambda: f.read(1 << 20), b""):
                sha256.update(piece)
        if sha256.hexdigest() != file_info["checksum"]:
            # Remove the bad file so the next run downloads it again instead
            # of reporting "already exists" and failing the same way.
            dest_path.unlink()
            # Raised explicitly (not via `assert`) so the check still runs
            # when Python is started with -O.
            raise AssertionError(f"Checksum mismatch for {filename}")

# Stop here rather than letting a later cell fail with a less obvious error
# about a missing scenario file.
if failed_downloads:
    raise RuntimeError(
        "Could not download required scenario files: " + ", ".join(failed_downloads)
    )

print(f"Setup complete:")
print(f"Scenario path: {SCENARIO_PATH}")
print(f"WAD path: {WAD_PATH}")

Created local directories.
Using existing freedoom2.wad at /content/scenarios/freedoom2.wad
Using existing deathmatch.cfg at /content/scenarios/deathmatch.cfg


In [9]:
# CELL 4: Define ViZDoom Environment with Reward Shaping (Corrected)

from vizdoom import *
import numpy as np
import gymnasium as gym
from gymnasium import spaces

# Absolute path of the game WAD that VizdoomEnv passes to
# DoomGame.set_doom_game_path(). Defined in this cell so the class below can
# read it without relying on names from other cells.
# NOTE(review): the download cell shown in this notebook only lists
# deathmatch.cfg and deathmatch.wad — confirm freedoom2.wad is provisioned at
# this location before the environment is created.
LOCAL_WAD_PATH = "/content/scenarios/freedoom2.wad"  # This should be freedoom2.wad

class VizdoomEnv(gym.Env):
    """Gymnasium environment around a ViZDoom game with shaped rewards.

    Observations are single-channel ``(height, width, 1)`` uint8 frames.
    Actions are a ``Discrete`` index that presses exactly one of the buttons
    made available by the scenario config for ``frame_skip`` tics.

    On top of the scenario's own reward, each step adds a shaping term built
    from the change in kills, health and ammo, a small survival bonus, and a
    bonus/penalty for moving towards/away from the closest enemy.

    Args:
        scenario_path: Path to the ViZDoom ``.cfg`` scenario file.
        frame_skip: Number of game tics each action is repeated for.
    """

    # Actor class names counted as enemies for the proximity reward.
    # ViZDoom's per-object info carries a name but no hostility flag, so
    # enemies are recognised by name.
    # NOTE(review): this lists stock Doom II monster classes — confirm it
    # matches what the scenario actually spawns and extend if needed.
    ENEMY_NAMES = frozenset({
        "Zombieman", "ShotgunGuy", "ChaingunGuy", "DoomImp", "Demon",
        "Spectre", "LostSoul", "Cacodemon", "HellKnight", "BaronOfHell",
        "Arachnotron", "PainElemental", "Revenant", "Fatso", "Archvile",
        "Cyberdemon", "SpiderMastermind", "WolfensteinSS",
        "MarineChainsawVzd",
    })

    # Positions inside ``state.game_variables``. __init__ pins the variable
    # list to exactly this order, so the indices do not depend on whatever
    # the scenario config happens to declare.
    _KILLS, _HEALTH, _AMMO, _POS_X, _POS_Y = range(5)

    def __init__(self, scenario_path, frame_skip=4):
        super().__init__()
        import os  # local import: keeps this cell independent of other cells

        self.game = DoomGame()
        self.game.load_config(scenario_path)
        if os.path.isfile(LOCAL_WAD_PATH):
            self.game.set_doom_game_path(LOCAL_WAD_PATH)  # Use freedoom2.wad here
        else:
            # Leaving the path unset lets ViZDoom use its own default game
            # file instead of failing on a path that does not exist.
            print(f"{LOCAL_WAD_PATH} not found; using ViZDoom's default game WAD")
        self.game.set_window_visible(False)
        self.game.set_mode(Mode.PLAYER)
        self.game.set_screen_format(ScreenFormat.GRAY8)
        self.game.set_screen_resolution(ScreenResolution.RES_640X480)
        # Order must match _KILLS, _HEALTH, _AMMO, _POS_X, _POS_Y above.
        self.game.set_available_game_variables([
            GameVariable.KILLCOUNT,
            GameVariable.HEALTH,
            GameVariable.SELECTED_WEAPON_AMMO,
            GameVariable.POSITION_X,
            GameVariable.POSITION_Y,
        ])
        # Without this, ``state.objects`` is not populated and the proximity
        # reward has nothing to iterate over.
        self.game.set_objects_info_enabled(True)
        self.game.init()

        self.frame_skip = frame_skip
        self.action_space = spaces.Discrete(self.game.get_available_buttons_size())
        self.observation_space = spaces.Box(
            low=0,
            high=255,
            shape=(self.game.get_screen_height(), self.game.get_screen_width(), 1),
            dtype=np.uint8,
        )

        # Reward-shaping memory: game variables and closest-enemy distance
        # observed on the previous step.
        self.previous_game_variables = None
        self.min_dist_prev = float('inf')

    def step(self, action):
        """Press the button selected by ``action`` and advance the game.

        Returns:
            ``(observation, reward, terminated, truncated, info)``. When the
            episode has finished, the observation is an all-zero frame of the
            declared observation shape. ``truncated`` is always False.
        """
        buttons = np.zeros(self.game.get_available_buttons_size())
        buttons[action] = 1

        reward = self.game.make_action(buttons.tolist(), self.frame_skip)
        done = self.game.is_episode_finished()

        if done:
            # Already (H, W, 1): adding another axis here would break the
            # declared observation shape.
            state = np.zeros(self.observation_space.shape, dtype=np.uint8)
        else:
            # GRAY8 frames are (H, W); add the channel axis.
            state = np.expand_dims(self.game.get_state().screen_buffer, axis=-1)

        shaped_reward = reward + self._shape_reward()

        return state, shaped_reward, done, False, {}

    def _shape_reward(self):
        """Return the shaping term for the step that was just taken.

        Returns 0 on the first step after a reset and once the episode is
        over (there is no previous or current state to compare against).
        """
        game_state = self.game.get_state()
        current_game_vars = (
            None if game_state is None
            else np.array(game_state.game_variables, copy=True)
        )

        # First step, or episode over: just remember the variables.
        if current_game_vars is None or self.previous_game_variables is None:
            self.previous_game_variables = current_game_vars
            return 0.0

        previous = self.previous_game_variables
        reward = 0.0
        reward += (current_game_vars[self._KILLS] - previous[self._KILLS]) * 100.0  # Reward for kills
        reward -= (previous[self._AMMO] - current_game_vars[self._AMMO]) * 0.1  # Penalty for ammo used
        reward -= (previous[self._HEALTH] - current_game_vars[self._HEALTH])  # Penalty for health loss
        reward += 0.1  # Encourage survival

        # Reward for getting closer to enemies (only within 500 map units).
        min_dist_now = self._get_closest_enemy_distance(game_state)
        if min_dist_now < self.min_dist_prev and min_dist_now < 500:
            reward += 0.05
        elif min_dist_now > self.min_dist_prev and self.min_dist_prev < 500:
            reward -= 0.05
        self.min_dist_prev = min_dist_now

        # Update previous game variables for the next step
        self.previous_game_variables = current_game_vars

        return float(reward)

    def _get_closest_enemy_distance(self, game_state=None):
        """Return the 2-D distance from the player to the nearest enemy.

        Args:
            game_state: State to inspect; fetched from the game when omitted.

        Returns:
            The smallest distance to an object whose name is in
            ``ENEMY_NAMES``, or ``inf`` when there is no state, no object
            info, or no enemy among the objects.
        """
        if game_state is None:
            game_state = self.game.get_state()
        if game_state is None or game_state.game_variables is None:
            return float('inf')

        px = game_state.game_variables[self._POS_X]
        py = game_state.game_variables[self._POS_Y]

        min_dist = float('inf')
        for obj in game_state.objects or ():
            if obj.name in self.ENEMY_NAMES:
                dist = float(np.hypot(px - obj.position_x, py - obj.position_y))
                min_dist = min(min_dist, dist)

        return min_dist

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(first_observation, info)``."""
        super().reset(seed=seed)
        if seed is not None:
            # Forward the seed to the game so episodes are reproducible;
            # reduced modulo 2**32 to keep it in a 32-bit unsigned range.
            self.game.set_seed(int(seed) % (2 ** 32))
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        state = np.expand_dims(state, axis=-1)
        self.previous_game_variables = None  # Reset for reward shaping
        self.min_dist_prev = float('inf')
        return state, {}

    def close(self):
        """Shut down the underlying ViZDoom game."""
        self.game.close()

In [10]:
# CELL 5: Train Agent
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import CheckpointCallback

# Create and wrap environment.
# The environment class defined in this notebook is spelled `VizdoomEnv`;
# the previous `ViZDoomEnv` spelling raised NameError before training started.
env = VizdoomEnv(SCENARIO_PATH)
env = Monitor(env, "logs")

# Create agent (CNN policy on image observations, TensorBoard logs in "logs")
model = PPO("CnnPolicy", env, verbose=1, tensorboard_log="logs")

# Setup checkpointing: a snapshot every 10000 callback calls into ./models
checkpoint_callback = CheckpointCallback(
    save_freq=10000,
    save_path="./models",
    name_prefix="doom_ppo"
)

# Train
TIMESTEPS = 100000
model.learn(total_timesteps=TIMESTEPS, callback=checkpoint_callback)

# Save final model
model.save("models/doom_ppo_final")

Enter the number of training timesteps: 10
Do you want to load a pre-trained model? (yes/no): no


ERROR:root:Error creating environment or PPO model: File "/content/scenarios/deathmatch.wad" does not exist.
ERROR:root:Model or environment was not created successfully.
ERROR:root:Error during model training: 'NoneType' object has no attribute 'learn'
ERROR:root:Error saving the final model locally: 'NoneType' object has no attribute 'save'


Error creating environment or PPO model: File "/content/scenarios/deathmatch.wad" does not exist.
Model or environment was not created successfully.
Error during model training: 'NoneType' object has no attribute 'learn'
Error saving the final model locally: 'NoneType' object has no attribute 'save'
Training process completed.
