<a href="https://colab.research.google.com/github/Khaarl/VIZDOOM1/blob/main/VISDOOMGPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# --- Setup ---
# Refresh the apt package index, then install the build toolchain (compiler,
# cmake, git) and the development packages named on the install line
# (Boost, SDL2, JPEG, PNG, zlib, Python headers).
!apt-get update -y
!apt-get install -y build-essential cmake git libboost-all-dev libsdl2-dev libjpeg-dev libpng-dev zlib1g-dev libpython3-dev

import os

# Use Mesa for software rendering (CPU-based)
# Set through os.environ so the value is visible to this kernel process.
os.environ['LIBGL_ALWAYS_SOFTWARE'] = '1'

# Set XDG_RUNTIME_DIR to avoid other SDL issues
os.environ['XDG_RUNTIME_DIR'] = '/tmp'

0% [Working]            Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Get:5 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,197 kB]
Get:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:7 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Get:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [8,563 kB]
Get:13 http://security.

In [2]:
# --- Install ViZDoom from source (for proper linking) ---
!pip uninstall -y vizdoom
!apt-get install -y libboost-python-dev
!git clone https://github.com/mwydmuch/ViZDoom.git
%cd ViZDoom
!mkdir build && cd build
!cmake .. -DBUILD_PYTHON=ON -DCMAKE_INSTALL_PREFIX=/usr/local
!make -j$(nproc) install
%cd /content
!pip install vizdoom

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libboost-python-dev is already the newest version (1.74.0.3ubuntu7).
libboost-python-dev set to manually installed.
0 upgraded, 0 newly installed, 0 to remove and 47 not upgraded.
Cloning into 'ViZDoom'...
remote: Enumerating objects: 18734, done.[K
remote: Counting objects: 100% (567/567), done.[K
remote: Compressing objects: 100% (198/198), done.[K
remote: Total 18734 (delta 469), reused 369 (delta 369), pack-reused 18167 (from 5)[K
Receiving objects: 100% (18734/18734), 59.90 MiB | 20.41 MiB/s, done.
Resolving deltas: 100% (11722/11722), done.
/content/ViZDoom
  Ignoring extra path from command line:

   ".."

[0m
[0mCMake Error: The source directory "/content" does not appear to contain CMakeLists.txt.
Specify --help for usage, or press the help button on the CMake GUI.[0m
make: *** No rule to make target 'install'.  Stop.
/content
Collecting vizdoom
  Downloading vizdoom-1.2.4-c

In [5]:
from google.colab import drive
import imageio
import os
from vizdoom import *
import numpy as np
import time
import torch
import torch.nn as nn
import torch.optim as optim
import random

# --- Configuration ---
RECORD_LMP = False       # Set to True to record .lmp demo files
RECORD_VIDEO = False     # Set to True to record a video (MP4)
VIDEO_FPS = 30          # Frames per second for video recording
LMP_DIR = "lmp_recordings"  # Directory to save .lmp files
SCENARIO_NAME = "defend_the_center.cfg"  # Change the scenario name here
MODEL_PATH = "dqn_model.pth"  # Path to save the trained model


def _ask_yes_no(prompt):
    """Prompt the user and return True for "yes"/"y".

    The answer is stripped and lower-cased first, so surrounding whitespace
    or capitalisation does not turn an intended "yes" into a "no".
    """
    return input(prompt).strip().lower() in ("yes", "y")


# --- User Input for Recording and Number of Episodes ---
# The two detailed questions are only asked when recording is wanted at all;
# otherwise both flags keep their False defaults from above.
if _ask_yes_no("Do you want to record the game? (yes/no): "):
    RECORD_LMP = _ask_yes_no("Record .lmp demo files? (yes/no): ")
    RECORD_VIDEO = _ask_yes_no("Record video (MP4)? (yes/no): ")

num_episodes_choice = input("Enter the number of episodes to run: ")
try:
    NUM_EPISODES = int(num_episodes_choice)
    if NUM_EPISODES < 1:
        # Zero or negative counts would silently train nothing; treat them
        # like any other invalid answer.
        raise ValueError("number of episodes must be positive")
except ValueError:
    print("Invalid input. Using default number of episodes (10).")
    NUM_EPISODES = 10

# --- Google Drive Setup ---
VIDEO_DIR = "/content/drive/My Drive/ViZDoomRecordings"
VIDEO_FILENAME = "game_recording.mp4"
VIDEO_PATH = os.path.join(VIDEO_DIR, VIDEO_FILENAME)

# Drive is only needed as the destination of the video file, so skip the
# mount (and its authorisation prompt) when no video will be recorded.
drive_mounted = os.path.exists('/content/drive/My Drive')
if RECORD_VIDEO:
    if not drive_mounted:
        drive.mount('/content/drive')
    os.makedirs(VIDEO_DIR, exist_ok=True)

# --- DQN ---
class DQN(nn.Module):
    """Convolutional Q-network: three conv layers followed by two linear layers.

    Args:
        input_shape: ``(channels, height, width)`` of a single observation.
        num_actions: size of the output vector (one value per action).
    """

    def __init__(self, input_shape, num_actions):
        super().__init__()
        in_channels = input_shape[0]
        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=8, stride=4)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=4, stride=2)
        self.conv3 = nn.Conv2d(64, 64, kernel_size=3, stride=1)
        # The flattened conv output size depends on the input resolution, so
        # it is measured with a dummy pass instead of being hard-coded.
        flat_features = self.get_conv_output(input_shape)
        self.fc1 = nn.Linear(flat_features, 512)
        self.fc2 = nn.Linear(512, num_actions)

    def forward(self, x):
        """Return one value per action for each observation in the batch ``x``."""
        for conv in (self.conv1, self.conv2, self.conv3):
            x = torch.relu(conv(x))
        flat = x.view(x.size(0), -1)  # keep the batch axis, flatten the rest
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)

    def get_conv_output(self, shape):
        """Return the element count produced by the conv stack for one input of ``shape``."""
        probe = torch.zeros(1, *shape)
        for conv in (self.conv1, self.conv2, self.conv3):
            probe = conv(probe)
        return int(probe.numel())

class ReplayMemory:
    """Fixed-capacity ring buffer of ``(state, action, reward, next_state, done)`` tuples."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.memory = []
        self.position = 0  # index of the slot the next push writes to

    def push(self, state, action, reward, next_state, done):
        """Store one transition, overwriting the oldest entry once the buffer is full."""
        entry = (state, action, reward, next_state, done)
        if len(self.memory) >= self.capacity:
            self.memory[self.position] = entry
        else:
            # While filling up, the write position is always the end of the list.
            self.memory.append(entry)
        self.position = (self.position + 1) % self.capacity

    def sample(self, batch_size):
        """Return ``batch_size`` distinct stored transitions chosen at random."""
        return random.sample(self.memory, batch_size)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

class DQNAgent:
    """Epsilon-greedy DQN agent with a policy network, a target network and replay memory."""

    def __init__(self, state_shape, num_actions, learning_rate, gamma, epsilon_start, epsilon_end, epsilon_decay, memory_capacity, batch_size):
        """Build both networks, the Adam optimizer and the replay memory.

        The exploration rate starts at ``epsilon_start`` and is later moved
        towards ``epsilon_end`` by ``update_epsilon``.
        """
        # Problem description
        self.state_shape = state_shape
        self.num_actions = num_actions

        # Learning hyper-parameters
        self.gamma = gamma
        self.batch_size = batch_size

        # Exploration schedule
        self.epsilon = epsilon_start
        self.epsilon_end = epsilon_end
        self.epsilon_decay = epsilon_decay

        use_cuda = torch.cuda.is_available()
        self.device = torch.device("cuda" if use_cuda else "cpu")

        # The target network starts as an exact copy of the policy network and
        # stays in eval mode; only the policy network is optimised.
        self.policy_net = DQN(state_shape, num_actions).to(self.device)
        self.target_net = DQN(state_shape, num_actions).to(self.device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.target_net.eval()

        self.optimizer = optim.Adam(self.policy_net.parameters(), lr=learning_rate)
        self.memory = ReplayMemory(memory_capacity)

    def select_action(self, state):
        """Return an action index: random with probability epsilon, otherwise greedy."""
        explore = np.random.rand() <= self.epsilon
        if explore:
            return np.random.choice(self.num_actions)

        with torch.no_grad():
            # Add a leading batch axis so a single observation can be fed in.
            obs = torch.tensor(state, dtype=torch.float32, device=self.device).unsqueeze(0)
            return self.policy_net(obs).argmax(dim=1).item()

    def update_epsilon(self):
        """Multiply epsilon by the decay factor, never going below ``epsilon_end``."""
        decayed = self.epsilon * self.epsilon_decay
        self.epsilon = decayed if decayed > self.epsilon_end else self.epsilon_end

    def learn(self):
        """Run one optimisation step on a sampled batch; no-op until enough samples exist."""
        if len(self.memory) < self.batch_size:
            return

        sampled = self.memory.sample(self.batch_size)
        # Transpose the list of transitions into one tuple per field.
        columns = tuple(zip(*sampled))

        def as_float(values):
            return torch.tensor(values, dtype=torch.float32, device=self.device)

        state_batch = as_float(np.array(columns[0]))
        next_state_batch = as_float(np.array(columns[3]))
        action_batch = torch.tensor(columns[1], dtype=torch.long, device=self.device).unsqueeze(1)
        reward_batch = as_float(columns[2]).unsqueeze(1)
        done_batch = as_float(columns[4]).unsqueeze(1)

        # Value predicted for the action that was actually taken.
        predicted = self.policy_net(state_batch).gather(1, action_batch)

        with torch.no_grad():
            best_next = self.target_net(next_state_batch).max(dim=1, keepdim=True)[0]
            not_done = 1 - done_batch  # zero where the episode ended
            target = reward_batch + self.gamma * best_next * not_done

        loss = nn.functional.mse_loss(predicted, target)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def update_target_network(self):
        """Copy the policy network's weights into the target network."""
        self.target_net.load_state_dict(self.policy_net.state_dict())

# --- Training Parameters ---
LEARNING_RATE = 0.0001      # Adam step size
GAMMA = 0.99                # discount factor applied to the next-state value
EPSILON_START = 1.0         # initial exploration probability
EPSILON_END = 0.05          # floor for the exploration probability
EPSILON_DECAY = 0.995       # multiplicative decay passed to the agent
MEMORY_CAPACITY = 10000     # maximum number of stored transitions
BATCH_SIZE = 64             # transitions sampled per learning step
TARGET_UPDATE_FREQ = 10     # the training loop syncs the target net when episode % this == 0
# NUM_EPISODES is set via user input earlier
FRAME_SKIP = 4              # tics each chosen action is repeated for in make_action

# --- ViZDoom Setup ---
# The star import above does not bind the module name itself, which is needed
# here to locate the scenario files bundled with the installed package.
import vizdoom

game = DoomGame()
vizdoom_path = os.path.dirname(vizdoom.__file__)
SCENARIO_PATH = os.path.join(vizdoom_path, "scenarios", SCENARIO_NAME)
game.load_config(SCENARIO_PATH)
game.set_window_visible(False)
game.set_screen_format(ScreenFormat.RGB24)
game.set_screen_resolution(ScreenResolution.RES_320X240)

if RECORD_LMP:
    game.set_mode(Mode.PLAYER)
    # Do not call set_doom_scenario_path(SCENARIO_PATH) here: SCENARIO_PATH is
    # the .cfg file, and that setter expects the scenario WAD. The WAD path
    # has already been set by load_config above and must not be overwritten.
    os.makedirs(LMP_DIR, exist_ok=True)

game.init()
num_actions = game.get_available_buttons_size()

# --- One-hot encoding for actions ---
# Row i of the identity matrix presses only button i.
actions = np.identity(num_actions, dtype=int).tolist()

# --- Get initial state shape ---
screen_height, screen_width = game.get_screen_height(), game.get_screen_width()
channels = game.get_screen_channels()
# Channel-first layout, matching the transpose applied to every frame in the
# training loop.
state_shape = (channels, screen_height, screen_width)

# --- Create Agent ---
agent = DQNAgent(state_shape, num_actions, LEARNING_RATE, GAMMA, EPSILON_START, EPSILON_END, EPSILON_DECAY, MEMORY_CAPACITY, BATCH_SIZE)

# --- Video Writer ---
# Stays None when no video is recorded, so cleanup can test it uniformly.
writer = None

try:
    if RECORD_VIDEO:
        writer = imageio.get_writer(VIDEO_PATH, fps=VIDEO_FPS)

    # --- Training Loop ---
    for episode in range(NUM_EPISODES):
        if RECORD_LMP:
            lmp_file_path = os.path.join(LMP_DIR, f"episode_{episode+1}.lmp")
            game.new_episode(lmp_file_path)
        else:
            game.new_episode()

        # Screen buffers arrive as (height, width, channels); the network
        # expects channel-first (3, 240, 320).
        state = np.transpose(game.get_state().screen_buffer, (2, 0, 1))
        total_reward = 0

        while not game.is_episode_finished():
            action_index = agent.select_action(state)
            action = actions[action_index]

            reward = game.make_action(action, FRAME_SKIP)
            done = game.is_episode_finished()

            if done:
                # No observation exists after the final step. Use a zero frame
                # with the same shape AND dtype as real frames so that replay
                # batches are not upcast to float64 by one odd element.
                next_state = np.zeros_like(state)
            else:
                next_state = np.transpose(game.get_state().screen_buffer, (2, 0, 1))

            agent.memory.push(state, action_index, reward, next_state, done)
            agent.learn()

            state = next_state
            total_reward += reward

            # Only real frames go into the video; the terminal placeholder is
            # an artificial black frame and is skipped.
            if writer is not None and not done:
                writer.append_data(state.transpose(1, 2, 0))

        agent.update_epsilon()
        if episode % TARGET_UPDATE_FREQ == 0:
            agent.update_target_network()

        print(f"Episode {episode+1}/{NUM_EPISODES}, Total Reward: {total_reward}, Epsilon: {agent.epsilon:.3f}")

    # --- Save the trained model ---
    torch.save(agent.policy_net.state_dict(), MODEL_PATH)
finally:
    # Release the video file and the game engine even when training is
    # interrupted or fails, so the MP4 is finalised and the engine is shut down.
    if writer is not None:
        writer.close()
    game.close()

print("Done!")

Do you want to record the game? (yes/no): yes
Record .lmp demo files? (yes/no): no
Record video (MP4)? (yes/no): yes
Enter the number of episodes to run: 1




Episode 1/1, Total Reward: 1.0, Epsilon: 0.995
Done!
