AGENT IMPLEMENTATION 
A random-policy baseline shows how an uninformed agent behaves in the ViZDoom `basic` scenario and establishes the reference floor that any trained agent is expected to beat.

In [3]:
!apt-get install -y vizdoom
!pip install vizdoom gymnasium stable-baselines3 torch

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
E: Unable to locate package vizdoom


In [4]:
import vizdoom as vzd
import os
import numpy as np
import matplotlib.pyplot as plt
import random

In [6]:
# ---------- Environment setup ----------
# A single fixed seed so the stochastic baseline is repeatable across runs.
SEED = 42
random.seed(SEED)  # drives random.choice() used by the random agent

game = vzd.DoomGame()
game.load_config(os.path.join(vzd.scenarios_path, "basic.cfg"))
game.set_window_visible(False)  # run headless; no game window is opened
# Seed the RNG that ViZDoom uses to generate per-episode seeds. Set after
# load_config() so a seed in the config file cannot override it.
game.set_seed(SEED)
game.init()

In [7]:
# ---------- Action space ----------
# Query the button layout of the loaded scenario a single time.
buttons = game.get_available_buttons()
num_buttons = game.get_available_buttons_size()

print("Available buttons:", buttons)
print("Number of buttons:", num_buttons)

# Discrete action set: the all-zero no-op comes first, followed by one
# one-hot vector per button (exactly one button pressed at a time).
actions = [[0] * num_buttons]
actions.extend(
    [int(slot == pressed) for slot in range(num_buttons)]
    for pressed in range(num_buttons)
)

print("Action space:")
print(*actions, sep="\n")


Available buttons: [<Button.MOVE_LEFT: 11>, <Button.MOVE_RIGHT: 10>, <Button.ATTACK: 0>]
Number of buttons: 3
Action space:
[0, 0, 0]
[1, 0, 0]
[0, 1, 0]
[0, 0, 1]


In [8]:
# ---------- Random agent ----------
def run_random_agent(num_episodes=10, seed=None):
    """Play episodes with uniformly random actions and collect per-episode stats.

    Uses the notebook-level ``game`` (the initialised ``vzd.DoomGame``) and
    ``actions`` (the list of button vectors) defined in earlier cells.

    Args:
        num_episodes: Number of episodes to play. Values <= 0 play nothing
            and yield two empty lists.
        seed: Optional seed for action sampling only. When given, a private
            ``random.Random(seed)`` picks the actions, so the sampled action
            sequence is repeatable and the global ``random`` state is left
            untouched. It does not seed the environment. When ``None`` (the
            default) the global ``random`` module is used.

    Returns:
        A tuple ``(episode_rewards, episode_lengths)`` of two lists with one
        entry per episode: the summed reward and the number of actions taken.
    """
    # Either the shared module-level RNG or an isolated, seeded generator.
    sampler = random if seed is None else random.Random(seed)

    episode_rewards = []
    episode_lengths = []

    for ep in range(num_episodes):
        game.new_episode()
        total_reward = 0
        steps = 0

        # make_action() advances the game and returns the reward it produced.
        while not game.is_episode_finished():
            total_reward += game.make_action(sampler.choice(actions))
            steps += 1

        episode_rewards.append(total_reward)
        episode_lengths.append(steps)

        print(f"Episode {ep+1:02d} | Reward: {total_reward:7.1f} | Steps: {steps}")

    return episode_rewards, episode_lengths

In [9]:
# ---------- Run baseline ----------
# Roll out the random policy and summarise it. The engine is shut down in a
# `finally` block so the ViZDoom process is released even if a rollout or the
# summary raises.
try:
    rewards, lengths = run_random_agent(num_episodes=10)

    print("\n===== RANDOM AGENT BASELINE =====")
    print(f"Mean reward: {np.mean(rewards):.2f}")
    print(f"Std reward : {np.std(rewards):.2f}")  # np.std default: population std (ddof=0)
    print(f"Mean steps : {np.mean(lengths):.1f}")
finally:
    # Once closed, the game has to be initialised again (game.init()) before
    # this cell can be re-run.
    game.close()

Episode 01 | Reward:  -123.0 | Steps: 184
Episode 02 | Reward:  -375.0 | Steps: 300
Episode 03 | Reward:  -380.0 | Steps: 300
Episode 04 | Reward:  -375.0 | Steps: 300
Episode 05 | Reward:  -136.0 | Steps: 192
Episode 06 | Reward:  -375.0 | Steps: 300
Episode 07 | Reward:    91.0 | Steps: 10
Episode 08 | Reward:  -380.0 | Steps: 300
Episode 09 | Reward:    93.0 | Steps: 8
Episode 10 | Reward:  -375.0 | Steps: 300

===== RANDOM AGENT BASELINE =====
Mean reward: -233.50
Std reward : 188.85
Mean steps : 219.4
