In [1]:
from vizdoom import *
import vizdoom as vzd
import random
import time
import numpy as np
import os
import shutil

# SETUP-GAME

In [2]:
# Create the ViZDoom instance, apply the Deadly Corridor (skill 4) scenario
# config, and start the engine.
game = vzd.DoomGame()
game.load_config(r'./scenarios/deadly_corridor-skill-4.cfg')
game.init()

In [3]:
# One-hot action table: row i has a 1 only in column i, so each row presses
# exactly one of the 7 buttons.
actions = np.eye(7, dtype=np.uint8)
print(actions)

[[1 0 0 0 0 0 0]
 [0 1 0 0 0 0 0]
 [0 0 1 0 0 0 0]
 [0 0 0 1 0 0 0]
 [0 0 0 0 1 0 0]
 [0 0 0 0 0 1 0]
 [0 0 0 0 0 0 1]]


1. actions[0] : MOVE_LEFT
2. actions[1] : MOVE_RIGHT
3. actions[2] : ATTACK
4. actions[3] : MOVE_FORWARD
5. actions[4] : MOVE_BACKWARD
6. actions[5] : TURN_LEFT
7. actions[6] : TURN_RIGHT

In [4]:
# Smoke test: start an episode and apply one randomly chosen one-hot action.
game.new_episode()
game.is_episode_finished()  # return value is discarded here
sampled_action = random.choice(actions)
game.make_action(sampled_action)

0.0

In [5]:
# Random-agent rollout: play a few episodes with uniformly random actions to
# eyeball the scenario's rewards and game variables.
episodes = 3
frame_skip = 4  # tics advanced per chosen action (gives the agent time to process)
for e in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        state = game.get_state()
        # Full game-variable vector from the scenario config; it is printed
        # under the "ammo" label below.
        info = state.game_variables
        reward = game.make_action(random.choice(actions), frame_skip)
        print('reward:', reward) 
        print("ammo",info)
        print("state",state)
        time.sleep(0.02)
    print('////////////////// Result:', game.get_total_reward())
    time.sleep(2)

reward: 0.0
ammo [100.   0.  -1.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B03E1CE7B0>
reward: -0.78125
ammo [100.   0.  26.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B03E58EE70>
reward: -2.458099365234375
ammo [100.   0.  26.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B0548DDD30>
reward: -1.6581268310546875
ammo [100.   0.  26.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B03E1CE7B0>
reward: -1.1185302734375
ammo [100.   0.  26.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B0548D7E30>
reward: -0.7545623779296875
ammo [100.   0.  25.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B054976030>
reward: -0.5091094970703125
ammo [100.   0.  25.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B03E1CE7B0>
reward: 6.606719970703125
ammo [100.   0.  25.   0.   0.   0.]
state <vizdoom.vizdoom.GameState object at 0x000001B0548DE0B0>
reward: 8

In [6]:
# Shut down the ViZDoom instance that was used for the random rollout.
game.close()

In [7]:
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box
import cv2
import torch
import torch.nn as nn
import torch.nn.functional as F
from stable_baselines3 import DQN, PPO
from stable_baselines3.common import env_checker
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from stable_baselines3.common.env_util import make_vec_env
from matplotlib import pyplot as plt
import torchvision
import torchaudio
from stable_baselines3.common.callbacks import CheckpointCallback

In [15]:
class VizDoomGym(Env):
    """Gymnasium environment wrapping a ViZDoom scenario with a shaped reward.

    Observations are single-channel 100x160 ``uint8`` frames and actions are
    indices into a 7x7 one-hot button table. On every step the scenario's own
    reward is augmented by :meth:`custom_reward`.

    Args:
        render: Show the game window when truthy; run without a window otherwise.
        config: Path to the ViZDoom scenario ``.cfg`` file.
        damage_taken_weight: Multiplier for the (negative) change in game
            variable 4 between two consecutive states.
        damage_dealt_weight: Multiplier for the change in game variable 5.
        ammo_weight: Multiplier for the change in game variable 2.

    The default weights reproduce the original hard-coded shaping
    ``damage_taken_delta*60 + hitcount_delta*200 + ammo_delta*50``.
    """

    # Positions inside ``GameState.game_variables``. The order comes from the
    # scenario config, which is not visible here.
    # NOTE(review): indices 4 and 5 are presumably DAMAGE_TAKEN and DAMAGECOUNT
    # (damage dealt) -- confirm against ``available_game_variables`` in the cfg.
    _VAR_HEALTH = 0
    _VAR_HITCOUNT = 1
    _VAR_AMMO = 2          # SELECTED_WEAPON_AMMO
    _VAR_KILLCOUNT = 3
    _VAR_DAMAGE_TAKEN = 4
    _VAR_DAMAGE_DEALT = 5

    # Number of tics every chosen action is applied for.
    _FRAME_SKIP = 4

    def __init__(self, render=False, config='./scenarios/deadly_corridor-skill-4.cfg',
                 damage_taken_weight=60, damage_dealt_weight=200, ammo_weight=50):
        super().__init__()
        self.damage_taken_weight = damage_taken_weight
        self.damage_dealt_weight = damage_dealt_weight
        self.ammo_weight = ammo_weight

        self.game = vzd.DoomGame()
        self.game.load_config(config)

        # Window visibility has to be configured before the engine is started.
        self.game.set_window_visible(bool(render))
        self.game.init()

        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100, 160, 1), dtype=np.uint8)
        self.action_space = Discrete(7)  # 7 possible actions
        self.actions = np.identity(7, dtype=np.float32)

    def custom_reward(self, prev_state, current_state):
        """Return the shaping reward for the transition between two game states.

        Args:
            prev_state: Game state observed before the action was applied.
            current_state: Game state observed after the action was applied.

        Returns:
            Weighted sum of three deltas: the decrease of variable 4 (negative
            when it grows), the increase of variable 5, and the change in
            ammo (negative when ammo is spent).
        """
        prev_vars = prev_state.game_variables
        curr_vars = current_state.game_variables

        ammo_delta = curr_vars[self._VAR_AMMO] - prev_vars[self._VAR_AMMO]
        hitcount_delta = curr_vars[self._VAR_DAMAGE_DEALT] - prev_vars[self._VAR_DAMAGE_DEALT]
        damage_taken_delta = -curr_vars[self._VAR_DAMAGE_TAKEN] + prev_vars[self._VAR_DAMAGE_TAKEN]

        return (damage_taken_delta * self.damage_taken_weight
                + hitcount_delta * self.damage_dealt_weight
                + ammo_delta * self.ammo_weight)

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, terminated, truncated, info)``.

        Args:
            action: Index into the one-hot action table.

        Returns:
            The processed frame (all zeros once the episode is over), the
            scenario reward plus the shaping reward, the two Gymnasium end
            flags, and an ``info`` dict with the current ``"ammo"`` value.
        """
        prev_state = self.game.get_state()  # Store the previous state
        reward = self.game.make_action(self.actions[action], self._FRAME_SKIP)  # Default reward
        # May be None (guarded below), in which case no shaping reward is added.
        current_state = self.game.get_state()

        # Compute custom reward
        if prev_state is not None and current_state is not None:
            reward += self.custom_reward(prev_state, current_state)

        terminated = self.game.is_episode_finished()
        # A timeout of 0 means "no timeout"; without this guard every step
        # would be reported as truncated for configs that do not set one.
        timeout = self.game.get_episode_timeout()
        truncated = timeout > 0 and self.game.get_episode_time() >= timeout

        state = np.zeros(self.observation_space.shape, dtype=np.uint8)  # Default blank state
        info = {"ammo": 0}  # Default info

        if not (terminated or truncated) and current_state is not None:
            state = self.grayscale(current_state.screen_buffer)
            # Index 2 is the ammo counter; index 0 (used previously) is HEALTH.
            info = {"ammo": current_state.game_variables[self._VAR_AMMO]}

        return state, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        """Restart the game and return the initial observation and an empty info dict.

        Args:
            seed: Optional seed; forwarded to Gymnasium's RNG and to the game.
            options: Accepted for API compatibility; unused.
        """
        super().reset(seed=seed)  # seeds self.np_random as the Gymnasium API expects
        if seed is not None:
            self.game.set_seed(seed)
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state), {}

    def grayscale(self, observation):
        """Convert a channel-first frame to a resized single-channel image.

        Args:
            observation: Screen buffer whose first axis is the colour channel
                (it is moved to the last axis before conversion).

        Returns:
            Array of shape ``(100, 160, 1)``.
        """
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height), hence (160, 100) for a 100x160 result.
        resize = cv2.resize(gray, (160, 100), interpolation=cv2.INTER_CUBIC)
        return np.reshape(resize, (100, 160, 1))

    def close(self):
        """Close the game."""
        self.game.close()

In [9]:
class TrainAndLoggingCallback(BaseCallback):
    """Save the model being trained every ``check_freq`` callback calls.

    Args:
        check_freq: Save whenever ``n_calls`` is a multiple of this value.
        save_path: Directory for the saved models; ``None`` disables saving.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the output directory if a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save ``best_model_<n_calls>`` on schedule; always continue training."""
        # Mirror the None check in _init_callback: without it os.path.join
        # would raise TypeError when no save path was configured.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True


### Testing the model 
`./train/train_Deadly_Corridor_COMP_5_S_3/best_model_50000.zip`

In [10]:
import torch
import numpy as np
from stable_baselines3 import DQN, PPO 
import cv2


# Load the saved PPO policy and watch it play a few rendered episodes.
model_path = "./train/train_Deadly_Corridor_COMP_5_S_3/best_model_50000.zip"  
model = PPO.load(model_path)


env = VizDoomGym(render=True)  
num_episodes = 4


try:
    for episode in range(num_episodes):
        obs, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            action, _ = model.predict(obs)
            obs, reward, terminated, truncated, info = env.step(action)
            time.sleep(0.10)  # slow playback down so the episode can be watched
            total_reward += reward
            done = terminated or truncated
        print(f"Episode {episode + 1}: Total Reward = {total_reward}")
        time.sleep(2)
finally:
    # Close the environment even if playback fails or is interrupted, so the
    # rendered game is not left running.
    env.close()


Episode 1: Total Reward = 8848.961502075195
Episode 2: Total Reward = 8510.800674438477
Episode 3: Total Reward = 5338.528274536133
Episode 4: Total Reward = 5146.283798217773


**The agent is doing surprisingly well -- but it needs more training** 
<br>
*new reward:*<br>
`damage_taken_delta*55 + hitcount_delta*200  + ammo_delta*45 ` <br>
*old reward:*<br>
`damage_taken_delta*60 + hitcount_delta*200  + ammo_delta*50 `

In [19]:
# Directory that receives the models saved during this training run.
CHECKPOINT_DIR = './train/train_Deadly_Corridor_COMP_5_S_4'

In [20]:
# Training environment (no game window) and a callback that saves the model
# every 10,000 calls into CHECKPOINT_DIR.
env = VizDoomGym()
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=10000,
)

In [21]:
# Resume from the previous run's saved model instead of starting from scratch.
model_path = "./train/train_Deadly_Corridor_COMP_5_S_3/best_model_50000.zip"
model = PPO.load(model_path)

In [22]:
# Attach the environment to the loaded model and continue training.
try:
    model.set_env(env)
    model.learn(total_timesteps=100000, callback=callback)
finally:
    # Close the game even if training fails or is interrupted.
    env.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./logs/log_Deadly_Corridor\PPO_16
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 140      |
|    ep_rew_mean     | 9.41e+03 |
| time/              |          |
|    fps             | 12       |
|    iterations      | 1        |
|    time_elapsed    | 656      |
|    total_timesteps | 8192     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 129         |
|    ep_rew_mean          | 9.64e+03    |
| time/                   |             |
|    fps                  | 12          |
|    iterations           | 2           |
|    time_elapsed         | 1318        |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.011291652 |
|    clip_fraction        | 0.311  

##### **Testing the model `./train/train_Deadly_Corridor_COMP_5_S_4/best_model_100000.zip`**

In [23]:
import torch
import numpy as np
from stable_baselines3 import DQN, PPO 
import cv2


# Load the newly trained PPO policy and watch it play a few rendered episodes.
model_path = "./train/train_Deadly_Corridor_COMP_5_S_4/best_model_100000.zip"  
model = PPO.load(model_path)


env = VizDoomGym(render=True)  
num_episodes = 4


try:
    for episode in range(num_episodes):
        obs, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            action, _ = model.predict(obs)
            obs, reward, terminated, truncated, info = env.step(action)
            time.sleep(0.10)  # slow playback down so the episode can be watched
            total_reward += reward
            done = terminated or truncated
        print(f"Episode {episode + 1}: Total Reward = {total_reward}")
        time.sleep(2)
finally:
    # Close the environment even if playback fails or is interrupted, so the
    # rendered game is not left running.
    env.close()


Episode 1: Total Reward = 13780.956329345703
Episode 2: Total Reward = 8869.434326171875
Episode 3: Total Reward = -793.1803436279297
Episode 4: Total Reward = 14877.097183227539


# The agent is excellent