In [None]:
!pip install vizdoom

In [None]:
import os
import numpy as np

import warnings
warnings.filterwarnings('ignore')

import gymnasium 
from gymnasium import spaces
import numpy as np
import cv2
from vizdoom import DoomGame
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3 import PPO, DQN

# Creating VizDoom Gym environment

In [None]:
class VizDoomGym(gymnasium.Env):
    """Gymnasium environment wrapping a ViZDoom scenario with shaped rewards.

    Observations are single-channel ``uint8`` images of shape ``(100, 160, 1)``.
    Actions are indices into 7 one-hot button vectors.

    The reward returned by the game is augmented every frame with a shaping
    term built from the change in three game variables (damage taken, hit
    count and ammo).  The scenario config is expected to expose exactly four
    game variables, unpacked in the order
    ``health, damage_taken, hitcount, ammo``.
    """

    def __init__(self, render=False, frameskip=4,
                 config_path='../data/scenarios/deadly_corridor.cfg'):
        """Create and initialise the underlying ``DoomGame``.

        Args:
            render: When truthy the ViZDoom window is shown, otherwise hidden.
            frameskip: Number of game frames the chosen action is repeated for
                in each ``step`` call.  Must be at least 1.
            config_path: Path of the scenario ``.cfg`` file to load.

        Raises:
            ValueError: If ``frameskip`` is smaller than 1.
        """
        super().__init__()
        if frameskip < 1:
            # With zero iterations ``step`` would have no observation to return.
            raise ValueError(f'frameskip must be >= 1, got {frameskip}')

        self.game = DoomGame()
        self.game.load_config(config_path)
        self.step_reward = 0
        self.frameskip = frameskip

        # Window visibility has to be configured before ``init()``.
        self.game.set_window_visible(bool(render))
        self.game.init()

        self.observation_space = spaces.Box(low=0, high=255, shape=(100, 160, 1), dtype=np.uint8)
        self.action_space = spaces.Discrete(7)
        # Row ``i`` is the one-hot button vector for action ``i``.
        self._actions = np.identity(7)

        # Last seen values of the game variables used for reward shaping.
        # They are re-synchronised with the game at every ``reset``.
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52

    def _sync_trackers(self, game_variables):
        """Store the current shaping-related game variables as the baseline."""
        _health, damage_taken, hitcount, ammo = game_variables
        self.damage_taken = damage_taken
        self.hitcount = hitcount
        self.ammo = ammo

    def _shaping_reward(self, game_variables):
        """Return the shaping bonus for one frame and update the trackers.

        The bonus is ``-10`` per unit of new damage taken, ``+200`` per new
        hit and ``+5`` per unit of ammo change (negative when ammo is spent).
        """
        _health, damage_taken, hitcount, ammo = game_variables
        damage_taken_delta = self.damage_taken - damage_taken
        hitcount_delta = hitcount - self.hitcount
        ammo_delta = ammo - self.ammo
        self._sync_trackers(game_variables)
        return damage_taken_delta * 10 + hitcount_delta * 200 + ammo_delta * 5

    def step(self, action):
        """Repeat ``action`` for up to ``frameskip`` frames.

        Args:
            action: Index of the button to press (an array holding a single
                index also works, since the selected row is flattened).

        Returns:
            Tuple ``(observation, total_reward, done, truncated, info)``.
            ``observation`` is the processed last frame, or an all-zero
            ``uint8`` image when the episode has finished and no game state
            is available.  ``total_reward`` is the game reward plus the
            shaping term summed over the executed frames.  ``truncated`` is
            always ``False``.  ``info`` is ``{"info": ammo}`` with the last
            observed ammo value (``0`` when there is no state).
        """
        button_vector = self._actions[action].flatten()
        total_reward = 0
        done = False
        last_state = None
        ammo_info = 0

        for _ in range(self.frameskip):
            reward = self.game.make_action(button_vector, 1)
            done = self.game.is_episode_finished()

            # Fetch the state once per frame; it is None after the episode ends.
            last_state = self.game.get_state()
            if last_state is not None:
                reward += self._shaping_reward(last_state.game_variables)
                ammo_info = self.ammo
            else:
                ammo_info = 0

            total_reward += reward

            if done:
                break

        # Only the final frame is returned, so only that one is processed.
        if last_state is not None:
            observation = self.grayscale(last_state.screen_buffer)
        else:
            # Match the declared observation dtype instead of float64 zeros.
            observation = np.zeros(self.observation_space.shape,
                                   dtype=self.observation_space.dtype)

        info = {"info": ammo_info}
        truncated = False
        self.step_reward = total_reward
        return observation, total_reward, done, truncated, info

    def render(self, mode='human'):
        """Return the raw screen buffer for ``mode='rgb_array'``.

        For any other mode nothing is done and ``None`` is returned.
        ``None`` is also returned when no game state is available (for
        example after the episode has finished).
        """
        if mode == 'rgb_array':
            game_state = self.game.get_state()
            return None if game_state is None else game_state.screen_buffer
        return None

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed forwarded to Gymnasium and to ViZDoom.
            options: Accepted for API compatibility; unused.
        """
        super().reset(seed=seed)

        # The seed must be set before the episode starts to affect it.
        if seed is not None:
            self.game.set_seed(seed)
        self.game.new_episode()

        game_state = self.game.get_state()
        # Re-baseline the shaping trackers so the first step of this episode
        # is not rewarded or penalised relative to the previous episode.
        self._sync_trackers(game_state.game_variables)
        observation = self.grayscale(game_state.screen_buffer)

        info = {}

        return observation, info

    def grayscale(self, observation):
        """Convert a raw screen buffer into a ``(100, 160, 1)`` grey image.

        Axis 0 is moved to the end before the colour conversion
        (assumes the buffer is channel-first -- TODO confirm against the
        screen format set in the scenario config).
        """
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height).
        resize = cv2.resize(gray, (160, 100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100, 160, 1))
        return state

    def close(self):
        """Shut down the underlying ViZDoom game instance."""
        self.game.close()



# Evaluating Agent

In [None]:
# Restore the trained PPO agent from its saved checkpoint.
model = PPO.load(path='../data/DeadlyCorridor560k')

In [None]:
# Alternative: evaluate a DQN checkpoint instead of the PPO model (uncomment to use).
# model = DQN.load('./dqn_dc3_cnn_600k.zip')

In [None]:
import time

# Play a few rendered episodes with the loaded model and print each
# episode's total (shaped) reward.
num_episodes = 6
startup_delay_s = 10   # pause before playback; presumably to bring the game window into view
frame_delay_s = 0.01   # per-step pause that slows playback down

env = VizDoomGym(render=True, frameskip=1)
try:
    time.sleep(startup_delay_s)
    for episode in range(num_episodes):
        obs, _ = env.reset()
        done = False
        total_reward = 0
        while not done:
            # No-op in the default 'human' mode.
            env.render()
            # The observation is given a leading batch axis, so `action`
            # comes back as a one-element array.
            action, _states = model.predict(np.expand_dims(obs, axis=0))
            obs, reward, terminated, truncated, info = env.step(action)
            done = terminated or truncated
            total_reward += reward
            time.sleep(frame_delay_s)
        print(f'Episode {episode + 1}: {total_reward}')
finally:
    # Always release the game instance, even on an error or a notebook interrupt.
    env.close()