In [7]:
# Import vizdoom for game env
from vizdoom import *
# Import random for action sampling
import random
# Import time for sleeping
import time
# Import numpy for identity matrix
import numpy as np

In [2]:
# Import environment base class from OpenAI Gym
from gymnasium import Env
# Import gym spaces 
from gymnasium.spaces import Discrete, Box
# Import opencv 
import cv2

In [8]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config('VizDoom/scenarios/basic.cfg')
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(3)
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(3)
        reward = self.game.make_action(actions[action], 4) 
        
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            ammo = self.game.get_state().game_variables[0]
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

# 6. Test the Model

In [4]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3 import PPO

In [5]:
# Reload model from disc
model = PPO.load('./train/train_basic/best_model_60000')

In [19]:
# Create rendered environment
env = VizDoomGym(render=True)

In [None]:
env.close()

In [14]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)

ValueError: too many values to unpack (expected 2)

In [None]:
mean_reward

In [15]:
model.predict(obs)

NameError: name 'obs' is not defined

In [21]:
for episode in range(100): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.20)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)

Total Reward for episode 91.0 is 0
Total Reward for episode 91.0 is 1
Total Reward for episode 79.0 is 2
Total Reward for episode 95.0 is 3
Total Reward for episode 67.0 is 4
Total Reward for episode 75.0 is 5
Total Reward for episode 71.0 is 6
Total Reward for episode 40.0 is 7
Total Reward for episode 91.0 is 8
Total Reward for episode 95.0 is 9
Total Reward for episode 83.0 is 10


KeyboardInterrupt: 