# Imports & Init

In [2]:
from vizdoom import *
import numpy as np
import random
import time
from gym import Env
from gym.spaces import Box, Discrete
import cv2
import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.env_checker import check_env
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

# Init Test

In [43]:
game = DoomGame()
game.load_config('Github repos\\ViZDoom\\scenarios\\deadly_corridor_s1.cfg')
game.init()

In [44]:
actions = np.identity(7, dtype=np.uint8)

In [45]:
# Loop through episodes 
episodes = 1 
for episode in range(episodes): 
    # Create a new episode or game 
    game.new_episode()
    # Check the game isn't done 
    while not game.is_episode_finished(): 
        # Get the game state 
        state = game.get_state()
        # Get the game image 
        img = state.screen_buffer
        # Get the game variables - ammo
        info = state.game_variables
        # Take an action
        reward = game.make_action(random.choice(actions),4)
        # Print reward 
        print('reward:', reward) 
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)

reward: 0.0
reward: 0.0
reward: 0.0
reward: -0.836029052734375
reward: -0.5425567626953125
reward: -1.5323333740234375
reward: -1.8193511962890625
reward: -1.227294921875
reward: -7.463043212890625
reward: -1.7415618896484375
reward: 1.618743896484375
reward: 7.362823486328125
reward: 14.530548095703125
reward: 13.094192504882812
reward: 2.5609283447265625
reward: -1.5659942626953125
reward: -1.056365966796875
reward: -7.742034912109375
reward: -8.913650512695312
reward: -7.0559234619140625
reward: -6.350830078125
reward: -2.054718017578125
reward: 5.0536651611328125
reward: 3.6894378662109375
reward: 9.571929931640625
reward: 10.176101684570312
reward: -0.0865631103515625
reward: -3.58282470703125
reward: -2.35882568359375
reward: -1.5912017822265625
reward: -8.1837158203125
reward: -16.364334106445312
reward: -1.3274383544921875
reward: 7.307342529296875
reward: 1.552337646484375
reward: 4.4232177734375
reward: -0.393096923828125
reward: -3.9991302490234375
reward: -2.697586059570312

In [46]:
game.close()

# Setup VizDoom OpenAI Gym Environment

In [3]:
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False, config='1'): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config(f'Github repos\\ViZDoom\\scenarios\\deadly_corridor_s{config}.cfg')
        
        # Render frame logic
        self.game.set_window_visible(render)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        # Game variables: HEALTH DAMAGE_TAKEN HITCOUNT SELECTED_WEAPON_AMMO
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52 ## CHANGED
        
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(7)
        movement_reward = self.game.make_action(actions[action], 4) 
        
        reward = 0 
        # Get all the other stuff we need to return 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            # Reward shaping
            game_variables = self.game.get_state().game_variables
            health, damage_taken, hitcount, ammo = game_variables
            
            # Calculate reward deltas
            damage_taken_delta = -damage_taken + self.damage_taken
            self.damage_taken = damage_taken
            hitcount_delta = hitcount - self.hitcount
            self.hitcount = hitcount
            ammo_delta = ammo - self.ammo
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta*200  + ammo_delta*5 
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [69]:
env = VizDoomGym(render=True)
check_env(env)

In [70]:
env.close()

# Setup Callback

In [4]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

# Train on Deadly Corridor using curriculum

In [20]:
CHECKPOINT_DIR = './train/train_deadly_corridor'
LOG_DIR = './logs/log_deadly_corridor'
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [21]:
env = VizDoomGym(render=False, config='1')
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, n_steps=8192, clip_range=.1, gamma=.95, gae_lambda=.9)
# model.load(".\\train\\train_deadly_corridor\\best_model_250000.zip")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [22]:
for i in range(50):
    for i in range(1, 6):
        print(f"Skill Level: {i}")
        env = VizDoomGym(render=False, config=str(i))
        model.set_env(env)
        model.learn(total_timesteps=50000, callback=callback)

Skill Level: 1
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./logs/log_deadly_corridor\PPO_7
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 174      |
|    ep_rew_mean     | 97.9     |
| time/              |          |
|    fps             | 17       |
|    iterations      | 1        |
|    time_elapsed    | 463      |
|    total_timesteps | 8192     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 162          |
|    ep_rew_mean          | 143          |
| time/                   |              |
|    fps                  | 13           |
|    iterations           | 2            |
|    time_elapsed         | 1256         |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0025107607 |
|    clip_

KeyboardInterrupt: 

# Test the model

In [62]:
model = PPO.load('train\\train_deadly_corridor\\best_model_100000.zip')

In [63]:
env = VizDoomGym(render=True, config="5")
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)
mean_reward
env.close()