## Install vizdoom

In [None]:
%pip install vizdoom

vizdoom: RL platform, runs very quick

In [None]:
!cd github & git clone https://github.com/mwydmuch/ViZDoom


In [3]:
# import game env
from vizdoom import *
# for random action
import random
# for sleeping
import time
# create action space for random actions
import numpy as np

In [None]:
# setup game
game = DoomGame()
game.load_config('github/ViZDoom/scenarios/deadly_corridor.cfg')
game.init()

In [None]:
# simple action space without double inputs
actions = np.identity(7, dtype='uint8')

In [None]:
episodes = 10
for episode in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        state = game.get_state()
        img = state.screen_buffer
        info = state.game_variables
        reward = game.make_action(random.choice(actions)) # frame skip 4 -> get reward after 4 frames
        print(reward)
        time.sleep(1/50)
    print('Total results:', game.get_total_reward())
    time.sleep(2)

In [None]:
game.close()

## 2. Wrap in Gym wrapper

In [None]:
%pip install gym

In [1]:
# import base class
from gym import Env
# import spaces
from gym.spaces import Discrete, Box # Discrete is like range(), Box is like array
# import opencv
import cv2 as cv

In [2]:
# build the vizdoom env
class ViZDoomGym(Env):
    def __init__(self, render=False, config='github/ViZDoom/scenarios/deadly_corridor_s1.cfg'):
        super().__init__()

        self.action_nr = 7

        self.game = DoomGame()
        self.game.load_config(config)

        self.game.set_window_visible(render)

        self.game.init()

        self.observation_space = Box(0, 255, shape=(100, 160, 1), dtype='uint8')
        self.action_space = Discrete(self.action_nr)

        self.damage_taken = 0
        self.hit_count = 0
        self.ammo = 52

    def step(self, action):
        actions = np.identity(self.action_nr, dtype='uint8')
        movement_reward = self.game.make_action(actions[action])

        reward = 0
        if self.game.get_state(): # interesting line
            state = self.game.get_state()
            img = state.screen_buffer
            img = self.grayscale(img)
            
            # reward shaping
            game_variables = state.game_variables
            health, damage_taken, hit_count, ammo = game_variables
            
            damage_taken_delta = - damage_taken + self.damage_taken # when hit gives negaitve value
            self.damage_taken = damage_taken
            hit_count_delta = hit_count - self.hit_count # when kills gives positive value
            self.hit_count = hit_count
            ammo_delta = ammo - self.ammo # when shoot give negative value
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta * 10 + hit_count_delta * 200 + ammo_delta * 5
            
            info = {}

        else:
            img = np.zeros(self.observation_space.shape)
            info = {}

        done = self.game.is_episode_finished()

        return img, reward, done, info

    def close(self):
        self.game.close()

    def __del__(self):
        self.game.close()
        super().__del__()

    def render():
        pass # handled by vizdoom itself

    def grayscale(self, observation):
        img = cv.cvtColor(np.moveaxis(observation, 0, -1), cv.COLOR_BGR2GRAY)
        # scale image down for performance
        img = cv.resize(img, (160, 100), interpolation=cv.INTER_CUBIC)
        img = np.reshape(img, (100, 160, 1)) # add one dimension
        return img

    def reset(self):
        self.game.new_episode()
        img = self.game.get_state().screen_buffer
        return self.grayscale(img)

In [4]:
env = ViZDoomGym()

In [5]:
# verify environment
from stable_baselines3.common import env_checker

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
env_checker.check_env(env)

## 3. View step

In [13]:
from matplotlib import pyplot as plt
img = env.step(1)[0]
plt.imshow(img, cmap='gray')
img.shape

: 

: 

## 4. Setup callback

In [6]:
import os
from stable_baselines3.common.callbacks import BaseCallback

In [7]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [8]:
CHECKPOINT_DIR = './doom/train_corridor' # for model weights
LOG_DIR = './doom/log_corridor' # for tf logs

In [9]:
callback = TrainAndLoggingCallback(check_freq=33000, save_path=CHECKPOINT_DIR)

## 5. Train the model with curriculum

In [11]:
from stable_baselines3 import PPO

In [13]:
env = ViZDoomGym(config='github/ViZDoom/scenarios/deadly_corridor_s1.cfg')

Exception ignored in: <function ViZDoomGym.__del__ at 0x000002C64EB69090>
Traceback (most recent call last):
  File "C:\Users\phiga\AppData\Local\Temp\ipykernel_20460\2200486960.py", line 60, in __del__
AttributeError: 'super' object has no attribute '__del__'


In [14]:
# model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=4096)
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=4096)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [15]:
model.learn(total_timesteps=100000, callback=callback)

Logging to ./doom/log_corridor\PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 1.27e+03 |
|    ep_rew_mean     | 63.9     |
| time/              |          |
|    fps             | 68       |
|    iterations      | 1        |
|    time_elapsed    | 60       |
|    total_timesteps | 4096     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 811         |
|    ep_rew_mean          | 31.2        |
| time/                   |             |
|    fps                  | 63          |
|    iterations           | 2           |
|    time_elapsed         | 129         |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.035039023 |
|    clip_fraction        | 0.321       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.92       |
|    explained_variance   | -7.15e-

KeyboardInterrupt: 

## Test model

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
model = PPO.load(CHECKPOINT_DIR + '/best_model_99000')

In [None]:
env = ViZDoomGym(render=True)

In [None]:
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)
mean_reward

In [None]:
for episode in range(5):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(1/50)
        total_reward += reward
    print(f'Reward of episode {episode+1} is {total_reward}')
    time.sleep(2)
env.close()

not perfoect performance

In [10]:
env.close()