## Install vizdoom

In [None]:
%pip install vizdoom

vizdoom: RL platform, runs very quick

In [None]:
!cd github & git clone https://github.com/mwydmuch/ViZDoom


In [1]:
# import game env
from vizdoom import *
# for random action
import random
# for sleeping
import time
# create action space for random actions
import numpy as np

In [12]:
# setup game
game = DoomGame()
game.load_config('github/ViZDoom/scenarios/deadly_corridor.cfg')
game.init()

In [13]:
# simple action space without double inputs
actions = np.identity(7, dtype='uint8')

In [14]:
episodes = 10
for episode in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        state = game.get_state()
        img = state.screen_buffer
        info = state.game_variables
        reward = game.make_action(random.choice(actions)) # frame skip 4 -> get reward after 4 frames
        print(reward)
        time.sleep(1/10)
    print('Total results:', game.get_total_reward())
    time.sleep(2)

0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.0
0.022430419921875
-0.002105712890625
-0.001922607421875
-0.7830047607421875
-1.4908599853515625
-2.132354736328125
-8.859329223632812
-2.5012664794921875
-0.227203369140625
-0.022430419921875
0.0
-0.0013275146484375
0.7564544677734375
-0.3112640380859375
0.498779296875
-0.3288726806640625
-0.2980499267578125
0.5096588134765625
0.5072784423828125
0.459716796875
-0.3631744384765625
-0.283721923828125
-0.2571258544921875
-0.8586273193359375
0.0
0.0
0.04541015625
0.0411529541015625
-0.0950927734375
0.0683441162109375
0.0619354248046875
0.056121826171875
0.11920166015625
-0.3078155517578125
0.0
0.0
0.0
0.0683441162109375
-0.0064239501953125
0.0625152587890625
0.056640625
0.826690673828125
0.6579437255859375
0.59625244140625
0.54034423828125
0.48968505859375
0.4662017822265625
0.400054931640625
0.340118408203125
1.0890960693359375
1.0094146728515625
0.9147796630859375
0.829010009765625
-0.02960205078125
-0.0268402099609375
-0.0243377685546875
-0.022064

KeyboardInterrupt: 

In [15]:
game.close()

## 2. Wrap in Gym wrapper

In [None]:
%pip install gym

In [2]:
# import base class
from gym import Env
# import spaces
from gym.spaces import Discrete, Box # Discrete is like range(), Box is like array
# import opencv
import cv2 as cv

In [3]:
# build the vizdoom env
class ViZDoomGym(Env):
    def __init__(self, render=False, config='github/ViZDoom/scenarios/deadly_corridor.cfg'):
        super().__init__()

        self.action_nr = 7

        self.game = DoomGame()
        self.game.load_config(config)

        self.game.set_window_visible(render)

        self.game.init()

        self.observation_space = Box(0, 255, shape=(100, 160, 1), dtype='uint8')
        self.action_space = Discrete(self.action_nr)

        self.damage_taken = 0
        self.kill_count = 0
        self.ammo = 52

    def step(self, action):
        actions = np.identity(self.action_nr, dtype='uint8')
        movement_reward = self.game.make_action(actions[action])

        reward = 0
        if self.game.get_state(): # interesting line
            state = self.game.get_state()
            img = state.screen_buffer
            img = self.grayscale(img)
            
            # reward shaping
            game_variables = state.game_variables
            health, damage_taken, kill_count, ammo = game_variables
            
            damage_taken_delta = - damage_taken + self.damage_taken # when hit gives negaitve value
            self.damage_taken = damage_taken
            kill_count_delta = kill_count - self.kill_count # when kills gives positive value
            self.kill_count = kill_count
            ammo_delta = ammo - self.ammo # when shoot give negative value
            self.ammo = ammo
                        
            reward = movement_reward + damage_taken_delta * 5 + kill_count_delta * 250 + ammo_delta * 5
            
            info = {}

        else:
            img = np.zeros(self.observation_space.shape)
            info = {}

        done = self.game.is_episode_finished()

        return img, reward, done, info

    def close(self):
        self.game.close()

    def render():
        pass # handled by vizdoom itself

    def grayscale(self, observation):
        img = cv.cvtColor(np.moveaxis(observation, 0, -1), cv.COLOR_BGR2GRAY)
        # scale image down for performance
        img = cv.resize(img, (160, 100), interpolation=cv.INTER_CUBIC)
        img = np.reshape(img, (100, 160, 1)) # add one dimension
        return img

    def reset(self):
        self.game.new_episode()
        img = self.game.get_state().screen_buffer
        return self.grayscale(img)

In [4]:
env = ViZDoomGym()

In [5]:
# verify environment
from stable_baselines3.common import env_checker

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
env_checker.check_env(env)

## 3. View step

In [13]:
from matplotlib import pyplot as plt
img = env.step(1)[0]
plt.imshow(img, cmap='gray')
img.shape

: 

: 

## 4. Setup callback

In [4]:
import os
from stable_baselines3.common.callbacks import BaseCallback

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [6]:
CHECKPOINT_DIR = './doom/train_corridor' # for model weights
LOG_DIR = './doom/log_corridor' # for tf logs

In [9]:
callback = TrainAndLoggingCallback(check_freq=33000, save_path=CHECKPOINT_DIR)

## 5. Train the model with curriculum

In [7]:
from stable_baselines3 import PPO

In [36]:
env = ViZDoomGym(config='doom/corridor_schedule/deadly_corridor_s1.cfg')

# old parameters
# model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=4096)

model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, n_steps=8192, clip_range=.1, gamma=.95, gae_lambda=.9)
# also look at the definition of the metrics and reward if not training well

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [23]:
callback = TrainAndLoggingCallback(check_freq=33333, save_path=CHECKPOINT_DIR + '/s1')
model.learn(total_timesteps=300000, callback=callback)

Logging to ./doom/log_corridor\PPO_5
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 895      |
|    ep_rew_mean     | -85      |
| time/              |          |
|    fps             | 69       |
|    iterations      | 1        |
|    time_elapsed    | 118      |
|    total_timesteps | 8192     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 655          |
|    ep_rew_mean          | 16.7         |
| time/                   |              |
|    fps                  | 65           |
|    iterations           | 2            |
|    time_elapsed         | 250          |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0027132803 |
|    clip_fraction        | 0.0914       |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.94        |
|    explained_varian

<stable_baselines3.ppo.ppo.PPO at 0x27db60cb880>

In [8]:
difficulty = 3

env = ViZDoomGym(config=f'doom/corridor_schedule/deadly_corridor_s{difficulty}.cfg')
callback = TrainAndLoggingCallback(check_freq=33333, save_path=CHECKPOINT_DIR + f'/s{difficulty}')
 
model = PPO.load(CHECKPOINT_DIR + '/s1' + '/best_model_299997.zip', env=env, device='cuda')
model.learn(total_timesteps=200000, callback=callback)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./doom/log_corridor\PPO_9
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 126      |
|    ep_rew_mean     | 662      |
| time/              |          |
|    fps             | 76       |
|    iterations      | 1        |
|    time_elapsed    | 106      |
|    total_timesteps | 8192     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 118         |
|    ep_rew_mean          | 650         |
| time/                   |             |
|    fps                  | 67          |
|    iterations           | 2           |
|    time_elapsed         | 244         |
|    total_timesteps      | 16384       |
| train/                  |             |
|    approx_kl            | 0.002597753 |
|    clip_fraction        | 0.0911      |
|

KeyboardInterrupt: 

## Test model

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

In [21]:
model = PPO.load(CHECKPOINT_DIR + '/s1' + '/best_model_299997')

In [22]:
env = ViZDoomGym(render=True, config='doom/corridor_schedule/deadly_corridor_s1.cfg')

In [23]:
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)
mean_reward

NameError: name 'evaluate_policy' is not defined

In [25]:
for episode in range(5):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(1/5)
        print(reward)
        total_reward += reward
    print(f'Reward of episode {episode+1} is {total_reward}')
    time.sleep(2)
env.close()

0.0 0.0 0.0 0.0
0.0
0.0 0.0 0.0 0.0
0.0
0.0 0.0 0.0 0.0
0.0
0.0 0.0 0.0 0.0
0.0
0.0 0.0 0.0 0.0
0.0
0.0 0.0 0.0 0.0
0.0
0.0 0.0 0.0 0.0
0.0
0.78125 0.0 0.0 0.0
0.78125
1.4892578125 0.0 0.0 0.0
1.4892578125
2.130889892578125 0.0 0.0 0.0
2.130889892578125
2.7123565673828125 0.0 0.0 0.0
2.7123565673828125
3.23931884765625 0.0 0.0 0.0
3.23931884765625
3.7168731689453125 0.0 0.0 0.0
3.7168731689453125
2.587158203125 0.0 0.0 0.0
2.587158203125
3.1258544921875 0.0 0.0 0.0
3.1258544921875
3.614044189453125 0.0 0.0 0.0
3.614044189453125
4.0564727783203125 0.0 0.0 0.0
4.0564727783203125
4.457427978515625 0.0 0.0 0.0
4.457427978515625
4.8207855224609375 0.0 0.0 0.0
4.8207855224609375
5.15008544921875 0.0 0.0 0.0
5.15008544921875
5.4485015869140625 0.0 0.0 0.0
5.4485015869140625
5.7189483642578125 0.0 0.0 0.0
5.7189483642578125
5.1827850341796875 0.0 0.0 0.0
5.1827850341796875
5.4781341552734375 0.0 0.0 0.0
5.4781341552734375
5.7458038330078125 0.0 0.0 0.0
5.7458038330078125
5.988372802734375 0.0 

ViZDoomUnexpectedExitException: Controlled ViZDoom instance exited unexpectedly.

not perfoect performance

In [11]:
env.close()