# 1. Getting VizDoom Up and Running

In [1]:
!pip install vizdoom



In [2]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np

In [3]:
# Setup game
game = DoomGame()
game.load_config(r'C:\Users\Yeyian PC\Downloads\ViZDoom-master\ViZDoom-master\scenarios\basic.cfg')
game.init()

In [4]:
# This is the set of actions we can take in the environment
actions = np.identity(3, dtype=np.uint8)

In [5]:
state = game.get_state()

In [6]:
state.game_variables

array([50.])

# 2. Converting it to a Gym Environment

In [7]:
!pip install gym



In [8]:
!pip install opencv-python



In [9]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces 
from gym.spaces import Discrete, Box
# Import opencv 
import cv2

In [10]:
print (game)

<vizdoom.vizdoom.DoomGame object at 0x00000291D58F4A30>


In [11]:
game.get_state().screen_buffer.shape

(3, 240, 320)

In [12]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config(r'C:\Users\Yeyian PC\Downloads\ViZDoom-master\ViZDoom-master\scenarios\basic.cfg')
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(3)
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(3)
        reward = self.game.make_action(actions[action], 4) 
        
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            ammo = self.game.get_state().game_variables[0]
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [13]:
env = VizDoomGym(render=True)

In [14]:
state = env.reset()

# 3. View Game State

In [15]:
env.reset()

array([[[55],
        [50],
        [59],
        ...,
        [57],
        [57],
        [66]],

       [[68],
        [65],
        [65],
        ...,
        [56],
        [67],
        [72]],

       [[49],
        [79],
        [66],
        ...,
        [79],
        [51],
        [29]],

       ...,

       [[75],
        [63],
        [62],
        ...,
        [44],
        [71],
        [60]],

       [[15],
        [48],
        [47],
        ...,
        [49],
        [69],
        [47]],

       [[22],
        [14],
        [26],
        ...,
        [57],
        [37],
        [39]]], dtype=uint8)

In [16]:
!pip install stable-baselines3



In [17]:
# Import Environment checker
from stable_baselines3.common import env_checker

In [18]:
env_checker.check_env(env)

# 3. View State

In [19]:
!pip install matplotlib



In [20]:
from matplotlib import pyplot as plt

In [21]:
!pip install opencv-python
import cv2



# 4. Setup Callback

In [22]:
!pip install torch



In [23]:
!pip install stable-baselines3[extra]



In [24]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [25]:
CHECKPOINT_DIR = './train/train_basic'
LOG_DIR = './logs/log_basic/LOGBASIC2'

In [26]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# 5. Train Model

In [27]:
# import ppo for training
from stable_baselines3 import PPO

In [28]:
# Non rendered environment
env = VizDoomGym()

In [29]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00007, n_steps=1024)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [30]:
!pip install tensorboard



In [31]:
model.learn(total_timesteps=100000, callback=callback)

Logging to ./logs/log_basic/LOGBASIC2\PPO_2
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.8     |
|    ep_rew_mean     | -87.6    |
| time/              |          |
|    fps             | 247      |
|    iterations      | 1        |
|    time_elapsed    | 4        |
|    total_timesteps | 1024     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 32          |
|    ep_rew_mean          | -74.2       |
| time/                   |             |
|    fps                  | 93          |
|    iterations           | 2           |
|    time_elapsed         | 21          |
|    total_timesteps      | 2048        |
| train/                  |             |
|    approx_kl            | 0.009569664 |
|    clip_fraction        | 0.0895      |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | 

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 17.1        |
|    ep_rew_mean          | 17.8        |
| time/                   |             |
|    fps                  | 54          |
|    iterations           | 11          |
|    time_elapsed         | 205         |
|    total_timesteps      | 11264       |
| train/                  |             |
|    approx_kl            | 0.012945651 |
|    clip_fraction        | 0.256       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.01       |
|    explained_variance   | 0.408       |
|    learning_rate        | 7e-05       |
|    loss                 | 2e+03       |
|    n_updates            | 100         |
|    policy_gradient_loss | -0.000104   |
|    value_loss           | 4.4e+03     |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 13.9    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 11.3        |
|    ep_rew_mean          | 48.3        |
| time/                   |             |
|    fps                  | 52          |
|    iterations           | 21          |
|    time_elapsed         | 408         |
|    total_timesteps      | 21504       |
| train/                  |             |
|    approx_kl            | 0.022003818 |
|    clip_fraction        | 0.361       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.854      |
|    explained_variance   | 0.568       |
|    learning_rate        | 7e-05       |
|    loss                 | 915         |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.00478    |
|    value_loss           | 2.17e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 10.3  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 7.89        |
|    ep_rew_mean          | 65.3        |
| time/                   |             |
|    fps                  | 51          |
|    iterations           | 31          |
|    time_elapsed         | 612         |
|    total_timesteps      | 31744       |
| train/                  |             |
|    approx_kl            | 0.020274507 |
|    clip_fraction        | 0.2         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.732      |
|    explained_variance   | 0.751       |
|    learning_rate        | 7e-05       |
|    loss                 | 495         |
|    n_updates            | 300         |
|    policy_gradient_loss | -0.00371    |
|    value_loss           | 1.26e+03    |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 6.92  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 4.8        |
|    ep_rew_mean          | 82.3       |
| time/                   |            |
|    fps                  | 51         |
|    iterations           | 41         |
|    time_elapsed         | 817        |
|    total_timesteps      | 41984      |
| train/                  |            |
|    approx_kl            | 0.01912203 |
|    clip_fraction        | 0.121      |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.33      |
|    explained_variance   | 0.559      |
|    learning_rate        | 7e-05      |
|    loss                 | 71.5       |
|    n_updates            | 400        |
|    policy_gradient_loss | 0.0203     |
|    value_loss           | 145        |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.88        |
|    ep_rew_m

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.32        |
|    ep_rew_mean          | 84.9        |
| time/                   |             |
|    fps                  | 50          |
|    iterations           | 51          |
|    time_elapsed         | 1026        |
|    total_timesteps      | 52224       |
| train/                  |             |
|    approx_kl            | 0.054052606 |
|    clip_fraction        | 0.134       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.232      |
|    explained_variance   | 0.795       |
|    learning_rate        | 7e-05       |
|    loss                 | 102         |
|    n_updates            | 500         |
|    policy_gradient_loss | -0.0114     |
|    value_loss           | 230         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.69  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.13        |
|    ep_rew_mean          | 85.6        |
| time/                   |             |
|    fps                  | 50          |
|    iterations           | 61          |
|    time_elapsed         | 1248        |
|    total_timesteps      | 62464       |
| train/                  |             |
|    approx_kl            | 0.003571406 |
|    clip_fraction        | 0.0509      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.202      |
|    explained_variance   | 0.603       |
|    learning_rate        | 7e-05       |
|    loss                 | 30.9        |
|    n_updates            | 600         |
|    policy_gradient_loss | -0.00053    |
|    value_loss           | 93.6        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.1   

---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 4.06      |
|    ep_rew_mean          | 86.1      |
| time/                   |           |
|    fps                  | 44        |
|    iterations           | 71        |
|    time_elapsed         | 1631      |
|    total_timesteps      | 72704     |
| train/                  |           |
|    approx_kl            | 0.2294248 |
|    clip_fraction        | 0.228     |
|    clip_range           | 0.2       |
|    entropy_loss         | -0.187    |
|    explained_variance   | 0.66      |
|    learning_rate        | 7e-05     |
|    loss                 | 129       |
|    n_updates            | 700       |
|    policy_gradient_loss | -0.0458   |
|    value_loss           | 187       |
---------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.2         |
|    ep_rew_mean          | 85.8  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 3.74        |
|    ep_rew_mean          | 87.9        |
| time/                   |             |
|    fps                  | 41          |
|    iterations           | 81          |
|    time_elapsed         | 1979        |
|    total_timesteps      | 82944       |
| train/                  |             |
|    approx_kl            | 0.023777157 |
|    clip_fraction        | 0.0645      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.173      |
|    explained_variance   | 0.834       |
|    learning_rate        | 7e-05       |
|    loss                 | 7.58        |
|    n_updates            | 800         |
|    policy_gradient_loss | 0.00564     |
|    value_loss           | 24.1        |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.26  

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.09        |
|    ep_rew_mean          | 86.2        |
| time/                   |             |
|    fps                  | 41          |
|    iterations           | 91          |
|    time_elapsed         | 2221        |
|    total_timesteps      | 93184       |
| train/                  |             |
|    approx_kl            | 0.006497549 |
|    clip_fraction        | 0.0478      |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.127      |
|    explained_variance   | 0.868       |
|    learning_rate        | 7e-05       |
|    loss                 | 7.83        |
|    n_updates            | 900         |
|    policy_gradient_loss | 0.00395     |
|    value_loss           | 14.5        |
-----------------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 4.07

<stable_baselines3.ppo.ppo.PPO at 0x291d5e52cd0>

# 6. Test the Model

In [32]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [33]:
# Reload model from disc
model = PPO.load('./train/train_basic/best_model_100000')

In [34]:
# Create rendered environment
env = VizDoomGym(render=True)

In [35]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)



In [36]:
mean_reward
obs = env.reset()

In [37]:
model.predict(obs)

(0, None)

In [38]:
for episode in range(100): 
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        # time.sleep(0.20)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)

Total Reward for episode 95.0 is 0
Total Reward for episode 99.0 is 1
Total Reward for episode 99.0 is 2
Total Reward for episode 99.0 is 3
Total Reward for episode 99.0 is 4
Total Reward for episode 99.0 is 5
Total Reward for episode 99.0 is 6
Total Reward for episode 99.0 is 7
Total Reward for episode 99.0 is 8
Total Reward for episode 99.0 is 9
Total Reward for episode 99.0 is 10
Total Reward for episode 99.0 is 11
Total Reward for episode 99.0 is 12
Total Reward for episode 99.0 is 13
Total Reward for episode 99.0 is 14
Total Reward for episode 99.0 is 15
Total Reward for episode 99.0 is 16
Total Reward for episode 99.0 is 17
Total Reward for episode 99.0 is 18
Total Reward for episode 99.0 is 19
Total Reward for episode 99.0 is 20
Total Reward for episode 99.0 is 21
Total Reward for episode 99.0 is 22
Total Reward for episode 99.0 is 23
Total Reward for episode 99.0 is 24
Total Reward for episode 99.0 is 25
Total Reward for episode 99.0 is 26
Total Reward for episode 99.0 is 27
To