## 1. Install VizDoom 

In [1]:
!pip install vizdoom



You should consider upgrading via the 'd:\jn\python.exe -m pip install --upgrade pip' command.


In [2]:
!git clone https://github.com/mwydmuch/ViZDoom

���⥬� �� 㤠���� ���� 㪠����� ����.
Cloning into 'ViZDoom'...
Updating files:  79% (1620/2033)
Updating files:  80% (1627/2033)
Updating files:  81% (1647/2033)
Updating files:  82% (1668/2033)
Updating files:  83% (1688/2033)
Updating files:  84% (1708/2033)
Updating files:  85% (1729/2033)
Updating files:  86% (1749/2033)
Updating files:  87% (1769/2033)
Updating files:  88% (1790/2033)
Updating files:  89% (1810/2033)
Updating files:  90% (1830/2033)
Updating files:  91% (1851/2033)
Updating files:  92% (1871/2033)
Updating files:  93% (1891/2033)
Updating files:  94% (1912/2033)
Updating files:  95% (1932/2033)
Updating files:  96% (1952/2033)
Updating files:  97% (1973/2033)
Updating files:  98% (1993/2033)
Updating files:  99% (2013/2033)
Updating files: 100% (2033/2033)
Updating files: 100% (2033/2033), done.


# 2. Getting up Doom

In [1]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np

In [2]:
CONFIG = 'VizDoom/scenarios/deathmatch.cfg'

In [3]:
# Setup game
game = DoomGame()
game.load_config(CONFIG)
game.init()

In [23]:
# This is the set of actions we can take in the environment
actions = np.identity(20, dtype=np.uint8)

In [24]:
state = game.get_state()

In [25]:
state.game_variables

array([  0., 100.,   0.,   2.,  50.])

In [26]:
# Loop through episodes 
episodes = 10 
for episode in range(episodes): 
    # Create a new episode or game 
    game.new_episode()
    # Check the game isn't done 
    while not game.is_episode_finished(): 
        # Get the game state 
        state = game.get_state()
        # Get the game image 
        img = state.screen_buffer
        # Get the game variables - ammo
        info = state.game_variables
        # Take an action
        reward = game.make_action(random.choice(actions),4)
        # Print rewward 
        # print('reward:', reward) 
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)

Result: 3.0
Result: 4.0
Result: 5.0


KeyboardInterrupt: 

## 3. Converting it to a Gym Environment

In [15]:
!pip install gym

Collecting gym
  Downloading gym-0.23.1.tar.gz (626 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
    Preparing wheel metadata: started
    Preparing wheel metadata: finished with status 'done'
Collecting importlib-metadata>=4.10.0
  Downloading importlib_metadata-4.11.3-py3-none-any.whl (18 kB)
Collecting cloudpickle>=1.2.0
  Downloading cloudpickle-2.0.0-py3-none-any.whl (25 kB)
Collecting gym-notices>=0.0.4
  Downloading gym_notices-0.0.6-py3-none-any.whl (2.7 kB)
Collecting zipp>=0.5
  Downloading zipp-3.8.0-py3-none-any.whl (5.4 kB)
Building wheels for collected packages: gym
  Building wheel for gym (PEP 517): started
  Building wheel for gym (PEP 517): finished with status 'done'
  Created wheel for gym: filename=gym-0.23.1-py3-none-any.whl size=701374 sha256=3270d1726e18294232235849da3fe9a3d18a9d3a5173b1fa2

You should consider upgrading via the 'd:\jn\python.exe -m pip install --upgrade pip' command.


In [2]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces
from gym.spaces import Discrete, Box
# Import opencv
import cv2

In [3]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False, config='VizDoom/scenarios/deadly_corridor.cfg'): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config(config)
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        # Game variables: HEALTH DAMAGE_TAKEN HITCOUNT SELECTED_WEAPON_AMMO
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52 ## CHANGED
        
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(7)
        movement_reward = self.game.make_action(actions[action], 4) 
        
        reward = 0 
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            # Reward shaping
            game_variables = self.game.get_state().game_variables
            health, damage_taken, hitcount, ammo = game_variables
            
            # Calculate reward deltas
            damage_taken_delta = -damage_taken + self.damage_taken
            self.damage_taken = damage_taken
            hitcount_delta = hitcount - self.hitcount
            self.hitcount = hitcount
            ammo_delta = ammo - self.ammo
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta*200  + ammo_delta*5 
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self):
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [4]:
env = VizDoomGym(render=True)

In [5]:
res = env.step(5)

In [57]:
state = env.reset()

In [54]:
!pip install stable_baselines3

  and should_run_async(code)


Collecting stable_baselines3
  Downloading stable_baselines3-1.5.0-py3-none-any.whl (177 kB)
Collecting gym==0.21
  Downloading gym-0.21.0.tar.gz (1.5 MB)
Building wheels for collected packages: gym
  Building wheel for gym (setup.py): started
  Building wheel for gym (setup.py): finished with status 'done'
  Created wheel for gym: filename=gym-0.21.0-py3-none-any.whl size=1616822 sha256=2aa8a4ac5b785c08f7c330549adcd5aadeeeddedf3af55a8d733c47d210e9f98
  Stored in directory: c:\users\dungeon master\appdata\local\pip\cache\wheels\b3\50\6c\0a82c1358b4da2dbd9c1bb17e0f89467db32812ab236dbf6d5
Successfully built gym
Installing collected packages: gym, stable-baselines3
  Attempting uninstall: gym
    Found existing installation: gym 0.23.1
    Uninstalling gym-0.23.1:
      Successfully uninstalled gym-0.23.1
Successfully installed gym-0.21.0 stable-baselines3-1.5.0


You should consider upgrading via the 'd:\jn\python.exe -m pip install --upgrade pip' command.


In [18]:
# Import Environment checker
from stable_baselines3.common import env_checker

In [58]:
env_checker.check_env(env)

AttributeError: module 'gym' has no attribute 'GoalEnv'

In [7]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback

class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [8]:
CHECKPOINT_DIR = './train/train_corridor'
LOG_DIR = './logs/log_corridor'

In [9]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [10]:
# import ppo for training
from stable_baselines3 import PPO
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=8192, clip_range=.1, gamma=.95, gae_lambda=.9)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [23]:
model.learn(total_timesteps=1000000, callback=callback)

Logging to ./logs/log_corridor\PPO_7
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 34.4     |
|    ep_rew_mean     | 25.8     |
| time/              |          |
|    fps             | 28       |
|    iterations      | 1        |
|    time_elapsed    | 283      |
|    total_timesteps | 8192     |
---------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 25.4       |
|    ep_rew_mean          | 9.27       |
| time/                   |            |
|    fps                  | 27         |
|    iterations           | 2          |
|    time_elapsed         | 589        |
|    total_timesteps      | 16384      |
| train/                  |            |
|    approx_kl            | 0.13152626 |
|    clip_fraction        | 0.515      |
|    clip_range           | 0.1        |
|    entropy_loss         | -1.92      |
|    explained_variance   | -7.15e-06  |
|    lea

KeyboardInterrupt: 

## Test model

In [5]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [11]:
# Reload model from disc
model = PPO.load('train/train_corridor/best_model_560000')

In [12]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)



In [15]:
final_reward = 0
min_rew = 1e10
max_rew = -1e10
for episode in range(100): 
    obs = env.reset()
    done = False
    total_reward = 0
    st = 0
    while not done:
        st += 1
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.02)
        total_reward += reward
        print(f'Reward on step {st} in episode {episode} : {reward}')
    final_reward += total_reward
    min_rew = min(min_rew, total_reward)
    max_rew = max(max_rew, total_reward)
    print('Total Reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)
print(f'Maximal Reward : {max_rew}')
print(f'Minimal Reward : {min_rew}')
print(f'ep_rew_mean : {final_reward / 100}')

Reward on step 1 in episode 0 : -845.0
Reward on step 2 in episode 0 : 0.0
Reward on step 3 in episode 0 : 0.0
Reward on step 4 in episode 0 : -600.0
Reward on step 5 in episode 0 : -15.819534301757812
Reward on step 6 in episode 0 : 195.0
Reward on step 7 in episode 0 : 2.256072998046875
Reward on step 8 in episode 0 : 2.706390380859375
Reward on step 9 in episode 0 : 1.8253936767578125
Reward on step 10 in episode 0 : 1.2311248779296875
Reward on step 11 in episode 0 : 0.8303070068359375
Reward on step 12 in episode 0 : 195.5599365234375
Reward on step 13 in episode 0 : 6.5190887451171875
Reward on step 14 in episode 0 : 13.500152587890625
Reward on step 15 in episode 0 : 17.78240966796875
Reward on step 16 in episode 0 : 14.90423583984375
Reward on step 17 in episode 0 : 10.82501220703125
Reward on step 18 in episode 0 : 19.16339111328125
Reward on step 19 in episode 0 : 21.146530151367188
Reward on step 20 in episode 0 : 22.324615478515625
Reward on step 21 in episode 0 : 23.024429