# Setup vizdoom

In [36]:
!pip install vizdoom



In [37]:
!cd github & git clone https://github.com/mwydmuch/ViZDoom

Den angivne sti blev ikke fundet.
fatal: destination path 'ViZDoom' already exists and is not an empty directory.


In [38]:
from vizdoom import * 
import time
import numpy as np

# Setup gym env

In [39]:
!pip install gym
!pip install opencv-python



In [40]:
from gym import Env
from gym.spaces import Discrete, Box
import cv2

In [41]:
class VizDoomGym(Env): 
    def __init__(self, render=False, config='../scenarios/deadly_corridor2.cfg'): 
        super().__init__()
        self.game = DoomGame()
        self.game.load_config(config)
        
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        self.game.init()
        
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52 
        
        
    def step(self, action):
        actions = np.identity(7)
        movement_reward = self.game.make_action(actions[action], 4) 
        
        reward = 0 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            game_variables = self.game.get_state().game_variables
            health, damage_taken, hitcount, ammo = game_variables
            
            damage_taken_delta = -damage_taken + self.damage_taken
            self.damage_taken = damage_taken
            hitcount_delta = hitcount - self.hitcount
            self.hitcount = hitcount
            ammo_delta = ammo - self.ammo
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta*200  + ammo_delta*5 
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    def render(): 
        pass
    
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    def close(self): 
        self.game.close()


In [42]:
env = VizDoomGym(render=True)

In [43]:
env.close()

# Setup callback

In [9]:
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html
Collecting torch==1.10.1+cu113
  Using cached https://download.pytorch.org/whl/cu113/torch-1.10.1%2Bcu113-cp39-cp39-win_amd64.whl (2442.3 MB)
Collecting torchvision==0.11.2+cu113
  Using cached https://download.pytorch.org/whl/cu113/torchvision-0.11.2%2Bcu113-cp39-cp39-win_amd64.whl (3.2 MB)
Collecting torchaudio===0.10.1+cu113
  Using cached https://download.pytorch.org/whl/cu113/torchaudio-0.10.1%2Bcu113-cp39-cp39-win_amd64.whl (336 kB)
Installing collected packages: torch, torchvision, torchaudio
Successfully installed torch-1.10.1+cu113 torchaudio-0.10.1+cu113 torchvision-0.11.2+cu113


In [10]:
!pip install stable-baselines3[extra]

Collecting stable-baselines3[extra]
  Using cached stable_baselines3-1.4.0-py3-none-any.whl (176 kB)
Collecting gym<0.20,>=0.17
  Using cached gym-0.19.0.tar.gz (1.6 MB)
Collecting tensorboard>=2.2.0
  Using cached tensorboard-2.8.0-py3-none-any.whl (5.8 MB)
Collecting atari-py==0.2.6
  Using cached atari-py-0.2.6.tar.gz (790 kB)
Collecting cloudpickle
  Using cached cloudpickle-1.6.0-py3-none-any.whl (23 kB)
Collecting absl-py>=0.4
  Using cached absl_py-1.0.0-py3-none-any.whl (126 kB)
Collecting markdown>=2.6.8
  Using cached Markdown-3.3.6-py3-none-any.whl (97 kB)
Collecting protobuf>=3.6.0
  Using cached protobuf-3.19.4-cp39-cp39-win_amd64.whl (895 kB)
Collecting tensorboard-data-server<0.7.0,>=0.6.0
  Using cached tensorboard_data_server-0.6.1-py3-none-any.whl (2.4 kB)
Collecting grpcio>=1.24.3
  Using cached grpcio-1.44.0-cp39-cp39-win_amd64.whl (3.4 MB)
Collecting google-auth<3,>=1.6.3
  Using cached google_auth-2.6.0-py2.py3-none-any.whl (156 kB)
Collecting tensorboard-plugin-w

Reason for being yanked: re-release with new wheels
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
spyder 5.1.5 requires pyqt5<5.13, which is not installed.
spyder 5.1.5 requires pyqtwebengine<5.13, which is not installed.
spyder-kernels 2.1.3 requires jupyter-client<7,>=5.3.4, but you have jupyter-client 7.1.2 which is incompatible.


In [44]:
import os 
from stable_baselines3.common.callbacks import BaseCallback

In [45]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [46]:
CHECKPOINT_DIR = './train/train_deadlycoridor'
LOG_DIR = './logs/log_deadlycoridor'

In [47]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# Train Model

In [48]:
from stable_baselines3 import PPO

In [49]:
env = VizDoomGym(config='../scenarios/deadly_corridor2.cfg')

In [50]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, n_steps=8192, clip_range=.1, gamma=.95, gae_lambda=.9)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [51]:
model.learn(total_timesteps=40000, callback=callback)

Logging to ./logs/log_deadlycoridor\PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 29.1     |
|    ep_rew_mean     | 14.1     |
| time/              |          |
|    fps             | 41       |
|    iterations      | 1        |
|    time_elapsed    | 195      |
|    total_timesteps | 8192     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 31.9         |
|    ep_rew_mean          | 20.7         |
| time/                   |              |
|    fps                  | 23           |
|    iterations           | 2            |
|    time_elapsed         | 685          |
|    total_timesteps      | 16384        |
| train/                  |              |
|    approx_kl            | 0.0021106019 |
|    clip_fraction        | 0.0858       |
|    clip_range           | 0.1          |
|    entropy_loss         | -1.94        |
|    explained_v

<stable_baselines3.ppo.ppo.PPO at 0x1e6120c15e0>

# 6. Test the Model

In [52]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [56]:
# Reload model from disc
model = PPO.load('./train/train_deadlycoridor/best_model_40000.zip')

In [59]:
# Create rendered environment
env = VizDoomGym(render=True, config='../scenarios/deadly_corridor2.cfg')

In [None]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)

In [None]:
for episode in range(20): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.02)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(total_reward, episode))
    time.sleep(2)