In [None]:
!pip install vizdoom
!cd github/ & git clone https://github.com/mwydmuch/ViZdoom

In [1]:
from vizdoom import *
import random
import time
import numpy as np
import matplotlib.pyplot as plt
from stable_baselines3.common.evaluation import evaluate_policy
from gym import Env
from gym.spaces import Discrete, Box
import cv2
from stable_baselines3.common import env_checker

# Random Agent

In [3]:
game = DoomGame()
game.load_config("github/ViZdoom/scenarios/deadly_corridor_s1.cfg")  # Easy mode
game.init()

In [4]:
state = game.get_state()

In [6]:
episodes = 1
actions = np.eye(7,dtype=np.uint8)

for episode in range(episodes):
    game.new_episode()
    while not game.is_episode_finished():
        state = game.get_state()
        img = state.screen_buffer
        random_action = random.choice(actions)
        reward = game.make_action(random_action)
#         time.sleep(0.02)
    print("Result :", game.get_total_reward())
#     time.sleep(2)
game.close()


Result : -95.27561950683594


# Wrapping Doom Game into a Gym Environment

In [2]:
class VizDoomGym(Env):
    def __init__(self,render=False, config="github/ViZdoom/scenarios/deadly_corridor_s1.cfg"):
        super().__init__()
        self.game = DoomGame()
        self.game.load_config(config)
        
        if render == False:
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        self.game.init()
        self.observation_space = Box(low=0, high=255, shape=(85,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52
        
    def step(self,action):
        actions = np.eye(7, dtype=np.uint8)
        movement_reward = self.game.make_action(actions[action],4)
        reward = 0 
        
        if self.game.get_state():
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            health, damage_taken, hitcount, ammo = self.game.get_state().game_variables
            
            damage_taken_delta = self.damage_taken - damage_taken # Took damage -> negative reward
            self.damage_taken = damage_taken 
            
            hitcount_delta = hitcount - self.hitcount # Hit an ennemy -> positive reward  
            self.hitcount = hitcount
            
            ammo_delta = ammo - self.ammo  # Wasted ammo -> negative reward
            self.ammo = ammo  
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta *200 + ammo_delta *5
            info = ammo
        else:
            state = np.zeros(self.observation_space.shape)
            info = 0
            
        info = {"info":info}
        done = self.game.is_episode_finished()
        return state, reward, done, info
    
    def render():
        pass
    
    def reset(self):
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        state = self.grayscale(state)
        return state
    
    def grayscale(self,observation):
        """Grayscale, trim the bottom infos and reduce the number of pixels"""
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        state = state[:85, :]
        return state
    
    def close(self):
        self.game.close()

# Train the model : Curriculum Learning (or simply said : increase the difficulty progressively)

In [3]:
import os 
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback

In [4]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)
        return True

In [5]:
CHECKPOINT_DIR = './train/train_deadly_corridor'
LOG_DIR = './logs/log_deadly_corridor'

In [6]:
callback = TrainAndLoggingCallback(check_freq=20000, save_path=CHECKPOINT_DIR)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s1.cfg')
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, n_steps=8192, clip_range=.1, gamma=.95, gae_lambda=.9)

In [None]:
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s2.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s3.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s4.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s5.cfg')
model.set_env(env)
model.learn(total_timesteps=100000, callback=callback)

# Test the model

In [15]:
model = PPO.load('./train/train_deadly_corridor/best_model_260000.zip')

In [23]:
env = VizDoomGym(render=False)

In [24]:
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=5)
"Mean reward : " + str(mean_reward)

'Mean reward : 996.0'

In [25]:
for episode in range(5): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.05)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode, total_reward))
    time.sleep(1)
env.close()

Total Reward for episode 0 is -1523.982192993164
Total Reward for episode 1 is -1238.3617095947266
Total Reward for episode 2 is 1557.6107940673828
Total Reward for episode 3 is 2408.5035858154297
Total Reward for episode 4 is -667.1030426025391
