In [100]:
!pip install vizdoom





In [101]:
!cd github & git clone https://github.com/mwydmuch/ViZDoom

fatal: destination path 'ViZDoom' already exists and is not an empty directory.


In [102]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np

In [103]:
# Setup game
game = DoomGame()
game.load_config(r'C:\DOOMgame-using-RLagent-main\github\ViZDoom\scenarios\deadly_corridor.cfg')
game.init()

In [104]:
# This is the set of actions we can take in the environment
actions = np.identity(7, dtype=np.uint8)

In [105]:
state = game.get_state()

In [106]:
state.game_variables


In [107]:
# Loop through episodes 
episodes = 10 
for episode in range(episodes): 
    # Create a new episode or game 
    game.new_episode()
    # Check the game isn't done 
    while not game.is_episode_finished(): 
        # Get the game state 
        state = game.get_state()
        # Get the game image 
        img = state.screen_buffer
        # Get the game variables - ammo
        info = state.game_variables
        # Take an action
        reward = game.make_action(random.choice(actions),4)
        # Print rewward 
        print('reward:', reward) 
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)

reward: 0.0
reward: 0.0
reward: 0.0
reward: 0.0
reward: -3.5984649658203125
reward: -4.7531890869140625
reward: 3.674285888671875
reward: 5.9538726806640625
reward: 3.9779205322265625
reward: 2.6656341552734375
reward: 2.6337890625
reward: -7.1376800537109375
reward: -17.502883911132812
reward: -1.889068603515625
reward: -0.0213470458984375
reward: 7.0593719482421875
reward: 8.468719482421875
reward: 5.712188720703125
reward: 3.208404541015625
reward: 7.7231597900390625
reward: -91.70016479492188
Result: -75.52545166015625
reward: 0.0
reward: 0.0
reward: 0.0
reward: 0.8306884765625
reward: -16.701828002929688
reward: -0.1110992431640625
reward: 6.780731201171875
reward: 8.227874755859375
reward: 5.549713134765625
reward: 10.116897583007812
reward: 3.797149658203125
reward: -7.1597747802734375
reward: -5.5988616943359375
reward: 4.9531097412109375
reward: 13.578109741210938
reward: 12.683456420898438
reward: 2.181304931640625
reward: -1.8758697509765625
reward: -0.2917022705078125
rewar

In [108]:
game.close()

In [109]:
!pip install gym





In [110]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces 
from gym.spaces import Discrete, Box
# Import opencv 
import cv2

In [111]:
# game.get_state().screen_buffer.shape

In [124]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False, config= r'C:\DOOMgame-using-RLagent-main\github\ViZDoom\scenarios\deadly_corridor.cfg'): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config(config)
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        # Game variables: HEALTH DAMAGE_TAKEN HITCOUNT SELECTED_WEAPON_AMMO
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 52 ## CHANGED
        
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(7)
        movement_reward = self.game.make_action(actions[action], 4) 
        
        reward = 0 
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            # Reward shaping
            game_variables = self.game.get_state().game_variables
            health, damage_taken, hitcount, ammo = game_variables
            
            # Calculate reward deltas
            damage_taken_delta = -damage_taken + self.damage_taken
            self.damage_taken = damage_taken
            hitcount_delta = hitcount - self.hitcount
            self.hitcount = hitcount
            ammo_delta = ammo - self.ammo
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta*200  + ammo_delta*5 
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [125]:
env = VizDoomGym(render=True)

In [126]:
env.step(0)

(array([[[32],
         [33],
         [25],
         ...,
         [27],
         [23],
         [24]],
 
        [[27],
         [33],
         [23],
         ...,
         [24],
         [24],
         [24]],
 
        [[20],
         [35],
         [23],
         ...,
         [24],
         [24],
         [24]],
 
        ...,
 
        [[75],
         [63],
         [62],
         ...,
         [44],
         [71],
         [60]],
 
        [[15],
         [48],
         [47],
         ...,
         [49],
         [69],
         [47]],
 
        [[22],
         [14],
         [26],
         ...,
         [57],
         [37],
         [39]]], dtype=uint8),
 0.0,
 False,
 {'info': 52.0})

In [127]:
state = env.reset()

In [128]:
env.reset()

array([[[32],
        [33],
        [25],
        ...,
        [27],
        [23],
        [24]],

       [[27],
        [33],
        [23],
        ...,
        [24],
        [24],
        [24]],

       [[20],
        [35],
        [23],
        ...,
        [24],
        [24],
        [24]],

       ...,

       [[75],
        [63],
        [62],
        ...,
        [44],
        [71],
        [60]],

       [[15],
        [48],
        [47],
        ...,
        [49],
        [69],
        [47]],

       [[22],
        [14],
        [26],
        ...,
        [57],
        [37],
        [39]]], dtype=uint8)

In [129]:
env.close()

In [130]:
# Import Environment checker
from stable_baselines3.common import env_checker

In [131]:
# env_checker.check_env(env)

In [132]:

# !pip install matplotlib

In [133]:
# from matplotlib import pyplot as plt

In [134]:

# plt.imshow(cv2.cvtColor(state, cv2.COLOR_BGR2RGB))

In [135]:
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

Looking in indexes: https://download.pytorch.org/whl/cu121




In [139]:
#!pip install stable-baselines3[extra]


In [141]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback

In [142]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [143]:
CHECKPOINT_DIR = './train/train_corridor'
LOG_DIR = './logs/log_corridor'

In [144]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [145]:

# import ppo for training
from stable_baselines3 import PPO

In [146]:
# Non rendered environment
env = VizDoomGym(config = r'C:\DOOMgame-using-RLagent-main\github\ViZDoom\scenarios\deadly_corridor.cfg')

In [147]:
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=2048)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




In [None]:
model.learn(total_timesteps=100000, callback=callback)

Logging to ./logs/log_corridor\PPO_3
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 32       |
|    ep_rew_mean     | 10.4     |
| time/              |          |
|    fps             | 24       |
|    iterations      | 1        |
|    time_elapsed    | 84       |
|    total_timesteps | 2048     |
---------------------------------
---------------------------------------
| rollout/                |           |
|    ep_len_mean          | 36.3      |
|    ep_rew_mean          | 19.3      |
| time/                   |           |
|    fps                  | 21        |
|    iterations           | 2         |
|    time_elapsed         | 189       |
|    total_timesteps      | 4096      |
| train/                  |           |
|    approx_kl            | 0.4965789 |
|    clip_fraction        | 0.416     |
|    clip_range           | 0.2       |
|    entropy_loss         | -1.88     |
|    explained_variance   | -1.12e-05 |
|    learning_rate     

In [115]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [116]:

# Reload model from disc
model = PPO.load('./train/train_corridor/best_model_90000')

In [117]:

# Create rendered environment
env = VizDoomGym(render=True)

In [122]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=4)

In [123]:

mean_reward

-3.4269256591796875

In [124]:

for episode in range(5): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.80)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode, total_reward))
    time.sleep(2)

Total Reward for episode 0 is 146.13624572753906
Total Reward for episode 1 is 217.1281280517578
Total Reward for episode 2 is -255.98895263671875
Total Reward for episode 3 is -33.233673095703125
Total Reward for episode 4 is -65.98152160644531


In [125]:
env.close()

In [205]:
env = VizDoomGym(render=True)

In [214]:
env.step(1)

(array([[[ 6],
         [10],
         [ 9],
         ...,
         [ 6],
         [11],
         [ 4]],
 
        [[ 9],
         [ 9],
         [ 6],
         ...,
         [ 6],
         [ 6],
         [ 4]],
 
        [[10],
         [ 7],
         [ 7],
         ...,
         [ 6],
         [ 6],
         [ 9]],
 
        ...,
 
        [[75],
         [63],
         [62],
         ...,
         [44],
         [71],
         [60]],
 
        [[15],
         [48],
         [47],
         ...,
         [49],
         [69],
         [47]],
 
        [[22],
         [14],
         [26],
         ...,
         [57],
         [37],
         [39]]], dtype=uint8),
 0.0,
 False,
 {'info': 26.0})

In [204]:
env.close()