In [1]:
from vizdoom import *
import random
import time
import numpy as np
!cd github

In [2]:
game = DoomGame()
game.load_config('github/VizDoom/scenarios/basic.cfg')
game.init()

In [3]:
actions = np.identity(3,dtype = np.uint8)

In [4]:
random.choice(actions)

array([0, 1, 0], dtype=uint8)

In [5]:
game.new_episode()

In [6]:
game.is_episode_finished

<bound method PyCapsule.is_episode_finished of <vizdoom.vizdoom.DoomGame object at 0x000001EDF371D230>>

In [7]:
game.make_action(random.choice(actions))

-1.0

In [8]:
# Loop through episodes 
episodes = 10 
for episode in range(episodes): 
    # Create a new episode or game 
    game.new_episode()
    # Check the game isn't done 
    while not game.is_episode_finished(): 
        # Get the game state 
        state = game.get_state()
        # Get the game image from game state
        img = state.screen_buffer
        # Get the game variables - ammo from state
        info = state.game_variables
        # Take an action
        reward = game.make_action(random.choice(actions),4)#4 over here is for frame buffer we skip next 4 frame after actions to see the reward of the action
        # Print rewward 
        print('reward:', reward) 
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)

reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: 99.0
Result: 83.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: 97.0
reward: -1.0
Result: -96.0
reward: -4.0
reward: 99.0
Result: 95.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: 99.0
Result: 83.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.0
reward: -4.0
reward: 97.0
Result: 64.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: 99.0
Result: 79.0
reward: -4.0
reward: -4.0
reward: -4.0
reward: -9.0
reward: -4.

In [8]:
game.close()

# Converting it to OpenAi Gym 

In [15]:
!pip install gym



In [9]:
from gym import Env
from gym.spaces import Discrete, Box
import cv2

In [10]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config('github/VizDoom/scenarios/basic.cfg')
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(3)
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(3)
        reward = self.game.make_action(actions[action], 4) 
        
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            ammo = self.game.get_state().game_variables[0]
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [12]:
env = VizDoomGym(render=True) 

In [13]:
######When You Are making your own env check if the env you coded is valid by using this#####
from stable_baselines3.common import env_checker
env_checker.check_env(env)

In [14]:
env.step(0)

(array([[[71],
         [76],
         [72],
         ...,
         [75],
         [71],
         [72]],
 
        [[58],
         [35],
         [33],
         ...,
         [34],
         [33],
         [35]],
 
        [[67],
         [74],
         [55],
         ...,
         [27],
         [27],
         [27]],
 
        ...,
 
        [[75],
         [63],
         [62],
         ...,
         [44],
         [71],
         [60]],
 
        [[15],
         [48],
         [47],
         ...,
         [49],
         [69],
         [47]],
 
        [[22],
         [14],
         [26],
         ...,
         [57],
         [37],
         [39]]], dtype=uint8),
 -9.0,
 False,
 {'info': 48.0})

In [15]:
env.close()

# Save Model

In [11]:
import os 
from stable_baselines3.common.callbacks import BaseCallback

In [12]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [13]:
CHECKPOINT_DIR = './train/train_basic'
LOG_DIR = './logs/log_basic'

In [14]:
callback = TrainAndLoggingCallback(check_freq=50000, save_path=CHECKPOINT_DIR)

# Train Model

In [15]:
from stable_baselines3 import PPO

In [16]:
# Non rendered environment
env = VizDoomGym()

In [17]:
model = PPO('CnnPolicy', env, verbose=1, learning_rate=0.0001, n_steps=2048)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [325]:
model.learn(total_timesteps=100000)

---------------------------------
| rollout/           |          |
|    ep_len_mean     | 31.7     |
|    ep_rew_mean     | -78      |
| time/              |          |
|    fps             | 32       |
|    iterations      | 1        |
|    time_elapsed    | 62       |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 34.3        |
|    ep_rew_mean          | -94.5       |
| time/                   |             |
|    fps                  | 13          |
|    iterations           | 2           |
|    time_elapsed         | 301         |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.009303957 |
|    clip_fraction        | 0.119       |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.09       |
|    explained_variance   | -0.000115   |
|    learning_rate        | 0.

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 18          |
|    ep_rew_mean          | 12.3        |
| time/                   |             |
|    fps                  | 8           |
|    iterations           | 11          |
|    time_elapsed         | 2668        |
|    total_timesteps      | 22528       |
| train/                  |             |
|    approx_kl            | 0.020608021 |
|    clip_fraction        | 0.263       |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.886      |
|    explained_variance   | 0.656       |
|    learning_rate        | 0.0001      |
|    loss                 | 1.4e+03     |
|    n_updates            | 100         |
|    policy_gradient_loss | 0.00458     |
|    value_loss           | 2.78e+03    |
-----------------------------------------
----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 10.7    

-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.57        |
|    ep_rew_mean          | 84.3        |
| time/                   |             |
|    fps                  | 7           |
|    iterations           | 21          |
|    time_elapsed         | 5478        |
|    total_timesteps      | 43008       |
| train/                  |             |
|    approx_kl            | 0.067822464 |
|    clip_fraction        | 0.32        |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.477      |
|    explained_variance   | 0.601       |
|    learning_rate        | 0.0001      |
|    loss                 | 129         |
|    n_updates            | 200         |
|    policy_gradient_loss | -0.0105     |
|    value_loss           | 334         |
-----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.96  

----------------------------------------
| rollout/                |            |
|    ep_len_mean          | 3.6        |
|    ep_rew_mean          | 88.5       |
| time/                   |            |
|    fps                  | 7          |
|    iterations           | 31         |
|    time_elapsed         | 9044       |
|    total_timesteps      | 63488      |
| train/                  |            |
|    approx_kl            | 0.02658273 |
|    clip_fraction        | 0.0888     |
|    clip_range           | 0.2        |
|    entropy_loss         | -0.134     |
|    explained_variance   | 0.674      |
|    learning_rate        | 0.0001     |
|    loss                 | 19.8       |
|    n_updates            | 300        |
|    policy_gradient_loss | 0.00348    |
|    value_loss           | 65.6       |
----------------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 4.23        |
|    ep_rew_m

KeyboardInterrupt: 

In [18]:
PPO_path = os.path.join('Training', 'Saved Models', 'PPO_model')

In [19]:
model.save(PPO_path)

In [328]:
del model #Just deleted the model not the saved file

In [20]:
model = PPO.load(PPO_path, env=env) #reloaded the model fron the saved file

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [21]:
from stable_baselines3.common.evaluation import evaluate_policy
env = VizDoomGym(render=True) 
mean_reward = evaluate_policy(model,env,n_eval_episodes=10)
print(mean_reward)



(-300.0, 0.0)


In [30]:
model.predict(obs)

NameError: name 'obs' is not defined

In [None]:
for episode in range(100): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        # time.sleep(0.20)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode,total_reward))
    time.sleep(2)