In [None]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces 
from gym.spaces import Discrete, Box
# Import opencv 
import cv2

In [None]:
# Currently this notebook trains the model on the health gathering scenario, with the config file (action space and game variables) changed to match deathmatch so it can serve as a curriculum-learning stage for deathmatch.

In [None]:
# Action suppression is done by modifying the action selection matrix (suppressed actions map to an all-zero button vector).

In [None]:
# Create Vizdoom OpenAI Gym Environment for the health gathering stage of the deathmatch curriculum
class VizDoomGym(Env):
    """Gym environment wrapping a ViZDoom game for the health gathering scenario.

    Observations are single-channel 100x160 uint8 frames. The action space is
    Discrete(20), but only the first four actions press a button; every other
    action maps to an all-zero button vector (action suppression).

    The per-step reward is the reward returned by the game plus the change in
    health since the previous step.
    """

    # Size of the discrete action space exposed to the agent.
    N_ACTIONS = 20
    # Actions with an index below this press their own button; the rest are suppressed.
    N_ENABLED_ACTIONS = 4
    # Second argument passed to DoomGame.make_action for every step.
    FRAME_SKIP = 4

    # Function that is called when we start the env
    def __init__(self, render=False, config='ViZDoom/scenarios/health_gathering_forDeathmatch.cfg'):
        """Load the scenario config and start the game.

        Args:
            render: when equal to False the game window is hidden, otherwise it is shown.
            config: path of the ViZDoom scenario .cfg file to load.
        """
        # Inherit from Env
        super().__init__()
        # Setup the game
        self.game = DoomGame()
        self.game.load_config(config)

        # Render frame logic
        if render == False:
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)

        # Start the game
        self.game.init()

        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8)
        self.action_space = Discrete(self.N_ACTIONS)

        # Action selection matrix, built once instead of on every step.
        # Normally this would be an identity matrix; here only the first
        # N_ENABLED_ACTIONS rows keep their 1, the remaining rows are all zero.
        self._actions = [
            [1 if (row == col and row < self.N_ENABLED_ACTIONS) else 0
             for col in range(self.N_ACTIONS)]
            for row in range(self.N_ACTIONS)
        ]

        # Game variables: KILLCOUNT HEALTH ARMOR SELECTED_WEAPON SELECTED_WEAPON_AMMO
        # NOTE: despite its name, damage_taken stores the health seen on the previous step.
        self.damage_taken = 0
        self.killcount = 0
        self.hitcount = 0
        self.ammo = 0 ## CHANGED
        self.ep_length = 0

    @staticmethod
    def _health_from(game_state):
        """Return the HEALTH entry of a game state's game variables."""
        # Variable order: KILLCOUNT HEALTH ARMOR SELECTED_WEAPON SELECTED_WEAPON_AMMO
        _killcount, health, _armour, _selected_weapon, _ammo = game_state.game_variables
        return health

    # This is how we take a step in the environment
    def step(self, action):
        """Apply one action and return ``(state, reward, done, info)``.

        Args:
            action: index into the action selection matrix.

        Returns:
            state: grayscale frame, or an all-zero uint8 frame when no game
                state is available after the action.
            reward: game reward plus the health change since the previous
                step, as a float; 0.0 when no game state is available.
            done: result of ``is_episode_finished()``.
            info: ``{"info": health}``, or ``{"info": 0}`` when no game state
                is available.
        """
        # Specify action and take step
        movement_reward = self.game.make_action(self._actions[action], self.FRAME_SKIP)

        # Query the state once; it is None when there is nothing left to observe.
        game_state = self.game.get_state()

        if game_state is not None:
            self.ep_length = self.ep_length + 1
            state = self.grayscale(game_state.screen_buffer)

            # Reward shaping: losing health is penalised, gaining health is rewarded.
            health = self._health_from(game_state)
            health_lost = self.damage_taken - health
            self.damage_taken = health

            reward = float(movement_reward*1.00 - health_lost*1.0)
            info = health
        else:
            # Placeholder frame with the same dtype as the observation space.
            state = np.zeros(self.observation_space.shape, dtype=np.uint8)
            # NOTE(review): the reward returned by make_action on this final step is
            # discarded (as in the original code) - confirm this is intended.
            reward = 0.0
            info = 0

        info = {"info": info}
        done = self.game.is_episode_finished()

        if done:
            self.ep_length = 0

        return state, reward, done, info

    # Define how to render the game or environment
    def render(self, mode='human'):
        """No-op; window visibility is chosen in ``__init__`` via ``render``."""
        pass

    # What happens when we start a new game
    def reset(self):
        """Start a new episode and return its first grayscale frame."""
        self.game.new_episode()
        game_state = self.game.get_state()
        # Use the starting health as the baseline so that the first step of the
        # episode is not credited with the whole initial health as a "gain".
        self.damage_taken = self._health_from(game_state)
        self.killcount = 0
        self.hitcount = 0
        self.ammo = 52 ## CHANGED
        self.ep_length = 0
        return self.grayscale(game_state.screen_buffer)

    # Grayscale the game frame and resize it
    def grayscale(self, observation):
        """Convert a raw screen buffer into a (100, 160, 1) grayscale frame."""
        # Move the first axis to the end before the colour conversion
        # (assumes a channel-first screen buffer - TODO confirm against the config).
        channels_last = np.moveaxis(observation, 0, -1)
        gray = cv2.cvtColor(channels_last, cv2.COLOR_BGR2GRAY)
        # cv2.resize takes the target size as (width, height).
        resized = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        return np.reshape(resized, (100,160,1))

    # Call to close down the game
    def close(self):
        """Close the underlying ViZDoom game."""
        self.game.close()

    def get_game_variables(self):
        """Return the game variables of the current state.

        Fails with AttributeError when ``get_state()`` returns None, which
        ``step`` treats as "no state available".
        """
        return self.game.get_state().game_variables

In [None]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Save the model being trained every ``check_freq`` callback calls.

    Args:
        check_freq: a checkpoint is written whenever ``n_calls`` is a
            multiple of this value.
        save_path: directory for the checkpoints; ``None`` disables saving.
        verbose: forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Write a checkpoint when due; always returns True."""
        # Skip saving when no save path was given, consistent with
        # _init_callback, instead of failing inside os.path.join(None, ...).
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
# Directory where the model weights are saved; change this to choose another location.
CHECKPOINT_DIR = '/mnt/new_hdd5/fps/train/deathmatch/Anu/basic_3.0_ppo_healthpart'
# Directory passed to the model as its tensorboard log location.
LOG_DIR = '/mnt/new_hdd5/fps/logs/deathmatch/Anu/log_basic'

In [None]:
# import ppo for training
from stable_baselines3 import DQN
from stable_baselines3 import PPO  #right now PPO is used

In [None]:
# Training environment; render is left at its default (False), so no game window is shown.
env = VizDoomGym(
    config='ViZDoom/scenarios/health_gathering_forDeathmatch.cfg',
)

In [None]:
# PPO agent with a CNN policy; training metrics are logged under LOG_DIR.
model = PPO(
    'CnnPolicy',
    env,
    tensorboard_log=LOG_DIR,
    verbose=1,
    learning_rate=0.00001,
    n_steps=8192,
    clip_range=.1,
    gamma=.95,
    gae_lambda=.9,
)
# Alternative DQN setup, kept for reference:
#model = DQN('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, gamma=.95)

In [None]:
#model training
# The callback passed to learn() was never defined anywhere in the notebook, so it
# is created here. It writes a checkpoint into CHECKPOINT_DIR every CHECKPOINT_FREQ
# callback calls; adjust the frequency as needed.
CHECKPOINT_FREQ = 10000
callback = TrainAndLoggingCallback(check_freq=CHECKPOINT_FREQ, save_path=CHECKPOINT_DIR)

env = VizDoomGym(config='ViZDoom/scenarios/health_gathering_forDeathmatch.cfg')
model.set_env(env)
model.learn(total_timesteps=500000, callback=callback)

In [None]:
#model = PPO.load('/mnt/new_hdd5/fps/train/deathmatch/Anu/basic_1.2_healthpart/best_model_500000')
#for curriculum

In [None]:
#Testing

In [None]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy
from gym import wrappers

In [None]:
# Evaluation environment, explicitly created without a visible game window.
env = VizDoomGym(
    render=False,
    config='ViZDoom/scenarios/health_gathering_forDeathmatch.cfg',
)

In [None]:
# Evaluate the agent over 100 games. Because return_episode_rewards=True is passed,
# `mean_reward` receives the per-episode rewards and `ep_length` the per-episode
# lengths (one entry per game), not aggregated statistics.
mean_reward, ep_length = evaluate_policy(
    model,
    env,
    n_eval_episodes=100,
    return_episode_rewards=True,
)
for i in range(100):
    episode_len, episode_reward = ep_length[i], mean_reward[i]
    print(f'Episode {i}: Episode Length = {episode_len}, Reward = {episode_reward}')