Used to test different reward deltas on different maps and models.

Import Dependencies

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
#from vizdoom_env import Defend_the_Center_VZG #Will just define in the notebook because its more organized
import random
from pathfinder import doomfinder, create_new_best_generation_directory
from torch.utils.tensorboard import SummaryWriter
from vizdoom import * #Import all of vizdoom
import time #To make the program sleep (wait), so we can actually see what's happening
from gymnasium import Env #Import OpenAI Gym's Env class
from gymnasium.spaces import Discrete, Box #Import OpenAI Gym's Discrete and Box spaces
import cv2 #OpenCV for image processing, used for modifying the DOOM environment to make it run faster 
from stable_baselines3.common.callbacks import BaseCallback #Import the BaseCallback class from stable_baselines3 to learn from the environment
import os #To create directories for saving models

Testing defend_the_center.cfg (effect of adding killcount)

In [None]:
#Define a simple neural network for action selection
class DoomAgent(nn.Module):
    """Small fully connected network used for action selection.

    The input is flattened to 160 * 100 features per sample, passed through
    two ReLU hidden layers (128 and 64 units) and mapped to a softmax
    distribution over 3 actions.
    """

    def __init__(self):
        super().__init__()
        #Layer names (fc1/fc2/fc3) are the keys used in saved state_dicts, so they must stay as they are
        self.fc1 = nn.Linear(160 * 100, 128)  #Flattened frame -> first hidden layer
        self.fc2 = nn.Linear(128, 64)  #Second hidden layer
        self.fc3 = nn.Linear(64, 3)  #One output per action

    def forward(self, x):
        #Args:
            #x (torch.Tensor): Batch of inputs, everything after dim 0 is flattened
        #Returns:
            #(torch.Tensor): One row of action probabilities per sample (each row sums to 1)
        flat = x.flatten(start_dim=1)  #Keep the batch dimension, flatten the rest
        hidden = self.fc1(flat).relu()
        hidden = self.fc2(hidden).relu()
        logits = self.fc3(hidden)
        return logits.softmax(dim=1)

class Defend_the_Center_VZG(Env): #Used for defend_the_center config
    """Gymnasium-style environment wrapping a ViZDoom game.

    The naming convention is "first map this was used on" + "_VZG" (VizDoomGym),
    but each config/map this env is used for is listed below.

    Maps/Config: defend_the_center

    Observations are 100x160x1 uint8 greyscale frames and there are 3 discrete
    actions. step() unpacks the game variables as (ammo, health, killcount), so
    the loaded config must expose exactly those three, in that order.
    """

    def __init__(self, config_path, render=False): #Constructor
        #Args:
            #config_path (str): The path to the configuration file
            #render (bool): Whether to render the environment or not, false by default

        super().__init__() #Inherit from Env class

        #Setup game
        self.game = vizdoom.DoomGame() #Create a DoomGame object
        self.game.load_config(config_path) #Load the configuration file from file path, ex: doomfinder("basic.cfg")
        self.game.set_window_visible(bool(render)) #Only show the game window when rendering was requested
        self.game.init() #Start the game

        #Setup action and observation space
        self.observation_space = Box(low=0, high=255, shape=(100, 160, 1), dtype=np.uint8) #Observation space, 100x160x1 image
        self.action_space = Discrete(3) #Action space, 3 actions

        #Game variables, tracked between steps so step() can compute per-step deltas
        self.ammo = self.game.get_state().game_variables[0]  #Get the ammo count, initialize to the current ammo
        self.health = 100 #Initialize health to 100 (assuming we start at full health)
        self.killcount = 0 #Initialize killcount to 0

    def step(self, action, limit = 1000): #Take a step in the environment
        #Args:
            #action (int): The action to take
            #limit (int): Unimplemented "limit" for the episode, most likely will be a time limit
        #Returns:
            #observation (np.array): The screen buffer of the environment
            #reward (float): The reward for the action taken
            #terminated (bool) Whether the episode is finished or not (by reaching the goal)
            #truncated (bool): Whether the episode has reached some terminal state without reaching the goal (ie: running out of time)
            #info (dict): Additional information about the environment

        #Specify actions and take a step
        actions = np.identity(3) #Identity matrix with 3 rows (3 actions), MOVE_LEFT, MOVE_RIGHT, ATTACK, each row is a one-hot button vector
        movement_reward = self.game.make_action(actions[action], 4) #Reward returned by the game for the action, second parameter is frame skip (skip 4 frames before taking the next action)
        reward = movement_reward #Initialize reward to movement reward
        truncated = False #Not implemented yet, so set to False. The idea is that if step passes some sort of limit, like a time limit, then the episode is truncated.
        info = {} #Initialize info to an empty dictionary

        state = self.game.get_state() #Fetch the state once so the frame and the variables come from the same snapshot
        if state: #If there is a current game state (the game is not finished)
            observation = self.greyscale(state.screen_buffer) #Greyscale + resize the screen buffer

            #Get game variables
            ammo, health, killcount = state.game_variables

            #Calculate reward deltas against the values remembered from the previous step
            ammo_delta = ammo - self.ammo #Current ammo - old ammo = ammo used
            health_delta = health - self.health  #Current health - old health = damage taken
            killcount_delta = killcount - self.killcount #Current killcount - old killcount = kills made

            #Remember the current values so the next step's deltas are per-step rather than relative to the episode start
            self.ammo = ammo
            self.health = health
            self.killcount = killcount

            #reward = movement_reward*2 + ammo_delta*0.0384615385 + health_delta*0.01 #Calculate the reward, we get 2 pts for each enemy we kill, if we lose all heath our score is subtracted by 1, if we lose all ammo our score is subtracted by 1
            #reward = movement_reward*2 + ammo_delta*0.0384615385 + health_delta*0 #Ignore health delta because it might just be punishing the model too much
            #reward = movement_reward*2 + ammo_delta*0.01 + health_delta*0 #Lower the amount the model is punished for wasting ammo
            reward = movement_reward #Ignore reward shaping
            #reward = movement_reward*2 #Move punishing ammo wasted over to the fitness function
            print(f"Health delta: {health_delta*0} (impact: {health_delta * 0.0}), Ammo delta: {ammo_delta} (impact: {ammo_delta * 0.0384615385}), Reward: {reward}, Killcount delta: {killcount_delta}, (impact: {killcount_delta}), Movement reward: {movement_reward} (impact: {movement_reward * 2}) \n")
            info = {"ammo": ammo, "health": health} #Add the current ammo and health to the info dictionary
        else:
            #Return a blank screen with the same shape and dtype as the declared observation space
            observation = np.zeros(self.observation_space.shape, dtype=self.observation_space.dtype)

        terminated = self.game.is_episode_finished() #Check if the episode is finished

        return observation, reward, terminated, truncated, info

    def render(self, render_in_greyscale=False): #Render the environment for a frame
        #Args:
            #render_in_greyscale (bool): Whether to render the environment in greyscale or not

        state = self.game.get_state()
        if not state: #Only render if there's a valid game state
            print("No game state to render.")
            return

        if render_in_greyscale:
            frame = self.greyscale(state.screen_buffer).squeeze() #Greyscale frame with the extra channel dimension removed
        else:
            #The screen buffer is channel-first (greyscale() moves axis 0 to the end before using OpenCV),
            #so do the same here; OpenCV expects height x width x channels
            #NOTE(review): imshow assumes BGR channel order - confirm against the config's screen_format
            frame = np.ascontiguousarray(np.moveaxis(state.screen_buffer, 0, -1))

        #Render using OpenCV to visualize
        cv2.imshow("VizDoom Environment", frame)
        cv2.waitKey(1)  #Wait 1ms between frames to allow for rendering

    def reset(self, seed=None, options=None): #Reset the environment when we start a new game
        #Args:
            #seed (int): The seed for the random number generator
            #options (dict): Accepted for compatibility with the Gymnasium reset signature, currently unused
        #Returns:
            #(observation, info) (tuple)
                #observation (np.array): The screen buffer of the environment
                #info (dict): Additional information about the environment

        super().reset(seed=seed) #Implement seeding

        self.game.new_episode() #Start a new episode
        state = self.game.get_state()

        if state:
            observation = self.greyscale(state.screen_buffer) #Convert the image to greyscale
            ammo = state.game_variables[0]  #Get the ammo count

            #Re-initialize the tracked variables the same way the constructor does,
            #so the first step's deltas are not measured against the previous episode
            self.ammo = ammo
            self.health = 100
            self.killcount = 0

            info = {"ammo": ammo, "health": 100} #Add ammo and health to the info dictionary
        else:
            #No gamestate means no frame and no info can be gathered
            observation = np.zeros(self.observation_space.shape, dtype=self.observation_space.dtype)
            info = {}

        return (observation, info) #Tuple of observation and info

    def greyscale(self, observation=None): #Convert the enivornment to greyscale and resize it
        #Args:
            #observation (np.array): The image of the environment (the current game frame), channel-first
        #Returns:
            #state (np.array): The resized greyscale image of the environment, shape (100, 160, 1)

        if observation is None: #If no observation is passed, fall back to the current screen buffer
            game_state = self.game.get_state()
            if not game_state:
                return np.zeros((100, 160, 1), dtype=np.uint8) #Nothing to convert, return a blank frame instead of crashing
            observation = game_state.screen_buffer

        grey = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY) #Move channels last, then convert the image to greyscale
        resize = cv2.resize(grey, (160, 100), interpolation=cv2.INTER_CUBIC) #Resize the image to 160x100 (cv2 takes width, height)
        state = np.reshape(resize, (100, 160, 1)) #Reshape the image to 100x160x1

        return state

    def get_state(self):
        #Returns:
            #state: The current ViZDoom game state as returned by the underlying game (falsy when there is none)
        return self.game.get_state()

    def close(self): #Close the environment
        self.game.close()


#Initialize the agent
agent = DoomAgent()

#Load the saved model weights into the agent
#map_location="cpu" lets a checkpoint that was saved from a GPU load here, the agent is never moved off the CPU in this cell
#NOTE: the checkpoint must come from the same 3-layer architecture (fc1/fc2/fc3), otherwise load_state_dict raises a RuntimeError
agent.load_state_dict(torch.load("runs/vizdoom_ga_defend_the_center/run_7/saved_models/best_agent_gen_999.pth", map_location="cpu"))

#Set the agent to evaluation mode
agent.eval()

#Initialize the environment
env = Defend_the_Center_VZG(doomfinder('defend_the_center_modified.cfg'), render=True)

try:
    for episode in range(5):
        observation, _ = env.reset()  #Reset the environment and get only the observation
        done = False  #Set done to false
        total_reward = 0  #Set total reward to 0

        while not done:  #While the game isn't done
            #Convert the observation to a tensor and pass it through the agent
            obs_tensor = torch.from_numpy(observation).float().unsqueeze(0)  #Add a batch dimension
            with torch.no_grad():  #Disable gradient computation
                action_probs = agent(obs_tensor)
                action = torch.argmax(action_probs).item()  #Greedy action: the one with the highest probability

            #Take a step in the environment
            observation, reward, terminated, truncated, _ = env.step(action)
            done = terminated or truncated  #Stop on either kind of episode end
            total_reward += reward  #Add the reward to the total reward
            time.sleep(0.05)  #Sleep for 0.05 seconds

        print(f'Episode: {episode}, Total Reward: {total_reward}')  #Print the episode and total reward
        time.sleep(2)  #Sleep for 2 seconds between episodes
finally:
    env.close()  #Always shut the game down, even if an episode raised



  agent.load_state_dict(torch.load("runs/vizdoom_ga_defend_the_center/run_1/saved_models/best_agent_gen_199.pth"))


RuntimeError: Error(s) in loading state_dict for DoomAgent:
	Missing key(s) in state_dict: "fc3.weight", "fc3.bias". 
	size mismatch for fc2.weight: copying a param with shape torch.Size([3, 128]) from checkpoint, the shape in current model is torch.Size([64, 128]).
	size mismatch for fc2.bias: copying a param with shape torch.Size([3]) from checkpoint, the shape in current model is torch.Size([64]).