Using NEAT (NeuroEvolution of Augmenting Topologies) to learn the defend_the_center.cfg scenario

Import everything

In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from torch.utils.tensorboard import SummaryWriter
from vizdoom import * #Import all of vizdoom
import time #To make the program sleep (wait), so we can actually see what's happening
from gymnasium import Env #Import OpenAI Gym's Env class
from gymnasium.spaces import Discrete, Box #Import OpenAI Gym's Discrete and Box spaces
import cv2 #OpenCV for image processing, used for modifying the DOOM environment to make it run faster 
from stable_baselines3.common.callbacks import BaseCallback #Import the BaseCallback class from stable_baselines3 to learn from the environment
from stable_baselines3.common import env_checker #Import the env_checker class from stable_baselines3 to check the environment
import os #To create directories for saving models
import sys #To backtrack to root
import neat

original_sys_path = sys.path.copy() #Come back to this path later after we navigate to the parent directory
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  #Add the parent directory to the path so we can import the pathfinder module
from pathfinder import doomfinder, create_new_best_generation_directory, gamefinder #Import functions from the pathfinder module
sys.path = original_sys_path #Set the path back to the original path


Define environment

In [19]:
#Module-level evaluation settings; eval_genome reads both names as globals
do_render = False #Passed as render= when eval_genome builds the environment (controls window visibility)
episodes = 1 #Number of episodes each genome is played for; fitness is averaged over them

class DefendTheCenterNEAT:
    """Small wrapper around a ViZDoom ``DoomGame`` used to evaluate NEAT genomes.

    The wrapper owns the game instance (``self.game``), turns the screen buffer
    into a flat 1000-element grayscale observation, and maps an integer action
    index (0-2) onto a one-hot button vector.

    Attributes:
        game: The initialised ``DoomGame``.
        do_render: Render flag given at construction or via ``set_params``.
        episodes: Episode count given at construction or via ``set_params``.
        ammo, health, killcount: Last known values of the tracked game
            variables; callers update them while stepping and ``reset``
            restores the start-of-episode values.
    """

    #Start-of-episode values for the tracked game variables
    INITIAL_AMMO = 26
    INITIAL_HEALTH = 100

    def __init__(self, config_path, render=False, episodes=1):
        """Create and initialise the game from the scenario file ``config_path``.

        Args:
            config_path: Path to the ViZDoom ``.cfg`` scenario file.
            render: Whether the game window is visible.
            episodes: Number of episodes the caller plans to run (stored only).
        """
        self.do_render = render
        self.episodes = episodes
        self.game = DoomGame()
        self.game.set_doom_game_path(gamefinder('DOOM2.wad'))
        self.game.load_config(config_path)
        self.game.set_window_visible(render)
        self.game.init()
        self._reset_tracked_stats()

    def _reset_tracked_stats(self):
        """Put the tracked ammo/health/kill values back to their episode-start values."""
        self.ammo = self.INITIAL_AMMO
        self.health = self.INITIAL_HEALTH
        self.killcount = 0

    def set_params(self, do_render=False, episodes=1):
        """Store a new render flag and episode count on the instance.

        Only the stored attributes change: window visibility was already
        applied in ``__init__`` before ``game.init()`` and is not touched here.
        """
        self.do_render = do_render
        self.episodes = episodes

    def get_observation(self):
        """Return the current frame as a flat grayscale vector of 1000 values.

        When the game has no state (``get_state()`` is falsy), a zero vector of
        the same length is returned instead.
        """
        state = self.game.get_state()
        if not state:
            return np.zeros(40 * 25)

        #Move axis 0 to the end before colour conversion
        #(assumes the buffer is channel-first - TODO confirm against the .cfg screen format)
        frame = np.moveaxis(state.screen_buffer, 0, -1)
        grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        resized = cv2.resize(grey, (40, 25), interpolation=cv2.INTER_CUBIC)
        return resized.flatten()  #Flatten to 1000 elements for NEAT input

    def step(self, action):
        """Perform ``action`` (index 0-2) for 6 frames.

        Returns:
            ``(reward, done)``: the reward reported by ``make_action`` and
            whether the episode has finished.
        """
        one_hot = np.identity(3)[action]
        reward = self.game.make_action(one_hot, 6)  #Action is repeated for 6 frames
        done = self.game.is_episode_finished()
        return reward, done

    def reset(self):
        """Start a new episode, reset the tracked stats and return the first observation."""
        self.game.new_episode()
        self._reset_tracked_stats()
        return self.get_observation()

def eval_genome(genomes, config):
    """Assign ``genome.fitness`` to every genome by letting it play the scenario.

    This is the fitness function handed to ``neat.Population.run``.

    Fitness per episode is ``kill_reward - 0.1 * ammo_wasted`` where each kill
    is worth ``1 + health / 100`` and ammo counts as wasted when it was spent
    on a step that registered no kill. The reward returned by ``env.step`` is
    not part of the fitness. The genome's fitness is the mean over the
    module-level ``episodes`` count.

    Args:
        genomes: Iterable of ``(genome_id, genome)`` pairs.
        config: The ``neat.Config`` used to build each network.
    """
    #One game instance is shared by every genome evaluated in this call
    env = DefendTheCenterNEAT(doomfinder('defend_the_center_modified.cfg'), render=do_render, episodes=episodes)

    try:
        for genome_id, genome in genomes:
            net = neat.nn.FeedForwardNetwork.create(genome, config)
            fitness = 0

            for _ in range(episodes):
                obs = env.reset()
                kill_reward = 0
                ammo_wasted = 0

                while not env.game.is_episode_finished():
                    #Pick the action with the highest network output (obs has 1000 elements)
                    action = np.argmax(net.activate(obs))

                    #Game variables are read before acting, so the deltas below
                    #describe what happened since the previous reading
                    ammo, health, killcount = env.game.get_state().game_variables

                    env.step(action)
                    #Refresh the observation so the next decision uses the current frame
                    obs = env.get_observation()

                    ammo_used = env.ammo - ammo
                    got_kill = env.killcount < killcount

                    env.ammo = ammo
                    env.health = health
                    env.killcount = killcount

                    if got_kill:
                        kill_reward += (1 + 1 * health/100) #Reward for getting a kill, scaled by health
                    elif ammo_used > 0:
                        ammo_wasted += ammo_used #Ammo spent without a kill

                #Penalize wasted ammo: subtract it from the kill reward
                fitness += kill_reward - ammo_wasted * 0.1

            #Average fitness across episodes
            genome.fitness = fitness / episodes
    finally:
        #Release the Doom instance even if an evaluation fails
        env.game.close()

In [None]:
def run_neat(config_path, generations=None, save_path="best_neat_genome.pth"):
    """Evolve a population with NEAT and save the best genome.

    Args:
        config_path: Path to the NEAT configuration file.
        generations: Maximum number of generations passed to
            ``Population.run``; ``None`` (the default) keeps the previous
            behaviour of not passing a limit.
        save_path: File the winning genome is written to with ``torch.save``.

    Returns:
        The winning genome returned by ``Population.run``.
    """
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

    #Create population
    population = neat.Population(config)

    #Add reporters
    population.add_reporter(neat.StdOutReporter(True))
    population.add_reporter(neat.StatisticsReporter())

    #Run evolution with eval_genome as the fitness function
    winner = population.run(eval_genome, generations)

    #Save the best genome (torch.save pickles the whole genome object)
    torch.save(winner, save_path)
    return winner

#Start a NEAT training run with the given config file when this module is the entry point
if __name__ == "__main__":
    run_neat("Maps and Configs/neat-config.ini")


 ****** Running generation 0 ****** 



In [None]:
def test_best_genome(config_path, genome_path):
    """Replay a saved genome for 5 rendered episodes and print each episode's reward.

    Args:
        config_path: Path to the NEAT configuration file used to build the network.
        genome_path: Path to the genome file written with ``torch.save``.
    """
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_path)

    #The file holds a pickled genome object, not a tensor state dict, so
    #weights-only loading cannot restore it. Unpickling executes code from the
    #file: only load genome files you created yourself.
    winner = torch.load(genome_path, weights_only=False)
    net = neat.nn.FeedForwardNetwork.create(winner, config)

    #Initialize environment with rendering
    env = DefendTheCenterNEAT(doomfinder('defend_the_center_modified.cfg'), render=True)

    try:
        for episode in range(5):
            obs = env.reset()
            done = False
            total_reward = 0

            while not done:
                action = np.argmax(net.activate(obs))
                reward, done = env.step(action)
                total_reward += reward
                #Refresh the observation so the next decision uses the current frame
                obs = env.get_observation()
                time.sleep(0.05) #Slow the loop down so the rendered game can be watched

            print(f"Episode {episode}: Total Reward = {total_reward}")
            time.sleep(2)
    finally:
        #Release the Doom instance even if the replay is interrupted
        env.game.close()

#Usage: replay the genome saved in best_neat_genome.pth
#NOTE(review): this config path differs from the one passed to run_neat ("Maps and Configs/neat-config.ini") - confirm which is correct
test_best_genome("neat-config.ini", "best_neat_genome.pth")