Using a ACO to learn health gathering and mywayhome (not done)

Import everything

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import math
from torch.utils.tensorboard import SummaryWriter
from vizdoom import * #Import all of vizdoom
import time #To make the program sleep (wait), so we can actually see what's happening
from gymnasium import Env #Import OpenAI Gym's Env class
from gymnasium.spaces import Discrete, Box #Import OpenAI Gym's Discrete and Box spaces
import cv2 #OpenCV for image processing, used for modifying the DOOM environment to make it run faster 
from stable_baselines3.common.callbacks import BaseCallback #Import the BaseCallback class from stable_baselines3 to learn from the environment
from stable_baselines3.common import env_checker #Import the env_checker class from stable_baselines3 to check the environment
import os #To create directories for saving models
import sys #To change the path so we can import the pathfinder module
import matplotlib.pyplot as plt

original_sys_path = sys.path.copy() #Come back to this path later after we navigate to the parent directory
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))  #Add the parent directory to the path so we can import the pathfinder module
from pathfinder import doomfinder, create_new_best_generation_directory, gamefinder #Import functions from the pathfinder module
sys.path = original_sys_path #Set the path back to the original path

Define enviornment (for mywayhome), step needs fixing as the way I have it set up now makes turning think its a wall

In [17]:
#Note we are using are representing the map as a 960x832 grid

#Map extreme verticies
MIN_X, MAX_X = 160, 1120
MIN_Y, MAX_Y = -704, 128

#Grid dimensions, the map size is actually 960x832 but this allows us some leeway if we get close to the edge
GRID_WIDTH = 960 #1 unit per cell
GRID_HEIGHT = 832 #1 units per cell

class mywayhome_VZG(Env): #Used for mywayhome config
    def __init__(self, config_path, render=False, grid_size=(GRID_HEIGHT, GRID_WIDTH), pheromone_evaporation_rate=0.01, pheromone_map = None): #Constructor
        #Args:
            #config_path (str): The path to the configuration file
            #render (bool): Whether to render the environment or not, false by default
            #grid_size (tuple): The size of the grid (x, y) for the environment
            #pheromone_evaporation_rate (float): The rate at which pheromones evaporate over time
            #pheromone_map (np.array): Optional pre-filled pheromone map, if not provided, it will be initialized with low pheromone levels

        super(mywayhome_VZG, self).__init__() #Inherit from Env class

        #Setup game environment
        self.game = vizdoom.DoomGame() #Create a DoomGame object
        self.game.set_doom_game_path(gamefinder('DOOM2.WAD')) #Set the path to the game
        self.game.load_config(config_path) #Load the configuration file from file path
        self.game.set_window_visible(render) #Set window visibility based on render argument
        self.game.init() #Initialize the game

        #Setup action and observation space
        self.observation_space = Box(low=0, high=255, shape=(100, 160, 1), dtype=np.uint8) #Observation space, 100x160x1 image
        self.action_space = Discrete(6) #Action space, 6 actions (turn left, turn right, move forward, move left, move right, move backwards)

        #Grid dimensions and pheromone setup
        self.grid_size = grid_size #Set the grid size as specified 
        if pheromone_map is not None:
            self.pheromone_map = pheromone_map #Use the provided pheromone map if available
        else: #If no pheromone map is provided, initialize a new one
            self.pheromone_map = np.ones(grid_size) * 1  #Start with adjustable pheromone levels across the grid (adjustable by the multiplier)
        self.pheromone_evaporation_rate = pheromone_evaporation_rate  #Controls how fast pheromones decay over time

        #Get game variables
        game_variables = self.game.get_state().game_variables
        xpos, ypos, angle = game_variables #Unpack the game variables
        
        self.xpos = xpos #X position of the player
        self.ypos = ypos #Y position of the player
        self.angle = angle #Angle of the player 

    def coord_to_grid(self, x, y): #Convert coordinates to grid indices
        #Args:
            #x (int): X coordinate to convert
            #y (int): Y coordinate to convert
        #Returns:
            #grid_x, grid_y (tuple): The grid indices corresponding to the coordinates
        #Raises:
            #ValueError: If the coordinates are out of bounds

        #Ensure coordinates are within world bounds
        if x < MIN_X or x > MAX_X or y < MIN_Y or y > MAX_Y:
            raise ValueError(f"Coordinates ({x}, {y}) are out of bounds!")
        
        #Normalize coordinates to range [0, 1]
        norm_x = (x - MIN_X) / (MAX_X - MIN_X)
        norm_y = (y - MIN_Y) / (MAX_Y - MIN_Y)
        
        print(f"norm_x = {norm_x}, norm_y = {norm_y}") #Debug print

        #Scale to grid dimensions and floor the values to ensure integers
        grid_x = int(norm_x * (GRID_WIDTH - 1))
        grid_y = int(norm_y * (GRID_HEIGHT - 1))

        print(f"grid_x = {grid_x}, grid_y = {grid_y}") #Debug print

        return (grid_x, grid_y)

    def step(self, action, limit = 1000): #Take a step in the environment 
        #Args:
            #action (int): The action to take
            #limit (int): Unimplemented "limit" for the episode, most likely will be a time limit
        #Returns:
            #observation (np.array): The screen buffer of the environment
            #reward (float): The reward for the action taken
            #terminated (bool) Whether the episode is finished or not (by reaching the goal)
            #truncated (bool): Whether the episode has reached some terminal state without reaching the goal (ie: running out of time)
            #info (dict): Additional information about the environment

        print(f"xpos = {self.xpos}, ypos = {self.ypos}, angle = {self.angle}") #Debug print

        #Specify actions and take a step
        actions = np.identity(6) #Create an identity matrix with 6 rows (6 actions), TURN_LEFT, TURN_RIGHT, MOVE_FOWARD, MOVE_LEFT, MOVE_RIGHT, MOVE_BACKWARD, these are the actions we can take in the environment
        self.game.make_action(actions[action], 4) #Take an action in the action space, second parameter is frame skip (skip 4 frames before taking the next action), the reason we do this is because it saves us time while being easy to see what is happening 
        truncated = False #Not implemented yet, so set to False. The idea is that if step passes some sort of limit, like a time limit, then the episode is truncated.
        info = {"xpos": self.xpos, "ypos": self.ypos, "angle": self.angle} # Initialize info with current position and angle
        reward = 0 #Initialize reward to 0

        if self.game.get_state(): #If the game is not finished
            observation = self.game.get_state().screen_buffer #Get the screen buffer
            observation = self.greyscale(observation) #Convert the image to greyscale
            
            (prev_grid_x, prev_grid_y) = self.coord_to_grid(self.xpos, self.ypos) #Find the grid indices of the previous position
            xpos, ypos, angle = self.game.get_state().game_variables #Get the game variables
            self.xpos = xpos #Update the x position
            self.ypos = ypos #Update the y position
            self.angle = angle #Update the angle

            info = {"xpos": self.xpos, "ypos": self.ypos, "angle": self.angle} #Add position and angle to the info dictionary
            (grid_x, grid_y) = self.coord_to_grid(xpos, ypos) #Find the grid indices of the current position

        #Update pheromones based on movement feedback
            if (grid_x, grid_y) == (prev_grid_x, prev_grid_y): #No movement detected
                self.pheromone_map[grid_y, grid_x] = 0 #Mark as a wall
            else: #If this isnt a wall, add pheromones
                self.pheromone_map[grid_y, grid_x] += 0.1 #Deposit pheromones
                self.pheromone_map *= (1 - self.pheromone_evaporation_rate) #Simulate evaporation
        else: #If the game is finished
            observation = np.zeros(self.observation_space.shape) #Return a blank screen

        terminated = self.game.is_episode_finished() #Check if the episode is finished

        return observation, reward, terminated, truncated, info

    def reset(self): #Reset the environment when we start a new game
        #Args:
            #seed (int): The seed for the random number generator
        #Returns:
            #(observation, info) (tuple)
                #observation (np.array): The screen buffer of the environment
                #info (dict): Additional information about the environment

        self.game.new_episode() #Start a new episode
        game_variables = self.game.get_state().game_variables  #Get the game variables   
        self.xpos, self.ypos, self.angle = game_variables  #Unpack the game variables, set agent to its random starting position 

        state = self.game.get_state().screen_buffer
        observation = self.greyscale(state)  #Convert the initial screen state to greyscale
        info = {"xpos": self.xpos, "ypos": self.ypos, "angle": self.angle} #Add position and angle to the info dictionary

        return observation, info  #Return the observation and any additional info

    def greyscale(self, observation): #Convert the environment image to greyscale and resize it
        #Args:
            #observation (np.array): The image of the environment (the current game frame)
        #Returns:
            #grey_return (np.array): The resized greyscale image of the environment
        
        grey = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY) #Convert the image to greyscale
        resize = cv2.resize(grey, (160, 100), interpolation=cv2.INTER_CUBIC) #Resize the image to 160x100 (frame size)
        state = np.reshape(resize, (100, 160, 1)) #Reshape to 100x160x1 for compatibility with neural networks
        
        return state

    def render(self, render_in_greyscale=False): #Render the environment for a frame
        #Args:
            #render_in_greyscale (bool): Whether to render the environment in greyscale or not
        
        if self.game.get_state() and render_in_greyscale:  #Only render if there's a valid game state
            observation = self.game.get_state().screen_buffer
            greyscale_obs = self.greyscale(observation)  #Convert the observation to greyscale
            cv2.imshow("VizDoom Environment", greyscale_obs.squeeze())  #Remove extra dimension and display
            cv2.waitKey(1)  #Wait 1ms between frames to allow for rendering
        elif self.game.get_state():  #Only render if there's a valid game state
            observation = self.game.get_state().screen_buffer
            cv2.imshow("VizDoom Environment", observation.squeeze())  #Render the environment without greyscale
            cv2.waitKey(1)  #Wait 1ms between frames to allow for rendering
        else:
            print("No game state to render.")

    def get_state(self): #Get the current state of the environment
        #Returns:
            #state (np.array): The current state of the environment
        return self.game.get_state()

    def get_agent_position(self): #Get the current position of the agent (tuple of x, y coordinates)
        #Returns:
            #(xpos, ypos) (tuple): The current position of the agent

        return (self.xpos, self.ypos)

    def close(self): #Close the environment when done
        self.game.close()  #Terminate the game

Define Training

In [28]:
def plot_pheromone_grid(pheromone_grid, iteration):
    plt.imshow(pheromone_grid, cmap='hot', interpolation='nearest')
    plt.title(f'Pheromone Grid at Iteration {iteration}')
    plt.colorbar()
    plt.show()

def aco_training(env, num_ants=10, iterations=50, alpha=5.0, beta=2.0, rho=0.05, pheromone_deposit=1.0):
    """
    Train a bot to pathfind in the ViZDoom environment using ACO.
    
    Args:
        env (Env): The ViZDoom environment.
        num_ants (int): Number of ants (pathfinding agents) per iteration.
        iterations (int): Total iterations for ACO.
        alpha (float): Importance of pheromones.
        beta (float): Importance of heuristic (distance to goal).
        rho (float): Pheromone evaporation rate.
        pheromone_deposit (float): Amount of pheromone deposited on successful paths.
    """
    #Initialize pheromone grid
    pheromone_grid = env.pheromone_map
    
    GOAL_POSITION = env.coord_to_grid(1040, -316)  #Armor is at 1040, -316
    
    #Heuristic function: inverse distance to goal
    def heuristic(x, y):
        gx, gy = GOAL_POSITION
        return 1.0 / (np.sqrt((gx - x) ** 2 + (gy - y) ** 2) + 1e-6)

    #Simulate movement locally without stepping the game, this is unfinished I have to account for angle
    def simulate_move(x, y, angle, move):
        print(f"x passed is {x}, y passed is {y}, angle passed is {angle}")  #Debug print

        step_size = 1  #Approximate movement step size; tune based on game behavior
        angle_rad = np.deg2rad(angle)  #Convert angle to radians

        if move == 0:  #TURN_LEFT (no position change)
            return x, y, angle - 90
        elif move == 1:  #TURN_RIGHT (no position change)
            return x, y, angle + 90
        elif move == 2:  #MOVE_FORWARD
            return x + step_size * np.cos(angle_rad), y + step_size * np.sin(angle_rad), angle
        elif move == 3:  #MOVE_LEFT
            return x - step_size * np.sin(angle_rad), y + step_size * np.cos(angle_rad), angle
        elif move == 4:  #MOVE_RIGHT
            return x + step_size * np.sin(angle_rad), y - step_size * np.cos(angle_rad), angle
        elif move == 5:  #MOVE_BACKWARD
            return x - step_size * np.cos(angle_rad), y - step_size * np.sin(angle_rad), angle
        return x, y, angle

    #Function to select next move based on pheromones and heuristic
    def select_next_move(x, y, angle):
        possible_moves = [0, 1, 2, 3, 4, 5]  #TURN_LEFT, TURN_RIGHT, etc.
        probs = []

        for move in possible_moves:
            #Simulate movement locally without stepping the game
            simulated_x, simulated_y, simulated_angle = simulate_move(x, y, angle, move)
            print(f"Simulated move: {move}, x: {simulated_x}, y: {simulated_y}, angle: {simulated_angle}")  #Debug print

            #Convert to grid indices and calculate pheromone + heuristic
            grid_x, grid_y = env.coord_to_grid(simulated_x, simulated_y)
            if 0 <= grid_x < GRID_WIDTH and 0 <= grid_y < GRID_HEIGHT:
                pheromone = pheromone_grid[grid_y, grid_x]
                eta = heuristic(grid_x, grid_y)
                probs.append((pheromone ** alpha) * (eta ** beta))
            else:
                probs.append(0.0)  #Invalid moves have zero probability

        #Normalize probabilities
        probs = np.array(probs)
        if probs.sum() == 0:
            probs = np.ones(len(possible_moves)) / len(possible_moves)  #Default equal prob
        else:
            probs /= probs.sum()

        return np.random.choice(possible_moves, p=probs)

    for iteration in range(iterations):
        print(f"Iteration {iteration + 1}/{iterations}")
        
        for ant in range(num_ants):
            observation, info = env.reset()
            x, y, angle = env.xpos, env.ypos, env.angle
            path = []
            success = False

            for step_count in range(100):  #Limit steps per ant
                action = select_next_move(x, y, angle)
                observation, _, terminated, _, info = env.step(action)
                x, y, angle = info['xpos'], info['ypos'], info['angle']
                path.append((env.coord_to_grid(x, y)))
                
                if (x, y) == GOAL_POSITION:
                    success = True
                    print(f"Ant {ant + 1} reached the goal in {step_count} steps.")
                    break
                
                if terminated:
                    break
            
            #Update pheromones
            for (px, py) in path:
                pheromone_grid[py, px] += pheromone_deposit if success else 0.0
        
        #Evaporate pheromones
        pheromone_grid *= (1 - rho)
        
        print(f"Pheromone grid updated for iteration {iteration + 1}.")
        plot_pheromone_grid(pheromone_grid, iteration + 1)  #Plot the pheromone grid after each iteration

    env.close()

Training

In [29]:
def plot_pheromone_grid(pheromone_grid, iteration):
    plt.imshow(pheromone_grid, cmap='hot', interpolation='nearest')
    plt.title(f'Pheromone Grid at Iteration {iteration}')
    plt.colorbar()
    plt.show()

#Initialize the environment and run ACO training
env = mywayhome_VZG(config_path=doomfinder("my_way_home.cfg"), render=True)
aco_training(env)

norm_x = 0.9166666666666666, norm_y = 0.46634615384615385
grid_x = 879, grid_y = 387
Iteration 1/50
x passed is 1043.1000061035156, y passed is 60.20799255371094, angle passed is 13.886718753233254
Simulated move: 0, x: 1043.1000061035156, y: 60.20799255371094, angle: -76.11328124676675
norm_x = 0.9198958396911621, norm_y = 0.9185192218193641
grid_x = 882, grid_y = 763
x passed is 1043.1000061035156, y passed is 60.20799255371094, angle passed is 13.886718753233254
Simulated move: 1, x: 1043.1000061035156, y: 60.20799255371094, angle: 103.88671875323325
norm_x = 0.9198958396911621, norm_y = 0.9185192218193641
grid_x = 882, grid_y = 763
x passed is 1043.1000061035156, y passed is 60.20799255371094, angle passed is 13.886718753233254
Simulated move: 2, x: 1044.070778244231, y: 60.44799557621446, angle: 13.886718753233254
norm_x = 0.9209070606710739, norm_y = 0.9188076869906424
grid_x = 883, grid_y = 763
x passed is 1043.1000061035156, y passed is 60.20799255371094, angle passed is 13.886

KeyboardInterrupt: 

Define functions that create the directories that the logs will be saved in

In [60]:
def create_run_directory(base_dir="runs/vizdoom_ga_defend_the_center"):
    os.makedirs(base_dir, exist_ok=True)
    existing_runs = [int(d.split('_')[-1]) for d in os.listdir(base_dir) if d.split('_')[-1].isdigit()]
    run_number = max(existing_runs, default=0) + 1
    run_dir = os.path.join(base_dir, f"run_{run_number}")
    os.makedirs(run_dir, exist_ok=True)
    
    #Create log directory within the run directory
    log_dir = os.path.join(run_dir, "log")
    os.makedirs(log_dir, exist_ok=True)
    
    return run_dir, log_dir

Define best agents loader

In [61]:
def load_best_agents(model_dir, num_agents=5):
    """Load a specified number of best agents from a previous run's model directory."""
    best_agents = []
    for i in range(num_agents):
        agent = DoomAgent()
        checkpoint_path = os.path.join(model_dir, f"best_agent_gen_{i}.pth")
        agent.load_state_dict(torch.load(checkpoint_path, weights_only=False)) 
        best_agents.append(agent)
    return best_agents

Train with the ViZDoom Environment

In [None]:
#Initialize Doom environment
env = Defend_the_Center_VZG(doomfinder('defend_the_center_modified.cfg'), render=False)

env_checker.check_env(env) #Check the environment to see if its valid

#Run Genetic Algorithm
trained_agents = run_ga(env, generations=1000, pop_size=30, num_parents=5, mutation_rate=0.01)

Train with the ViZDoom enviornment based off a previous best agent

In [64]:
#Initialize Doom environment
#env = Defend_the_Center_VZG(doomfinder('defend_the_center_modified.cfg'), render=False)

#initial_population = load_best_agents("runs/vizdoom_ga_defend_the_center/run_7/saved_models", num_agents=999)

#Run Genetic Algorithm
#trained_agents = run_ga(env, generations=1000, pop_size=30, num_parents=5, mutation_rate=0.01, initial_population=initial_population)


Test best agent

In [None]:
#Initialize the agent
agent = DoomAgent()

#Load the saved model weights into the agent
agent.load_state_dict(torch.load("runs/vizdoom_ga_defend_the_center/run_7/saved_models/best_agent_gen_999.pth"))

#Set the agent to evaluation mode
agent.eval()

#Initialize the environment
env = Defend_the_Center_VZG(doomfinder('defend_the_center.cfg'), render=True)

for episode in range(5):
    observation, _ = env.reset()  #Reset the environment and get only the observation
    done = False  #Set done to false
    total_reward = 0  #Set total reward to 0

    while not done:  #While the game isn't done
        #Convert the observation to a tensor and pass it through the agent
        obs_tensor = torch.from_numpy(observation).float().unsqueeze(0)
        with torch.no_grad():  # Disable gradient computation
            action_probs = agent(obs_tensor)  # Use agent instead of best_agent
            action = torch.argmax(action_probs).item()
        
        #Take a step in the environment
        observation, reward, done, _, _ = env.step(action)
        total_reward += reward  #Add the reward to the total reward
        time.sleep(0.05)  #Sleep for 0.05 seconds

    print(f'Episode: {episode}, Total Reward: {total_reward}')  #Print the episode and total reward
    time.sleep(2)  #Sleep for 2 seconds between episodes
