Using a GA to learn basic.cfg

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from vizdoom_env import VizDoomGym_Simple
from stable_baselines3.common.callbacks import BaseCallback
import random
from pathfinder import doomfinder

#Define a simple neural network for action selection
class DoomAgent(nn.Module):
    def __init__(self):
        super(DoomAgent, self).__init__()
        self.fc1 = nn.Linear(160 * 100, 128)
        self.fc2 = nn.Linear(128, 3)  #3 possible actions

    def forward(self, x):
        x = torch.flatten(x, 1)  #Flatten input
        x = torch.relu(self.fc1(x))
        x = torch.softmax(self.fc2(x), dim=1)
        return x

In [2]:
def initialize_population(pop_size):
    return [DoomAgent() for _ in range(pop_size)]

In [3]:
def fitness(agent, env, episodes=3):
    total_reward = 0
    for _ in range(episodes):
        obs, _ = env.reset()
        done = False
        while not done:
            obs_tensor = torch.from_numpy(obs).float().unsqueeze(0)
            action_probs = agent(obs_tensor)
            action = torch.argmax(action_probs).item()
            obs, reward, done, _, _ = env.step(action)
            total_reward += reward
    return total_reward

In [4]:
def select_parents(population, fitnesses, num_parents=5):
    sorted_population = [x for _, x in sorted(zip(fitnesses, population), reverse=True)]
    return sorted_population[:num_parents]

def crossover(parent1, parent2):
    child = DoomAgent()
    for param_child, param1, param2 in zip(child.parameters(), parent1.parameters(), parent2.parameters()):
        mask = torch.rand_like(param1) > 0.5
        param_child.data = torch.where(mask, param1.data, param2.data)
    return child

def mutate(agent, mutation_rate=0.01):
    for param in agent.parameters():
        if random.random() < mutation_rate:
            param.data += torch.randn_like(param) * mutation_rate

In [5]:
def run_ga(env, generations=20, pop_size=10, num_parents=5, mutation_rate=0.01):
    population = initialize_population(pop_size)
    for generation in range(generations):
        fitnesses = [fitness(agent, env) for agent in population]
        print(f'Generation {generation}: Best Fitness = {max(fitnesses)}')

        parents = select_parents(population, fitnesses, num_parents)
        next_population = parents[:]

        while len(next_population) < pop_size:
            parent1, parent2 = random.sample(parents, 2)
            child = crossover(parent1, parent2)
            mutate(child, mutation_rate)
            next_population.append(child)

        population = next_population
    return population


In [6]:
#Initialize Doom environment
env = VizDoomGym_Simple(doomfinder('defend_the_center.cfg'), render=True)

#Run Genetic Algorithm
trained_agents = run_ga(env, generations=20, pop_size=10)


Generation 0: Best Fitness = 1.4599999999999986


TypeError: '<' not supported between instances of 'DoomAgent' and 'DoomAgent'

In [None]:
best_agent = trained_agents[0]  # Assuming it's the best one after GA
observation, _ = env.reset()
done = False
while not done:
    obs_tensor = torch.from_numpy(observation).float().unsqueeze(0)
    action_probs = best_agent(obs_tensor)
    action = torch.argmax(action_probs).item()
    observation, reward, done, _, _ = env.step(action)
    env.render()
env.close()
