In [1]:
# %load_ext line_profiler

import random
from deap import base, creator, tools
from genetic.common_types import Rule, ConditionType, OperatorType, ActionType
from genetic.agent import GeneticAgent
from genetic.game import Game
import numpy as np
import pickle
import multiprocessing
import os
from itertools import combinations
import cProfile

# Drop creator classes left behind by a previous execution of this notebook so
# the creator.create(...) calls further down define them afresh.
# NOTE(review): presumably this avoids DEAP's "class already exists" warning on
# cell re-runs -- confirm.
if "FitnessMax" in creator.__dict__:
    del creator.FitnessMax
if "Individual" in creator.__dict__:
    del creator.Individual


In [None]:
# --- Evolution hyperparameters -------------------------------------------------
# Default number of individuals requested by run_evolution.
POPULATION_SIZE = 200
# Per-offspring probability that run_evolution applies toolbox.mutate.
MUTATION_RATE = 0.4
# Fraction of an individual's rules that mutate_individual selects (at least one).
MUTATION_RATE_NUM_RULES = 0.15
# Probability that mutate_rule attempts its condition-level changes (add/remove/replace).
MUTATION_RATE_RULE = 0.2
# Probability that a selected rule is swapped for a brand-new random rule instead of mutated.
MUTATION_RATE_REPLACE = 0.025
# Per-offspring probability that run_evolution applies toolbox.mutate_shuffle.
MUTATION_RATE_SHUFFLE = 0.05
# Inside a condition-level mutation: chance to append a condition (rules with fewer than 3).
MUTATION_RATE_ADD_COND = 0.05
# Inside a condition-level mutation: chance to drop a condition (rules with more than 1),
# evaluated only when the add branch did not fire.
MUTATION_RATE_REMOVE_COND = 0.05
# Inside a condition-level mutation: chance to overwrite one condition (rules with more than 1).
MUTATION_RATE_REPLACE_COND = 0.3
# Per-operator probability that mutate_rule re-draws that operator.
MUTATION_RATE_REPLACE_OPERATOR = 0.15
# Probability that mutate_rule re-draws the rule's action.
MUTATION_RATE_REPLACE_ACTION = 0.15
# Per-pair probability that run_evolution applies toolbox.mate.
CROSSOVER_RATE = 0.75
# Default value of run_evolution's n_gen argument.
MAX_GENERATIONS = 200
# tournsize handed to tools.selTournament for parent selection.
TOURNAMENT_SIZE = 4
# num_episodes passed to Game.play_game by evaluate_tournament.
NUM_EPISODES = 10
# Number of best individuals cloned into each new generation (POPULATION_SIZE // 20, i.e. 5%).
NUM_ELITES = POPULATION_SIZE // 20

In [3]:
# Single-objective fitness class; the positive weight makes DEAP treat larger values as better.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# Individual type: a list subclass that carries a FitnessMax in its `fitness` attribute.
creator.create("Individual", list, fitness=creator.FitnessMax)

In [4]:
def random_condition():
    """Return one ConditionType member picked at random."""
    candidates = list(ConditionType)
    return random.choice(candidates)

def random_operator():
    """Return one OperatorType member picked at random."""
    candidates = list(OperatorType)
    return random.choice(candidates)

def random_action():
    """Return one ActionType member picked at random."""
    candidates = list(ActionType)
    return random.choice(candidates)

def create_random_rule():
    """Build a random Rule.

    The rule gets 1, 2 or 3 conditions (weighted 1:3:1, so two conditions is
    the most likely outcome), one fewer operators than conditions, and a
    single random action.

    Returns:
        Rule: constructed as Rule(conditions, operators, action).
    """
    (condition_count,) = random.choices([1, 2, 3], weights=[1, 3, 1], k=1)

    picked_conditions = []
    for _ in range(condition_count):
        picked_conditions.append(random_condition())

    # Always exactly one operator fewer than there are conditions.
    picked_operators = []
    for _ in range(condition_count - 1):
        picked_operators.append(random_operator())

    # The action is drawn last, after all conditions and operators.
    return Rule(picked_conditions, picked_operators, random_action())

def create_individual(num_rules):
    """Return a plain list holding `num_rules` freshly generated random rules."""
    rules = []
    for _ in range(num_rules):
        rules.append(create_random_rule())
    return rules

In [5]:
# Toolbox that collects the generators (here) and the genetic operators (registered later).
toolbox = base.Toolbox()
# toolbox.rule() -> one random Rule.
toolbox.register("rule", create_random_rule)

# toolbox.individual() -> a creator.Individual filled with 10 rules drawn from toolbox.rule.
toolbox.register("individual", tools.initRepeat, creator.Individual, 
                toolbox.rule, n=10)

# toolbox.population(n=...) -> a list of individuals; n is supplied by the caller.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [None]:
def mutate_rule(rule: Rule, mutation_rate=None):
    """Mutate `rule` in place and return it.

    Args:
        rule: The Rule to modify; its `conditions`, `operators` and `action`
            attributes are changed in place.
        mutation_rate: Probability of attempting the condition-level changes
            (add / remove / replace a condition). Defaults to
            MUTATION_RATE_RULE, looked up at call time. Accepting it as an
            optional argument keeps both `mutate_rule(rule)` and
            `mutate_rule(rule, MUTATION_RATE_RULE)` valid call forms.

    Returns:
        The same `rule` object that was passed in.
    """
    if mutation_rate is None:
        mutation_rate = MUTATION_RATE_RULE

    if random.random() < mutation_rate:
        # With probability MUTATION_RATE_ADD_COND, append a condition
        # (only while the rule has fewer than 3).
        if len(rule.conditions) < 3 and random.random() < MUTATION_RATE_ADD_COND:
            rule.conditions.append(random_condition())
            # Keep one operator fewer than conditions.
            if len(rule.conditions) > 1:
                rule.operators.append(random_operator())
        # Otherwise, with probability MUTATION_RATE_REMOVE_COND, drop a random
        # condition (only while the rule has more than 1).
        elif len(rule.conditions) > 1 and random.random() < MUTATION_RATE_REMOVE_COND:
            idx = random.randint(0, len(rule.conditions) - 1)
            rule.conditions.pop(idx)
            # Remove the operator at the same position; when the last
            # condition was removed there is no such operator, so drop the
            # final one instead.
            if idx < len(rule.operators):
                rule.operators.pop(idx)
            else:
                rule.operators.pop(-1)

        # With probability MUTATION_RATE_REPLACE_COND, overwrite one condition.
        # NOTE(review): single-condition rules are never touched by this branch
        # because of the `> 1` guard -- confirm that this is intended.
        if len(rule.conditions) > 1 and random.random() < MUTATION_RATE_REPLACE_COND:
            idx = random.randint(0, len(rule.conditions) - 1)
            rule.conditions[idx] = random_condition()

    # Operator and action mutations are independent of `mutation_rate`.
    for i in range(len(rule.operators)):
        if random.random() < MUTATION_RATE_REPLACE_OPERATOR:
            rule.operators[i] = random_operator()

    if random.random() < MUTATION_RATE_REPLACE_ACTION:
        rule.action = random_action()

    return rule

def mutate_individual(individual):
    """Mutate a subset of an individual's rules in place.

    A fraction MUTATION_RATE_NUM_RULES of the rules (at least one) is selected
    without replacement. Each selected rule is either replaced by a new random
    rule (probability MUTATION_RATE_REPLACE) or passed through mutate_rule.

    Args:
        individual: Mutable sequence of Rule objects.

    Returns:
        A one-element tuple containing the same `individual` object, as DEAP
        mutation operators do.
    """
    # random.sample cannot draw one item from an empty range, so an individual
    # without rules is returned untouched.
    if not individual:
        return individual,

    num_rules_to_mutate = max(1, int(len(individual) * MUTATION_RATE_NUM_RULES))
    rule_indices = random.sample(range(len(individual)), num_rules_to_mutate)

    for i in rule_indices:
        if random.random() < MUTATION_RATE_REPLACE:
            individual[i] = create_random_rule()
        else:
            # mutate_rule reads MUTATION_RATE_RULE itself; passing it as a
            # second positional argument raised TypeError.
            individual[i] = mutate_rule(individual[i])

    return individual,

def mutate_shuffle(individual):
    """Randomly permute a random subset of the individual's rules, in place.

    Between 2 and len(individual) positions are chosen and the rules at those
    positions are shuffled among themselves; all other positions are left
    alone. The individual is modified in place so that callers which ignore
    the return value still see the mutation, and so that the object's type
    and `fitness` attribute are preserved.

    Args:
        individual: Mutable sequence of rules.

    Returns:
        A one-element tuple containing the same `individual` object.
    """
    if len(individual) < 2:
        return individual,

    num_to_shuffle = random.randint(2, len(individual))
    indices = random.sample(range(len(individual)), num_to_shuffle)

    # Snapshot the selected rules before writing anything back.
    shuffled_rules = [individual[i] for i in indices]
    random.shuffle(shuffled_rules)

    for idx, rule in zip(indices, shuffled_rules):
        individual[idx] = rule

    return individual,

def crossover_individuals(ind1, ind2):
    """Two-point crossover: swap one slice of rules between two individuals, in place.

    Both cut points are drawn from 0..size inclusive, where size is the length
    of the shorter individual. Because the slice end is exclusive, allowing
    the value `size` is what makes the last rule eligible for exchange.
    Equal cut points give an empty slice and leave both parents unchanged.
    The swapped slices have equal length, so neither individual changes size.

    Args:
        ind1: First parent (mutable sequence), modified in place.
        ind2: Second parent (mutable sequence), modified in place.

    Returns:
        The tuple (ind1, ind2), the same objects that were passed in.
    """
    size = min(len(ind1), len(ind2))

    cxpoint1 = random.randint(0, size)
    cxpoint2 = random.randint(0, size)
    if cxpoint1 > cxpoint2:
        cxpoint1, cxpoint2 = cxpoint2, cxpoint1

    # The right-hand side is evaluated first, so both slices are copied before
    # either individual is overwritten.
    ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = \
        ind2[cxpoint1:cxpoint2], ind1[cxpoint1:cxpoint2]

    return ind1, ind2

In [None]:
def evaluate_tournament(tournament_data):
    """Play one tournament and return what Game.play_game returns.

    Args:
        tournament_data: A (population, indices) pair. One GeneticAgent is
            built per entry of `indices`, using `population[index]` as its
            rules and `index` as its individual_index.

    Returns:
        The result of Game.play_game for NUM_EPISODES episodes with rendering
        disabled (render_mode=None).
    """
    population, indices = tournament_data

    agents = []
    for index in indices:
        agents.append(GeneticAgent(rules=population[index], individual_index=index))

    return Game(agents).play_game(num_episodes=NUM_EPISODES, render_mode=None)

def _score_agent_result(agent_result):
    """Return the fitness earned by one agent in one episode."""
    fitness = 0

    fitness += agent_result.visited_tiles * 2
    fitness += agent_result.bombs_placed * 2
    fitness += agent_result.wood_exploded * 30

    # In a 11x11 grid, the maximum distance is 20
    normalized_proximity_score = max(0, 20 - agent_result.average_distance)
    fitness += normalized_proximity_score * 2  # Weight: 2 points per unit closer

    # Kills whose id equals the agent's own id earn nothing.
    for kill in agent_result.kills:
        if kill != agent_result.id:
            fitness += 150

    if agent_result.winner:
        # A win with at least one kill is worth more than a win without.
        if len(agent_result.kills) > 0:
            fitness += 75
        else:
            fitness += 20
    else:
        # Still alive at the end without having won is penalized.
        if agent_result.is_alive:
            fitness -= 100

    # Small penalty for agents that neither explored nor placed bombs.
    if agent_result.visited_tiles < 10 and agent_result.bombs_placed < 2:
        fitness -= 10

    return fitness


def evaluate_population_in_tournament(population, generation, run_id,
                                      agents_per_tournament=4, rounds_per_agent=5):
    """Score every individual by letting it play in randomly drawn tournaments.

    For each of `rounds_per_agent` rounds the population indices are shuffled
    and cut into groups of `agents_per_tournament`. Incomplete trailing groups
    are skipped, so those individuals sit that round out. All tournaments are
    run through a multiprocessing pool via evaluate_tournament. Each
    individual's fitness is its summed per-episode score divided by the number
    of episode results recorded for it. Individuals with no recorded episode
    get 0.0.

    Args:
        population: Sequence of individuals (rule lists).
        generation: Generation number; when it is a multiple of 10 the raw
            tournament results are pickled to
            ./results/<run_id>/tournament/<generation>.pkl.
        run_id: Identifier used in the results path.
        agents_per_tournament: Group size per tournament (default 4).
        rounds_per_agent: Number of shuffle-and-group rounds (default 5).

    Returns:
        A list with one single-element tuple `(fitness,)` per individual, in
        population order.
    """
    fitness_scores = [0] * len(population)
    agent_episode_counts = [0] * len(population)

    tournament_data = []
    for _ in range(rounds_per_agent):
        shuffled_indices = list(range(len(population)))
        random.shuffle(shuffled_indices)

        for i in range(0, len(shuffled_indices), agents_per_tournament):
            current_indices = shuffled_indices[i:i + agents_per_tournament]

            # Only complete groups are played.
            if len(current_indices) == agents_per_tournament:
                tournament_data.append((population, current_indices))

    if tournament_data:
        processor_count = min(multiprocessing.cpu_count(), len(tournament_data))
        with multiprocessing.Pool(processes=processor_count) as pool:
            all_results = pool.map(evaluate_tournament, tournament_data)
    else:
        # Pool(processes=0) raises ValueError, so skip the pool when the
        # population is too small to form a single complete group.
        all_results = []

    if generation % 10 == 0:
        with open(f'./results/{run_id}/tournament/{generation}.pkl', 'wb') as f:
            pickle.dump(all_results, f)

    for result in all_results:
        for episode_result in result:
            for agent_result in episode_result.agent_results:
                agent_index = agent_result.individual_index
                agent_episode_counts[agent_index] += 1
                fitness_scores[agent_index] += _score_agent_result(agent_result)

    # Normalize fitness scores by the number of episode results per individual.
    final_fitness_scores = [0.0] * len(fitness_scores)
    for i in range(len(fitness_scores)):
        if agent_episode_counts[i] > 0:
            final_fitness_scores[i] = fitness_scores[i] / agent_episode_counts[i]

    return [(score,) for score in final_fitness_scores]
        
        
def evaluate_individual(individual):
    """Placeholder evaluator that ignores `individual` and reports a fitness of zero.

    Returns:
        The one-element tuple (0,).
    """
    placeholder_fitness = (0,)
    return placeholder_fitness

In [None]:
# Genetic operators used by run_evolution.
# Crossover: swap a slice of rules between two individuals.
toolbox.register("mate", crossover_individuals)
# Mutation: alter or replace a subset of an individual's rules.
toolbox.register("mutate", mutate_individual)
# Secondary mutation: permute the order of a subset of rules.
toolbox.register("mutate_shuffle", mutate_shuffle)
# Parent selection: DEAP tournament selection with TOURNAMENT_SIZE contestants per pick.
toolbox.register("select", tools.selTournament, tournsize=TOURNAMENT_SIZE)
# Placeholder evaluator (always (0,)); run_evolution assigns fitness via
# evaluate_population_in_tournament and never calls toolbox.evaluate.
toolbox.register("evaluate", evaluate_individual)

In [9]:
def create_folder_structure(run_id):
    """Create the results directory tree for one run, if it does not exist yet.

    Ensures these directories exist, relative to the current working directory:
        ./results/<run_id>/tournament
        ./results/<run_id>/generation
        ./results/<run_id>/conditions
        ./results/<run_id>/actions

    Args:
        run_id: Identifier interpolated into the directory path.
    """
    run_dir = f'./results/{run_id}'
    for subfolder in ('tournament', 'generation', 'conditions', 'actions'):
        # makedirs creates the missing parents (./results and the run folder).
        # exist_ok removes the check-then-create race of a separate
        # os.path.exists test.
        os.makedirs(f'{run_dir}/{subfolder}', exist_ok=True)
        
def write_population(population, generation, run_id):
    """Pickle `population` to ./results/<run_id>/generation/<generation>.pkl.

    The target directory must already exist.
    """
    target_path = f'./results/{run_id}/generation/{generation}.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(population, handle)
        
def write_conditions(population, generation, run_id):
    """Write how often each condition occurs in the population to a text file.

    Counts every condition over all rules of all individuals and writes one
    "<condition name>: <count>" line per condition, most frequent first, to
    ./results/<run_id>/conditions/<generation>.txt. This is the same format
    run_evolution uses for its condition statistics.

    Args:
        population: Iterable of individuals; each individual is an iterable of
            rules exposing a `conditions` sequence.
        generation: Generation number used as the file name.
        run_id: Identifier used in the results path.
    """
    conditions = {}
    for individual in population:
        for rule in individual:
            for condition in rule.conditions:
                conditions[condition] = conditions.get(condition, 0) + 1

    # Stable sort: conditions with equal counts keep first-seen order.
    ranked = sorted(conditions.items(), key=lambda item: item[1], reverse=True)

    out_dir = f'./results/{run_id}/conditions'
    os.makedirs(out_dir, exist_ok=True)
    with open(f'{out_dir}/{generation}.txt', 'w') as f:
        for condition, count in ranked:
            f.write(f"{condition.name}: {count}\n")
                


In [None]:
def run_evolution(n_gen=MAX_GENERATIONS, pop_size=POPULATION_SIZE, run_id=None):
    """Run the genetic algorithm and persist periodic snapshots.

    Each generation: clone the NUM_ELITES best individuals, select the
    remaining parents with toolbox.select, apply crossover / mutation /
    shuffle-mutation to the clones, rebuild the population as
    offspring + elites, and re-evaluate the whole population through
    evaluate_population_in_tournament.

    Every 10th generation (and the last one) the population is pickled and the
    condition / action usage counts are written as text files. The current
    best individual from the hall of fame is pickled after every generation.

    Args:
        n_gen: Index of the last generation; the loop runs for generations
            0..n_gen inclusive.
        pop_size: Requested population size; rounded down to a multiple of 4
            and never below 4.
        run_id: Identifier used for the ./results/<run_id>/ directory.

    Returns:
        (pop, stats, hof): the final population, the tools.Statistics object
        (configured here but never compiled), and the HallOfFame.
    """
    create_folder_structure(run_id)

    # Tournaments are played in groups of four, so keep the size a multiple of 4.
    pop_size = max(4, (pop_size // 4) * 4)

    print(f"Starting evolution with population size: {pop_size}")

    pop = toolbox.population(n=pop_size)

    fitnesses = evaluate_population_in_tournament(pop, 0, run_id)
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("min", np.min)
    stats.register("max", np.max)

    hof = tools.HallOfFame(2)

    for gen in range(n_gen + 1):
        print(f"Generation {gen}/{n_gen}")

        # NOTE(review): NUM_ELITES is derived from POPULATION_SIZE, not from the
        # pop_size argument, so small pop_size values get a large elite share.
        elites = tools.selBest(pop, NUM_ELITES)
        elites = [toolbox.clone(ind) for ind in elites]

        offspring = toolbox.select(pop, len(pop) - NUM_ELITES)
        offspring = [toolbox.clone(ind) for ind in offspring]

        # Crossover on consecutive pairs; an unpaired trailing individual is skipped.
        for i in range(0, len(offspring) - 1, 2):
            if random.random() < CROSSOVER_RATE:
                toolbox.mate(offspring[i], offspring[i + 1])
                del offspring[i].fitness.values
                del offspring[i + 1].fitness.values

        for child in offspring:
            if random.random() < MUTATION_RATE:
                toolbox.mutate(child)
                del child.fitness.values

            if random.random() < MUTATION_RATE_SHUFFLE:
                # Write the operator's result back into the offspring. This
                # works whether the operator mutates in place or returns a
                # shuffled copy; ignoring the result dropped the shuffle.
                shuffled, = toolbox.mutate_shuffle(child)
                child[:] = shuffled
                del child.fitness.values

        pop[:] = offspring + elites

        # Fitness depends on the opponents drawn, so everyone (elites
        # included) is re-evaluated each generation.
        fitnesses = evaluate_population_in_tournament(pop, gen, run_id)
        for ind, fit in zip(pop, fitnesses):
            ind.fitness.values = fit

        hof.update(pop)

        fits = [ind.fitness.values[0] for ind in pop]
        mean = sum(fits) / len(pop)

        print(f"  Min: {np.min(fits)}")
        print(f"  Max: {np.max(fits)}")
        print(f"  Avg: {mean}")
        print(f"  Best individual fitness: {hof[0].fitness.values[0]:.2f}")

        # Gen starts at 0, so we save every 10 generations
        if gen % 10 == 0 or gen == n_gen:
            with open(f'./results/{run_id}/generation/{gen}.pkl', 'wb') as f:
                pickle.dump(pop, f)

            # Usage histograms of conditions and actions over the population.
            pop_conditions = {}
            pop_actions = {}
            for individual in pop:
                for rule in individual:
                    for condition in rule.conditions:
                        pop_conditions[condition] = pop_conditions.get(condition, 0) + 1
                    pop_actions[rule.action] = pop_actions.get(rule.action, 0) + 1
            sorted_conditions = sorted(pop_conditions.items(), key=lambda x: x[1], reverse=True)
            sorted_actions = sorted(pop_actions.items(), key=lambda x: x[1], reverse=True)

            with open(f'./results/{run_id}/conditions/{gen}.txt', 'w') as f:
                for condition, count in sorted_conditions:
                    f.write(f"{condition.name}: {count}\n")
            with open(f'./results/{run_id}/actions/{gen}.txt', 'w') as f:
                for action, count in sorted_actions:
                    f.write(f"{action.name}: {count}\n")

        # Overwritten every generation, so the file always holds the best so far.
        with open(f'./results/{run_id}/best_individual.pkl', 'wb') as f:
            pickle.dump(hof[0], f)

    return pop, stats, hof

In [11]:
# import pstats
# import io

# pr = cProfile.Profile()
# pr.enable()

# Draw a random integer run id; it names the ./results/<RUN_ID>/ output directory.
RUN_ID = np.random.randint(0, 100000)
print(f"Run ID: {RUN_ID}")
final_pop, stats, hof = run_evolution(n_gen=MAX_GENERATIONS, pop_size=POPULATION_SIZE, run_id=RUN_ID)

# Report the best member of the last generation.
print("Best agent in final population:")
# sort final population by fitness (in place, best first)
final_pop.sort(key=lambda ind: ind.fitness.values[0], reverse=True)
print(f"Best individual fitness: {final_pop[0].fitness.values[0]}")
print(f"Best individual has {len(final_pop[0])} rules:")
for rule in final_pop[0]:
    print(rule)

# Report the hall-of-fame best, which may come from an earlier generation.
print("\nEvolution finished.")
print(f"Stored best individual in ./results/{RUN_ID}/best_individual.pkl")
print(f"Best individual has {len(hof[0])} rules with fitness: {hof[0].fitness.values[0]}")
print("Best individual rules:")
for rule in hof[0]:
    print(rule)

# pr.disable()
# s = io.StringIO()
# ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
# ps.print_stats(15)
# print(s.getvalue())

Run ID: 83685
Starting evolution with population size: 200
Generation 0/200
  Min: -241.31961666666675
  Max: 62.522333333333336
  Avg: -69.4620554850821
  Best individual fitness: 62.52
Generation 1/200
  Min: -119.1693818181818
  Max: 66.79333333333334
  Avg: 19.61306747993101
  Best individual fitness: 66.79
Generation 2/200
  Min: -118.53517727272732
  Max: 69.6075555555555
  Avg: 27.97230311004151
  Best individual fitness: 69.61
Generation 3/200
  Min: -142.9176452124936
  Max: 83.29933333333332
  Avg: 35.35706861746427
  Best individual fitness: 83.30
Generation 4/200
  Min: -97.20319166666668
  Max: 92.47844444444445
  Avg: 40.36575171114816
  Best individual fitness: 92.48
Generation 5/200
  Min: -71.89565833333336
  Max: 79.99399999999996
  Avg: 45.43439425610736
  Best individual fitness: 92.48
Generation 6/200
  Min: -63.696299999999994
  Max: 86.84
  Avg: 54.08304395084988
  Best individual fitness: 92.48
Generation 7/200
  Min: -68.77748333333334
  Max: 94.92866666666669


In [12]:
# %lprun -f GeneticAgent.act run_evolution(n_gen=1, pop_size=20, run_id=1)