In [44]:
import random
from deap import base, creator, tools, algorithms
import matplotlib.pyplot as plt
from genetic.common_types import Rule, ConditionType, OperatorType, ActionType
from genetic.agent import GeneticAgent
from genetic.game import Game
import numpy as np
import pickle
from pommerman.agents import PlayerAgent
import multiprocessing
import os

if "FitnessMax" in creator.__dict__:
    del creator.FitnessMax
if "Individual" in creator.__dict__:
    del creator.Individual


In [None]:
POPULATION_SIZE = 100
MUTATION_RATE = 0.3
CROSSOVER_RATE = 0.7
MAX_GENERATIONS = 50

custom_map = [
    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Border walls
    [1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1],  # Player 0 starting area
    [1, 0, 1, 2, 1, 2, 1, 2, 1, 0, 1],
    [1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1],
    [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1],
    [1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1],  # Middle row
    [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1],
    [1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1],
    [1, 0, 1, 2, 1, 2, 1, 2, 1, 0, 1],
    [1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1],  # Player 3 starting area
    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Border walls
]

In [46]:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

In [47]:
def random_condition():
    return random.choice(list(ConditionType))

def random_operator():
    return random.choice(list(OperatorType))

def random_action():
    return random.choice(list(ActionType))

def create_random_rule():
    num_conditions = random.choices([1, 2, 3], weights=[1, 2, 7], k=1)[0]
    conditions = [random_condition() for _ in range(num_conditions)]
    
    num_operators = num_conditions - 1
    operators = [random_operator() for _ in range(num_operators)]
    
    action = random_action()
    
    return Rule(conditions, operators, action)

def create_individual(num_rules):
    return [create_random_rule() for _ in range(num_rules)]

In [48]:
toolbox = base.Toolbox()
toolbox.register("rule", create_random_rule)

toolbox.register("individual", tools.initRepeat, creator.Individual, 
                toolbox.rule, n=10)

toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [None]:
def mutate_rule(rule, indpb=0.7):
    if random.random() < indpb:
        # 50% chance to add a condition if there are less than 3
        if len(rule.conditions) < 3 and random.random() < 0.5:
            rule.conditions.append(random_condition())
            if len(rule.conditions) > 1:
                rule.operators.append(random_operator())
        # 50% chance to remove a condition if there are more than 1
        elif len(rule.conditions) > 1 and random.random() < 0.5:
            idx = random.randint(0, len(rule.conditions) - 1)
            rule.conditions.pop(idx)
            if idx < len(rule.operators):
                rule.operators.pop(idx)
            else:
                rule.operators.pop(-1)
        # otherwise, replace a condition
        else:
            idx = random.randint(0, len(rule.conditions) - 1)
            rule.conditions[idx] = random_condition()

    for i in range(len(rule.operators)):
        if random.random() < indpb:
            rule.operators[i] = random_operator()
            
    if random.random() < indpb:
        rule.action = random_action()
        
    return rule

def mutate_individual(individual):
    num_rules_to_mutate = max(1, int(len(individual) * 0.3))
    rule_indices = random.sample(range(len(individual)), num_rules_to_mutate)

    for i in rule_indices:
        individual[i] = mutate_rule(individual[i], 0.7)

    return individual,

def crossover_individuals(ind1, ind2):
    cxpoint1 = random.randint(0, len(ind1))
    cxpoint2 = random.randint(0, len(ind1))
    if cxpoint1 > cxpoint2:
        cxpoint1, cxpoint2 = cxpoint2, cxpoint1
        
    ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = \
        ind2[cxpoint1:cxpoint2], ind1[cxpoint1:cxpoint2]
            
    return ind1, ind2

In [None]:
def evaluate_tournament(tournament_data):
    population, indices, custom_map = tournament_data
    agents = [GeneticAgent(rules=population[index], individual_index=index) for index in indices]
    # game = Game(agents, custom_map=custom_map)
    game = Game(agents)
    results = game.play_game(num_episodes=3, render_mode=None)
    return results

def evaluate_population_in_tournament(population):
    fitness_scores = [0] * len(population)
    num_tournaments = len(population) // 4
    
    tournament_data = []
    for _ in range(num_tournaments):
        competitors_indices = np.random.choice(len(population), 4, replace=False)
        tournament_data.append((population, competitors_indices, custom_map))

    processor_count = min(multiprocessing.cpu_count(), len(tournament_data))
    with multiprocessing.Pool(processes=processor_count) as pool:
        all_results = pool.map(evaluate_tournament, tournament_data)
        
    for result in all_results:
        for episode_result in result:
            agents = episode_result['agents']
            total_steps = episode_result['total_steps']
            for agent in agents:
                agent_index = agent['individual_index']
                
                if agent['winner']:
                    fitness_scores[agent_index] += 25

                visited_tiles = len(agent['visited_tiles'])
                fitness_scores[agent_index] += visited_tiles * 0.5
                fitness_scores[agent_index] += agent['bombs_placed']
                fitness_scores[agent_index] += agent['step_count'] / total_steps * 2

                if visited_tiles < 10 and agent['bombs_placed'] < 2:
                    fitness_scores[agent_index] -= 5
                
    return [(score,) for score in fitness_scores]
        
        
def evaluate_individual(individual):
    return (0, )

In [None]:
toolbox.register("mate", crossover_individuals)
toolbox.register("mutate", mutate_individual)
toolbox.register("select", tools.selTournament, tournsize=5)
toolbox.register("evaluate", evaluate_individual)

In [None]:
def run_evolution(n_gen=10, pop_size=40):
    pop_size = (pop_size // 4) * 4
    if pop_size < 4:
        pop_size = 4
    
    print(f"Starting evolution with population size: {pop_size}")
    
    pop = toolbox.population(n=pop_size)
    
    fitnesses = evaluate_population_in_tournament(pop)
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
        
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("min", np.min)
    stats.register("max", np.max)
    
    hof = tools.HallOfFame(10)
    
    for gen in range(n_gen):
        print(f"Generation {gen + 1}/{n_gen}")
        
        offspring = toolbox.select(pop, len(pop))
        offspring = [toolbox.clone(ind) for ind in offspring]
        
        # Replace the worst individuals in the population with the best individuals from the hall of fame
        if len(hof) > 0:
            worst_individuals = tools.selWorst(offspring, len(hof))

            for i, (worst, elite) in enumerate(zip(worst_individuals, hof)):
                idx = offspring.index(worst)
                offspring[idx] = toolbox.clone(elite)
                
        for i in range(0, len(offspring), 2):
            if i + 1 < len(offspring):
                if random.random() < CROSSOVER_RATE:
                    toolbox.mate(offspring[i], offspring[i + 1])
                    del offspring[i].fitness.values
                    del offspring[i + 1].fitness.values

        for i in range(len(offspring)):
            if random.random() < MUTATION_RATE:
                toolbox.mutate(offspring[i])
                del offspring[i].fitness.values
                
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = evaluate_population_in_tournament(invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        hof.update(pop)
        
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        print(f"  Min: {np.min(fits)}")
        print(f"  Max: {np.max(fits)}")
        print(f"  Avg: {mean}")
        print(f"  Best individual fitness: {hof[0].fitness.values[0]:.2f}")

        # Save current generation with pickle
        # create a directory for generations if it doesn't exist

        if gen % 5 == 0:
            if not os.path.exists("generations"):
                os.makedirs("generations")

            with open(f"generations/generation_{gen}.pkl", "wb") as f:
                pickle.dump(pop, f)
            with open(f"generations/hof_{gen}.pkl", "wb") as f:
                pickle.dump(hof, f)

    return pop, stats, hof

In [None]:
final_pop, stats, hof = run_evolution(n_gen=200, pop_size=200)

print("\nEvolution finished.")
print(f"Best individual has {len(hof[0])} rules with fitness: {hof[0].fitness.values[0]}")
print("Best individual rules:")
for rule in hof[0]:
    print(rule)
    
with open("best_individual.pkl", "wb") as f:
    pickle.dump(hof[0], f)
    
print("\nBest individual saved to best_individual.pkl.")

Starting evolution with population size: 200
Generation 1/200
  Min: -4.5
  Max: 124.04761904761904
  Avg: 8.034214046275077
  Best individual fitness: 124.05
Generation 2/200
  Min: -3.0
  Max: 183.1973992673993
  Avg: 14.615296864982358
  Best individual fitness: 183.20
Generation 3/200
  Min: -4.5
  Max: 119.95857142857142
  Avg: 12.852480388508411
  Best individual fitness: 183.20
Generation 4/200
  Min: -7.200000000000001
  Max: 108.76923076923077
  Avg: 14.570786159461308
  Best individual fitness: 183.20
Generation 5/200
  Min: -1.7799999999999998
  Max: 154.29999999999998
  Avg: 20.342553891545247
  Best individual fitness: 183.20
Generation 6/200
  Min: -3.9000000000000004
  Max: 236.9125874125874
  Avg: 20.345785332655222
  Best individual fitness: 236.91
Generation 7/200
  Min: -3.0
  Max: 136.83304195804195
  Avg: 13.156051378426387
  Best individual fitness: 236.91
Generation 8/200
  Min: -2.7
  Max: 92.9719641228337
  Avg: 16.34678161826542
  Best individual fitness: 236.