In [1]:
import random
from deap import base, creator, tools, algorithms
import matplotlib.pyplot as plt
from genetic.common_types import Rule, ConditionType, OperatorType, ActionType
from genetic.test_agent import GeneticAgent
from genetic.game import Game
import numpy as np
import pickle
from pommerman.agents import PlayerAgent

if "FitnessMax" in creator.__dict__:
    del creator.FitnessMax
if "Individual" in creator.__dict__:
    del creator.Individual


In [2]:
POPULATION_SIZE = 100


custom_map = [
    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Border walls
    [1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1],  # Player 0 starting area
    [1, 0, 1, 2, 1, 2, 1, 2, 1, 0, 1],
    [1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1],
    [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1],
    [1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1],  # Middle row
    [1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1],
    [1, 0, 2, 0, 2, 0, 2, 0, 2, 0, 1],
    [1, 0, 1, 2, 1, 2, 1, 2, 1, 0, 1],
    [1, 0, 0, 0, 2, 0, 2, 0, 0, 0, 1],  # Player 3 starting area
    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],  # Border walls
]

In [3]:
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

In [4]:
def random_condition():
    return random.choice(list(ConditionType))

def random_operator():
    return random.choice(list(OperatorType))

def random_action():
    return random.choice(list(ActionType))

def create_random_rule():
    num_conditions = random.randint(1, 3)
    conditions = [random_condition() for _ in range(num_conditions)]
    
    num_operators = num_conditions - 1
    operators = [random_operator() for _ in range(num_operators)]
    
    action = random_action()
    
    return Rule(conditions, operators, action)

def create_individual(num_rules):
    return [create_random_rule() for _ in range(num_rules)]

In [5]:
toolbox = base.Toolbox()
toolbox.register("rule", create_random_rule)

toolbox.register("individual", tools.initRepeat, creator.Individual, 
                toolbox.rule, n=10)

toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [6]:
def mutate_rule(rule, indpb=0.1):
    if random.random() < indpb:
        if len(rule.conditions) < 3 and random.random() < 0.5:
            rule.conditions.append(random_condition())
            if len(rule.conditions) > 1:
                rule.operators.append(random_operator())
        elif len(rule.conditions) > 1 and random.random() < 0.5:
            idx = random.randint(0, len(rule.conditions) - 1)
            rule.conditions.pop(idx)
            if idx < len(rule.operators):
                rule.operators.pop(idx)
            else:
                rule.operators.pop(-1)
        else:
            idx = random.randint(0, len(rule.conditions) - 1)
            rule.conditions[idx] = random_condition()

    for i in range(len(rule.operators)):
        if random.random() < indpb:
            rule.operators[i] = random_operator()
            
    if random.random() < indpb:
        rule.action = random_action()
        
    return rule


def mutate_individual(individual, indpb=0.1):
    for i in range(len(individual)):
        if random.random() < indpb:
            individual[i] = mutate_rule(individual[i], indpb)

    return individual,

def crossover_individuals(ind1, ind2):
    if len(ind1) != len(ind2):
        raise ValueError("Individuals must have the same number of rules for crossover.")
    
    cxpoint1 = random.randint(0, len(ind1))
    cxpoint2 = random.randint(0, len(ind1))
    if cxpoint1 > cxpoint2:
        cxpoint1, cxpoint2 = cxpoint2, cxpoint1
        
    ind1[cxpoint1:cxpoint2], ind2[cxpoint1:cxpoint2] = \
        ind2[cxpoint1:cxpoint2], ind1[cxpoint1:cxpoint2]
        
    return ind1, ind2

In [7]:
def evaluate_population_in_tournament(population):
    fitness_scores = [0] * len(population)
    num_tournaments = len(population) // 4
        
    for tournament in range(num_tournaments):
        competitors_indices = np.random.choice(len(population), 4, replace=False)

        competitors = [population[i] for i in competitors_indices]
        agents = [GeneticAgent(rules=competitor) for competitor in competitors]

        game = Game(agents, custom_map=custom_map)
        
        results = game.play_game(num_episodes=3, render_mode=None)
        
        for episode_result in results:
            winners = episode_result['winners'] or []
            for winner in winners:
                pop_idx = competitors_indices[winner]
                fitness_scores[pop_idx] += 10
                
            survival_steps = episode_result['survival_steps']
            for agent_idx, steps in enumerate(survival_steps):
                pop_idx = competitors_indices[agent_idx]
                fitness_scores[pop_idx] += steps / 50
                
    return [(score,) for score in fitness_scores]
        
        
def evaluate_individual(individual):
    return (0, )

In [8]:
toolbox.register("mate", crossover_individuals)
toolbox.register("mutate", mutate_individual, indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate_individual)

In [9]:
def run_evolution(n_gen=10, pop_size=40):
    pop_size = (pop_size // 4) * 4
    if pop_size < 4:
        pop_size = 4
        
    print(f"Starting evolution with population size: {pop_size}")
    
    pop = toolbox.population(n=pop_size)
    
    fitnesses = evaluate_population_in_tournament(pop)
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit
        
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("min", np.min)
    stats.register("max", np.max)
    
    hof = tools.HallOfFame(5)
    
    for gen in range(n_gen):
        print(f"Generation {gen + 1}/{n_gen}")
        
        offspring = toolbox.select(pop, len(pop))
        offspring = [toolbox.clone(ind) for ind in offspring]
        
        for i in range(0, len(offspring), 2):
            if i + 1 < len(offspring):
                if random.random() < 0.7:
                    toolbox.mate(offspring[i], offspring[i + 1])
                    del offspring[i].fitness.values
                    del offspring[i + 1].fitness.values

        for i in range(len(offspring)):
            if random.random() < 0.2:
                toolbox.mutate(offspring[i])
                del offspring[i].fitness.values
                
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        fitnesses = evaluate_population_in_tournament(invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
            
        pop[:] = offspring
        
        hof.update(pop)
        
        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x*x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        
        print(f"  Min: {np.min(fits)}")
        print(f"  Max: {np.max(fits)}")
        print(f"  Avg: {mean}")
        print(f"  Best individual fitness: {hof[0].fitness.values[0]:.2f}")

    return pop, stats, hof

In [10]:
# final_pop, stats, hof = run_evolution(n_gen=10, pop_size=POPULATION_SIZE)

In [11]:
final_pop, stats, hof = run_evolution(n_gen=50, pop_size=POPULATION_SIZE)

print("\nEvolution finished.")
print(f"Best individual has {len(hof[0])} rules with fitness: {hof[0].fitness.values[0]}")
print("Best individual rules:")
for rule in hof[0]:
    print(rule)
    
with open("best_individual.pkl", "wb") as f:
    pickle.dump(hof[0], f)
    
print("\nBest individual saved to best_individual.pkl.")

best_agent = GeneticAgent(rules=hof[0])
game = Game([best_agent, PlayerAgent()], custom_map=custom_map)

Starting evolution with population size: 100
Generation 1/50
  Min: 0.0
  Max: 60.0
  Avg: 13.957800000000002
  Best individual fitness: 60.00
Generation 2/50
  Min: 0.0
  Max: 60.0
  Avg: 14.349200000000002
  Best individual fitness: 60.00
Generation 3/50
  Min: 0.0
  Max: 60.0
  Avg: 16.279600000000002
  Best individual fitness: 60.00
Generation 4/50
  Min: 0.0
  Max: 60.0
  Avg: 13.9672
  Best individual fitness: 60.00
Generation 5/50
  Min: 0.0
  Max: 48.0
  Avg: 12.612400000000001
  Best individual fitness: 60.00
Generation 6/50
  Min: 0.0
  Max: 48.0
  Avg: 13.532800000000002
  Best individual fitness: 60.00
Generation 7/50
  Min: 0.0
  Max: 48.0
  Avg: 14.28
  Best individual fitness: 60.00
Generation 8/50
  Min: 0.0
  Max: 48.0
  Avg: 15.36
  Best individual fitness: 60.00
Generation 9/50
  Min: 0.0
  Max: 48.0
  Avg: 15.0
  Best individual fitness: 60.00
Generation 10/50
  Min: 0.0
  Max: 48.0
  Avg: 16.56
  Best individual fitness: 60.00
Generation 11/50
  Min: 0.0
  Max: 60.