In [1]:
# %load_ext line_profiler

import random
from deap import base, creator, tools
from genetic.common_types import Rule, ConditionType, OperatorType, ActionType
from genetic.agent import GeneticAgent
from genetic.game import Game
import numpy as np
import pickle
import multiprocessing
import os
from itertools import combinations
import cProfile

# Remove classes left on the DEAP `creator` module by a previous run of this
# notebook, so that the later `creator.create(...)` cell can be re-executed
# from a clean slate.
if "FitnessMax" in creator.__dict__:
    del creator.FitnessMax
if "Individual" in creator.__dict__:
    del creator.Individual


In [None]:
# Evolution hyper-parameters.
POPULATION_SIZE = 200       # default number of individuals per generation
MUTATION_RATE = 0.4         # probability that an offspring is passed to the mutate operator
MUTATION_RATE_RULE = 0.45   # per-rule mutation probability (`indpb` of mutate_rule)
CROSSOVER_RATE = 0.75       # probability that a pair of offspring is mated
MAX_GENERATIONS = 200       # default number of generations to run
TOURNAMENT_SIZE = 4         # `tournsize` used by the selTournament selection operator
NUM_EPISODES = 10           # episodes played per evaluation game
# Must be an int: it is used as a selection count (slice bound / range length),
# and a float such as 10.0 raises TypeError there.
NUM_ELITES = int(POPULATION_SIZE * 0.05)

In [3]:
# Single-objective fitness with a positive weight, i.e. higher values are better.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# An individual is a plain list (of rules) that carries a FitnessMax attribute.
creator.create("Individual", list, fitness=creator.FitnessMax)

In [4]:
def random_condition():
    """Pick one member of ``ConditionType`` uniformly at random."""
    candidates = list(ConditionType)
    return random.choice(candidates)

def random_operator():
    """Pick one member of ``OperatorType`` uniformly at random."""
    candidates = list(OperatorType)
    return random.choice(candidates)

def random_action():
    """Pick one member of ``ActionType`` uniformly at random."""
    candidates = list(ActionType)
    return random.choice(candidates)

def create_random_rule():
    """Build a random ``Rule``.

    The number of conditions (1, 2 or 3) is drawn with weights 1:2:7, so
    three-condition rules are the most likely. One operator is drawn for
    each gap between consecutive conditions, followed by a single action.
    """
    (condition_count,) = random.choices([1, 2, 3], weights=[1, 2, 7], k=1)

    drawn_conditions = []
    for _ in range(condition_count):
        drawn_conditions.append(random_condition())

    # Always exactly one operator fewer than there are conditions.
    drawn_operators = []
    for _ in range(condition_count - 1):
        drawn_operators.append(random_operator())

    return Rule(drawn_conditions, drawn_operators, random_action())

def create_individual(num_rules):
    """Return a plain list holding ``num_rules`` freshly generated random rules."""
    rules = []
    for _ in range(num_rules):
        rules.append(create_random_rule())
    return rules

In [5]:
# DEAP toolbox holding the generators used to build the initial population.
toolbox = base.Toolbox()
# `toolbox.rule()` produces one random rule.
toolbox.register("rule", create_random_rule)

# `toolbox.individual()` builds a creator.Individual by calling toolbox.rule
# n=10 times, i.e. every individual starts with 10 rules.
toolbox.register("individual", tools.initRepeat, creator.Individual, 
                toolbox.rule, n=10)

# `toolbox.population(n=...)` builds a list of individuals; n is supplied by the caller.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [None]:
def mutate_rule(rule, indpb=MUTATION_RATE_RULE):
    """Mutate ``rule`` in place and return it.

    Three independent kinds of change are attempted, each gated by ``indpb``:

    * a structural change to the conditions (add one, remove one, or replace
      one with a fresh random condition),
    * a re-draw of each operator individually,
    * a re-draw of the action.

    Args:
        rule: Object exposing mutable ``conditions`` and ``operators`` lists
            and an assignable ``action`` attribute.
        indpb: Probability used for every gate described above.

    Returns:
        The same ``rule`` object that was passed in.
    """
    conds = rule.conditions
    ops = rule.operators

    if random.random() < indpb:
        if len(conds) < 3 and random.random() < 0.5:
            # Grow: a new condition needs a joining operator unless it is the only one.
            conds.append(random_condition())
            if len(conds) >= 2:
                ops.append(random_operator())
        elif len(conds) > 1 and random.random() < 0.5:
            # Shrink: drop a condition together with the operator at the same
            # position, falling back to the last operator when the removed
            # condition was the final one.
            victim = random.randint(0, len(conds) - 1)
            del conds[victim]
            ops.pop(victim if victim < len(ops) else -1)
        else:
            # Neither grew nor shrank: overwrite one condition instead.
            target = random.randint(0, len(conds) - 1)
            conds[target] = random_condition()

    for position in range(len(ops)):
        if random.random() < indpb:
            ops[position] = random_operator()

    if random.random() < indpb:
        rule.action = random_action()

    return rule

def mutate_individual(individual):
    """Mutate roughly 15% of an individual's rules (at least one) in place.

    Each selected rule is replaced by a brand-new random rule with
    probability 0.1; otherwise it is passed through ``mutate_rule``.

    Returns:
        A one-element tuple containing the mutated individual, which is the
        shape DEAP expects from a mutation operator.
    """
    rule_count = len(individual)
    sample_size = max(1, int(rule_count * 0.15))

    for position in random.sample(range(rule_count), sample_size):
        regenerate = random.random() < 0.1
        if regenerate:
            individual[position] = create_random_rule()
        else:
            individual[position] = mutate_rule(individual[position], MUTATION_RATE_RULE)

    return (individual,)

def crossover_individuals(ind1, ind2):
    """Two-point crossover that swaps a contiguous run of rules in place.

    Two cut points are drawn and the half-open slice between them is
    exchanged between the two individuals.

    The cut points range over ``0..size`` inclusive, where ``size`` is the
    shorter individual's length. With an upper bound of ``size - 1`` the
    half-open slice could never include the final rule, so that position
    would never be exchanged. Using the shorter length also keeps both
    individuals' lengths unchanged when they differ, and empty individuals
    are simply returned untouched.

    Args:
        ind1: First individual (a mutable sequence of rules).
        ind2: Second individual (a mutable sequence of rules).

    Returns:
        The tuple ``(ind1, ind2)``; both objects are modified in place.
    """
    size = min(len(ind1), len(ind2))

    cut_lo = random.randint(0, size)
    cut_hi = random.randint(0, size)
    if cut_lo > cut_hi:
        cut_lo, cut_hi = cut_hi, cut_lo

    ind1[cut_lo:cut_hi], ind2[cut_lo:cut_hi] = \
        ind2[cut_lo:cut_hi], ind1[cut_lo:cut_hi]

    return ind1, ind2

In [None]:
def evaluate_tournament(tournament_data):
    """Play one game between the individuals named in ``tournament_data``.

    Args:
        tournament_data: ``(population, indices)`` pair. Each index selects
            the rule set of one participant and is also passed to the agent
            as its ``individual_index``.

    Returns:
        Whatever ``Game.play_game`` returns for ``NUM_EPISODES`` episodes
        played without rendering.
    """
    population, indices = tournament_data

    agents = []
    for index in indices:
        agents.append(GeneticAgent(rules=population[index], individual_index=index))

    return Game(agents).play_game(num_episodes=NUM_EPISODES, render_mode=None)

def _sample_tournaments(population_size, num_tournaments, players):
    """Draw ``num_tournaments`` distinct, sorted index groups uniformly at random.

    This is equivalent to sampling without replacement from every
    ``players``-element combination of ``range(population_size)``, but it
    never builds that list (about 64.7 million tuples for 200 individuals
    and 4 players).
    """
    seen = set()
    groups = []
    while len(groups) < num_tournaments:
        group = tuple(sorted(random.sample(range(population_size), players)))
        if group not in seen:
            seen.add(group)
            groups.append(group)
    return groups


def _score_agent_result(agent_result):
    """Return the fitness one agent earned in a single episode."""
    fitness = 0

    # A win only counts when the agent also has at least one kill.
    if agent_result.winner and len(agent_result.kills) > 0:
        fitness += 50

    fitness += agent_result.visited_tiles * 2
    fitness += agent_result.bombs_placed * 10
    fitness += agent_result.wood_exploded * 10

    # In an 11x11 grid the maximum distance is 20, so this rewards a small
    # average distance with 2 points per unit closer, never going negative.
    normalized_proximity_score = max(0, 20 - agent_result.average_distance)
    fitness += normalized_proximity_score * 2

    for kill in agent_result.kills:
        if kill == agent_result.id:
            # A kill entry matching the agent's own id is penalised.
            fitness -= 25
        else:
            fitness += 20

    # Penalise agents that barely moved and barely placed bombs.
    if agent_result.visited_tiles < 10 and agent_result.bombs_placed < 2:
        fitness -= 10

    return fitness


def evaluate_population_in_tournament(population, generation, run_id):
    """Score every individual by playing randomly drawn four-player games.

    ``len(population) // 4`` distinct groups of four indices are drawn
    uniformly at random and each group plays one game in a worker process.
    Every tenth generation the raw game results are pickled to
    ``./results/{run_id}/tournament/{generation}.pkl``.

    NOTE(review): groups are sampled independently rather than partitioning
    the population, so an individual may play in several games (its scores
    are summed) or in none (it keeps a fitness of 0).

    Args:
        population: Sequence of individuals (rule lists).
        generation: Current generation number; controls result dumping.
        run_id: Identifier used in the results path.

    Returns:
        A list with one single-element fitness tuple per individual, in
        population order.
    """
    players_per_game = 4
    fitness_scores = [0] * len(population)
    num_tournaments = len(population) // players_per_game

    # num_tournaments never exceeds the number of available combinations, so
    # the rejection sampling inside the helper always terminates.
    tournament_indices = _sample_tournaments(
        len(population), num_tournaments, players_per_game
    )
    tournament_data = [(population, indices) for indices in tournament_indices]

    if tournament_data:
        processor_count = min(multiprocessing.cpu_count(), len(tournament_data))
        with multiprocessing.Pool(processes=processor_count) as pool:
            all_results = pool.map(evaluate_tournament, tournament_data)
    else:
        # Fewer than four individuals: nothing to play, and a pool with zero
        # processes would raise.
        all_results = []

    if generation % 10 == 0:
        with open(f'./results/{run_id}/tournament/{generation}.pkl', 'wb') as f:
            pickle.dump(all_results, f)

    for result in all_results:
        for episode_result in result:
            for agent_result in episode_result.agent_results:
                fitness_scores[agent_result.individual_index] += \
                    _score_agent_result(agent_result)

    return [(score,) for score in fitness_scores]
        
        
def evaluate_individual(individual):
    """Placeholder evaluator that ignores ``individual`` and returns a zero fitness tuple."""
    placeholder_fitness = (0,)
    return placeholder_fitness

In [8]:
# Register the genetic operators under DEAP's conventional aliases.
toolbox.register("mate", crossover_individuals)
toolbox.register("mutate", mutate_individual)
# Parent selection by tournament among TOURNAMENT_SIZE candidates.
toolbox.register("select", tools.selTournament, tournsize=TOURNAMENT_SIZE)
# Placeholder evaluator that always returns (0,).
toolbox.register("evaluate", evaluate_individual)

In [9]:
def create_folder_structure(run_id):
    """Create the output directory tree for one run.

    Ensures ``./results/{run_id}/`` exists with the sub-folders
    ``tournament``, ``generation``, ``conditions`` and ``actions``.
    Directories that already exist are left alone, so calling this
    repeatedly is safe.

    Args:
        run_id: Identifier used as the name of the run's folder.
    """
    run_root = f'./results/{run_id}'
    for subfolder in ('tournament', 'generation', 'conditions', 'actions'):
        # makedirs creates the missing parents too; exist_ok avoids the
        # check-then-create race of a separate os.path.exists test.
        os.makedirs(f'{run_root}/{subfolder}', exist_ok=True)
        
def write_population(population, generation, run_id):
    """Pickle ``population`` to ``./results/{run_id}/generation/{generation}.pkl``.

    The target directory must already exist.
    """
    target_path = f'./results/{run_id}/generation/{generation}.pkl'
    with open(target_path, 'wb') as handle:
        pickle.dump(population, handle)
        
def write_conditions(population, generation, run_id):
    """Write how often each condition occurs in ``population`` to a text file.

    Conditions are counted across every rule of every individual and
    written to ``./results/{run_id}/conditions/{generation}.txt`` as
    ``NAME: count`` lines, most frequent first. This is the same location
    and format that ``run_evolution`` uses for its condition statistics.
    The target directory must already exist.

    Args:
        population: Iterable of individuals, each an iterable of rules that
            expose a ``conditions`` iterable.
        generation: Generation number, used as the file name.
        run_id: Identifier of the run, used in the path.
    """
    conditions = {}
    for individual in population:
        for rule in individual:
            for condition in rule.conditions:
                conditions[condition] = conditions.get(condition, 0) + 1

    # Stable sort: conditions with equal counts keep first-seen order.
    ranked = sorted(conditions.items(), key=lambda item: item[1], reverse=True)

    with open(f'./results/{run_id}/conditions/{generation}.txt', 'w') as f:
        for condition, count in ranked:
            f.write(f"{condition.name}: {count}\n")
                


In [10]:
def _write_ranked_counts(path, counts):
    """Write ``NAME: count`` lines to ``path``, most frequent entry first."""
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    with open(path, 'w') as f:
        for member, count in ranked:
            f.write(f"{member.name}: {count}\n")


def run_evolution(n_gen=MAX_GENERATIONS, pop_size=POPULATION_SIZE, run_id=None):
    """Run the genetic algorithm and persist intermediate results.

    The population size is rounded down to a multiple of four (minimum 4).
    After an initial evaluation the loop runs for generations ``0..n_gen``
    inclusive. Each generation keeps the best individuals as elites, fills
    the rest by tournament selection, applies crossover and mutation to the
    selected offspring, and then re-evaluates the whole new population.

    Every tenth generation, and on the last one, the population is pickled
    and condition/action frequency tables are written below
    ``./results/{run_id}/``. The best individual seen so far is pickled
    after every generation.

    Args:
        n_gen: Index of the last generation to run.
        pop_size: Requested population size.
        run_id: Identifier used as the results folder name. With the default
            ``None`` the folder is literally named ``None``.

    Returns:
        ``(pop, stats, hof)``: the final population, the ``tools.Statistics``
        object (configured here but never compiled) and the hall of fame.
    """
    create_folder_structure(run_id)

    pop_size = (pop_size // 4) * 4
    if pop_size < 4:
        pop_size = 4

    print(f"Starting evolution with population size: {pop_size}")

    pop = toolbox.population(n=pop_size)

    # NUM_ELITES may be a float (e.g. 200 * 0.05 == 10.0), but the selection
    # operators need an integer count: they slice and call range() with it.
    # It is also clamped so it can never exceed the population size.
    num_elites = min(max(int(NUM_ELITES), 0), len(pop))

    fitnesses = evaluate_population_in_tournament(pop, 0, run_id)
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("min", np.min)
    stats.register("max", np.max)

    hof = tools.HallOfFame(2)

    for gen in range(n_gen + 1):
        print(f"Generation {gen}/{n_gen}")

        # Clone so that variation never touches individuals still in `pop`.
        elites = [toolbox.clone(ind) for ind in tools.selBest(pop, num_elites)]

        offspring = toolbox.select(pop, len(pop) - num_elites)
        offspring = [toolbox.clone(ind) for ind in offspring]

        # Mate consecutive pairs; a trailing unpaired individual is skipped.
        for i in range(0, len(offspring) - 1, 2):
            if random.random() < CROSSOVER_RATE:
                toolbox.mate(offspring[i], offspring[i + 1])
                del offspring[i].fitness.values
                del offspring[i + 1].fitness.values

        for child in offspring:
            if random.random() < MUTATION_RATE:
                toolbox.mutate(child)
                del child.fitness.values

        pop[:] = offspring + elites

        # Everyone is re-scored each generation, elites included.
        fitnesses = evaluate_population_in_tournament(pop, gen, run_id)
        for ind, fit in zip(pop, fitnesses):
            ind.fitness.values = fit

        hof.update(pop)

        fits = [ind.fitness.values[0] for ind in pop]
        length = len(pop)
        mean = sum(fits) / length

        print(f"  Min: {np.min(fits)}")
        print(f"  Max: {np.max(fits)}")
        print(f"  Avg: {mean}")
        print(f"  Best individual fitness: {hof[0].fitness.values[0]:.2f}")

        # Gen starts at 0, so we save every 10 generations (and the last one).
        if gen % 10 == 0 or gen == n_gen:
            with open(f'./results/{run_id}/generation/{gen}.pkl', 'wb') as f:
                pickle.dump(pop, f)

            pop_conditions = {}
            pop_actions = {}
            for individual in pop:
                for rule in individual:
                    for condition in rule.conditions:
                        pop_conditions[condition] = pop_conditions.get(condition, 0) + 1
                    pop_actions[rule.action] = pop_actions.get(rule.action, 0) + 1

            _write_ranked_counts(f'./results/{run_id}/conditions/{gen}.txt', pop_conditions)
            _write_ranked_counts(f'./results/{run_id}/actions/{gen}.txt', pop_actions)

        # Checkpoint the best individual after every generation.
        with open(f'./results/{run_id}/best_individual.pkl', 'wb') as f:
            pickle.dump(hof[0], f)

    return pop, stats, hof

In [None]:
# import pstats
# import io

# pr = cProfile.Profile()
# pr.enable()

# Pick a random integer as the identifier (and results folder name) for this run.
# NOTE(review): nothing here prevents two runs from drawing the same id and
# overwriting each other's results — confirm this is acceptable.
RUN_ID = np.random.randint(0, 100000)
print(f"Run ID: {RUN_ID}")
final_pop, stats, hof = run_evolution(n_gen=MAX_GENERATIONS, pop_size=POPULATION_SIZE, run_id=RUN_ID)

print("\nEvolution finished.")
print(f"Stored best individual in ./results/{RUN_ID}/best_individual.pkl")
# hof[0] is the entry the hall of fame ranks best.
print(f"Best individual has {len(hof[0])} rules with fitness: {hof[0].fitness.values[0]}")
print("Best individual rules:")
for rule in hof[0]:
    print(rule)

# pr.disable()
# s = io.StringIO()
# ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
# ps.print_stats(15)
# print(s.getvalue())

Run ID: 97720
Starting evolution with population size: 200
Generation 0/150
  Min: -19.024708624708627
  Max: 389.9486980568958
  Avg: 51.677714104704016
  Best individual fitness: 389.95
Generation 1/150
  Min: 0.0
  Max: 290.0030218147865
  Avg: 53.40301571502985
  Best individual fitness: 389.95
Generation 2/150
  Min: -9.044444444444451
  Max: 257.02610915486304
  Avg: 54.83005372970328
  Best individual fitness: 389.95
Generation 3/150
  Min: -13.17777777777777
  Max: 250.75026422852505
  Avg: 52.552926925708164
  Best individual fitness: 389.95
Generation 4/150
  Min: -12.933333333333337
  Max: 274.9321693829047
  Avg: 55.298678149314135
  Best individual fitness: 389.95
Generation 5/150
  Min: 0.0
  Max: 422.29601262934597
  Avg: 60.51873554677989
  Best individual fitness: 422.30
Generation 6/150
  Min: -1.1515151515151558
  Max: 309.2866748745365
  Avg: 64.63333938299182
  Best individual fitness: 422.30
Generation 7/150
  Min: -10.957575757575754
  Max: 357.6161042661043
  Av

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f59b873f580>>
Traceback (most recent call last):
  File "/home/main/Documents/TCS/Module-12/pommerman-test/venv/lib/python3.10/site-packages/ipykernel/ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(
KeyboardInterrupt: 


  Min: 0.0
  Max: 416.41588681849555
  Avg: 87.51318101003194
  Best individual fitness: 758.51
Generation 68/150
  Min: 0.0
  Max: 490.85047664987565
  Avg: 100.38681181289883
  Best individual fitness: 758.51
Generation 69/150
  Min: 0.0
  Max: 663.9709495751162
  Avg: 98.01603275650832
  Best individual fitness: 758.51
Generation 70/150
  Min: 0.0
  Max: 480.2707496413333
  Avg: 90.75851425953697
  Best individual fitness: 758.51
Generation 71/150
  Min: 0.0
  Max: 577.346383380439
  Avg: 95.429184718102
  Best individual fitness: 758.51
Generation 72/150
  Min: 0.0
  Max: 423.0355379188713
  Avg: 88.23078713789594
  Best individual fitness: 758.51
Generation 73/150
  Min: 0.0
  Max: 469.9071880280213
  Avg: 90.03215900965154
  Best individual fitness: 758.51
Generation 74/150
  Min: 0.0
  Max: 645.7698640280579
  Avg: 94.12948613520159
  Best individual fitness: 758.51
Generation 75/150
  Min: 0.0
  Max: 439.7172488743141
  Avg: 94.59681027603057
  Best individual fitness: 758.51
G

In [None]:
# %lprun -f GeneticAgent.act run_evolution(n_gen=1, pop_size=20, run_id=1)