In [45]:
# %load_ext line_profiler

import copy
import cProfile
import csv
import multiprocessing
import os
import pickle
import random
import warnings
from typing import List

import numpy as np
from deap import base, creator, tools
from pommerman.agents.simple_agent import SimpleAgent

from genetic.agent import GeneticAgent
from genetic.common_types import Rule, Condition, ConditionType, OperatorType, ActionType
from genetic.game import Game

warnings.filterwarnings("ignore", message=".*pkg_resources.*", category=UserWarning)

# Drop creator classes left behind by an earlier execution of this notebook so the
# `creator.create` calls further down start from a clean slate
# (presumably to avoid DEAP's "class already created" warning - TODO confirm).
for _stale_class in ("FitnessMax", "Individual"):
    if _stale_class in vars(creator):
        delattr(creator, _stale_class)


In [46]:
# --- Evolution hyper-parameters -------------------------------------------------------
# Default number of individuals (run_evolution rounds it down to a multiple of 4).
POPULATION_SIZE = 200
# Probability that an offspring is passed to toolbox.mutate at all.
MUTATION_RATE = 0.4
# Inside mutate_individual: per-rule probability that the rule is touched.
MUTATION_RATE_RULE = 0.15
# Given a rule is touched: probability it is replaced by a brand-new random rule
# (otherwise it is edited by mutate_rule).
MUTATION_RATE_REPLACE = 0.075
# Probability that mutate_individual additionally reorders a subset of the rules.
MUTATION_RATE_SHUFFLE = 0.10
# Inside mutate_rule: probability of appending a condition (only while fewer than 3).
MUTATION_RATE_ADD_COND = 0.05
# Inside mutate_rule: probability of dropping a condition (only while more than 1).
MUTATION_RATE_REMOVE_COND = 0.05
# Inside mutate_rule: probability of swapping one condition for a random one.
MUTATION_RATE_REPLACE_COND = 0.3
# Inside mutate_rule: per-operator probability of re-drawing that operator.
MUTATION_RATE_REPLACE_OPERATOR = 0.15
# Inside mutate_rule: probability of re-drawing the rule's action.
MUTATION_RATE_REPLACE_ACTION = 0.15
# Probability that a consecutive pair of offspring is recombined.
CROSSOVER_RATE = 0.75
# Default number of generations for run_evolution.
MAX_GENERATIONS = 200
# `tournsize` handed to tools.selTournament.
TOURNAMENT_SIZE = 7
# Episodes played per tournament game (see evaluate_tournament).
NUM_EPISODES = 2
# Number of best individuals copied unchanged into the next generation (5% of the default size).
NUM_ELITES = POPULATION_SIZE // 20

# --- Mutable run state ----------------------------------------------------------------
# Row buffers (lists of dicts) that save_results appends to the per-run CSV files.
LOGGING_RESULTS = []
LOGGING_CONDITIONS = []
LOGGING_ACTIONS = []
LOGGING_PERFORMANCE = []
# Best hall-of-fame fitness seen so far; get_performance_data only dumps a new
# best-individual text file when this value is exceeded.
PREVIOUS_BEST = -1e6

def _seed_rule(trigger, guard, action, weight):
    """Return a ``(Rule, weight)`` pair for the hand-written rule ``trigger AND guard -> action``.

    Both conditions are created un-negated. ``weight`` is the relative sampling weight
    that ``create_individual`` uses when drawing seed rules.
    """
    conditions = [Condition(trigger, False), Condition(guard, False)]
    return Rule(conditions, [OperatorType.AND], action), weight


# Hand-crafted starter rules injected into freshly created individuals.
USEFUL_RULES = [
    # Standing on a bomb: step onto any free neighbouring tile.
    _seed_rule(ConditionType.IS_BOMB_ON_PLAYER, ConditionType.CAN_MOVE_UP, ActionType.MOVE_UP, 10),
    _seed_rule(ConditionType.IS_BOMB_ON_PLAYER, ConditionType.CAN_MOVE_DOWN, ActionType.MOVE_DOWN, 10),
    _seed_rule(ConditionType.IS_BOMB_ON_PLAYER, ConditionType.CAN_MOVE_LEFT, ActionType.MOVE_LEFT, 10),
    _seed_rule(ConditionType.IS_BOMB_ON_PLAYER, ConditionType.CAN_MOVE_RIGHT, ActionType.MOVE_RIGHT, 10),
    # Wood in range and a bomb available: place it.
    _seed_rule(ConditionType.IS_WOOD_IN_RANGE, ConditionType.HAS_BOMB, ActionType.PLACE_BOMB, 10),
    # Bomb below: move up or sideways.
    _seed_rule(ConditionType.IS_BOMB_DOWN, ConditionType.CAN_MOVE_UP, ActionType.MOVE_UP, 1),
    _seed_rule(ConditionType.IS_BOMB_DOWN, ConditionType.CAN_MOVE_LEFT, ActionType.MOVE_LEFT, 1),
    _seed_rule(ConditionType.IS_BOMB_DOWN, ConditionType.CAN_MOVE_RIGHT, ActionType.MOVE_RIGHT, 1),
    # Bomb above: move down or sideways.
    _seed_rule(ConditionType.IS_BOMB_UP, ConditionType.CAN_MOVE_DOWN, ActionType.MOVE_DOWN, 1),
    _seed_rule(ConditionType.IS_BOMB_UP, ConditionType.CAN_MOVE_LEFT, ActionType.MOVE_LEFT, 1),
    _seed_rule(ConditionType.IS_BOMB_UP, ConditionType.CAN_MOVE_RIGHT, ActionType.MOVE_RIGHT, 1),
    # Bomb to the left: move vertically or to the right.
    _seed_rule(ConditionType.IS_BOMB_LEFT, ConditionType.CAN_MOVE_DOWN, ActionType.MOVE_DOWN, 1),
    _seed_rule(ConditionType.IS_BOMB_LEFT, ConditionType.CAN_MOVE_UP, ActionType.MOVE_UP, 1),
    _seed_rule(ConditionType.IS_BOMB_LEFT, ConditionType.CAN_MOVE_RIGHT, ActionType.MOVE_RIGHT, 1),
    # Bomb to the right: move vertically or to the left.
    _seed_rule(ConditionType.IS_BOMB_RIGHT, ConditionType.CAN_MOVE_DOWN, ActionType.MOVE_DOWN, 1),
    _seed_rule(ConditionType.IS_BOMB_RIGHT, ConditionType.CAN_MOVE_UP, ActionType.MOVE_UP, 1),
    _seed_rule(ConditionType.IS_BOMB_RIGHT, ConditionType.CAN_MOVE_LEFT, ActionType.MOVE_LEFT, 1),
    # Enemy in range and a bomb available: place it.
    _seed_rule(ConditionType.IS_ENEMY_IN_RANGE, ConditionType.HAS_BOMB, ActionType.PLACE_BOMB, 10),
]

In [47]:
# Single-objective fitness with a positive weight, i.e. larger values are better.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
# An individual is a list subclass (filled with Rule objects by create_individual)
# that carries a FitnessMax instance in its `fitness` attribute.
creator.create("Individual", list, fitness=creator.FitnessMax)

In [48]:
def random_condition():
    """Return a non-negated ``Condition`` whose type is drawn uniformly from ``ConditionType``.

    Negated conditions are intentionally never generated here.
    """
    available_types = list(ConditionType)
    return Condition(random.choice(available_types), False)

def random_operator():
    """Return one member of ``OperatorType`` chosen uniformly at random."""
    candidates = list(OperatorType)
    return random.choice(candidates)

def random_action():
    """Return one member of ``ActionType`` chosen uniformly at random."""
    candidates = list(ActionType)
    return random.choice(candidates)

def create_random_rule():
    """Build a random ``Rule`` with one to three conditions.

    The number of conditions is drawn from {1, 2, 3} with weights 1:3:1, so two-condition
    rules are the most common. A rule with ``k`` conditions gets ``k - 1`` random
    operators and a single random action.
    """
    (condition_count,) = random.choices([1, 2, 3], weights=[1, 3, 1], k=1)

    conditions = []
    for _ in range(condition_count):
        conditions.append(random_condition())

    # One operator sits between each pair of neighbouring conditions.
    operators = []
    for _ in range(condition_count - 1):
        operators.append(random_operator())

    return Rule(conditions, operators, random_action())

def create_individual(num_rules):
    """Create an individual made of a few hand-written seed rules plus random rules.

    Between one and six rules are sampled without replacement from ``USEFUL_RULES``
    (weighted by their attached weights); the rest, up to ``num_rules`` in total, are
    generated by ``create_random_rule``. The combined list is shuffled.

    Args:
        num_rules: Total number of rules the individual should contain.

    Returns:
        A ``creator.Individual`` holding ``num_rules`` rules.
    """
    rules = [rule for rule, _ in USEFUL_RULES]
    weights = [weight for _, weight in USEFUL_RULES]

    # Never draw more seed rules than the individual can hold (or than exist); otherwise
    # the individual would silently end up longer than `num_rules`.
    max_useful = min(6, num_rules, len(rules))
    num_useful_rules = random.randint(1, max_useful) if max_useful >= 1 else 0

    if num_useful_rules:
        indices = np.random.choice(
            len(rules),
            size=num_useful_rules,
            replace=False,
            p=np.array(weights) / sum(weights)
        )
    else:
        indices = []

    # Deep-copy the templates: mutate_rule edits rules in place, so sharing the objects
    # stored in USEFUL_RULES would let one mutation leak into the template list and into
    # every other individual that drew the same rule.
    useful_sample = [copy.deepcopy(rules[i]) for i in indices]

    random_rules = [create_random_rule() for _ in range(num_rules - num_useful_rules)]

    rules = useful_sample + random_rules
    random.shuffle(rules)

    return creator.Individual(rules)

In [49]:
# Central DEAP registry for the generators and operators used by run_evolution.
toolbox = base.Toolbox()
# toolbox.individual() -> create_individual(num_rules=10), i.e. ten rules per individual.
toolbox.register("individual", create_individual, num_rules=10)

# toolbox.population(n=...) -> plain list of n freshly created individuals.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

In [50]:
def mutate_rule(rule: Rule):
    """Apply a sequence of independent random edits to ``rule`` in place.

    In order, each step fires with its own ``MUTATION_RATE_*`` probability:

    1. append a random condition (only while the rule has fewer than 3),
    2. remove a random condition together with one operator (only while more than 1),
    3. replace one condition with a random one,
    4. re-draw each operator individually,
    5. re-draw the action.

    Returns:
        The same ``rule`` object that was passed in.
    """
    # 1. Grow: a new condition also needs an operator joining it to its predecessor.
    can_grow = len(rule.conditions) < 3
    if can_grow and random.random() < MUTATION_RATE_ADD_COND:
        rule.conditions.append(random_condition())
        if len(rule.conditions) > 1:
            rule.operators.append(random_operator())

    # 2. Shrink: drop a condition and the operator at the same position
    #    (the last operator when the final condition was removed).
    can_shrink = len(rule.conditions) > 1
    if can_shrink and random.random() < MUTATION_RATE_REMOVE_COND:
        victim = random.randint(0, len(rule.conditions) - 1)
        del rule.conditions[victim]
        rule.operators.pop(min(victim, len(rule.operators) - 1))

    # 3. Swap a single condition for a fresh random one.
    if rule.conditions and random.random() < MUTATION_RATE_REPLACE_COND:
        target = random.randint(0, len(rule.conditions) - 1)
        rule.conditions[target] = random_condition()

    # 4. Every operator gets its own chance of being re-drawn.
    for slot, _ in enumerate(rule.operators):
        if random.random() < MUTATION_RATE_REPLACE_OPERATOR:
            rule.operators[slot] = random_operator()

    # 5. Finally the action may be re-drawn.
    if random.random() < MUTATION_RATE_REPLACE_ACTION:
        rule.action = random_action()

    return rule

def mutate_individual(individual):
    """Mutate ``individual`` in place and return it as a one-element tuple (DEAP style).

    Each rule is touched with probability ``MUTATION_RATE_RULE``; a touched rule is
    either replaced by a new random rule (probability ``MUTATION_RATE_REPLACE``) or
    edited by ``mutate_rule``. Afterwards, with probability ``MUTATION_RATE_SHUFFLE``,
    a random subset of the rules is reordered.

    Returns:
        ``(individual,)`` - always the very object that was passed in.
    """
    for i in range(len(individual)):
        if random.random() < MUTATION_RATE_RULE:
            if random.random() < MUTATION_RATE_REPLACE:
                individual[i] = create_random_rule()
            else:
                individual[i] = mutate_rule(individual[i])

    if random.random() < MUTATION_RATE_SHUFFLE:
        # Callers (run_evolution) ignore the return value and rely on in-place mutation,
        # so the new order must be written back into `individual` rather than returned
        # as a separate list. Slice assignment keeps the Individual object and its fitness.
        shuffled, = mutate_shuffle(individual)
        individual[:] = shuffled

    return individual,

def mutate_shuffle(individual):
    """Reorder a random subset of ``individual``'s rules in place.

    Between two and ``len(individual)`` positions are picked at random and the rules at
    those positions are permuted among themselves; all other positions are left alone.
    Individuals with fewer than two rules are returned unchanged.

    Returns:
        ``(individual,)`` - the same object that was passed in, as DEAP mutation
        operators are expected to return (so its type and ``fitness`` are preserved).
    """
    if len(individual) < 2:
        return individual,

    num_to_shuffle = random.randint(2, len(individual))
    indices = random.sample(range(len(individual)), num_to_shuffle)
    shuffled_rules = [individual[i] for i in indices]

    random.shuffle(shuffled_rules)

    # Write the permuted rules back into the individual itself. Returning a detached
    # list copy would drop the shuffle for callers that ignore the return value and
    # would lose the Individual type together with its fitness attribute.
    for idx, rule in zip(indices, shuffled_rules):
        individual[idx] = rule

    return individual,

def crossover_individuals(ind1, ind2):
    """Two-point crossover: swap one randomly chosen slice between both parents in place.

    Both cut points are drawn from ``0 .. len(ind1) - 1``; the half-open slice between
    the smaller and the larger one is exchanged. Equal cut points leave both parents
    untouched.

    Returns:
        ``(ind1, ind2)`` - the same two objects, modified in place.
    """
    upper = len(ind1) - 1
    first_cut = random.randint(0, upper)
    second_cut = random.randint(0, upper)
    segment = slice(min(first_cut, second_cut), max(first_cut, second_cut))

    # Take copies of both segments before overwriting either parent.
    from_first = ind1[segment]
    from_second = ind2[segment]
    ind1[segment] = from_second
    ind2[segment] = from_first

    return ind1, ind2

In [51]:
def evaluate_tournament(tournament_data):
    """Play one game between the selected individuals and return the game's results.

    Args:
        tournament_data: ``(population, indices)``; ``population[index]`` must yield the
            rule list for every ``index`` in ``indices``.

    Returns:
        Whatever ``Game.play_game`` returns for ``NUM_EPISODES`` episodes without rendering.
    """
    population, indices = tournament_data

    agents = []
    for index in indices:
        agents.append(GeneticAgent(rules=population[index], individual_index=index))

    # Randomise the agent order before handing the agents to the game.
    random.shuffle(agents)

    game = Game(agents, max_steps=600)
    return game.play_game(num_episodes=NUM_EPISODES, render_mode=None)

def evaluate_population_in_tournament(population, generation, run_id):
    """Estimate every individual's fitness from randomly grouped four-agent games.

    For each of ``rounds_per_agent`` rounds the population indices are shuffled and cut
    into groups of four; every complete group becomes one tournament that is played in
    a worker process through ``evaluate_tournament``. Each per-agent episode result is
    turned into a score with the ``rewards`` table below, appended to
    ``LOGGING_RESULTS`` as one row, and finally averaged per individual.

    Args:
        population: Sequence of individuals (rule lists) to evaluate.
        generation: Generation number; only written into the log rows.
        run_id: Unused here; kept so existing callers keep working.

    Returns:
        One single-element tuple ``(mean_fitness,)`` per individual, in population
        order. Individuals that played no episode get ``0.0``.
    """
    agents_per_tournament = 4
    rounds_per_agent = 5

    fitness_scores = [0] * len(population)
    agent_episode_counts = [0] * len(population)

    tournament_data = []
    for _ in range(rounds_per_agent):
        shuffled_indices = list(range(len(population)))
        random.shuffle(shuffled_indices)

        for start in range(0, len(shuffled_indices), agents_per_tournament):
            current_indices = shuffled_indices[start:start + agents_per_tournament]

            # An incomplete trailing group cannot fill a game and is skipped this round.
            if len(current_indices) == agents_per_tournament:
                # Ship only the four participants, keyed by their population index, so
                # each task does not pickle the entire population. evaluate_tournament
                # only ever looks individuals up as `population[index]`.
                participants = {index: population[index] for index in current_indices}
                tournament_data.append((participants, current_indices))

    if not tournament_data:
        # Fewer than four individuals: nothing can be played, and
        # multiprocessing.Pool(processes=0) would raise ValueError.
        return [(0.0,) for _ in population]

    processor_count = min(multiprocessing.cpu_count(), len(tournament_data))
    with multiprocessing.Pool(processes=processor_count) as pool:
        all_results = pool.map(evaluate_tournament, tournament_data)

    rewards = {
        "TILES": 15,              # Points per unique tile visited
        "BOMBS": 75,              # Points per bomb placed
        "WOOD": 150,              # Points per wood exploded
        "SELF_KILL": -1000,       # Points for self-kill
        "KILL": 750,              # Points for killing another agent
        "WIN_WITH_KILLS": 1000,   # Points for winning with kills
        "WIN_NO_KILLS": 200,      # Points for winning without kills
        "DIED": -500,             # Points for dying
        "PASSIVE_TILES": -50,     # Per tile short of 10 unique tiles visited
        "PASSIVE_BOMBS": -100,    # Per bomb short of 4 bombs placed
        "TIMEOUT_LOSE": -150,     # Points for neither winning nor dying
        "STEP_REWARD": 0.1,       # Points per step taken
        "VISIT_PENALTY": -5,      # Per visit beyond the third to the same tile
    }

    for result in all_results:
        for episode_result in result:
            for agent_result in episode_result.agent_results:
                agent_index = agent_result.individual_index
                agent_episode_counts[agent_index] += 1

                # Penalise every visit beyond the third to the same tile.
                visit_penalty = 0
                for visit_count in agent_result.visited_tiles.values():
                    if visit_count > 3:
                        visit_penalty += (visit_count - 3) * rewards["VISIT_PENALTY"]
                unique_tiles_visited = len(agent_result.visited_tiles)

                tile_score = unique_tiles_visited * rewards["TILES"]
                bomb_score = agent_result.bombs_placed * rewards["BOMBS"]
                wood_score = agent_result.wood_exploded * rewards["WOOD"]
                step_score = agent_result.step_count * rewards["STEP_REWARD"]

                # Key order defines the column order of agent_episodes.csv.
                logging_episode_result = {
                    'generation': generation,
                    'agent_index': agent_index,
                    'episode_index': agent_episode_counts[agent_index],  # 1-based
                    'tiles': tile_score,
                    'bombs_placed': bomb_score,
                    'wood_exploded': wood_score,
                    'distance': 0,  # distance reward is currently disabled
                    'self_kill': 0,
                    'kills': 0,
                    'win': 0,
                    'alive': 0,
                    'passive_tiles': 0,
                    'passive_bombs': 0,
                    'visit_penalty': visit_penalty,
                    'fitness': 0,
                    'step_reward': step_score,
                    'steps': agent_result.step_count,
                    'no_satisfied_rules': agent_result.no_satisfied_rules,
                }

                fitness = tile_score + bomb_score + wood_score + step_score + visit_penalty

                # A kill entry equal to the agent's own id counts as a self-kill.
                for kill in agent_result.kills:
                    if kill == agent_result.id:
                        fitness += rewards["SELF_KILL"]
                        logging_episode_result['self_kill'] += rewards["SELF_KILL"]
                    else:
                        fitness += rewards["KILL"]
                        logging_episode_result['kills'] += rewards["KILL"]

                if agent_result.winner:
                    win_key = "WIN_WITH_KILLS" if len(agent_result.kills) > 0 else "WIN_NO_KILLS"
                    fitness += rewards[win_key]
                    logging_episode_result['win'] += rewards[win_key]
                else:
                    # Not a winner: either the agent died or the game ended with it alive.
                    outcome_key = "TIMEOUT_LOSE" if agent_result.is_alive else "DIED"
                    fitness += rewards[outcome_key]
                    logging_episode_result['alive'] += rewards[outcome_key]

                # Passivity penalties grow linearly with the shortfall.
                if unique_tiles_visited < 10:
                    tile_penalty = (10 - unique_tiles_visited) * rewards["PASSIVE_TILES"]
                    fitness += tile_penalty
                    logging_episode_result['passive_tiles'] += tile_penalty
                if agent_result.bombs_placed < 4:
                    bomb_penalty = (4 - agent_result.bombs_placed) * rewards["PASSIVE_BOMBS"]
                    fitness += bomb_penalty
                    logging_episode_result['passive_bombs'] += bomb_penalty

                logging_episode_result['fitness'] = round(fitness, 3)
                LOGGING_RESULTS.append(logging_episode_result)

                fitness_scores[agent_index] += fitness

    # Average over the number of episodes each individual actually played.
    final_fitness_scores = [0.0] * len(fitness_scores)
    for i in range(len(fitness_scores)):
        if agent_episode_counts[i] > 0:
            final_fitness_scores[i] = fitness_scores[i] / agent_episode_counts[i]

    return [(score,) for score in final_fitness_scores]

In [52]:
# Variation and selection operators used by run_evolution.
# mate: in-place two-point slice swap between two individuals.
toolbox.register("mate", crossover_individuals)
# mutate: per-rule edits plus an occasional rule reordering.
toolbox.register("mutate", mutate_individual)
# mutate_shuffle: registered for completeness; the visible code calls the function
# directly from mutate_individual rather than through the toolbox.
toolbox.register("mutate_shuffle", mutate_shuffle)
# select: DEAP tournament selection with TOURNAMENT_SIZE contestants per pick.
toolbox.register("select", tools.selTournament, tournsize=TOURNAMENT_SIZE)

In [None]:
def create_folder_structure(run_id):
    """Prepare the output directories for a run and reset all per-run logging state.

    Creates ``./results/<run_id>/best_individuals`` (including missing parents), empties
    the four ``LOGGING_*`` buffers and resets ``PREVIOUS_BEST``.

    Args:
        run_id: Identifier used as the name of the run's results directory.
    """
    global PREVIOUS_BEST

    # One call creates the whole chain and is a no-op for directories that already
    # exist, which also avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(f'./results/{run_id}/best_individuals', exist_ok=True)

    LOGGING_RESULTS.clear()
    LOGGING_CONDITIONS.clear()
    LOGGING_ACTIONS.clear()
    LOGGING_PERFORMANCE.clear()

    # Without this reset a second run in the same kernel would compare against the
    # previous run's best fitness and might never write its own best-individual files.
    PREVIOUS_BEST = -1e6

def get_performance_data(gen, pop, hof, run_id):
    """Summarise the population's fitness and record the best individual when it improves.

    Appends one row to ``LOGGING_PERFORMANCE`` and, whenever the hall-of-fame leader's
    fitness exceeds ``PREVIOUS_BEST``, updates that global and writes the leader's rules
    (one per line) to ``./results/<run_id>/best_individuals/<gen>.txt``.

    Returns:
        ``(mean, min, max, std, best_fitness)`` of the current population / hall of fame.
    """
    global PREVIOUS_BEST

    fitness_values = [ind.fitness.values[0] for ind in pop]
    mean_fitness = sum(fitness_values) / len(pop)
    lowest_fitness = np.min(fitness_values)
    highest_fitness = np.max(fitness_values)
    fitness_std = np.std(fitness_values)

    champion = hof[0]
    best_fitness = champion.fitness.values[0]
    if best_fitness > PREVIOUS_BEST:
        PREVIOUS_BEST = best_fitness
        rule_lines = [str(rule) + '\n' for rule in champion]
        with open(f'./results/{run_id}/best_individuals/{gen}.txt', 'w') as f:
            f.writelines(rule_lines)

    performance_row = {
        'generation': gen,
        'mean': round(mean_fitness, 3),
        'min': round(lowest_fitness, 3),
        'max': round(highest_fitness, 3),
        'std': round(fitness_std, 3),
        'best_fitness': round(best_fitness, 3),
    }
    LOGGING_PERFORMANCE.append(performance_row)

    return mean_fitness, lowest_fitness, highest_fitness, fitness_std, best_fitness

def get_conditions_actions_data(gen, pop: List[List[Rule]]):
    """Count how often each condition and each action occurs in the population.

    Conditions are counted per ``(condition name, negation)`` pair, actions per
    ``ActionType`` member. Both count dictionaries start with a ``"generation"`` entry
    and are appended to ``LOGGING_CONDITIONS`` / ``LOGGING_ACTIONS``.

    Returns:
        ``(pop_conditions, pop_actions)`` - the two count dictionaries.
    """
    # Pre-fill every possible key with zero so all generations share the same columns.
    pop_conditions = {"generation": gen}
    pop_conditions.update(
        ((condition.name, negation), 0)
        for condition in ConditionType
        for negation in (True, False)
    )

    pop_actions = {"generation": gen}
    pop_actions.update((action, 0) for action in ActionType)

    all_rules = (rule for individual in pop for rule in individual)
    for rule in all_rules:
        for condition in rule.conditions:
            pop_conditions[(condition.condition_type.name, condition.negation)] += 1
        pop_actions[rule.action] += 1

    LOGGING_CONDITIONS.append(pop_conditions)
    LOGGING_ACTIONS.append(pop_actions)

    return pop_conditions, pop_actions
        
def _append_log_rows(path, rows, write_header):
    """Append ``rows`` (dicts sharing the keys of the first row) to the CSV file at ``path``."""
    if not rows:
        # Nothing buffered: skip instead of failing on rows[0].
        return
    # newline='' is what the csv module asks for; without it some platforms emit
    # blank lines between rows.
    with open(path, 'a', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=list(rows[0].keys()))
        if write_header:
            writer.writeheader()
        writer.writerows(rows)


def save_results(run_id, gen):
    """Flush the four logging buffers to their CSV files under ``./results/<run_id>/``.

    Rows are appended to ``conditions.csv``, ``actions.csv``, ``agent_episodes.csv`` and
    ``performance.csv``. The header row is only written for generation 0. The buffers
    themselves are not cleared here.

    Args:
        run_id: Identifier of the run's results directory.
        gen: Current generation number.
    """
    run_dir = f'./results/{run_id}'
    buffers = (
        ('conditions.csv', LOGGING_CONDITIONS),
        ('actions.csv', LOGGING_ACTIONS),
        ('agent_episodes.csv', LOGGING_RESULTS),
        ('performance.csv', LOGGING_PERFORMANCE),
    )
    for filename, rows in buffers:
        _append_log_rows(f'{run_dir}/{filename}', rows, write_header=(gen == 0))


In [54]:
def run_evolution(n_gen=MAX_GENERATIONS, pop_size=POPULATION_SIZE, run_id=None):
    """Run the genetic algorithm and persist per-generation logs under ``./results/<run_id>/``.

    Each generation keeps the best individuals as elites, fills the rest of the
    population by tournament selection, applies crossover and mutation to the selected
    offspring, re-evaluates the whole population and writes the logs plus a pickle of
    the current hall-of-fame leader.

    Args:
        n_gen: Number of generations to evolve after the initial evaluation.
        pop_size: Requested population size; rounded down to a multiple of 4, minimum 4.
        run_id: Identifier used for the results directory.

    Returns:
        ``(pop, stats, hof)`` - the final population, the ``tools.Statistics`` object
        (registered but never compiled in this function) and the hall of fame.
    """
    create_folder_structure(run_id)

    # Tournaments seat exactly four agents, so the size must be a multiple of four.
    pop_size = max(4, (pop_size // 4) * 4)

    print(f"Starting evolution with population size: {pop_size}")

    pop = toolbox.population(n=pop_size)

    fitnesses = evaluate_population_in_tournament(pop, 0, run_id)
    for ind, fit in zip(pop, fitnesses):
        ind.fitness.values = fit

    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("min", np.min)
    stats.register("max", np.max)

    hof = tools.HallOfFame(2)
    hof.update(pop)

    get_performance_data(0, pop, hof, run_id)
    get_conditions_actions_data(0, pop)

    save_results(run_id, 0)

    # NUM_ELITES is derived from the global POPULATION_SIZE. For a smaller `pop_size`
    # it could exceed the population, making the number of selected offspring negative
    # and collapsing the population to elites only. Always leave room for two offspring.
    num_elites = max(0, min(NUM_ELITES, len(pop) - 2))

    for gen in range(1, n_gen + 1):
        print(f"Generation {gen}/{n_gen}")
        # The buffers were flushed by the previous save_results call.
        LOGGING_CONDITIONS.clear()
        LOGGING_ACTIONS.clear()
        LOGGING_PERFORMANCE.clear()
        LOGGING_RESULTS.clear()

        # Clone so variation applied below never touches the parent generation.
        elites = [toolbox.clone(ind) for ind in tools.selBest(pop, num_elites)]
        offspring = [toolbox.clone(ind) for ind in toolbox.select(pop, len(pop) - num_elites)]

        # Recombine consecutive pairs; an unpaired last offspring is left as is.
        for i in range(0, len(offspring) - 1, 2):
            if random.random() < CROSSOVER_RATE:
                toolbox.mate(offspring[i], offspring[i + 1])
                del offspring[i].fitness.values
                del offspring[i + 1].fitness.values

        # The mutation operator works in place; its return value is not needed.
        for child in offspring:
            if random.random() < MUTATION_RATE:
                toolbox.mutate(child)
                del child.fitness.values

        pop[:] = offspring + elites

        # Everyone, elites included, is re-evaluated each generation.
        fitnesses = evaluate_population_in_tournament(pop, gen, run_id)
        for ind, fit in zip(pop, fitnesses):
            ind.fitness.values = fit

        hof.update(pop)

        mean_fit, min_fit, max_fit, std_fit, best_fitness = get_performance_data(gen, pop, hof, run_id)

        print(f"  Avg: {round(mean_fit, 3)}")
        print(f"  Min: {round(min_fit, 3)}")
        print(f"  Max: {round(max_fit, 3)}")
        print(f"  Std: {round(std_fit, 3)}")
        print(f"  Best individual fitness: {round(best_fitness, 3)}")

        get_conditions_actions_data(gen, pop)

        # Overwritten every generation with the current hall-of-fame leader.
        with open(f'./results/{run_id}/best_individual.pkl', 'wb') as f:
            pickle.dump(hof[0], f)

        save_results(run_id, gen)

    return pop, stats, hof

In [55]:
# import pstats
# import io

# pr = cProfile.Profile()
# pr.enable()

# Random, unseeded run identifier; it names the ./results/<RUN_ID>/ output directory.
# NOTE(review): if an id repeats, the CSV logs are opened in append mode and would mix runs.
RUN_ID = np.random.randint(0, 100000)
print(f"Run ID: {RUN_ID}")
final_pop, stats, hof = run_evolution(n_gen=MAX_GENERATIONS, pop_size=POPULATION_SIZE, run_id=RUN_ID)

print("Best agent in final population:")
# Sort the final population in place, best fitness first.
final_pop.sort(key=lambda ind: ind.fitness.values[0], reverse=True)
print(f"Best individual fitness: {final_pop[0].fitness.values[0]}")
print(f"Best individual has {len(final_pop[0])} rules:")
for rule in final_pop[0]:
    print(rule)

print("\nEvolution finished.")
print(f"Stored best individual in ./results/{RUN_ID}/best_individual.pkl")
# hof[0] is the best individual seen over the whole run, which may differ from the
# best member of the final population printed above.
print(f"Best individual has {len(hof[0])} rules with fitness: {hof[0].fitness.values[0]}")
print("Best individual rules:")
for rule in hof[0]:
    print(rule)

# pr.disable()
# s = io.StringIO()
# ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
# ps.print_stats(15)
# print(s.getvalue())

Run ID: 51042
Starting evolution with population size: 200


Generation 1/200


Process ForkPoolWorker-1709:
Process ForkPoolWorker-1713:
Process ForkPoolWorker-1705:
Process ForkPoolWorker-1712:
Process ForkPoolWorker-1707:
Process ForkPoolWorker-1706:
Process ForkPoolWorker-1708:
Process ForkPoolWorker-1716:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):


KeyboardInterrupt: 

Traceback (most recent call last):
Process ForkPoolWorker-1715:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-1714:
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()


  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.py", line 314, in _bootstrap
    self.run()
  File "/usr/lib/python3.10/multiprocessing/process.p

In [None]:
# %lprun -f GeneticAgent.act run_evolution(n_gen=1, pop_size=20, run_id=1)