### Imports

In [1]:
# If updates on imported files aren't detected, restart the kernel (we'll need to find an automatic solution for this)

import numpy as np
from icecream import ic

from typing import List, Tuple, Dict


### Evolution Helper

In [2]:
import random
from typing import List
from gxgp.node import Node
from utils.operations_dict import basic_function_set, complex_function_set

### Tree Generation

In [3]:
def generate_random_tree(max_height: int, pc: float, terminal_list: List[str],
                         constants: list[float] = None, p_pick_constant: float = 0.2, p_cut_tree: float = 0.2,
                         verbose: bool = False, cur_depth: int = 0) -> Node:
    """
    Generate a random symbolic expression tree.

    Mandatory Parameters
    ----------
    max_height : int
        The maximum height of the tree. The height of a tree is the length of the longest path from the root to a leaf (e.g. height of a leaf is 0).
    pc : float
        The probability of choosing a complex function over a basic function.
    terminal_list : List[str]
        The terminal list to choose from. Example: ['x0', 'x1', 'x2']

    Optional Parameters
    ----------
    constants : list[float]
        A list of constants that can be used in the tree (default is None).
    p_pick_constant : float
        The probability of choosing a constant over a terminal (default is 0.2).
    p_cut_tree : float
        The probability of cutting the tree early (default is 0.2).    
    verbose : bool    
        Whether to print debug information (default is False).
    cur_depth : int
        The exploration depth (e.g. depth of root is 0)

    Returns
    -------
    Node
        A Node object representing the root of the tree.
    """
    indent = ' ' * (cur_depth * 2)

    # Cut the tree early with probability 0.2
    if (random.random() < p_cut_tree) or max_height == 0:  
        # If constants are provided, choose one with probability p_pick_constant
        if constants is not None and random.random() < p_pick_constant: 
            terminal = random.choice(constants) 
        # Otherwise, pick from the terminal set
        else:                                                
            terminal = random.choice(terminal_list)
        
        if verbose: print(f"{indent}Picked terminal: {terminal}")

        # Set the height of the node to 0
        my_node = Node(terminal)
        my_node.set_height(0)
        return my_node
    else:
        # Choose a complex function with probability pc
        if random.random() < pc:                       
            func = random.choice(list(complex_function_set.keys()))
            if verbose: print(f"{indent}Chose complex function {func}")
            num_children = complex_function_set[func].__code__.co_argcount  # Numero di argomenti della funzione
            children = [generate_random_tree(max_height - 1, pc, terminal_list, constants, p_pick_constant, p_cut_tree, verbose, cur_depth + 1)
                        for _ in range(num_children)]
            
            # Set height
            cur_height = max([child.get_height() for child in children]) + 1
            my_node = Node(complex_function_set[func], children, name=func)
            my_node.set_height(cur_height)
            return my_node
        # Otherwise, choose a basic function
        else:                                           
            func = random.choice(list(basic_function_set.keys()))
            if verbose: print(f"{indent}Chose basic function {func}")
            num_children = basic_function_set[func].__code__.co_argcount  # Numero di argomenti della funzione
            children = [generate_random_tree(max_height - 1, pc, terminal_list, constants, p_pick_constant, p_cut_tree, verbose, cur_depth + 1)
                        for _ in range(num_children)]
            # Set height
            cur_height = max([child.get_height() for child in children]) + 1
            my_node = Node(basic_function_set[func], children, name=func)
            my_node.set_height(cur_height)
            return my_node

def generate_random_tree_with_all_terminal(max_height: int, pc: float, terminal_list: List[str],
                         constants: list[float] = None, p_pick_constant: float = 0.2, p_cut_tree: float = 0.2,
                         verbose: bool = False, cur_depth: int = 0, picked_terminal: set[str]=set()) -> Node:
    """
    Generate a random symbolic expression tree.

    Mandatory Parameters
    ----------
    max_height : int
        The maximum height of the tree. The height of a tree is the length of the longest path from the root to a leaf (e.g. height of a leaf is 0).
    pc : float
        The probability of choosing a complex function over a basic function.
    terminal_list : List[str]
        The terminal list to choose from. Example: ['x0', 'x1', 'x2']

    Optional Parameters
    ----------
    constants : list[float]
        A list of constants that can be used in the tree (default is None).
    p_pick_constant : float
        The probability of choosing a constant over a terminal (default is 0.2).
    p_cut_tree : float
        The probability of cutting the tree early (default is 0.2).    
    verbose : bool    
        Whether to print debug information (default is False).
    cur_depth : int
        The exploration depth (e.g. depth of root is 0)

    Returns
    -------
    Node
        A Node object representing the root of the tree.
    """
    indent = ' ' * (cur_depth * 2)

    # Cut the tree early with probability 0.2
    if (random.random() < p_cut_tree) or max_height == 0:  
        # If constants are provided, choose one with probability p_pick_constant
        if constants is not None and random.random() < p_pick_constant and len(picked_terminal) == len(terminal_list): 
            terminal = random.choice(constants) 
        # Otherwise, pick from the terminal set
        else:                                                
            terminal = random.choice(terminal_list)
            picked_terminal.add(terminal)
        
        if verbose: print(f"{indent}Picked terminal: {terminal}")

        # Set the height of the node to 0
        my_node = Node(terminal)
        my_node.set_height(0)
        return my_node
    else:
        # Choose a complex function with probability pc
        if random.random() < pc:                       
            func = random.choice(list(complex_function_set.keys()))
            if verbose: print(f"{indent}Chose complex function {func}")
            num_children = complex_function_set[func].__code__.co_argcount  # Numero di argomenti della funzione
            children = [generate_random_tree_with_all_terminal(max_height - 1, pc, terminal_list, constants, p_pick_constant, p_cut_tree, verbose, cur_depth + 1,picked_terminal)
                        for _ in range(num_children)]
            
            # Set height
            cur_height = max([child.get_height() for child in children]) + 1
            my_node = Node(complex_function_set[func], children, name=func)
            my_node.set_height(cur_height)
            return my_node
        # Otherwise, choose a basic function
        else:                                           
            func = random.choice(list(basic_function_set.keys()))
            if verbose: print(f"{indent}Chose basic function {func}")
            num_children = basic_function_set[func].__code__.co_argcount  # Numero di argomenti della funzione
            children = [generate_random_tree_with_all_terminal(max_height - 1, pc, terminal_list, constants, p_pick_constant, p_cut_tree, verbose, cur_depth + 1,picked_terminal)
                        for _ in range(num_children)]
            # Set height
            cur_height = max([child.get_height() for child in children]) + 1
            my_node = Node(basic_function_set[func], children, name=func)
            my_node.set_height(cur_height)
            return my_node

### Mutations

In [4]:
def point_mutation(Tree: Node, terminal_list: List[str], constants: list[float] = None, p_pick_constant: float = 0.2, pc: float = 0.2) -> Node:
    """
    Mutate a tree by changing a random node to a new random node.

    Parameters
    ----------
    Tree : Node
        The tree to mutate.
    terminal_list : List[str]
        The terminal list to choose from. Example: ['x0', 'x1', 'x2']
    constants : list[float]
        A list of constants that can be used in the tree.
    p_pick_constant : float
        The probability of choosing a constant over a terminal.
    pc : float
        The probability of choosing a complex function over a basic function.

    Returns
    -------
    Node
        The mutated tree.
    """

    # Get the list of nodes in the tree
    node = Tree.get_random_node()

    # If the node is a terminal, change it to a new terminal
    if node.is_leaf:
        if constants is not None and random.random() < p_pick_constant:
            terminal = random.choice(constants)
        else:
            terminal = random.choice(terminal_list)
        node.set_func(terminal)
        return Tree
    # Otherwise, change it to a new function maintaining the arity
    else:
        if random.random() < pc:
            while True:
                func = random.choice(list(complex_function_set.keys()))
                arity = complex_function_set[func].__code__.co_argcount
                if arity == node._arity:
                    break
            node.set_func(complex_function_set[func], name=func)
        else:
            while True:
                func = random.choice(list(basic_function_set.keys()))
                arity = basic_function_set[func].__code__.co_argcount
                if arity == node._arity:
                    break
            node.set_func(basic_function_set[func], name=func)
        return Tree
    
def subtree_mutation(Tree: Node, terminal_list: List[str], constants: list[float] = None, p_pick_constant: float = 0.2, pc: float = 0.2, height: int = 3, verbose: bool = False) -> Node:
    """
    Mutate a tree by changing a random subtree to a new random subtree.

    Parameters
    ----------
    Tree : Node
        The tree to mutate.
    terminal_list : List[str]
        The terminal list to choose from. Example: ['x0', 'x1', 'x2']
    constants : list[float]
        A list of constants that can be used in the tree.
    p_pick_constant : float
        The probability of choosing a constant over a terminal.
    pc : float
        The probability of choosing a complex function over a basic function.
    height : int
        The maximum height of the new subtree.

    Returns
    -------
    Node
        The mutated tree.
    """

    # Get the list of nodes in the tree
    node = Tree.get_random_node()

    if verbose:
        print(f"Node to mutate: {node._str} at height {node._height}")

    new_subtree = generate_random_tree(height, pc, terminal_list, constants, p_pick_constant)
    node = node.replace_tree_shallow(new_subtree)
    return Tree

def expansion_mutation(Tree: Node, terminal_list: List[str], constants: list[float] = None, p_pick_constant: float = 0.2, pc: float = 0.2, height: int = 3, verbose: bool = False) -> Node:
    """
    Mutate a tree by expanding a random node to a new random subtree.

    Parameters
    ----------
    Tree : Node
        The tree to mutate.
    terminal_list : List[str]
        The terminal list to choose from. Example: ['x0', 'x1', 'x2']
    constants : list[float]
        A list of constants that can be used in the tree.
    p_pick_constant : float
        The probability of choosing a constant over a terminal.
    pc : float
        The probability of choosing a complex function over a basic function.
    height : int
        The maximum height of the new subtree.

    Returns
    -------
    Node
        The mutated tree.
    """
    # Get the list of nodes in the tree
    node = random.choice(Tree.get_leafs())

    if verbose:
        print(f"Node to mutate: {node._str} at height {node._height}")  
    
    
    new_subtree = generate_random_tree(height, pc, terminal_list, constants, p_pick_constant)
    node = node.replace_tree_shallow(new_subtree)
    return Tree

def collaps_mutation(Tree: Node, terminal_list: List[str], constants: list[float] = None, p_pick_constant: float = 0.2, pc: float = 0.2, verbose: bool = False) -> Node:
    """
    Mutate a tree by collapsing a random node to a terminal.

    Parameters
    ----------
    Tree : Node
        The tree to mutate.
    terminal_list : List[str]
        The terminal list to choose from. Example: ['x0', 'x1', 'x2']
    constants : list[float]
        A list of constants that can be used in the tree.
    p_pick_constant : float
        The probability of choosing a constant over a terminal.
    pc : float
        The probability of choosing a complex function over a basic function.

    Returns
    -------
    Node
        The mutated tree.
    """

    # Get the list of nodes in the tree
    node = Tree.get_random_node()

    if verbose:
        print(f"Node to mutate: {node._str} at height {node._height}")

   # possible choices
    possible_choices = node.get_leafs()


    node.replace_tree_shallow(random.choice(possible_choices))
    return Tree


def permutation_mutation(Tree: Node, terminal_list: List[str], constants: list[float] = None, p_pick_constant: float = 0.2, pc: float = 0.2,verbose: bool = False) -> Node:
    """
    Mutate a tree by permuting the children of a random node through a rotation.

    Parameters
    ----------
    Tree : Node
        The tree to mutate.

    Returns
    -------
    Node
        The mutated tree.
    """

    # Extract a random node that has at least two children (so no leaves and single input functions)
    found = False
    for i in range(20):
        father = Tree.get_random_node()
        if father._arity > 1:
            found = True
            break
    if not found:
        return Tree
    
    if verbose:
        print(f"Father node: {father._str} at height {father._height}. It has {len(father._successors)} children: [", end="")
        for i, child in enumerate(father._successors):
            print(f"{child._str},", end=" ")
        print("]")

    
    new_successors = ()
    for i, child in enumerate(father._successors):
        new_successors = new_successors + (father._successors[i-1],)

    father._successors = new_successors

    if verbose:
        print(f"New children: [", end="")
        for i, child in enumerate(father._successors):
            print(f"{child._str},", end=" ")
        print("]")

    return Tree

def hoist_mutation(Tree: Node, terminal_list: List[str], constants: list[float] = None, p_pick_constant: float = 0.2, pc: float = 0.2,verbose: bool = False) -> Node:
    """
    Mutate a tree by replacing the root with a random child.

    Parameters
    ----------
    Tree : Node
        The tree to mutate.

    Returns
    -------
    Node
        The mutated tree.
    """

    random_node = Tree.get_random_node()
    return random_node

### crossover

In [5]:
def recombination_crossover(Tree1: Node, Tree2: Node, verbose: bool = False) -> Tuple[Node, Node]:
    """
    Recombine two trees by swapping a random subtree.

    Parameters
    ----------
    Tree1 : Node
        The first tree.
    Tree2 : Node
        The second tree.

    Returns
    -------
    Tuple[Node, Node]
        The recombined trees.
    """

    # Get the list of nodes in the trees
    node1 = Tree1.get_random_node()
    node2 = Tree2.get_random_node()

    if verbose:
        print(f"Node1 to swap: {node1._str} at height {node1._height}")
        print(f"Node2 to swap: {node2._str} at height {node2._height}")

    # Swap the subtrees
    temp1 = node1.clone()
    temp2 = node2.clone()
    node1.replace_tree_shallow(temp2)
    node2.replace_tree_shallow(temp1)

    return Tree1, Tree2



### Load Data

In [6]:
from gxgp import Node
problem_number = 3
problem = np.load(f'./data/problem_{problem_number}.npz')
input = problem['x']
labels = problem['y']

print("Input shape:", input.shape, " Example of sample: ", input[:, 0])
print("Labels shape:", labels.shape, " Example of label: ", labels[0])

# Terminal set
terminal_list = ['x' + str(i) for i in range(input.shape[0])]

print("terminal_list: ", terminal_list)
# Main

Input shape: (3, 5000)  Example of sample:  [ 1.52827812 -2.67876092 -3.73351453]
Labels shape: (5000,)  Example of label:  40.96071445158248
terminal_list:  ['x0', 'x1', 'x2']


### examples for generating trees
You can fine tune the size of the output by modifying draw() from draw.py

In [7]:
from utils.terminal_constants import crammed_constants

# height = 5
# initialized = generate_random_tree(height, 0.2, terminal_list, constants=crammed_constants, p_pick_constant=0.7, p_cut_tree=0.01, verbose=True)
# initialized.draw()

In [8]:
# collapsed = initialized.collapse_constants()
# collapsed.draw()

In [9]:
"""second=generate_random_tree(height, 0.2, terminal_list, constants=crammed_constants, p_pick_constant=0.4, p_cut_tree=0.05, verbose=True)
second.draw()"""

'second=generate_random_tree(height, 0.2, terminal_list, constants=crammed_constants, p_pick_constant=0.4, p_cut_tree=0.05, verbose=True)\nsecond.draw()'

In [10]:
"""for obj in recombination_crossover(initialized, second, verbose=True):
    obj.draw()"""

'for obj in recombination_crossover(initialized, second, verbose=True):\n    obj.draw()'

### Create input formatted

In [11]:
print("input shape is ", input.shape)

vars = []
for j in range(input.shape[1]):
    cur_vars = {'x'+str(i): input[i][j] for i in range(input.shape[0])}
    # print("cur_vars is ", cur_vars)
    vars.append(cur_vars)
vars = np.array(vars)

print("vars shape is ", vars.shape)

input shape is  (3, 5000)
vars shape is  (5000,)


### fitness

In [12]:
import warnings
warnings.simplefilter("error", RuntimeWarning)

In [13]:
# def fitness(mytree: Node, vars, labels, verbose=False, penalized = 'sqrt'):
#     try:
#         output = np.array([mytree(**var) for var in vars])
#         mse = 100 * np.square(labels - output).mean()
#         if penalized == 'percent':
#             return mse +  mse * mytree.get_height() * 0.01 if mytree.get_height() > 0 else mse
#         else:
#             return mse * np.sqrt(mytree.get_height()) if mytree.get_height() > 0 else mse
#     except RuntimeWarning as e:
#         if verbose: print(f"caught runtime warning: {e}, setting fitness to inf")
#         return np.inf

# Fitness reverse
def fitness(mytree, vars, labels, penalized=None):
    try:
        output = np.array([mytree(**var) for var in vars])
        mytree.reeval_heights()
        height = mytree.get_height()
        mse = 100 * np.square(labels - output).mean()
        if height > 13:
            mse *= float(height - 3) / 10
        return mse
    except RuntimeWarning as e:
        # print(f"caught runtime warning: {e}, setting fitness to inf")
        return np.inf

#print(fitness(initialized, vars, labels))

def fitness_unscaled(mytree: Node, vars, labels, verbose=False):
    try:
        output = np.array([mytree(**var) for var in vars])
        mse = 100 * np.square(labels - output).mean()
        return mse
    except RuntimeWarning as e:
        if verbose: print(f"caught runtime warning: {e}, setting fitness to inf")
        return np.inf
#print(fitness(initialized, vars, labels))

In [14]:
from gxgp.gp_common import xover_swap_subtree
"""# Xover
def xover(tree1, tree2):
    return xover_swap_subtree(tree1, tree2)

# initialized.subtree.pop().draw() # this procedure is really similar to get_random_node, except there we use a list and random.choice
hoist_mutation(initialized, verbose=True).draw()"""

'# Xover\ndef xover(tree1, tree2):\n    return xover_swap_subtree(tree1, tree2)\n\n# initialized.subtree.pop().draw() # this procedure is really similar to get_random_node, except there we use a list and random.choice\nhoist_mutation(initialized, verbose=True).draw()'

### parent selection

In [15]:

def parent_selection(population, pre_calculated_fitnesses=None, penalized = 'sqrt'):
    if pre_calculated_fitnesses is None:
        candidates = sorted(np.random.choice(population, 2), key=lambda e: fitness(e,vars,labels, penalized=penalized))
        return candidates[0]
    else:
        #Random index between 0 and population size
        index1 = np.random.randint(0, len(population))
        index2 = np.random.randint(0, len(population))
        candidates = [population[index1], population[index2]]
        if pre_calculated_fitnesses[index1] > pre_calculated_fitnesses[index2]:
            return candidates[1]
        else:
            return candidates[0]

### utils functions

In [16]:
import concurrent
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import numpy as np

def compute_pair_distance(i, j, population):
    return i, j, population[i].tree_distance(population[j])

def tree_distance(population, verbose="Calculating tree distance matrix"):
    n = len(population)
    matrix = np.zeros((n, n))
    pairs = [(i, j) for i in range(n) for j in range(i+1, n)]
    
    with ThreadPoolExecutor() as executor:
        futures = [executor.submit(compute_pair_distance, i, j, population) for i, j in pairs]
        for future in tqdm(concurrent.futures.as_completed(futures), desc=verbose, total=len(futures)):
            i, j, dist = future.result()
            matrix[i][j] = dist/ population[i].__len__() if population[i].__len__() > 0 else dist
            matrix[j][i] = dist/ population[j].__len__() if population[j].__len__() > 0 else dist

    
    return matrix

def random_mutation(p1=0.16, p2=0.16, p3=0.16, p4=0.16, p5=0.16):
    r = random.random()
    if r < p1:
        return point_mutation
    elif r < p1 + p2:
        return subtree_mutation
    elif r < p1 + p2 + p3:
        return expansion_mutation
    elif r < p1 + p2 + p3 + p4:
        return permutation_mutation
    elif r < p1 + p2 + p3 + p4 + p5:
        return collaps_mutation
    else:
        return hoist_mutation


### Training

In [None]:
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
import math


# Parameters
crossover = recombination_crossover
OFFSPRING_SIZE = 200
POPULATION_SIZE = 100
OUTSIDER_SIZE = math.ceil(OFFSPRING_SIZE*0.1)
pm = 0.05
x_elitism = 0.08
MAX_GENERATIONS = 100
HEIGHT = 5
PC = 0.1
P_PICK_CONSTANT = 0.4
P_CUT_TREE = 0.05


# Initialize the population
def initialize_population(_):
    return generate_random_tree_with_all_terminal(HEIGHT, PC, terminal_list, constants=crammed_constants, p_pick_constant=P_PICK_CONSTANT, p_cut_tree=P_CUT_TREE)

ALREADY_INITIALIZED = False
if not ALREADY_INITIALIZED:
    with ThreadPoolExecutor() as executor:
        population = list(tqdm(executor.map(initialize_population, range(POPULATION_SIZE)), desc="Initializing population", total=POPULATION_SIZE))

population = [tree.collapse_constants() for tree in population if tree is not None]    
# Remove identical trees
distance_matrix = tree_distance(population, verbose='Initial tree distances')
n = len(population)
# I need to keep only the first tree if there are identical trees
for i in range(n):
    for j in range(i + 1, n):
        if distance_matrix[i][j] == 0 and population[j] is not None:
            population[j] = None

population = [tree for tree in population if tree is not None]
for tree in population:
    tree.reeval_heights()

 # Evaluate the population
with ThreadPoolExecutor() as executor:
    fitnesses = np.array(list(tqdm(executor.map(lambda tree: fitness(tree, vars, labels, penalized='sqrt'), population), desc="Evaluating population", total=len(population))))
   
penalized = 'percent'
probabilities2 = [0.10, 0.10, 0.10, 0.10, 0.10]
probabilities1 = [0.16, 0.16, 0.16, 0.16, 0.16]

print(*probabilities1)

probabilities = probabilities1
# Training
for generation in range(MAX_GENERATIONS):
    if (generation >= 15):
        probabilities = probabilities2
        penalized = 'percent'
    if(generation >= 25):
        penalized = 'percent'
        probabilities = probabilities1

    pm = max(0.05, 0.2 - generation / MAX_GENERATIONS * 0.15)# from 0.2 to 0.05
    # Select the best individuals
    best_individuals = np.argsort(fitnesses)[:int(x_elitism * POPULATION_SIZE)]
    # Create the offspring
    offspring = []
    for _ in tqdm(range(OFFSPRING_SIZE), desc=f"Generation {generation}, Creating offsprings"):
        # Mutation
        if random.random() < pm:
            mutation = random_mutation(*probabilities)
            child = mutation(parent_selection(population, fitnesses).clone(), terminal_list, constants=crammed_constants, p_pick_constant=P_PICK_CONSTANT, pc=PC)
            child.reeval_heights()
            offspring.append(child)
        else:
            # Select parents
            parent1 = parent_selection(population, fitnesses, penalized).clone()
            parent2 = parent_selection(population, fitnesses, penalized).clone()
            # Crossover
            child1, child2 = crossover(parent1, parent2)
            child1.reeval_heights()
            child2.reeval_heights()
            offspring.extend([child1, child2])
    # Combine and select the best individuals
    population = [population[i] for i in best_individuals] + offspring

    # Remove identical trees
    population = [tree.collapse_constants() for tree in population if tree is not None]
    distance_matrix = tree_distance(population)
    n = len(population)
    # I need to keep only the first tree if there are identical trees
    for i in range(n):
        for j in range(i + 1, n):
            if distance_matrix[i][j] == 0 and population[j] is not None:
                population[j] = None
    
    # Sort population for summation of distance similarity
    summation = np.zeros(n)
    for i in range(n):
        summation[i] = np.sum(distance_matrix[i,:])
    
    distance_sorted = np.argsort(summation)[::-1]
    outsiders = set(distance_sorted[:OUTSIDER_SIZE])
    # Calculate fitness function
    with ThreadPoolExecutor() as executor:
        fitnesses_offspring = np.array(list(tqdm(executor.map(lambda tree: fitness(tree, vars, labels, penalized=penalized), offspring), desc="Evaluating offsprings", total=len(offspring))))

    # Select the best individuals and outsiders
    all_fitnesses = np.concatenate([fitnesses[best_individuals], fitnesses_offspring])
    best_fitnesses = set(np.argsort(all_fitnesses)[:POPULATION_SIZE])
    # Union between best individuals and outsiders
    union = best_fitnesses.union(outsiders)
    intersection = best_fitnesses.intersection(outsiders)

    union_filtered = [i for i in union if population[i] is not None]

    population = [population[i] for i in union_filtered]
    before = len(union)
    fitnesses = [all_fitnesses[i] for i in union_filtered]
    fitnesses = np.array(fitnesses)
    best_fitness = fitness_unscaled(population[0], vars, labels)
    print(f"Removed {before - len(population)} identical or invalid trees")
    print(f'Kept {len(outsiders) - len(intersection)} outsiders with low fitness')
    if generation > 0:
        print(f"Generation {generation} - Best fitness: {best_fitness} - Difference: {best_fitness - old_best_fitness}")
    else:
        print(f"Generation {generation} - Best fitness: {best_fitness}")
    old_best_fitness = best_fitness
    print(f"Population size: {len(population)}")
    print(f'Best height: {population[0].get_height()}')
    print(f"Mean height of the population: {np.mean([tree.get_height() for tree in population])}")


Initializing population: 100%|██████████| 100/100 [00:00<00:00, 49973.84it/s]
Initial tree distances: 100%|██████████| 4950/4950 [00:00<00:00, 12636.39it/s]
Evaluating population: 100%|██████████| 100/100 [00:26<00:00,  3.84it/s]


0.16 0.16 0.16 0.16 0.16


Generation 0, Creating offsprings: 100%|██████████| 200/200 [00:00<00:00, 303.20it/s]
Calculating tree distance matrix: 100%|██████████| 65703/65703 [00:06<00:00, 10441.58it/s]
Evaluating offsprings: 100%|██████████| 355/355 [01:35<00:00,  3.72it/s]


Removed 7 identical or invalid trees
Kept 6 outsiders with low fitness
Generation 0 - Best fitness: 174868.34820510735
Population size: 99
Best height: 5
Mean height of the population: 4.696969696969697


Generation 1, Creating offsprings: 100%|██████████| 200/200 [00:00<00:00, 570.92it/s]
Calculating tree distance matrix: 100%|██████████| 67528/67528 [00:04<00:00, 16687.64it/s]
Evaluating offsprings: 100%|██████████| 360/360 [01:21<00:00,  4.43it/s]


Removed 10 identical or invalid trees
Kept 16 outsiders with low fitness
Generation 1 - Best fitness: 174868.34820510735 - Difference: 0.0
Population size: 106
Best height: 5
Mean height of the population: 4.688679245283019


Generation 2, Creating offsprings: 100%|██████████| 200/200 [00:00<00:00, 492.90it/s]
Calculating tree distance matrix: 100%|██████████| 67528/67528 [00:05<00:00, 11338.61it/s]
Evaluating offsprings: 100%|██████████| 360/360 [01:44<00:00,  3.45it/s]


Removed 12 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 2 - Best fitness: 139182.0620368147 - Difference: -35686.28616829266
Population size: 108
Best height: 10
Mean height of the population: 5.546296296296297


Generation 3, Creating offsprings: 100%|██████████| 200/200 [00:00<00:00, 382.20it/s]
Calculating tree distance matrix: 100%|██████████| 67528/67528 [00:06<00:00, 10883.10it/s]
Evaluating offsprings: 100%|██████████| 360/360 [02:17<00:00,  2.62it/s]


Removed 9 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 3 - Best fitness: 121957.74993974261 - Difference: -17224.312097072077
Population size: 111
Best height: 5
Mean height of the population: 6.18018018018018


Generation 4, Creating offsprings: 100%|██████████| 200/200 [00:00<00:00, 213.68it/s]
Calculating tree distance matrix: 100%|██████████| 66430/66430 [00:11<00:00, 5698.96it/s]
Evaluating offsprings: 100%|██████████| 357/357 [03:34<00:00,  1.66it/s]


Removed 11 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 4 - Best fitness: 119874.43600668808 - Difference: -2083.313933054538
Population size: 109
Best height: 5
Mean height of the population: 7.3577981651376145


Generation 5, Creating offsprings: 100%|██████████| 200/200 [00:03<00:00, 57.85it/s]
Calculating tree distance matrix: 100%|██████████| 72010/72010 [00:31<00:00, 2288.55it/s]
Evaluating offsprings: 100%|██████████| 372/372 [04:21<00:00,  1.42it/s]


Removed 8 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 5 - Best fitness: 91827.47265891852 - Difference: -28046.96334776956
Population size: 112
Best height: 13
Mean height of the population: 8.035714285714286


Generation 6, Creating offsprings: 100%|██████████| 200/200 [00:01<00:00, 165.22it/s]
Calculating tree distance matrix: 100%|██████████| 68635/68635 [00:12<00:00, 5299.56it/s]
Evaluating offsprings: 100%|██████████| 363/363 [04:11<00:00,  1.44it/s] 


Removed 14 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 6 - Best fitness: 62078.147521524865 - Difference: -29749.32513739365
Population size: 106
Best height: 8
Mean height of the population: 10.481132075471699


Generation 7, Creating offsprings: 100%|██████████| 200/200 [00:01<00:00, 102.20it/s]
Calculating tree distance matrix: 100%|██████████| 70876/70876 [00:21<00:00, 3275.31it/s]
Evaluating offsprings: 100%|██████████| 369/369 [07:26<00:00,  1.21s/it]


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 7 - Best fitness: 62078.147521524865 - Difference: 0.0
Population size: 114
Best height: 8
Mean height of the population: 13.043859649122806


Generation 8, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 77.80it/s]
Calculating tree distance matrix: 100%|██████████| 67161/67161 [00:27<00:00, 2458.67it/s]
Evaluating offsprings: 100%|██████████| 359/359 [09:51<00:00,  1.65s/it] 


Removed 8 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 8 - Best fitness: 61027.93440019506 - Difference: -1050.2131213298053
Population size: 112
Best height: 17
Mean height of the population: 14.714285714285714


Generation 9, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 67.16it/s]
Calculating tree distance matrix: 100%|██████████| 67896/67896 [00:31<00:00, 2154.84it/s]
Evaluating offsprings: 100%|██████████| 361/361 [11:24<00:00,  1.90s/it]


Removed 3 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 9 - Best fitness: 52875.80821285208 - Difference: -8152.126187342983
Population size: 117
Best height: 13
Mean height of the population: 14.376068376068377


Generation 10, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 68.80it/s]
Calculating tree distance matrix: 100%|██████████| 67896/67896 [00:30<00:00, 2226.46it/s]
Evaluating offsprings: 100%|██████████| 361/361 [10:58<00:00,  1.82s/it]


Removed 5 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 10 - Best fitness: 43958.313057547755 - Difference: -8917.495155304321
Population size: 115
Best height: 17
Mean height of the population: 14.408695652173913


Generation 11, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 76.06it/s]
Calculating tree distance matrix: 100%|██████████| 68635/68635 [00:28<00:00, 2383.51it/s]
Evaluating offsprings: 100%|██████████| 363/363 [10:26<00:00,  1.73s/it]


Removed 7 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 11 - Best fitness: 40888.70560979981 - Difference: -3069.6074477479488
Population size: 113
Best height: 10
Mean height of the population: 13.150442477876107


Generation 12, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 80.14it/s]
Calculating tree distance matrix: 100%|██████████| 72771/72771 [00:26<00:00, 2722.62it/s]
Evaluating offsprings: 100%|██████████| 374/374 [09:23<00:00,  1.51s/it] 


Removed 8 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 12 - Best fitness: 39357.927100199406 - Difference: -1530.7785096004009
Population size: 112
Best height: 17
Mean height of the population: 12.839285714285714


Generation 13, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 90.04it/s]
Calculating tree distance matrix: 100%|██████████| 70500/70500 [00:24<00:00, 2878.27it/s]
Evaluating offsprings: 100%|██████████| 368/368 [08:17<00:00,  1.35s/it]


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 13 - Best fitness: 26829.42219084518 - Difference: -12528.504909354226
Population size: 114
Best height: 14
Mean height of the population: 13.517543859649123


Generation 14, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 90.15it/s]
Calculating tree distance matrix: 100%|██████████| 68265/68265 [00:24<00:00, 2824.56it/s]
Evaluating offsprings: 100%|██████████| 362/362 [08:17<00:00,  1.37s/it]


Removed 4 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 14 - Best fitness: 26821.59674817251 - Difference: -7.825442672670761
Population size: 116
Best height: 16
Mean height of the population: 13.60344827586207


Generation 15, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 76.75it/s]
Calculating tree distance matrix: 100%|██████████| 70876/70876 [00:27<00:00, 2574.18it/s]
Evaluating offsprings: 100%|██████████| 369/369 [09:24<00:00,  1.53s/it]


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 15 - Best fitness: 25282.03459754874 - Difference: -1539.5621506237694
Population size: 114
Best height: 17
Mean height of the population: 13.56140350877193


Generation 16, Creating offsprings: 100%|██████████| 200/200 [00:02<00:00, 74.27it/s]
Calculating tree distance matrix: 100%|██████████| 67528/67528 [00:29<00:00, 2311.56it/s]
Evaluating offsprings: 100%|██████████| 360/360 [10:22<00:00,  1.73s/it]


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 16 - Best fitness: 22687.776643604226 - Difference: -2594.257953944514
Population size: 114
Best height: 16
Mean height of the population: 14.043859649122806


Generation 17, Creating offsprings: 100%|██████████| 200/200 [00:03<00:00, 63.87it/s]
Calculating tree distance matrix: 100%|██████████| 72010/72010 [00:35<00:00, 2057.09it/s]
Evaluating offsprings: 100%|██████████| 372/372 [12:05<00:00,  1.95s/it]


Removed 7 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 17 - Best fitness: 21961.175324226297 - Difference: -726.601319377929
Population size: 113
Best height: 25
Mean height of the population: 15.460176991150442


Generation 18, Creating offsprings: 100%|██████████| 200/200 [00:03<00:00, 57.77it/s]
Calculating tree distance matrix: 100%|██████████| 70500/70500 [00:38<00:00, 1846.34it/s]
Evaluating offsprings: 100%|██████████| 368/368 [12:54<00:00,  2.10s/it]


Removed 5 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 18 - Best fitness: 21595.602736928864 - Difference: -365.5725872974326
Population size: 115
Best height: 20
Mean height of the population: 16.06086956521739


Generation 19, Creating offsprings: 100%|██████████| 200/200 [00:03<00:00, 58.42it/s]
Calculating tree distance matrix: 100%|██████████| 70500/70500 [00:38<00:00, 1853.99it/s]
Evaluating offsprings: 100%|██████████| 368/368 [13:01<00:00,  2.12s/it] 


Removed 12 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 19 - Best fitness: 15180.269168151008 - Difference: -6415.333568777856
Population size: 108
Best height: 13
Mean height of the population: 17.796296296296298


Generation 20, Creating offsprings: 100%|██████████| 200/200 [00:03<00:00, 53.28it/s]
Calculating tree distance matrix: 100%|██████████| 67161/67161 [00:40<00:00, 1643.31it/s]
Evaluating offsprings: 100%|██████████| 359/359 [14:52<00:00,  2.48s/it]


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 20 - Best fitness: 12754.808344194604 - Difference: -2425.460823956404
Population size: 114
Best height: 13
Mean height of the population: 17.736842105263158


Generation 21, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 46.03it/s]
Calculating tree distance matrix: 100%|██████████| 69751/69751 [00:42<00:00, 1622.70it/s]
Evaluating offsprings: 100%|██████████| 366/366 [15:31<00:00,  2.55s/it] 


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 21 - Best fitness: 12754.808344194604 - Difference: 0.0
Population size: 114
Best height: 13
Mean height of the population: 17.36842105263158


Generation 22, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 49.91it/s]
Calculating tree distance matrix: 100%|██████████| 69378/69378 [00:41<00:00, 1667.44it/s]
Evaluating offsprings: 100%|██████████| 365/365 [14:29<00:00,  2.38s/it] 


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 22 - Best fitness: 9247.971008207522 - Difference: -3506.837335987082
Population size: 114
Best height: 20
Mean height of the population: 16.69298245614035


Generation 23, Creating offsprings: 100%|██████████| 200/200 [00:03<00:00, 54.94it/s]
Calculating tree distance matrix: 100%|██████████| 67528/67528 [00:41<00:00, 1642.79it/s]
Evaluating offsprings: 100%|██████████| 360/360 [14:29<00:00,  2.41s/it] 


Removed 5 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 23 - Best fitness: 9247.960481427635 - Difference: -0.010526779886276927
Population size: 115
Best height: 21
Mean height of the population: 18.660869565217393


Generation 24, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 47.31it/s]
Calculating tree distance matrix: 100%|██████████| 70876/70876 [00:51<00:00, 1370.90it/s]
Evaluating offsprings: 100%|██████████| 369/369 [17:30<00:00,  2.85s/it]  


Removed 9 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 24 - Best fitness: 9172.41919899629 - Difference: -75.54128243134619
Population size: 111
Best height: 21
Mean height of the population: 19.53153153153153


Generation 25, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 49.79it/s]
Calculating tree distance matrix: 100%|██████████| 70125/70125 [00:48<00:00, 1453.24it/s]
Evaluating offsprings: 100%|██████████| 367/367 [16:46<00:00,  2.74s/it]  


Removed 3 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 25 - Best fitness: 7825.432549149848 - Difference: -1346.9866498464417
Population size: 117
Best height: 24
Mean height of the population: 19.00854700854701


Generation 26, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 45.26it/s]
Calculating tree distance matrix: 100%|██████████| 70125/70125 [00:47<00:00, 1464.22it/s]
Evaluating offsprings: 100%|██████████| 367/367 [16:28<00:00,  2.69s/it]


Removed 12 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 26 - Best fitness: 7612.732701995262 - Difference: -212.69984715458577
Population size: 108
Best height: 24
Mean height of the population: 18.62962962962963


Generation 27, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 44.60it/s]
Calculating tree distance matrix: 100%|██████████| 69378/69378 [00:49<00:00, 1390.49it/s]
Evaluating offsprings: 100%|██████████| 365/365 [17:23<00:00,  2.86s/it] 


Removed 6 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 27 - Best fitness: 7612.732701995262 - Difference: 0.0
Population size: 114
Best height: 24
Mean height of the population: 19.42105263157895


Generation 28, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 41.24it/s]
Calculating tree distance matrix: 100%|██████████| 73153/73153 [00:56<00:00, 1283.88it/s]
Evaluating offsprings: 100%|██████████| 375/375 [19:44<00:00,  3.16s/it]  


Removed 2 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 28 - Best fitness: 7018.100564658104 - Difference: -594.6321373371575
Population size: 118
Best height: 21
Mean height of the population: 21.203389830508474


Generation 29, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 41.85it/s]
Calculating tree distance matrix: 100%|██████████| 67528/67528 [00:54<00:00, 1241.36it/s]
Evaluating offsprings: 100%|██████████| 360/360 [18:05<00:00,  3.02s/it]


Removed 1 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 29 - Best fitness: 6843.848354449463 - Difference: -174.25221020864137
Population size: 119
Best height: 20
Mean height of the population: 20.764705882352942


Generation 30, Creating offsprings: 100%|██████████| 200/200 [00:05<00:00, 39.68it/s]
Calculating tree distance matrix: 100%|██████████| 69006/69006 [00:59<00:00, 1168.20it/s]
Evaluating offsprings: 100%|██████████| 364/364 [21:05<00:00,  3.48s/it] 


Removed 8 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 30 - Best fitness: 6843.848354449463 - Difference: 0.0
Population size: 112
Best height: 20
Mean height of the population: 20.258928571428573


Generation 31, Creating offsprings: 100%|██████████| 200/200 [00:05<00:00, 39.32it/s]
Calculating tree distance matrix: 100%|██████████| 70500/70500 [01:00<00:00, 1159.02it/s]
Evaluating offsprings: 100%|██████████| 368/368 [21:16<00:00,  3.47s/it] 


Removed 4 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 31 - Best fitness: 6341.78497801862 - Difference: -502.06337643084316
Population size: 116
Best height: 21
Mean height of the population: 19.49137931034483


Generation 32, Creating offsprings: 100%|██████████| 200/200 [00:05<00:00, 38.65it/s]
Calculating tree distance matrix: 100%|██████████| 67896/67896 [00:58<00:00, 1159.26it/s]
Evaluating offsprings: 100%|██████████| 361/361 [20:30<00:00,  3.41s/it]  


Removed 8 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 32 - Best fitness: 6341.363495875487 - Difference: -0.42148214313237986
Population size: 112
Best height: 21
Mean height of the population: 19.517857142857142


Generation 33, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 42.07it/s]
Calculating tree distance matrix: 100%|██████████| 70876/70876 [01:00<00:00, 1170.90it/s]
Evaluating offsprings: 100%|██████████| 369/369 [21:19<00:00,  3.47s/it] 


Removed 4 identical or invalid trees
Kept 20 outsiders with low fitness
Generation 33 - Best fitness: 6338.815471418568 - Difference: -2.548024456918938
Population size: 116
Best height: 20
Mean height of the population: 19.637931034482758


Generation 34, Creating offsprings: 100%|██████████| 200/200 [00:04<00:00, 42.37it/s]
Calculating tree distance matrix: 100%|██████████| 69378/69378 [00:59<00:00, 1158.09it/s]
Evaluating offsprings:  54%|█████▍    | 197/365 [11:40<07:57,  2.84s/it] 

In [None]:
from datetime import datetime
conf = {
    "problem": problem_number,
    "crossover": crossover,
    "OFFSPRING_SIZE": OFFSPRING_SIZE,
    "POPULATION_SIZE": POPULATION_SIZE,
    "OUTSIDER_SIZE": OUTSIDER_SIZE,
    "pm": pm,
    "x_elitism": x_elitism,
    "MAX_GENERATIONS": MAX_GENERATIONS,
    "HEIGHT": HEIGHT,
    "PC": PC,
    "P_PICK_CONSTANT": P_PICK_CONSTANT,
    "P_CUT_TREE": P_CUT_TREE
}
def save_results(conf, res_function, res_fitness):
    cur_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    with open(f"./results/{cur_time}.txt", "w") as file:
        # Write the contents of conf
        file.write("# Configuration\n")
        file.write("conf = {\n")
        for key, value in conf.items():
            file.write(f"    '{key}': {value},\n")
        file.write("}\n\n")
        
        # Write the contents of res_function
        file.write("# Resulting function\n")
        file.write(f"{res_function}")
        
        # Write the contents of res_fitness
        file.write("# Resulting fitness\n")
        file.write(f"{res_fitness}")
save_results(conf, str(population[0]), best_fitness)

In [None]:
population[0].draw()
print(fitness(population[0], vars, labels))