# Import

In [33]:
import numpy as np
from random import choice, choices, randint,random, uniform, sample
import copy
from dataclasses import dataclass

THINGS TO BE DONE:
1. IMPROVE THE GENERATION OF RANDOM PROGRAMS: CURRENTLY SOME OF THEM HAVE A STRANGE FORM, SUCH AS
f(x) = (sin(x)-3)/(4+3) -> ALTHOUGH THIS COULD STILL BE USEFUL TO REPRODUCE A MULTIPLICATION BY A FLOAT.
2. MUTATION SHOULD WORK, BUT CAN BE IMPROVED.
3. A CHECK ON MAX_DEPTH HAS TO BE IMPLEMENTED IN BOTH MUTATE AND CROSSOVER; IN RANDOM_PROGRAM THE DEPTH CANNOT BE MORE THAN 3, SO THERE IS NO PROBLEM THERE.


# Operations

In [34]:
# The depth of a tree cannot be greater than 10 OVERALL.
# NOTE(review): no function in the visible part of the notebook reads this
# constant yet — confirm where the check is meant to be enforced.
MAX_TREE_DEPTH = 10

# Operators are kept in two tables, split by arity (1 or 2 arguments).
# Each entry is a tuple: (numpy callable, arity, format template, sampling weight).
# The format template doubles as the operator's identifier inside program trees.
# The weight field lets sum and subtraction be chosen more often than the others.
OPERATIONS_BINARY = [
    (np.add, 2, "({} + {})",0.13),
    (np.subtract, 2, "({} - {})",0.13),
    (np.divide, 2, "({} / {})",0.1),
    (np.multiply, 2, "({} * {})",0.1),   
    (np.power, 2, "({} ^ {})",0.1),
]

# Sampling weights of the binary operators, in table order.
BINARY_WEIGHTS = [op[3] for op in OPERATIONS_BINARY]

# In this way, also sin and cos cannot be nested within sqrt and exp, to be changed if needed
OPERATIONS_UNARY = [
    (np.sin, 1, "sin({})",0.1),
    (np.cos, 1, "cos({})",0.1),
    (np.tan, 1, "tan({})",0.1),
    (np.exp, 1, "exp({})",0.1),
    (np.log, 1, "log({})",0.1),
    (np.sqrt, 1, "sqrt({})",0.1),
    (np.square, 1, "square({})",0.1),
]
# Sampling weights of the unary operators, in table order.
UNARY_WEIGHTS = [op[3] for op in OPERATIONS_UNARY]
# Combined table (binary first, then unary) and the matching weight list.
OPERATIONS = OPERATIONS_BINARY + OPERATIONS_UNARY
WEIGHTS = BINARY_WEIGHTS + UNARY_WEIGHTS    


# Genotype definition

The recursive function we've developed is designed to create complex mathematical formulas in a structured format, where each formula is represented as a list: `[operator, first_operand, second_operand]`. This format allows the recursive and hierarchical organization of operations, facilitating the computational evaluation and manipulation of the formula.

Key features of this function include:

1. **Use of Input Dimensions:** The function uses a set, `used_indices`, to track which input dimensions (e.g., `0`, `1`, etc.) have already been used in the formula. This ensures comprehensive coverage of all available input variables (if the depth makes it possible), making the formula relevant to all dimensions of the input data.  

2. **Restrictions on Trigonometric Functions:** To enhance the mathematical sensibility of the formulas generated, the function includes a specific constraint regarding the nesting of trigonometric functions. Once a trigonometric function (such as `sin`, `cos`, etc.) is used, the function prohibits the inclusion of another trigonometric function within it. This constraint helps in preventing mathematically nonsensical expressions like `sin(cos(tan(x)))`, which, while computationally valid, may not be practically meaningful or may complicate the interpretation and analysis of the formula.

This approach not only ensures that each formula is robust and contextually appropriate but also maintains clarity and reduces the computational redundancy that might arise from nested trigonometric operations. Such constraints are particularly important in scientific computing and simulations where the accuracy and interpretability of mathematical expressions are critical.


In [35]:
#Here a random program is generated, taking into account the dimensions of the input already used
#Make the initial depth higher to ensure that all the dimensions are considered
from random import choices


def random_program(depth,input_dim,unary=False, used_indices=None):
    """Recursively build a random program tree.

    Args:
        depth: Remaining depth budget; when it reaches 0 a leaf is always produced.
        input_dim: Number of input variables; variable leaves are written ``x[i]``.
        unary: True once a unary operator has been chosen higher up on this branch.
        used_indices: Set of variable indices already placed. It is updated in
            place as variables are added; a fresh set is created when omitted.

    Returns:
        A tuple ``(program, used_indices)``. ``program`` is either a leaf string
        (``"x[i]"`` or a stringified float constant) or a list
        ``[format_symbol, child, ...]``.
    """
    if used_indices is None:
        used_indices = set()

    # Base case: generate a leaf node.
    # Forced when depth is exhausted, otherwise taken with probability 0.3.
    if depth == 0 or (random()<0.3):
        if len(used_indices) < input_dim:
            # Ensure that all indices are used at least once
            available_indices = list(set(range(input_dim)) - used_indices)
            index = choice(available_indices)
            used_indices.add(index)
            return f"x[{index}]", used_indices
        else:
            # Once all indices are used, place a constant. Or do we also consider to place a variable twice?
            return str(uniform(-10, 10)), used_indices
           
    # Recursive case: generate an inner node without composing too many trigonometric functions.
    # Unary operators are only candidates near the leaves (depth <= 1), when no unary
    # operator was already picked above, and while some variable is still unplaced
    # (the last condition avoids placing a variable twice). Otherwise only binary
    # operators are sampled.
    if(depth<=1 and not unary and len(used_indices)!=input_dim):
        operations = OPERATIONS
        weights = WEIGHTS
    else:
        operations = OPERATIONS_BINARY
        weights = BINARY_WEIGHTS
    op, arity, symbol,p = choices(operations,weights=weights, k=1)[0]
    # Once the flag is set it stays set for the whole branch below.
    if(unary != True):
        unary = arity==1
    children = []
    for _ in range(arity):
        # used_indices is threaded through the siblings so each variable is placed once.
        child, used_indices = random_program(depth - 1, input_dim, unary, used_indices)
        children.append(child)
    return [symbol] + children,used_indices

In [36]:
#It takes the input dimension and generates a program for each dimension
#Concatenates the programs together through a binary operation
def generate_program(input_dim):
    """Build a full program that covers every input dimension.

    One sub-program of depth 2 is generated per dimension; for dimension ``i``
    every other index is pre-marked as used, so only ``x[i]`` can still be
    placed as a variable. The sub-programs are then chained left to right with
    randomly sampled binary operators.

    Args:
        input_dim: Number of input variables.

    Returns:
        The combined program tree.
    """
    parts = []
    for dim in range(input_dim):
        # Mark all other dimensions as already used.
        others_taken = set(range(input_dim)) - {dim}
        subtree, _ = random_program(2, input_dim, used_indices=others_taken)
        parts.append(subtree)

    # Fold the per-dimension programs together through binary operations.
    combined = parts[0]
    for extra in parts[1:]:
        symbol = choices(OPERATIONS_BINARY, weights=BINARY_WEIGHTS, k=1)[0][2]
        combined = [symbol, combined, extra]
    return combined


## Individual

We developed a dataclass to store the fitness value of each individual. This allows us to avoid recomputing the fitness function for the same individual.

In [37]:
@dataclass
class Individual:
    """A candidate solution: a program tree together with its cached fitness."""
    # Program tree in the nested representation produced by generate_program.
    genome: list
    # Cached fitness value; None until it has been computed.
    fitness : float = None

## Transform the program into a human readable function

In [38]:
def program_to_string(program):
    """Render a program tree as a human-readable formula.

    Args:
        program: A leaf string, or a list ``[format_symbol, child, ...]``.

    Returns:
        The formatted expression. Leaf strings are returned unchanged.

    Raises:
        ValueError: If the node's symbol is not present in ``OPERATIONS``.
    """
    # Leaves are already printable.
    if isinstance(program, str):
        return program
    if isinstance(program, list):
        head = program[0]
        for _op, _arity, template, _weight in OPERATIONS:
            if template == head:
                break
        else:
            raise ValueError(f"Not known operation: {program[0]}")

        rendered_children = [program_to_string(child) for child in program[1:]]
        return template.format(*rendered_children)


In [39]:
# Smoke test: build a random program for a 2-dimensional input and show it as a formula.
print(program_to_string(generate_program(2)))

(x[0] ^ (square(x[1]) * (-0.28880657297978374 + -5.93404016467969)))


Now we need a function that, given the genotype, returns the output of the predicted function. This function must receive the input vector in order to perform its operations.

In [40]:
def evaluate_program(program, x):
    """Evaluate a program tree on a single input sample.

    Args:
        program: A leaf (``"x[i]"`` variable string, stringified constant, or a
            plain int/float constant) or a list ``[format_symbol, child, ...]``.
        x: Indexable holding the input values of one sample; ``x[i]`` is read
            for a variable leaf ``"x[i]"``.

    Returns:
        The value computed by the program for ``x``.

    Raises:
        ValueError: If an inner node's symbol is not present in ``OPERATIONS``.
        TypeError: If a node is neither a string, a number nor a list.
    """
    # Plain numeric leaves: tweak_program_with_constant works on int/float
    # constants, so accept them here instead of silently returning None.
    if isinstance(program, (int, float)):
        return float(program)

    if isinstance(program, str):  # Leaf node
        # A string leaf is either a variable ("x[i]") or a stringified constant.
        if program[0] == 'x':
            return x[int(program[2:-1])]
        return float(program)

    if isinstance(program, list):
        symbol = program[0]
        op = next((fn for fn, _arity, fmt, _weight in OPERATIONS if fmt == symbol), None)
        if op is None:
            # Same error as program_to_string, instead of a bare StopIteration.
            raise ValueError(f"Not known operation: {symbol}")
        args = [evaluate_program(child, x) for child in program[1:]]
        try:
            return op(*args)
        except ZeroDivisionError:
            # Only relevant for callables that raise; numpy ufuncs normally
            # emit a warning and return inf/nan instead.
            return np.inf

    raise TypeError(f"Unsupported program node type: {type(program).__name__}")

As you may notice, this function uses __isinstance(element, type)__ to check whether "element" is an instance of "type", with the objective of understanding whether it is a __leaf node__. If this is the case we simply extract its value; otherwise we need to invoke the function recursively on its children.

## Fitness function

The fitness function of a solution should consider the mean squared error with respect to the expected results and the "complexity" of its formulation, in order to prefer simpler equations.

In [41]:
def mse(program, x, y):
    """Return 100 times the mean squared error of ``program`` on the dataset.

    Args:
        program: Program tree to evaluate.
        x: Input data; ``x.T`` is iterated, one sample per row of the transpose.
        y: Expected outputs, one per sample.

    Returns:
        The scaled mean squared error as a Python float.
    """
    outputs = [evaluate_program(program, sample) for sample in x.T]
    residuals = np.array(outputs) - y
    return float(100 * np.mean(np.square(residuals)))

In [42]:
def depth(program):
    """Compute the depth of a program tree.

    A leaf (variable string, constant string or plain number) has depth 1.
    An inner node ``[symbol, child, ...]`` has depth 1 plus the depth of its
    deepest child; a list with no children counts as depth 1.

    Args:
        program: Leaf or nested list program.

    Returns:
        The depth as an int (always >= 1).
    """
    if isinstance(program, list):
        # default=0 keeps a childless node from raising on an empty max().
        return 1 + max((depth(child) for child in program[1:]), default=0)
    # Every non-list node is a leaf, including numeric constants.
    return 1

In [43]:
# Funzione di fitness avanzata ispirata a deep_based_FGP_NLS.py
import math


def fitness_function(program, x, y):
    """
    Evaluate the fitness of a program (lower is better).

    The fitness is the scaled mean squared error (100 * MSE, the same value
    mse() returns) plus a small penalty proportional to the tree depth.

    Args:
        program: Program tree to score.
        x: Input data; ``x.T`` is iterated, one sample per row of the transpose.
        y: Expected outputs, one per sample.

    Returns:
        The fitness as a float, or ``np.inf`` when the program produces
        NaN/inf predictions, a non-finite error, or raises during evaluation.
    """
    try:
        # Evaluate the program once over the whole dataset.
        predictions = np.array([evaluate_program(program, x_row) for x_row in x.T])
        if np.any(np.isnan(predictions)) or np.any(np.isinf(predictions)):
            return np.inf  # Penalise invalid programs

        # Error computed from the predictions already in hand; calling mse()
        # here would evaluate the program a second time on every sample.
        error = float(100 * np.mean(np.square(predictions - y)))
        if not math.isfinite(error):
            return np.inf

        # Complexity penalty: a fixed cost per depth level. Scaling it by the
        # error was considered, but the right scale is problem dependent.
        complexity_penalty = depth(program) * 0.01
        fitness = error + complexity_penalty

    except Exception as e:
        # Penalise programs that raise errors
        print(f"Errore nella valutazione del programma: {e}")
        return np.inf

    return fitness

In [44]:
# Show another randomly generated 2-input program as a formula.
print(program_to_string(generate_program(2)))

((tan(x[0]) * 6.597657083149372) / (x[1] * -4.280383654297253))


PROVO AD APPLICARE LA FITNESS FUNCTION DI CRANMLER:

In [45]:
def custom_fitness_factory(function, greater_is_better=True):
    """
    Build a fitness callable around a metric ``function(y, y_pred, weights)``.

    Args:
        function: Metric taking the expected values, the predictions and the
            per-sample weights.
        greater_is_better: When False the metric value is negated before being
            returned.

    Returns:
        A callable ``fitness(program, x, y, weights=None)``.

    NOTE(review): the error penalty is +inf regardless of ``greater_is_better``,
    while the normal return value is sign-adjusted — confirm which direction
    the caller optimises in.
    """
    sign = -1
    if greater_is_better:
        sign = 1

    def fitness(program, x, y, weights=None):
        try:
            # Evaluate the program on every sample.
            outputs = [evaluate_program(program, x_row) for x_row in x.T]
            y_pred = np.array(outputs)

            # Uniform weights unless the caller supplied some.
            effective_weights = np.ones_like(y) if weights is None else weights

            # Sign-adjusted metric value.
            score = function(y, y_pred, effective_weights)
            return sign * score
        except Exception as e:
            print(f"Errore nella valutazione del programma: {e}")
            return np.inf  # Penalty for evaluation errors

    return fitness

# Definizione delle metriche
def mean_square_error(y, y_pred, weights):
    """Weighted mean of the squared differences between ``y`` and ``y_pred``."""
    residual = y - y_pred
    squared = residual ** 2
    return np.average(squared, weights=weights)

def mean_absolute_error(y, y_pred, weights):
    """Weighted mean of the absolute differences between ``y`` and ``y_pred``."""
    residual = y - y_pred
    magnitude = np.abs(residual)
    return np.average(magnitude, weights=weights)

# Ready-made fitness callables built with the factory. Both metrics are errors,
# so greater_is_better=False is passed (the factory then negates the metric value).
mse_fitness = custom_fitness_factory(mean_square_error, greater_is_better=False)
mae_fitness = custom_fitness_factory(mean_absolute_error, greater_is_better=False)


In [46]:
# def fitness_function(program, x, y):
#     """
#     Valuta la fitness di un programma utilizzando la metrica selezionata.
#     """
#     return mse_fitness(program, x, y)


## Tweak function

There is a lot of __variability__ that has to be considered for the tweak function. We can now imagine to implement a recursive function that receives the program (which indicates the current function that we are using for the task), the number of dimensions for the input and the maximum depth allowed for the tweaked solution.

Recursively, if we end up in a leaf node, or (with probability 0.3) even if the current node is still a list, we simply generate a new sub-program in its place.

Otherwise, we simply invoke the same function for a random index.

In [47]:
def mutate_program(program, input_dim, depth=3):
    """Mutate a program by regenerating one randomly reached subtree.

    At each node a fresh random program replaces the node with probability 0.3,
    or unconditionally when the node is a leaf; otherwise the walk descends
    into one randomly chosen child with a reduced depth budget.

    The input program is modified in place when the walk descends.
    NOTE(review): a later cell defines another ``mutate_program`` that replaces
    this one once that cell is executed.

    Args:
        program: Program tree (list) or leaf (string).
        input_dim: Number of input variables.
        depth: Depth budget handed to ``random_program`` for the replacement.

    Returns:
        The mutated program.
    """
    regenerate_here = random() < 0.3
    if not regenerate_here and isinstance(program, list):
        # Descend into a random child (index 0 holds the operator symbol).
        idx = randint(1, len(program) - 1)
        program[idx] = mutate_program(program[idx], input_dim, depth - 1)
        return program

    fresh_program, _ = random_program(depth, input_dim)
    return fresh_program

Other function (FIGP based)

In [48]:
def mutate_program(program, input_dim, max_depth=3):
    """Mutation that replaces one random top-level subtree of a copy of the program.

    Only direct children of the root that are lists are candidates. When there
    are none, a brand new random program of depth ``max_depth`` is returned.

    Args:
        program: Program to mutate; it is deep-copied and left untouched.
        input_dim: Number of input variables.
        max_depth: Upper bound for the depth of the generated replacement.

    Returns:
        The mutated copy, or a new random program.
    """
    mutant = copy.deepcopy(program)

    # Positions of the root's children that are themselves subtrees.
    candidate_slots = []
    for position, node in enumerate(mutant):
        if isinstance(node, list):
            candidate_slots.append(position)

    if len(candidate_slots) == 0:
        fresh_program, _ = random_program(max_depth, input_dim)  # Generate a new program
        return fresh_program

    slot = choice(candidate_slots)
    replacement_depth = randint(1, max_depth)
    replacement, _ = random_program(replacement_depth, input_dim)  # New subtree
    mutant[slot] = replacement
    return mutant


In [49]:
#Functions to find all the subtree recursively and to access them 
def get_subtree_points_recursive(prog, path='', index=0, result=None):
    """Collect the paths of all nested subtrees of a program.

    A path is a string such as ``"[2][1]"`` giving the successive list indices
    from the root. The root itself (empty path) is never reported, and lists
    with fewer than two elements are ignored.

    Args:
        prog: Program tree or leaf.
        path: Path of ``prog`` relative to the root; empty for the root.
        index: Unused; kept for compatibility with existing calls.
        result: List the paths are appended to; a new list when omitted.

    Returns:
        The list of subtree paths, in depth-first order.
    """
    collected = [] if result is None else result

    # Only lists with at least one element besides the operator are subtrees.
    if not isinstance(prog, list) or len(prog) <= 1:
        return collected

    if path:
        collected.append(path)
    for position, child in enumerate(prog):
        if isinstance(child, list):  # Explore nested lists further
            get_subtree_points_recursive(child, f"{path}[{position}]", position, collected)
    return collected
def access_node_by_path(prog, path):
    """Return the node of ``prog`` found by following ``path``.

    Args:
        prog: Program tree.
        path: Path string such as ``"[2][1]"``; an empty string selects the root.

    Returns:
        The node at that path (the object itself, not a copy).
    """
    # Split the path on "][" and drop the leftover brackets from each piece.
    pieces = filter(None, path.split(']['))
    indices = list(map(lambda piece: int(piece.strip('][')), pieces))

    node = prog
    for step in indices:
        node = node[step]
    return node

#Returns a set of integers representing the indices of the variables in the program
def find_variable_indices(node, result=None):
    """Collect the indices of all variables (``"x[i]"`` leaves) in a program.

    Args:
        node: Program tree or leaf.
        result: Set the indices are added to; a new set when omitted.

    Returns:
        The set of integer variable indices.
    """
    found = set() if result is None else result

    # Explicit stack instead of recursion; lists are expanded, strings inspected.
    pending = [node]
    while pending:
        current = pending.pop()
        if isinstance(current, list):
            pending.extend(current)
        elif isinstance(current, str) and current.startswith('x['):
            found.add(int(current[2:-1]))
    return found

def set_subtree_at_path(program, path, new_subtree):
    """Put ``new_subtree`` at ``path`` inside ``program``.

    Args:
        program: Program tree, modified in place.
        path: Path string such as ``"[2][1]"``; an empty string means the root.
        new_subtree: Node to store at that position.

    Returns:
        ``new_subtree`` when the path is empty, otherwise ``program``.
    """
    if path == '':
        return new_subtree

    # All indices but the last lead to the parent of the target node.
    *ancestors, last = map(int, path.strip('][').split(']['))
    parent = program
    for step in ancestors:
        parent = parent[step]
    parent[last] = new_subtree  # Replace the subtree
    return program
#given a program, a path and an operation, swap the operation in that point
def swap_operation_at_path(program, path, new_op):
    """Replace the operator of the subtree found at ``path``.

    Only the first element (the operator symbol) of the target is changed;
    nothing happens when the target is not a list.

    Args:
        program: Program tree, modified in place.
        path: Non-empty path string such as ``"[2][1]"``.
        new_op: New operator symbol.

    Returns:
        ``program``.
    """
    # All indices but the last lead to the parent of the target node.
    *ancestors, last = (int(token) for token in path.strip('][').split(']['))
    holder = program
    for step in ancestors:
        holder = holder[step]

    target = holder[last]
    if isinstance(target, list):  # Only subtrees carry an operator
        target[0] = new_op
    return program



Other function (FIGP based)

The idea of the mutation is that it recursively finds all the subtrees of the tree and randomly selects one of them. It checks which variables appear in that subtree and generates a new tree containing the same variables to be placed at that position: this ensures that we always consider all the dimensions of the input. 
To be done: maintain the depth under MAX_DEPTH and, with a certain probability, only swap the operation of a subtree instead of replacing the entire subtree, or do both (which currently happens sometimes).

If the selected subtree does not contain any variable, we replace it with a constant value.

In [50]:
def mutate(program, input_dim, max_depth=3):
    """Mutate a program by regenerating one randomly chosen subtree.

    The program is deep-copied first, so the input is left untouched. The
    replacement is generated so that it may only place the variables that
    were present in the subtree being replaced. With probability 0.4 the
    operator of a randomly chosen subtree is additionally swapped for a random
    binary operator (only when that subtree has exactly two children).

    Args:
        program: Program tree to mutate.
        input_dim: Number of input variables.
        max_depth: Depth budget for the replacement when the selected subtree
            holds more than one variable.

    Returns:
        The mutated copy, or a brand new program from ``generate_program``
        when the tree has no nested subtree to select.
    """
    mutant = copy.deepcopy(program)

    # Paths are computed on the original; the copy has the same structure.
    points = get_subtree_points_recursive(program)
    if not points:
        return generate_program(input_dim)

    point = choice(points)
    subtree = access_node_by_path(mutant, point)
    
    # Variables present in the subtree that is about to be replaced.
    variable_set = find_variable_indices(subtree)
    
    # Complement: passed to random_program as the "already used" indices, so
    # only the variables from variable_set can be placed again.
    variables = set(range(input_dim))-variable_set
    
    new_subtree = []
    if(len(variable_set)==0):
        #return generate_program(input_dim)
        # No variable in the subtree: collapse it to a constant in [1, 10).
        new_subtree = str(random()*9+1)
        return set_subtree_at_path(mutant, point, new_subtree)
    elif(len(variable_set)==1):
        # A single variable: a depth-1 replacement is generated.
        new_subtree = random_program(1, input_dim,False,variables)[0]
    else:
        new_subtree = random_program(max_depth, input_dim,False,variables)[0]
    mutant = set_subtree_at_path(mutant, point, new_subtree)
    # Optional second step: swap the operator of some subtree.
    if(random()<0.4):
            points = get_subtree_points_recursive(mutant)
            if not points:
                return mutant
            point = choice(points)
            # Randomly choose a binary operation
            op, arity, symbol,p = choices(OPERATIONS_BINARY,weights=BINARY_WEIGHTS, k=1)[0]
            # Only swap on nodes with two children (operator + 2 operands).
            if(len(access_node_by_path(mutant, point)) == 3):
                mutant = swap_operation_at_path(mutant, point, symbol)

    return mutant

In [51]:
# Demo: print a random 2-input program, then a mutated version of it.
prog = generate_program(2)
print(program_to_string(prog))
mutated = mutate(prog, 2)
print(program_to_string(mutated))

((x[0] ^ (4.038549501562752 + -8.728664529167734)) * (x[1] * (-1.870455881379831 + 3.792076628242043)))
((x[0] ^ 1.0315691987817703) * (x[1] * (-1.870455881379831 + 3.792076628242043)))


Let's create a new tweak function that tweaks a program by adding a unary operation on a leaf node (if the leaf is neither a constant value nor already the argument of a unary operation).

In [52]:
def tweak_program_2(program):
    """
    Wrap one randomly chosen variable leaf of the program in a unary operator.

    A leaf is eligible only if it is a variable (a string starting with
    ``x[``) and is not already the argument of a unary operator. Constant
    leaves are never wrapped. The program is modified in place.

    Args:
        program: Program tree (list) or leaf.

    Returns:
        The same ``program`` object, changed in place when an eligible leaf
        exists and unchanged otherwise (including when ``program`` is a leaf).
    """
    unary_templates = ["sin({})", "cos({})", "tan({})", "log({})", "sqrt({})"]

    def eligible_leaf_slots(node):
        """Return (parent, index) pairs for every variable leaf that may be wrapped."""
        slots = []
        if not isinstance(node, list) or len(node) < 2:
            return slots
        # A node with a single child is a unary operator: its leaf is skipped.
        parent_is_unary = len(node) == 2
        for i, child in enumerate(node[1:], start=1):  # Skip the operator
            if isinstance(child, list):
                slots.extend(eligible_leaf_slots(child))
            elif (not parent_is_unary
                  and isinstance(child, str)
                  and child.startswith('x[')):
                slots.append((node, i))
        return slots

    slots = eligible_leaf_slots(program)
    if not slots:
        return program  # No change possible

    # Pick a leaf at random, then a unary operator to apply to it.
    parent, index = choice(slots)
    parent[index] = [choice(unary_templates), parent[index]]
    return program


Let's also introduce a function that simply searches for a leaf node holding a constant (numerical) value and multiplies it by a randomly selected factor between 1.0 and 2.0, which also converts the number to a float value.

In [53]:
from random import choice, uniform

def tweak_program_with_constant(program):
    """
    Scale one randomly chosen constant leaf by a random factor in [1.0, 2.0].

    Constants are plain numbers (int/float) or strings that parse as a float
    (the form in which random_program stores constants). A string constant is
    written back as a string, a numeric one as a number.

    Args:
        program: Program tree (list), or a single leaf.

    Returns:
        The same ``program`` object with one constant scaled in place. When
        ``program`` itself is a constant leaf the scaled value is returned.
        A program without constants is returned unchanged.
    """
    def is_constant(leaf):
        """True for numbers and for strings that can be parsed as a float."""
        if isinstance(leaf, (int, float)):
            return True
        if isinstance(leaf, str):
            try:
                float(leaf)
            except ValueError:
                return False
            return True
        return False

    def scaled(leaf):
        """Multiply a constant by a random factor, keeping its representation."""
        multiplier = uniform(1.0, 2.0)  # Random multiplier between 1.0 and 2.0
        if isinstance(leaf, str):
            return str(float(leaf) * multiplier)
        return leaf * multiplier

    def constant_slots(node):
        """Return (parent, index) pairs for every constant leaf, left to right."""
        slots = []
        if isinstance(node, list) and len(node) > 1:
            for i, child in enumerate(node[1:], start=1):  # Skip the operator
                if isinstance(child, list):
                    slots.extend(constant_slots(child))
                elif is_constant(child):
                    slots.append((node, i))
        return slots

    # A bare constant has no parent to write into: return the scaled value.
    if is_constant(program):
        return scaled(program)

    slots = constant_slots(program)
    if not slots:
        return program  # No constant to modify

    parent, index = choice(slots)
    parent[index] = scaled(parent[index])
    return program

# Usage example: a small hand-written tree with numeric constant leaves.
program = ['+', 3, ['*', ['sin', 'x[0]'], 5], ['-', 7, 2]]
modified_program = tweak_program_with_constant(program)
print(modified_program)


['+', 3, ['*', ['sin', 'x[0]'], 5.295995572809986], ['-', 7, 2]]


## Crossover

We can use a crossover function that receives only 2 parents and, if one of them is a leaf program, simply returns the 2 programs unchanged (avoiding the operation for programs with no children). Otherwise, it selects a random subtree in each parent and swaps them, returning the two new individuals.

In [54]:
def crossover(parent1, parent2, max_depth=3):
    """Crossover between two programs by swapping one top-level subtree each.

    Both parents are deep-copied first and every returned program is built
    only from those copies, so the children never share nodes with the
    parents (or with each other).

    Args:
        parent1: First parent program.
        parent2: Second parent program.
        max_depth: Unused; kept for interface compatibility.

    Returns:
        A tuple ``(child1, child2)``. When either parent is too small or has
        no list child at the top level, copies of the parents are returned.
    """
    child1, child2 = copy.deepcopy(parent1), copy.deepcopy(parent2)

    if len(parent1) < 2 or len(parent2) < 2:
        return child1, child2  # Skip crossover when the programs are too small

    # Valid crossover points: direct children that are subtrees
    def get_subtree_points(prog):
        return [i for i, node in enumerate(prog) if isinstance(node, list)]

    points1 = get_subtree_points(child1)
    points2 = get_subtree_points(child2)

    if not points1 or not points2:
        return child1, child2  # No valid crossover point

    point1 = choice(points1)
    point2 = choice(points2)

    # Swap the subtrees between the two copies (no aliasing with the parents).
    child1[point1], child2[point2] = child2[point2], child1[point1]

    return child1, child2

New crossover function that swaps two subtrees only if they use the same set of variables (to ensure that the result still contains each component of the input variable exactly once).

In [55]:
import copy
from random import choice

def check_depth(subtree, max_depth):
    """Tell whether the nesting depth of ``subtree`` is at most ``max_depth``.

    A non-list (or an empty list) counts as level 0; every element of a list,
    the operator symbol included, sits one level below the list itself.

    Args:
        subtree: Program tree or leaf.
        max_depth: Largest allowed nesting level.

    Returns:
        True when the deepest level does not exceed ``max_depth``.
    """
    def deepest_level(tree, level):
        if isinstance(tree, list) and tree:
            return max(deepest_level(item, level + 1) for item in tree)
        return level

    return max_depth >= deepest_level(subtree, 0)

def crossover(parent1, parent2, max_depth,input_dim):
    """Single-point crossover with variable-set and depth checks.

    Works on deep copies of the parents. Up to 100 random pairs of subtrees
    are tried; a pair is swapped only if both subtrees contain the same
    variables, that set covers all ``input_dim`` variables, and both subtrees
    pass ``check_depth``. If no pair qualifies, the copies are returned
    unchanged.

    Args:
        parent1: First parent program.
        parent2: Second parent program.
        max_depth: Depth limit handed to ``check_depth`` for each subtree.
        input_dim: Number of input variables.

    Returns:
        A tuple ``(child1, child2)``; two fresh random programs when either
        parent has no nested subtree.
    """
    child1 = copy.deepcopy(parent1)
    child2 = copy.deepcopy(parent2)

    points1 = get_subtree_points_recursive(child1)
    points2 = get_subtree_points_recursive(child2)

    if not (points1 and points2):
        return generate_program(input_dim), generate_program(input_dim)

    # Bounded number of attempts to avoid looping forever
    for _attempt in range(100):
        point1 = choice(points1)
        point2 = choice(points2)

        subtree1 = access_node_by_path(child1, point1)
        subtree2 = access_node_by_path(child2, point2)

        variable_set_1 = find_variable_indices(subtree1)
        variable_set_2 = find_variable_indices(subtree2)

        # The subtrees are swappable only if variables and depth both agree
        swappable = (
            variable_set_1 == variable_set_2
            and len(variable_set_1) == input_dim
            and check_depth(subtree1, max_depth)
            and check_depth(subtree2, max_depth)
        )
        if swappable:
            child1 = set_subtree_at_path(child1, point1, subtree2)
            child2 = set_subtree_at_path(child2, point2, subtree1)
            break

    return child1, child2


## Genetic algorithm

### Data loading

In [56]:
# Load the problem from data/problem_X.npz, for X that goes from 0 to 8.
problem = np.load('data/problem_4.npz')
# The archive provides the inputs under 'x' and the targets under 'y'.
x = problem['x']
y = problem['y']
# For problem 4 the recorded output shows x as (2, 5000) and y as (5000,),
# i.e. one row per input variable and one column per sample.
print(x.shape)
print(y.shape)
# NOTE(review): x[0:10] slices along the first axis (the variables), so it
# shows whole rows rather than the first 10 samples — confirm this is intended.
print(x[0:10])
print(y[0:10])

(2, 5000)
(5000,)
[[ 3.15087424 -0.14015422 -2.77684915 ... -3.48341583  4.08664981
  -4.37254358]
 [-1.73013313 -1.69649662 -1.408818   ... -3.28548272 -2.58382568
   3.30721333]]
[ 1.88232927  2.41457111  4.66075397 -3.65489259 -3.54534824 -3.33158294
  6.42403265 -3.65243484  8.52126374 -3.94847988]


## Tournament selection for parents, tau set to 20

In [57]:
#The professor said that fitness hole could work in this case->implemented
#Returning the worst one with a low probability, otherwise the best one
def tournament_selection(population,tau=20):
    """Pick a parent genome with a tournament of size ``tau``.

    ``tau`` distinct individuals are drawn at random and ranked by fitness
    (lowest first). The best one wins with probability 0.9; otherwise one of
    the remaining contestants (second best to worst) is chosen uniformly.

    Args:
        population: Sequence of individuals exposing ``fitness`` and ``genome``.
        tau: Tournament size; capped at the population size.

    Returns:
        The ``genome`` of the selected individual.
    """
    tau = min(tau, len(population))  # Never draw more contestants than available
    tournament_indices = np.random.choice(len(population), tau, replace=False)

    contestants = sorted(
        (population[index] for index in tournament_indices),
        key=lambda i: i.fitness,
    )

    pick_best = random() < 0.9
    # With a single contestant there is no runner-up to fall back on;
    # randint(1, 0) would raise ValueError.
    if pick_best or len(contestants) == 1:
        return contestants[0].genome

    # Select one among the second best and the worst
    return contestants[randint(1, len(contestants) - 1)].genome


## Simulated annealing

In [58]:
import math


def simulated_annealing(initial_program, x, y, max_iterations=50, initial_temperature=100, cooling_rate=0.95):
    """Refine a program with simulated annealing, using ``mutate`` as the move.

    Args:
        initial_program: Starting program tree.
        x: Input data; ``x.shape[0]`` is used as the number of input variables.
        y: Expected outputs.
        max_iterations: Maximum number of candidate moves.
        initial_temperature: Starting temperature.
        cooling_rate: Factor applied to the temperature after every iteration.

    Returns:
        The program with the lowest fitness among the accepted ones
        (the initial program if no accepted candidate improved on it).
    """
    # Current state and the best state seen so far
    current_program = initial_program
    current_fitness = fitness_function(current_program, x, y)
    best_program, best_fitness = current_program, current_fitness

    temperature = initial_temperature

    for _iteration in range(max_iterations):
        # Propose a candidate through one mutation
        candidate_program = mutate(current_program, x.shape[0])
        candidate_fitness = fitness_function(candidate_program, x, y)

        fitness_delta = candidate_fitness - current_fitness

        # Always accept improvements; accept worse moves with a probability
        # that shrinks as the temperature drops
        improved = fitness_delta < 0
        if improved or random() < math.exp(-fitness_delta / temperature):
            current_program, current_fitness = candidate_program, candidate_fitness

            # Track the best accepted program
            if current_fitness < best_fitness:
                best_program, best_fitness = current_program, current_fitness

        # Cool down, and stop once the temperature is too low
        temperature *= cooling_rate
        if temperature < 1e-3:
            break

    return best_program

## Parameters

Parametri precedenti:

In [59]:
# GP parameters
generations = 200
population_size = 1000
p_crossover = 0.6
p_mutation = 0.4
tweak_probability = 0.1
max_depth = 2 
elite_size = 2
offspring_size = population_size # Number of offspring generated per generation

Parametri solo per il testing: ho cercato di scegliere dei parametri che fossero più leggeri per velocizzare le operazioni:

In [60]:
import copy
def safe_copy(obj):
    """Return a copy of a program that is safe to modify.

    Args:
        obj: A program tree (list) or a leaf (string).

    Returns:
        A deep copy for lists; the string itself for strings.

    Raises:
        TypeError: If ``obj`` is neither a string nor a list.
    """
    # Strings are immutable, so they can be handed back directly.
    if isinstance(obj, str):
        return obj
    # Lists are deep-copied so nested subtrees are independent as well.
    if isinstance(obj, list):
        return copy.deepcopy(obj)
    raise TypeError("Tipo non supportato: solo stringhe o liste sono consentite.")

Different EA approach:
1. In this version we always include the elite in the next generation as a first step.
2. We extend the population with the new offspring (which results in having the elites twice).
3. We keep only the distinct individuals.
4. We maintain in the population only the population_size best individuals.

NUOVO CODICE PRESO DA QUELLO DI GIT:

In [61]:


def get_subtree(program):
    """
    Pick a random subtree of a program represented as a recursive list.

    At every list node an index is drawn uniformly: index 0 selects the node
    itself, any other index continues the search inside that child.

    Args:
        program: Program tree (list) or leaf (string or number).

    Returns:
        A tuple ``(subtree, parent_info)``. ``parent_info`` is ``None`` when
        the selected subtree is ``program`` itself; otherwise it is
        ``(parent, index)`` with ``parent[index] is subtree``, i.e. the
        immediate parent of the selection.
    """
    if isinstance(program, (str, int, float)):
        # A leaf (variable or constant) has no subtrees
        return program, None
    if not program:
        # Nothing to select inside an empty list
        return program, None

    # Pick a random node (the root or one of the children)
    position = randint(0, len(program) - 1)
    if position == 0:
        # Root: return the whole program
        return program, None

    # Recurse into the chosen child
    subtree, inner_parent = get_subtree(program[position])
    if inner_parent is None:
        # The child itself was selected, so this node is its parent
        return subtree, (program, position)
    # The selection lies deeper: keep the parent found by the recursion
    return subtree, inner_parent

def crossover(parent1, parent2):
    """
    Crossover between two programs: a random subtree of ``parent1`` is
    replaced by a random subtree of ``parent2``.

    Neither parent is modified: the replacement happens on a deep copy of
    ``parent1`` and the donated subtree is copied as well, so the child shares
    no nodes with the parents.

    Args:
        parent1: Program receiving the subtree.
        parent2: Program donating the subtree.

    Returns:
        The child program.
    """
    child = copy.deepcopy(parent1)

    # Pick the slot to replace in the copy and the donor subtree in parent2
    _, slot = get_subtree(child)
    donor, _ = get_subtree(parent2)
    donor = copy.deepcopy(donor)

    if slot is None:
        # The whole of parent1 is replaced by the donor subtree
        return donor

    holder, idx = slot
    holder[idx] = donor
    return child

def subtree_mutation(program, random_program_generator):
    """
    Replace a random subtree with a randomly generated one.

    The input program is not modified; the mutation is applied to a deep copy.

    Args:
        program: Program to mutate.
        random_program_generator: Zero-argument callable returning a new
            random program.

    Returns:
        The mutated copy, or the newly generated program when the whole
        program was selected for replacement.
    """
    # Generate the new random program
    new_subtree = random_program_generator()

    # Pick the slot to replace on a copy, so the caller's program stays intact
    mutant = copy.deepcopy(program)
    _, parent_info = get_subtree(mutant)

    if parent_info is None:
        # Replace the whole program
        return new_subtree

    parent, idx = parent_info
    parent[idx] = new_subtree
    return mutant

def hoist_mutation(program):
    """
    Hoist mutation: replace a random subtree with one of its own inner subtrees.

    The input program is not modified; the mutation is applied to a deep copy.

    Args:
        program: Program to mutate.

    Returns:
        The mutated copy. When the selected subtree is a leaf nothing can be
        hoisted and an unchanged copy is returned.
    """
    mutant = copy.deepcopy(program)

    # Pick a random subtree
    subtree, parent_info = get_subtree(mutant)

    if not isinstance(subtree, list):
        # A leaf has nothing inside it to hoist
        return mutant

    # Pick a subtree inside the selected subtree
    inner_subtree, _ = get_subtree(subtree)

    if parent_info is None:
        # The whole program is replaced by the inner subtree
        return inner_subtree

    parent, idx = parent_info
    parent[idx] = inner_subtree
    return mutant

def point_mutation(program, function_set, terminal_set, mutation_rate=0.1):
    """
    Point mutation: swap functions or terminals for similar ones, node by node.

    Every node is visited; each one is mutated independently with probability
    ``mutation_rate``. List nodes are modified in place.

    Args:
        program: Program (list node, string terminal or numeric constant).
        function_set: Candidate function entries; item ``[1]`` of each entry is
            its arity.
        terminal_set: Candidate replacements for string terminals.
        mutation_rate: Per-node mutation probability (default 0.1).

    Returns:
        The mutated program. Leaves are returned as new values because strings
        and numbers are immutable.
    """
    if isinstance(program, list):
        # Function node: mutate the operands first (slot 0 holds the function).
        for i in range(1, len(program)):
            program[i] = point_mutation(program[i], function_set, terminal_set, mutation_rate)

        # Swap the function for another one of the same arity.
        if random() < mutation_rate:
            arity = len(program) - 1
            same_arity = [f for f in function_set if f[1] == arity]
            # Keep the current function when no candidate has a matching arity.
            if same_arity:
                program[0] = choice(same_arity)

    elif isinstance(program, str):
        # Terminal node: replace with another terminal.
        if random() < mutation_rate and terminal_set:
            program = choice(terminal_set)

    elif isinstance(program, (int, float)):
        # Constant node: small perturbation. ``random`` is the function imported
        # from the ``random`` module, so ``uniform`` must be called directly.
        if random() < mutation_rate:
            program += uniform(-1, 1)

    return program


In [62]:
# Primitive set for point mutation. Each function entry is
# (name, arity, display template).
function_set = [
    (name, 2, f'({{}} {symbol} {{}})')
    for name, symbol in (('add', '+'), ('sub', '-'), ('mul', '*'), ('div', '/'))
] + [
    (name, 1, f'{name}({{}})')
    for name in ('sin', 'cos')
]

# Terminals: two input variables followed by three numeric constants.
terminal_set = [f'x[{i}]' for i in range(2)] + [1.0, 2.0, 3.0]


In [63]:
# # Inizializza popolazione
# np.seterr(all='ignore')
# input_dim = x.shape[0]
# population = [Individual(genome=generate_program(input_dim)) for _ in range(population_size)]
# for i in population:
#     i.fitness=fitness_function(i.genome, x, y)
# # for i in range (10):
# #     ind = population[i]
# #     gen = simulated_annealing(ind.genome, x, y)
# #     fit = fitness_function(gen, x, y)
# #     population[i] = Individual(genome=gen, fitness=fit)
# # Loop principale per le generazioni
# def run_genetic_algorithm():
#     global population

#     for gen in range(generations):
#         population.sort(key=lambda i: i.fitness)
#         mse_to_print = mse(population[0].genome, x, y)
        
#         #break the cycle if you found the best solution you're able to find with training data
#         if(mse_to_print==0.0000): 
#             np.seterr(all='warn')
#             print('Best program found with mse=0')
#             return population[0].genome

#         np.seterr(all='warn')
#         print(f"Generazione {gen + 1}, miglior fitness: {mse_to_print:.6f}")
#         #population is already sorted, so:
#         print(f"Best formula: {program_to_string(population[0].genome)}")
#         np.seterr(all='ignore')
        
#         # Crea la nuova generazione
#         next_population = []
#         next_population.extend(population[:elite_size])  # Mantieni i migliori individui
#         # if(gen==6):
#         #     for el in population:
#         #         print(f"Formula: {program_to_string(el.genome)}; Fitness: {el.fitness}")
        
#         while len(next_population) < offspring_size:
#             if random() < p_crossover:
#                 # Crossover
#                 #With random choice is much faster than tournament selection
#                 #Choose the best and the second best parent
#                 parent1, parent2 = tournament_selection(population), tournament_selection(population)
#                 child1 = crossover(safe_copy(parent1), safe_copy(parent2))
                
#                 if random() < p_mutation:
#                     child1 = hoist_mutation(safe_copy(child1))
                
#                 #let's add the new individuals:
#                 next_population.append(Individual(genome=child1, fitness=fitness_function(child1, x, y)))
            
#                 if random() < tweak_probability and len(next_population) < offspring_size:
#                     #new_ind = simulated_annealing(safe_copy(child1), x, y)
#                     new_ind = mutate(safe_copy(child1), input_dim)
#                     next_population.append(Individual(genome=new_ind, fitness=fitness_function(new_ind, x, y)))
#             else:
#                 # Mutate directly a parent
#                 parent = tournament_selection(population)
#                 mutant = hoist_mutation(safe_copy(parent))
#                 next_population.append(Individual(genome=mutant, fitness=fitness_function(mutant, x, y)))
                    
#         # the new population is the one generated in the offspring
#         population.extend(next_population)

#         # Remove duplicates
#         unique_population = {}
#         for prog in population:
#             serialized = str(prog)
#             if serialized not in unique_population:
#                 unique_population[serialized] = prog
        
#         # update fitness of the new population
#         population = list(unique_population.values())
        
#         population.sort(key=lambda i: i.fitness)
#         population = population[:population_size]
#         # for ind in population:
#         #     ind.fitness = fitness_function(ind.genome, x, y)

#     # Identify the best program
#     population.sort(key=lambda i: i.fitness)
#     best_program = population[0]
#     best_mse = mse(best_program.genome, x, y)

#     print("Miglior programma:", best_program.genome, "; Fitness:", best_mse)
#     return best_program.genome

# best_program = run_genetic_algorithm()

In [64]:
#program_to_string(best_program)

In [65]:
def get_subtree(program):
    """
    Pick one direct operand of ``program`` uniformly at random.

    The selection is not recursive: only the immediate operands of the given
    node (slots 1..len-1; slot 0 holds the operator) can be returned.

    Args:
        program: A list node ``[operator, operand, ...]`` or a leaf.

    Returns:
        ``(subtree, (program, idx))`` where ``program[idx] is subtree``, or
        ``(program, None)`` when ``program`` has no operand to choose from
        (any non-list value, or a list holding only an operator).
    """
    # Anything that is not a list is a leaf. This also covers values that are
    # neither str, int nor float, which have no usable operand slots.
    if not isinstance(program, list) or len(program) < 2:
        return program, None
    idx = randint(1, len(program) - 1)
    return program[idx], (program, idx)

def replace_subtree(program, target, replacement):
    """
    Replace one occurrence of ``target`` inside ``program`` with ``replacement``.

    A node matches when it is the same object as ``target`` or compares equal
    to it. Both list subtrees and leaves can be replaced. Direct operands are
    checked before descending, and only the first match is replaced so that a
    single genetic operation touches a single position.

    Args:
        program: Program to edit (list nodes are modified in place).
        target: Subtree or leaf to look for.
        replacement: Value stored in place of the match.

    Returns:
        ``replacement`` when ``program`` itself matches, otherwise ``program``
        (unchanged when nothing matches).
    """
    def matches(node):
        return node is target or node == target

    def replace_first(node):
        # Slot 0 holds the operator, so only operand slots are candidates.
        for i in range(1, len(node)):
            if matches(node[i]):
                node[i] = replacement
                return True
        # No direct operand matched: search deeper, stopping at the first hit.
        for i in range(1, len(node)):
            if isinstance(node[i], list) and replace_first(node[i]):
                return True
        return False

    if matches(program):
        return replacement
    if isinstance(program, list):
        replace_first(program)
    return program

def subtree_mutation(program, random_program_generator):
    """
    Replace a randomly selected subtree with a freshly generated program.

    The selected position is overwritten directly through the location that
    ``get_subtree`` returns, so leaf operands are replaced too and exactly one
    position changes.

    Args:
        program: Program to mutate (modified in place).
        random_program_generator: Zero-argument callable returning a new program.

    Returns:
        The mutated ``program``, or the generated program itself when
        ``get_subtree`` reports no parent location (``program`` is a leaf).
    """
    _, location = get_subtree(program)
    new_subtree = random_program_generator()

    if location is None:
        return new_subtree

    holder, idx = location
    holder[idx] = new_subtree
    return program

def hoist_mutation(program):
    """
    Hoist mutation: replace a random subtree with one of its own subtrees.

    The selected position is overwritten directly through the location that
    ``get_subtree`` returns, so exactly one position changes even when the
    hoisted subtree is a leaf.

    Args:
        program: Program to mutate (modified in place).

    Returns:
        The mutated ``program``. It is returned unchanged when ``program`` is a
        leaf or when the selected subtree has nothing inside it to hoist.
    """
    subtree, location = get_subtree(program)
    if location is None:
        # ``program`` itself is a leaf: nothing to hoist.
        return program

    inner_subtree, inner_location = get_subtree(subtree)
    if inner_location is None:
        # The selected subtree is a leaf: nothing inside it to promote.
        return program

    holder, idx = location
    holder[idx] = inner_subtree
    return program

def crossover(parent1, parent2):
    """
    Graft a random subtree of ``parent2`` onto a random position of ``parent1``.

    The selected position in ``parent1`` is overwritten directly through the
    location that ``get_subtree`` returns, so leaf operands can be replaced and
    exactly one position changes. The donor subtree is deep-copied so the child
    never shares mutable nodes with ``parent2``.

    Args:
        parent1: Receiving program (modified in place).
        parent2: Donor program (left untouched).

    Returns:
        ``parent1`` with one subtree replaced, or a copy of the donor subtree
        when ``get_subtree`` reports no parent location (``parent1`` is a leaf).
    """
    _, location = get_subtree(parent1)
    donor, _ = get_subtree(parent2)
    donor = copy.deepcopy(donor)

    if location is None:
        return donor

    holder, idx = location
    holder[idx] = donor
    return parent1


In [66]:


def evolution_algorithm(population, x, y, generations, method_probs, tournament_size, random_program_generator, fitness_function, function_set=None, terminal_set=None):
    """
    Evolve a population through crossover, mutation and reproduction.

    Args:
        population: List of initial programs. It is never modified: every
            selected parent is deep-copied before an operator is applied.
        x: Input data, forwarded to ``fitness_function``.
        y: Targets, forwarded to ``fitness_function``.
        generations: Number of generations to run.
        method_probs: Per-operator probabilities in the order
            ``[crossover, subtree mutation, hoist mutation, point mutation]``.
            Whatever probability mass remains (e.g. a fifth entry) goes to plain
            reproduction. Missing entries count as 0.
        tournament_size: Number of individuals drawn for each tournament; must
            not exceed ``len(population)`` (``random.sample`` raises
            ``ValueError`` otherwise).
        random_program_generator: Zero-argument callable returning a random
            program, used by subtree mutation.
        fitness_function: ``fitness_function(program, x, y)``; lower is better.
        function_set: Optional function set for point mutation.
        terminal_set: Optional terminal set for point mutation. When either set
            is omitted, the point-mutation slot falls back to hoist mutation.

    Returns:
        ``(best_program, population)``: the best program seen among the
        generated offspring (``None`` when ``generations`` is 0) and the final
        population.
    """
    # method_probs holds one probability per operator, while the selection
    # below compares a single uniform draw against cut points, so the
    # probabilities must be accumulated first.
    operator_probs = list(method_probs[:4])
    operator_probs += [0.0] * (4 - len(operator_probs))
    cut_points = []
    cumulative = 0.0
    for prob in operator_probs:
        cumulative += prob
        cut_points.append(cumulative)
    crossover_cut, subtree_cut, hoist_cut, point_cut = cut_points

    use_point_mutation = function_set is not None and terminal_set is not None

    best_program = None
    best_fitness = float('inf')
    fitnesses = None

    def tournament():
        # Tournament selection on cached fitness values: each individual is
        # evaluated once per generation instead of once per tournament entry.
        contenders = sample(range(len(population)), tournament_size)
        return population[min(contenders, key=fitnesses.__getitem__)]

    for gen in range(generations):
        if fitnesses is None:
            fitnesses = [fitness_function(program, x, y) for program in population]

        new_population = []

        while len(new_population) < len(population):
            # The genetic operators edit their argument in place, so they must
            # work on copies; otherwise they would corrupt the current
            # population and make children share nodes.
            parent1 = copy.deepcopy(tournament())

            # Pick the genetic operation.
            method = random()

            if method < crossover_cut:
                # Crossover
                parent2 = copy.deepcopy(tournament())
                child = crossover(parent1, parent2)
            elif method < subtree_cut:
                # Subtree mutation
                child = subtree_mutation(parent1, random_program_generator)
            elif method < hoist_cut:
                # Hoist mutation
                child = hoist_mutation(parent1)
            elif method < point_cut:
                # Point mutation (needs the primitive sets; hoist otherwise)
                if use_point_mutation:
                    child = point_mutation(parent1, function_set, terminal_set)
                else:
                    child = hoist_mutation(parent1)
            else:
                # Reproduction: the copy of the parent goes through unchanged.
                child = parent1

            new_population.append(child)

        # Evaluate the new generation once; the values are reused for the
        # tournaments of the next generation.
        population = new_population
        fitnesses = [fitness_function(program, x, y) for program in population]
        for program, fitness in zip(population, fitnesses):
            if fitness < best_fitness:
                best_fitness = fitness
                best_program = program

        print(f"Generazione {gen + 1}: Migliore fitness = {best_fitness}")

    return best_program, population


In [67]:
# Operator settings, in the order [crossover, subtree, hoist, point, reproduction].
method_probs = [0.9, 0.01, 0.01, 0.01, 0.07]
population_size = 100
input_dim = x.shape[0]
population = [generate_program(input_dim) for _ in range(population_size)]

best_program, final_population = evolution_algorithm(
    population, x, y, 
    generations=50, 
    method_probs=method_probs, 
    tournament_size=5, 
    # subtree_mutation invokes the generator with no arguments, while
    # generate_program is called with input_dim above, so bind it here.
    random_program_generator=lambda: generate_program(input_dim), 
    fitness_function=fitness_function
)


  return op(*args)
  return op(*args)
  return op(*args)


Generazione 1: Migliore fitness = 2262.1773907810925
Generazione 2: Migliore fitness = 2262.1773907810925
Generazione 3: Migliore fitness = 2262.1773907810925
Generazione 4: Migliore fitness = 2262.1773907810925
Generazione 5: Migliore fitness = 2262.1773907810925
Generazione 6: Migliore fitness = 2262.1773907810925
Generazione 7: Migliore fitness = 2262.1773907810925
Generazione 8: Migliore fitness = 2262.1773907810925
Generazione 9: Migliore fitness = 2262.1773907810925
Generazione 10: Migliore fitness = 2262.1773907810925
Generazione 11: Migliore fitness = 2262.1773907810925
Generazione 12: Migliore fitness = 2262.1773907810925
Generazione 13: Migliore fitness = 2262.1773907810925
Generazione 14: Migliore fitness = 2262.1773907810925
Generazione 15: Migliore fitness = 2262.1773907810925
Generazione 16: Migliore fitness = 2262.1773907810925
Generazione 17: Migliore fitness = 2262.1773907810925
Generazione 18: Migliore fitness = 2262.1773907810925
Generazione 19: Migliore fitness = 22

KeyboardInterrupt: 