# Import

In [409]:
import numpy as np
from random import choice, random, randint, sample

# Operations

In [410]:
OPERATIONS = [
    (np.add, 2, "({} + {})"),
    (np.subtract, 2, "({} - {})"),
    (np.divide, 2, "({} / {})"),
    (np.sin, 1, "sin({})"),
]

# Genothype definition

Let's consider the genotype as a list of elements with the following structure: [operator, first operand, secondo operand].
In this way we are creating a recursive function that computes a valid formula and represent it as a list (example: [ "+", ["sin", "x[0]" ], "x[1]"]).

In [411]:
def random_program(depth, input_dim):
    if depth == 0 or random() < 0.3:
        return f"x[{randint(0, input_dim - 1)}]" #new leaf node

    op, arity, symbol = choice(OPERATIONS)
    children = [random_program(depth - 1, input_dim) for _ in range(arity)]
    return [symbol] + children  

In [412]:
random_program(1, 4)

'x[3]'

Now we need a function that given the genotype provide us with the output provided by the predicted function. This function must receive the input vector to perform his operation.

In [413]:
def evaluate_program(program, x):
    if isinstance(program, str):  # Leaf node
        return x[int(program[2:-1])]  # extract the value
    elif isinstance(program, list): 
        op = next(op for op, _, symbol in OPERATIONS if symbol == program[0])
        args = [evaluate_program(child, x) for child in program[1:]]
        try:
            return op(*args)
        except ZeroDivisionError:
            return np.inf

In [414]:
x = [2,3,4, 6]
program = ['({} / {})', 'x[1]', 'x[0]']
evaluate_program(program, x)

np.float64(1.5)

As you may notice this function verify with the function __isinstance(element, type)__ if "element" is an instance of the "type", with the objective of understanding if it is a __leaf node__. If this is the case we simply extract the value out of it, otherwise we still need to invoke the function recursively.

## Fitness function

For now, simply consider the fitness function of a solution as it's mean square error compared to the expected results.

In [415]:
def fitness_function(program, x, y):
    predictions = np.array([evaluate_program(program, x_row) for x_row in x.T])
    return np.mean((predictions - y) ** 2)

In [416]:
x = np.random.uniform(-1, 1, size=(2, 1000))  # 2 feature, 1000 esempi
y = x[0] + np.sin(x[1]) / 5
#fitness_function(program, x, y)

## Tweak function

There is a lot of __variability__ that has to be considered for the tweak function. We can now imagine to implement a recursive function that receives the program (which indicates the current function that we are using for the task), the number of dimensions for the input and the maximum depth allowed for the tweaked solution.

Recursively, if we end up into a leaf node, or if the current solution is still a list but with 0.3 probability, we simply generate a new sub-program.

Otherwise, we simply invoke the same function for a random index.

In [417]:
def mutate_program(program, input_dim, depth=3):
    if random() < 0.3 or not isinstance(program, list):  
        return random_program(depth, input_dim)
    idx = randint(1, len(program) - 1)
    program[idx] = mutate_program(program[idx], input_dim, depth - 1)
    return program

## Crossover

We can use a croossover function that receives only 2 parents and, if one of them is a leaf program simply return casually one of the 2 programs (avoiding to perform the operation for programs with no childrens). Otherwise, select random indexes for both the parents and combine the first part of the tree with the second part of the tree of the 2 parents, returning a new individual.

In [418]:
def crossover_program(parent1, parent2):
    if not isinstance(parent1, list) or not isinstance(parent2, list):
        return parent1 if random() < 0.5 else parent2
    idx1 = randint(1, len(parent1) - 1)
    idx2 = randint(1, len(parent2) - 1)
    child = parent1[:idx1] + parent2[idx2:]
    return child

In [419]:
def is_valid_program(program):
    """Verifica se un programma è valido (ha il numero corretto di operandi per ogni operazione)."""
    if isinstance(program, str):
        # Nodo foglia, è sempre valido
        return True

    op_symbol = program[0]

    # Verifica se è un'operazione unaria (sin, cos, ecc.)
    if len(program) == 2:
        return is_valid_program(program[1])

    # Verifica se è un'operazione binaria (somma, sottrazione, ecc.)
    elif len(program) == 3:
        return is_valid_program(program[1]) and is_valid_program(program[2])

    return False  # Se non è né un nodo foglia né un nodo valido con il giusto numero di operandi


In [420]:
def crossover_program(parent1, parent2):
    """Esegue il crossover tra due programmi genitori e restituisce un programma valido."""
    # Se uno dei genitori è un nodo foglia, scegliamo di restituirlo direttamente
    if isinstance(parent1, str) or isinstance(parent2, str):
        return parent1 if random() < 0.5 else parent2

    # Creazione di un nuovo programma a partire dai genitori
    child = parent1.copy()
    child["children"][randint(0, child["arity"] - 1)] = crossover_program(
        parent1["children"][randint(0, parent1["arity"] - 1)],
        parent2["children"][randint(0, parent2["arity"] - 1)],
    )

    # Verifica se il nuovo programma è valido
    if is_valid_program(child):
        return child
    else:
        # Se non è valido, rigenera il programma
        return random_program(3, input_dim)  # Adatta il livello di profondità e input_dim come necessario


## Genetic algorithm

### Parameter selection

In [421]:
generations=50
population_size=100

### Data selection

In [422]:
x = np.random.uniform(-1, 1, size=(2, 1000))  # 2 feature, 1000 esempi
y = x[0] + np.sin(x[1]) / 5

In [423]:
input_dim = x.shape[0]
population = [random_program(3, input_dim) for _ in range(population_size)]

for gen in range(generations):
    fitness = []
    for prog in population:
        fitness.append(fitness_function(prog, x, y))

    fitness = np.array(fitness)
    #fitness = np.array([fitness_function(prog, x, y) for prog in population]) #array of fitness value for each individual

    # Selection of only the fittest elements: the mid of them.
    sorted_indices = fitness.argsort()
    population = [population[i] for i in sorted_indices[:population_size // 2]]
    fitness = fitness[sorted_indices[:population_size // 2]]

    print(f"At generation {gen+1}, best fitness: {fitness[0]:.6f}")

    
    new_population = []
    while len(new_population) < population_size:
        parent1 = choice(population)
        parent2 = choice(population)
        offspring = crossover_program(parent1, parent2) if random() < 0.0 else mutate_program(parent1, input_dim)
        new_population.append(offspring)

    population = new_population

    # Ritorna il migliore
best_program = population[0]
best_fitness = fitness_function(best_program, x, y)
print("best program: ", best_program, "; con fitness: ", best_fitness)

  return op(*args)


At generation 1, best fitness: 0.011031


  return op(*args)


At generation 2, best fitness: 0.011031


KeyboardInterrupt: 