# Import

In [109]:
import numpy as np
from random import choice, random, randint, sample
import copy

# Operations

In [110]:
OPERATIONS = [
    (np.add, 2, "({} + {})"),
    (np.subtract, 2, "({} - {})"),
    (np.divide, 2, "({} / {})"),
    (np.multiply, 2, "({} * {})"),
    (np.sin, 1, "sin({})"),
    (np.cos, 1, "cos({})"),
    (np.tan, 1, "tan({})"),
    (np.exp, 1, "exp({})"),
    (np.log, 1, "log({})"),
    (np.sqrt, 1, "sqrt({})"),
    (np.square, 1, "square({})"),
]

# Genothype definition

Let's consider the genotype as a list of elements with the following structure: [operator, first operand, secondo operand].
In this way we are creating a recursive function that computes a valid formula and represent it as a list (example: [ "+", ["sin", "x[0]" ], "x[1]"]).

In [111]:
def random_program(depth, input_dim):
    if depth == 0 or random() < 0.3:
        #Return a costant between 1 and 10 or a random variable (prob=0.3)
        if random() < 0.3:
            return str(randint(2, 10)) #costant
        else:
            return f"x[{randint(0, input_dim - 1)}]" #new leaf node

    op, arity, symbol = choice(OPERATIONS)
    children = [random_program(depth - 1, input_dim) for _ in range(arity)]
    return [symbol] + children  

## Transform the program into a human readable function

In [112]:
def program_to_string(program):
    if isinstance(program, str):  # leaf
        return program  
    elif isinstance(program, list):  
        try:
            _, _, symbol = next((op, arity, s) for op, arity, s in OPERATIONS if s == program[0])
        except StopIteration:
            raise ValueError(f"Not known operation: {program[0]}")
        
        children = [program_to_string(child) for child in program[1:]]
        
        return symbol.format(*children)


In [113]:
program_to_string(random_program(6, 3))

'log(cos((cos(x[2]) - 5)))'

Now we need a function that given the genotype provide us with the output provided by the predicted function. This function must receive the input vector to perform his operation.

In [114]:
def evaluate_program(program, x):
    if isinstance(program, str):  # Leaf node
        #If it's a leaf, it could be a costant or a variable
        if program[0] == 'x':
            return x[int(program[2:-1])]
        else:
            return int(program)
    elif isinstance(program, list): 
        op = next(op for op, _, symbol in OPERATIONS if symbol == program[0])
        args = [evaluate_program(child, x) for child in program[1:]]
        try:
            return op(*args)
        except ZeroDivisionError:
            return np.inf

## Fitness function

For now, simply consider the fitness function of a solution as it's mean square error compared to the expected results.

In [116]:
#Let's also introduce a fitness function that is the same used by professors
def fitness_evaluation_result(program, x, y):
    predictions = np.array([evaluate_program(program, x_row) for x_row in x.T])
    return float(100 * np.mean(np.square(predictions - y)))

def fitness_function(program, x, y):
    predictions = np.array([evaluate_program(program, x_row) for x_row in x.T])
    return np.mean((predictions - y) ** 2)

### Data selection

In [124]:
def true_f(x: np.ndarray) -> np.ndarray:
    return x[0] + np.tan(x[1])/5 +x[0]*np.exp(x[1])

TEST_SIZE = 10_000
TRAIN_SIZE = 1000

x_validation = np.vstack(
    [
        np.random.random_sample(size=TEST_SIZE) * 2 * np.pi - np.pi,
        np.random.random_sample(size=TEST_SIZE) * 2 - 1,
    ]
)
y_validation = true_f(x_validation)
train_indexes = np.random.choice(TEST_SIZE, size=TRAIN_SIZE, replace=False)
x_train = x_validation[:, train_indexes]
y_train = y_validation[train_indexes]

x = x_train
y = y_train

Let's change the fitness function in a way that we penalize the more complex functions:

In [125]:
# Funzione di fitness avanzata ispirata a deep_based_FGP_NLS.py
def fitness_function(program, x, y):
    """
    Valuta la fitness di un programma.
    """
    try:
        # Valutazione del programma
        predictions = np.array([evaluate_program(program, x_row) for x_row in x.T])
        if np.any(np.isnan(predictions)) or np.any(np.isinf(predictions)):
            return np.inf  # Penalizza programmi invalidi

        # Calcolo dell'errore
        #error = np.mean((predictions - y) ** 2)  # Errore quadratico medio
        error = float(100 * np.mean(np.square(predictions - y)))
        

        # Aggiungi penalità basata sulla complessità del programma
        complexity_penalty = len(program) * 0.1
        fitness = error + complexity_penalty

    except Exception as e:
        # Penalizza programmi che generano errori
        print(f"Errore nella valutazione del programma: {e}")
        return np.inf

    return fitness

### Simulated annealing

In [134]:
def mutate_for_annealing(program, input_dim, max_depth=6, mutation_rate=0.7):
    """Mutazione leggera per simulated annealing."""
    mutant = copy.deepcopy(program)

    def mutate_node(node):
        if isinstance(node, list):
            if random() < mutation_rate:
                return random_program(randint(1, max_depth), input_dim)
            else:
                return [mutate_node(subnode) for subnode in node]
        return node

    # Applica la mutazione a nodi casuali
    mutant = mutate_node(mutant)

    return mutant


def simulated_annealing(initial_program, cost_function, input_data, output_data, max_iterations, initial_temp, alpha):
    current_program = initial_program
    current_cost = cost_function(current_program, input_data, output_data)
    temp = initial_temp
    
    for i in range(max_iterations):
        new_program = mutate(current_program, input_data.shape[0])
        new_cost = cost_function(new_program, input_data, output_data)
        cost_diff = new_cost - current_cost
        
        if cost_diff < 0 or random() < np.exp(-cost_diff / temp):
            current_program = new_program
            current_cost = new_cost
            
        temp *= alpha
        
        if temp < 1e-10:
            break
    
    return current_program

best_program = simulated_annealing(random_program(10, x.shape[0]), fitness_function, x, y, 10000, 1000, 0.98)



  return op(*args)
  return op(*args)
  return op(*args)
  return op(*args)
  return op(*args)
  return op(*args)
  return op(*args)
  error = float(100 * np.mean(np.square(predictions - y)))


In [135]:
program_to_string(best_program)
fitness_function(best_program, x, y)

141.65910899601482