In [44]:




# Example usage
tree = Node(np.multiply, [Node(np.add, [Node(10), Node('x_0')]), Node(2)])
y = tree(x=np.array([[1,2,6],[3,2,7], [3,4,8], [6,5,9]]))
y
#print(tree)
# Expected output: (10 + x) * 2 for x = [1, 2, 3, 4, 5]


array([[22., 24., 32.],
       [22., 24., 32.],
       [22., 24., 32.],
       [22., 24., 32.]])

In [7]:
from gp_node import Node
from typing import Callable, List
import random
import numpy as np
np.seterr(all="ignore")

operations = [
    (np.negative,1),
    (np.sqrt,1),
    (np.exp,1),
    (np.log,1),
    (np.sin,1),
    (np.cos,1),
    (np.tan,1),
    (np.square,1),
    (np.square,1),
    (np.cbrt,1),
    (np.reciprocal,1),
    (np.add,2),
    (np.subtract,2),
    (np.multiply,2),
    (np.divide,2),
    (np.power,2)
]

class GP:
    def __init__(self, population_size: int, generations: int, operators: List[Callable], num_var: int, mutation_rate: float, crossover_rate: float, max_depth: int):
        self.population_size = population_size
        self.generations = generations
        self.operators = operators
        self.num_variables = num_var
        self.mutation_rate = mutation_rate
        self.crossover_rate = crossover_rate
        self.max_depth = max_depth
        self.population = [self.generate_population() for _ in range(population_size)]
    

    def generate_population(self) -> Node:
        has_var =  [False]
        def generate_tree(has_var, depth=0) -> Node:
            if depth >= self.max_depth or (depth > 0 and random.random() < 0.3):
                if random.random() < 0.1 * depth and has_var[0]:
                    return Node(random.uniform(-10, 10))
                else:
                    has_var[0] = True
                    return Node(f'x_{random.randint(0,self.num_variables-1)}')
            op = random.choice(self.operators)
            num_children = op[1]
            return Node(op[0], [generate_tree(has_var, depth + 1) for _ in range(num_children)])
        
        return generate_tree(has_var)
    
    def fitness(self, tree: Node, X: np.ndarray, Y: np.ndarray) -> float:
        predictions = tree(x=X)
        return 100*np.square(Y-predictions).sum()/len(Y)
    
    def mutate(self, tree: Node) -> Node:
        if random.random() < self.mutation_rate:
            return self.generate_tree()
        if not tree.is_leaf:
            tree.children = [self.mutate(child) for child in tree.children]
        return tree
    
    def crossover(self, parent1: Node, parent2: Node) -> Node:
        if random.random() < self.crossover_rate:
            return parent2 if random.random() < 0.5 else parent1
        if not parent1.is_leaf and not parent2.is_leaf:
            child = Node(parent1.value, [self.crossover(c1, c2) for c1, c2 in zip(parent1.children, parent2.children)])
            return child
        return parent1
    
    def evolve(self, X: np.ndarray, Y: np.ndarray):
        for gen in range(self.generations):
            scores = [(self.fitness(tree, X, Y), tree) for tree in self.population]
            scores.sort(key=lambda x: x[0])
            self.population = [tree for _, tree in scores[:self.population_size//2]]
            new_population = []
            while len(new_population) < self.population_size:
                parent1, parent2 = random.sample(self.population, 2)
                child = self.crossover(parent1, parent2)
                child = self.mutate(child)
                new_population.append(child)
            self.population = new_population
            print(f"Generation {gen+1}, Best MSE: {scores[0][0]}")
        return scores[0][1]


In [8]:
#test
problem = np.load('../data/problem_1.npz')
X = problem['x']
Y = problem['y']

num_variables= X.shape[0]

symreg = GP(100, 10, operations, num_variables, 0.4, 0.2, 3)
#X = np.array([[1,2,3],[4,5,6],[7,8,9]])
scores = []
for expr in symreg.population:
    y = expr(x=X)
    mse = symreg.fitness(expr, X, Y)
    scores.append((mse, str(expr)))
    print(f'[mse: {mse:g}] y = {expr}')

scores.sort(key=lambda x: x[0])


[mse: 3.57432e+08] y = reciprocal(square(x_0))
[mse: 117.93] y = cbrt(exp(sin(x_0)))
[mse: nan] y = power(square(exp(x_0)), exp(power(-0.4431972858343425, x_0)))
[mse: 29.7589] y = power(square(square(x_0)), exp(cos(x_0)))
[mse: 46.4415] y = add(multiply(negative(x_0), cbrt(x_0)), cos(cos(-8.71761429521424)))
[mse: nan] y = sqrt(x_0)
[mse: 83.0125] y = cbrt(square(tan(x_0)))
[mse: nan] y = subtract(x_0, sqrt(x_0))
[mse: 96.8011] y = cos(tan(x_0))
[mse: nan] y = power(subtract(add(x_0, x_0), tan(x_0)), square(negative(x_0)))
[mse: 21988.9] y = divide(exp(cbrt(x_0)), x_0)
[mse: 0.328155] y = log(exp(x_0))
[mse: 25.4354] y = subtract(x_0, x_0)
[mse: 157.73] y = cbrt(negative(x_0))
[mse: 166.421] y = cbrt(tan(divide(x_0, x_0)))
[mse: 28.6699] y = tan(cbrt(x_0))
[mse: 105.181] y = cos(x_0)
[mse: inf] y = reciprocal(multiply(x_0, subtract(x_0, x_0)))
[mse: 173837] y = divide(exp(cos(x_0)), sin(negative(x_0)))
[mse: 44.5459] y = square(x_0)
[mse: 23732.8] y = negative(reciprocal(x_0))
[mse: 1

In [6]:
scores.sort(key=lambda x: x[0])
scores

[(np.float64(nan), 'sqrt(add(x_0, x_0))'),
 (np.float64(7.125940794232773e-32), 'sin(x_0)'),
 (np.float64(0.08876601629478617), 'sin(tan(sin(x_0)))'),
 (np.float64(1.1009172718022273), 'tan(sin(x_0))'),
 (np.float64(10.446687530996925), 'divide(x_0, cos(x_0))'),
 (np.float64(18.317581169395535),
  'multiply(subtract(x_0, square(x_0)), exp(subtract(x_0, x_0)))'),
 (np.float64(23.451727230913992), 'cbrt(cbrt(x_0))'),
 (np.float64(25.61057134924241), 'square(subtract(x_0, tan(x_0)))'),
 (np.float64(35.57630472456574), 'negative(square(sin(x_0)))'),
 (np.float64(40.70696310379202),
  'divide(x_0, multiply(negative(x_0), sqrt(5.657790385920949)))'),
 (np.float64(41.67597845645442), 'sin(square(x_0))'),
 (np.float64(44.54594770444776), 'square(x_0)'),
 (np.float64(69.34351456364236), 'cbrt(square(x_0))'),
 (np.float64(105.1805508436661), 'cos(x_0)'),
 (np.float64(105.1805508436661), 'cos(x_0)'),
 (np.float64(116.21953207803394), 'cos(square(negative(x_0)))'),
 (np.float64(nan), 'sqrt(power(a