In [6]:
import numpy as np
from numpy import random
from tqdm import tqdm
import warnings

In [25]:
# Load the problem data: the archive stores the inputs under 'x' and the
# targets under 'y'.
# np.load on an .npz file returns a lazy NpzFile that keeps the underlying file
# handle open, so both arrays are read inside a `with` block that closes it.
# After the block `problem` is closed and must not be indexed again.
with np.load('../data/problem_4.npz') as problem:
    x = problem['x']
    y = problem['y']
x.shape

(2, 5000)

In [26]:
# Quick sanity check of the inputs: contents, shape and value range.
print(f"x: {x}")
print(f"x shape: {x.shape}")
print(f"x min: {x.min()} x max: {x.max()}")


x: [[ 3.15087424 -0.14015422 -2.77684915 ... -3.48341583  4.08664981
  -4.37254358]
 [-1.73013313 -1.69649662 -1.408818   ... -3.28548272 -2.58382568
   3.30721333]]
x shape: (2, 5000)
x min: -4.999705785571271 x max: 4.999534886222849


In [27]:
# Primitive set used as inner nodes of the expression trees (NumPy ufunc objects).
# Convert_tree_to_expression renders the first four (add, subtract, multiply,
# divide) as infix binary operators; every other entry is rendered as a
# one-argument call `np.<ufunc name>(left)`, so only the left child is used for
# those. The list order is the order random.choice samples from.
FUNCTIONS = [
    np.add, np.subtract, np.multiply, np.divide,
    np.sin, np.cos, np.tan,
    np.exp, np.log, np.sqrt, np.abs
]

In [28]:
def Generate_range(x):
    """
    Computes the integer range from which random constants are drawn.

    The value span of `x` is widened by 10% of its width on each side and the
    resulting bounds are rounded outward to whole numbers.

    Parameters:
        x (np.ndarray): Input array; must be non-empty.

    Returns:
        tuple: (extended_min, extended_max) as Python ints.
    """
    x_min, x_max = x.min(), x.max()
    delta = 0.1 * (x_max - x_min)
    # Round outward: floor the lower bound and ceil the upper bound. int()
    # truncates toward zero, which pulls both bounds inward and can collapse
    # the range to (0, 0) for small-magnitude data.
    extended_min = int(np.floor(x_min - delta))
    extended_max = int(np.ceil(x_max + delta))
    return extended_min, extended_max


def Generate_end_leafs(x, extended_range):
    """
    Generates a terminal node: a non-zero integer constant or an input variable.

    With probability 0.5 a constant is returned, otherwise a variable reference
    of the form "x[i]" where i indexes the first axis of `x`. If the range
    contains no non-zero integer (it is exactly (0, 0)), a variable is always
    returned instead of searching for a constant that does not exist.

    Parameters:
        x (np.ndarray): Input array; only x.shape[0] is used.
        extended_range (tuple): Tuple containing (extended_min, extended_max),
            both inclusive integer bounds for constants.

    Returns:
        int or str: A non-zero Python int, or a string such as "x[0]".
    """
    extended_min, extended_max = extended_range

    # A non-zero constant exists unless the inclusive range is exactly {0}.
    has_nonzero_constant = not (extended_min == 0 and extended_max == 0)

    if has_nonzero_constant and random.rand() < 0.5:
        # Draw the constant only when it is actually returned; reject zero.
        constant = 0
        while constant == 0:
            # randint's upper bound is exclusive, hence the + 1.
            constant = random.randint(extended_min, extended_max + 1)
        # Terminals are recognised downstream via isinstance(..., int), so
        # make sure a plain Python int is returned.
        return int(constant)

    # randint(0, n) yields 0 .. n-1, i.e. a valid index into x's first axis.
    return f"x[{random.randint(0, x.shape[0])}]"
    

def Generate_tree(max_depth, x, extended_range):
    """
    Builds a random expression tree by recursive descent.

    A node becomes a terminal when the depth budget is exhausted, or with
    probability 0.3 otherwise. Every inner node receives two children, also
    for functions that Convert_tree_to_expression renders with one argument.

    Parameters:
        max_depth (int): Remaining depth budget for this subtree.
        x (np.ndarray): Input data, forwarded to the terminal generator.
        extended_range (tuple): (extended_min, extended_max) bounds for constants.

    Returns:
        tuple or str/int: (function, left, right) for an inner node, otherwise
        a terminal produced by Generate_end_leafs.
    """
    # The random draw is skipped when the depth budget is already used up.
    becomes_terminal = max_depth == 0 or random.random() < 0.3
    if becomes_terminal:
        return Generate_end_leafs(x, extended_range)

    operation = random.choice(FUNCTIONS)

    # Children are built left first, then right, each with one level less.
    remaining_depth = max_depth - 1
    children = [
        Generate_tree(remaining_depth, x, extended_range)
        for _ in range(2)
    ]
    return (operation, children[0], children[1])


def Convert_tree_to_expression(tree):
    """
    Renders a tree as a string of NumPy-executable Python source.

    Terminals: a string "x[i]" is re-emitted with its index normalised through
    int(); an int is emitted via str(). Inner nodes (func, left, right): the
    four arithmetic ufuncs become a parenthesised infix expression; any other
    function becomes `np.<func.__name__>(left)` and the right child is not
    part of the output.
    """
    # Terminal: variable reference such as "x[0]".
    if isinstance(tree, str):
        variable_index = int(tree[2:-1])
        return f"x[{variable_index}]"
    # Terminal: integer constant.
    if isinstance(tree, int):
        return str(tree)

    func, left, right = tree
    # Both children are always rendered, even when only the left one is used.
    left_expr = Convert_tree_to_expression(left)
    right_expr = Convert_tree_to_expression(right)

    infix_symbols = {
        np.add: "+",
        np.subtract: "-",
        np.multiply: "*",
        np.divide: "/",
    }
    symbol = infix_symbols.get(func)
    if symbol is None:
        # Not an arithmetic operator: emit a one-argument NumPy call.
        return f"np.{func.__name__}({left_expr})"
    return f"({left_expr} {symbol} {right_expr})"


def Calculate_mse(tree, x: np.ndarray, y: np.ndarray) -> float:
    """
    Scores a tree by the mean squared error of its predictions against `y`.

    The tree is rendered to source text, compiled into a function `f(x)` with
    exec, and evaluated on `x`. Returns infinity when the predictions contain
    non-finite values or when compiling/evaluating raises any Exception;
    RuntimeWarnings raised during evaluation are suppressed.
    """
    body = Convert_tree_to_expression(tree)

    # Source of the throw-away function that evaluates the expression.
    source = f"""
def f(x: np.ndarray) -> np.ndarray:
    return {body}
"""

    # Globals visible to the generated function: NumPy plus every primitive
    # under its ufunc name.
    namespace_globals = {"np": np}
    namespace_globals.update({fn.__name__: fn for fn in FUNCTIONS})
    namespace_locals = {}

    worst_score = float('inf')
    try:
        with warnings.catch_warnings():
            # Overflow / invalid-value warnings are expected for random trees.
            warnings.simplefilter("ignore", category=RuntimeWarning)
            exec(source, namespace_globals, namespace_locals)
            predictions = namespace_locals["f"](x)

            if np.isfinite(predictions).all():
                return np.mean((predictions - y) ** 2)
            # NaN or inf somewhere in the predictions: treat as unusable.
            return worst_score
    except Exception:
        # Any failure while building or running f counts as unusable too.
        return worst_score
        

def Sym_reg_rand(x, y, max_depth=3, num_sol=100):
    """
    Symbolic regression by pure random search.

    Generates `num_sol` independent random trees and keeps the one with the
    lowest MSE. No mutation or recombination is involved.

    Parameters:
        x (np.ndarray): Input features.
        y (np.ndarray): Target values.
        max_depth (int): Maximum depth of the tree.
        num_sol (int): Number of random trees to generate.

    Returns:
        tuple: The best tree and its MSE. If no tree achieved an MSE below
        infinity (or num_sol is 0) the result is (None, inf).
    """
    best_tree = None
    best_mse = float('inf')
    extended_range = Generate_range(x)

    for _ in tqdm(range(num_sol), desc="Generating Trees"):
        tree = Generate_tree(max_depth, x, extended_range)

        # Best effort: a tree that cannot be scored is simply never selected.
        try:
            mse = Calculate_mse(tree, x, y)
        except Exception:
            mse = float('inf')

        # Strict comparison: inf (and NaN) scores never replace the incumbent.
        if mse < best_mse:
            best_mse = mse
            best_tree = tree

    if best_tree is None:
        # Nothing to render: Convert_tree_to_expression cannot handle None.
        print("No valid tree found: every candidate had a non-finite MSE.")
        return best_tree, best_mse

    # Print the best solution
    best_formula = Convert_tree_to_expression(best_tree)
    print("Best Tree:", best_tree)
    print("Best Formula:", f"def f(x): return {best_formula}")
    print("Best MSE:", best_mse)

    return best_tree, best_mse



In [29]:
def Select_random_subtree(tree):
    """
    Randomly picks a node (inner node or terminal) of the given tree.

    At each inner node the walk stops with probability 0.33; otherwise it
    descends into the left or the right child with equal probability.

    Parameters:
        tree: The tree to pick a node from.

    Returns:
        tuple: (subtree, context). `context` is None when the picked node is
        `tree` itself. Otherwise it is a tuple rebuilt level by level while the
        recursion unwinds, so it is a structural copy of the whole `tree`
        rather than the immediate parent of the picked node.
    """
    # Terminals cannot be descended into.
    if isinstance(tree, (int, str)):
        return tree, None

    func, left, right = tree

    # First draw: stop here and pick the current node.
    if random.random() < 0.33:
        return tree, None

    # Second draw: choose the branch to follow.
    go_left = random.random() < 0.5
    if go_left:
        chosen, rebuilt = Select_random_subtree(left)
        if rebuilt is None:
            # The child itself was picked; it stands in for the rebuilt branch.
            rebuilt = chosen
        return chosen, (func, rebuilt, right)

    chosen, rebuilt = Select_random_subtree(right)
    if rebuilt is None:
        rebuilt = chosen
    return chosen, (func, left, rebuilt)


def _random_node_path(tree):
    """Returns a random root-to-node path as a tuple of child indices (1 = left, 2 = right)."""
    path = []
    node = tree
    # Terminals (int / str) end the walk. At each inner node stop with
    # probability 0.33, otherwise descend left or right with equal probability.
    while not isinstance(node, (int, str)):
        if random.random() < 0.33:
            break
        child_index = 1 if random.random() < 0.5 else 2
        path.append(child_index)
        node = node[child_index]
    return tuple(path)


def _node_at_path(tree, path):
    """Returns the node reached from the root of `tree` by following `path`."""
    node = tree
    for child_index in path:
        node = node[child_index]
    return node


def _replace_at_path(tree, path, replacement):
    """Returns a copy of `tree` in which the node at `path` is `replacement`."""
    if not path:
        # Empty path addresses the root: the replacement becomes the tree.
        return replacement
    func, left, right = tree
    if path[0] == 1:
        return (func, _replace_at_path(left, path[1:], replacement), right)
    return (func, left, _replace_at_path(right, path[1:], replacement))


def Mutate_tree(tree, x, extended_range):
    """
    Mutates one randomly chosen node of the tree.

    A terminal is replaced by a freshly generated terminal. An inner node keeps
    its two children but gets a different function from FUNCTIONS. The node is
    addressed by its path from the root, so the change lands exactly on the
    chosen node at any depth and the rest of the tree is preserved.

    Parameters:
        tree: The tree to mutate (not modified; trees are tuples).
        x (np.ndarray): Input data for generating terminals.
        extended_range (tuple): The range for generating constants.

    Returns:
        The mutated tree.
    """
    path = _random_node_path(tree)
    target = _node_at_path(tree, path)

    if isinstance(target, (int, str)):
        # Terminal node: swap in a new constant or variable.
        replacement = Generate_end_leafs(x, extended_range)
    else:
        # Function node: keep the children, change the function.
        func, left, right = target
        new_func = random.choice(FUNCTIONS)
        while new_func == func:
            new_func = random.choice(FUNCTIONS)
        replacement = (new_func, left, right)

    return _replace_at_path(tree, path, replacement)


def Recombinate_trees(tree1, tree2):
    """
    Swaps one randomly chosen subtree between two trees.

    A node is picked independently in each tree; each child is the original
    tree with its picked node replaced by the node picked in the other tree.
    Picking a root simply means the whole tree is exchanged, so every node of
    the two parents ends up in exactly one of the two children.

    Parameters:
        tree1: The first tree.
        tree2: The second tree.

    Returns:
        A tuple of the two new trees after recombination.
    """
    path1 = _random_node_path(tree1)
    path2 = _random_node_path(tree2)

    subtree1 = _node_at_path(tree1, path1)
    subtree2 = _node_at_path(tree2, path2)

    child1 = _replace_at_path(tree1, path1, subtree2)
    child2 = _replace_at_path(tree2, path2, subtree1)
    return child1, child2


In [30]:
# Baseline run: random search over 10,000 random trees with a depth limit of 10
# (about 51 s in the recorded output below).
best_tree, best_mse = Sym_reg_rand(x, y, max_depth=10, num_sol=10000)

Generating Trees:   0%|          | 0/10000 [00:00<?, ?it/s]

Generating Trees: 100%|██████████| 10000/10000 [00:51<00:00, 193.70it/s]

Best Tree: (<ufunc 'exp'>, (<ufunc 'exp'>, (<ufunc 'cos'>, (<ufunc 'true_divide'>, 'x[1]', -1), (<ufunc 'cos'>, 'x[0]', -2)), (<ufunc 'exp'>, (<ufunc 'add'>, 'x[0]', (<ufunc 'subtract'>, (<ufunc 'log'>, (<ufunc 'cos'>, (<ufunc 'sin'>, (<ufunc 'log'>, (<ufunc 'log'>, -2, 3), (<ufunc 'cos'>, 'x[1]', 4)), (<ufunc 'absolute'>, (<ufunc 'multiply'>, 'x[1]', -1), 2)), (<ufunc 'subtract'>, 4, 'x[0]')), 'x[1]'), (<ufunc 'subtract'>, (<ufunc 'sin'>, 'x[1]', (<ufunc 'subtract'>, (<ufunc 'true_divide'>, (<ufunc 'exp'>, -2, 'x[0]'), (<ufunc 'tan'>, 1, 'x[1]')), (<ufunc 'sin'>, 'x[0]', (<ufunc 'multiply'>, 5, -2)))), 'x[1]'))), (<ufunc 'cos'>, 'x[1]', (<ufunc 'cos'>, (<ufunc 'subtract'>, (<ufunc 'true_divide'>, (<ufunc 'tan'>, (<ufunc 'absolute'>, (<ufunc 'cos'>, -1, 'x[0]'), (<ufunc 'multiply'>, 2, 'x[1]')), (<ufunc 'cos'>, (<ufunc 'subtract'>, -2, 5), (<ufunc 'absolute'>, 2, -5))), (<ufunc 'add'>, (<ufunc 'absolute'>, (<ufunc 'cos'>, 'x[0]', 'x[1]'), (<ufunc 'tan'>, 5, -1)), (<ufunc 'tan'>, (<ufun




In [31]:
def Sym_reg(x, y, max_depth=3, population_size=1000, generations=10, mutation_rate=0.5):
    """
    Symbolic regression using genetic programming principles.

    Each generation keeps the better half of the population (by MSE) and
    refills the other half with offspring created from the survivors by
    mutation or recombination.

    Parameters:
        x (np.ndarray): Input features.
        y (np.ndarray): Target values.
        max_depth (int): Maximum depth of the initial random trees.
        population_size (int): Number of individuals in the population (>= 1).
        generations (int): Number of generations to evolve the population.
        mutation_rate (float): Probability that an offspring-creation step uses
            mutation; otherwise two survivors are recombined.

    Returns:
        tuple: The best tree and its MSE.

    Raises:
        ValueError: If population_size is smaller than 1.
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    extended_range = Generate_range(x)

    # Function to evaluate MSE for all individuals
    def evaluate_population(population):
        return [(tree, Calculate_mse(tree, x, y)) for tree in population]

    # Initialize and score the population of random trees
    population = [
        Generate_tree(max_depth, x, extended_range)
        for _ in range(population_size)
    ]
    evaluated_population = evaluate_population(population)
    evaluated_population.sort(key=lambda ind: ind[1])  # Sort by MSE

    # Survivors plus offspring always add up to population_size, so an odd
    # size does not silently shrink the population.
    num_survivors = max(1, population_size // 2)
    num_offspring = population_size - num_survivors

    for gen in range(generations):
        print(f"Generation {gen + 1}/{generations}")

        # Select the top half of the population (elitism), keeping the scores
        evaluated_survivors = evaluated_population[:num_survivors]
        survivors = [tree for tree, _ in evaluated_survivors]

        # Reproduce new individuals by mutation or recombination
        offspring = []
        while len(offspring) < num_offspring:
            # Recombination needs two distinct parents; with fewer than two
            # survivors only mutation is possible.
            if len(survivors) < 2 or np.random.rand() < mutation_rate:
                parent = survivors[np.random.choice(len(survivors))]
                offspring.append(Mutate_tree(parent, x, extended_range))
            else:
                parent1_index, parent2_index = np.random.choice(len(survivors), size=2, replace=False)
                child1, child2 = Recombinate_trees(survivors[parent1_index], survivors[parent2_index])
                offspring.extend([child1, child2])

        # Recombination adds two children at once and may overshoot by one
        offspring = offspring[:num_offspring]

        # Scoring is deterministic for a given tree, so the survivors' scores
        # are reused and only the new offspring are evaluated.
        evaluated_population = evaluated_survivors + evaluate_population(offspring)
        evaluated_population.sort(key=lambda ind: ind[1])  # Sort by MSE

        # Print the best solution of the generation
        best_mse = evaluated_population[0][1]
        print(f"Best MSE: {best_mse:.6f}")

    # Return the best solution overall
    best_tree, best_mse = evaluated_population[0]
    best_formula = Convert_tree_to_expression(best_tree)
    print("Best Tree:", best_tree)
    print("Best Formula:", f"def f(x): return {best_formula}")
    print("Best MSE:", best_mse)

    return best_tree, best_mse


In [32]:
# Run the evolutionary search (mutation + recombination) with a depth limit of
# 10, a population of 1000 and 20 generations.
# NOTE: this rebinds best_tree / best_mse, replacing the result of the earlier
# Sym_reg_rand cell.
best_tree, best_mse = Sym_reg(x, y, max_depth=10, population_size=1000, generations=20, mutation_rate=0.5)

Generation 1/20
Best MSE: 16.569760
Generation 2/20
Best MSE: 10.310958
Generation 3/20
Best MSE: 10.310958
Generation 4/20
Best MSE: 10.310958
Generation 5/20
Best MSE: 10.310958
Generation 6/20
Best MSE: 10.310958
Generation 7/20
Best MSE: 10.310958
Generation 8/20
Best MSE: 9.429531
Generation 9/20
Best MSE: 9.429531
Generation 10/20
Best MSE: 9.429531
Generation 11/20
Best MSE: 9.429531
Generation 12/20
Best MSE: 9.429531
Generation 13/20
Best MSE: 9.429531
Generation 14/20
Best MSE: 9.429531
Generation 15/20
Best MSE: 9.429531
Generation 16/20
Best MSE: 9.429531
Generation 17/20
Best MSE: 8.602785
Generation 18/20
Best MSE: 8.602785
Generation 19/20
Best MSE: 8.602785
Generation 20/20
Best MSE: 8.602785
Best Tree: (<ufunc 'multiply'>, (<ufunc 'exp'>, (<ufunc 'cos'>, 'x[1]', 'x[1]'), (<ufunc 'add'>, (<ufunc 'add'>, (<ufunc 'subtract'>, 'x[0]', 'x[1]'), (<ufunc 'true_divide'>, (<ufunc 'cos'>, (<ufunc 'sqrt'>, -2, (<ufunc 'exp'>, 4, (<ufunc 'sqrt'>, 5, (<ufunc 'multiply'>, 'x[0]', -5