In [1]:
import numpy as np
from numpy import random
from tqdm import tqdm

In [76]:
problem = np.load('../data/problem_2.npz')
x = problem['x']
y = problem['y']
x.shape

(3, 5000)

In [77]:
print("x:", x)
print("x shape:", x.shape)
print("x min:", x.min(), "x max:", x.max())


x: [[ 4.52991777  1.90003334 -4.72053815 ... -3.19087477 -3.59684929
  -1.976865  ]
 [-0.18656382  4.07797331 -0.08251329 ...  4.21951227 -3.43401969
  -1.74649933]
 [-1.60071107  3.23380643 -1.4492076  ...  3.71076268 -4.50575429
   2.35481779]]
x shape: (3, 5000)
x min: -4.999400642356134 x max: 4.998785132288306


In [78]:
# Define irreducible functions
FUNCTIONS = [
    np.add, np.subtract, np.multiply, np.divide,
    np.sin, np.cos, np.tan,
    np.exp, np.log, np.sqrt, np.abs
]

In [79]:
# Generate End Leafs

def generate_end_leafs(x):
    """Generates terminal nodes: constants or input variables."""
    # 10% extension on the margins
    x_min, x_max = x.min(), x.max()
    delta = 0.1 * (x_max - x_min)
    extended_min = x_min - delta
    extended_max = x_max + delta

    # Random integer constant within the extended range
    constant = random.randint(int(extended_min), int(extended_max) + 1)

    # Randomly return either a constant or a variable
    if random.rand() < 0.5:
        return constant  # Return constant
    else:
        return f"x[{random.randint(0, x.shape[0])}]"  # Return variable
    

# Generate Random Tree

def generate_tree(max_depth, x):
    """Generates a random tree structure."""
    if max_depth == 0 or random.rand() < 0.3:  # Base case: generate terminal node
        return generate_end_leafs(x)

    # Select a random function
    func = random.choice(FUNCTIONS)

    # Generate left and right subtrees
    left_subtree = generate_tree(max_depth - 1, x)
    right_subtree = generate_tree(max_depth - 1, x)

    # Return the tree as a tuple
    return (func, left_subtree, right_subtree)

In [80]:
def convert_tree_to_expression(tree):
    """
    Converts a tree structure into a NumPy-executable formula.
    """
    if isinstance(tree, (int, float, str)):
        if isinstance(tree, str):  # It's a variable like "x[0]"
            return f"x[{int(tree[2:-1])}]"
        return str(tree)  # It's a constant value

    func, left, right = tree
    left_expr = convert_tree_to_expression(left)
    right_expr = convert_tree_to_expression(right)

    # Get the function name or operator for binary and unary functions
    if func in [np.add, np.subtract, np.multiply, np.divide]:
        operator = {
            np.add: "+",
            np.subtract: "-",
            np.multiply: "*",
            np.divide: "/",
        }[func]
        return f"({left_expr} {operator} {right_expr})"
    else:
        return f"np.{func.__name__}({left_expr})"  # Use NumPy function name


In [81]:
def calculate_mse(tree, x: np.ndarray, y: np.ndarray) -> float:
    """
    Calculates the Mean Squared Error (MSE) for a tree representation.
    """
    # Convert the tree into an expression
    expression = convert_tree_to_expression(tree)
    
    # Construct a Python function dynamically
    formula = f"""
def f(x: np.ndarray) -> np.ndarray:
    return {expression}
"""

    # Map function names dynamically from FUNCTIONS
    func_mapping = {f.__name__: f for f in FUNCTIONS}
    exec_locals = {}
    exec_globals = {"np": np, **func_mapping}  # Include mapped functions

    try:
        exec(formula, exec_globals, exec_locals)  # Dynamically define f
        f = exec_locals["f"]
        predictions = f(x)  # Evaluate predictions using x
        mse = np.mean((predictions - y) ** 2)  # Calculate MSE
        return mse
    except Exception as e:
        raise ValueError(f"Error evaluating formula: {expression}, Error: {e}")


In [86]:


def sym_reg(x, y, max_depth=3, num_sol=100):
    """
    Symbolic regression using genetic programming principles.

    Parameters:
        x (np.ndarray): Input features.
        y (np.ndarray): Target values.
        max_depth (int): Maximum depth of the tree.
        num_sol (int): Number of random trees to generate.

    Returns:
        tuple: The best tree and its MSE.
    """
    best_tree = None
    best_mse = float('inf')

    for _ in tqdm(range(num_sol), desc="Generating Trees"):
        # Generate a random tree
        tree = generate_tree(max_depth, x)

        # Evaluate the tree and calculate MSE
        try:
            # Convert the tree to a formula and calculate MSE
            mse = calculate_mse(tree, x, y)
        except Exception as e:
            mse = float('inf')  # Penalize invalid trees

        # Update the best tree if the current one is better
        if mse < best_mse:
            best_mse = mse
            best_tree = tree

    # Print the best solution
    best_formula = convert_tree_to_expression(best_tree)
    print("Best Tree:", best_tree)
    print("Best Formula:", f"def f(x): return {best_formula}")
    print("Best MSE:", best_mse)

    return best_tree, best_mse


In [88]:
# Run symbolic regression
best_tree, best_mse = sym_reg(x, y, max_depth=6, num_sol=10000)

  mse = np.mean((predictions - y) ** 2)  # Calculate MSE
Generating Trees: 100%|██████████| 10000/10000 [00:09<00:00, 1000.95it/s]

Best Tree: (<ufunc 'true_divide'>, (<ufunc 'subtract'>, (<ufunc 'add'>, (<ufunc 'exp'>, (<ufunc 'add'>, (<ufunc 'add'>, 'x[0]', 2), (<ufunc 'absolute'>, -5, 'x[0]')), (<ufunc 'subtract'>, 'x[1]', (<ufunc 'cos'>, 'x[0]', -4))), 'x[1]'), (<ufunc 'add'>, (<ufunc 'tan'>, (<ufunc 'absolute'>, 'x[0]', (<ufunc 'tan'>, 'x[2]', 'x[1]')), -3), (<ufunc 'cos'>, (<ufunc 'exp'>, 'x[0]', -2), 'x[2]'))), (<ufunc 'sqrt'>, (<ufunc 'absolute'>, (<ufunc 'sin'>, (<ufunc 'exp'>, -5, (<ufunc 'subtract'>, 'x[2]', 'x[2]')), 'x[0]'), (<ufunc 'sin'>, (<ufunc 'exp'>, (<ufunc 'add'>, 'x[2]', 5), (<ufunc 'tan'>, 'x[2]', 1)), -3)), 'x[1]'))
Best Formula: def f(x): return (((np.exp(((x[0] + 2) + np.absolute(-5))) + x[1]) - (np.tan(np.absolute(x[0])) + np.cos(np.exp(x[0])))) / np.sqrt(np.absolute(np.sin(np.exp(-5)))))
Best MSE: 28276329515333.48



