In [2]:
import numpy as np
import pandas as pd
from edts import EDTS
import logging


# Generate a synthetic regression dataset with a known equation
def generate_data(num_samples=10_000, seed=None):
    """Build a synthetic linear-regression dataset with a known ground truth.

    Two features are drawn independently from a normal distribution with
    mean 0 and standard deviation 10. The target follows
    ``y = 2*x0 + 3*x1 + 5`` plus normal noise with standard deviation 1.

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed for reproducible draws. When ``None`` (the
            default) the global ``np.random`` state is used, so existing
            callers behave exactly as before.

    Returns:
        A ``pandas.DataFrame`` with columns ``x0``, ``x1`` and ``target``.
    """
    # A private RandomState makes seeded runs reproducible without touching
    # NumPy's global generator; the unseeded path is left unchanged.
    rng = np.random if seed is None else np.random.RandomState(seed)

    x_0 = rng.normal(0, 10, num_samples)
    x_1 = rng.normal(0, 10, num_samples)

    # y = 2*x0 + 3*x1 + 5, plus Gaussian noise with standard deviation 1
    y = 2 * x_0 + 3 * x_1 + 5 + rng.normal(0, 1, num_samples)

    return pd.DataFrame({'x0': x_0, 'x1': x_1, 'target': y})
# Keep the `jax` logger quiet: only records at ERROR level or above pass.
jax_logger = logging.getLogger('jax')
jax_logger.setLevel(logging.ERROR)

# Everything else is configured at INFO level via the root logger.
logging.basicConfig(level=logging.INFO)

# Draw the synthetic dataset using the default sample count.
data = generate_data()

In [3]:
# Set up the equation search over the two synthetic feature columns.
# NOTE(review): 'c_' and 'x_' look like placeholders for a fitted constant and
# a feature respectively — confirm against the EDTS documentation.
edts = EDTS(
    data=data,
    feature_names=['x0', 'x1'],
    building_blocks=['c_', 'c_*x_', 'c_*x_*x_', 'c_*x_**2', 'sin(c_*x_)'],
    iterations=2000,
)

# Run the beam search; the result is read back from `edts.best_equation`.
edts.beam_search(beam_width=10, max_depth=4)

INFO:edc:Number of expanded building blocks: 8
INFO:edc:Best loss at depth 0: 426.21954
INFO:edc:Best loss at iteration 10/28: 426.21954
INFO:edc:Best loss at iteration 20/28: 426.21954
INFO:edc:Best loss at depth 1: 26.182837
INFO:edc:Best loss at iteration 10/33: 26.182837
INFO:edc:Best loss at iteration 20/33: 26.182837
INFO:edc:Best loss at iteration 30/33: 26.182837
INFO:edc:Best loss at depth 2: 1.4503188
INFO:edc:Best loss at iteration 10/27: 1.4503188
INFO:edc:Best loss at iteration 20/27: 1.4503188
INFO:edc:Best loss at depth 3: 1.9768323


In [4]:
# Show the recovered expression next to the ground truth used to build the data.
print(
    f"Found equation: {edts.best_equation}",
    "True equation: 2*x0 + 3*x1 + 5",
    sep="\n",
)

Found equation: 2.02553653717041*x0 + 2.99970316886902*x1 + 4.95020389556885
True equation: 2*x0 + 3*x1 + 5
