# Cycle
This cycle uses the mixture experimentalist, the BMS theorist (alongside linear and polynomial regression baselines), and an equation sampler as the source of the ground truth.

In [402]:
import copy
from dataclasses import dataclass, field
from typing import List

from sklearn.base import BaseEstimator
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_absolute_error
import numpy as np
import math
import pandas as pd
from autora.variable import VariableCollection, Variable
from autora.state.standard import StandardState
from autora.state import on_state
from autora.state.wrapper import state_fn_from_estimator
from autora.theorist.bms import BMSRegressor
from equation_tree import sample 
from equation_tree.tree import instantiate_constants
from equation_tree.prior import DEFAULT_PRIOR_FUNCTIONS, DEFAULT_PRIOR_OPERATORS, \
    structure_prior_from_max_depth
import pprint
from autora.experiment_runner.synthetic.abstract.equation import equation_experiment
from autora.experimentalist.mixture import sample as mixture_sample
from autora.experimentalist.grid_ import grid_pool
from autora.experimentalist.random_ import random_sample, random_pool
from autora.state import Delta
from autora.experimentalist.falsification import falsification_score_sample
from autora.experimentalist.model_disagreement import model_disagreement_score_sample
from autora.experimentalist.novelty import novelty_score_sample

In [403]:
# --- Sampling settings -------------------------------------------------------
N_CONDITIONS = 50_000  # NOTE(review): not referenced by any cell visible in this notebook
TEMPERATURE = 1.0  # forwarded to the mixture sampler; must be non-zero
# Per-sampler weights; a two-element list is [weight for positive scores, weight for negative scores].
WEIGHTS = {
    'falsification': [0.1, 0.1],
    'novelty': [0.5, 0.5],
    'disagreement': [0.3, 0.3],
}
NUM_SAMPLES = 100  # conditions selected from the pool in every cycle
POOL_RANGE = 5  # independent variables range over [-POOL_RANGE, POOL_RANGE]

# --- Ground-truth equation settings -----------------------------------------
MAX_TREE_DEPTH = 4
MAX_NUM_VARIABLES = 4
NUM_POOL_SAMPLES = 10000  # size of the candidate pool built by the pooler
CONSTANT_SIZE = 5  # constants are drawn as np.random.rand() * CONSTANT_SIZE

# --- Simulation settings ----------------------------------------------------
EPOCHS = 100  # epochs handed to the BMS regressor
CYCLES = 10  # number of discovery cycles to run


## Ground truth
Sampling the ground truth for this simulation.

In [404]:
# Assemble the prior handed to the equation sampler.
structure_prior = structure_prior_from_max_depth(MAX_TREE_DEPTH)
feature_prior = {'constants': .3, 'variables': .7}

# Show the structure, function and operator priors (in that order) for reference.
for _prior_part in (structure_prior, DEFAULT_PRIOR_FUNCTIONS, DEFAULT_PRIOR_OPERATORS):
    pprint.pprint(_prior_part)

prior = {
    'functions': DEFAULT_PRIOR_FUNCTIONS,
    'operators': DEFAULT_PRIOR_OPERATORS,
    'structures': structure_prior,
    'features': feature_prior,
}

{'[0, 1, 1, 2]': 0.16666666666666666,
 '[0, 1, 1]': 0.16666666666666666,
 '[0, 1, 2, 1]': 0.16666666666666666,
 '[0, 1, 2, 2]': 0.16666666666666666,
 '[0, 1, 2, 3]': 0.16666666666666666,
 '[0, 1, 2]': 0.16666666666666666}
{'abs': 0.14285714285714285,
 'cos': 0.14285714285714285,
 'exp': 0.14285714285714285,
 'log': 0.14285714285714285,
 'sin': 0.14285714285714285,
 'sqrt': 0.14285714285714285,
 'tan': 0.14285714285714285}
{'*': 0.2, '+': 0.2, '-': 0.2, '/': 0.2, '^': 0.2}


In [405]:
# Draw a single equation from the prior. `sample` here is the function imported from
# `equation_tree`. NOTE: a later cell defines its own `sample` (the mixture sampler), so
# re-running this cell after that one would call the wrong function.
equation_raw = sample(n=1, prior=prior, max_num_variables=MAX_NUM_VARIABLES)
# Display the symbolic form of the sampled equation.
equation_raw[0].sympy_expr

Processing: 100%|██████████| 1/1 [00:00<00:00, 95.15iteration/s]


x_2*sin(x_1)

In [406]:
# Instantiate the equation's constants from np.random.rand() * CONSTANT_SIZE, i.e. values
# uniform in [0, CONSTANT_SIZE). No random seed is set, so the result differs between runs.
equation = instantiate_constants(equation_raw[0], lambda: np.random.rand()*CONSTANT_SIZE)
# Display the symbolic form of the resulting ground-truth equation.
equation.sympy_expr


x_2*sin(x_1)

Defining the metadata based on the sampled ground truth.

In [407]:
# One independent variable per unique variable of the ground-truth equation,
# each with the value range [-POOL_RANGE, POOL_RANGE].
independent_variables = [
    Variable(equation.variables_unique[idx], value_range=(-POOL_RANGE, POOL_RANGE))
    for idx in range(equation.n_variables_unique)
]

# The only dependent variable is the equation output, named "y".
variables = VariableCollection(
    independent_variables=independent_variables,
    dependent_variables=[Variable("y")],
)

Defining the experiment runner from the equation and the variable collection.

In [408]:
# Synthetic experiment built from the ground-truth expression and the variables defined above.
# NOTE(review): rename_output_columns=False presumably keeps the output column named after the
# dependent variable ("y") — confirm against the equation_experiment documentation.
experiment = equation_experiment(equation.sympy_expr, variables.independent_variables, variables.dependent_variables[0], rename_output_columns=False)

### Defining the state
We can define an initial state for our discovery problem based on the variable specification above. We also wrap the experiment runner so that it operates on the state.

In [409]:
def _extending_list_field():
    """Return a dataclass field that defaults to an empty list and is tagged ``delta: extend``."""
    return field(default_factory=list, metadata={"delta": "extend"})


@dataclass(frozen=True)
class ExtendedState(StandardState):
    """StandardState plus per-theorist model histories and bookkeeping lists.

    Every additional field starts as an empty list and carries the ``"delta": "extend"``
    metadata.
    """

    # Fitted models, one history per theorist.
    models_bms: List[BaseEstimator] = _extending_list_field()
    models_linear: List[BaseEstimator] = _extending_list_field()
    models_polynom: List[BaseEstimator] = _extending_list_field()
    # Rejected-sample counts reported by the pooler.
    rejections: List[int] = _extending_list_field()
    # Mean absolute errors (presumably one entry per cycle — confirm against the writer).
    mad: List[float] = _extending_list_field()

# Initial state: only the variable specification is set; every other field keeps its default.
state = ExtendedState(
    variables=variables
)
# Wrap the experiment runner so that it operates on a state and reports its result under
# the `experiment_data` field.
runner_on_state = on_state(experiment.experiment_runner, output=["experiment_data"])

### Pooler

In [410]:
@on_state()
def experimentalist_pooler(variables, equation):
    """Build a pool of NUM_POOL_SAMPLES random conditions on which `equation` is finite.

    Random candidates are drawn repeatedly; candidates whose evaluation is NaN or infinite
    are discarded and counted as rejections.

    Args:
        variables: variable collection; its independent variables define the pool columns.
        equation: ground-truth equation exposing ``evaluate(conditions)``.

    Returns:
        A Delta with ``conditions`` (the first NUM_POOL_SAMPLES valid candidates) and
        ``rejections`` (a one-element list with the number of discarded candidates), or
        None when the rejection budget is exhausted.
    """
    max_rejections = 1_000_000
    column_names = [v.name for v in variables.independent_variables]

    # Collect the valid chunks and concatenate once at the end. This avoids growing a frame
    # inside the loop and avoids concatenating onto an empty, object-typed frame.
    accepted_chunks = []
    n_accepted = 0
    rejected = 0
    while rejected < max_rejections and n_accepted < NUM_POOL_SAMPLES:
        candidates = random_pool(variables, NUM_POOL_SAMPLES)
        evaluation = equation.evaluate(candidates)
        bad_positions = np.where(np.isnan(evaluation) | np.isinf(evaluation))[0]
        rejected += len(bad_positions)
        # np.where yields positions, so translate them to index labels before dropping.
        valid = candidates.drop(candidates.index[bad_positions])
        accepted_chunks.append(valid)
        n_accepted += len(valid)

    if rejected >= max_rejections:
        # NOTE(review): returning None from an on_state function is kept from the original;
        # confirm that the caller can handle it, otherwise raise an explicit error here.
        return None

    if accepted_chunks:
        conditions_ = pd.concat(accepted_chunks, ignore_index=True).head(NUM_POOL_SAMPLES)
    else:
        # Only reachable when NUM_POOL_SAMPLES is not positive.
        conditions_ = pd.DataFrame(columns=column_names)
    return Delta(conditions=conditions_, rejections=[rejected])

## Mixture experimentalist
Defining the mixture experimentalist and wrapping it into the state

In [411]:
"""
Mixture Experimentalist Sampler
"""

import numpy as np
from typing import Optional, Union

import pandas as pd


def adjust_distribution(p_, temperature):
    """Turn raw scores into a sampling distribution with a temperature-scaled softmax.

    The scores are first divided by the sum of their absolute values and then passed through
    ``exp(score / temperature)`` and normalised to sum to one. A temperature close to 0 makes
    the result almost deterministic (mass concentrates on the highest score); a very high
    temperature makes it close to uniform.

    Args:
        p_: array-like of scores (may contain negative values).
        temperature: non-zero scaling factor.

    Returns:
        numpy array of probabilities with the same length as ``p_``. All-zero scores give a
        uniform distribution; empty input gives an empty array.

    Raises:
        AssertionError: if ``temperature`` is 0.
    """
    # temperature cannot be 0
    assert temperature != 0, 'Temperature cannot be 0'
    p = np.asarray(p_, dtype=float)
    if p.size == 0:
        return p

    # Normalise the initial scores; skip when they are all zero to avoid 0/0 -> NaN.
    norm = np.sum(np.abs(p))
    if norm > 0:
        p = p / norm

    # Subtracting the maximum leaves the softmax unchanged but prevents exp() from
    # overflowing to inf (and the result from becoming NaN) at small temperatures.
    logits = p / temperature
    logits = logits - np.max(logits)
    weights = np.exp(logits)
    return weights / np.sum(weights)  # Normalizing the final distribution


def sample(conditions: Union[pd.DataFrame, np.ndarray], temperature: float,
                   samplers: list, params: dict,
                   num_samples: Optional[int] = None) -> pd.DataFrame:
    """Select conditions from a pool using a weighted mixture of sampler scores.

    Each sampler scores every condition in the pool. The scores are weighted, summed into a
    mixture score, converted into a probability distribution by ``adjust_distribution`` and
    used to draw ``num_samples`` conditions without replacement.

    Args:
        conditions: pool of experimental conditions to evaluate (DataFrame or array).
        temperature: how random the selection is. Cannot be 0; values close to 0 make the
            choice almost deterministic (highest mixture score wins), large values make it
            close to uniform.
        samplers: iterable of ``(function, name, weight)`` entries. ``function`` is called as
            ``function(conditions=pool, **params[name])`` and must return a DataFrame with a
            ``"score"`` column and one row per pool entry. ``weight`` is either a single
            number or a sequence of two numbers: the first is applied to positive scores,
            the second to negative scores. If the scores do not contain both signs, the
            first weight is applied to all of them.
        params: nested dictionary. Keys are the sampler names given in ``samplers``; values
            are dictionaries of keyword arguments for that sampler. A sampler without an
            entry is called without extra arguments.
        num_samples: number of conditions to select; defaults to the size of the pool.

    Returns:
        The selected rows of the pool with an additional ``"score"`` column holding their
        (unnormalised) mixture score.

    Raises:
        ValueError: if a sampler does not return exactly one score per pool entry.
    """
    condition_pool = pd.DataFrame(conditions)
    n_pool = len(condition_pool)

    mixture_scores = np.zeros(n_pool)
    # Accumulate the weighted scores of every sampler.
    for (function, name, weight) in samplers:
        sampler_params = params.get(name, {})
        pd_ranking = function(conditions=condition_pool, **sampler_params)

        # Samplers may return their rows ordered by score; sorting by index restores the
        # pool order. NOTE: scores are matched to pool rows by position afterwards, which
        # assumes that the pool index itself is sorted (e.g. a default RangeIndex).
        pd_ranking = pd_ranking.sort_index()
        scores = pd_ranking["score"].to_numpy(dtype=float)
        if scores.shape[0] != n_pool:
            raise ValueError(
                f"Sampler '{name}' returned {scores.shape[0]} scores "
                f"for a pool of {n_pool} conditions."
            )

        if np.ndim(weight) == 0:
            # A single weight is used for both the positive and the negative dimension.
            weighted_scores = scores * weight
        else:
            weight_positive = weight[0]
            weight_negative = weight[1] if len(weight) > 1 else weight[0]
            is_positive = scores > 0
            is_negative = scores < 0
            if is_positive.any() and is_negative.any():
                # Positive scores get the first weight, negative scores the second
                # (scores equal to zero stay zero either way).
                weighted_scores = np.where(
                    is_positive, scores * weight_positive, scores * weight_negative
                )
            else:
                weighted_scores = scores * weight_positive

        mixture_scores = mixture_scores + weighted_scores

    # adjust mixture scores wrt temperature
    weighted_mixture_scores_adjusted = adjust_distribution(mixture_scores, temperature)

    if num_samples is None:
        num_samples = n_pool

    condition_indices = np.random.choice(np.arange(n_pool), num_samples,
                                         p=weighted_mixture_scores_adjusted, replace=False)
    # Copy before adding the column so that we never write into a view of the pool.
    conditions_ = condition_pool.iloc[condition_indices].copy()
    conditions_["score"] = mixture_scores[condition_indices]

    return conditions_


# Alias for the mixture sampler defined in this cell; the notebook calls it under this name,
# which keeps it distinct from the imported `mixture_sample`.
mixture_sample_test = sample

In [412]:
def get_best_model(models, X, y):
    """Rank models by mean absolute error on (X, y).

    Args:
        models: fitted models exposing ``predict``.
        X: conditions to predict on.
        y: observations to compare the predictions with.

    Returns:
        Tuple ``(best model, second-best model, error of the best model)``. With a single
        model, that model is returned in both positions.
    """
    errors = [mean_absolute_error(y, candidate.predict(X)) for candidate in models]

    best_error = min(errors)
    best_position = errors.index(best_error)

    # Mask the winner with infinity so that the next minimum is the runner-up.
    masked_errors = errors[:best_position] + [math.inf] + errors[best_position + 1:]
    runner_up_position = masked_errors.index(min(masked_errors))

    return models[best_position], models[runner_up_position], best_error


In [413]:
@on_state()
def experimentalist_sample(conditions,
                           models,
                           models_bms,
                           models_linear,
                           models_polynom,
                           experiment_data,
                           variables,
                           temperature,
                           weights,
                           num_samples):
    """Select the next experimental conditions from the pool.

    While there is no experiment data or not every theorist has produced a model yet,
    conditions are drawn at random. Afterwards the mixture sampler combines novelty,
    falsification and model-disagreement scores.

    Args:
        conditions: pool of candidate conditions.
        models: history of selected models; the latest one is used for falsification.
        models_bms, models_linear, models_polynom: model histories of the three theorists.
        experiment_data: data collected so far (None before the first experiment).
        variables: variable collection describing the columns of ``experiment_data``.
        temperature: temperature forwarded to the mixture sampler.
        weights: dict with the keys "novelty", "falsification" and "disagreement".
        num_samples: number of conditions to select.

    Returns:
        Delta with the selected ``conditions`` and a one-element ``mad`` list holding the
        mean absolute error of the best current model (None when sampling at random).
    """
    # Empty model lists have to be treated like missing ones, otherwise the `[-1]`
    # lookups below raise an IndexError.
    have_all_models = bool(models) and bool(models_bms) and bool(models_linear) and bool(models_polynom)
    if not have_all_models or experiment_data is None:
        print('First cycle: Using random sampler')
        conditions_ = random_sample(conditions, num_samples)
        mad = None
    else:
        experiment_conditions = experiment_data[[v.name for v in variables.independent_variables]]
        experiment_observations = experiment_data[[v.name for v in variables.dependent_variables]]

        models_to_consider = [models_bms[-1], models_linear[-1], models_polynom[-1]]
        best_model, second_best_model, mad = get_best_model(
            models_to_consider, experiment_conditions, experiment_observations
        )

        params_ = {
            "falsification": {
                "reference_conditions": experiment_conditions,
                "reference_observations": experiment_observations,
                "model": models[-1],
            },
            "novelty": {"reference_conditions": experiment_conditions},
            # Request a score for every pool entry so that the mixture sampler can combine
            # the scores of all samplers row by row.
            "disagreement": {
                "models": [best_model, second_best_model],
                "num_samples": len(conditions),
            },
        }

        samplers = [
            [novelty_score_sample, "novelty", weights["novelty"]],
            [falsification_score_sample, "falsification", weights["falsification"]],
            [model_disagreement_score_sample, "disagreement", weights["disagreement"]]
        ]

        conditions_ = mixture_sample_test(conditions, temperature, samplers, params_, num_samples)
        conditions_ = conditions_.drop("score", axis=1)

    # The key must be `mad`, the name of the field declared on ExtendedState; the previous
    # key `mads` does not match any field of the state.
    return Delta(conditions=conditions_, mad=[mad])

## BMS theorist
Defining the BMS theorist, together with linear and polynomial regression baselines, and wrapping each of them so that it operates on the state.

In [414]:
@on_state()
def bms_theorist(experiment_data: pd.DataFrame, variables: VariableCollection, **kwargs):
    """Fit a new BMS regressor (EPOCHS epochs) on all experiment data.

    Args:
        experiment_data: frame with one column per independent and dependent variable.
        variables: variable collection naming those columns.
        **kwargs: parameters applied to the regressor through ``set_params``.

    Returns:
        Delta appending the fitted model to ``models_bms``.
    """
    iv_names = [iv.name for iv in variables.independent_variables]
    dv_names = [dv.name for dv in variables.dependent_variables]
    inputs = experiment_data[iv_names]
    targets = experiment_data[dv_names]

    regressor = BMSRegressor(epochs=EPOCHS)
    configured = regressor.set_params(**kwargs)
    fitted = configured.fit(inputs, targets)
    return Delta(models_bms=[fitted])

@on_state()
def linear_theorist(experiment_data: pd.DataFrame, variables: VariableCollection, **kwargs):
    """Fit a new linear regression on all experiment data.

    Args:
        experiment_data: frame with one column per independent and dependent variable.
        variables: variable collection naming those columns.
        **kwargs: parameters applied to the regression through ``set_params``.

    Returns:
        Delta appending the fitted model to ``models_linear``.
    """
    iv_names = [iv.name for iv in variables.independent_variables]
    dv_names = [dv.name for dv in variables.dependent_variables]
    inputs = experiment_data[iv_names]
    targets = experiment_data[dv_names]

    regression = LinearRegression()
    configured = regression.set_params(**kwargs)
    fitted = configured.fit(inputs, targets)
    return Delta(models_linear=[fitted])


def PolynomialRegression(degree=3, **kwargs):
    """Build a pipeline of polynomial feature expansion followed by linear regression.

    Args:
        degree: degree handed to ``PolynomialFeatures``.
        **kwargs: keyword arguments handed to ``LinearRegression``.

    Returns:
        The (unfitted) pipeline created by ``make_pipeline``.
    """
    expansion = PolynomialFeatures(degree)
    regression = LinearRegression(**kwargs)
    return make_pipeline(expansion, regression)


@on_state()
def polynomial_theorist(experiment_data: pd.DataFrame, variables: VariableCollection, **kwargs):
    """Fit a new polynomial regression on all experiment data.

    Args:
        experiment_data: frame with one column per independent and dependent variable.
        variables: variable collection naming those columns.
        **kwargs: forwarded to ``PolynomialRegression`` (``degree`` and any
            ``LinearRegression`` keyword argument). Without extra arguments the defaults
            are used, exactly as before; previously these arguments were silently ignored.

    Returns:
        Delta appending the fitted model to ``models_polynom``.
    """
    ivs = [v.name for v in variables.independent_variables]
    dvs = [v.name for v in variables.dependent_variables]
    X, y = experiment_data[ivs], experiment_data[dvs]
    # Honour the keyword arguments, as the BMS and linear theorists do.
    new_model = PolynomialRegression(**kwargs)
    new_model.fit(X, y)
    return Delta(models_polynom=[new_model])


In [415]:
@on_state()
def best_model(models_bms, models_linear, models_polynom, experiment_data, variables):
    """Select the latest model with the lowest mean absolute error on all experiment data.

    Args:
        models_bms, models_linear, models_polynom: model histories of the three theorists;
            only the latest entry of each is compared.
        experiment_data: frame with one column per independent and dependent variable.
        variables: variable collection naming those columns.

    Returns:
        Delta with the selected model under the key ``model``. Ties are resolved in the
        order BMS, linear, polynomial.
    """
    ivs = [v.name for v in variables.independent_variables]
    dvs = [v.name for v in variables.dependent_variables]
    X, y = experiment_data[ivs], experiment_data[dvs]

    def _error(model):
        error = mean_absolute_error(y, model.predict(X))
        # Rank an undefined error last so that a model is always selected.
        return math.inf if math.isnan(error) else error

    # min() keeps the first of several equally good candidates, so the candidate order
    # defines the tie-breaking.
    candidates = [models_bms[-1], models_linear[-1], models_polynom[-1]]
    new_model = min(candidates, key=_error)

    return Delta(model=new_model)


In [417]:
def cycle(s):
    """Run one discovery cycle on state `s` and return the resulting state.

    Steps: build a condition pool, select conditions, run the experiment, fit the BMS,
    linear and polynomial theorists (in that order) and select the best model.
    """
    current = experimentalist_pooler(s, equation=equation)
    current = experimentalist_sample(
        current,
        temperature=TEMPERATURE,
        weights=WEIGHTS,
        num_samples=NUM_SAMPLES,
    )
    current = runner_on_state(current)
    for theorist in (bms_theorist, linear_theorist, polynomial_theorist):
        current = theorist(current)
    return best_model(current)


In [418]:
# Start from a fresh state (only the variable specification) and run CYCLES discovery cycles.
state = ExtendedState(variables=variables)
for _ in range(CYCLES):
    state = cycle(state)
    # NOTE: printing the whole state shows every field on each cycle, which makes the
    # cell output very long.
    print(state)


INFO:autora.theorist.bms.regressor:BMS fitting started


First cycle: Using random sampler


100%|██████████| 100/100 [00:02<00:00, 44.87it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
6024 -4.972588  1.246799
9431 -3.476911  2.086347
1139  3.763949 -3.922504
4424  1.875390  3.138668
6284  0.000512  2.228118
...        ...       ...
6380  3.448111  3.694001
58   -2.902460 -1.563460
3728 -4.140809 -0.737536
7444  4.361752  0.668926
2094 -1.478860  2.005088

[100 rows x 2 columns], experiment_data=           x_1       x_2         y
6024 -4.972588  1.246799  1.198879
9431 -3.47691

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 44.81it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
2521 -3.526926  0.867416
78    1.940771 -1.529789
1451 -4.586940  4.458852
6292 -4.872850  1.188156
9379  1.287893  2.660605
...        ...       ...
3322 -0.977152 -0.644851
8253  1.471356  0.566655
4267 -2.530962  3.447864
5930  0.685021 -2.931624
8847  0.823629  1.602749

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 41.44it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
2404 -3.278960 -3.518387
6843 -1.859844  4.514991
4047  0.430599  1.327597
7092 -0.547253 -2.526311
6802  0.646113  4.997263
...        ...       ...
2928 -4.267605  2.824307
7806 -4.051484  1.667711
695   1.701781  1.165968
2065 -4.705109  2.584661
2285  4.010731 -3.020411

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 40.86it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
1533 -1.120089  2.778907
373  -4.637710  3.987552
8577 -0.673066 -2.560820
1958 -3.424120 -0.990772
9375 -4.017809 -1.262675
...        ...       ...
5890 -2.782835  1.654703
2693  2.045892 -4.793129
31   -4.999577 -3.101986
7813 -2.685108 -1.214866
7968  0.825698 -3.015203

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 42.15it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
157   2.114677  2.414143
3433 -4.553861  1.126245
8918  3.223231  4.849803
5255  1.549843 -1.944177
4382  0.663279  0.041648
...        ...       ...
7093 -4.605171 -2.713041
9479 -0.144879 -2.746810
7466  3.295300 -3.624275
1713  3.925974  0.407284
2182 -3.879264  3.431895

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 41.70it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
802  -3.667642 -3.535421
8479 -4.666356 -4.243002
7112 -1.130224  2.791542
3522  3.495622 -2.241599
9001 -2.949734 -1.763489
...        ...       ...
6104  1.467849 -2.399360
2925 -4.664150  1.836307
8147  2.457941  2.604310
5946 -4.535442 -3.209113
7127 -2.320022  2.949855

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 42.92it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
7622  4.475036 -1.201707
2717 -0.046040 -0.838346
4762 -1.788619  1.788093
900   0.968853 -2.138498
7746  1.534532  0.399176
...        ...       ...
5103 -2.900829 -0.758359
7475 -1.413704 -0.848575
2167  1.954391  4.461027
6623 -2.397992  0.167554
5539 -2.276921  4.928952

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 40.71it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
7616  2.279701  3.368215
4161 -4.187935 -1.833601
167   4.322503 -0.911154
4737  2.244603  0.178311
6917 -4.520401  4.154067
...        ...       ...
2850  2.507628 -4.767286
8519  1.515521 -1.864143
3313 -0.042368 -3.273431
3889  1.525949 -4.373424
6553  4.170788 -3.831876

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 39.52it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
3673  2.422131 -2.519638
454   1.905083  4.805466
9287 -1.084428 -1.039220
9670  4.119285 -4.395728
3794 -3.865051 -1.939633
...        ...       ...
780  -2.235227  4.941947
8676 -4.372358  1.532387
6525  2.670493  1.766028
4207  3.986476 -4.772748
2482  4.716485 -2.851212

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  

INFO:autora.theorist.bms.regressor:BMS fitting started
100%|██████████| 100/100 [00:02<00:00, 39.21it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


ExtendedState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False), Variable(name='x_2', value_range=(-5, 5), allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1       x_2
1322  4.947106 -0.163254
3420  1.400895  4.745172
4880 -1.811978  2.674648
2739  0.296847  2.208251
2324 -3.290069  4.160282
...        ...       ...
4858  4.265698  3.521263
6376  4.964952 -2.424741
2499 -2.258300 -4.568774
1814  3.686737 -3.470595
2164 -1.573526  1.043834

[100 rows x 2 columns], experiment_data=          x_1       x_2         y
0   -4.972588  1.246799  1.198879
1   -3.476911  