# Cycle
This cycle uses mixture experimentalist, BMS theorist, and equation sampler as a source for the ground truth. 

In [264]:
import copy
from dataclasses import dataclass, field
from typing import List

from sklearn.base import BaseEstimator
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_absolute_error
import numpy as np
import math
import pandas as pd
from autora.variable import VariableCollection, Variable
from autora.state.standard import StandardState
from autora.state import on_state
from autora.state.wrapper import state_fn_from_estimator
from autora.theorist.bms import BMSRegressor
from equation_tree import sample 
from equation_tree.tree import instantiate_constants
from equation_tree.prior import DEFAULT_PRIOR_FUNCTIONS, DEFAULT_PRIOR_OPERATORS, \
    structure_prior_from_max_depth
import pprint
from autora.experiment_runner.synthetic.abstract.equation import equation_experiment
from autora.experimentalist.mixture import sample as mixture_sample
from autora.experimentalist.grid_ import grid_pool
from autora.experimentalist.random_ import random_sample, random_pool
from autora.state import Delta
from autora.experimentalist.falsification import falsification_score_sample
from autora.experimentalist.model_disagreement import model_disagreement_score_sample
from autora.experimentalist.novelty import novelty_score_sample

In [265]:
# SAMPLING
N_CONDITIONS = 50000
TEMPERATURE = 1.
WEIGHTS = {'falsification':[.1, .1], 'novelty':[.5, .5], 'disagreement': [.3, .3]}
NUM_SAMPLES = 100
POOL_RANGE = 5

# EQUATION
MAX_TREE_DEPTH = 4
MAX_NUM_VARIABLES = 4
NUM_POOL_SAMPLES = 10_000
CONSTANT_SIZE = 5

## Ground truth
Sampling the ground truth for this simulation.

In [266]:
structure_prior = structure_prior_from_max_depth(MAX_TREE_DEPTH)
pprint.pprint(structure_prior)
pprint.pprint(DEFAULT_PRIOR_FUNCTIONS)
pprint.pprint(DEFAULT_PRIOR_OPERATORS)
feature_prior = {'constants': .3, 'variables': .7}
prior = {'functions': DEFAULT_PRIOR_FUNCTIONS, 'operators': DEFAULT_PRIOR_OPERATORS, 'structures': structure_prior, 'features': feature_prior}

{'[0, 1, 1, 2]': 0.16666666666666666,
 '[0, 1, 1]': 0.16666666666666666,
 '[0, 1, 2, 1]': 0.16666666666666666,
 '[0, 1, 2, 2]': 0.16666666666666666,
 '[0, 1, 2, 3]': 0.16666666666666666,
 '[0, 1, 2]': 0.16666666666666666}
{'abs': 0.14285714285714285,
 'cos': 0.14285714285714285,
 'exp': 0.14285714285714285,
 'log': 0.14285714285714285,
 'sin': 0.14285714285714285,
 'sqrt': 0.14285714285714285,
 'tan': 0.14285714285714285}
{'*': 0.2, '+': 0.2, '-': 0.2, '/': 0.2, '^': 0.2}


In [267]:
equation_raw = sample(n=1, prior=prior, max_num_variables=MAX_NUM_VARIABLES)
equation_raw[0].sympy_expr

Processing: 100%|██████████| 1/1 [00:00<00:00, 65.15iteration/s]


x_1**tan(x_2)

In [268]:
equation = instantiate_constants(equation_raw[0], lambda: np.random.rand()*CONSTANT_SIZE)
equation.sympy_expr


x_1**tan(x_2)

Defining the metadata based on the sampled ground truth.

In [269]:
independent_variables = []
for v in range(equation.n_variables_unique):
    independent_variables.append(Variable(equation.variables_unique[v],value_range=(-POOL_RANGE, POOL_RANGE)))

variables=VariableCollection(
        independent_variables=independent_variables,
        dependent_variables=[Variable("y")]
    )

Defining experiment runner from the equation and the variable collection

In [270]:
experiment = equation_experiment(equation.sympy_expr, variables.independent_variables, variables.dependent_variables[0], rename_output_columns=False)

### Defining the state
We can define an initial state for our discovery problem based on the variable specification above. Wrapping experiment runner into the state.

In [271]:
@dataclass(frozen=True)
class ExtendedState(StandardState):
    models_bms: List[BaseEstimator] = field(
        default_factory=list,
        metadata={"delta": "extend"},
    )
    models_linear: List[BaseEstimator] = field(
        default_factory=list,
        metadata={"delta": "extend"},
    )
    models_polynom: List[BaseEstimator] = field(
        default_factory=list,
        metadata={"delta": "extend"},
    )
    rejections: List[int] = field(
        default_factory=list,
        metadata={"delta": "extend"},
    )
    mad: List[float] = field(
        default_factory=list,
        metadata={"delta": "extend"},
    )

state = ExtendedState(
    variables=variables
)
runner_on_state = on_state(experiment.experiment_runner, output=["experiment_data"])

### Pooler

In [272]:
@on_state()
def experimentalist_pooler(variables, equation):
    conditions_ = pd.DataFrame(columns=[v.name for v in variables.independent_variables])
    i = 0
    while i < 1_000_000 and len(conditions_.index) < NUM_POOL_SAMPLES:
        _sample = random_pool(variables, NUM_POOL_SAMPLES)
        evaluation = equation.evaluate(_sample)
        bad_indices = np.where(np.isnan(evaluation) | np.isinf(evaluation))[0]
        _sample = _sample.drop(bad_indices)
        if np.isnan(evaluation).any() or np.isinf(evaluation).any():
            i+=len(bad_indices)
        conditions_ = pd.concat([conditions_,_sample], ignore_index=True)
    if i >= 1_000_000:
        return None
    conditions_ = conditions_.head(NUM_POOL_SAMPLES)
    return Delta(conditions=conditions_, rejections=[i])

## Mixture experimentalist
Defining the mixture experimentalist and wrapping it into the state

In [306]:
"""
Mixture Experimentalist Sampler
"""

import numpy as np
from typing import Optional, Union

import pandas as pd


def adjust_distribution(p_, temperature):
    # temperature cannot be 0
    assert temperature != 0, 'Temperature cannot be 0'
    p = np.array(p_)
    # If the temperature is very low (close to 0), then the sampling will become almost deterministic, picking the event with the highest probability.
    # If the temperature is very high, then the sampling will be closer to uniform, with all events having roughly equal probability.

    p = p / np.sum(np.abs(p))  # Normalizing the initial distribution

    p = np.exp(p / temperature)
    final_p = p / np.sum(p)  # Normalizing the final distribution
    print(final_p)
    return final_p


def sample(conditions: Union[pd.DataFrame, np.ndarray], temperature: float,
                   samplers: list, params: dict,
                   num_samples: Optional[int] = None) -> pd.DataFrame:
    """

    Args:
        conditions: pool of experimental conditions to evaluate: pd.Dataframe
        temperature: how random is selection of conditions (cannot be 0; (0:1) - the choices are more deterministic than the choices made wrt
        samplers: tuple containing sampler functions, their names, and weights
        for sampler functions that return both positive and negative scores, user can provide a list with two weights: the first one will be applied to positive scores, the second one -- to the negative
        params: nested dictionary. keys correspond to the sampler function names (same as provided in samplers),
        values correspond to the dictionaries of function arguments (argument name: its value)
        num_samples: number of experimental conditions to select

    Returns:
        Sampled pool of experimental conditions with the scores attached to them
    """

    condition_pool = pd.DataFrame(conditions)

    rankings = pd.DataFrame()
    mixture_scores = np.zeros(len(condition_pool))
    ## getting rankings and weighted scores from each function
    for (function, name, weight) in samplers:

        sampler_params = params[name]
        print(name)
        print(params[name])
        pd_ranking = function(conditions=condition_pool, **sampler_params)
        print(pd_ranking)

        # except:
        #     pd_ranking = function(conditions=condition_pool)
        # sorting by index
        pd_ranking = pd_ranking.sort_index()
        # if only one weight is provided, use it for both negative and positive dimensions
        if isinstance(weight, float) or isinstance(weight, int):
            pd_ranking["score"] = pd_ranking["score"] * weight
        else:
            if len(pd_ranking["score"] < 0) > 0 and len(pd_ranking["score"] > 0) > 0:  # there are both positive and negative values
                pd_ranking.loc[pd_ranking["score"] > 0]["score"] = pd_ranking.loc[pd_ranking["score"] > 0]["score"] * weight[0]  # positive dimension gets the first weight
                pd_ranking.loc[pd_ranking["score"] < 0]["score"] = pd_ranking.loc[pd_ranking["score"] < 0]["score"] * weight[1]  # negative dimension gets the second weight
            else:
                pd_ranking["score"] = pd_ranking["score"] * weight[0]

        pd_ranking.rename(columns={"score": f"{name}_score"}, inplace=True)
        # sum_scores are arranged based on the original conditions_ indices
        mixture_scores = mixture_scores + pd_ranking[f"{name}_score"]

        rankings = pd.merge(rankings, pd_ranking, left_index=True, right_index=True, how="outer")

    # adjust mixture scores wrt temperature
    weighted_mixture_scores_adjusted = adjust_distribution(mixture_scores, temperature)

    if num_samples is None:
        num_samples = condition_pool.shape[0]

    condition_indices = np.random.choice(np.arange(len(condition_pool)), num_samples,
                                         p=weighted_mixture_scores_adjusted, replace=False)
    conditions_ = condition_pool.iloc[condition_indices]
    conditions_["score"] = mixture_scores

    return conditions_


mixture_sample_test = sample

In [307]:
def get_best_model(models, X, y):
    mads = []
    for m in models:
        prediction = m.predict(X)
        mad = mean_absolute_error(y, prediction)
        mads.append(mad)
    min_value = min(mads)
    min_index = mads.index(min_value)
    mads[min_index] = math.inf
    min_value_second = min(mads)
    min_index_second = mads.index(min_value_second)
    return models[min_index], models[min_index_second], min_value


In [356]:
import itertools
from typing import Iterable, List, Union
import numpy as np
import pandas as pd

from autora.utils.deprecation import deprecated_alias
from sklearn.preprocessing import StandardScaler

def score_sample(conditions: Union[pd.DataFrame, np.ndarray],
           models: List,
           num_samples: Optional[int] = None):
    """
    A sampler that returns selected samples for independent variables
    for which the models disagree the most in terms of their predictions.

    Args:
        X: pool of IV conditions to evaluate in terms of model disagreement
        models: List of Scikit-learn (regression or classification) models to compare
        num_samples: number of samples to select

    Returns: Sampled pool
    """

    if isinstance(conditions, Iterable) and not isinstance(conditions, pd.DataFrame):
        conditions = np.array(list(conditions))

    condition_pool_copy = conditions.copy()
    # conditions = np.array(conditions)
    #
    # X_predict = np.array(conditions)
    # if len(X_predict.shape) == 1:
    #     X_predict = X_predict.reshape(-1, 1)

    model_disagreement = list()

    # collect diagreements for each model pair
    for model_a, model_b in itertools.combinations(models, 2):
        print('*')
        print(model_a)
        print(type(model_a))
        print('*')
        print(model_b)
        print(type(model_b))


        # determine the prediction method
        if hasattr(model_a, "predict_proba") and hasattr(model_b, "predict_proba"):
            model_a_predict = model_a.predict_proba
            model_b_predict = model_b.predict_proba
        elif hasattr(model_a, "predict") and hasattr(model_b, "predict"):
            model_a_predict = model_a.predict
            model_b_predict = model_b.predict
        else:
            raise AttributeError(
                "Models must both have `predict_proba` or `predict` method."
            )

        #print(X_predict)
        # get predictions from both models
        y_a = np.array(model_a_predict(conditions))
        y_b = np.array(model_b_predict(conditions))
        print('***')
        print(y_a)
        print('*')
        print(y_b)
        print('***')

        assert y_a.shape == y_b.shape, "Models must have same output shape."

        # determine the disagreement between the two models in terms of mean-squared error
        if len(y_a.shape) == 1:
            disagreement = (y_a - y_b) ** 2
        else:
            disagreement = np.mean((y_a - y_b) ** 2, axis=1)

        model_disagreement.append(disagreement)

    assert len(model_disagreement) >= 1, "No disagreements to compare."

    # sum up all model disagreements
    summed_disagreement = np.sum(model_disagreement, axis=0)

    if isinstance(condition_pool_copy, pd.DataFrame):
        conditions = pd.DataFrame(conditions, columns=condition_pool_copy.columns)
    else:
        conditions = pd.DataFrame(conditions)

    # normalize the distances
    scaler = StandardScaler()
    score = scaler.fit_transform(summed_disagreement.reshape(-1, 1)).flatten()

    # order rows in Y from highest to lowest
    conditions["score"] = score
    conditions = conditions.sort_values(by="score", ascending=False)
    if num_samples is None:
        return conditions
    return conditions.head(num_samples)

def sample(conditions: Union[pd.DataFrame, np.ndarray],
           models: List,
           num_samples: int = 1):
    """
    A sampler that returns selected samples for independent variables
    for which the models disagree the most in terms of their predictions.

    Args:
        X: pool of IV conditions to evaluate in terms of model disagreement
        models: List of Scikit-learn (regression or classification) models to compare
        num_samples: number of samples to select

    Returns: Sampled pool
    """

    selected_conditions = score_sample(conditions, models, num_samples)
    selected_conditions.drop(columns=["score"], inplace=True)

    return selected_conditions

model_disagreement_sample = sample
model_disagreement_sample.__doc__ = """Alias for sample"""
model_disagreement_score_sample_2 = score_sample


In [362]:
@on_state()
def experimentalist_sample(conditions,
                           models,
                           models_bms,
                           models_linear,
                           models_polynom,
                           experiment_data,
                           variables,
                           temperature,
                           weights,
                           num_samples):
    if models is None or experiment_data is None:
        print('First cycle: Using random sampler')
        conditions_ = random_sample(conditions, num_samples)
        mad = None
    else:
        experiment_conditions = experiment_data[[v.name for v in variables.independent_variables]]
        experiment_observations = experiment_data[[v.name for v in variables.dependent_variables]]
        params_ = {} #copy.deepcopy(params)
        params_["falsification"] = {"reference_conditions": experiment_conditions, "reference_observations": experiment_observations, "model": models[-1]}

        params_["novelty"] = {"reference_conditions": experiment_conditions}
        models_to_consider = [models_bms[-1], models_linear[-1], models_polynom[-1]]
        best_model, second_best_model, mad = get_best_model(models_to_consider, experiment_conditions, experiment_observations)

        params_["disagreement"] = {"models": [best_model, second_best_model]}

        samplers = [
            [novelty_score_sample, "novelty", weights["novelty"]],
            [falsification_score_sample, "falsification", weights["falsification"]],
            [model_disagreement_score_sample_2, "disagreement", weights["disagreement"]]
        ]

        conditions_ = mixture_sample_test(conditions, temperature, samplers, params_, num_samples)
        conditions_ = conditions_.drop("score", axis = 1)
    #d = Delta(conditions=conditions)
    d = Delta(conditions = conditions_, mads=[mad])
    return d

## BMS theorist
Defining the BMS theorist and wrapping it into the state

In [363]:
@on_state()
def bms_theorist(experiment_data: pd.DataFrame, variables: VariableCollection, **kwargs):
    ivs = [v.name for v in variables.independent_variables]
    dvs = [v.name for v in variables.dependent_variables]
    X, y = experiment_data[ivs], experiment_data[dvs]
    new_model = BMSRegressor(epochs=10).set_params(**kwargs).fit(X, y)
    return Delta(models_bms=[new_model])

@on_state()
def linear_theorist(experiment_data: pd.DataFrame, variables: VariableCollection, **kwargs):
    ivs = [v.name for v in variables.independent_variables]
    dvs = [v.name for v in variables.dependent_variables]
    X, y = experiment_data[ivs], experiment_data[dvs]
    new_model = LinearRegression().set_params(**kwargs).fit(X, y)
    return Delta(models_linear=[new_model])


def PolynomialRegression(degree=3, **kwargs):
    return make_pipeline(PolynomialFeatures(degree), LinearRegression(**kwargs))


@on_state()
def polynomial_theorist(experiment_data: pd.DataFrame, variables: VariableCollection, **kwargs):
    ivs = [v.name for v in variables.independent_variables]
    dvs = [v.name for v in variables.dependent_variables]
    X, y = experiment_data[ivs], experiment_data[dvs]
    new_model = PolynomialRegression()
    new_model.fit(X, y)
    return Delta(models_polynom=[new_model])


In [364]:
@on_state()
def best_model(models_bms, models_linear, models_polynom, experiment_data, variables):
    ivs = [v.name for v in variables.independent_variables]
    dvs = [v.name for v in variables.dependent_variables]
    X, y = experiment_data[ivs], experiment_data[dvs]
    prediction_bms = models_bms[-1].predict(X)
    prediction_linear = models_linear[-1].predict(X)
    prediction_polynomial = models_polynom[-1].predict(X)
    mad_bms = mean_absolute_error(y, prediction_bms)
    mad_linear = mean_absolute_error(y, prediction_linear)
    mad_poly = mean_absolute_error(y, prediction_polynomial)
    if mad_bms <= mad_linear and mad_bms <= mad_poly:
        new_model = models_bms[-1]
    elif mad_linear <= mad_bms and mad_linear <= mad_poly:
        new_model = models_linear[-1]
    elif mad_poly <= mad_linear and mad_poly <= mad_bms:
        new_model = models_polynom[-1]

    return Delta(model=new_model)


In [365]:
def cycle(s):
    s_pool = experimentalist_pooler(s, equation=equation)

    s_conditions = experimentalist_sample(s_pool, temperature=TEMPERATURE, weights=WEIGHTS, num_samples=NUM_SAMPLES)
    s_run = runner_on_state(s_conditions)
    s_theory = bms_theorist(s_run)
    s_theory = linear_theorist(s_theory)
    s_theory = polynomial_theorist(s_theory)
    s_best = best_model(s_theory)
    return s_best


In [366]:
state = ExtendedState(variables=variables)
for _ in range(10):
    state = cycle(state)
    #print(state)


INFO:autora.theorist.bms.regressor:BMS fitting started


First cycle: Using random sampler


100%|██████████| 10/10 [00:00<00:00, 36.05it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


novelty
{'reference_conditions':            x_2       x_1
6719  3.640071  0.578201
5633 -2.073078  0.277541
6751 -3.131717  0.030537
4354  4.843291  4.178432
2839 -2.115178  0.365652
...        ...       ...
9004  4.972474  2.709792
8477 -1.347855  0.074540
1941 -3.288130  1.927619
4218 -4.644877  2.912865
517   4.924244  4.393572

[100 rows x 2 columns]}
           x_2       x_1     score
1845  4.995451  4.956534  2.751918
8670  4.973543  4.978256  2.743043
5455 -4.956387  0.078461  2.646398
747  -4.975788  0.125577  2.636224
7706  4.871983  4.978970  2.632343
...        ...       ...       ...
8384 -0.197487  2.507371 -1.467620
9476 -0.079526  2.334320 -1.467744
4788 -0.116831  2.505667 -1.468134
6084 -0.071880  2.482072 -1.468198
8538 -0.146291  2.478373 -1.468967

[10000 rows x 3 columns]
falsification
{'reference_conditions':            x_2       x_1
6719  3.640071  0.578201
5633 -2.073078  0.277541
6751 -3.131717  0.030537
4354  4.843291  4.178432
2839 -2.115178  0.365652
...    

INFO:autora.theorist.bms.regressor:BMS fitting started


           x_2       x_1     score
0     4.688133  3.789325  8.811518
1     4.700895  3.826751  8.790206
2     4.667707  3.801004  8.787696
3     4.883668  3.968775  8.784983
4     4.606841  3.754486  8.776972
...        ...       ...       ...
9995 -0.276897  0.126102 -1.386545
9996 -0.259466  0.116295 -1.626013
9997 -0.317569  0.073714 -1.639683
9998 -0.370991  0.011927 -1.938700
9999 -0.313216  0.004767 -2.524486

[10000 rows x 3 columns]
disagreement
{'models': [x_1, LinearRegression()]}
*
x_1
<class 'autora.theorist.bms.regressor.BMSRegressor'>
*
LinearRegression()
<class 'sklearn.linear_model._base.LinearRegression'>
***
[[3.65166372]
 [1.31632518]
 [3.11433824]
 ...
 [3.21838001]
 [3.15764804]
 [3.09285835]]
*
[[ 2.72935549e+58]
 [-9.79272828e+57]
 [ 2.52303514e+58]
 ...
 [ 2.69199969e+58]
 [ 3.29770657e+58]
 [-7.02537640e+57]]
***
           x_2       x_1     score
8670  4.973543  4.978256  4.717630
1845  4.995451  4.956534  4.708154
7706  4.871983  4.978970  4.610564
5912  4.9

100%|██████████| 10/10 [00:00<00:00, 33.89it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


novelty
{'reference_conditions':           x_2       x_1
0    3.640071  0.578201
1   -2.073078  0.277541
2   -3.131717  0.030537
3    4.843291  4.178432
4   -2.115178  0.365652
..        ...       ...
195  4.786528  0.408847
196 -0.456241  3.740288
197 -3.592439  4.131470
198  3.387096  1.251998
199 -2.512824  0.271259

[200 rows x 2 columns]}
           x_2       x_1     score
9656 -4.990451  0.024780  2.814429
1602 -4.973431  0.070106  2.765909
6069 -4.969827  0.074790  2.758829
5811 -4.898946  4.948576  2.743787
4506 -4.881386  4.952562  2.727202
...        ...       ...       ...
8575  0.255714  2.198205 -1.456090
6028  0.141552  2.246408 -1.456443
606   0.226992  2.319334 -1.456543
3793  0.148623  2.254049 -1.456607
95    0.227717  2.289020 -1.456953

[10000 rows x 3 columns]
falsification
{'reference_conditions':           x_2       x_1
0    3.640071  0.578201
1   -2.073078  0.277541
2   -3.131717  0.030537
3    4.843291  4.178432
4   -2.115178  0.365652
..        ...       ...
1

INFO:autora.theorist.bms.regressor:BMS fitting started


           x_2       x_1      score
0     4.864975  3.967725  17.804863
1     4.947167  4.012238  17.732672
2     4.858415  3.977670  17.658003
3     4.828424  3.963694  17.350960
4     4.709502  3.822540  17.181192
...        ...       ...        ...
9995  1.396194  1.286996  -1.562402
9996  1.470865  1.347071  -1.593036
9997  1.541364  1.398736  -1.687661
9998  1.564207  1.395104  -1.866400
9999  1.464945  1.322041  -1.930904

[10000 rows x 3 columns]
disagreement
{'models': [x_2, LinearRegression()]}
*
x_2
<class 'autora.theorist.bms.regressor.BMSRegressor'>
*
LinearRegression()
<class 'sklearn.linear_model._base.LinearRegression'>
***
[[-3.70291106]
 [ 2.8272235 ]
 [ 4.98657381]
 ...
 [-1.8675388 ]
 [ 4.59344585]
 [-2.34670821]]
*
[[-1.03457897e+58]
 [ 1.44330891e+58]
 [ 2.27930698e+58]
 ...
 [ 5.13097857e+57]
 [ 1.93546718e+58]
 [-4.17322766e+57]]
***
           x_2       x_1     score
8042  4.964167  4.960558  4.528690
545   4.880342  4.973122  4.460634
6754  4.936422  4.933333  

100%|██████████| 10/10 [00:00<00:00, 33.97it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


novelty
{'reference_conditions':           x_2       x_1
0    3.640071  0.578201
1   -2.073078  0.277541
2   -3.131717  0.030537
3    4.843291  4.178432
4   -2.115178  0.365652
..        ...       ...
295  3.437872  0.598185
296 -2.222314  2.471699
297  0.635158  1.357578
298  2.014445  0.342645
299  0.650836  3.761771

[300 rows x 2 columns]}
           x_2       x_1     score
3359 -4.964619  4.962686  2.731135
324  -4.944230  4.990332  2.726800
861  -4.923789  0.092415  2.717120
3847 -4.958870  0.181139  2.700261
512  -4.940601  4.929344  2.683516
...        ...       ...       ...
8867  0.115802  2.410757 -1.475551
1154  0.195173  2.373362 -1.476118
8023  0.277812  2.425605 -1.476603
637   0.173918  2.404321 -1.476627
4894  0.246434  2.397997 -1.476641

[10000 rows x 3 columns]
falsification
{'reference_conditions':           x_2       x_1
0    3.640071  0.578201
1   -2.073078  0.277541
2   -3.131717  0.030537
3    4.843291  4.178432
4   -2.115178  0.365652
..        ...       ...
2

INFO:autora.theorist.bms.regressor:BMS fitting started


           x_2       x_1      score
0     4.955891  4.045981  14.143484
1     4.833603  3.942703  14.062147
2     4.825177  3.919625  13.993958
3     4.826746  3.917338  13.960115
4     4.714858  3.844762  13.938753
...        ...       ...        ...
9995  1.759531  1.406069  -0.698974
9996  1.625679  1.288100  -0.726492
9997  1.677430  1.363652  -0.731371
9998  0.064515  0.026228  -0.848889
9999  0.056231  0.093883  -0.860784

[10000 rows x 3 columns]
disagreement
{'models': [x_1, LinearRegression()]}
*
x_1
<class 'autora.theorist.bms.regressor.BMSRegressor'>
*
LinearRegression()
<class 'sklearn.linear_model._base.LinearRegression'>
***
[[1.39384421]
 [2.89617782]
 [2.42186524]
 ...
 [0.89023742]
 [4.25560858]
 [3.20655412]]
*
[[5.33000256e+57]
 [2.91083693e+57]
 [7.90856119e+57]
 ...
 [2.48727858e+57]
 [1.07940581e+58]
 [6.49618729e+57]]
***
           x_2       x_1     score
8525  4.996741  4.963255  4.548916
5544  4.968592  4.964527  4.519155
2002  4.916182  4.996312  4.503823
324

100%|██████████| 10/10 [00:00<00:00, 31.54it/s]
INFO:autora.theorist.bms.regressor:BMS fitting finished


novelty
{'reference_conditions':           x_2       x_1
0    3.640071  0.578201
1   -2.073078  0.277541
2   -3.131717  0.030537
3    4.843291  4.178432
4   -2.115178  0.365652
..        ...       ...
395 -1.551470  2.419946
396 -1.039725  1.084759
397  0.236167  2.945265
398  0.895100  4.019025
399  1.439961  0.945434

[400 rows x 2 columns]}
           x_2       x_1     score
4268 -4.995920  0.110614  2.725445
3085 -4.971884  0.133437  2.684854
6919 -4.882564  0.011101  2.667924
1349 -4.995126  4.945804  2.665075
3938 -4.894418  0.054625  2.651965
...        ...       ...       ...
1944  0.001904  2.634008 -1.504059
6173  0.188326  2.544103 -1.504221
844   0.070708  2.652684 -1.504533
1376  0.161455  2.532631 -1.504862
7100  0.073026  2.640712 -1.504917

[10000 rows x 3 columns]
falsification
{'reference_conditions':           x_2       x_1
0    3.640071  0.578201
1   -2.073078  0.277541
2   -3.131717  0.030537
3    4.843291  4.178432
4   -2.115178  0.365652
..        ...       ...
3

ValueError: Input X contains infinity or a value too large for dtype('float64').