# Cycle
This cycle uses the mixture experimentalist, the BMS theorist, and the equation sampler as the source of the ground truth.

In [1]:
import copy

import numpy as np
import math
import pandas as pd
from autora.variable import VariableCollection, Variable
from autora.state.bundled import StandardState
from autora.state.delta import on_state
from autora.state.wrapper import state_fn_from_estimator
from autora.theorist.bms import BMSRegressor
from equation_tree import sample 
from equation_tree.tree import instantiate_constants
from equation_tree.prior import DEFAULT_PRIOR_FUNCTIONS, DEFAULT_PRIOR_OPERATORS, \
    structure_prior_from_max_depth
import pprint
from autora.experiment_runner.synthetic.abstract.equation import equation_experiment
from autora.experimentalist.mixture import sample as mixture_sample
from autora.experimentalist.grid_ import grid_pool
from autora.experimentalist.random_ import random_sample
from autora.state.delta import Delta
from autora.experimentalist.falsification import falsification_score_sample
from autora.experimentalist.model_disagreement import model_disagreement_score_sample

In [2]:
# Target number of conditions in the experimental grid.
N_CONDITIONS = 50_000

## Ground truth
Sampling the ground truth for this simulation.

In [3]:
# Build the structure prior (max depth 10) and show it next to the library's
# default function and operator priors.
structure_prior = structure_prior_from_max_depth(10)
for component in (structure_prior, DEFAULT_PRIOR_FUNCTIONS, DEFAULT_PRIOR_OPERATORS):
    pprint.pprint(component)

# Relative weight of constants vs. variables among the features.
feature_prior = dict(constants=.3, variables=.7)

# Full prior handed to the equation sampler.
prior = dict(
    functions=DEFAULT_PRIOR_FUNCTIONS,
    operators=DEFAULT_PRIOR_OPERATORS,
    structures=structure_prior,
    features=feature_prior,
)
pprint.pprint(prior)

{'[0, 1, 1, 2, 2, 3, 3, 4, 4, 5]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 3, 4, 4]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 3, 4, 5, 4]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 3, 4, 5, 5]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 3, 4, 5, 6]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 3, 4, 5]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 3, 4]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 3]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 3, 4, 4]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 3, 4, 5]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 3, 4]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 3]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 4, 3, 4]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 4, 3]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 4, 5, 3]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 4, 5, 5]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 4, 5, 6]': 0.0007288629737609329,
 '[0, 1, 1, 2, 2, 3, 4, 4, 5]': 0.0007288629737

In [4]:
# Draw a single equation from the prior (at most 4 variables); it is used as the ground truth.
equation_raw = sample(
    n=1,
    prior=prior,
    max_num_variables=4,
)
equation_raw[0].sympy_expr

Processing: 100%|██████████| 1/1 [00:00<00:00,  2.16iteration/s]


x_1 - x_1/c_1**cos(x_2/x_3)

In [5]:
# Fill in the constants of the sampled equation with random draws from [0, 100).
equation = instantiate_constants(
    equation_raw[0],
    lambda: 100 * np.random.rand(),
)
equation.sympy_expr


x_1 - x_1/4.7627810800126795**cos(x_2/x_3)

Defining the metadata based on the sampled ground truth.

In [6]:
# One Variable per unique variable of the equation, named x_1, x_2, ...
# The number of grid points per variable is the floored n-th root of N_CONDITIONS,
# so that each experimental space has roughly the same coarseness.
independent_variables = [
    Variable(
        f"x_{position}",
        allowed_values=np.linspace(
            -10,
            10,
            math.floor(N_CONDITIONS ** (1 / equation.n_variables_unique)),
        ),
    )
    for position in range(1, equation.n_variables_unique + 1)
]

variables = VariableCollection(
    independent_variables=independent_variables,
    dependent_variables=[Variable("y")],
)

Defining the experiment runner from the equation and the variable collection.

In [7]:
# Synthetic experiment built from the ground-truth expression and the variable specification.
experiment = equation_experiment(
    equation.sympy_expr,
    variables.independent_variables,
    variables.dependent_variables[0],
)

### Defining the state
We can define an initial state for our discovery problem based on the variable specification above. We then wrap the experiment runner so that it operates on the state.

In [8]:
# The initial state carries only the variable specification.
state = StandardState(variables=variables)

# State-aware runner: its result is written to the `experiment_data` field.
runner_on_state = on_state(
    experiment.experiment_runner,
    output=["experiment_data"],
)

### Pooler

In [9]:
# State-aware grid pooler: its result is written to the `conditions` field.
experimentalist_pooler = on_state(
    grid_pool,
    output=["conditions"],
)

In [83]:
# Apply the pooler to the initial state; the resulting state holds the pooled conditions.
state1 = experimentalist_pooler(state)

StandardState(variables=VariableCollection(independent_variables=[Variable(name='x_1', value_range=None, allowed_values=array([-10.        ,  -9.99959999,  -9.99919998, ...,   9.99919998,
         9.99959999,  10.        ]), units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], dependent_variables=[Variable(name='y', value_range=None, allowed_values=None, units='', type=<ValueType.REAL: 'real'>, variable_label='', rescale=1, is_covariate=False)], covariates=[]), conditions=           x_1
0     -10.0000
1      -9.9996
2      -9.9992
3      -9.9988
4      -9.9984
...        ...
49995   9.9984
49996   9.9988
49997   9.9992
49998   9.9996
49999  10.0000

[50000 rows x 1 columns], experiment_data=None, models=[])

## Mixture experimentalist
Defining the mixture experimentalist and wrapping it into the state

In [9]:
@on_state()
def experimentalist_sample(conditions, models, experiment_data, variables, temperature, samplers, params, num_samples):
    """Choose the next experimental conditions from a pool of candidates.

    While there is no model or no experiment data yet, conditions are drawn
    with ``random_sample``. Otherwise they are drawn with ``mixture_sample``,
    using the most recent model and the data collected so far as reference.

    Args:
        conditions: Pool of candidate conditions to sample from.
        models: Models fitted so far; the last entry is used for falsification.
        experiment_data: Data collected so far (``None`` or a DataFrame with
            one column per independent and dependent variable).
        variables: Variable specification; supplies the column names used to
            split ``experiment_data`` into conditions and observations.
        temperature: Passed through to ``mixture_sample``.
        samplers: Sampler specification passed through to ``mixture_sample``.
        params: Per-sampler parameters. The function works on a deep copy and
            overwrites its ``"falsification"`` and ``"novelty"`` entries.
        num_samples: Number of conditions to select.

    Returns:
        A ``Delta`` whose ``conditions`` field holds the selected conditions.
    """
    # A DataFrame has no unambiguous truth value (`not df` raises ValueError),
    # so test explicitly for "missing or empty".
    has_data = experiment_data is not None and len(experiment_data) > 0

    if not models or not has_data:
        conditions_ = random_sample(conditions, num_samples)
    else:
        independent_names = [v.name for v in variables.independent_variables]
        dependent_names = [v.name for v in variables.dependent_variables]
        experiment_conditions = experiment_data[independent_names]
        experiment_observations = experiment_data[dependent_names]

        # Copy so the caller's params are never mutated.
        params_ = copy.deepcopy(params) if params is not None else {}
        params_["falsification"] = {
            "reference_conditions": experiment_conditions,
            "reference_observations": experiment_observations,
            "model": models[-1],
        }
        params_["novelty"] = {"reference_conditions": experiment_conditions}

        # Use the caller-supplied `samplers` as-is.
        conditions_ = mixture_sample(conditions, temperature, samplers, params_, num_samples)
        # Keep only the condition columns; the sampler's "score" column is dropped.
        conditions_ = conditions_.drop("score", axis=1)

    return Delta(conditions=conditions_)

## BMS theorist
Defining the BMS theorist and wrapping it into the state

In [8]:
# BMS regressor configured with 1500 epochs, wrapped so that it operates on the state.
bms_regressor = BMSRegressor(epochs=1500)
theorist = state_fn_from_estimator(bms_regressor)