In [1]:
%load_ext autoreload
%autoreload 2
from eqlearner.dataset.multivariate.datasetcreator import DatasetCreator,utils_dataclasses
from eqlearner.dataset.processing import tokenization
import numpy as np
from tqdm import tqdm

### First, define the symbols (variables) and the basis functions (operators)

In [2]:
# Basis functions handed to DatasetCreator. Per the original note, the order of
# this list is important and is kept fixed for testing.
# NOTE(review): the original comment describes the order as alphabetical
# "apart from x", yet "inv" comes after "sqrt" here — confirm the intended order.
basis_functions = [
    "Id",
    "exp",
    "log",
    "sin",
    "sqrt",
    "inv",
]
# Symbol names handed to DatasetCreator and to the tokenizer.
symbols = [
    "x",
    "y",
    "z",
    "n",
    "p",
]

### Then define the dataclasses for the number of elements and the constants
The first argument of `Constants` is the external constant interval, and the second is the internal constant interval.

In [3]:
# NumElements(3, 2): the meaning of the two positional values is not visible in
# this notebook — TODO confirm against utils_dataclasses.
num_elements = utils_dataclasses.NumElements(3, 2)

# Constants(external, internal): the first list holds the external constant
# interval(s), the second the internal constant interval(s).
# NOTE(review): the external interval is the degenerate (10, 10), while the
# sample function shown below has coefficients that look drawn from (-5, 5) —
# confirm which argument controls which constants.
consts = utils_dataclasses.Constants(
    [(10, 10)],
    [(-5, 5)],
)

In [4]:
# Build the dataset creator from the basis-function and symbol lists plus the
# two dataclass configurations defined above.
creator_object = DatasetCreator(
    basis_functions,
    symbols,
    num_elements=num_elements,
    constants=consts,
    max_num_equations=100,
)

In [5]:
# Generate one function with the creator and show it as the cell's result
# (the trailing bare expression is what the notebook displays).
fun_object = creator_object.generate_fun()
fun_object

Function(elem_without_constant='exp(x)+log(x)+sqrt(y)', elem_with_constant=-4.91*sqrt(y) - 4.22*exp(x) - 3.58*log(x))

In [6]:
# Call generate_fun 500 times under a progress bar. Every pass rebinds
# fun_object, so after the loop it refers to the last function generated,
# not to the one displayed in the previous cell.
for _draw in tqdm(range(500)):
    fun_object = creator_object.generate_fun()

100%|██████████| 500/500 [00:01<00:00, 329.25it/s]


### Define a multidimensional range and generate evaluation points


In [7]:
# One identical 1-D grid per symbol. np.arange(0.1, 3) uses the default step
# of 1, so each grid holds the three points 0.1, 1.1 and 2.1.
# A comprehension is used so that no unused loop variable (previously `x`) is
# left bound in the kernel namespace after the cell runs.
support = [np.arange(0.1, 3) for _symbol in symbols]

# Evaluate the expression with constants on the support; fun_object is whatever
# the most recent generate_fun() call bound it to.
y = creator_object.evaluate_function(support, fun_object.elem_with_constant)
y

array([140.424     ,   2.87887603,  -4.36584127])

### Get the tokens

In [8]:
# Step 1: split the constant-free expression string into terms. The input is a
# dict with a single "Single" key holding a one-element list.
separated_dict = tokenization.extract_terms(
    {"Single": [fun_object.elem_without_constant]}
)

# Step 2: turn the terms into numbers; this also returns the symbol mapping
# that the later steps need.
numberized_dict, sym_mapping = tokenization.numberize_terms(
    separated_dict,
    symbols=symbols,
)

# Step 3: flatten into one token sequence and display it.
final_seq = tokenization.flatten_seq(
    numberized_dict,
    mapping=sym_mapping,
)
final_seq

[12, 16, 10, 25, 9, 16, 10, 26, 9, 25, 9, 25, 7, 5, 15, 17, 6, 13]

### Get the string back

In [9]:
# Convert the token sequence back into an expression string, using the same
# symbol mapping that numberize_terms returned.
tokenization.get_string(final_seq, sym_mapping=sym_mapping)

'1/x+1/y+x+x**(-2)'