### Simple example for performing symbolic regression for a set of points

In [134]:
from nesymres.architectures.model import Model
from nesymres.utils import load_metadata_hdf5
from nesymres.dclasses import FitParams, NNEquation, BFGSParams
from pathlib import Path
from functools import partial
import torch
from sympy import lambdify

In [135]:
## Load equation configuration and architecture configuration
import omegaconf
with open('equation_setting.json', 'r') as json_file:
  eq_setting = json.load(json_file)

test_data = load_metadata_hdf5("../data/validation")
cfg = omegaconf.OmegaConf.load("config.yaml")

In [136]:
## Set up BFGS load rom the hydra config yaml
bfgs = BFGSParams(
        activated= cfg.inference.bfgs.activated,
        n_restarts=cfg.inference.bfgs.n_restarts,
        add_coefficients_if_not_existing=cfg.inference.bfgs.add_coefficients_if_not_existing,
        normalization_o=cfg.inference.bfgs.normalization_o,
        idx_remove=cfg.inference.bfgs.idx_remove,
        normalization_type=cfg.inference.bfgs.normalization_type,
        stop_time=cfg.inference.bfgs.stop_time,
    )


In [137]:
params_fit = FitParams(word2id=eq_setting["word2id"], 
                            id2word=eq_setting["id2word"], 
                            una_ops=eq_setting["una_ops"], 
                            bin_ops=eq_setting["bin_ops"], 
                            total_variables=list(eq_setting["total_variables"]),  
                            total_coefficients=list(eq_setting["total_coefficients"]),
                            rewrite_functions=list(eq_setting["rewrite_functions"]),
                            bfgs=bfgs,
                            beam_size=cfg.inference.beam_size #This parameter is a tradeoff between accuracy and fitting time
                            )

In [138]:

params_fit = FitParams(word2id=eq_setting["word2id"], 
                            id2word=test_data.id2word, 
                            una_ops=test_data.una_ops, 
                            bin_ops=test_data.bin_ops, 
                            total_variables=list(test_data.total_variables),  
                            total_coefficients=list(test_data.total_coefficients),
                            rewrite_functions=list(test_data.rewrite_functions),
                            bfgs=bfgs,
                            beam_size=cfg.inference.beam_size #This parameter is a tradeoff between accuracy and fitting time
                            )

In [142]:
test_data.id2word

{4: 'x_1',
 5: 'x_2',
 6: 'x_3',
 7: 'abs',
 8: 'acos',
 9: 'add',
 10: 'asin',
 11: 'atan',
 12: 'cos',
 13: 'cosh',
 14: 'coth',
 15: 'div',
 16: 'exp',
 17: 'ln',
 18: 'mul',
 19: 'pow',
 20: 'sin',
 21: 'sinh',
 22: 'sqrt',
 23: 'tan',
 24: 'tanh',
 25: '-3',
 26: '-2',
 27: '-1',
 28: '0',
 29: '1',
 30: '2',
 31: '3',
 32: '4',
 33: '5',
 1: 'S',
 2: 'F',
 3: 'c'}

In [143]:
{int(k): v for k,v eq_setting["id2word"]

{'4': 'x_1',
 '5': 'x_2',
 '6': 'x_3',
 '7': 'abs',
 '8': 'acos',
 '9': 'add',
 '10': 'asin',
 '11': 'atan',
 '12': 'cos',
 '13': 'cosh',
 '14': 'coth',
 '15': 'div',
 '16': 'exp',
 '17': 'ln',
 '18': 'mul',
 '19': 'pow',
 '20': 'sin',
 '21': 'sinh',
 '22': 'sqrt',
 '23': 'tan',
 '24': 'tanh',
 '25': '-3',
 '26': '-2',
 '27': '-1',
 '28': '0',
 '29': '1',
 '30': '2',
 '31': '3',
 '32': '4',
 '33': '5',
 '1': 'S',
 '2': 'F',
 '3': 'c'}

In [123]:
weights_path = "../weights/100M.ckpt"

In [124]:
## Load architecture, set into eval mode, and pass the config parameters
model = Model.load_from_checkpoint(weights_path, cfg=cfg.architecture)
model.eval()
if torch.cuda.is_available(): 
  model.cuda()

In [126]:
fitfunc = partial(model.fitfunc,cfg_params=params_fit)

In [127]:
# Create points from an equation
number_of_points = 500
n_variables = 1

#To get best results make sure that your support inside the max and mix support
max_supp = cfg.dataset_train.fun_support["max"] 
min_supp = cfg.dataset_train.fun_support["min"]
X = torch.rand(number_of_points,len(list(test_data.total_variables)))*(max_supp-min_supp)+min_supp
X[:,n_variables:] = 0
target_eq = "x_1*sin(x_1)" #Use x_1,x_2 and x_3 as independent variables
X_dict = {x:X[:,idx].cpu() for idx, x in enumerate(test_data.total_variables)} #CHECK ME
y = lambdify(",".join(test_data.total_variables), target_eq)(**X_dict)

In [128]:
print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  torch.Size([500, 3])
y shape:  torch.Size([500])


In [129]:
output = fitfunc(X,y) 

Memory footprint of the encoder: 4.096e-05GB 

Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values...
checking input values range...
Loss constructed, starting new BFGS optmization...
Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values...
checking input values range...
Loss constructed, starting new BFGS optmization...


In [130]:
output

{'all_bfgs_preds': ['((x_1)*(sin(x_1)))', '((x_1)*((cos(x_1))*(tan(x_1))))'],
 'all_bfgs_loss': [0.0, 5.317946e-14],
 'best_bfgs_preds': ['((x_1)*(sin(x_1)))'],
 'best_bfgs_loss': [0.0]}