### Simple example of performing symbolic regression on a set of points

In [34]:
from nesymres.architectures.model import Model
from nesymres.utils import load_metadata_hdf5
from nesymres.dclasses import FitParams, NNEquation, BFGSParams
from pathlib import Path
from functools import partial
import torch
from sympy import lambdify
import json

In [35]:
## Read the equation settings (JSON) and the architecture/inference configuration (YAML)
import omegaconf

with open('100M/eq_setting.json', 'r') as json_file:
    # json.load(fp) is equivalent to parsing the text read from fp
    eq_setting = json.loads(json_file.read())
print(eq_setting)

cfg = omegaconf.OmegaConf.load("100M/config.yaml")

{'config': {'max_len': 20, 'positive': True, 'env_name': 'eqlearn', 'operators': 'add:10,mul:10,sub:5,div:5,sqrt:4,pow2:4,pow3:2,pow4:1,pow5:1,ln:4,exp:4,sin:4,cos:4,tan:4,asin:2', 'max_ops': 5, 'int_base': 10, 'precision': 10, 'rewrite_functions': '', 'variables': ['x_1', 'x_2', 'x_3'], 'eos_index': 1, 'pad_index': 0}, 'total_coefficients': ['cm_0', 'cm_1', 'cm_2', 'cm_3', 'cm_4', 'cm_5', 'cm_6', 'cm_7', 'cm_8', 'cm_9', 'cm_10', 'cm_11', 'cm_12', 'cm_13', 'cm_14', 'cm_15', 'cm_16', 'cm_17', 'cm_18', 'cm_19', 'cm_20', 'cm_21', 'cm_22', 'cm_23', 'cm_24', 'cm_25', 'cm_26', 'cm_27', 'cm_28', 'cm_29', 'cm_30', 'cm_31', 'cm_32', 'cm_33', 'cm_34', 'cm_35', 'cm_36', 'cm_37', 'cm_38', 'cm_39', 'ca_0', 'ca_1', 'ca_2', 'ca_3', 'ca_4', 'ca_5', 'ca_6', 'ca_7', 'ca_8', 'ca_9', 'ca_10', 'ca_11', 'ca_12', 'ca_13', 'ca_14', 'ca_15', 'ca_16', 'ca_17', 'ca_18', 'ca_19', 'ca_20', 'ca_21', 'ca_22', 'ca_23', 'ca_24', 'ca_25', 'ca_26', 'ca_27', 'ca_28', 'ca_29', 'ca_30', 'ca_31', 'ca_32', 'ca_33', 'ca_34', 

In [36]:
## Set up the BFGS parameters, loaded from the hydra config yaml
# Every BFGSParams field below is copied verbatim from cfg.inference.bfgs.
bfgs = BFGSParams(**{
    field: getattr(cfg.inference.bfgs, field)
    for field in (
        "activated",
        "n_restarts",
        "add_coefficients_if_not_existing",
        "normalization_o",
        "idx_remove",
        "normalization_type",
        "stop_time",
    )
})


In [37]:
# Collect everything the fit needs: vocabulary, operators, symbols and the BFGS settings
params_fit = FitParams(
    word2id=eq_setting["word2id"],
    # JSON object keys are strings, so the ids are converted back to int
    id2word={int(token_id): token for token_id, token in eq_setting["id2word"].items()},
    una_ops=eq_setting["una_ops"],
    bin_ops=eq_setting["bin_ops"],
    total_variables=list(eq_setting["total_variables"]),
    total_coefficients=list(eq_setting["total_coefficients"]),
    rewrite_functions=list(eq_setting["rewrite_functions"]),
    bfgs=bfgs,
    # Trade-off between accuracy and fitting time.
    # Hard-coded here; the config also provides cfg.inference.beam_size.
    beam_size=5,
)

In [38]:
# Checkpoint file that is passed to Model.load_from_checkpoint in the next cell
weights_path = "../weights/100M.ckpt"

In [39]:
## Restore the model from the checkpoint with the architecture config and put it in eval mode
# .eval() returns the module itself, so it can be chained onto the load call
model = Model.load_from_checkpoint(weights_path, cfg=cfg.architecture).eval()

# Move the model to the GPU when one is available
if torch.cuda.is_available():
    model.cuda()

Lightning automatically upgraded your loaded checkpoint from v1.3.3 to v2.4.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../weights/100M.ckpt`


In [40]:
# Bind the fitting parameters once so that fitfunc can be called with just the data (X, y)
fitfunc = partial(model.fitfunc, cfg_params=params_fit)

In [41]:
# Generate sample points from a known target equation
number_of_points, n_variables = 500, 1

# For best results, keep the inputs inside the min/max support given in cfg.dataset_train
max_supp = cfg.dataset_train.fun_support["max"]
min_supp = cfg.dataset_train.fun_support["min"]
X = torch.rand(
    number_of_points,
    len(list(eq_setting["total_variables"])),
) * (max_supp - min_supp) + min_supp
X[:, n_variables:] = 0  # zero the columns of the variables that are not used

# The target may use x_1, x_2 and x_3 as independent variables
target_eq = "x_1*sin(x_1)"
X_dict = {
    name: X[:, column].cpu()
    for column, name in enumerate(eq_setting["total_variables"])
}
y = lambdify(",".join(eq_setting["total_variables"]), target_eq)(**X_dict)

In [42]:
# Sanity-check the shapes of the sampled inputs and of the evaluated targets
print("X shape: ", X.shape)
print("y shape: ", y.shape)

X shape:  torch.Size([500, 3])
y shape:  torch.Size([500])


In [43]:
import time

# Run the fit and report how long it took, in seconds.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() follows the wall clock, which can be adjusted while
# the cell is running and distort the measurement.
start = time.perf_counter()
output = fitfunc(X,y)
print(output)
print(time.perf_counter() - start)

{'all_bfgs_preds': ['((x_1)*(sin(x_1)))', '((x_1)*((cos(x_1))*(tan(x_1))))', 'x_1*(-2.4124309458153e-10*x_1 + sin(x_1))', 'x_1*(-1.45303738201554e-9*(x_1 + 0.372434186667584)**2 + sin(x_1))', 'x_1*(-6.5095906123594e-11*x_1**2 + sin(x_1))'], 'all_bfgs_loss': [0.0, 5.058057e-14, 4.6386506e-18, 3.4721214e-13, 1.5486613e-16], 'best_bfgs_preds': ['((x_1)*(sin(x_1)))'], 'best_bfgs_loss': [0.0]}
18.853568077087402


In [44]:
# Display the result dictionary as the cell output (pretty-printed, unlike the print above)
output

{'all_bfgs_preds': ['((x_1)*(sin(x_1)))',
  '((x_1)*((cos(x_1))*(tan(x_1))))',
  'x_1*(-2.4124309458153e-10*x_1 + sin(x_1))',
  'x_1*(-1.45303738201554e-9*(x_1 + 0.372434186667584)**2 + sin(x_1))',
  'x_1*(-6.5095906123594e-11*x_1**2 + sin(x_1))'],
 'all_bfgs_loss': [0.0,
  5.058057e-14,
  4.6386506e-18,
  3.4721214e-13,
  1.5486613e-16],
 'best_bfgs_preds': ['((x_1)*(sin(x_1)))'],
 'best_bfgs_loss': [0.0]}