In [9]:
import sys
import os
# Add the root directory of the nesymres module (../src relative to the
# current working directory) to the Python path.
src_dir = os.path.abspath(os.path.join(os.getcwd(), '../src'))
# Guard the append so that re-running this cell does not pile up duplicate entries.
if src_dir not in sys.path:
    sys.path.append(src_dir)


### Simple example for performing symbolic regression for a set of points

In [None]:
from nesymres.architectures.model import Model
from nesymres.utils import load_metadata_hdf5
from nesymres.dclasses import FitParams, NNEquation, BFGSParams
from pathlib import Path
from functools import partial
import torch
from sympy import lambdify
import json
import numpy as np

ImportError: libtorch_cpu.so: cannot enable executable stack as shared object requires: Invalid argument

In [15]:
## Read the architecture/inference configuration (YAML) and the equation
## settings (JSON) that ship with the 100M model
import omegaconf

cfg = omegaconf.OmegaConf.load("100M/config.yaml")

with open('100M/eq_setting.json', 'r') as settings_file:
    eq_setting = json.loads(settings_file.read())

In [16]:
## Build the BFGS settings from the values loaded from the hydra config yaml
bfgs_cfg = cfg.inference.bfgs  # every BFGS option is read from this config node
bfgs = BFGSParams(
    activated=bfgs_cfg.activated,
    n_restarts=bfgs_cfg.n_restarts,
    add_coefficients_if_not_existing=bfgs_cfg.add_coefficients_if_not_existing,
    normalization_o=bfgs_cfg.normalization_o,
    idx_remove=bfgs_cfg.idx_remove,
    normalization_type=bfgs_cfg.normalization_type,
    stop_time=bfgs_cfg.stop_time,
)


In [17]:
# Gather the vocabulary, operator lists and search settings used when fitting.
# The id2word keys come from JSON as strings, so they are converted to int here.
id2word = {int(idx): token for idx, token in eq_setting["id2word"].items()}
params_fit = FitParams(
    word2id=eq_setting["word2id"],
    id2word=id2word,
    una_ops=eq_setting["una_ops"],
    bin_ops=eq_setting["bin_ops"],
    total_variables=list(eq_setting["total_variables"]),
    total_coefficients=list(eq_setting["total_coefficients"]),
    rewrite_functions=list(eq_setting["rewrite_functions"]),
    bfgs=bfgs,
    beam_size=cfg.inference.beam_size,  # This parameter is a tradeoff between accuracy and fitting time
)

In [18]:
# Checkpoint file that is passed to Model.load_from_checkpoint in the next cell
# (path is relative to the notebook's working directory).
weights_path = "../weights/100M.ckpt"

In [19]:
## Restore the network from the checkpoint with the architecture config,
## put it into eval mode, and move it to the GPU when CUDA is available
model = Model.load_from_checkpoint(weights_path, cfg=cfg.architecture)
model.eval()
use_gpu = torch.cuda.is_available()
if use_gpu:
    model.cuda()

Lightning automatically upgraded your loaded checkpoint from v1.3.3 to v2.4.0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint ../weights/100M.ckpt`


In [None]:
from functools import partial
from nesymres.architectures.beam_search import generate_multiple_beams

# Beam-search configurations compared in the ablation study: every entry keeps
# length_penalty at 1.0 while beam_size and max_len grow together.
beam_configs = [
    dict(beam_size=width, length_penalty=1.0, max_len=limit)
    for width, limit in ((1, 50), (3, 75), (5, 100), (10, 150))
]

# Pre-bind the fit parameters and the beam-search configurations to
# model.fitfunc, so that fitfunc only needs the data points when it is called.
fitfunc = partial(model.fitfunc, cfg_params=params_fit, beam_configs=beam_configs)

In [None]:
# Create points from an equation
number_of_points = 500
n_variables = 2

# To get the best results, keep the sampled points inside the [min, max] support used for training
max_supp = cfg.dataset_train.fun_support["max"]
min_supp = cfg.dataset_train.fun_support["min"]
# Uniform samples in [min_supp, max_supp), one column per entry of total_variables
X = torch.rand(number_of_points, len(list(eq_setting["total_variables"]))) * (max_supp - min_supp) + min_supp
X[:, n_variables:] = 0  # zero out every column past the first n_variables
target_eq = "x_1*sin(x_1)+cos(x_2)"  # Use x_1,x_2 and x_3 as independent variables
X_dict = {x: X[:, idx].cpu() for idx, x in enumerate(eq_setting["total_variables"])}
# torch.as_tensor leaves a tensor untouched and converts an array result, so the
# noise step below can always rely on tensor operations
y_pre_noise = torch.as_tensor(
    lambdify(",".join(eq_setting["total_variables"]), target_eq)(**X_dict)
)

### Added noise
noise_std = 0.1  # Standard deviation of the noise
# The noise is shaped after y_pre_noise: y is only assigned on the next line, so
# reading y.shape here raised a NameError on a fresh kernel. Drawing the noise
# with torch (instead of NumPy) keeps y a torch tensor with the dtype of y_pre_noise.
noise = noise_std * torch.randn_like(y_pre_noise)
y = y_pre_noise + noise

NameError: name 'cfg' is not defined

In [22]:
# Sanity check: report the shapes of the inputs and of the targets
print(f"X shape:  {X.shape}")
print(f"y shape:  {y.shape}")

X shape:  torch.Size([500, 3])
y shape:  torch.Size([500])


In [23]:
# Run the fit on the sampled points; fitfunc is the partial of model.fitfunc
# that already carries the fit parameters and the beam configurations.
output = fitfunc(X,y) 

Memory footprint of the encoder: 0.0001024GB 

Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values...
checking input values range...
Loss constructed, starting new BFGS optmization...
Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values...
checking input values range...
Loss constructed, starting new BFGS optmization...
Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values...
checking input values range...
Loss constructed, starting new BFGS optmization...
Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values...
checking input values range...
Loss constructed, starting new BFGS optmization...
Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values...
checking input values range...
Loss constructed, starting new BFGS optmization...
Memory footprint of the encoder: 0.0002048GB 

Constructing BFGS loss...
Flag idx remove ON, Removing indeces with high values..

In [24]:
# Display the fit results. In the stored output this is a list of dicts, each
# holding the beam 'config' that was used and the corresponding 'output'.
output

[{'config': {'beam_size': 5, 'length_penalty': 1.0, 'max_len': 100},
  'output': {'all_bfgs_preds': ['(((x_1)*(sin(x_1)))+(cos(x_2)))',
    'x_1*cos(x_1 + 4.71238898953286) + cos(x_2)',
    'x_1*sin(x_1) + 1.00000000286262*cos(x_2)',
    '1.00000000688604*x_1*sin(x_1) + cos(x_2)',
    '(((x_1)+(cos(x_2)))*(sin(x_1)))'],
   'all_bfgs_loss': [0.0, 1.4166446e-14, 0.0, 0.0, 0.7319734],
   'best_bfgs_preds': ['(((x_1)*(sin(x_1)))+(cos(x_2)))'],
   'best_bfgs_loss': [0.0]}},
 {'config': {'beam_size': 10, 'length_penalty': 0.8, 'max_len': 150},
  'output': {'all_bfgs_preds': ['(((x_1)*(sin(x_1)))+(cos(x_2)))',
    'x_1*cos(x_1 - 1.57079631450141) + cos(x_2)',
    'x_1*sin(x_1) + 1.00000003054959*cos(x_2)',
    '0.999999976838865*x_1*sin(x_1) + cos(x_2)',
    'x_1*sin(x_1) + cos(x_2 + 4.08816707536777e-9)',
    '(x_1 + 8.19135437257756e-9)*sin(x_1) + cos(x_2)',
    '(((x_1)+(cos(x_2)))*(sin(x_1)))',
    '(((x_1)*(sin(x_1)))+(sin(cos(x_2))))',
    'x_1*sin(x_1) + cos(1.00000000756742*x_2)',
   