In [2]:
# Move one directory up so that the `src.*` imports in the next cell resolve
# (presumably the notebook lives in a sub-folder of the repository -- confirm).
# Written as an explicit `%cd` magic: a bare `cd` depends on IPython's
# automagic being enabled.
# NOTE: not idempotent -- re-running this cell moves up one more directory.
%cd ..

/home/kpgelvan/SymbolicMathematics


In [3]:
import os
import numpy as np
import sympy as sp
import torch

from src.utils import AttrDict
from src.envs import build_env
from src.model import build_modules

from src.utils import to_cuda
from src.envs.sympy_utils import simplify

## Build environment / Reload model

In [6]:
# Disabled reference code: the whole cell is one string literal, so nothing in
# it is executed. The function inside could not run as written anyway -- it uses
# `self` and `logger`, neither of which is a parameter of the function or
# defined in the cells shown in this notebook.
# NOTE(review): presumably copied from a trainer class; delete this cell if it
# is no longer needed.
'''def reload_checkpoint(path):
    """
    Reload a checkpoint if we find one.
    """
    checkpoint_path = os.path.join(path, 'checkpoint.pth')
    data = torch.load(checkpoint_path, map_location='cpu')

    # reload model parameters
    for k, v in self.modules.items():
        v.load_state_dict(data[k])

    # reload optimizers
    for name in self.optimizers.keys():
        # AMP checkpoint reloading is buggy, we cannot reload optimizers
        # instead, we only reload current iterations / learning rates
        if self.params.amp == -1:
            self.optimizers[name].load_state_dict(data[f'{name}_optimizer'])
        else:
            for group_id, param_group in enumerate(self.optimizers[name].param_groups):
                if 'num_updates' not in param_group:
                    logger.warning(f"No 'num_updates' for optimizer {name}.")
                    continue
                logger.warning(f"Reloading 'num_updates' and 'lr' for optimizer {name}.")
                param_group['num_updates'] = data[f'{name}_optimizer']['param_groups'][group_id]['num_updates']
                param_group['lr'] = self.optimizers[name].get_lr_for_step(param_group['num_updates'])

    # reload main metrics
    self.epoch = data['epoch'] + 1
    self.n_total_iter = data['n_total_iter']
    self.best_metrics = data['best_metrics']
    self.best_stopping_criterion = data['best_stopping_criterion']'''



In [6]:
# Checkpoint to load. A pretrained model can be fetched with, e.g.,
#   wget https://dl.fbaipublicfiles.com/SymbolicMathematics/models/fwd_bwd.pth
#model_path = 'fwd.pth'
model_path = 'dumped/seq_rel_att_0202/272202/checkpoint.pth'
# The path is relative, so it is resolved against the current working
# directory; report both when the file is missing.
assert os.path.isfile(model_path), f"checkpoint not found: {model_path!r} (cwd: {os.getcwd()})"

In [31]:
# Settings handed to `build_env` and `build_modules` in the cells below.
# NOTE(review): these presumably have to match the configuration the checkpoint
# at `model_path` was trained with -- confirm against the training run.
params = AttrDict({

    # environment parameters
    'env_name': 'char_sp',
    'int_base': 10,
    'balanced': False,
    'positive': True,
    'precision': 10,
    'n_variables': 1,
    'n_coefficients': 0,
    'leaf_probs': '0.75,0,0.25,0',
    'max_len': 512,
    'max_int': 5,
    'max_ops': 15,
    'max_ops_G': 15,
    'clean_prefix_expr': True,
    'rewrite_functions': '',
    'tasks': 'prim_fwd',
    'operators': 'add:10,sub:3,mul:10,div:5,sqrt:4,pow2:4,pow3:2,pow4:1,pow5:1,ln:4,exp:4,sin:4,cos:4,tan:4,asin:1,acos:1,atan:1,sinh:1,cosh:1,tanh:1,asinh:1,acosh:1,atanh:1',

    # position-encoding / relative-attention options
    # (grouping inferred from the key names)
    'max_relative_pos': 250,
    'use_neg_dist': True,
    'use_encdec_seq_rel_att': False,
    'max_path_width': -1,
    'max_path_depth': -1,
    'use_tree_pos_enc_E': False,
    'use_tree_pos_enc_D': False,
    'use_tree_rel_att': "",
    'tree_rel_vocab_size': 0,
    'use_pos_embeddings_E': False,
    'use_pos_embeddings_D': False,

    # model parameters
    'cpu': True,
    'emb_dim': 256,
    'n_enc_layers': 4,
    'n_dec_layers': 4,
    'n_heads': 4,
    'dropout': 0,
    'attention_dropout': 0,
    'sinusoidal_embeddings': False,
    'share_inout_emb': True,
    'reload_model': model_path,  # checkpoint path validated in the previous cell

})

In [32]:
# Build the environment described by `params`.
env = build_env(params)
# Symbol `x` from the environment's local dictionary; later cells use it as
# the differentiation variable.
x = env.local_dict['x']

In [33]:
# Build the model components for this environment and keep handles on the two
# that the rest of the notebook uses.
modules = build_modules(env, params)
encoder, decoder = modules['encoder'], modules['decoder']

In [37]:
# Weight of `decoder.proj`, displayed for comparison with
# `decoder.embeddings.weight` in the next cell ('share_inout_emb' is True in `params`).
decoder.proj.weight

Parameter containing:
tensor([[ 0.0739, -0.2999,  0.2414,  ..., -0.0033,  0.1309,  0.2013],
        [-0.1916, -0.0242,  0.0273,  ..., -0.1218,  0.0545,  0.2007],
        [-0.1722, -0.0376,  0.0310,  ..., -0.1337,  0.0441,  0.1990],
        ...,
        [-0.1476, -0.0959,  0.1432,  ...,  0.0296,  0.0949, -0.2183],
        [ 0.0864, -0.0371, -0.0220,  ..., -0.1343,  0.1126, -0.1488],
        [ 0.0472, -0.0501, -0.0385,  ...,  0.2324,  0.0864, -0.1636]],
       requires_grad=True)

In [38]:
# Weight of `decoder.embeddings`; the printed values match `decoder.proj.weight`
# from the previous cell (presumably the same shared tensor, since
# 'share_inout_emb' is True -- an identity check would confirm it).
decoder.embeddings.weight

Parameter containing:
tensor([[ 0.0739, -0.2999,  0.2414,  ..., -0.0033,  0.1309,  0.2013],
        [-0.1916, -0.0242,  0.0273,  ..., -0.1218,  0.0545,  0.2007],
        [-0.1722, -0.0376,  0.0310,  ..., -0.1337,  0.0441,  0.1990],
        ...,
        [-0.1476, -0.0959,  0.1432,  ...,  0.0296,  0.0949, -0.2183],
        [ 0.0864, -0.0371, -0.0220,  ..., -0.1343,  0.1126, -0.1488],
        [ 0.0472, -0.0501, -0.0385,  ...,  0.2324,  0.0864, -0.1636]],
       requires_grad=True)

## Start from a function F, compute its derivative f = F', and try to recover F from f

In [39]:
# Choose F, written as an infix string. Its derivative is fed to the model,
# which is then asked to recover F. Uncomment one of the alternatives below
# (or write your own) to try a different function.
F_infix = 'x * tan(exp(x)/x)'
#F_infix = 'x * cos(x**2) * tan(x)'
#F_infix = 'cos(x**2 * exp(x * cos(x)))'
#F_infix = 'ln(cos(x + exp(x)) * sin(x**2 + 2) * exp(x) / x)'

In [40]:
# Parse the infix string into the SymPy expression F -- the integral the model
# will try to predict. Names are resolved through `env.local_dict`.
F = sp.sympify(F_infix, locals=env.local_dict)
F

x*tan(exp(x)/x)

In [41]:
# f = F' (derivative of F with respect to x): the expression the model takes as input.
f = F.diff(x)
f

x*(exp(x)/x - exp(x)/x**2)*(tan(exp(x)/x)**2 + 1) + tan(exp(x)/x)

### Compute prefix representations

In [47]:
# Convert both expressions (F first, then f) to prefix token lists and show them.
F_prefix, f_prefix = (env.sympy_to_prefix(expr) for expr in (F, f))
print(f"F prefix: {F_prefix}")
print(f"f prefix: {f_prefix}")

F prefix: ['mul', 'x', 'tan', 'mul', 'pow', 'x', 'INT-', '1', 'exp', 'x']
f prefix: ['add', 'mul', 'x', 'mul', 'add', 'INT+', '1', 'pow', 'tan', 'mul', 'pow', 'x', 'INT-', '1', 'exp', 'x', 'INT+', '2', 'add', 'mul', 'pow', 'x', 'INT-', '1', 'exp', 'x', 'mul', 'INT-', '1', 'mul', 'pow', 'x', 'INT-', '2', 'exp', 'x', 'tan', 'mul', 'pow', 'x', 'INT-', '1', 'exp', 'x']


### Encode input

In [48]:
# Source sequence: the fixed tokens ['sub', 'derivative', 'f', 'x', 'x']
# followed by the prefix form of f, passed through `env.clean_prefix`.
x1_prefix = env.clean_prefix(['sub', 'derivative', 'f', 'x', 'x'] + f_prefix)

# Map tokens to vocabulary ids, with `env.eos_index` on both ends, and shape
# the result as a single column (sequence length x 1).
token_ids = [env.eos_index, *(env.word2id[w] for w in x1_prefix), env.eos_index]
x1 = torch.LongTensor(token_ids).view(-1, 1)
len1 = torch.LongTensor([len(x1)])
x1, len1 = to_cuda(x1, len1)

# Run the encoder without tracking gradients, then swap the first two
# dimensions of its output.
with torch.no_grad():
    encoded = encoder('fwd', x=x1, lengths=len1, causal=False).transpose(0, 1)

### Decode with beam search

In [53]:
# Number of hypotheses kept by the beam search.
beam_size = 100

# Only the third value returned by `generate_beam` is used here.
with torch.no_grad():
    _, _, beam = decoder.generate_beam(
        encoded,
        len1,
        beam_size=beam_size,
        length_penalty=1.0,
        early_stopping=1,
        max_len=200,
    )

# One input sequence was encoded, so exactly one beam is expected, and it
# should hold `beam_size` hypotheses.
assert len(beam) == 1
hypotheses = beam[0].hyp
assert len(hypotheses) == beam_size

### Print results

In [54]:
print(f"Input function f: {f}")
print(f"Reference function F: {F}")
print("")

# Walk the hypotheses from highest to lowest score. Each entry is a
# (score, token-id tensor) pair. The key argument is deliberately not named
# `x`, so it cannot be confused with the SymPy symbol `x` used in the loop body.
for score, sent in sorted(hypotheses, key=lambda entry: entry[0], reverse=True):

    # parse decoded hypothesis
    ids = sent[1:].tolist()                  # decoded token IDs (first position skipped)
    tok = [env.id2word[wid] for wid in ids]  # convert to prefix

    try:
        hyp = env.prefix_to_infix(tok)       # convert to infix
        hyp = env.infix_to_sympy(hyp)        # convert to SymPy

        # check whether we recover f if we differentiate the hypothesis
        # note that sometimes, SymPy fails to show that hyp' - f == 0, and the result is considered as invalid, although it may be correct
        res = "OK" if simplify(hyp.diff(x) - f, seconds=1) == 0 else "NO"

    except (KeyboardInterrupt, SystemExit):
        # Let the user interrupt the loop: with many hypotheses and up to one
        # second of simplification each, it can run for a while.
        raise
    except BaseException:
        # Best effort: any other failure in parsing or checking is reported
        # for this hypothesis and the loop continues.
        # NOTE(review): kept this broad because the timeout raised inside
        # `simplify` may not derive from `Exception` -- confirm, then narrow
        # to `except Exception`.
        res = "INVALID PREFIX EXPRESSION"
        hyp = tok

    # print result
    print("%.5f  %s  %s" % (score, res, hyp))

Input function f: x*(exp(x)/x - exp(x)/x**2)*(tan(exp(x)/x)**2 + 1) + tan(exp(x)/x)
Reference function F: x*tan(exp(x)/x)

-0.18937  NO  0
-0.19152  NO  exp(x)
-0.19262  NO  x*tan(exp(x)/x) - exp(x)
-0.20441  NO  x*tan(exp(x)/x) + exp(x)
-0.20617  OK  x*tan(exp(x)/x)
-0.20694  NO  x*tan(exp(x)/x) - exp(2*x)/2 - exp(x) + log(tan(exp(x)/x)**2 + 1)/2
-0.20715  NO  exp(x) + log(tan(exp(x)/x)**2 + 1)/2
-0.20731  NO  -2*exp(x) + log(tan(exp(x)/x)**2 + 1)/2 - tan(exp(x)/x)
-0.20772  NO  -exp(2*x)/2 - exp(x)*tan(exp(x)/x) - exp(x) + log(tan(exp(x)/x)**2 + 1)/2
-0.20893  NO  -x*tan(exp(x)/x) + exp(x)
-0.20930  NO  -exp(x)
-0.21063  NO  x*tan(exp(x)/x) - exp(2*x)/2 + log(tan(exp(x)/x)**2 + 1)/2
-0.21140  NO  x*tan(exp(x)/x) - exp(2*x)/2 + exp(x) + log(tan(exp(x)/x)**2 + 1)/2
-0.21171  NO  x*tan(exp(x)/x) - 2*exp(x)
-0.21290  NO  -x*tan(exp(x)/x) - exp(2*x)/2 + log(tan(exp(x)/x)**2 + 1)/2
-0.21479  NO  -x*tan(exp(x)/x) - exp(2*x)/2 + exp(x) + log(tan(exp(x)/x)**2 + 1)/2
-0.21501  NO  -exp(x)*tan(