In [1]:
import os
import numpy as np
import sympy as sp
import torch

In [20]:
# Run the rest of the notebook from the repository root, i.e. the directory
# that contains the `src` package imported in the next cell.
# The move is guarded so that re-running this cell (or the whole notebook in a
# live kernel) does not climb one directory higher each time.
# NOTE(review): assumes the notebook's own directory has no `src` folder -- confirm.
if not os.path.isdir('src'):
    os.chdir('..')
os.getcwd()

/home/kpgelvan/SymbolicMathematics


In [21]:
from src.utils import AttrDict
from src.envs import build_env
from src.model import build_modules

from src.utils import to_cuda
from src.envs.sympy_utils import simplify

### Token vocabulary: operators, symbols and leaf tokens

In [22]:
# Operator token -> number of children it takes in a prefix expression.
# Insertion order is kept exactly as in the original table.
OPERATORS = {
    # Elementary functions
    'add': 2, 'sub': 2, 'mul': 2, 'div': 2, 'pow': 2, 'rac': 2,
    'inv': 1, 'pow2': 1, 'pow3': 1, 'pow4': 1, 'pow5': 1,
    'sqrt': 1, 'exp': 1, 'ln': 1, 'abs': 1, 'sign': 1,
    # Trigonometric functions
    'sin': 1, 'cos': 1, 'tan': 1, 'cot': 1, 'sec': 1, 'csc': 1,
    # Trigonometric inverses
    'asin': 1, 'acos': 1, 'atan': 1, 'acot': 1, 'asec': 1, 'acsc': 1,
    # Hyperbolic functions
    'sinh': 1, 'cosh': 1, 'tanh': 1, 'coth': 1, 'sech': 1, 'csch': 1,
    # Hyperbolic inverses
    'asinh': 1, 'acosh': 1, 'atanh': 1, 'acoth': 1, 'asech': 1, 'acsch': 1,
    # Derivative
    'derivative': 2,
    # Custom functions
    'f': 1, 'g': 2, 'h': 3,
}

# Non-operator tokens that get_ancestors() also treats as nodes with a child.
symbols = [
    'I', 'INT+', 'INT-', 'INT', 'FLOAT', '-', '.', '10^',
    'Y', "Y'", "Y''",
]

In [23]:
# Token groups that are concatenated into `no_child_symbols` in the next cell.
constants = ['pi', 'E']
variables = list('xyzt')
functions = list('fgh')
elements = list(map(str, range(-10, 10)))          # '-10' ... '9'
coefficients = ['a' + str(k) for k in range(10)]   # 'a0' ... 'a9'

In [24]:
# Flat list of every token from the groups above, in group order.
# get_ancestors() consults it only after the OPERATORS / symbols check.
no_child_symbols = [*constants, *variables, *functions, *elements, *coefficients]

### Tree relations: ancestors, levels and pairwise paths

In [25]:
from tqdm import tqdm
import queue

def get_ancestors(exp_list, exp_len):
    """Recover the tree structure of a prefix-notation token sequence.

    Tokens are classified with the notebook-level tables:

    * tokens in ``OPERATORS`` take as many children as their declared arity;
    * tokens in ``symbols`` take exactly one child;
    * tokens in ``no_child_symbols`` are leaves, except that a digit token
      directly followed by another digit token becomes the parent of that
      next digit (multi-digit numbers such as ``'1', '8'`` form a chain).

    Args:
        exp_list: sequence of string tokens in prefix order.
        exp_len: number of leading tokens of ``exp_list`` to process.

    Returns:
        ``(ancestors, levels)`` where ``ancestors[i]`` is the list
        ``[i, parent, ..., 0]`` from token ``i`` up to the root and
        ``levels[i]`` is the depth of token ``i`` (the root has depth 0).
        For ``exp_len == 0`` the seed values ``({0: []}, {0: -1})`` are
        returned unchanged.

    Raises:
        ValueError: if a token is in none of the tables, or if tokens remain
            after the expression rooted at position 0 is already complete.
    """
    ROOT_SENTINEL = -1

    # Stack of operator positions that still expect another child. The
    # sentinel at the bottom is what the very last leaf pops, so it never
    # becomes the parent of a real token in a well-formed expression.
    pending = [ROOT_SENTINEL]

    # Position 0 is seeded as its own parent with level -1 and no ancestors,
    # so the generic update below yields level 0 and ancestors [0] for the root.
    ancestors = {0: []}
    node2parent = {}
    levels = {0: -1}

    parent = 0
    for i in range(exp_len):
        op_now = exp_list[i]

        if parent == ROOT_SENTINEL:
            raise ValueError(
                f"unexpected token {op_now!r} at position {i}: "
                "the expression is already complete"
            )

        node2parent[i] = parent
        levels[i] = levels[parent] + 1

        if op_now in OPERATORS or op_now in symbols:   # <=> node has children
            # The first child follows immediately; every additional child
            # needs one stack entry so that a finished subtree returns here.
            arity = OPERATORS.get(op_now, 1)
            for _ in range(arity - 1):
                pending.append(i)
            parent = i
        elif op_now in no_child_symbols:
            if op_now.isdigit() and i + 1 < exp_len and exp_list[i + 1].isdigit():   # e.g. 18
                parent = i
            else:
                parent = pending.pop()
        else:
            raise ValueError(f"unknown token {op_now!r} at position {i}")
        ancestors[i] = [i] + ancestors[node2parent[i]]

    return ancestors, levels

In [26]:
def get_path(i, j, ancestors_map=None, levels_map=None):
    """Encode the tree path from node ``i`` to node ``j`` as a string token.

    The path goes up from ``i`` to the deepest ancestor shared with ``j`` and
    then down to ``j``. It is encoded as ``str(up + 0.001 * down)``, where
    ``up`` and ``down`` are the two step counts, so e.g. one step up and two
    steps down give ``'1.002'``. A ``down`` count of 1000 or more would no
    longer be distinguishable from the ``up`` part.

    Args:
        i: index of the source node.
        j: index of the target node.
        ancestors_map: mapping ``node -> [node, parent, ..., root]`` as
            returned by ``get_ancestors``. When omitted, the notebook-level
            global ``ancestors`` is used (legacy two-argument call style).
        levels_map: mapping ``node -> depth``. When omitted, the
            notebook-level global ``levels`` is used.

    Returns:
        ``"<self>"`` when ``i == j``, otherwise the encoded path string, or
        ``None`` if the two nodes share no ancestor.
    """
    if i == j:
        return "<self>"

    # Legacy fallback: earlier callers relied on notebook-level globals.
    if ancestors_map is None:
        ancestors_map = ancestors
    if levels_map is None:
        levels_map = levels

    anc_i = set(ancestors_map[i])

    # ancestors_map[j] runs from j up to the root. Nodes deeper than i cannot
    # be ancestors of i, so only its last levels[i] + 1 entries are inspected;
    # the first hit is the deepest common ancestor.
    for node in ancestors_map[j][-(levels_map[i] + 1):]:
        if node in anc_i:
            up_n = levels_map[i] - levels_map[node]
            down_n = levels_map[j] - levels_map[node]
            # round() strips binary floating-point noise so the emitted token
            # never has more than three decimals.
            return str(round(up_n + 0.001 * down_n, 3))
    return None

In [27]:
def _relative_path(i, j, anc_i, ancestors, levels):
    """Encode the path i -> j as ``str(up + 0.001 * down)`` (``"<self>"`` if i == j)."""
    if i == j:
        return "<self>"
    # ancestors[j] runs from j up to the root; nodes deeper than i cannot be
    # ancestors of i, so only its last levels[i] + 1 entries are inspected.
    for node in ancestors[j][-(levels[i] + 1):]:
        if node in anc_i:
            up_n = levels[i] - levels[node]
            down_n = levels[j] - levels[node]
            # round() strips binary floating-point noise from the token.
            return str(round(up_n + 0.001 * down_n, 3))
    raise ValueError(f"nodes {i} and {j} share no ancestor")


def get_ud_masks(ancestors, levels, exp_len):
    """Build the pairwise relative-path matrix for one expression.

    The result depends only on the arguments; no notebook-level globals are
    read. Entries use the same ``<up>.<down>`` string encoding as ``get_path``.

    Args:
        ancestors: mapping ``node -> [node, parent, ..., root]`` as returned
            by ``get_ancestors``.
        levels: mapping ``node -> depth`` as returned by ``get_ancestors``.
        exp_len: number of tokens in the expression.

    Returns:
        A list of ``exp_len`` strings. Row ``i`` holds ``exp_len``
        space-separated entries; entry ``j`` encodes the path from token ``i``
        to token ``j`` (``"<self>"`` on the diagonal).

    Raises:
        ValueError: if two tokens share no common ancestor.
    """
    path_rels = []
    for i in range(exp_len):
        # Built once per row instead of once per (i, j) pair.
        anc_i = set(ancestors[i])
        path_rels.append(" ".join(
            _relative_path(i, j, anc_i, ancestors, levels) for j in range(exp_len)
        ))

    return path_rels

### Worked example on a single prefix expression

In [28]:
# Example expression in prefix notation, one token per list element.
# Read as a tree: mul(x, tan(mul(pow(x, INT- 1), exp(x)))).
F_prefix  = ['mul', 'x', 'tan', 'mul', 'pow', 'x', 'INT-', '1', 'exp', 'x']

In [29]:
# NOTE: the names `ancestors` and `levels` are load-bearing. A two-argument
# call get_path(i, j) looks them up as notebook-level globals, so they must
# not be renamed here.
ancestors, levels = get_ancestors(F_prefix, len(F_prefix))
# One space-separated string per token; entry j of row i encodes the path i -> j.
rel_matrix = get_ud_masks(ancestors, levels, len(F_prefix))
rel_matrix

['<self> 0.001 0.001 0.002 0.003 0.004 0.004 0.005 0.003 0.004',
 '1.0 <self> 1.001 1.002 1.003 1.004 1.004 1.005 1.003 1.004',
 '1.0 1.001 <self> 0.001 0.002 0.003 0.003 0.004 0.002 0.003',
 '2.0 2.001 1.0 <self> 0.001 0.002 0.002 0.003 0.001 0.002',
 '3.0 3.001 2.0 1.0 <self> 0.001 0.001 0.002 1.001 1.002',
 '4.0 4.001 3.0 2.0 1.0 <self> 1.001 1.002 2.001 2.002',
 '4.0 4.001 3.0 2.0 1.0 1.001 <self> 0.001 2.001 2.002',
 '5.0 5.001 4.0 3.0 2.0 2.001 1.0 <self> 3.001 3.002',
 '3.0 3.001 2.0 1.0 1.001 1.002 1.002 1.003 <self> 0.001',
 '4.0 4.001 3.0 2.0 2.001 2.002 2.002 2.003 1.0 <self>']

### Load the pretrained model

In [33]:
# Checkpoint location, resolved relative to the current working directory.
model_path = '../checkpoint.pth'
# Fail early, and say which absolute path was looked up.
assert os.path.isfile(model_path), (
    f"checkpoint not found: {os.path.abspath(model_path)}"
)

In [34]:
# Settings handed to build_env() and build_modules() in the following cells.
params = AttrDict({

    # environment parameters
    'env_name': 'char_sp',
    'int_base': 10,
    'balanced': False,
    'positive': True,
    'precision': 10,
    'n_variables': 1,
    'n_coefficients': 0,
    'leaf_probs': '0.75,0,0.25,0',
    'max_len': 512,
    'max_int': 5,
    'max_ops': 15,
    'max_ops_G': 15,
    'clean_prefix_expr': True,
    'rewrite_functions': '',
    'tasks': 'prim_fwd',
    'operators': 'add:10,sub:3,mul:10,div:5,sqrt:4,pow2:4,pow3:2,pow4:1,pow5:1,ln:4,exp:4,sin:4,cos:4,tan:4,asin:1,acos:1,atan:1,sinh:1,cosh:1,tanh:1,asinh:1,acosh:1,atanh:1',

    # model parameters
    'cpu': False,
    'emb_dim': 1024,
    'n_enc_layers': 6,
    'n_dec_layers': 6,
    'n_heads': 4,
    'dropout': 0,
    'attention_dropout': 0,
    'sinusoidal_embeddings': False,
    'share_inout_emb': True,
    'reload_model': model_path,
    'max_relative_pos': 0,
    'use_neg_dist': False,
})

In [35]:
# Build the environment from the settings in `params`.
env = build_env(params)
# Entry stored under 'x' in the environment's local_dict
# (presumably the SymPy symbol for the variable x -- TODO confirm).
x = env.local_dict['x']

In [1]:
# Build the model components and keep direct handles to the
# 'encoder' and 'decoder' entries.
modules = build_modules(env, params)
encoder, decoder = modules['encoder'], modules['decoder']

"modules = build_modules(env, params)\nencoder = modules['encoder']\ndecoder = modules['decoder']"

### Scratch: import paths, prefix cleaning and dataset export

In [12]:
import sys
# Make modules under `src/envs/` and `src` importable by their bare names.
# Both paths are relative, so they resolve against the working directory.
# Guarded so that re-running the cell does not append duplicate entries.
for extra_path in ('src/envs/', 'src'):
    if extra_path not in sys.path:
        sys.path.append(extra_path)

In [14]:
#from char_sp import prefix_to_infix, infix_to_sympy

In [15]:
#from utils import create_logger
#from utils import bool_flag
#from utils import timeout, TimeoutError
#from char_sp import prefix_to_infix, infix_to_sympy


In [16]:
# Prefix tokens of the right-hand part of the expression.
f_prefix = ['sub', "Y'", 'pow', 'x', 'INT+', '2']
# Prepend the 'sub derivative f x x' head and let the environment clean the result.
x1_prefix = env.clean_prefix(['sub', 'derivative', 'f', 'x', 'x', *f_prefix])
x1_prefix
#x1 = torch.LongTensor(
#    [env.eos_index] +
#    [env.word2id[w] for w in x1_prefix] +
#    [env.eos_index]
#).view(-1, 1)
#len1 = torch.LongTensor([len(x1)])
#x1, len1 = to_cuda(x1, len1)


NameError: name 'env' is not defined

In [58]:
import json
import jsonlines

# For every split, read the expression pairs and write one relative-path
# matrix per input line to data/LINES_rel_matrix_<split>.jsonl.
for set_name in ['valid', 'test', 'train']:
    source_path = f'data/prim_fwd.{set_name}'
    target_path = f'data/LINES_rel_matrix_{set_name}.jsonl'
    with open(source_path, 'r') as expressions, \
            jsonlines.open(target_path, 'w') as rel_matrix_json:
        for line in tqdm(expressions):
            # The part after the first '|' holds two tab-separated,
            # whitespace-tokenised sequences.
            q, a = line.split('|')[1].split('\t')
            q, a = q.split(), a.split()

            # `ancestors` / `levels` must keep these exact names: a
            # two-argument get_path(i, j) call reads them as globals.
            ancestors, levels = get_ancestors(q, len(q))
            rel_matrix_q = get_ud_masks(ancestors, levels, len(q))

            # Only the matrix for `q` is exported; `a` is tokenised but unused.

            # NOTE: the matrix is dumped to a JSON string before it is handed
            # to the jsonlines writer; the reading cell decodes each record
            # again with json.loads, so the format is left as is.
            rel_matrix_json.write(json.dumps(rel_matrix_q, indent=0))

9985it [00:06, 1556.64it/s]
9986it [00:06, 1578.29it/s]
288it [00:00, 2340.65it/s]


KeyboardInterrupt: 

In [63]:
# Sanity check: print the first five matrices of the test split.
with jsonlines.open('data/LINES_rel_matrix_test.jsonl') as reader:
    for i, obj in enumerate(reader):
        # Each record is itself a JSON-encoded string, so decode it once more.
        matrix = json.loads(obj)
        # One array of string entries per matrix row.
        new_matrix = [np.array(row.split()) for row in matrix]
        print(new_matrix)
        print('gg wp')
        if i >= 4:
            break

[array(['<self>', '0.001', '0.002', '0.003', '0.003', '0.004', '0.004',
       '0.005'], dtype='<U6'), array(['1.0', '<self>', '0.001', '0.002', '0.002', '0.003', '0.003',
       '0.004'], dtype='<U6'), array(['2.0', '1.0', '<self>', '0.001', '0.001', '0.002', '0.002',
       '0.003'], dtype='<U6'), array(['3.0', '2.0', '1.0', '<self>', '1.001', '1.002', '1.002', '1.003'],
      dtype='<U6'), array(['3.0', '2.0', '1.0', '1.001', '<self>', '0.001', '0.001', '0.002'],
      dtype='<U6'), array(['4.0', '3.0', '2.0', '2.001', '1.0', '<self>', '1.001', '1.002'],
      dtype='<U6'), array(['4.0', '3.0', '2.0', '2.001', '1.0', '1.001', '<self>', '0.001'],
      dtype='<U6'), array(['5.0', '4.0', '3.0', '3.001', '2.0', '2.001', '1.0', '<self>'],
      dtype='<U6')]
gg wp
[array(['<self>', '0.001', '0.002', '0.003', '0.003', '0.004'], dtype='<U6'), array(['1.0', '<self>', '0.001', '0.002', '0.002', '0.003'], dtype='<U6'), array(['2.0', '1.0', '<self>', '0.001', '0.001', '0.002'], dtype='<U6'), 