### Requirements

In [1]:
import json
import os

from latex2sympy2 import latex2sympy, latex2latex
from sympy import simplify, srepr, Eq
from difflib import SequenceMatcher
from main import *

### Constants

In [2]:
BASE_PATH = os.path.dirname(os.path.abspath("__file__"))
DATA_FILE = "data_example.json"

### Read data

In [3]:
# Latex string data

expr1 = r"\frac{d}{dx}(x^2 + 2*x) \times \int x \,dx"
expr2 = r"x^3 + x^2" # Correct would be x^3 + x^2

In [4]:
# JSON tree data

with open(os.path.join(BASE_PATH, DATA_FILE), 'r') as file:
    json_data = json.load(file)

tree1 = json_data.get("exprl", {}) # Template answer
tree2 = json_data.get("expr2", {}) # Right answer
tree3 = json_data.get("expr3", {}) # Wrong answer

### Test of similarity tree analysis

In [None]:
tree1 = latex_to_tree(expr1)
tree2 = latex_to_tree(expr2)
expression_tree_similarity = get_tree_sequence_similarity(tree1, tree2)
print(f"Expression tree similarity: {round(expression_tree_similarity*100, 0)}%")

### Test Bert text similarity

In [None]:
from latex2sympy2 import latex2sympy, latex2latex
from sympy import simplify, srepr, Eq
from difflib import SequenceMatcher
from main import *

In [None]:
# Carregar o modelo BERT pr√©-treinado e o tokenizador
modelo = BertModel.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

In [None]:
emb1 = get_bert_embeddings(latex2latex(expr1), modelo, tokenizer)
emb2 = get_bert_embeddings(latex2latex(expr2), modelo, tokenizer)
print(f"Expression tree similarity: {round(get_text_similarity(emb1, emb2)*100,0)}%")

### Tree edit distance (Zhang-Shasha)

In [5]:
def tree_edit_distance(tree1, tree2):
    # Helper function to calculate the cost between two nodes
    def node_cost(node1, node2):
        return 0 if node1["val"] == node2["val"] else 1

    # Helper function to compute the minimum cost between subtrees
    def edit_distance(node1, node2, memo):
        if "children" not in node1 and "children" not in node2:
            cost = node_cost(node1, node2)
        else:
            cost = float('inf')
            for child1 in node1.get("children", []):
                for child2 in node2.get("children", []):
                    cost = min(cost, edit_distance(child1, child2, memo))

            cost += node_cost(node1, node2)

        memo[(node1["id"], node2["id"])] = cost
        return cost

    # Initialize memoization dictionary
    memo = {}

    # Calculate the tree edit distance
    distance = edit_distance(tree1, tree2, memo)
    return distance

In [6]:
distance = tree_edit_distance(tree1, tree2)
print("Tree Edit Distance:", distance)

Tree Edit Distance: 0


In [7]:
distance = tree_edit_distance(tree1, tree3)
print("Tree Edit Distance:", distance)

Tree Edit Distance: 0
