## arXiv

## BLEU score between reference formula and candidate formula (converted using Lukas Blecher's LaTeX-OCR tool)

In [19]:
import re
import csv
from collections import Counter

# List of allowed built-in LaTeX functions (command names without the leading
# backslash). tokenize_formula compares against this list case-insensitively.
allowed_functions = ['frac', 'sin', 'cos', 'tan', 'log', 'ln', 'sqrt', 'sum', 'cdot', 'left', 'right', 'circ', 'to', 'operatorname', 'rightarrow', 'dot','prime', 'lbrace', 'hat','rceil',
                     'prod', 'lim', 'int', 'sigma', 'pi', 'mu', 'infty', 'mathrm', 'sup', 'leq', 'uparrow', 'partial', 'varnothing', 'Theta', 'text', 'mid', 'textstyle',
                     'beta', 'lambda', 'mathbf', 'bf', 'bar', 'tau', 'varepsilon', 'Omega','xi', 'alpha', 'mathcal', 'cdots', 'bigg', 'rangle']

def tokenize_formula(formula):
    """Normalize a LaTeX formula string for n-gram comparison.

    Every command of the form ``\\name`` (a backslash followed by ASCII
    letters) is rewritten as ``@name@``; everything else is left in place.
    The whole result is then lower-cased.

    Args:
        formula: LaTeX source of the formula, as a string.

    Returns:
        The rewritten, lower-cased formula as a single string. Note that this
        is a string, not a list of tokens, so callers that slice or measure it
        work on individual characters.

    Raises:
        ValueError: if the formula contains a command whose name is not in
            ``allowed_functions`` (compared case-insensitively).
    """
    # Lower-case the whitelist once per call, as a set, instead of rebuilding
    # a list for every command found in the formula.
    allowed_lower = {name.lower() for name in allowed_functions}

    def replace_function(match):
        function_name = match.group(1)
        if function_name.lower() not in allowed_lower:
            raise ValueError(f"Invalid LaTeX command: {match.group(0)}")
        return f"@{function_name}@"

    formula = re.sub(r"\\([a-zA-Z]+)", replace_function, formula)

    return formula.lower()

# Function to calculate BLEU score for LaTeX formulas
def calculate_formula_bleu_score(reference_formula, candidate_formula, weights):
    """Score a candidate LaTeX formula against a reference, BLEU-style.

    Both formulas are normalized with ``tokenize_formula`` and compared
    n-gram by n-gram for ``n = 1 .. len(weights)``. ``tokenize_formula``
    returns a string, so the n-grams here are runs of characters.

    This is a modified BLEU, not the textbook formula: the per-order
    precisions are combined as a weighted arithmetic sum (not a geometric
    mean) and the length penalty uses base 2 (not e).

    Args:
        reference_formula: ground-truth LaTeX string.
        candidate_formula: LaTeX string to evaluate.
        weights: one weight per n-gram order; ``weights[0]`` applies to
            1-grams, ``weights[1]`` to 2-grams, and so on.

    Returns:
        ``length_penalty * sum(weight_n * precision_n)`` as a number.

    Raises:
        ValueError: propagated from ``tokenize_formula`` when a formula uses
            a LaTeX command that is not whitelisted.
    """
    # Tokenize the formulas
    ref_tokens = tokenize_formula(reference_formula)
    cand_tokens = tokenize_formula(candidate_formula)

    # Calculate n-gram precision
    precisions = []
    for n in range(1, len(weights) + 1):
        ref_ngram_counts = Counter(
            tuple(ref_tokens[i:i + n]) for i in range(len(ref_tokens) - n + 1)
        )
        cand_ngram_counts = Counter(
            tuple(cand_tokens[i:i + n]) for i in range(len(cand_tokens) - n + 1)
        )
        # Counter intersection keeps min(count_ref, count_cand) per n-gram,
        # i.e. candidate counts clipped by the reference counts.
        common_ngram_counts = ref_ngram_counts & cand_ngram_counts
        total_cand_ngrams = sum(cand_ngram_counts.values())
        # A candidate shorter than n has no n-grams of this order; score it
        # as 0 instead of dividing by zero.
        if total_cand_ngrams:
            precision = sum(common_ngram_counts.values()) / total_cand_ngrams
        else:
            precision = 0.0
        precisions.append(precision)

    # Calculate cumulative precision
    cumulative_precision = sum(p * w for p, w in zip(precisions, weights))

    # Calculate length penalty
    reference_length = len(ref_tokens)
    candidate_length = len(cand_tokens)
    if candidate_length >= reference_length:
        length_penalty = 1
    elif candidate_length == 0:
        # Limit of 2 ** (1 - r / c) as c -> 0; avoids a division by zero.
        length_penalty = 0
    else:
        length_penalty = pow(2, 1 - reference_length / candidate_length)

    # Calculate modified BLEU score
    bleu_score = length_penalty * cumulative_precision

    return bleu_score

# Read CSV file and calculate BLEU score for each pair of formulas
# Equal weights for 1-gram, 2-gram, 3-gram, and 4-gram (same for every row)
weights = [0.25, 0.25, 0.25, 0.25]

# newline='' lets the csv module do its own line-ending handling, as its
# documentation recommends for files passed to csv.reader.
with open('evaluation_arxiv_lucas.csv', 'r', newline='') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=';')
    next(csvreader, None)  # Skip header row if present; no error on an empty file

    for row in csvreader:
        if not row:
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing newline at the end of the file); nothing to score.
            continue

        # NOTE(review): fields are used verbatim, so any space following the
        # ';' delimiter is scored as part of the formula - confirm intended.
        reference_formula = row[0]
        candidate_formula = row[1]

        bleu_score = calculate_formula_bleu_score(reference_formula, candidate_formula, weights)
        print(f"BLEU score for reference formula: {reference_formula}, candidate formula: {candidate_formula}")
        print("BLEU score:", bleu_score)
        print()

BLEU score for reference formula: \mathrm{vi}, candidate formula:  \textstyle\left\lbrace{\hat{l}}\right\rceil
BLEU score: 0.05520408163265306

BLEU score for reference formula: \frac{1}{2} + m + i \cdot \frac{\beta}{2k}, candidate formula:  {\textstyle{\frac{1}{2}}}+m+i\cdot{\frac{\beta}{2k}}
BLEU score: 0.5781168241921417

BLEU score for reference formula: \sum_{j=(0)} \prod_{i=1}^{k} \lambda \left((a \cdot \mathbf{n} + j + ((W \cdot m) + r)) \cdot b_i\right) \cdot \left(j + ((W \cdot m) + r) \cdot b_i\right), candidate formula:  \sum_{j=(0)i=1}^{k}\lambda((d\cdot{\bf n}+j+((W\cdot m)+r))\cdot b_{i})\cdot(j+((W\cdot m)+r)\cdot b_{i})
BLEU score: 0.5012424698124592

BLEU score for reference formula: \left(\sum_{i=m+1}^{m+m_1} \bar{a}_i \cdot \left(\frac{\partial}{\partial \mathbf{w}}(h_{\tau,\bar{\tau}}) \cdot \frac{\partial}{\partial \bar{z}_i}(\mathbf{a},\bar{\mathbf{a}})\right) - \left(\frac{\bar{c}_h}{\bar{c}_g} \cdot \varepsilon \cdot \frac{\partial}{\partial \mathbf{w}}(g_{\tau,

## BLEU score between Reference formula and Candidate formula (converted using MathPix tool)

In [22]:
import re
import csv
from collections import Counter

# List of allowed built-in LaTeX functions (command names without the leading
# backslash). tokenize_formula compares against this list case-insensitively.
allowed_functions = ['frac', 'sin', 'cos', 'tan', 'log', 'ln', 'sqrt', 'sum', 'cdot', 'left', 'right', 'circ', 'to', 'operatorname', 'rightarrow', 'dot','prime', 'lbrace', 'hat','rceil',
                     'prod', 'lim', 'int', 'sigma', 'pi', 'mu', 'infty', 'mathrm', 'sup', 'leq', 'uparrow', 'partial', 'varnothing', 'Theta', 'text', 'mid', 'textstyle',
                     'beta', 'lambda', 'mathbf', 'bf', 'bar', 'tau', 'varepsilon', 'Omega','xi', 'alpha', 'mathcal', 'cdots', 'bigg', 'rangle', 'overline', 'boldsymbol']

def tokenize_formula(formula):
    """Normalize a LaTeX formula string for n-gram comparison.

    Every command of the form ``\\name`` (a backslash followed by ASCII
    letters) is rewritten as ``@name@``; everything else is left in place.
    The whole result is then lower-cased.

    Args:
        formula: LaTeX source of the formula, as a string.

    Returns:
        The rewritten, lower-cased formula as a single string. Note that this
        is a string, not a list of tokens, so callers that slice or measure it
        work on individual characters.

    Raises:
        ValueError: if the formula contains a command whose name is not in
            ``allowed_functions`` (compared case-insensitively).
    """
    # Lower-case the whitelist once per call, as a set, instead of rebuilding
    # a list for every command found in the formula.
    allowed_lower = {name.lower() for name in allowed_functions}

    def replace_function(match):
        function_name = match.group(1)
        if function_name.lower() not in allowed_lower:
            raise ValueError(f"Invalid LaTeX command: {match.group(0)}")
        return f"@{function_name}@"

    formula = re.sub(r"\\([a-zA-Z]+)", replace_function, formula)

    return formula.lower()

# Function to calculate BLEU score for LaTeX formulas
def calculate_formula_bleu_score(reference_formula, candidate_formula, weights):
    """Score a candidate LaTeX formula against a reference, BLEU-style.

    Both formulas are normalized with ``tokenize_formula`` and compared
    n-gram by n-gram for ``n = 1 .. len(weights)``. ``tokenize_formula``
    returns a string, so the n-grams here are runs of characters.

    This is a modified BLEU, not the textbook formula: the per-order
    precisions are combined as a weighted arithmetic sum (not a geometric
    mean) and the length penalty uses base 2 (not e).

    Args:
        reference_formula: ground-truth LaTeX string.
        candidate_formula: LaTeX string to evaluate.
        weights: one weight per n-gram order; ``weights[0]`` applies to
            1-grams, ``weights[1]`` to 2-grams, and so on.

    Returns:
        ``length_penalty * sum(weight_n * precision_n)`` as a number.

    Raises:
        ValueError: propagated from ``tokenize_formula`` when a formula uses
            a LaTeX command that is not whitelisted.
    """
    # Tokenize the formulas
    ref_tokens = tokenize_formula(reference_formula)
    cand_tokens = tokenize_formula(candidate_formula)

    # Calculate n-gram precision
    precisions = []
    for n in range(1, len(weights) + 1):
        ref_ngram_counts = Counter(
            tuple(ref_tokens[i:i + n]) for i in range(len(ref_tokens) - n + 1)
        )
        cand_ngram_counts = Counter(
            tuple(cand_tokens[i:i + n]) for i in range(len(cand_tokens) - n + 1)
        )
        # Counter intersection keeps min(count_ref, count_cand) per n-gram,
        # i.e. candidate counts clipped by the reference counts.
        common_ngram_counts = ref_ngram_counts & cand_ngram_counts
        total_cand_ngrams = sum(cand_ngram_counts.values())
        # A candidate shorter than n has no n-grams of this order; score it
        # as 0 instead of dividing by zero.
        if total_cand_ngrams:
            precision = sum(common_ngram_counts.values()) / total_cand_ngrams
        else:
            precision = 0.0
        precisions.append(precision)

    # Calculate cumulative precision
    cumulative_precision = sum(p * w for p, w in zip(precisions, weights))

    # Calculate length penalty
    reference_length = len(ref_tokens)
    candidate_length = len(cand_tokens)
    if candidate_length >= reference_length:
        length_penalty = 1
    elif candidate_length == 0:
        # Limit of 2 ** (1 - r / c) as c -> 0; avoids a division by zero.
        length_penalty = 0
    else:
        length_penalty = pow(2, 1 - reference_length / candidate_length)

    # Calculate modified BLEU score
    bleu_score = length_penalty * cumulative_precision

    return bleu_score

# Read CSV file and calculate BLEU score for each pair of formulas
# Equal weights for 1-gram, 2-gram, 3-gram, and 4-gram (same for every row)
weights = [0.25, 0.25, 0.25, 0.25]

# newline='' lets the csv module do its own line-ending handling, as its
# documentation recommends for files passed to csv.reader.
with open('evaluation_arxiv_mathpix.csv', 'r', newline='') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=';')
    next(csvreader, None)  # Skip header row if present; no error on an empty file

    for row in csvreader:
        if not row:
            # csv.reader yields an empty list for a blank line (e.g. a
            # trailing newline at the end of the file); nothing to score.
            continue

        # NOTE(review): fields are used verbatim, so any space following the
        # ';' delimiter is scored as part of the formula - confirm intended.
        reference_formula = row[0]
        candidate_formula = row[1]

        bleu_score = calculate_formula_bleu_score(reference_formula, candidate_formula, weights)
        print(f"BLEU score for reference formula: {reference_formula}, candidate formula: {candidate_formula}")
        print("BLEU score:", bleu_score)
        print()

BLEU score for reference formula: \mathrm{vi}, candidate formula:  \text { vi }
BLEU score: 0.14423076923076922

BLEU score for reference formula: \frac{1}{2} + m + i \cdot \frac{\beta}{2k}, candidate formula:  \frac{1}{2}+m+i \cdot \frac{\beta}{2 k}
BLEU score: 0.8185547575950004

BLEU score for reference formula: \sum_{j=(0)} \prod_{i=1}^{k} \lambda \left((a \cdot \mathbf{n} + j + ((W \cdot m) + r)) \cdot b_i\right) \cdot \left(j + ((W \cdot m) + r) \cdot b_i\right), candidate formula:  \sum_{j=(0) i=1}^k \prod_1^k \lambda\left((a \cdot \mathbf{n}+j+((W \cdot m)+r)) \cdot b_i\right) \cdot\left(j+((W \cdot m)+r) \cdot b_i\right)
BLEU score: 0.8137844016222417

BLEU score for reference formula: \left(\sum_{i=m+1}^{m+m_1} \bar{a}_i \cdot \left(\frac{\partial}{\partial \mathbf{w}}(h_{\tau,\bar{\tau}}) \cdot \frac{\partial}{\partial \bar{z}_i}(\mathbf{a},\bar{\mathbf{a}})\right) - \left(\frac{\bar{c}_h}{\bar{c}_g} \cdot \varepsilon \cdot \frac{\partial}{\partial \mathbf{w}}(g_{\tau,\bar{\