## zbMath

## BLEU score between the reference formula and the candidate formula (converted using Lukas Blecher's LaTeX-OCR tool)

In [33]:
import re
import csv
from collections import Counter

# LaTeX command names (written without the leading backslash) that
# tokenize_formula accepts. Names are compared case-insensitively there, and
# any command missing from this list makes tokenize_formula raise ValueError.
allowed_functions = [
    'frac', 'sin', 'cos', 'tan', 'log', 'ln', 'sqrt', 'sum',
    'cdot', 'left', 'right', 'circ', 'to', 'operatorname',
    'rightarrow', 'dot', 'prime', 'prod', 'lim', 'int',
    'sigma', 'pi', 'mu', 'infty', 'mathrm', 'sup', 'leq',
    'uparrow', 'partial', 'varnothing', 'Theta',
]

def tokenize_formula(formula):
    r"""Normalise a LaTeX formula string for n-gram comparison.

    Every backslash command (``\name``, letters only) whose name appears in
    the module-level ``allowed_functions`` list is rewritten as ``@name@``;
    the name comparison is case-insensitive. The whole string is then
    lower-cased.

    Despite the function name, the result is a single string rather than a
    list of tokens, so callers that slice or index it work on characters.

    Args:
        formula: LaTeX source of one formula.

    Returns:
        The lower-cased formula with each allowed command wrapped in ``@``.

    Raises:
        ValueError: If the formula contains a command whose name is not in
            ``allowed_functions``.
    """
    # Build the case-folded lookup once per call rather than once per regex
    # match; a set also makes each membership test constant-time.
    allowed = {name.lower() for name in allowed_functions}

    def replace_function(match):
        function_name = match.group(1)
        if function_name.lower() not in allowed:
            raise ValueError(f"Invalid LaTeX command: {match.group(0)}")
        return f"@{function_name}@"

    return re.sub(r"\\([a-zA-Z]+)", replace_function, formula).lower()

# Function to calculate BLEU score for LaTeX formulas
def calculate_formula_bleu_score(reference_formula, candidate_formula, weights):
    """Score a candidate LaTeX formula against a reference with a BLEU-like metric.

    Both formulas are normalised with ``tokenize_formula``. That function
    returns a string, so the n-grams built here are character n-grams.

    This is a modified BLEU: the per-order precisions are combined as a
    weighted arithmetic sum, and the length penalty for a candidate shorter
    than the reference is ``2 ** (1 - reference_length / candidate_length)``.

    Args:
        reference_formula: Ground-truth LaTeX string.
        candidate_formula: LaTeX string to be scored.
        weights: One weight per n-gram order; ``len(weights)`` is the highest
            order evaluated.

    Returns:
        The score as a number; ``0.0`` when the normalised candidate is empty.

    Raises:
        ValueError: Propagated from ``tokenize_formula`` when either formula
            contains a disallowed LaTeX command.
    """
    # Tokenize the formulas
    ref_tokens = tokenize_formula(reference_formula)
    cand_tokens = tokenize_formula(candidate_formula)

    reference_length = len(ref_tokens)
    candidate_length = len(cand_tokens)

    # An empty candidate matches nothing; returning early also avoids dividing
    # by zero in the length penalty below.
    if candidate_length == 0:
        return 0.0

    # Calculate n-gram precision
    precisions = []
    for n in range(1, len(weights) + 1):
        ref_ngrams = [tuple(ref_tokens[i:i + n]) for i in range(reference_length - n + 1)]
        cand_ngrams = [tuple(cand_tokens[i:i + n]) for i in range(candidate_length - n + 1)]
        ref_ngram_counts = Counter(ref_ngrams)
        cand_ngram_counts = Counter(cand_ngrams)
        # Counter intersection keeps the minimum count of each shared n-gram,
        # i.e. candidate counts clipped by the reference counts.
        common_ngram_counts = ref_ngram_counts & cand_ngram_counts
        total_candidate_ngrams = sum(cand_ngram_counts.values())
        if total_candidate_ngrams:
            precision = sum(common_ngram_counts.values()) / total_candidate_ngrams
        else:
            # Candidate is shorter than n, so it has no n-grams of this order.
            precision = 0.0
        precisions.append(precision)

    # Calculate cumulative precision
    cumulative_precision = sum(p * w for p, w in zip(precisions, weights))

    # Calculate length penalty
    if candidate_length >= reference_length:
        length_penalty = 1
    else:
        length_penalty = pow(2, 1 - reference_length / candidate_length)

    # Calculate modified BLEU score
    return length_penalty * cumulative_precision

# Read the ';'-separated CSV of (reference, candidate) formula pairs and print
# the BLEU-like score of each pair.
weights = [0.25, 0.25, 0.25, 0.25]  # Equal weights for 1-gram, 2-gram, 3-gram, and 4-gram

# newline='' is what the csv module expects from the file object. The explicit
# encoding keeps non-ASCII formula characters independent of the platform
# default -- assumes the file is UTF-8; TODO confirm.
with open('evaluation_zbmath_lucas.csv', 'r', newline='', encoding='utf-8') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=';')
    next(csvreader, None)  # Skip header row if present; tolerate an empty file

    for row in csvreader:
        if len(row) < 2:
            # Blank or malformed line: there is no formula pair to score.
            continue
        reference_formula = row[0]
        candidate_formula = row[1]

        bleu_score = calculate_formula_bleu_score(reference_formula, candidate_formula, weights)
        print(f"BLEU score for reference formula: {reference_formula}, candidate formula: {candidate_formula}")
        print("BLEU score:", bleu_score)
        print()

BLEU score for reference formula:  \mathrm{𝐋𝐨}, candidate formula:  \mathrm{LO}
BLEU score: 0.7558566433566434

BLEU score for reference formula:  f^{2 \cdot 2^q + 1}, candidate formula:  f^{2}\cdot2^{q}+1
BLEU score: 0.5053862912346019

BLEU score for reference formula:  \left\{ \left( u - k \right)^{\int_0^1 x^{u - 2k - 1} \left(1 - x\right)^k dx} \circ x \right\}^{-1}, candidate formula:  \left\{(U-k)\int_{0}^{1}x^{u-2k-1}(1-x)^{k}d x\,\circ\,X\right\}-1
BLEU score: 0.37726075795131636

BLEU score for reference formula:  \left( b - e - \lim_{{t \to 0^+}} \left( \frac{1}{{t^2}} \left\{ F_T \left( (z, z^*) + t(v, v^*) \right) - F_T (z, z^*) - t \left( (v, v^*), (z, z^*) \right) \right\} \right) \right) \cdot (v, v^*), candidate formula:  \left(b-\mathrm{e}-\operatorname*{lim}_{t\rightarrow0^{+}}\left(\frac1t\dot{\prime}\left(F_{T}((Z,Z^{*})+t(V,V^{*})\right)-F_{T}(Z,Z^{*})-t((V,V^{*}),(Z,Z^{*})\right)\right)\right)\cdot\cdot\cdot(V,V^{*})
BLEU score: 0.4226334311820454

BLEU score for

## BLEU score between the reference formula and the candidate formula (converted using the Mathpix tool)

In [36]:
import re
import csv
from collections import Counter

# LaTeX command names (written without the leading backslash) that
# tokenize_formula accepts. Names are compared case-insensitively there, and
# any command missing from this list makes tokenize_formula raise ValueError.
allowed_functions = [
    'frac', 'sin', 'cos', 'tan', 'log', 'ln', 'sqrt', 'sum',
    'cdot', 'left', 'right', 'circ', 'to', 'operatorname',
    'rightarrow', 'dot', 'prime', 'prod', 'lim', 'int',
    'sigma', 'pi', 'mu', 'infty', 'mathrm', 'sup', 'leq',
    'uparrow', 'partial', 'varnothing', 'Theta', 'text', 'mid',
]

def tokenize_formula(formula):
    r"""Normalise a LaTeX formula string for n-gram comparison.

    Every backslash command (``\name``, letters only) whose name appears in
    the module-level ``allowed_functions`` list is rewritten as ``@name@``;
    the name comparison is case-insensitive. The whole string is then
    lower-cased.

    Despite the function name, the result is a single string rather than a
    list of tokens, so callers that slice or index it work on characters.

    Args:
        formula: LaTeX source of one formula.

    Returns:
        The lower-cased formula with each allowed command wrapped in ``@``.

    Raises:
        ValueError: If the formula contains a command whose name is not in
            ``allowed_functions``.
    """
    # Build the case-folded lookup once per call rather than once per regex
    # match; a set also makes each membership test constant-time.
    allowed = {name.lower() for name in allowed_functions}

    def replace_function(match):
        function_name = match.group(1)
        if function_name.lower() not in allowed:
            raise ValueError(f"Invalid LaTeX command: {match.group(0)}")
        return f"@{function_name}@"

    return re.sub(r"\\([a-zA-Z]+)", replace_function, formula).lower()

# Function to calculate BLEU score for LaTeX formulas
def calculate_formula_bleu_score(reference_formula, candidate_formula, weights):
    """Score a candidate LaTeX formula against a reference with a BLEU-like metric.

    Both formulas are normalised with ``tokenize_formula``. That function
    returns a string, so the n-grams built here are character n-grams.

    This is a modified BLEU: the per-order precisions are combined as a
    weighted arithmetic sum, and the length penalty for a candidate shorter
    than the reference is ``2 ** (1 - reference_length / candidate_length)``.

    Args:
        reference_formula: Ground-truth LaTeX string.
        candidate_formula: LaTeX string to be scored.
        weights: One weight per n-gram order; ``len(weights)`` is the highest
            order evaluated.

    Returns:
        The score as a number; ``0.0`` when the normalised candidate is empty.

    Raises:
        ValueError: Propagated from ``tokenize_formula`` when either formula
            contains a disallowed LaTeX command.
    """
    # Tokenize the formulas
    ref_tokens = tokenize_formula(reference_formula)
    cand_tokens = tokenize_formula(candidate_formula)

    reference_length = len(ref_tokens)
    candidate_length = len(cand_tokens)

    # An empty candidate matches nothing; returning early also avoids dividing
    # by zero in the length penalty below.
    if candidate_length == 0:
        return 0.0

    # Calculate n-gram precision
    precisions = []
    for n in range(1, len(weights) + 1):
        ref_ngrams = [tuple(ref_tokens[i:i + n]) for i in range(reference_length - n + 1)]
        cand_ngrams = [tuple(cand_tokens[i:i + n]) for i in range(candidate_length - n + 1)]
        ref_ngram_counts = Counter(ref_ngrams)
        cand_ngram_counts = Counter(cand_ngrams)
        # Counter intersection keeps the minimum count of each shared n-gram,
        # i.e. candidate counts clipped by the reference counts.
        common_ngram_counts = ref_ngram_counts & cand_ngram_counts
        total_candidate_ngrams = sum(cand_ngram_counts.values())
        if total_candidate_ngrams:
            precision = sum(common_ngram_counts.values()) / total_candidate_ngrams
        else:
            # Candidate is shorter than n, so it has no n-grams of this order.
            precision = 0.0
        precisions.append(precision)

    # Calculate cumulative precision
    cumulative_precision = sum(p * w for p, w in zip(precisions, weights))

    # Calculate length penalty
    if candidate_length >= reference_length:
        length_penalty = 1
    else:
        length_penalty = pow(2, 1 - reference_length / candidate_length)

    # Calculate modified BLEU score
    return length_penalty * cumulative_precision

# Read the ';'-separated CSV of (reference, candidate) formula pairs and print
# the BLEU-like score of each pair.
weights = [0.25, 0.25, 0.25, 0.25]  # Equal weights for 1-gram, 2-gram, 3-gram, and 4-gram

# newline='' is what the csv module expects from the file object. The explicit
# encoding keeps non-ASCII formula characters independent of the platform
# default -- assumes the file is UTF-8; TODO confirm.
with open('evaluation_zbmath_mathpix.csv', 'r', newline='', encoding='utf-8') as csvfile:
    csvreader = csv.reader(csvfile, delimiter=';')
    next(csvreader, None)  # Skip header row if present; tolerate an empty file

    for row in csvreader:
        if len(row) < 2:
            # Blank or malformed line: there is no formula pair to score.
            continue
        reference_formula = row[0]
        candidate_formula = row[1]

        bleu_score = calculate_formula_bleu_score(reference_formula, candidate_formula, weights)
        print(f"BLEU score for reference formula: {reference_formula}, candidate formula: {candidate_formula}")
        print("BLEU score:", bleu_score)
        print()

BLEU score for reference formula:  \mathrm{𝐋𝐨}, candidate formula:  \text { Lo }
BLEU score: 0.12637362637362637

BLEU score for reference formula:  f^{2 \cdot 2^q + 1}, candidate formula:  f^2 \cdot 2^q+1
BLEU score: 0.6772075419083671

BLEU score for reference formula:  \left\{ \left( u - k \right)^{\int_0^1 x^{u - 2k - 1} \left(1 - x\right)^k dx} \circ x \right\}^{-1}, candidate formula:  \left\{(u-k)^{\int_0^1 x^{u-2 k-1}(1-x)^k d x} \circ x\right\}^{-1}
BLEU score: 0.5423433176121712

BLEU score for reference formula:  \left( b - e - \lim_{{t \to 0^+}} \left( \frac{1}{{t^2}} \left\{ F_T \left( (z, z^*) + t(v, v^*) \right) - F_T (z, z^*) - t \left( (v, v^*), (z, z^*) \right) \right\} \right) \right) \cdot (v, v^*), candidate formula:  \left(b-e-\lim _{t \rightarrow 0^{+}}\left(\frac{1}{t^2}\left\{F_T\left(\left(z, z^*\right)+t\left(v, v^*\right)\right)-F_T\left(z, z^*\right)-t\left(\left(v, v^*\right),\left(z, z^*\right)\right)\right\}\right)\right) \cdot\left(v, v^*\right)
BLEU sc