In [3]:
from PIL import Image
from pix2tex.cli import LatexOCR

img = Image.open('./output.png')
model = LatexOCR()
print(model(img))

INFO:numexpr.utils:NumExpr defaulting to 8 threads.


\begin{array}{r l r l r}{{\overline{{}}}{\overline{{{\hat{c}}\rangle}}}}&{{}}&{{\underline{{{\hat{c}}}}\big\}}}&{{\underline{{{\hat{c}}}}\big\}}}&{{\underline{{{\hat{c}}}}\big\}}}\\ {{\overline{{{\hat{c}}}}\big


In [16]:
import random
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

def balance_brackets(latex_code):
    bracket_stack = []
    bracket_pairs = {'{': '}', '[': ']', '(': ')'}
    new_code = []
    for char in latex_code:
        if char in bracket_pairs:
            bracket_stack.append(char)
            new_code.append(char)
        elif char in bracket_pairs.values():
            if bracket_stack and bracket_pairs[bracket_stack[-1]] == char:
                bracket_stack.pop()
                new_code.append(char)
            else:
                for opening_bracket, closing_bracket in bracket_pairs.items():
                    if closing_bracket == char:
                        new_code.append(opening_bracket)
                        bracket_stack.append(opening_bracket)
                        new_code.append(char)
                        break
        else:
            new_code.append(char)
    while bracket_stack:
        opening_bracket = bracket_stack.pop()
        new_code.append(bracket_pairs[opening_bracket])
    return ''.join(new_code)

data_dir = './dataset1'
processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
finetuned_model = VisionEncoderDecoderModel.from_pretrained("./my_trained_model")
indices = random.sample(range(1000), 20)
images = [(i, Image.open(data_dir + f'/{i}.png').convert("RGB")) for i in indices]
for i, image in images:
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = finetuned_model.generate(pixel_values, temperature=0.1, do_sample=True)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    # generated_text = balance_brackets(generated_text)
    with open(data_dir + '/latex.txt', 'r') as file:
        lines = file.readlines()
        line = lines[i].strip()

    print('Prediction for', line, ':  ', generated_text)

Prediction for 5 :   5
Prediction for {\psi} :   {\psi}}}
Prediction for 69\cdot {{\phi}}_{-s} :   {69\\\}\\\}\\cd\\
Prediction for \frac{33}{{\pi}} :   \frac{{\pi}}}}}}
Prediction for R-S :   R
Prediction for \frac{20}{326831} :   \frac{{{
Prediction for {18}^{{\pi}}{\phi} :   {18}^^^}^}}^{{\\\{\}}}}}}{\}}{\{\}}}}}}}}{\}}
Prediction for 36 :   36
Prediction for {25}_{{\pi}} :   {25_{{\
Prediction for {\cot{Z}\times R}^{X} :   \cot{{{{{K}}}
Prediction for \tan{19} :   \tan{{{
Prediction for \int_{-\infty}^{\infty}n :   \int_{-inininftyinin}inin^inin{\inin
Prediction for {\sin{a}}_{X} :   \sin{{X}}}}}}}}}}}
Prediction for {\theta} :   {\theta}}}
Prediction for 61 :   {
Prediction for {\delta} :   {\dddelta
Prediction for {W\cdot 18}^{36} :   {W}}}\}}cd}}
Prediction for \tan{\tan{{\nu}}} :   \tan{{\nu}}}}}}}}}}}}}{{\\\nu}}}}}}}}}}}}}
Prediction for 25/73 :   {25}}}
Prediction for \sum_{i=0}^{n}59 :   \sum_{i}}}=}}n}}


In [17]:
import pandas as pd

df = pd.read_csv('./archive/im2latex_test.csv')
indices = random.sample(range(1, 8000), 20)
images = [(df.iloc[i, 0], Image.open('./archive/images/images/' + str(df.iloc[i, 1]))) for i in indices]
for code, image in images:
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = finetuned_model.generate(pixel_values, temperature=0.1, do_sample=True)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    generated_text = balance_brackets(generated_text)
    code = code.replace(" ", "")
    print('Prediction for', code, ':  ', generated_text)

Prediction for \beta(e)=-\frac{e^{3}}{16\pi^{2}}(c_{gauge}+c_{\lambda}+c_{q}+c_{sq}) :   {n}{}{}_{m\\\{\\\{{\\}}}}}
Prediction for \sum_{\gamma\in\Gamma}\frac{|\gamma^{\prime}(f({\bfy}))|^{\Delta}}{|f({\bfx})-\gammaf({\bfy})|^{2\Delta}}\;=\;|f^{\prime}({\bfx})|^{-\Delta}|f^{\prime}({\bfy})|^{-\Delta}\sum_{\gamma\inf^{-1}\Gammaf}\frac{|\gamma^{\prime}({\bfy})|^{\Delta}}{|{\bfx}-\gamma{\bfy}|^{2\Delta}}. :   {_{_{_{sum_{m}}}m}^{\\\{\\\}\{\{\\{\}{\\}{\{\{\}\}}\\-\\}{\\_{\\}}}}}
Prediction for \sum_{n}\left(\lambda_{n}+\lambda_{n}^{\prime}\right)=\mp1\, :   {74}{}{}_{}{}^}{}{\_{W}}{}}
Prediction for \delta^{ab}\,\delta_{xy}\,=\,\langle\,\frac{\deltaS}{\delta\bar{c}_{x}^{a}}\,\bar{c}_{y}^{b}\,\rangle\,=\,\widetildeZ_{3}\,\langle\,i(\partialD_{r}c)_{x}^{a}\,\bar{c}_{y}^{b}\,\rangle\,-\,\frac{Z_{\lambda}\lambda}{Z_{3}}\,\langle\,(s_{r}B_{x}^{a})\,\bar{c}_{y}^{b}\,\rangle\;. :   {n_{n}\\\}\{}{}\{\\\{\}\_{\}}}
Prediction for \bar{\lambda}_{\theta}=\partial_{1}\bar{\theta}+\frac\xi2\bar{\theta},

AttributeError: 'float' object has no attribute 'replace'

In [12]:
import nltk
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
import random
from cer import calculate_cer

data_dir = './dataset1'
finetuned_model = VisionEncoderDecoderModel.from_pretrained("./my_trained_model")
indices = random.sample(range(1000), 200)
images = [(i, Image.open(data_dir + f'/{i}.png').convert("RGB")) for i in indices]
bleu = 0
cer = 0
for i, image in images:
    pixel_values = processor(image, return_tensors="pt").pixel_values
    generated_ids = finetuned_model.generate(pixel_values, temperature=0.1, do_sample=True)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    with open(data_dir + '/latex.txt', 'r') as file:
        lines = file.readlines()
        line = lines[i].strip()
    bleu += nltk.translate.bleu_score.sentence_bleu([[*line]], [*generated_text])
    cer += calculate_cer([*line], [*generated_text])
print(bleu, bleu / len(images))
print(cer, cer / len(images))

The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


57.55224276549043 0.2877612138274522
102.90763638470793 0.5145381819235396
