# Load Requirements

In [None]:
!pip install torchmetrics jiwer

In [1]:
from IPython.display import Markdown, display

In [3]:
# read md file
with open('res/benchmark_data.md', encoding="utf8") as f:
    benchmark_data = f.read()

# display md file
# display(Markdown(benchmark_data))

In [4]:
# Character Error rate (CER)
from torchmetrics.text import CharErrorRate
import jiwer


cer = CharErrorRate()

transforms = jiwer.Compose(
    [
        jiwer.ExpandCommonEnglishContractions(),
        jiwer.RemoveEmptyStrings(),
        jiwer.ToLowerCase(),
        jiwer.RemoveMultipleSpaces(),
        jiwer.Strip(),
        jiwer.RemovePunctuation(),
        jiwer.ReduceToListOfListOfWords(),
    ]
)

  from .autonotebook import tqdm as notebook_tqdm


In [10]:
# benchmarking function
def benchmark(benchmark_data, parser_res):
    target = benchmark_data
    preds = parser_res

    # cer score
    cer_score = cer(preds=preds, target=target).item()

    # wer score
    wer_score = jiwer.wer(reference=target, hypothesis=preds, truth_transform=transforms, hypothesis_transform=transforms)

    return cer_score, wer_score

# LlamaParse

In [8]:
# read md file
with open('res/llamaparse_res.md', encoding="utf8") as f:
    llamaparse_res = f.read()

# display md file
# display(Markdown(llamaparse_res))

In [11]:
# benchmarking llamaparse
llamaparse_cer, llamaparse_wer = benchmark(benchmark_data, llamaparse_res)

print(f"CER: {llamaparse_cer}")
print(f"WER: {llamaparse_wer}")

CER: 0.19881021976470947
WER: 0.1814739295077192


# Mistral OCR

In [13]:
# read md file
with open('res/mistralocr_res.md', encoding="utf8") as f:
    mistralocr_res = f.read()

# display md file
# display(Markdown(mistralocr_res))

In [14]:
# benchmarking mistralocr
mistralocr_cer, mistralocr_wer = benchmark(benchmark_data, mistralocr_res)

print(f"CER: {mistralocr_cer}")
print(f"WER: {mistralocr_wer}")

CER: 0.23246526718139648
WER: 0.27847363821730264


# Pipeline 1 (Yolo + GOT-OCR2 + Gemma 3)

In [16]:
# read md file
with open('res/pipeline_1_res.md', encoding="utf8") as f:
    pipeline_1_res = f.read()

# display md file
# display(Markdown(pipeline_1_res))

In [17]:
pipeline_1_cer, pipeline_1_wer = benchmark(benchmark_data, pipeline_1_res)

print(f"CER: {pipeline_1_cer}")
print(f"WER: {pipeline_1_wer}")

CER: 0.467052161693573
WER: 0.533935333527527


# Pipeline 2 (Yolo + Universal.io + Gemma 3 + Gemma 3)

In [19]:
# read md file
with open('res/pipeline_2_res.md', encoding="utf8") as f:
    pipeline_2_res = f.read()

# display md file
# display(Markdown(pipeline_2_res))

In [20]:
pipeline_2_cer, pipeline_2_wer = benchmark(benchmark_data, pipeline_2_res)

print(f"CER: {pipeline_2_cer}")
print(f"WER: {pipeline_2_wer}")

CER: 0.246193528175354
WER: 0.25080104864549957


# Pipeline 3 (Yolo + Universal.io + Gemma 3 + phi-4)

In [None]:
# with open('json_res/pipeline_2_res.jsonl') as f:
#     pipeline_2_res = json.load(f)

# pipeline_2_cer, pipeline_2_wer = benchmark_text(benchmark_data, pipeline_2_res)

# print(f"CER List: {pipeline_2_cer}")
# print(f"WER List: {pipeline_2_wer}")
# print()
# print(f"Average CER: {sum(pipeline_2_cer)/len(pipeline_2_cer):.4f}")
# print(f"Average WER: {sum(pipeline_2_wer)/len(pipeline_2_wer):.4f}")