---
# Imports

In [None]:
import os
import sys
from collections import Counter

import easyocr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from tqdm import tqdm

import evaluation_helper

---
# Setup

In [None]:
# Locations of the dataset, relative to the notebook's working directory.
DATA_PATH = os.path.join('..', 'data')
IMAGES_PATH = os.path.join(DATA_PATH, 'train_val_images', 'train_images')

# Fail fast if the expected directories are missing. Explicit exceptions are
# used instead of `assert` because assertions are removed when Python runs
# with -O, and `isdir` also rejects a regular file sitting at the path.
if not os.path.isdir(DATA_PATH):
    raise FileNotFoundError(
        f"Data path {DATA_PATH} does not exist or is not a directory. "
        "Please create it and add the data files."
    )
if not os.path.isdir(IMAGES_PATH):
    raise FileNotFoundError(
        f"Images path {IMAGES_PATH} does not exist or is not a directory. "
        "Please create it and add the image files."
    )

---
# Data Import

In [None]:
# Load the annotation table; the evaluation below reads its 'image_id' and
# 'utf8_string' columns.
annotations_csv = os.path.join(DATA_PATH, 'cleaned_annot.csv')
annotation_df = pd.read_csv(annotations_csv)

In [None]:
# Load the image table and discard the 'Unnamed: 0' column
# (presumably a stray index column written when the CSV was saved - confirm).
images_csv = os.path.join(DATA_PATH, 'img.csv')
images_df = pd.read_csv(images_csv).drop(columns=['Unnamed: 0'])

In [None]:
# Plain Python list of image ids; each id is later combined with '.jpg' to
# build the image file name.
images = images_df['id'].tolist()

---
# EasyOCR Evaluation

In [None]:
# Create the EasyOCR reader once and reuse it for every image.
# 'en' selects the English recognition model.
reader = easyocr.Reader(lang_list=['en'])

In [None]:
# Evaluate EasyOCR against the annotations.
#
# For every image that exists on disk, the words returned by
# evaluation_helper.extract_words are compared with the annotated
# 'utf8_string' values for that image:
#   total_words - number of annotated words over all evaluated images
#   correct     - number of annotated words that were matched by an
#                 extracted word (each annotation can be matched only once)
#   incorrect   - annotated words that were never matched
# Extracted words that have no matching annotation are not penalised.

# Index the annotations once, so that each image is a dict lookup instead of
# a boolean-mask scan over the whole of annotation_df inside the loop.
expected_by_image = (
    annotation_df.groupby('image_id')['utf8_string'].apply(list).to_dict()
)

correct, total_words = 0, 0

for image in tqdm(images):
    image_path = os.path.join(IMAGES_PATH, image + '.jpg')

    if not os.path.exists(image_path):
        print(f"Image {image} does not exist. Skipping.")
        continue

    extracted_words = evaluation_helper.extract_words(reader, image_path)
    expected_words = expected_by_image.get(image, [])

    total_words += len(expected_words)

    # Multiset matching: every annotated occurrence of a word can be claimed
    # by at most one extracted word, so duplicates are counted correctly.
    # NOTE(review): assumes extract_words yields hashable values (strings).
    remaining = Counter(expected_words)
    for word in extracted_words:
        if remaining[word] > 0:
            correct += 1
            remaining[word] -= 1

incorrect = total_words - correct

In [None]:
# Report the evaluation totals. "Accuracy" here is the fraction of annotated
# words that were matched by an extracted word (correct / total_words).
print(f"Correct: {correct}, Incorrect: {incorrect}")
if total_words:
    print(f"Accuracy: {correct / total_words:.2%}")
else:
    # No annotated words were evaluated (e.g. every image was skipped);
    # avoid a ZeroDivisionError and say so explicitly.
    print("Accuracy: n/a (no annotated words were evaluated)")