In [8]:
import easyocr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont, ImageShow
from utils.ocr_data_models import OCRData
from utils.metrics import OCRMetrics
import tqdm
import json
import os

In [2]:
# Locations of the custom recognition model weights and of the network
# definition module that EasyOCR loads for `recog_network`.
custom_model_path = './custom_example/model'
custom_network_path = './custom_example/user_network'

# Baseline recognizer using EasyOCR's stock English + Thai model.
# detector=False: no text-detection model is loaded, so this reader is used
# only through `recognize()` on pre-cropped regions.
# download_enabled=False: model files must already be present on disk.
reader_base = easyocr.Reader(
    lang_list=['en', 'th'],
    detector=False,
    gpu=True,
    download_enabled=False
)

# Same configuration, but with the custom recognition network. The paths come
# from the variables above so the directories are defined in exactly one place.
reader_custom_v1 = easyocr.Reader(
    lang_list=['en', 'th'],
    detector=False,
    gpu=True,
    download_enabled=False,
    recog_network='custom_thai_v1',
    model_storage_directory=custom_model_path,
    user_network_directory=custom_network_path
)

In [3]:
# Directory holding the processed images, their JSON annotations and the
# mapping CSV. The trailing slash matters: paths are built by concatenation.
folder_images = '/Volumes/BACKUP/data/processed/'

# Derive the CSV path from `folder_images` instead of repeating the directory,
# so relocating the dataset only requires changing one line.
pd_files = pd.read_csv(folder_images + 'file_mapping.csv')
print(f"Total images: {len(pd_files)}")

Total images: 4985


In [None]:
# Run the base recognizer over the annotated text regions of a fixed random
# sample of images and collect (ground truth, prediction) pairs for scoring.
# `n` is clamped so a mapping file with fewer than 20 rows does not raise.
selected = pd_files.sample(n=min(20, len(pd_files)), random_state=42)

all_references = []   # ground-truth strings, one per text region
all_hypotheses = []   # recognized strings, aligned with all_references
all_images_info = []  # per-region record: source image, crop and both texts

for index, row in tqdm.tqdm(selected.iterrows(), total=len(selected), desc="Processing images"):
    ocr_data: OCRData = OCRData.from_json(json_path=folder_images + row['new_json_name'])
    image_path = folder_images + row['new_image_name']
    programs = ocr_data.programs

    # Only the first frame of the first program is evaluated; skip annotations
    # that have no program or no frame instead of failing with IndexError.
    if len(programs) == 0 or len(programs[0].frames) == 0:
        print(f"No boxes found in image {row['new_image_name']}")
        continue

    # Open the image once per row (not once per region) and close the file
    # handle deterministically when all regions have been cropped.
    with Image.open(image_path) as source_image:
        for i, box in enumerate(programs[0].frames[0].text_regions):
            cropped_image = source_image.crop((box.x, box.y, box.x + box.width, box.y + box.height))
            # Make sure the crop owns its pixel data before the source file is
            # closed, since the crop is kept in all_images_info afterwards.
            cropped_image.load()

            # NOTE(review): np.array() of a PIL image is RGB-ordered, while
            # EasyOCR appears to treat 3-channel arrays as BGR — confirm
            # whether an explicit grayscale conversion is wanted here.
            ocr_matrix = reader_base.recognize(np.array(cropped_image))
            # Each result is indexed as [1] for the recognized text; an empty
            # result list is recorded with the "No text" placeholder.
            ocr_text = ocr_matrix[0][1] if ocr_matrix else "No text"
            ground_truth = box.text.replace("\n", "")

            all_references.append(ground_truth)
            all_hypotheses.append(ocr_text)
            all_images_info.append({
                'image_name': row['new_image_name'],
                'box_index': i,
                'cropped_image': cropped_image,
                'ground_truth': ground_truth,
                'ocr_text': ocr_text
            })

Processing images: 100%|██████████| 20/20 [00:08<00:00,  2.48it/s]


In [None]:
# Score every recognized region against its ground truth and mirror the
# results to a JSON file as they are produced.
metrics = OCRMetrics()
individual_metrics = []
individual_metrics_path = './individual_metrics_realtime.json'

# Each run starts from a clean file. Removing it directly (rather than
# checking for existence first) avoids a check-then-act race, and there is
# nothing to read back afterwards: results from earlier runs are never merged.
try:
    os.remove(individual_metrics_path)
except FileNotFoundError:
    pass

for info in tqdm.tqdm(all_images_info, total=len(all_images_info), desc="Calculating metrics"):
    individual_metrics.append(metrics.evaluate(reference=info['ground_truth'], hypothesis=info['ocr_text']))

    # The whole list is rewritten after every item so the file reflects the
    # progress made so far while the loop is still running.
    with open(individual_metrics_path, 'w', encoding='utf-8') as f:
        json.dump(individual_metrics, f, ensure_ascii=False, indent=4)

Loaded semantic model: distiluse-base-multilingual-cased


Calculating metrics: 100%|██████████| 83/83 [00:01<00:00, 48.14it/s]
