In [19]:
import csv
import os
import statistics
import string
import time
from collections.abc import Iterator
from functools import wraps
from itertools import chain
from pprint import pprint
from typing import Union

import easyocr
import numpy as np
from memory_profiler import profile
from PIL import Image

In [10]:
def timeit(func):
    """Decorator that prints how long each call to ``func`` took.

    The wrapped callable forwards all positional and keyword arguments,
    prints one line containing the function name, the arguments and the
    elapsed wall-clock time (``time.perf_counter``) with four decimals, and
    returns whatever ``func`` returned. Nothing is printed when ``func``
    raises, because the exception propagates before the report is made.
    """
    @wraps(func)
    def timed_call(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        elapsed = time.perf_counter() - began
        report = f'Function {func.__name__}{args} {kwargs} Took {elapsed:.4f} seconds'
        print(report)
        return outcome

    return timed_call


def normalization_text(text: str) -> str:  # TODO later
    """Return ``text`` with every character of ``string.punctuation`` removed.

    Only the ASCII punctuation set is stripped; all other characters
    (letters in any script, digits, whitespace, non-ASCII punctuation) are
    kept in their original order.
    """
    unwanted = set(string.punctuation)
    return ''.join(char for char in text if char not in unwanted)

In [16]:
def setup_ocr2(lang: list[str]) -> Iterator[tuple[str, easyocr.easyocr.Reader]]:
    """Build the ``recognizer='Transformer'`` reader and time its construction.

    This is a generator, not a plain function: it yields exactly one
    ``(elapsed, reader)`` pair and nothing runs until the caller consumes it
    (for example with ``next(setup_ocr2(['en']))``).

    Args:
        lang: Language codes forwarded to ``easyocr.Reader``.

    Yields:
        A tuple of the construction time in seconds, formatted as a string
        with four decimals, and the constructed ``easyocr.Reader``.
    """
    started = time.perf_counter()
    # NOTE(review): upstream EasyOCR appears to treat ``recognizer`` as a
    # boolean on/off switch (the network is selected with ``recog_network``),
    # so 'Transformer' may only act as a truthy value and load the same model
    # as setup_ocr() -- confirm against the installed EasyOCR version.
    reader = easyocr.Reader(lang, recognizer='Transformer')
    elapsed = f'{time.perf_counter() - started:.4f}'
    yield elapsed, reader


def setup_ocr(lang: list[str]) -> Iterator[tuple[str, easyocr.easyocr.Reader]]:
    """Build a default ``easyocr.Reader`` and time its construction.

    This is a generator, not a plain function: it yields exactly one
    ``(elapsed, reader)`` pair and nothing runs until the caller consumes it
    (for example with ``next(setup_ocr(['en']))``).

    Args:
        lang: Language codes forwarded to ``easyocr.Reader``.

    Yields:
        A tuple of the construction time in seconds, formatted as a string
        with four decimals, and the constructed ``easyocr.Reader``.
    """
    started = time.perf_counter()
    reader = easyocr.Reader(lang)
    elapsed = f'{time.perf_counter() - started:.4f}'
    yield elapsed, reader


def image2text(image_path: Union[str, np.ndarray],
               model: easyocr.easyocr.Reader) -> Iterator[tuple[str, str]]:
    """Run OCR on one image and time the recognition call.

    This is a generator that yields exactly one ``(elapsed, text)`` pair;
    nothing runs until the caller consumes it.

    Args:
        image_path: The image handed to ``model.readtext``. Callers in this
            file pass either a path string or an already loaded array.
        model: Reader whose ``readtext`` method performs the recognition.

    Yields:
        A tuple of the recognition time in seconds, formatted as a string
        with four decimals, and the recognised pieces joined by single
        spaces.
    """
    started = time.perf_counter()
    pieces = model.readtext(image_path, detail=0, paragraph=True)
    elapsed = f'{time.perf_counter() - started:.4f}'
    yield elapsed, ' '.join(pieces)


def list_file_names(directory='images/test_images') -> list[str]:
    """Recursively collect the image files found under ``directory``.

    A file counts as an image when its name ends, case-insensitively, with
    ``.jpg``, ``.jpeg`` or ``.png``.

    Each entry is the file's path relative to ``directory``. For files that
    sit directly inside ``directory`` that is just the bare file name; for
    files in sub-directories the sub-directory is kept, so joining an entry
    back onto ``directory`` always locates the file and equally named files
    in different folders stay distinguishable.

    Args:
        directory: Root folder to scan.

    Returns:
        The relative image paths in ``os.walk`` order (not sorted). The list
        is empty when ``directory`` does not exist or holds no images.
    """
    # str.endswith accepts a tuple, which replaces the per-extension loop.
    image_extensions = (".jpg", ".jpeg", ".png")
    image_file_names = []

    for root, _, filenames in os.walk(directory):
        for filename in filenames:
            if filename.lower().endswith(image_extensions):
                full_path = os.path.join(root, filename)
                image_file_names.append(os.path.relpath(full_path, directory))
    return image_file_names


def preprocessing_image(filepath) -> Iterator[tuple[str, np.ndarray]]:
    """Load an image as a grayscale array and time the conversion.

    This is a generator that yields exactly one ``(elapsed, array)`` pair;
    nothing runs until the caller consumes it.

    Args:
        filepath: Location of the image, passed to ``PIL.Image.open``.

    Yields:
        A tuple of the load-and-convert time in seconds, formatted as a
        string with four decimals, and the ``'L'``-mode image as a NumPy
        array.
    """
    started = time.perf_counter()
    # The context manager releases the underlying file handle deterministically
    # instead of leaving it to garbage collection.
    with Image.open(filepath) as source:
        gray = np.array(source.convert('L'))
    elapsed = f'{time.perf_counter() - started:.4f}'
    yield elapsed, gray

In [22]:
def test_models(directory: str ='images/test_images', csv_file: str = 'images/results.csv') -> None:
    results = [['Image', 'Time (Standard)', 'Time (Transformer + Grayscale)', 'Output (Standard)', 'Output (Transformer + GrayScale)']]
    time_preprocessing = ['Time (Preprocessing for grayscale)', '', '', '']
    images = list_file_names(directory) 

    
    res1 = ['setup_ocr (ru_en)', '', '']
    res2 = ['setup_ocr (en)', '', '']
    res1[1], ru_en = tuple(*setup_ocr(['ru', 'en']))
    res2[1], en = tuple(*setup_ocr(['en']))

    res1[2], ru_en2 = tuple(*setup_ocr2(['ru', 'en']))
    res2[2], en2 = tuple(*setup_ocr2(['en']))
    res1.extend(('', ''))
    res2.extend(('', ''))
    results.extend((res1, res2))

    for i in images:
        res = [i, '', '', '', '']
        i = f'images/test_images/{i}'

        if 'ru' in i:
            res[1], res[3] = tuple(*image2text(i, ru_en))
            timex, i = tuple(*preprocessing_image(i))
            res[2], res[4] = tuple(*image2text(i, ru_en2))  
        elif 'en' in i:
            res[1], res[3] = tuple(*image2text(i, en))
            timex, i = tuple(*preprocessing_image(i))
            res[2], res[4] = tuple(*image2text(i, en2))

        time_preprocessing.append(timex)
        results.append(res)

    res = ['Mean', str(statistics.fmean(map(lambda i: float(i[1]), results[4:]))), str(statistics.fmean(map(lambda i: float(i[2]), results[4:]))), '', '']
    res2 = ['Median', str(statistics.median(map(lambda i: float(i[1]), results[4:]))), str(statistics.median(map(lambda i: float(i[2]), results[4:]))), '', '']
    results.append(res)
    results.append(res2)
    
    res3 = str(statistics.fmean(map(lambda i: float(i), time_preprocessing[4:])))
    res4 = str(statistics.median(map(lambda i: float(i), time_preprocessing[4:])))
    time_preprocessing.append(res3)
    time_preprocessing.append(res4)
    results = map(lambda x: x[1] + [x[0]], zip(time_preprocessing, results))

    with open(csv_file, 'w', encoding='UTF8', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(results)

# Entry point: run the benchmark over images/test_images and write the report
# to images/grayscale_transformer_prod.csv.
if __name__ == "__main__":
    test_models(
        directory='images/test_images',
        csv_file='images/grayscale_transformer_prod.csv',
    )
