In [None]:
!pip install easyocr

In [None]:
import os
import cv2
import glob
import json
import torch
import easyocr
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

# Parse data paths

In [3]:
keyframes_dir = './Keyframes'


def collect_keyframe_paths(root_dir):
    """Map every data part and video under ``root_dir`` to its keyframe paths.

    The directory layout read here is ``<root_dir>/<part_dir>/<video_dir>/*.jpg``.
    The text after the last underscore of a directory name is used as its key,
    while the full directory name is kept for building paths, so prefixed
    directory names still resolve to real locations on disk.

    Args:
        root_dir: Directory that contains one sub-directory per data part.

    Returns:
        A nested dict ``{data_part: {video_id: [jpg_path, ...]}}`` in which
        every path list is sorted.

    Raises:
        FileNotFoundError: If ``root_dir`` does not exist (raised by
            ``os.listdir``).
    """
    keyframe_index = dict()
    for part_dir in sorted(os.listdir(root_dir)):
        part_path = f'{root_dir}/{part_dir}'
        # Skip stray files and hidden entries (e.g. notebook checkpoints);
        # only real data-part directories can be listed below.
        if part_dir.startswith('.') or not os.path.isdir(part_path):
            continue
        data_part = part_dir.split('_')[-1]  # L01, L02 for ex
        videos = keyframe_index.setdefault(data_part, dict())
        for video_dir in sorted(os.listdir(part_path)):
            video_path = f'{part_path}/{video_dir}'
            if video_dir.startswith('.') or not os.path.isdir(video_path):
                continue
            video_id = video_dir.split('_')[-1]
            videos[video_id] = sorted(glob.glob(f'{video_path}/*.jpg'))
    return keyframe_index


all_keyframe_paths = collect_keyframe_paths(keyframes_dir)

# Run inference

In [5]:
# Build the EasyOCR reader a single time and reuse it everywhere:
# constructing it is what loads the model into memory.
ocr_languages = ['vi']
reader = easyocr.Reader(ocr_languages, gpu=True)

In [None]:
bs = 16  # number of keyframes passed to the reader per call
min_confidence = 0.5  # detections scoring at or below this value are dropped
save_dir = './ocr'
# makedirs with exist_ok also covers a nested save_dir and an already
# existing directory, without a separate existence check.
os.makedirs(save_dir, exist_ok=True)


def extract_paragraph_texts(detections, threshold):
    """Return the paragraph texts of one image, using confident detections only.

    Args:
        detections: The reader's result for a single image. Each item is
            indexed as ``item[1]`` (text) and ``item[2]`` (confidence);
            presumably EasyOCR's ``(bbox, text, confidence)`` format -- confirm
            against the installed EasyOCR version.
        threshold: Items whose ``item[2]`` is not strictly greater than this
            value are discarded before paragraphs are built.

    Returns:
        A list with the text (index 1) of every entry returned by
        ``easyocr.utils.get_paragraph`` for the kept detections.
    """
    confident = [item for item in detections if item[2] > threshold]
    paragraphs = easyocr.utils.get_paragraph(confident)
    return [paragraph[1] for paragraph in paragraphs]


keys = sorted(all_keyframe_paths.keys())
for key in tqdm(keys, desc='data parts'):
    video_keyframe_paths = all_keyframe_paths[key]
    video_ids = sorted(video_keyframe_paths.keys())

    os.makedirs(os.path.join(save_dir, key), exist_ok=True)

    for video_id in tqdm(video_ids, desc=key, leave=False):
        video_keyframe_path = video_keyframe_paths[video_id]
        # One list of texts per keyframe, in the same order as the keyframe paths.
        video_ocr_results = []
        for i in range(0, len(video_keyframe_path), bs):
            # Support batchsize inferencing; the last slice may be shorter than bs.
            # NOTE(review): readtext_batched presumably expects all images of a
            # batch to share one size unless n_width/n_height are given -- confirm.
            image_paths = video_keyframe_path[i:i+bs]
            results = reader.readtext_batched(image_paths, batch_size=len(image_paths))
            video_ocr_results.extend(
                extract_paragraph_texts(result, min_confidence) for result in results
            )

        # ensure_ascii=False keeps non-ASCII text readable in the saved file.
        with open(f'{save_dir}/{key}/{video_id}.json', "w", encoding='utf-8') as jsonfile:
            json.dump(video_ocr_results, jsonfile, ensure_ascii=False)