In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

from glob import glob
from tqdm import tqdm
from PIL import Image

from Modules import LineDetection, OCRInference
from Utils import create_dir, get_file_name, batch_data

In [2]:
def show_image(image: np.ndarray):
    """Render an image array inline in the notebook.

    Args:
        image: Array accepted by ``PIL.Image.fromarray``.

    NOTE(review): PIL interprets 3-channel arrays as RGB, while arrays loaded
    with ``cv2.imread`` are BGR - convert first if colour fidelity matters.
    """
    display(Image.fromarray(image))

In [3]:
# Line-detection model: config path plus the inference wrapper built from it.
line_model_config = "Models/LineModels/line_model_config.json"
line_inference = LineDetection(
    config_file=line_model_config,
    binarize_output=False,
)

# OCR model (config lives under the LhasaKanjur model folder).
ocr_model_config = "Models/OCRModels/LhasaKanjur/ocr_model_config.json"
ocr_inference = OCRInference(config_file=ocr_model_config)

In [4]:
def run_ocr(image_path: str, out_path: str, save_preview: bool = False):
    """Detect text lines in one page image, OCR them and write the text to disk.

    Uses the notebook-level ``line_inference`` and ``ocr_inference`` objects.

    Args:
        image_path: Path of the page image to read.
        out_path: Directory that receives ``<image name>.txt`` and, when
            requested, ``<image name>_prediction.jpg``. It is not created here.
        save_preview: When True, also save the line prediction blended over
            the page image.

    Raises:
        OSError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError(f"Could not read image: {image_path}")

    image_name = get_file_name(image_path)

    # Only the prediction map and the cropped line images are needed here.
    prediction, line_images, _, _ = line_inference.predict(image, 0)
    predicted_text, _ = ocr_inference.run(line_images)

    out_text = os.path.join(out_path, f"{image_name}.txt")
    with open(out_text, "w", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in predicted_text)

    if save_preview:
        # Blend the prediction over the page without overwriting `image`.
        overlay_alpha = 0.4
        overlay = cv2.cvtColor(prediction, cv2.COLOR_GRAY2BGR)
        preview = cv2.addWeighted(overlay, overlay_alpha, image, 1 - overlay_alpha, 0)
        out_prediction = os.path.join(out_path, f"{image_name}_prediction.jpg")
        cv2.imwrite(out_prediction, preview)

#### Run batched line detection (ONNX Runtime)

In [4]:
# Extra dependencies used only by the batched ONNX Runtime cells below.
import json
from Utils import resize_image, pad_image, patch_image
import onnxruntime as ort
# Value for the patch_size= argument of pad_image/patch_image
# (presumably the patch edge length in pixels - TODO confirm).
patch_size = 256

In [5]:
# Read the line-model config to find the CPU ONNX model file; the context
# manager makes sure the config file handle is closed again.
with open(line_model_config, encoding="utf-8") as model_file:
    json_content = json.load(model_file)
onnx_model_file = json_content["cpu-model"]

# NOTE(review): this rebinds `line_inference` from the LineDetection wrapper to
# a raw onnxruntime session, which has no `predict` method. run_ocr() cannot be
# used until the LineDetection wrapper is created again.
line_inference = ort.InferenceSession(onnx_model_file, providers=["CPUExecutionProvider"])

In [6]:
# os.path.join keeps the dataset path portable and avoids a backslash literal
# ("\W" is not a valid string escape sequence).
image_path = os.path.join("Data", "W26071-v56")
images = glob(f"{image_path}/*.tif")
print(f"Image: {len(images)}")

# Predictions are written next to the source images.
out_path = os.path.join(image_path, "predictions")
create_dir(out_path)

# Group the image paths into batches for the batched inference cells.
batched_images = batch_data(images)
print(len(batched_images))

Image: 932
117


In [69]:
# Pick one batch of image paths (index 1) to try the preprocessing on.
image_batch = batched_images[1]
b_size = image_batch.shape[0]
print(b_size)

b_image_names = list(map(get_file_name, image_batch))
# Load, resize and pad every image of the batch in a single pass.
b_images = [
    pad_image(resize_image(cv2.imread(x))[0], patch_size=256)
    for x in image_batch
]

8


In [81]:

# Preprocess one batch of page images and run the ONNX line model on it.
# `image_batch` must hold the file paths of one batch (see the cell above).
b_size = image_batch.shape[0]
b_image_names = [get_file_name(x) for x in image_batch]
b_images = [cv2.imread(x) for x in image_batch]
b_images = [resize_image(x)[0] for x in b_images]
b_images = [pad_image(x, patch_size=patch_size) for x in b_images]
image_patches = [patch_image(padded[0], patch_size=patch_size) for padded in b_images]
img_batch = [x[0] for x in image_patches]

# The model input gets its own name: overwriting `image_batch` (the paths)
# with the float tensor made this cell fail when it was run a second time.
patch_batch = np.vstack(img_batch).astype(np.float32)
patch_batch /= 255.0

# Move the channel axis in front of the spatial axes for the model input.
patch_batch = np.transpose(patch_batch, axes=[0, 3, 1, 2])

ort_batch = ort.OrtValue.ortvalue_from_numpy(patch_batch)
ocr_results = line_inference.run_with_ort_values(["output"], {"input": ort_batch})
prediction = ocr_results[0].numpy()

(192, 256, 256, 3)


In [7]:
# Run the ONNX line model batch by batch over all image paths.
for image_batch in tqdm(batched_images, total=len(batched_images)):
    b_size = image_batch.shape[0]
    b_image_names = [get_file_name(x) for x in image_batch]
    b_images = [cv2.imread(x) for x in image_batch]
    b_images = [resize_image(x)[0] for x in b_images]
    b_images = [pad_image(x, patch_size=patch_size) for x in b_images]

    image_patches = [patch_image(padded[0], patch_size=patch_size) for padded in b_images]
    img_batch = [x[0] for x in image_patches]

    # Separate name for the model input so the loop variable (the batch of
    # file paths) is not overwritten inside the loop body.
    patch_batch = np.vstack(img_batch).astype(np.float32)
    patch_batch /= 255.0

    # Move the channel axis in front of the spatial axes for the model input.
    patch_batch = np.transpose(patch_batch, axes=[0, 3, 1, 2])

    ort_batch = ort.OrtValue.ortvalue_from_numpy(patch_batch)
    ocr_results = line_inference.run_with_ort_values(["output"], {"input": ort_batch})
    # TODO: `prediction` is overwritten every iteration and never stored or
    # passed on to the OCR step yet.
    prediction = ocr_results[0].numpy()
    #predicted_text, raw_prediction = ocr_inference.run(line_images)
    #predicted_text, raw_prediction = ocr_inference.run(line_images)

  0%|          | 0/117 [00:00<?, ?it/s]

  4%|▍         | 5/117 [02:05<46:48, 25.08s/it]

In [12]:
#run patched prediction
for _, image_path in tqdm(enumerate(images), total=len(images)):
    run_ocr(image_path, out_path, save_preview=True)

  0%|          | 0/7 [00:00<?, ?it/s]2023-09-21 13:03:07.267649: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
100%|██████████| 7/7 [00:30<00:00,  4.38s/it]


#### Run OCR on Testset 2

In [23]:
# Rebuild the line detector with explicit dilation settings for this test set.
line_inference = LineDetection(
    config_file=line_model_config,
    dilate_kernel=10,
    dilate_iterations=10,
    binarize_output=False,
)

In [24]:
# os.path.join keeps the dataset path portable and avoids a backslash literal
# ("\W" is not a valid string escape sequence).
image_path = os.path.join("Data", "W2DB4577")
images = glob(f"{image_path}/*.jpg")
print(f"Image: {len(images)}")

# Predictions are written next to the source images.
out_path = os.path.join(image_path, "predictions")
create_dir(out_path)

Image: 6


In [25]:
# OCR every page of the test set and save a preview next to each text file.
# A dedicated loop variable keeps `image_path` (the dataset directory) intact.
for page_path in tqdm(images):
    run_ocr(page_path, out_path, save_preview=True)

100%|██████████| 6/6 [00:56<00:00,  9.46s/it]


#### Run OCR on Testset 3

In [8]:
# Line detector with explicit dilation settings for this test set.
line_inference = LineDetection(config_file=line_model_config, dilate_kernel=10, dilate_iterations=10, binarize_output=False)

# os.path.join keeps the dataset path portable and avoids a backslash literal
# ("\W" is not a valid string escape sequence).
image_path = os.path.join("Data", "W26071-v56")
images = glob(f"{image_path}/*.jpg")
print(f"Image: {len(images)}")

# Predictions are written next to the source images.
out_path = os.path.join(image_path, "predictions")
create_dir(out_path)

Image: 6
