In [79]:
!pip install ultralytics
!pip install jdeskew
!pip install deskew
!pip install gdown
!pip install joblib



In [14]:
!gdown 1pfFmbmzbiPPryt2LEltOvml7HH9SrMVQ -O letter_model.pt

Downloading...
From: https://drive.google.com/uc?id=1pfFmbmzbiPPryt2LEltOvml7HH9SrMVQ
To: /content/letter_model.pt
100% 546M/546M [00:10<00:00, 54.5MB/s]


In [15]:
!curl https://raw.githubusercontent.com/SeniorProject-2023/ocr-app/main/src/Backend/InferenceServer/word_model.pt -o word_model.pt

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 85.5M  100 85.5M    0     0  12.8M      0  0:00:06  0:00:06 --:--:-- 24.9M


In [2]:
import re
import string
import cv2
import numpy as np
import torch
from more_itertools import split_when
from ultralytics import YOLO
from ultralytics.yolo.utils.plotting import Annotator

def normalize_arabic(text):
    text = re.sub("[إأآا]", "ا", text)
    text = re.sub("ى", "ي", text)
    text = re.sub("ؤ", "ء", text)
    text = re.sub("ئ", "ء", text)
    text = re.sub("ة", "ه", text)
    text = re.sub("گ", "ك", text)

    return text


def infer_words(word_model: YOLO, img: np.ndarray):
    # returns List[(word_image, bb, class)]
    results = word_model.predict(img, verbose=False)
    returnable = []
    for r in results:
        boxes = r.boxes
        for box in boxes:
            x1, y1, x2, y2 = box.xyxy[0].cpu(
            ).data.numpy().astype(int).tolist()
            returnable.append(box.xyxy[0].tolist())
            img = cv2.rectangle(img, (x1, y1), (x2, y2),
                                (255, 255, 255), -1)
    return returnable

def maximize(box):
    box_list = box.tolist()
    return max(box_list[0], box_list[2], box_list[4])

def infer_letters(model: YOLO, img: np.ndarray, **kwargs):
    results = model.predict(img, verbose=False, **kwargs)[0]
    boxes = results.boxes.data
    xs = [maximize(box) for box in boxes]
    
    sorted_indices = sorted(range(len(xs)), key=xs.__getitem__, reverse=True)
    letters = [model.names[int(results.boxes.cls[i])] for i in sorted_indices]
    word = "".join(letters)
    return_result = normalize_arabic(word)
    return return_result


def get_y_center(b):
    x1, y1, x2, y2 = b[1].xyxy[0].tolist()
    return (y1 + y2) / 2


def groupbyrow(boxes):
    def get_y_center(b):
        x1, y1, x2, y2 = b
        return (y1 + y2) / 2

    def not_vertically_overlapping(b1, b2):
        _, up1, _, down1 = b1
        _, up2, _, down2 = b2
        return down1 < up2 or (down1 - up2) < (up2 - up1)

    sorted_boxes = sorted(boxes, key=get_y_center)
    return list(split_when(sorted_boxes, not_vertically_overlapping))


def merge_boxes(boxes, iou_thresh=0.3):
    boxes = sorted(boxes, key=lambda b: b[0])

    if len(boxes) == 0:
        return []

    merged_boxes = [boxes[0]]

    for k in range(1, len(boxes)):
        prev_box = merged_boxes[-1]

        x1 = boxes[k][0]
        y1 = boxes[k][1]
        x2 = boxes[k][2]
        y2 = boxes[k][3]
        x1_other = prev_box[0]
        y1_other = prev_box[1]
        x2_other = prev_box[2]
        y2_other = prev_box[3]

        intersection_area = 0 if x2_other < x1 else (
            x2_other - x1) * (max(y2, y2_other) - max(y1, y1_other))
        area = (x2 - x1) * (y2 - y1)
        area_other = (x2_other - x1_other) * (y2_other - y1_other)
        union_area = area + area_other - intersection_area
        if intersection_area / union_area > iou_thresh or intersection_area / area > 0.7 or intersection_area / area_other > 0.7:
            x1_new = np.minimum(x1, x1_other)
            y1_new = np.minimum(y1, y1_other)
            x2_new = np.maximum(x2, x2_other)
            y2_new = np.maximum(y2, y2_other)

            merged_boxes[-1] = ([x1_new, y1_new, x2_new, y2_new])
        else:
            merged_boxes.append([x1, y1, x2, y2])

    return merged_boxes


def map2d(func, grid):
    return [[func(value) for value in row] for row in grid]


In [3]:
import cv2
import numpy as np


def highPassFilter(img, kSize=51):
    if not kSize % 2:
        kSize += 1
    kernel = np.ones((kSize, kSize), np.float32) / (kSize * kSize)
    filtered = cv2.filter2D(img, -1, kernel)
    filtered = img.astype('float32') - filtered.astype('float32')
    filtered = filtered + 127 * np.ones(img.shape, np.uint8)
    filtered = filtered.astype('uint8')

    return filtered


def blackPointSelect(img, blackPoint=66):
    def map(x, in_min, in_max, out_min, out_max):
        return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min
    img = img.astype('int32')
    img = map(img, blackPoint, 255, 0, 255)
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_TOZERO)
    return img.astype('uint8')


def whitePointSelect(img, whitePoint=127):
    def map(x, in_min, in_max, out_min, out_max):
        return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min
    _, img = cv2.threshold(img, whitePoint, 255, cv2.THRESH_TRUNC)
    img = img.astype('int32')
    img = map(img, 0, whitePoint, 0, 255)
    img = img.astype('uint8')

    return img


In [8]:
from PIL import Image
from deskew import determine_skew
from jdeskew.utility import rotate
from functools import reduce
from torch import Tensor
import datetime
from joblib import delayed, Parallel


def infer_image(word_model: YOLO, letter_model: YOLO, img_array):
    img = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = highPassFilter(img)
    img = whitePointSelect(img)
    img = blackPointSelect(img)
    _, img = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    img = rotate(img, determine_skew(img),
                 border_mode=cv2.BORDER_CONSTANT, border_value=255)
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
    pil_img_before_inference = Image.fromarray(img)
    word_boxes = infer_words(word_model, img)

    box_bounds = [
        [
          max(0, b[0] + ((b[0] - b[2]) * 0.05)),
          max(0, b[1] + ((b[1] - b[3]) * 0.2)),
          min(img.shape[1] - 1, b[2] + ((b[2] - b[0]) * 0.05)),
          min(img.shape[0] - 1, b[3] + ((b[3] - b[1]) * 0.05))
        ] for b in word_boxes]  # pad the boxes and make sure padding doesn't spill out of image
    box_bounds = reduce(lambda x, y: np.vstack((x, y)), box_bounds)
    rows_of_boxes = groupbyrow(box_bounds)
    rows_of_boxes = [merge_boxes(row) for row in rows_of_boxes]

    rows_of_word_imgs = map2d(lambda box: np.array(
        pil_img_before_inference.crop(box)), rows_of_boxes)

    im = Image.fromarray(rows_of_word_imgs[0][0])
    im.save(f"C:\\Users\\PC\\Desktop\\letter.jpg")
    rows_of_word_texts = [infer_letters(letter_model, x, conf=0.3, iou=0.5, agnostic_nms=True) for y in rows_of_word_imgs for x in y]
    #rows_of_word_texts = Parallel(n_jobs=4)(delayed(infer_letters)(letter_model, x, conf=0.3, iou=0.5, agnostic_nms=True) for y in rows_of_word_imgs for x in y)

    final_rows = [" ".join(reversed(row)) for row in rows_of_word_texts]

    return "\n".join(final_rows)

In [9]:
data = open(f"C:\\Users\\PC\\Desktop\\testocr.jpg", "rb").read()
image_numpy = np.array(bytearray(data), dtype=np.uint8)

word_model = YOLO('./word_model.pt')
letter_model = YOLO('./letter_model.pt')


infer_image(word_model, letter_model, image_numpy)

<class 'numpy.ndarray'>


AttributeError: 'numpy.ndarray' object has no attribute 'save'

In [26]:
import timeit
import re

code1 = '''
def normalize_arabic(text):
    text = re.sub("[إأآا]", "ا", text)
    text = re.sub("ى", "ي", text)
    text = re.sub("ؤ", "ء", text)
    text = re.sub("ئ", "ء", text)
    text = re.sub("ة", "ه", text)
    text = re.sub("گ", "ك", text)

    return text

    normalize_arabic("اهلا")
'''

code2 = '''

replacements_dict = {
    'أ': 'ا',
    'إ': 'ا',
    'آ': 'ا',
    'ي': 'ى',
}

string = "اهلا".translate(str.maketrans(replacements_dict))


'''
print(timeit.timeit(stmt=code1, number=5000))
print(timeit.timeit(stmt=code2, number=5000))

0.0003202999996574363
0.003236599999581813


In [53]:
import timeit

# Slow
print(timeit.timeit(stmt='''
for i in range(10000):
    t.append(i)''', setup='t=[]', number=10000))

# Faster
print(timeit.timeit(stmt='''
for i in range(10000):
    l(i)''', setup='t=[]; l=t.append', number=10000))

# Faster still
print(timeit.timeit(stmt='t = [i for i in range(10000)]', number=10000))


5.752683799999431
5.263248199998998
2.2057134000006045
