## Input Preparation

### Libraries

In [1]:
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt
from skimage.color import rgb2gray
from PIL import Image
import tensorflow as tf
from skimage.filters import threshold_otsu
import keras_ocr
import gc
from numba import njit
from keras.models import load_model
import keras
from tensorflow.keras.layers.experimental.preprocessing import StringLookup

In [2]:
def denoise(img):
    """Convert an RGB image to grayscale and apply non-local-means denoising.

    Parameters
    ----------
    img : image array accepted by ``cv2.cvtColor`` with ``cv2.COLOR_RGB2GRAY``.

    Returns
    -------
    The single-channel result of ``cv2.fastNlMeansDenoising`` called with
    ``h=25``, ``templateWindowSize=15`` and ``searchWindowSize=35``.
    """
    return cv2.fastNlMeansDenoising(
        cv2.cvtColor(img, cv2.COLOR_RGB2GRAY),
        h=25,
        templateWindowSize=15,
        searchWindowSize=35,
    )

def resize(img, max_side=2000):
    """Shrink a PIL image so that its longest side is at most ``max_side`` pixels.

    The aspect ratio is preserved. Images that already fit are returned
    unchanged (the very same object, no copy).

    Parameters
    ----------
    img : object exposing ``size`` as ``(width, height)`` and a
        ``resize(size, resample)`` method (a ``PIL.Image.Image``).
    max_side : int, default 2000
        Upper bound, in pixels, for both width and height.

    Returns
    -------
    The original image if it fits, otherwise a LANCZOS-resampled copy.
    """
    width, height = img.size
    longest = max(width, height)
    if longest <= max_side:
        return img
    ratio = max_side / longest
    # int() truncates, so an extremely elongated image could end up with a
    # 0-pixel side; clamp both sides to at least one pixel.
    new_size = (max(1, int(width * ratio)), max(1, int(height * ratio)))
    return img.resize(new_size, Image.LANCZOS)

def preproces_image(image, *, kernel_size=15, crop_side=50, blocksize=35, constant=15, max_value=255):
    """Binarize a BGR page image, erode it, and trim a border off every side.

    Steps: BGR -> gray, bitwise inversion, mean adaptive threshold, two
    erosion passes with a square all-ones kernel, then removal of
    ``crop_side`` pixels from each edge.

    Parameters
    ----------
    image : BGR image array (as returned by ``cv2.imread``).
    kernel_size : side length of the square erosion kernel.
    crop_side : number of pixels removed from each of the four borders.
        ``0`` keeps the full frame.
    blocksize, constant, max_value : forwarded to ``cv2.adaptiveThreshold``
        as ``blockSize``, ``C`` and ``maxValue``.

    Returns
    -------
    The eroded, border-trimmed single-channel image. Its pixel coordinates
    are offset by ``crop_side`` relative to ``image``.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    inverted = cv2.bitwise_not(gray)
    image_adapted = cv2.adaptiveThreshold(
        src=inverted,
        maxValue=max_value,
        adaptiveMethod=cv2.ADAPTIVE_THRESH_MEAN_C,
        thresholdType=cv2.THRESH_BINARY,
        blockSize=blocksize,
        C=constant,
    )
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    erosion = cv2.erode(image_adapted, kernel, iterations=2)
    # Use explicit end indices: the slice ``[c:-c]`` collapses to an empty
    # array when c == 0, because ``-0`` is just ``0``.
    rows, cols = erosion.shape[:2]
    return erosion[crop_side:rows - crop_side, crop_side:cols - crop_side]

def find_edges(image_preprocessed, *, bw_threshold=150, limits=(0.2, 0.15)):
    """Locate the first and last row/column that contain enough dark pixels.

    A pixel counts as dark when its value is below ``bw_threshold``.

    Parameters
    ----------
    image_preprocessed : 2-D array.
    bw_threshold : values strictly below this are treated as dark.
    limits : pair of fractions. ``limits[1]`` times the number of columns is
        the minimum dark-pixel count for a row to qualify; ``limits[0]``
        times the number of rows is the minimum for a column.

    Returns
    -------
    list
        ``[(row_min, row_max), (col_min, col_max)]``: inclusive indices of
        the first and last qualifying row and column. When no row (or
        column) qualifies, the full extent ``(0, n - 1)`` of that dimension
        is returned instead of raising ``IndexError``.
    """
    mask = image_preprocessed < bw_threshold
    edges = []
    for axis in (1, 0):
        # Summing over axis 1 gives one count per row; over axis 0, one per column.
        count = mask.sum(axis=axis)
        limit = limits[axis] * image_preprocessed.shape[axis]
        hits = np.flatnonzero(count >= limit)
        if hits.size:
            edges.append((hits[0], hits[-1]))
        else:
            # Nothing dark enough (e.g. a blank page): keep the whole span.
            edges.append((0, max(count.shape[0] - 1, 0)))
    return edges

def adapt_edges(edges, *, height, width):
    """Push the upper row/column bounds outwards and clamp the lower column bound.

    Parameters
    ----------
    edges : ``((row_min, row_max), (col_min, col_max))``.
    height, width : limits towards which the upper bounds are extended.

    Returns
    -------
    tuple
        ``((row_min, row_max'), (max(0, col_min), col_max'))`` where each
        primed bound is moved by ``min(100, (limit - bound) * 10 // 11)``.
        ``row_min`` is passed through untouched.
    """
    max_extension = 100
    numerator, denominator = 10, 11

    def _extended(upper, bound):
        # Advance by 10/11 of the remaining distance (floor division), capped.
        return upper + min(max_extension, (bound - upper) * numerator // denominator)

    (row_lo, row_hi), (col_lo, col_hi) = edges
    row_bounds = (row_lo, _extended(row_hi, height))
    col_bounds = (max(0, col_lo), _extended(col_hi, width))
    return row_bounds, col_bounds

### Preprocess Image

In [3]:
IMAGE_PATH = 'trial.tif'

# cv2.imread does not raise on a missing/unreadable file; it returns None.
# Fail here with a clear message instead of an obscure error inside PIL.
image = cv2.imread(IMAGE_PATH)
if image is None:
    raise FileNotFoundError(f"Could not read image file: {IMAGE_PATH!r}")
image = Image.fromarray(image)
# Resize the image
image = resize(image)
image = np.array(image)

# Preprocess the image

height, width = image.shape[0:2]
image_preprocessed = preproces_image(image)
edges = find_edges(image_preprocessed)
(x_min, x_max), (y_min, y_max) = adapt_edges(edges, height=height, width=width)
# x_* index rows and y_* index columns of `image`.
image_cropped = image[x_min:x_max, y_min:y_max]
# img_denoised = denoise(img_cropped)


### Detect Words

In [5]:
# Detect word boxes on the page and collect one warped crop per box.
# NOTE(review): detection runs on `image`, not on `image_cropped` from the
# previous cell -- confirm this is intended.
words_array = []
try:
    detector = keras_ocr.detection.Detector(weights='clovaai_general')
    boxes = detector.detect(images=[image])[0]
    for box in boxes:
        cropped_img = keras_ocr.tools.warpBox(image=image, box=box)
        words_array.append(cropped_img)
except Exception as exc:
    # Stay best-effort (later cells simply see fewer or no words), but report
    # what failed, and let KeyboardInterrupt/SystemExit propagate.
    print(f'error: {exc!r}')


Looking for C:\Users\Ayman\.keras-ocr\craft_mlt_25k.h5


### Edge Enhancement

In [7]:
#image contrast enhancement
def grayscalize(img):
    """Return a grayscale version of ``img``.

    Arrays with at most two dimensions are returned as-is (same object);
    anything with more dimensions is passed through ``rgb2gray``.
    """
    if img.ndim <= 2:
        # Already single-channel: nothing to convert.
        return img
    return rgb2gray(img)

def binarize_image(image):
    """Return a boolean mask that is True where ``image`` is below its Otsu threshold."""
    return image < threshold_otsu(image)


def denoise_binary_image(binary_image, kernel_size=5):
    """Apply a morphological opening (erosion followed by dilation) to remove specks.

    Parameters
    ----------
    binary_image : image array accepted by ``cv2.morphologyEx``.
    kernel_size : side length of the square (``cv2.MORPH_RECT``) structuring element.

    Returns
    -------
    The opened image.
    """
    structuring_element = cv2.getStructuringElement(
        cv2.MORPH_RECT, (kernel_size, kernel_size)
    )
    return cv2.morphologyEx(binary_image, cv2.MORPH_OPEN, structuring_element)

def thresholding(image, threshold, typee='Binary', param1=0, param2=0):
    """Apply inverse intensity thresholding to a grayscale image.

    Parameters
    ----------
    image : grayscale image array.
    threshold : cut-off passed to ``cv2.threshold``; only used in binary mode.
    typee : ``'Binary'`` (case-insensitive) selects a simple global threshold;
        any other value selects adaptive Gaussian thresholding.
    param1 : local region size for the adaptive mode (preferably an odd
        number); forwarded as the block size.
    param2 : constant forwarded as ``C`` to ``cv2.adaptiveThreshold``.
        NOTE(review): the defaults ``param1=0, param2=0`` are only meaningful
        in binary mode -- pass explicit values for the adaptive mode.

    Returns
    -------
    The thresholded image (``cv2.THRESH_BINARY_INV``, maximum value 255).
    """
    if typee.lower() != 'binary':
        return cv2.adaptiveThreshold(
            image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, param1, param2
        )
    _, binary = cv2.threshold(image, threshold, 255, cv2.THRESH_BINARY_INV)
    return binary

def edg_enhance(image):
    """Binarize a word crop into a ``uint8`` image holding only 0 and 255.

    The crop is converted to grayscale, thresholded with Otsu's method, and
    pixels *below* the threshold become 255 while all others become 0.

    Parameters
    ----------
    image : word image array (grayscale or multi-channel).

    Returns
    -------
    numpy.ndarray of dtype ``uint8`` with values in {0, 255}.
    """
    # NOTE(review): an earlier version also ran denoise_binary_image(image)
    # here but never used the result; the dead call was dropped. If denoising
    # was intended, it should be applied to the binarized output -- confirm.
    gray = grayscalize(image)
    binarized_image = binarize_image(gray)
    # Boolean mask -> 0/255 so downstream code can count 255-valued pixels.
    return (binarized_image * 255).astype(np.uint8)

In [8]:
# Binarize every detected word crop. This rebinds `words_array`, so re-running
# this cell alone would binarize the already-binarized crops again.
words_array = list(map(edg_enhance, words_array))

### Island Segmentation

In [10]:
def directionalHistogram(img, direction='H'):
    """Count the 255-valued pixels per row or per column of a 2-D image.

    Parameters
    ----------
    img : 2-D array.
    direction : ``'H'`` counts along each row; any other value counts along
        each column.

    Returns
    -------
    list of int
        One count per row (``'H'``) or per column (otherwise). The last
        row/column is not included, so the list has ``n - 1`` entries.
        NOTE(review): skipping the last line looks like an off-by-one, but
        cropLineToWords trims the same row/column, so it is kept as is.
    """
    rows, cols = img.shape
    if direction == 'H':
        return np.count_nonzero(img[:max(rows - 1, 0)] == 255, axis=1).tolist()
    return np.count_nonzero(img[:, :max(cols - 1, 0)] == 255, axis=0).tolist()

def cropLineToWords(viable_sequences, image):
    """Cut ``image`` into vertical strips at the given column positions.

    Parameters
    ----------
    viable_sequences : non-empty sequence of column indices used as cut points.
    image : 2-D array.

    Returns
    -------
    list
        One strip per consecutive pair of cut points, plus a final strip
        running from the last cut point to the second-to-last column. The
        last row of ``image`` is left out of every strip.
    """
    rows, cols = image.shape
    body = image[0:rows - 1]
    strips = [
        body[:, start:stop]
        for start, stop in zip(viable_sequences, viable_sequences[1:])
    ]
    strips.append(body[:, viable_sequences[-1]:cols - 1])
    return strips

def removeSpaces(words):
    """Keep only the 2-D arrays in ``words`` that contain at least one value above zero."""
    kept = []
    for word in words:
        if np.any(word[:, :] > 0):
            kept.append(word)
    return kept

In [11]:
segmented_words = []
# The loop variable is deliberately not called `image`: a top-level `for`
# rebinds the notebook-wide `image` (the full page), which would break
# re-running the earlier cells that read it.
for word_image in words_array:
    hist_vertical = directionalHistogram(word_image, direction='V')
    zero_sites = np.where(np.asarray(hist_vertical) == 0)[0]
    # Each pair brackets a run of non-empty columns lying between two empty columns.
    sequences = [
        [zero_sites[i - 1], zero_sites[i]]
        for i in range(1, len(zero_sites))
        if zero_sites[i] != zero_sites[i - 1] + 1
    ]
    if not sequences:
        # NOTE(review): a crop with no such run (e.g. ink touching both
        # borders) is skipped entirely -- confirm this is intended.
        continue
    sequence_lengths = [seq[1] - seq[0] + 1 for seq in sequences]
    # NOTE(review): the sum leaves out the first and last run, yet the
    # divisor counts all runs -- kept unchanged; confirm it is intended.
    average_sequence_length = sum(sequence_lengths[1:-1]) / len(sequences)
    overlap_factor = 0.75 * average_sequence_length
    min_length = average_sequence_length - overlap_factor
    viable_sequences_unrolled = [
        seq[0]
        for seq, length in zip(sequences, sequence_lengths)
        if length >= min_length
    ] + [-1]
    if viable_sequences_unrolled[0] != 0:
        viable_sequences_unrolled = [0] + viable_sequences_unrolled
    pieces = cropLineToWords(viable_sequences_unrolled, word_image)
    # Keep only the pieces that contain ink (non-zero pixel sum).
    segmented_words.extend(piece for piece in pieces if np.sum(piece[:, :]))

### Cleaning Unnecessary Segments

In [14]:
def clean_segment(image):
    """Decide whether a segment is kept or discarded.

    Parameters
    ----------
    image : 2-D array (one segmented piece).

    Returns
    -------
    The unchanged ``image`` when its mean value exceeds 0.15 and it is not a
    narrow (< 10 columns), low-mean (< 0.5) strip; otherwise the marker
    string ``'space'``. The low-mean fall-through case also returns the
    marker rather than an implicit ``None``, so every rejected segment can
    be filtered out the same way downstream.

    NOTE(review): the 0.5 / 0.15 thresholds look tuned for a 0-1 intensity
    scale while edg_enhance produces 0/255 images -- confirm.
    """
    avg = np.average(image)

    if image.shape[1] < 10 and avg < 0.5:
        return 'space'
    if avg > 0.15:
        return image
    # Too little ink to be a character: discard like a space.
    return 'space'

In [15]:
# Classify every segment: kept segments stay arrays, rejected ones become markers.
segmented_words = list(map(clean_segment, segmented_words))

In [16]:
# Remove rejected segments: string markers such as 'space', and any None
# entries, which would otherwise crash the padding step below.
segmented_words = [
    word for word in segmented_words
    if word is not None and not isinstance(word, str)
]

### Padding The Segments

In [18]:
def directionalHistogram(img, direction='H'):
    """Count the 255-valued pixels per row or per column of a 2-D image.

    NOTE: this re-declares the helper of the same name from the
    'Island Segmentation' section; from this cell on, this definition is the
    one in effect.

    Parameters
    ----------
    img : 2-D array.
    direction : ``'H'`` counts along each row; any other value counts along
        each column.

    Returns
    -------
    list of int
        One count per row (``'H'``) or per column (otherwise). The last
        row/column is not included, so the list has ``n - 1`` entries.
    """
    rows, cols = img.shape
    if direction == 'H':
        return np.count_nonzero(img[:max(rows - 1, 0)] == 255, axis=1).tolist()
    return np.count_nonzero(img[:, :max(cols - 1, 0)] == 255, axis=0).tolist()

def crop_image(image, direction='H'):
    """Trim empty margins from a 2-D image along one direction.

    The 255-pixel histogram is scanned from both ends for the first position
    where two consecutive empty lines are followed by a non-empty one; the
    image is cut between those two positions.

    Parameters
    ----------
    image : 2-D array.
    direction : ``'H'`` trims rows, any other value trims columns.

    Returns
    -------
    The trimmed view, or the original ``image`` when either dimension is
    smaller than 10, when no such position exists at either end, or when
    the cut would be empty.
    """
    rows, cols = image.shape
    if rows < 10 or cols < 10:
        return image

    profile = directionalHistogram(image, direction)
    mirrored = np.flip(profile)

    def _first_ink_edge(values):
        # Index of the second of two empty lines directly preceding a non-empty one.
        for idx in range(1, len(values) - 1):
            if values[idx - 1] == 0 and values[idx] == 0 and values[idx + 1] != 0:
                return idx
        return None

    lead = _first_ink_edge(profile)
    if lead is None:
        return image
    trail = _first_ink_edge(mirrored)
    if trail is None:
        return image
    stop = len(mirrored) - 1 - trail

    if direction == 'H':
        trimmed = image[lead:stop, :]
    else:
        trimmed = image[:, lead:stop]
    if 0 in trimmed.shape:
        return image
    return trimmed

image_width = 32
image_height = 32
def distortion_free_resize(image, img_size=(image_height, image_width)):
    """Resize a grayscale image to ``img_size`` without distortion, padding the rest.

    The image is converted to 3 channels, resized with its aspect ratio
    preserved, zero-padded up to the target size, and finally transposed and
    flipped left-to-right.

    Parameters
    ----------
    image : single-channel array accepted by ``cv2.cvtColor`` with
        ``cv2.COLOR_GRAY2RGB``.
    img_size : ``(height, width)`` of the target.

    Returns
    -------
    A TensorFlow tensor, or the untouched ``image`` when one of its first two
    dimensions is not positive.

    Raises
    ------
    ValueError
        If either target dimension is not positive.
    """
    target_h, target_w = img_size
    if target_h <= 0 or target_w <= 0:
        raise ValueError("Image size must be positive")
    if image.shape[0] <= 0 or image.shape[1] <= 0:
        return image

    rgb = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    resized = tf.image.resize(rgb, size=(target_h, target_w), preserve_aspect_ratio=True)

    resized_shape = tf.shape(resized)
    pad_h = target_h - resized_shape[0]
    pad_w = target_w - resized_shape[1]
    # With an odd amount of padding, the extra pixel goes to the top / left.
    pad_bottom = pad_h // 2
    pad_top = pad_h - pad_bottom
    pad_right = pad_w // 2
    pad_left = pad_w - pad_right

    padded = tf.pad(resized, paddings=[[pad_top, pad_bottom], [pad_left, pad_right], [0, 0]])
    swapped = tf.transpose(padded, perm=[1, 0, 2])
    return tf.image.flip_left_right(swapped)

def perform_padding(image):
    """Trim empty columns, resize/pad to the model input size, and rotate by 90 degrees.

    Chains ``crop_image(image, 'V')``, ``distortion_free_resize`` and
    ``np.rot90`` and returns the rotated array.
    """
    return np.rot90(distortion_free_resize(crop_image(image, 'V')))

In [19]:
# Pad/resize every segment and stack the results into a single array.
segmented_words = np.array(list(map(perform_padding, segmented_words)))

In [20]:
# Scale pixel values by 1/255. This rebinds `segmented_words`, so running the
# cell twice scales the data twice.
segmented_words = np.divide(segmented_words, 255.0)

### Characters Preprocessing

In [24]:
AUTOTUNE = tf.data.AUTOTUNE

# Character set of the recogniser and the cap applied to decoded sequence length.
characters = {'ء','آ','أ','ؤ','إ','ئ','ا','ب','ة','ت','ث','ج','ح','خ','د','ذ','ر','ز','س','ش','ص','ض','ط','ظ','ع','غ','ف','ق','ك','ل','م','ن','ه','و','ى','ي'}
max_len = 7

# Character -> integer lookup; the vocabulary is sorted so the ids are deterministic.
char_to_num = StringLookup(vocabulary=sorted(characters), mask_token=None)

# Integer -> character lookup built from the very same vocabulary.
num_to_char = StringLookup(
    vocabulary=char_to_num.get_vocabulary(),
    invert=True,
    mask_token=None,
)

def decode_batch_predictions(pred):
    """Greedy-CTC-decode a batch of predictions into a list of strings.

    Parameters
    ----------
    pred : array/tensor whose first two dimensions are used as batch size
        and sequence length for ``keras.backend.ctc_decode``.

    Returns
    -------
    list of str
        One decoded string per batch entry, built from at most ``max_len``
        decoded labels with the ``-1`` fill entries removed.
    """
    batch_size, time_steps = pred.shape[0], pred.shape[1]
    # Every sample is decoded over the full sequence length.
    sequence_lengths = np.ones(batch_size) * time_steps
    # Greedy search; beam search is an option for harder tasks.
    decoded = keras.backend.ctc_decode(pred, input_length=sequence_lengths, greedy=True)[0][0]
    label_rows = decoded[:, :max_len]

    def _labels_to_text(labels):
        # Drop the -1 fill entries, then map the ids back to characters.
        valid = tf.gather(labels, tf.where(tf.math.not_equal(labels, -1)))
        return tf.strings.reduce_join(num_to_char(valid)).numpy().decode("utf-8")

    return [_labels_to_text(labels) for labels in label_rows]