In [1]:
import os
import cv2 as cv
import numpy as np
import tensorflow as tf
from scipy.ndimage import interpolation as inter
from PIL import Image as im



In [2]:
def binary_otsus(image, filter:int=1):
    if len(image.shape) == 3:
        gray_img = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    else:
        gray_img = image

    # Otsus Binarization
    if filter != 0:
        blur = cv.GaussianBlur(gray_img, (3,3), 0)
        binary_img = cv.threshold(blur, 0, 255, cv.THRESH_BINARY+cv.THRESH_OTSU)[1]
    else:
        binary_img = cv.threshold(gray_img, 0, 255, cv.THRESH_BINARY+cv.THRESH_OTSU)[1]
    
    return binary_img

In [3]:
def find_score(arr, angle):
    data = inter.rotate(arr, angle, reshape=False, order=0)
    hist = np.sum(data, axis=1)
    score = np.sum((hist[1:] - hist[:-1]) ** 2)
    return hist, score

In [15]:
def deskew(binary_img):
    ht, wd = binary_img.shape
    bin_img = (binary_img // 255.0)  # Convert binary image to float (0 and 1)

    delta = 0.1
    limit = 3
    angles = np.arange(-limit, limit+delta, delta)
    scores = []
    for angle in angles:
        hist, score = find_score(bin_img, angle)
        scores.append(score)

    best_score = max(scores)
    best_angle = angles[scores.index(best_score)]

    # Correct skew using the best angle
    data = inter.rotate(bin_img, best_angle, reshape=False, order=0)
    img = im.fromarray((255 * data).astype("uint8"))  # Convert back to uint8 for PIL

    pix = np.array(img)
    return pix

In [5]:
def crop_image(image, direction='H'):
    (w,h) = image.shape
    sum = []
    pixel_count=0
    if(direction=='H'):
        for j in range(w-1):
            for i in range(h-1):
                pixel=image[j,i]
                if(pixel==255):
                    pixel_count+=1
            sum.append(pixel_count)
            pixel_count=0
    else:
        for j in range(h-1):
            for i in range(w-1):
                pixel=image[i,j]
                if(pixel==255):
                    pixel_count+=1
            sum.append(pixel_count)
            pixel_count=0

    flipped_hist=np.flip(sum)
    startpos=0
    endpos=0
    for i in range(1, len(sum)-1):
        if(sum[i-1]==0 and sum[i]==0 and sum[i+1]!=0):
            startpos=i
            break
    for i in range(1, len(flipped_hist)-1):
        if(sum[i-1]==0 and sum[i]==0 and sum[i+1]!=0):
            endpos=len(flipped_hist)-1-i
            break

    diff=np.abs(startpos-endpos)
    if(direction=='H' and diff>=10 and startpos<endpos):
        if(startpos<endpos):
            return image[startpos:endpos,:]
        else:
            return image[endpos:startpos,:]
    elif(direction=='V' and diff>=10 and startpos<endpos):
        if(startpos<endpos):
            return image[:,startpos:endpos]
        else:
            return image[:,endpos:startpos]
    else:
        return image



In [30]:
def distortion_free_resize(image, img_size=(512,1024)):
    h, w = img_size
    image = cv.resize(image, (w, h))

    return image

In [32]:
input_folder = 'C:/Users/nahel/Desktop/Spring2024/GP2/graduation_project-Ocr_module/paragraphs_per_user/paragraphs_per_user'
output_folder = 'preprocessed_images'

if not os.path.exists(output_folder):
    os.makedirs(output_folder)

for root, dirs, files in os.walk(input_folder):
    for filename in files:
        if filename.endswith('.jpg') or filename.endswith('.png'):
            input_path = os.path.join(root, filename)
            output_path = os.path.join(output_folder, filename)

            # Load image
            image = cv.imread(input_path)

            # Preprocessing pipeline
            binary_image = binary_otsus(image)
            deskewed_image = deskew(binary_image)
            cropped_image = crop_image(deskewed_image, direction='H')
            processed_image = distortion_free_resize(cropped_image)

            # Save the processed image
            cv.imwrite(output_path, processed_image)

  data = inter.rotate(arr, angle, reshape=False, order=0)
  data = inter.rotate(bin_img, best_angle, reshape=False, order=0)
