In [892]:
import os

image_folder = "Ukr"
east_path = "text_detection/frozen_east_text_detection.pb"
min_confidence = 0.5
width = 320
height = 320

output_folder = "tmp"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)

In [893]:
import cv2
import numpy as np
  
  

def noise_removal(image):
  import numpy as np
  kernel = np.ones((1,1),np.uint8)
  image = cv2.dilate(image, kernel, iterations=1)
  kernel = np.ones((1,1),np.uint8)
  image = cv2.erode(image, kernel, iterations=1)    
  image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel, iterations=4)
  image = cv2.medianBlur(image, 3)
  return (image)

def illumination_removal(image):
  se=cv2.getStructuringElement(cv2.MORPH_RECT , (8,8))
  bg=cv2.morphologyEx(image, cv2.MORPH_DILATE, se)
  out_gray=cv2.divide(image, bg, scale=255)
  return out_gray

def getSkewAngle(cvImage) -> float:
  # Prep image, copy, convert to gray scale, blur, and threshold
  newImage = cvImage.copy()
  gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
  blur = cv2.GaussianBlur(gray, (9, 9), 0)
  thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

  # Apply dilate to merge text into meaningful lines/paragraphs.
  # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
  # But use smaller kernel on Y axis to separate between different blocks of text
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (30, 5))
  dilate = cv2.dilate(thresh, kernel, iterations=2)

  # Find all contours
  contours, hierarchy = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
  contours = sorted(contours, key = cv2.contourArea, reverse = True)
  for c in contours:
      rect = cv2.boundingRect(c)
      x,y,w,h = rect
      cv2.rectangle(newImage,(x,y),(x+w,y+h),(0,255,0),2)

  # Find largest contour and surround in min area box
  largestContour = contours[0]
  print (len(contours))
  minAreaRect = cv2.minAreaRect(largestContour)
  cv2.imwrite("temp/boxes.jpg", newImage)
  # Determine the angle. Convert it to the value that was originally used to obtain skewed image
  angle = minAreaRect[-1]
  if angle < -45:
      angle = 90 + angle
  return -1.0 * angle
# Rotate the image around its center
def rotateImage(cvImage, angle: float):
  newImage = cvImage.copy()
  (h, w) = newImage.shape[:2]
  center = (w // 2, h // 2)
  M = cv2.getRotationMatrix2D(center, angle, 1.0)
  newImage = cv2.warpAffine(newImage, M, (w, h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
  return newImage


In [894]:
def clear_border(image):
    top, bottom, left, right = 1, 1, 1, 1
    image_without_borders = image[top:-bottom, left:-right]

    image_with_border = cv2.copyMakeBorder(image_without_borders, 1, 1, 1, 1, cv2.BORDER_CONSTANT, value=[255, 255, 255])
    return image_with_border

In [895]:
def format_image(image):
    if image is None or not isinstance(image, np.ndarray) or len(image.shape) < 2:
        raise ValueError("Invalid input image.")

    if len(image.shape) == 2 or image.shape[2] == 1:
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    elif image.shape[2] > 3:
        image = image[:, :, :3]
    return image

In [896]:
import cv2
import numpy as np

def fully_clear_background(image):
    formated = format_image(image)

    image_with_border = clear_border(formated)

    gray_image = cv2.cvtColor(image_with_border, cv2.COLOR_BGR2GRAY)

    # Apply GaussianBlur to reduce noise
    blurred_image = cv2.GaussianBlur(gray_image, (3, 3), 0)

    # Apply adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred_image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 27, 50)
    
    return thresh

In [897]:
import cv2
import numpy as np

def is_line_empty(line, threshold=0.03, gray_threshold=130):
    """
    Determine if a line contains text based on the number of non-white pixels.

    Args:
    line (numpy.ndarray): Image of the line.
    threshold (float): Threshold for the proportion of non-white pixels to consider a line as empty. Default is 0.01 (1%).
    gray_threshold (int): Gray level threshold to consider a pixel as non-white. Default is 200.

    Returns:
    bool: True if the line is empty, False otherwise.
    """

    non_white_pixels = np.count_nonzero(line < gray_threshold)
    total_pixels = line.size

    if non_white_pixels / total_pixels < threshold:
        print('true:', non_white_pixels / total_pixels)
        return True
    else:
        print('false:', non_white_pixels / total_pixels)
        return False

In [898]:
from imutils import contours

def segment_words(image, p_image, file_name, line_number):
    converted = cv2.bitwise_not(p_image)
    
    blurred_image = cv2.GaussianBlur(converted, (5, 5), 0)

    # Apply morphological dilation to connect words
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 12))
    dilated = cv2.dilate(blurred_image, kernel, iterations=1)

    cnts = cv2.findContours(dilated, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = cnts[0] if len(cnts) == 2 else cnts[1]
    

    words_images = []
    if len(cnts) > 0:
        cnts, _ = contours.sort_contours(cnts, method="left-to-right")
        
        for c in cnts:
            area = cv2.contourArea(c)
            if area > 10:
                x, y, w, h = cv2.boundingRect(c)
                ROI = image[y:y+h, x:x+w]
                words_images.append(ROI)

    return words_images


In [899]:
import cv2
import numpy as np

def extract_letters(word_image):
    no_border = clear_border(word_image)
    _, otsu_threshold = cv2.threshold(no_border, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    
    # Apply morphological dilation to connect words
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 7))
    dilated = cv2.dilate(otsu_threshold, kernel, iterations=1)
    
    # Apply morphological erosion to separate connected letters
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 4))  # Increase kernel size
    eroded = cv2.erode(dilated, kernel, iterations=1)
    
    contours, _ = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    image_copy = word_image.copy()
    image_copy = cv2.cvtColor(image_copy, cv2.COLOR_GRAY2BGR)
    letters = []

    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        if h > 0 and w > 0:
            # Draw rectangle around the letter on the word image
            cv2.rectangle(image_copy, (x, y), (x + w, y + h), (0, 255, 0), 2)

            letter = word_image[y:y+h, x:x+w]
            letters.append(letter)

    return letters


In [900]:
import os
import cv2
from opencv_text_detection.text_detection import text_detection
from Page_to_lines import get_lines, display_lines

# Assuming you have the other necessary functions defined above

for file_name in os.listdir(image_folder):
    if file_name.endswith(".jpg") or file_name.endswith(".png"):
        image_path = os.path.join(image_folder, file_name)
        print(f"Processing {image_path}")

        if "_page" in file_name:
            image = cv2.imread(image_path)
            lines = get_lines(image_path, kernel_size=17, sigma=2, theta=9, smooth_window_len=4, threshold=0.3, peak_min_distance=2)

            # Create a folder for the current image
            current_image_folder = os.path.join(output_folder, file_name[:-4])
            if not os.path.exists(current_image_folder):
                os.makedirs(current_image_folder)

            # Process each line and save it in the folder
            for idx, line in enumerate(lines):
                if not line.size == 0:  # Check if the line is not empty
                    p_line = fully_clear_background(line)
                    if not is_line_empty(p_line):  # Check if the line contains text
                        words_images = segment_words(line, p_line, file_name, idx)

                        # Create a folder for the current line
                        current_line_folder = os.path.join(current_image_folder, f"line_{idx}")
                        if not os.path.exists(current_line_folder):
                            os.makedirs(current_line_folder)

                        for word_idx, word_image in enumerate(words_images):
                            letters = extract_letters(word_image)

                            # Create a folder for the current word
                            current_word_folder = os.path.join(current_line_folder, f"word_{word_idx}")
                            if not os.path.exists(current_word_folder):
                                os.makedirs(current_word_folder)

                            # Save each letter in the folder
                            for letter_idx, letter_image in enumerate(letters):
                                output_file = os.path.join(current_word_folder, f"letter_{letter_idx}.jpg")
                                cv2.imwrite(output_file, letter_image)

                        # Save word image in the line folder
                        output_word_file = os.path.join(current_line_folder, f"word_{word_idx}.jpg")
                        cv2.imwrite(output_word_file, word_image)

                # Save line image in the image folder only if it's not empty
                if line.size > 0:
                    output_line_file = os.path.join(current_image_folder, f"line_{idx}.jpg")
                    cv2.imwrite(output_line_file, line)
        else:
            data, result_img = text_detection(image_path, east_path, min_confidence, width, height)
            output_file = os.path.join(output_folder, file_name)
            cv2.imwrite(output_file, result_img)

print("Processing completed.")


Processing Ukr\Franko_page.jpg
x1= 0 , x2= 0 , Diff=  0
x1= 0 , x2= 10 , Diff=  10
x1= 10 , x2= 22 , Diff=  12
x1= 22 , x2= 51 , Diff=  29
x1= 51 , x2= 79 , Diff=  28
x1= 79 , x2= 107 , Diff=  28
x1= 107 , x2= 135 , Diff=  28
x1= 135 , x2= 163 , Diff=  28
x1= 163 , x2= 195 , Diff=  32
x1= 195 , x2= 236 , Diff=  41
x1= 236 , x2= 264 , Diff=  28
x1= 264 , x2= 292 , Diff=  28
x1= 292 , x2= 320 , Diff=  28
x1= 320 , x2= 348 , Diff=  28
x1= 348 , x2= 376 , Diff=  28
x1= 376 , x2= 404 , Diff=  28
x1= 404 , x2= 432 , Diff=  28
x1= 432 , x2= 459 , Diff=  27
x1= 459 , x2= 499 , Diff=  40
x1= 499 , x2= 532 , Diff=  33
x1= 532 , x2= 560 , Diff=  28
x1= 560 , x2= 587 , Diff=  27
x1= 587 , x2= 615 , Diff=  28
x1= 615 , x2= 642 , Diff=  27
x1= 642 , x2= 671 , Diff=  29
x1= 671 , x2= 722 , Diff=  51
true: 0.0
true: 0.0
false: 0.046130560673861544
false: 0.05234460196292257
false: 0.07163304252998909
false: 0.051731188658669575
false: 0.05111777535441658
false: 0.04276001908396947
false: 0.03579407931