In [11]:
import os
import cv2
import numpy as np

# --- Configuration -------------------------------------------------------
# Folder whose .jpg/.png files are processed by the main loop.
image_folder = "Ukr"
# Model file, confidence and input size handed to text_detection() for
# images that are not "_page" scans.
east_path = "opencv_text_detection/frozen_east_text_detection.pb"
min_confidence = 0.5
width = 320
height = 320

# Root folder for every image written by this notebook.
output_folder = "tmp"
# exist_ok=True makes the cell safe to re-run and avoids the race between a
# separate "exists?" check and the directory creation.
os.makedirs(output_folder, exist_ok=True)

In [12]:
def clear_border(image):
    """Replace the outermost 1-pixel frame of ``image`` with white.

    One pixel is cropped from every side and a constant border of the same
    width, coloured [255, 255, 255], is added back, so the crop and the new
    border cancel out in size.

    Args:
        image (numpy.ndarray): Image to process.

    Returns:
        numpy.ndarray: New image produced by cv2.copyMakeBorder.
    """
    margin = 1
    white = [255, 255, 255]

    # Drop the outer frame ...
    inner = image[margin:-margin, margin:-margin]

    # ... and put a white one of the same thickness in its place.
    return cv2.copyMakeBorder(
        inner, margin, margin, margin, margin, cv2.BORDER_CONSTANT, value=white
    )

In [13]:
def format_image(image):
    """Normalise an image array to a 3-channel layout.

    Args:
        image (numpy.ndarray): Array with at least two dimensions.

    Returns:
        numpy.ndarray: A GRAY2BGR conversion for 2-D or single-channel input,
        the first three channels for input with more than three channels,
        otherwise ``image`` itself.

    Raises:
        ValueError: If ``image`` is not a numpy array or has fewer than two
            dimensions.
    """
    # None is rejected here as well, since it is not an ndarray.
    if not isinstance(image, np.ndarray) or image.ndim < 2:
        raise ValueError("Invalid input image.")

    channel_count = 1 if image.ndim == 2 else image.shape[2]

    if channel_count == 1:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if channel_count > 3:
        # Keep only the first three channels (e.g. drop an alpha channel).
        return image[:, :, :3]
    return image

In [14]:
import cv2
import numpy as np

def fully_clear_background(image):
    """Turn an image into a binary (0/255) image via adaptive thresholding.

    Pipeline: format_image -> clear_border -> grayscale -> 3x3 Gaussian blur
    -> Gaussian adaptive threshold.

    Args:
        image (numpy.ndarray): Input image accepted by format_image.

    Returns:
        numpy.ndarray: Output of cv2.adaptiveThreshold with THRESH_BINARY and
        a maximum value of 255.
    """
    block_size = 27  # neighbourhood size used for the local threshold
    offset = 50      # constant subtracted from the weighted local mean

    bgr = format_image(image)
    framed = clear_border(bgr)
    gray = cv2.cvtColor(framed, cv2.COLOR_BGR2GRAY)

    # Light smoothing so isolated noisy pixels do not survive thresholding.
    smoothed = cv2.GaussianBlur(gray, (3, 3), 0)

    return cv2.adaptiveThreshold(
        smoothed,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )

In [15]:
import cv2
import numpy as np

def is_line_empty(line, threshold=0.03, gray_threshold=130, verbose=True):
    """
    Decide whether a line image is empty, based on its share of dark pixels.

    Args:
    line (numpy.ndarray): Image of the line.
    threshold (float): A line is empty when the proportion of dark pixels is strictly below this value. Default is 0.03 (3%).
    gray_threshold (int): Pixels with a value below this level count as dark (non-white). Default is 130.
    verbose (bool): When True, print the decision and the measured proportion. Default is True.

    Returns:
    bool: True if the line is empty, False otherwise.
    """
    total_pixels = line.size
    if total_pixels == 0:
        # A zero-size image has nothing in it; returning early also avoids
        # dividing by zero below.
        return True

    dark_pixels = np.count_nonzero(line < gray_threshold)
    dark_ratio = dark_pixels / total_pixels
    is_empty = bool(dark_ratio < threshold)

    if verbose:
        print('true:' if is_empty else 'false:', dark_ratio)
    return is_empty

In [16]:
from imutils import contours

def segment_words(image, p_image, file_name, line_number):
    """Cut a line image into word crops, ordered left to right.

    Regions are located on ``p_image`` (inverted, blurred and dilated so that
    nearby strokes merge into blobs) and the crops are taken from ``image``
    at the same coordinates.

    Args:
        image (numpy.ndarray): Line image the crops are sliced from.
        p_image (numpy.ndarray): Pre-processed version of the same line used
            for contour detection.
        file_name: Not used by this function.
        line_number: Not used by this function.

    Returns:
        list[numpy.ndarray]: Crops of ``image`` for every external contour
        whose area is greater than 10; empty when no contour is found.
    """
    inverted = cv2.bitwise_not(p_image)
    smoothed = cv2.GaussianBlur(inverted, (5, 5), 0)

    # Dilate with a 3x12 rectangular element to merge neighbouring strokes.
    structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 12))
    merged = cv2.dilate(smoothed, structuring, iterations=1)

    # findContours returns 2 or 3 values depending on the OpenCV version.
    found = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    word_contours = found[1] if len(found) != 2 else found[0]

    if len(word_contours) == 0:
        return []

    ordered, _ = contours.sort_contours(word_contours, method="left-to-right")

    crops = []
    for contour in ordered:
        # Skip specks that are too small to be a word.
        if cv2.contourArea(contour) <= 10:
            continue
        x, y, w, h = cv2.boundingRect(contour)
        crops.append(image[y:y + h, x:x + w])
    return crops


In [17]:
import cv2
import numpy as np

def extract_letters(word_image):
    """Split a word image into letter crops, ordered left to right.

    Args:
        word_image (numpy.ndarray): Image of a single word. It is fed to
            cv2.threshold with THRESH_OTSU, so it is presumably an 8-bit
            single-channel image -- TODO confirm against the caller.

    Returns:
        list[numpy.ndarray]: One crop of ``word_image`` per external contour
        found in the processed mask, sorted by the left edge of its
        bounding box.
    """
    no_border = clear_border(word_image)
    # Inverted Otsu threshold: dark ink becomes white foreground on black.
    _, otsu_threshold = cv2.threshold(no_border, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # The kernel is 1 px wide and 7 px tall, so strokes grow vertically only;
    # presumably this merges detached marks (dots, accents) with the letter
    # body without bridging neighbouring letters.
    dilate_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 7))
    dilated = cv2.dilate(otsu_threshold, dilate_kernel, iterations=1)

    # Shrink back vertically with a smaller 1x4 kernel.
    erode_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 4))
    eroded = cv2.erode(dilated, erode_kernel, iterations=1)

    # findContours returns 2 or 3 values depending on the OpenCV version;
    # handle both, the same way segment_words does.
    found = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    letter_contours = found[0] if len(found) == 2 else found[1]

    # Sort by x so that the list index follows reading order.
    boxes = sorted(
        (cv2.boundingRect(contour) for contour in letter_contours),
        key=lambda box: box[0],
    )

    return [
        word_image[y:y + h, x:x + w]
        for x, y, w, h in boxes
        if w > 0 and h > 0
    ]


In [18]:
import os
def save_image(folder_path, file_name, image):
    """Write ``image`` to ``folder_path``/``file_name`` with cv2.imwrite.

    Args:
        folder_path (str): Destination folder.
        file_name (str): File name, including the extension.
        image (numpy.ndarray): Image to write.

    Returns:
        bool: The value returned by cv2.imwrite (True when the file was
        written).
    """
    output_file = os.path.join(folder_path, file_name)
    # cv2.imwrite signals failure through its return value, so surface it
    # instead of dropping it silently.
    written = cv2.imwrite(output_file, image)
    if not written:
        print(f"Warning: could not write {output_file}")
    return written


In [19]:
import cv2
from opencv_text_detection.text_detection import text_detection
from Page_to_lines import get_lines, display_lines

# Assuming you have the other necessary functions defined above


# Walk the input folder. Files with "_page" in their name are split into
# lines -> words -> letters; every other image goes through text_detection.
for file_name in os.listdir(image_folder):
    # Only .jpg / .png files are handled (the match is case-sensitive).
    if not file_name.endswith((".jpg", ".png")):
        continue

    image_path = os.path.join(image_folder, file_name)
    print(f"Processing {image_path}")

    if "_page" not in file_name:
        data, result_img = text_detection(image_path, east_path, min_confidence, width, height)
        save_image(output_folder, file_name, result_img)
        continue

    # NOTE(review): `image` is not used anywhere in this cell -- confirm
    # whether a later cell relies on it before removing this read.
    image = cv2.imread(image_path)
    lines = get_lines(image_path, kernel_size=17, sigma=2, theta=9, smooth_window_len=4, threshold=0.3, peak_min_distance=2)

    # One output folder per page, named after the file without its extension.
    current_image_folder = os.path.join(output_folder, os.path.splitext(file_name)[0])
    os.makedirs(current_image_folder, exist_ok=True)

    for idx, line in enumerate(lines):
        # Zero-size lines are neither processed nor saved.
        if line.size == 0:
            continue

        p_line = fully_clear_background(line)
        if not is_line_empty(p_line):  # the line contains text
            words_images = segment_words(line, p_line, file_name, idx)

            # One folder per line, holding the word images and word folders.
            current_line_folder = os.path.join(current_image_folder, f"line_{idx}")
            os.makedirs(current_line_folder, exist_ok=True)

            for word_idx, word_image in enumerate(words_images):
                letters = extract_letters(word_image)

                # One folder per word, holding its letter images.
                current_word_folder = os.path.join(current_line_folder, f"word_{word_idx}")
                os.makedirs(current_word_folder, exist_ok=True)

                for letter_idx, letter_image in enumerate(letters):
                    save_image(current_word_folder, f"letter_{letter_idx}.jpg", letter_image)

                # The word image itself is stored next to its letter folder.
                save_image(current_line_folder, f"word_{word_idx}.jpg", word_image)

        # Every non-empty line image is saved, whether or not text was found.
        save_image(current_image_folder, f"line_{idx}.jpg", line)

print("Processing completed.")

Processing Ukr\f274983b03b9efbd237843eb2737e175_page.jpg
x1= 0 , x2= 0 , Diff=  0
x1= 0 , x2= 23 , Diff=  23
x1= 23 , x2= 119 , Diff=  96
x1= 119 , x2= 139 , Diff=  20
x1= 139 , x2= 144 , Diff=  5
x1= 144 , x2= 151 , Diff=  7
x1= 151 , x2= 169 , Diff=  18
x1= 169 , x2= 207 , Diff=  38
x1= 207 , x2= 236 , Diff=  29
x1= 236 , x2= 266 , Diff=  30
x1= 266 , x2= 306 , Diff=  40
x1= 306 , x2= 316 , Diff=  10
x1= 316 , x2= 352 , Diff=  36
x1= 352 , x2= 380 , Diff=  28
x1= 380 , x2= 409 , Diff=  29
x1= 409 , x2= 446 , Diff=  37
x1= 446 , x2= 496 , Diff=  50
x1= 496 , x2= 525 , Diff=  29
x1= 525 , x2= 553 , Diff=  28
x1= 553 , x2= 602 , Diff=  49
x1= 602 , x2= 643 , Diff=  41
true: 0.0
true: 0.0
true: 0.009578804347826087
true: 0.0
true: 0.0
true: 0.0
false: 0.10229548054919908
false: 0.15245502248875561
false: 0.12545289855072464
false: 0.08495244565217391
true: 0.00828804347826087
false: 0.125
false: 0.13688858695652173
false: 0.15625
false: 0.11949177438307873
false: 0.07510869565217392
fals