In [34]:
import os
import cv2
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model


# --- Pipeline configuration -------------------------------------------------
# Folder that is scanned for input images.
image_folder = "Ukr"

# Frozen EAST graph plus the settings that are forwarded unchanged to
# text_detection() for images that are not treated as full pages.
east_path = "opencv_text_detection/frozen_east_text_detection.pb"
min_confidence = 0.5
width = 320
height = 320

# Keras character classifier used by recognize_letter(); loaded once here so
# every later cell shares the same instance.
model_path = 'UkrainianOCR/examples/Ukrainian_OCR_extended_Resnet_with_blure_and_aug_new.h5'
model = load_model(model_path)


# Root folder for everything the pipeline writes. exist_ok=True makes the cell
# safe to re-run and avoids the check-then-create race of os.path.exists().
output_folder = "tmp"
os.makedirs(output_folder, exist_ok=True)

In [35]:
def resize_image(image, max_size=960):
    """Shrink an image so that neither side exceeds ``max_size`` pixels.

    The aspect ratio is preserved up to integer truncation. An image that
    already fits is returned as-is (the same object, not a copy).

    Args:
        image: Array-like image whose ``shape`` starts with (height, width).
        max_size (int): Upper bound for the longer side, in pixels.

    Returns:
        ``image`` itself when no resizing is needed, otherwise the result of
        ``cv2.resize`` with the longer side equal to ``max_size``.
    """
    height, width = image.shape[:2]

    if max(height, width) <= max_size:
        return image

    if height > width:
        new_height = max_size
        new_width = int((width * max_size) / height)
    else:
        new_width = max_size
        new_height = int((height * max_size) / width)

    # For very elongated images the truncation above can produce 0 for the
    # short side, and cv2.resize rejects an empty target size. Keep at least
    # one pixel so such inputs are still resized instead of crashing.
    new_width = max(1, new_width)
    new_height = max(1, new_height)

    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, (new_width, new_height))

In [36]:
def clear_border(image):
    """Replace the outermost one-pixel frame of ``image`` with white.

    The frame is cut off by slicing and then re-added with
    ``cv2.copyMakeBorder`` using a constant value of 255 per channel, so the
    returned image has the same height and width as the input.

    Args:
        image (numpy.ndarray): Image to process.

    Returns:
        numpy.ndarray: New image whose outer one-pixel frame is white.
    """
    margin = 1
    white = [255, 255, 255]

    # Drop `margin` pixels on every side ...
    inner = image[margin:-margin, margin:-margin]

    # ... and put a white frame of the same thickness back.
    return cv2.copyMakeBorder(
        inner, margin, margin, margin, margin, cv2.BORDER_CONSTANT, value=white
    )

In [37]:
def format_image(image):
    """Coerce an image array towards a three-channel layout.

    * 2-D arrays and arrays with a single channel are expanded with
      ``cv2.COLOR_GRAY2BGR``.
    * Arrays with more than three channels keep only their first three.
    * Anything else (including two-channel arrays) is returned untouched.

    Args:
        image (numpy.ndarray): Image with at least two dimensions.

    Returns:
        numpy.ndarray: The converted (or original) image.

    Raises:
        ValueError: If ``image`` is not a NumPy array with two or more
            dimensions (``None`` included).
    """
    if not isinstance(image, np.ndarray) or image.ndim < 2:
        raise ValueError("Invalid input image.")

    # A plain 2-D array counts as a single channel.
    channel_count = 1 if image.ndim == 2 else image.shape[2]

    if channel_count == 1:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if channel_count > 3:
        return image[:, :, :3]
    return image

In [38]:
import cv2
import numpy as np

def fully_clear_background(image):
    """Binarize an image with a Gaussian adaptive threshold.

    Steps, in order: normalise the channel layout with ``format_image``,
    whiten the outer frame with ``clear_border``, convert to grayscale,
    apply a 3x3 Gaussian blur, then threshold.

    Args:
        image (numpy.ndarray): Input image accepted by ``format_image``.

    Returns:
        numpy.ndarray: Single-channel result of ``cv2.adaptiveThreshold``
        (``THRESH_BINARY``, maximum value 255).
    """
    bgr = clear_border(format_image(image))
    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # Light smoothing before thresholding to reduce noise.
    denoised = cv2.GaussianBlur(gray, (3, 3), 0)

    # Gaussian-weighted adaptive threshold: block size 27, constant 50.
    return cv2.adaptiveThreshold(
        denoised,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        27,
        50,
    )

In [39]:
import cv2
import numpy as np

def is_line_empty(line, threshold=0.03, gray_threshold=130, verbose=True):
    """
    Decide whether a line image is empty based on its share of dark pixels.

    A pixel counts as dark when its value is strictly below ``gray_threshold``.
    The line is considered empty when the dark-pixel ratio is strictly below
    ``threshold``.

    Args:
    line (numpy.ndarray): Image of the line.
    threshold (float): Dark-pixel ratio below which the line is treated as
        empty. Default is 0.03 (3%).
    gray_threshold (int): Gray level below which a pixel counts as dark.
        Default is 130.
    verbose (bool): When True (the default, matching the previous behaviour)
        print the verdict and the measured ratio.

    Returns:
    bool: True if the line is empty, False otherwise. An array with no
    pixels at all is reported as empty.
    """
    total_pixels = line.size
    if total_pixels == 0:
        # Nothing to measure; without this guard the ratio below would
        # divide by zero.
        return True

    dark_pixels = np.count_nonzero(line < gray_threshold)
    ratio = dark_pixels / total_pixels
    is_empty = bool(ratio < threshold)

    if verbose:
        print('true:' if is_empty else 'false:', ratio)
    return is_empty

In [40]:
from imutils import contours

def segment_words(image, p_image, file_name, line_number):
    """Cut a text line into crops, ordered left to right.

    Blobs are located on the inverted, blurred and dilated ``p_image``; the
    crops themselves are taken from ``image`` at the same coordinates, so both
    arrays are expected to share height and width.

    Args:
        image (numpy.ndarray): Line image the crops are sliced from.
        p_image (numpy.ndarray): Preprocessed version of the same line that is
            used for contour detection.
        file_name: Not used by this function.
        line_number: Not used by this function.

    Returns:
        list: Crops of ``image`` for every external contour whose area is
        greater than 10, sorted left to right. Empty when nothing is found.
    """
    inverted = cv2.bitwise_not(p_image)
    smoothed = cv2.GaussianBlur(inverted, (5, 5), 0)

    # Dilate so that nearby strokes merge into a single blob.
    merge_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 12))
    merged = cv2.dilate(smoothed, merge_kernel, iterations=1)

    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the contour list in either case.
    found = cv2.findContours(merged, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    blobs = found[0] if len(found) == 2 else found[1]

    if len(blobs) == 0:
        return []

    ordered, _ = contours.sort_contours(blobs, method="left-to-right")
    boxes = (cv2.boundingRect(blob) for blob in ordered if cv2.contourArea(blob) > 10)
    return [image[y:y + h, x:x + w] for (x, y, w, h) in boxes]


In [41]:
import cv2
import numpy as np
from imutils import contours


# Canvas size (in pixels) that the main loop passes to extract_letters() as
# the padding target for each letter crop.
global_max_width = 32
global_max_height = 32

def extract_letters(word_image, global_max_width, global_max_height):
    """Split a word image into padded, binarized letter images.

    Letter candidates are the external contours of an Otsu-thresholded copy of
    the word that has been dilated and then eroded with 1-pixel-wide vertical
    kernels. Each candidate is cropped from ``word_image``, binarized with an
    inverted adaptive threshold and padded with zeros.

    Args:
        word_image (numpy.ndarray): Word crop. The thresholding calls used
            here require an 8-bit single-channel image.
        global_max_width (int): Target width used to compute the left/right
            padding (the letter is centred horizontally).
        global_max_height (int): Target height used to compute the top
            padding.

    Returns:
        list: Padded letter images ordered left to right. Empty when no
        contour is found (for example for a blank crop).
    """
    no_border = clear_border(word_image)
    _, otsu_threshold = cv2.threshold(no_border, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

    # Vertical-only dilation followed by a smaller vertical erosion;
    # presumably this joins marks stacked above a glyph with the glyph itself
    # while keeping neighbouring letters apart horizontally.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 7))
    dilated = cv2.dilate(otsu_threshold, kernel, iterations=1)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 4))
    eroded = cv2.erode(dilated, kernel, iterations=1)

    # findContours returns 2 or 3 values depending on the OpenCV version;
    # handle both, the same way segment_words() does.
    found = cv2.findContours(eroded, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    cnts = found[0] if len(found) == 2 else found[1]

    # sort_contours() raises on an empty sequence, so bail out early.
    if len(cnts) == 0:
        return []

    cnts_sorted, _ = contours.sort_contours(cnts, method="left-to-right")

    letters = []
    for cont in cnts_sorted:
        x, y, w, h = cv2.boundingRect(cont)
        if h <= 0 or w <= 0:
            continue

        letter = word_image[y:y+h, x:x+w]

        thresh = cv2.adaptiveThreshold(letter, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 181, 40)

        # All missing height goes on top, plus a fixed 2-pixel strip at the
        # bottom; missing width is split between left and right.
        pad_top = max(0, global_max_height - h)
        pad_bottom = 2
        pad_left = max(0, (global_max_width - w) // 2)
        pad_right = max(0, global_max_width - w - pad_left)

        letter_padded = cv2.copyMakeBorder(thresh, pad_top, pad_bottom, pad_left, pad_right, cv2.BORDER_CONSTANT, value=0)
        letters.append(letter_padded)

    return letters


In [42]:


def recognize_letter(letter_image, model):
    """Classify a single letter image with the given model.

    The image is resized to 32x32, blurred with a 3x3 Gaussian kernel,
    converted to grayscale when it has a channel axis, scaled to [0, 1] as
    float32 and passed to ``model.predict`` as a batch of one sample with a
    trailing channel axis.

    Args:
        letter_image (numpy.ndarray): Image of one letter.
        model: Object exposing ``predict(batch)`` that returns one score
            vector per sample, indexable by class.

    Returns:
        tuple: ``(predicted_letter, probability, predicted_index)`` where
        ``predicted_index`` is the argmax of the score vector,
        ``probability`` the score at that index and ``predicted_letter`` the
        matching entry of the character table below.
    """
    # Character table; the position of an entry is its class index.
    alphabet = [
    'А','Б','В','Г','Ґ','Д','Е','Є','Ж','З','И','І','Ї','Й','К',
    'Л','М','Н','О','П','Р','С','Т','У','Ф','Х','Ц','Ч','Ш','Щ',
    'Ь','Ю','Я','а','б','в','г','ґ','д','е','є','ж','з','и','і',
    'ї','й','к','л','м','н','о','п','р','с','т','у','ф','х','ц',
    'ч','ш','щ','ь','ю','я','1','2','3','4','5','6','7','8','9',
    '0','№','%','@',',','.','?',':',';','"','!','(',')','-','\''
    ]

    # Resize the letter image to 32x32.
    shrunk = cv2.resize(letter_image, (32, 32), interpolation=cv2.INTER_AREA)

    # 3x3 Gaussian blur; a sigma of 0 lets OpenCV derive it from the kernel size.
    softened = cv2.GaussianBlur(shrunk, (3, 3), 0)

    # Convert to grayscale only when the image still has a channel axis.
    has_channels = len(softened.shape) == 3
    gray = cv2.cvtColor(softened, cv2.COLOR_BGR2GRAY) if has_channels else softened

    # float32, trailing channel axis, values scaled to [0, 1].
    sample = np.expand_dims(np.array(gray, dtype=np.float32), axis=-1) / 255.0

    # Predict on a batch holding just this sample and take its score vector.
    scores = model.predict(np.array([sample]))[0]
    best = np.argmax(scores)

    return alphabet[best], scores[best], best

In [43]:
import os
def save_image(folder_path, file_name, image):
    """Write ``image`` to ``folder_path/file_name`` using ``cv2.imwrite``.

    Args:
        folder_path (str): Destination folder.
        file_name (str): File name, including the extension.
        image: Image data handed to ``cv2.imwrite``.

    Returns:
        None. The status returned by ``cv2.imwrite`` is discarded.
    """
    cv2.imwrite(os.path.join(folder_path, file_name), image)


In [44]:
import cv2
from opencv_text_detection.text_detection import text_detection
from Page_to_lines import get_lines, display_lines

# Assuming you have the other necessary functions defined above

# Run the pipeline over every .jpg/.png file in image_folder.
#   * File names containing "_page" are treated as full pages: they are split
#     into lines, words and letters, every letter is classified, and all
#     intermediate crops are written below output_folder.
#   * Every other image only goes through text_detection().
# sorted() makes the processing order deterministic; os.listdir() returns
# entries in arbitrary order.
for file_name in sorted(os.listdir(image_folder)):
    if not file_name.endswith((".jpg", ".png")):
        continue

    image_path = os.path.join(image_folder, file_name)
    print(f"Processing {image_path}")

    if "_page" not in file_name:
        data, result_img = text_detection(image_path, east_path, min_confidence, width, height)
        save_image(output_folder, file_name, result_img)
        continue

    result = ''
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be
        # decoded; skip it rather than crashing inside resize_image().
        print(f"Skipping {image_path}: cannot be read as an image")
        continue

    # Resize the image before processing
    resized_image = resize_image(image)

    # get_lines() reads from disk, so the resized image replaces the original
    # file. Only write when the size actually changed: rewriting an untouched
    # image would needlessly re-encode the source file on every run.
    if resized_image.shape != image.shape:
        cv2.imwrite(image_path, resized_image)

    lines = get_lines(image_path, kernel_size=17, sigma=2, theta=9, smooth_window_len=4, threshold=0.3, peak_min_distance=2)

    # Folder for the current image, named after the file without its extension
    current_image_folder = os.path.join(output_folder, os.path.splitext(file_name)[0])
    os.makedirs(current_image_folder, exist_ok=True)

    # Flat folder that collects every letter crop together with its prediction
    letters_folder = os.path.join(output_folder, "letters")
    os.makedirs(letters_folder, exist_ok=True)

    # Process each line and save it in the folder
    for line_idx, line in enumerate(lines):
        if line.size == 0:
            continue

        p_line = fully_clear_background(line)
        if not is_line_empty(p_line):  # Check if the line contains text
            words_images = segment_words(line, p_line, file_name, line_idx)

            # Folder for the current line
            current_line_folder = os.path.join(current_image_folder, f"line_{line_idx}")
            os.makedirs(current_line_folder, exist_ok=True)

            for word_idx, word_image in enumerate(words_images):
                letters = extract_letters(word_image, global_max_width, global_max_height)

                # Folder for the current word
                current_word_folder = os.path.join(current_line_folder, f"word_{word_idx}")
                os.makedirs(current_word_folder, exist_ok=True)

                for letter_idx, letter_image in enumerate(letters):
                    save_image(current_word_folder, f"letter_{letter_idx}.jpg", letter_image)

                    # Recognize the letter using the trained model
                    predicted_letter, probability, predicted_index = recognize_letter(letter_image, model)
                    result += predicted_letter
                    save_image(letters_folder, f"file_{file_name}_line_{line_idx}_word_{word_idx}_letter_{letter_idx}_prediction_{predicted_index}.jpg", letter_image)
                    print(f"Letter {letter_idx} is recognized as '{predicted_letter}' with probability {probability:.2f}")

                # Add space after each word
                result += ' '

                # Save word image in the line folder
                save_image(current_line_folder, f"word_{word_idx}.jpg", word_image)

        # Save the (non-empty) line image in the image folder
        save_image(current_image_folder, f"line_{line_idx}.jpg", line)

    # One trailing space after the whole page's text. Lines are already
    # separated by the space that follows each word.
    result += ' '

    print(result)

print("Processing completed.")

Processing Ukr\Franko_page.jpg
x1= 0 , x2= 0 , Diff=  0
x1= 0 , x2= 10 , Diff=  10
x1= 10 , x2= 22 , Diff=  12
x1= 22 , x2= 51 , Diff=  29
x1= 51 , x2= 79 , Diff=  28
x1= 79 , x2= 107 , Diff=  28
x1= 107 , x2= 135 , Diff=  28
x1= 135 , x2= 163 , Diff=  28
x1= 163 , x2= 195 , Diff=  32
x1= 195 , x2= 236 , Diff=  41
x1= 236 , x2= 264 , Diff=  28
x1= 264 , x2= 292 , Diff=  28
x1= 292 , x2= 320 , Diff=  28
x1= 320 , x2= 348 , Diff=  28
x1= 348 , x2= 376 , Diff=  28
x1= 376 , x2= 404 , Diff=  28
x1= 404 , x2= 432 , Diff=  28
x1= 432 , x2= 459 , Diff=  27
x1= 459 , x2= 499 , Diff=  40
x1= 499 , x2= 532 , Diff=  33
x1= 532 , x2= 560 , Diff=  28
x1= 560 , x2= 587 , Diff=  27
x1= 587 , x2= 615 , Diff=  28
x1= 615 , x2= 642 , Diff=  27
x1= 642 , x2= 671 , Diff=  29
x1= 671 , x2= 722 , Diff=  51
true: 0.0
true: 0.0
false: 0.04678862858647012
Letter 0 is recognized as 'І' with probability 0.53
Letter 0 is recognized as 'п' with probability 0.97
Letter 1 is recognized as 'р' with probability 1.00
L

KeyboardInterrupt: 