In [1]:
"""
Preprocessing of a handwritten dataset: removes stamps and printed (block-letter) text from the scanned forms.

Copyright (c) 2020 NORLIST.kz
Written by Galymzhan Abdimanap.
Version 1.0
"""


# Import library.
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.image as mpimg

# Import Tesseract.
import pytesseract
from pytesseract import Output

# Import Mask RCNN
# To find local version of the library
# For the independence of the program from the built-in OS libraries, the Mask RCNN library is included in the program folder
from mrcnn.config import Config
from mrcnn import utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log
import tensorflow as tf

import cv2
import glob

Using TensorFlow backend.


In [2]:
#------------------------------------------------------------------------------
# Mask RCNN model
#------------------------------------------------------------------------------
# Project root: os.path.abspath("") resolves to the current working directory.
ROOT_DIR = os.path.abspath("")
sys.path.append(ROOT_DIR)

# Folder (inside the project root) that holds the trained weights.
WEIGHTS_DIR_NAME = "weights"
MODEL_DIR = os.path.join(ROOT_DIR, WEIGHTS_DIR_NAME)

# Choose device type
DEVICE = "/gpu:0"  # /cpu:0 or /gpu:0

# TensorFlow default graph; detect_seal() passes it to model.detect().
graph = tf.get_default_graph()

class SealConfig(Config):
    """Mask R-CNN configuration for the stamp ("seal") detector.

    Derives from the base Config class and overrides only the values below.
    """
    # Recognizable name of this configuration.
    NAME = "seal"

    # Number of classes (including background): background + seal.
    NUM_CLASSES = 1 + 1

    # One image per GPU; raise it if the GPU has memory to spare.
    IMAGES_PER_GPU = 1

    # Number of training steps per epoch.
    STEPS_PER_EPOCH = 100

    # Minimum confidence for a detection to be kept (95%).
    DETECTION_MIN_CONFIDENCE = 0.95

class InferenceConfig(SealConfig):
    """SealConfig variant used for inference.

    Subclasses SealConfig directly; no intermediate SealConfig instance is
    needed just to look up its class.
    """
    # Run detection on one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1


# Configuration instance used to build the inference model.
config = InferenceConfig()




# Create the Mask R-CNN model in inference mode, placing it on DEVICE.
# MODEL_DIR is handed over as the model directory.
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Use the most recently trained weights, as reported by model.find_last().
weights_path = model.find_last()
# To use a specific weights file instead, set the path explicitly, e.g.:
#weights_path = os.path.abspath('G:\Stuff\Daniyar\Development\Python\detectStamp\logs\seal20200221T1748\mask_rcnn_seal_0030.h5')
# Show which weights file is going to be loaded.
print(weights_path)
# Load the weights into the model (by_name=True is passed to load_weights).
model.load_weights(weights_path, by_name = True)








Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
box_ind is deprecated, use box_indices instead


Instructions for updating:
Use `tf.cast` instead.
/media/manapov/7ff2a4aa-26aa-48f8-ae2a-9fba42bc53431/Work/POST_project/Stamp_text_drop/weights/seal20200221T1748/mask_rcnn_seal_0030.h5






Re-starting from epoch 30


In [3]:
def get_ax(rows=1, cols=1, size=16):
    """Create the Matplotlib axes used by the notebook's visualizations.

    Keeping figure creation in one place gives a single knob for plot sizes:
    every grid cell is `size` x `size`, so the whole figure measures
    (size * cols) x (size * rows).

    Returns whatever plt.subplots() yields as its axes (the figure handle is
    discarded).
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes


In [4]:
#------------------------------------------------------------------------------
# Stamp detection
#------------------------------------------------------------------------------

def detect_seal(image, min_probability = 0.91, min_width_height_ratio = 0.85):
    """Detect stamps (seals) in the given image.

    Runs the module-level Mask R-CNN `model` on the image and returns the
    bounding boxes it reports.

    Args:
        image: Image to search. Must not be None.
        min_probability: Currently unused; kept so existing calls keep working.
        min_width_height_ratio: Currently unused; kept for the same reason.

    Returns:
        The 'rois' entry of the detection result for `image`; each box is
        given as [y1, x1, y2, x2].

    Raises:
        AssertionError: If `image` is None.
    """
    assert image is not None

    # Exactly one image is submitted, so only the first result is of interest.
    detection = model.detect([image], graph)[0]

    # To inspect the detections visually:
    # ax = get_ax(1)
    # visualize.display_instances(image, detection['rois'], detection['masks'],
    #                             detection['class_ids'], 'seal',
    #                             detection['scores'], ax=ax,
    #                             title="Predictions")

    return detection['rois']
    
    

In [5]:
def cleaning_img(filename, img, rois, padding=30):
    '''Drop the stamps: paint a filled white circle over every detected box.

    Args:
        filename: Name of the source image. Not used by the function; kept
            so existing calls keep working.
        img: Image to clean. It is modified in place.
        rois: Sequence of boxes, each given as [y1, x1, y2, x2].
        padding: Number of pixels (int) added to the circle radius so the
            circle reaches beyond the box. Defaults to 30.

    Returns:
        The same `img` object that was passed in.
    '''
    white = (255, 255, 255)
    for roi in rois:
        y1, x1, y2, x2 = roi[:4]
        # Center of the box as (x, y).
        center = (int(x1 + (x2 - x1) / 2), int(y1 + (y2 - y1) / 2))
        # The radius is derived from the box height only.
        radius = int((y2 - y1) / 2)
        # Thickness -1 asks OpenCV for a filled circle.
        cv2.circle(img, center, radius + padding, white, -1)
    return img
    
    

In [6]:
def get_crop_positions_form_kaznitu(image):
    """Locate the key words of a KAZNITU form and blank out its footer.

    Tesseract (language 'kaz') is run on the image and the recognized words
    are compared, exactly and case-sensitively, with three key words.

    Side effect: when the footer key word "КазНИТУ" is recognized, `image`
    is modified in place: everything from the top of that word (the topmost
    occurrence, if there are several) down to the bottom edge is painted
    white.

    Args:
        image: Image in BGR channel order; it is converted to RGB before
            being handed to Tesseract.

    Returns:
        Tuple (y_first_key_word, y_last_key_word, h_last_key_word): the top
        of "МИНИСТЕРСТВО", the top of "ИИН" and the height of "ИИН". An entry
        is None when its word was not recognized. If a word is recognized
        several times, its last occurrence is reported.
    """
    # Convert from BGR to RGB channel ordering and let Tesseract localize
    # each word in the image.
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    results = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang='kaz')

    FIRST_KEY_WORD = "МИНИСТЕРСТВО"
    LAST_KEY_WORD = "ИИН"
    FIRST_KEY_WORD_FOOTER = "КазНИТУ"

    y_first_key_word = None
    y_last_key_word = None
    h_last_key_word = None
    y_footer = None

    # Loop over each of the individual text localizations.
    for i, word in enumerate(results["text"]):
        if word == FIRST_KEY_WORD:
            y_first_key_word = results["top"][i]
        elif word == LAST_KEY_WORD:
            y_last_key_word = results["top"][i]
            h_last_key_word = results["height"][i]
        elif word == FIRST_KEY_WORD_FOOTER:
            # Remember the topmost footer occurrence, so that the single
            # rectangle drawn below covers every occurrence.
            top = results["top"][i]
            if y_footer is None or top < y_footer:
                y_footer = top

    # Paint the footer white once, after all words have been inspected.
    if y_footer is not None:
        cv2.rectangle(image, (0, y_footer), (image.shape[1], image.shape[0]), (255, 255, 255), -1)

    return y_first_key_word, y_last_key_word, h_last_key_word

    

   


In [7]:
def get_crop_positions_form_kaznu(image):
    """Locate the key words that delimit the area to crop on a KAZNU form.

    Tesseract (language 'kaz') is run on the image. The recognized words are
    lower-cased and compared with two key-word lists; for each list only the
    first matching word counts. The image itself is not modified.

    Returns a tuple (y1, y2, h): the top of the first key word, the top of
    the last key word and the height of the last key word. An entry is None
    when no word of the corresponding list was recognized.
    """
    # Tesseract receives the image in RGB channel order (input is BGR).
    rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    ocr = pytesseract.image_to_data(rgb, output_type=Output.DICT, lang='kaz')

    first_words = ["әл-фараби", "печать", "билетов"]
    last_words = ["м.о.", "мансурова", "мусиралиева", "нұрмұхан", "турлыбекова", "табаев", "ақжігітова"]

    recognized = [text.lower() for text in ocr["text"]]

    # Index of the first recognized word belonging to each list (or None).
    first_idx = next((i for i, word in enumerate(recognized) if word in first_words), None)
    last_idx = next((i for i, word in enumerate(recognized) if word in last_words), None)

    y_first = None if first_idx is None else ocr["top"][first_idx]
    if last_idx is None:
        y_last, h_last = None, None
    else:
        y_last = ocr["top"][last_idx]
        h_last = ocr["height"][last_idx]

    return y_first, y_last, h_last

    

In [None]:
# Script run.
#
# For every image matched by IMAGE_FILES_PATH: paint over the detected stamps,
# blank the printed header of the selected form and save the result to
# CLEANED_IMAGE_FILES_PATH as <counter>.jpg.


IMAGE_FILES_PATH = 'dataset/20200921_Examination_sheets/*.jpg'
CLEANED_IMAGE_FILES_PATH = 'dataset/cleaned_images_20200921_Examination_sheets/'
HANDWRITTEN_FORM = 'KAZNU' # 'KAZNITU'  or  'KAZNU'
# Extra pixels blanked below the last key word of the header.
HEADER_BOTTOM_MARGIN = 50

# Function that locates the key words, for every supported form.
CROP_POSITION_FINDERS = {
    'KAZNITU': get_crop_positions_form_kaznitu,
    'KAZNU': get_crop_positions_form_kaznu,
}
get_crop_positions = CROP_POSITION_FINDERS.get(HANDWRITTEN_FORM)


# Get a list with images filename. Sorted, because glob returns the files in
# arbitrary order and the output names depend on the processing order.
images_filename = sorted(glob.glob(IMAGE_FILES_PATH))

counter = 0
if get_crop_positions is None:
    print("Please choose HANDWRITTEN_FORM! KAZNITU or KAZNU")
else:
    # cv2.imwrite does not create missing folders, it only reports failure.
    os.makedirs(CLEANED_IMAGE_FILES_PATH, exist_ok=True)

    for image_filename in images_filename:
        # np.fromfile + cv2.imdecode is used instead of cv2.imread so that
        # file names with Cyrillic letters can be read.
        # IMREAD_COLOR always yields a 3-channel BGR image (also for
        # grayscale or alpha files); IMREAD_IGNORE_ORIENTATION leaves the
        # pixels unrotated regardless of EXIF data.
        raw_bytes = np.fromfile(image_filename, dtype=np.uint8)
        img = None
        if raw_bytes.size > 0:
            img = cv2.imdecode(raw_bytes, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if img is None:
            # Skip unreadable files instead of aborting the whole run.
            print("Cannot read image: ", image_filename)
            continue

        # Detects stamp (seal) in given image.
        rois = detect_seal(img)
        # If find a stamp on the image.
        if len(rois) > 0:
            img = cleaning_img(os.path.basename(image_filename), img, rois)

        # Get positions of area for crop for the selected form.
        y_first_key_word, y_last_key_word, h_last_key_word = get_crop_positions(img)

        # Coloring the area with white. Drop the words.
        # Explicit None checks: a coordinate of 0 is a valid position.
        if y_last_key_word is not None and h_last_key_word is not None:
            header_bottom = y_last_key_word + h_last_key_word + HEADER_BOTTOM_MARGIN
            cv2.rectangle(img, (0, 0), (img.shape[1], header_bottom), (255, 255, 255), -1)
        elif y_first_key_word is not None:
            # The first key word was found but the last one was not.
            print("Error image: ", image_filename)
            print("Error image: ", str(counter) + '.jpg' )

        #  Save cleaned image.
        output_path = CLEANED_IMAGE_FILES_PATH + str(counter) + '.jpg'
        if not cv2.imwrite(output_path, img):
            print("Cannot save image: ", output_path)
        counter += 1
    
    
    
    
    




           


In [11]:
# img = cv2.imread('images/1_7.jpg')

# # Adding custom options
# custom_config = r'--oem 3 --psm 6'
# pytesseract.image_to_data(img, output_type=Output.DICT, lang='kaz')