In [None]:
"""
deskew_scans.ipynb

Created on Wed Oct 19 08:17:10 2022

@author: Lukas

This script is used to deskew Teikoku scans using a specifically trained Mask R-CNN model.
"""

# Install PyTorch and Detectron2.
# The versions are pinned to the CUDA 10.1 ("cu101") wheel indexes given below, so pip
# replaces whatever torch/torchvision build the runtime already ships with.
# NOTE(review): the recorded output of this cell ends in a truncated error message --
# confirm that these pinned wheels still install on the current Colab image.

!pip install -U torch==1.5 torchvision==0.6 -f https://download.pytorch.org/whl/cu101/torch_stable.html
!pip install cython pyyaml==5.1
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
!pip install detectron2==0.1.3 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in links: https://download.pytorch.org/whl/cu101/torch_stable.html
Collecting torch==1.5
  Downloading https://download.pytorch.org/whl/cu101/torch-1.5.0%2Bcu101-cp37-cp37m-linux_x86_64.whl (703.8 MB)
[K     |████████████████████████████████| 703.8 MB 22 kB/s 
[?25hCollecting torchvision==0.6
  Downloading https://download.pytorch.org/whl/cu101/torchvision-0.6.0%2Bcu101-cp37-cp37m-linux_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 60.2 MB/s 
Installing collected packages: torch, torchvision
  Attempting uninstall: torch
    Found existing installation: torch 1.12.1+cu113
    Uninstalling torch-1.12.1+cu113:
      Successfully uninstalled torch-1.12.1+cu113
  Attempting uninstall: torchvision
    Found existing installation: torchvision 0.13.1+cu113
    Uninstalling torchvision-0.13.1+cu113:
      Successfully uninstalled torchvision-0.13.1+cu113
[31mERR

In [None]:
# import packages

import torch, torchvision
import detectron2

from detectron2.utils.visualizer import ColorMode
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.utils.visualizer import Visualizer
from detectron2.engine import DefaultPredictor
from detectron2.utils.logger import setup_logger
from detectron2.structures import BoxMode

import os
import numpy as np
import json
import cv2
import matplotlib.pyplot as plt
import pickle
import cProfile

from skimage import io
from skimage.transform import resize
from skimage import img_as_bool

from scipy import ndimage, misc

from google.colab.patches import cv2_imshow

from matplotlib import pyplot as plt

In [None]:
# Mount Google Drive when running this in Colab.
# The model config and the image folders used further down are read from paths under
# /content/drive, so this cell has to run before them.

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# load the model

# path of the pickled model configuration on the mounted Drive
filename = '/content/drive/MyDrive/combined_training_data_R101/R101_output/config.pkl'

# NOTE: pickle.load can execute arbitrary code stored in the file -- only load a
# config.pkl that you created yourself or otherwise trust.
with open(filename, 'rb') as f:
     cfg = pickle.load(f)
     
# `predictor` is a module-level global; get_masks() calls it directly
predictor = DefaultPredictor(cfg)

In [None]:
# get the binary mask

def get_masks(image):
    """
    Run the segmentation model on a scan and return one mask per page.

    All instance masks predicted by the model are merged into a single mask
    with ``mask_union``; that mask is then cut into two halves with
    ``split_mask``.

    Parameters
    ----------

    image : numpy array
           The image to be processed. It is passed unchanged to the
           module-level ``predictor``.

    Returns
    -------

    split_masks : tuple(numpy array, numpy array)
          The two masks returned by ``split_mask`` (one for each page).

    Raises
    ------

    ValueError
          If the model does not predict any instance for the image.

    """
    outputs = predictor(image)

    # Move the predictions to the CPU first: the helpers below hand the masks
    # to NumPy, which cannot read tensors that live on a CUDA device.
    instances = outputs["instances"].to("cpu")

    # iterating over pred_masks yields one mask per predicted instance
    masks = list(instances.pred_masks)
    if not masks:
        # without this check mask_union would fail with an opaque IndexError
        raise ValueError("the model did not detect any page in the image")

    mask = mask_union(masks)
    return split_mask(mask)

In [None]:
# get union over binary masks

def mask_union(masks):
    """
    Merge several binary masks into one mask that is set wherever at least
    one of the given masks is set.

    Parameters
    ----------

    masks : list(numpy array)
            The binary masks to merge. All of them are combined with the
            shape of the first one; the list must not be empty.

    Returns
    -------

    mask : numpy array
         The element-wise union of the given masks, as the result of a
         ``> 0`` comparison.

    """
    # start from an all-zero mask shaped like the first input ...
    union = np.zeros(masks[0].shape)

    # ... and OR every mask into it, one after the other
    for single_mask in masks:
        union = np.logical_or(union, single_mask)

    return union > 0

In [None]:
# split a given binary mask (for the whole scan) in two (one for each page)

def split_mask(mask):
    """ 
    This function takes a binary mask and splits it in two (one for each page).

    The split column is the midpoint between the first and the last column
    that contain a set pixel. Each returned mask has the shape of the input;
    the columns belonging to the other page are filled with zeros.

    Parameters
    ----------

    mask : 2D numpy array or torch tensor
            The binary mask to be split.

    Returns
    -------

    mask1 : numpy array
            The mask for the first page (columns left of the split column),
            as a float array.

    mask2 : numpy array
            The mask for the second page (split column and everything to its
            right), as a float array.

    Raises
    ------

    ValueError
            If the mask does not contain any set pixel.

    """
    # Convert to NumPy up front so that torch tensors and plain NumPy arrays
    # are both accepted.
    if torch.is_tensor(mask):
        mask = mask.detach().cpu().numpy()
    mask = np.asarray(mask)

    # indices of all columns with at least one set pixel (column 0 included)
    occupied_cols = np.flatnonzero(mask.any(axis=0))
    if occupied_cols.size == 0:
        raise ValueError("cannot split an empty mask: no foreground pixel found")

    first_col = int(occupied_cols[0])
    last_col = int(occupied_cols[-1])

    # compute the middle column
    middle_col = (first_col + last_col) // 2

    # Keep the full width of the scan for both halves and blank out the other
    # page. np.zeros gives float arrays, which matches the dtype produced by
    # the previous concatenate-with-zeros implementation.
    mask1_padded = np.zeros(mask.shape)
    mask1_padded[:, :middle_col] = mask[:, :middle_col]

    mask2_padded = np.zeros(mask.shape)
    mask2_padded[:, middle_col:] = mask[:, middle_col:]

    return mask1_padded, mask2_padded

In [None]:
# compute polygon points given a resized mask

def mask_to_polygon(mask):
    """
    Collect, for every column of the mask that contains a set pixel, the
    topmost and the bottommost set pixel of that column.

    Parameters
    ----------

    mask : 2D numpy array
        A binary mask of the polygon

    Returns
    -------

    boundary_points : 2D numpy array
        The collected points as ``[column, row]`` pairs, two per non-empty
        column (first set row, then last set row). The array is empty if the
        mask has no set pixel.

    """
    points = []

    for x in range(mask.shape[1]):
        column = mask[:, x]
        if not np.any(column):
            # nothing set in this column, so it contributes no points
            continue

        # row indices of all set pixels in this column, in ascending order
        rows = np.where(column)[0]
        points.extend(([x, rows[0]], [x, rows[-1]]))

    return np.array(points)

In [None]:
def crop_rect(img, rect):
    """
    Cut a rotated rectangle out of an image and return it straightened.

    Parameters
    ----------

    img : numpy.ndarray
        Image to be cropped.

    rect : tuple
        Rotated rectangle to be cropped. ``rect[1]`` is read as its
        ``(width, height)``; the whole tuple is passed to ``cv2.boxPoints``.

    Returns
    -------

    img_crop : numpy.ndarray
        The warped image of size ``(width, height)``, both truncated to
        whole pixels.

    """
    # Corner points of the rotated rectangle in the source image.
    # NOTE(review): the mapping below assumes cv2.boxPoints returns them in the
    # order bottom left, top left, top right, bottom right -- confirm this
    # holds for every rectangle angle.
    source_quad = np.array(cv2.boxPoints(rect), dtype="float32")

    # size of the straightened output
    out_w, out_h = int(rect[1][0]), int(rect[1][1])
    right, bottom = out_w - 1, out_h - 1

    # where the four corners end up once the rectangle is axis-aligned
    target_quad = np.float32([[0, bottom],
                              [0, 0],
                              [right, 0],
                              [right, bottom]])

    # warp the rotated rectangle directly onto the straightened one
    transform = cv2.getPerspectiveTransform(source_quad, target_quad)
    return cv2.warpPerspective(img, transform, (out_w, out_h))

In [None]:
# main function

def main(image_path, output_path):
    """
    deskews all images in a given folder and saves them to a given output folder

    For every readable image, the two page masks from ``get_masks`` are each
    turned into a minimum-area rotated rectangle, which is cropped out with
    ``crop_rect`` and written as ``dk_<name>_<page index>.png``.

    Parameters
    ----------

    image_path : string
        Path to the folder containing the images to be deskewed.

    output_path : string
        Path to the folder where the deskewed images should be saved.
        The folder is created if it does not exist yet.

    Returns
    -------

    None (deskewed cropped images are saved in the output folder)


    """

    # cv2.imwrite does not create missing folders, so make sure it exists
    os.makedirs(output_path, exist_ok=True)

    # iterate over all images in the input folder (sorted for a stable order)
    for scan in sorted(os.listdir(image_path)):
        # strip only the extension, so dots inside the file name are kept
        name = os.path.splitext(scan)[0]
        print(scan)

        image = cv2.imread(os.path.join(image_path, scan))
        if image is None:
            # cv2.imread returns None (it does not raise) for entries it
            # cannot decode, e.g. sub-folders or non-image files
            print('skipping ' + scan + ': not a readable image')
            continue

        masks = get_masks(image)

        for i in range(2):

            # compute the polygon points
            boundary_points = mask_to_polygon(masks[i])
            if len(boundary_points) == 0:
                # an empty page mask has no boundary to fit a rectangle to
                print('skipping page ' + str(i) + ' of ' + scan + ': empty mask')
                continue

            # compute the smallest rotated rectangle containing the polygon
            rect = cv2.minAreaRect(boundary_points)

            # crop and deskew
            # NOTE(review): the original author suspected a bug in this step;
            # the orientation of the crop depends on the corner order that
            # crop_rect gets from cv2.boxPoints -- verify.
            cropped_image = crop_rect(image, rect)

            # rotate output if necessary so that pages are saved in portrait format
            output_height, output_width = cropped_image.shape[:2]
            print(output_height, output_width)

            if output_width > output_height:
                cropped_image = cv2.rotate(cropped_image, cv2.ROTATE_90_CLOCKWISE)

            # save cropped and deskewed image
            output_name = os.path.join(output_path, 'dk_' + name + '_' + str(i) + '.png')
            print(cropped_image.shape)

            # cv2.imwrite reports failure through its return value
            if not cv2.imwrite(output_name, cropped_image):
                print('warning: could not write ' + output_name)

In [None]:
# folder on the mounted Drive that holds the scans to deskew
in_path = '/content/drive/MyDrive/deskew_scans_pipeline/Images'

# folder on the mounted Drive that the deskewed page crops are written to
out_path = '/content/drive/MyDrive/deskew_scans_pipeline/Deskewed_Scans'

# uncomment to run the pipeline under the profiler instead of calling main() directly
# cProfile.run("main(in_path, out_path)")

main(in_path, out_path)

TK1935_90_3678.jpeg
1512 2090
(2090, 1512, 3)
1510 2058
(2058, 1510, 3)
TK1934_1247_c76a.jpeg
1535 2068
(2068, 1535, 3)
2076 1523
(2076, 1523, 3)
TK1934_870_0ee9.jpeg
2053 1498
(2053, 1498, 3)
2064 1501
(2064, 1501, 3)
TK1934_747_40eb.jpeg
2066 1500
(2066, 1500, 3)
2049 1515
(2049, 1515, 3)
TK1930_242_9c82.jpeg
1503 2060
(2060, 1503, 3)
1500 2058
(2058, 1500, 3)
TK1930_82_9d30.jpeg
1505 2067
(2067, 1505, 3)
1506 2054
(2054, 1506, 3)
TK1936_1416_2e30.jpeg
2064 1497
(2064, 1497, 3)
2078 1502
(2078, 1502, 3)
TK1936_1153_8d2e.jpeg
1491 2049
(2049, 1491, 3)
2072 1509
(2072, 1509, 3)
TK1936_1152_21a0.jpeg
1505 2058
(2058, 1505, 3)
2065 1511
(2065, 1511, 3)
TK1936_928_24a1.jpeg
2067 1503
(2067, 1503, 3)
2076 1499
(2076, 1499, 3)
TK1935_1103_0c51.jpeg
1508 2052
(2052, 1508, 3)
1501 2044
(2044, 1501, 3)
TK1935_1102_b0d2.jpeg
1505 2067
(2067, 1505, 3)
2081 1523
(2081, 1523, 3)
TK1935_948_78d9.jpeg
1503 2060
(2060, 1503, 3)
1509 2065
(2065, 1509, 3)
TK1935_887_b3f8.jpeg
1503 2043
(2043, 1503, 3)
