In [None]:
# TODO: get this to run without crashing all the time.
# TODO: test on 1000 images and measure how large the angle error is.
# TODO: simplify the augmentation to make the task easier, then test again?
# TODO: try this later: https://mzucker.github.io/2016/10/11/unprojecting-text-with-ellipses.html

In [1]:
# https://www.pyimagesearch.com/2017/02/20/text-skew-correction-opencv-python/
import numpy as np
import argparse
import cv2
import os

from PIL import Image
import random

from imgaug import augmenters as iaa
import imgaug as ia

In [2]:
# Use this cell if you want to run the code from the command line with arguments.

# # construct the argument parse and parse the arguments
# ap = argparse.ArgumentParser()
# ap.add_argument("-i", "--image", required=True,
# 	help="path to input image file")
# args = vars(ap.parse_args())

# # load the image from disk
# image = cv2.imread(args["image"])

In [3]:
# Target length, in pixels, that the foreground's longer side is scaled to
# before it is pasted onto the background (see add_background_img).
max_foreground_size = 300
# Factor between the scaled foreground's longer side and the side of the square
# background; augment_image also multiplies it by a random value in [0.7, 1.3]
# to decide how much of the border is cropped away.
scaler = 3

In [4]:
def add_background_img(foreground, background, max_foreground_size, scaler):
    """Paste a rescaled foreground image onto the centre of a square background.

    The foreground is scaled so that its longer side becomes (approximately)
    ``max_foreground_size`` pixels. The background is resized to a square whose
    side is ``scaler`` times that length, and the foreground is pasted in the
    middle of it.

    Args:
        foreground: PIL image to place in the centre.
        background: PIL image used as backdrop. It is not modified; a resized
            copy is returned.
        max_foreground_size: Target length in pixels of the foreground's
            longer side.
        scaler: Ratio between the background side and the scaled foreground's
            longer side.

    Returns:
        The resized background (a new PIL image) with the foreground pasted on.
    """
    width = foreground.size[0]
    height = foreground.size[1]

    # Scale relative to the longer side so the foreground fits the target size.
    longer_side = width if width > height else height
    percentage = max_foreground_size / longer_side
    max_size = int(longer_side * scaler * percentage)

    # Image.ANTIALIAS was removed in Pillow 10; Image.LANCZOS is the same
    # filter under its current name and also exists in older releases.
    resample = Image.LANCZOS

    # Clamp to one pixel: a very elongated image could otherwise truncate to a
    # zero-sized side, which resize() rejects.
    foreground_size = (
        max(1, int(width * percentage)),
        max(1, int(height * percentage)),
    )
    foreground = foreground.resize(foreground_size, resample)
    background = background.resize((max_size, max_size), resample)

    # Offsets that centre the foreground on the background.
    margin_w = int((background.size[0] - foreground.size[0]) / 2)
    margin_h = int((background.size[1] - foreground.size[1]) / 2)

    background.paste(foreground, (margin_w, margin_h))

    return background

In [5]:
def augment_image(foreground, background, max_foreground_size, scaler):
    """Compose foreground on background, then blur, rotate, shear and crop it.

    Args:
        foreground: PIL image placed in the centre of the composite.
        background: PIL image used as backdrop.
        max_foreground_size: Forwarded to ``add_background_img``.
        scaler: Forwarded to ``add_background_img``; also the centre of the
            random crop factor.

    Returns:
        Tuple ``(cropped_img, rotation, shear)``: the augmented, centre-cropped
        PIL image and the random values that were passed to imgaug's
        ``Affine(rotate=...)`` and ``Affine(shear=...)``.
    """
    # Build the composite and move it to an OpenCV-style BGR array.
    composite = add_background_img(foreground, background, max_foreground_size, scaler)
    bgr_pixels = np.array(composite.convert('RGB'))[:, :, ::-1].copy()

    # Draw the geometric distortion parameters.
    rotation = random.uniform(-85,85)
    shear = random.uniform(-10,10)

    # Motion blur first, then rotation, then shear.
    pipeline = (
        ia.augmenters.blur.MotionBlur(k=(3,10), angle=(0, 360), direction=(-1.0, 1.0)),
        ia.augmenters.geometric.Affine(rotate=rotation),
        ia.augmenters.geometric.Affine(shear=shear),
    )
    augmented = bgr_pixels
    for step in pipeline:
        augmented = step.augment_image(augmented)

    # Back to an RGB PIL image.
    img = Image.fromarray(cv2.cvtColor(augmented, cv2.COLOR_BGR2RGB))

    # Centre crop: trimming this fraction from every side keeps 1/rand_scaler
    # of each dimension.
    rand_scaler = scaler*random.uniform(0.7,1.3)
    crop_side_percentage = (rand_scaler-1)/(2*rand_scaler)
    keep_until = 1-crop_side_percentage

    full_w, full_h = img.size[0], img.size[1]
    box = (
        full_w*crop_side_percentage,  # left
        full_h*crop_side_percentage,  # top
        full_w*keep_until,            # right
        full_h*keep_until,            # bottom
    )

    return img.crop(box), rotation, shear

In [6]:
# Folders holding the invoice images (foregrounds) and the backdrop images.
input_data_folder = "invoice_img_data"
background_img_folder = "background_img"

# Foregrounds: PNG files that are not label files.
input_img_names = [x for x in os.listdir(input_data_folder) if "_label" not in x and ".png" in x]
# Backgrounds: PNG/JPG/JPEG files that are not label files. The parentheses are
# required: `and` binds tighter than `or`, so without them the "_label"
# exclusion would only apply to the ".png" case.
background_img_names = [
    x for x in os.listdir(background_img_folder)
    if "_label" not in x and (".png" in x or ".jpg" in x or ".jpeg" in x)]

In [7]:
# # test augmentation
# for i in range(5):
#     background = Image.open(os.path.join(background_img_folder, random.choice(background_img_names)))
#     foreground = Image.open(os.path.join(input_data_folder, random.choice(input_img_names)))

#     final_img, rotation, shear = augment_image(foreground, background, max_foreground_size, scaler)
#     final_img.show()

In [8]:
def distort_image(background_img_names, input_img_names):
    """Build one randomly distorted sample from random foreground/background files.

    Reads the module-level ``background_img_folder``, ``input_data_folder``,
    ``max_foreground_size`` and ``scaler``.

    Args:
        background_img_names: File names to choose the background from.
        input_img_names: File names to choose the foreground from.

    Returns:
        Tuple ``(final_img, rotation, shear)`` as returned by ``augment_image``.
    """
    # Choose background first, then foreground (keeps the random draw order).
    background_path = os.path.join(background_img_folder, random.choice(background_img_names))
    foreground_path = os.path.join(input_data_folder, random.choice(input_img_names))

    # Close both source files as soon as the sample is built; otherwise every
    # call leaves two file handles open, which adds up when generating many
    # samples.
    with Image.open(background_path) as background, Image.open(foreground_path) as foreground:
        final_img, rotation, shear = augment_image(foreground, background, max_foreground_size, scaler)
    return final_img, rotation, shear

In [9]:
def _normalize_skew_angle(raw_angle):
    """Map a ``cv2.minAreaRect`` angle to a correction angle in (-45, 45]."""
    # Older OpenCV builds report the angle in [-90, 0), newer ones in (0, 90].
    # A rectangle's orientation only matters modulo 90 degrees, so fold both
    # conventions into [-90, 0) before applying the original rule.
    folded = raw_angle % 90.0 - 90.0

    if folded < -45:
        corrected = -(90 + folded)
    else:
        corrected = -folded

    # Adding 0.0 turns a negative zero into a plain 0.0 for nicer printing.
    return corrected + 0.0


def get_angle_opencv(distorted_image):
    """Estimate the skew of a PIL image with a minimum-area bounding rectangle.

    The image is converted to grayscale, inverted and Otsu-thresholded so that
    dark content becomes the non-zero foreground; the rotated rectangle that
    encloses all foreground pixels gives the skew angle.

    Args:
        distorted_image: PIL image to analyse.

    Returns:
        Tuple ``(image, angle)``: the input as a BGR numpy array and the
        rotation to apply to deskew it, in the range (-45, 45]. The angle is
        0.0 when thresholding leaves no foreground pixels.
    """
    image = np.array(distorted_image.convert('RGB'))
    # Convert RGB to BGR
    image = image[:, :, ::-1].copy()

    # Grayscale, then invert so the foreground is "white" and the background
    # "black".
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bitwise_not(gray)

    # Otsu threshold: foreground pixels become 255, background pixels 0.
    thresh = cv2.threshold(gray, 0, 255,
        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]

    # (row, col) coordinates of every foreground pixel.
    coords = np.column_stack(np.where(thresh > 0))

    # minAreaRect raises on an empty point set; with no foreground there is
    # nothing to deskew.
    if coords.shape[0] == 0:
        return image, 0.0

    angle = _normalize_skew_angle(cv2.minAreaRect(coords)[-1])

    return image, angle

In [10]:
def show_corrected_image(image, angle):
    """Rotate ``image`` by ``angle`` and show the input and result in OpenCV windows.

    Blocks until a key is pressed, then closes the windows.

    Args:
        image: Image array with at least two dimensions (height, width, ...).
        angle: Rotation passed to ``cv2.getRotationMatrix2D``; also printed
            and drawn onto the rotated image.
    """
    # rotate the image around its centre to deskew it
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h),
        flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)

    # draw the correction angle on the image so we can validate it
    cv2.putText(rotated, "Angle: {:.2f} degrees".format(angle),
        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)

    # show the output image
    print("[INFO] angle: {:.3f}".format(angle))
    try:
        cv2.imshow("Input", image)
        cv2.imshow("Rotated", rotated)
        cv2.waitKey(0)
    finally:
        # Always tear the windows down, even if the wait is interrupted;
        # windows left open are a common cause of a hung notebook kernel.
        cv2.destroyAllWindows()

In [None]:
# Smoke test: build five randomly distorted samples, estimate the skew of each
# one and display the de-skewed result. The `rotation` and `shear` values used
# for the distortion are returned but not compared with the estimate here.
for i in range(5):
    distorted_image, rotation, shear = distort_image(background_img_names, input_img_names)
    # distorted_image.show()
    open_cv_image, angle = get_angle_opencv(distorted_image)
    show_corrected_image(open_cv_image, angle)

[INFO] angle: -0.000
