### Aims to create a function to deskew (i.e., straighten) an image

Code based on __[this post](https://becominghuman.ai/how-to-automatically-deskew-straighten-a-text-image-using-opencv-a0c30aed83df)__.

In [179]:
import cv2 
from IPython.display import Image as Imag
from PIL import Image

# Sample images; leave exactly one img_path assignment active.
# img_path = "../test_images/preprocessing_sample_3.jpg"
img_path = "../test_images/preprocessing_sample_4.jpg"
# img_path = "../test_images/preprocessing_sample_6.jpg"
# img_path = "../test_images/preprocessing_sample_7.jpg"

img = cv2.imread(img_path)
# cv2.imread reports an unreadable/missing file by returning None rather
# than raising, so fail here with a message that names the path.
if img is None:
    raise FileNotFoundError("Could not read image: " + img_path)

# OpenCV loads colour images in BGR channel order while PIL interprets
# arrays as RGB; convert for display only and keep `img` itself in BGR.
Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)).show()

In [180]:
def getSkewAngle(cvImage) -> float:
    """
    Calculate skew angle of an image.

    The image is blurred and thresholded, the foreground is dilated so that
    text merges into large blobs, and the angle of the minimum-area
    rectangle around the largest blob is taken as the skew estimate.

    Input: image (BGR colour, or an already single-channel 2-D array)
    Output: angle reported by cv2.minAreaRect for the largest contour, with
            values above 45 reduced by 90; 0.0 if no contour is found
    """

    # Prep image, copy, convert to gray scale, blur, and threshold.
    # The copy keeps the optional debug drawing below off the caller's image.
    newImage = cvImage.copy()
    if newImage.ndim == 2:
        # Already single-channel; a BGR -> gray conversion would reject it
        gray = newImage
    else:
        gray = cv2.cvtColor(newImage, cv2.COLOR_BGR2GRAY)
    blur = cv2.GaussianBlur(gray, (9, 9), 0)
    thresh = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]

    # Dilate pixels to merge text into meaningful lines/paragraphs.
    # Use larger kernel on X axis to merge characters into single line, cancelling out any spaces.
    # But use smaller kernel on Y axis to separate between different blocks of text
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 10))
    dilate = cv2.dilate(thresh, kernel, iterations=5)

    # Find all contours.
    # OpenCV 3.x returns (image, contours, hierarchy) and 4.x returns
    # (contours, hierarchy); the contours are second-from-last in both.
    contours = cv2.findContours(dilate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if not contours:
        # Nothing detected (e.g. a blank page): report no skew instead of
        # failing with an IndexError.
        return 0.0

    # Find largest contour and surround in min area box
    largestContour = max(contours, key=cv2.contourArea)
    minAreaRect = cv2.minAreaRect(largestContour)

    # Uncomment next 2 lines to display largest contour used to determine skew angle
    # cv2.drawContours(newImage, [largestContour], 0, (0,255,0), 3)
    # Image.fromarray(newImage).show()

    # Determine the angle (last element of the min-area rectangle).
    # NOTE(review): the range of this angle depends on the OpenCV version;
    # the fold below presumably targets a (0, 90] convention - confirm
    # against the installed version.
    angle = minAreaRect[-1]
    if angle > 45:
        angle = angle - 90
    return angle

def rotateImage(cvImage, angle: float):
    """
    Rotate an image about its centre, keeping the original canvas size.
    Input: image, angle to rotate (passed to cv2.getRotationMatrix2D)
    Output: rotated image with the same width and height as the input
    """

    source = cvImage.copy()
    height, width = source.shape[:2]
    pivot = (width // 2, height // 2)

    # Scale of 1.0: pure rotation around the pivot, no resizing
    rotation = cv2.getRotationMatrix2D(pivot, angle, 1.0)

    # Areas exposed by the rotation are filled by replicating edge pixels
    return cv2.warpAffine(
        source,
        rotation,
        (width, height),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_REPLICATE,
    )

# Deskew image by combining getSkewAngle and rotateImage
def deskew(cvImage):
    """
    Straighten (de-skew) an image.
    Input: image
    Output: straightened image, i.e. the input rotated by its estimated skew angle
    """

    return rotateImage(cvImage, getSkewAngle(cvImage))

In [181]:
# Estimate the skew once and reuse it for both the rotation and the report,
# rather than running the whole contour pipeline a second time for print().
skew_angle = getSkewAngle(img)
deskewed_image = rotateImage(img, skew_angle)
print('Skew angle:', skew_angle)
# OpenCV arrays are BGR while PIL interprets arrays as RGB; convert for
# display only so `deskewed_image` stays in OpenCV's channel order.
Image.fromarray(cv2.cvtColor(deskewed_image, cv2.COLOR_BGR2RGB)).show()

Skew angle: -14.237281799316406


In [None]:
"""
Notes:

1. May need to fine tune kernel in getSkewAngle
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (20, 10))
Currently set to (20, 10)
Increase 1st value to increase x-axis dilation
Increase 2nd value to increase y-axis dilation
Keeping X>Y will help ensure lines stay together

2. Needs more testing to ensure rotation is in correct direction
Sometimes rotates text so lines are vertical instead of horizontal

3. Likely works better after ruled lines have been removed from image

4. Uncomment lines in getSkewAngle to view contour used to determine skew angle

"""