### Preprocessing - Skewness Correction - Computer Vision - Base Package

*This module corrects document skew using the OpenCV and scikit-image libraries.*

Approach
* Edge Detection
* Hough Transformation
* Correct the document with the most contributing angle


For this purpose, I have used a publicly available Kaggle dataset ("Noisy and Rotated Scanned Documents"). I chose this dataset because it is labelled: I want to train a supervised CNN to detect skew angles and then compare its accuracy with the computer vision approach. Finally, all models will be applied to the input data created using the Course Descriptor.

### Import Libraries

In [None]:
import os
import json
import random
#import matplotlib.pyplot as plt
import PIL.Image as Image
import numpy as np
import pandas as pd
import cv2
#from google.colab.patches import cv2_imshow

from skimage.transform import hough_line, hough_line_peaks, rotate
from skimage.feature import canny
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from scipy.stats import mode

from sklearn.metrics import confusion_matrix
import seaborn as sns

In [None]:
# get random image and label names
def get_random_img(image_paths, label_paths, train_list, label_list, r=(0, 499)):
    """Pick a random sample and return its index, image path and label path.

    Args:
        image_paths: Directory that holds the image files.
        label_paths: Directory that holds the label files.
        train_list: Sequence of image file names, indexed by the drawn index.
        label_list: Sequence of label file names, indexed by the same index.
        r: Two-item sequence ``(low, high)``. The index is drawn with
            ``random.randint(low, high)``, so both bounds are inclusive.

    Returns:
        Tuple ``(index, sample_image_path, sample_label_path)``.

    Raises:
        IndexError: If the drawn index is outside ``train_list`` or
            ``label_list``.
    """
    # The default is an immutable tuple (no shared mutable default) and the
    # local is not named ``id`` so the builtin is not shadowed.
    low, high = r[0], r[1]
    index = random.randint(low, high)
    sample_image_path = os.path.join(image_paths, train_list[index])
    sample_label_path = os.path.join(label_paths, label_list[index])
    return index, sample_image_path, sample_label_path

# display image
def display_image(path=None, title=None):
    """Open the image at *path* and return it together with *title*.

    Args:
        path: Location of the image file, or ``None`` when there is no image.
        title: Caption that is passed through unchanged.

    Returns:
        Tuple ``(img, title)``. ``img`` is the object returned by
        ``Image.open(path)``, or ``None`` when *path* is ``None``.
    """
    # With no path there is nothing to open. Previously ``img`` stayed unbound
    # in that case and the return statement raised UnboundLocalError.
    img = Image.open(path) if path is not None else None
    return img, title

# get a random course-descriptor image path and its label text
def get_random_cd_img(location, cd_input_data, r=(0, 499)):
    """Pick a random row of *cd_input_data* and build its image path and label.

    The first three columns of the row are read by position: column 0 is the
    image file name (``.png`` is appended when missing), column 1 is a
    one-letter class code (``'N'``, ``'S'`` or ``'T'``) and column 2 is a
    numeric value that is rounded to two decimals for the label text.

    Args:
        location: Directory that holds the image files.
        cd_input_data: pandas DataFrame with at least three columns laid out
            as described above.
        r: Two-item sequence ``(low, high)``. The row index is drawn with
            ``random.randint(low, high)``, so both bounds are inclusive.

    Returns:
        Tuple ``(index, sample_image_path, sample_label)`` where
        ``sample_label`` looks like ``'Skewed: 2.5'``.

    Raises:
        ValueError: If the class code in column 1 is not ``'N'``, ``'S'`` or
            ``'T'``.
    """
    label_prefixes = {'N': 'Normal: ', 'S': 'Skewed: ', 'T': 'Tilt: '}

    row = random.randint(r[0], r[1])
    # ``iloc[row, col]`` is purely positional. The former ``iloc[row][col]``
    # relied on Series integer-key fallback, which pandas has deprecated.
    file_name = cd_input_data.iloc[row, 0]
    class_code = cd_input_data.iloc[row, 1]
    value = cd_input_data.iloc[row, 2]

    if not file_name.endswith('.png'):
        file_name = f'{file_name}.png'
    sample_image_path = os.path.join(location, file_name)

    # An unknown code used to leave the label unbound (UnboundLocalError).
    if class_code not in label_prefixes:
        raise ValueError(
            f'Unknown label code {class_code!r} in row {row}; '
            f'expected one of {sorted(label_prefixes)}'
        )
    sample_label = label_prefixes[class_code] + str(np.round(float(value), 2))

    return row, sample_image_path, sample_label



### Apply Hough Transform
Steps
* Read image as gray scale
* Use canny edge detector
* Customize the range of angles
* Detect all lines on the image
* Get most prominent lines separated by a certain angle
* Vote among angles of the prominent lines

In [None]:
def display_Hough_Transform(image_path, angle_range, offset):
    """Estimate the dominant line angle of an image and return plotting data.

    The image is read, reduced to a single channel, passed through the Canny
    edge detector and then through a straight-line Hough transform that is
    restricted to angles around 90 degrees. The most frequent angle among the
    Hough peaks is taken as the document's line angle.

    Args:
        image_path: Path of the image file to analyse.
        angle_range: Two-item sequence ``(low, high)`` of expected skew, in
            degrees relative to 90.
        offset: Extra margin in degrees added on both sides of *angle_range*.

    Returns:
        Tuple ``(image, edges, y, angle, fixed_angle)``:
        ``image`` is the single-channel image, ``edges`` the Canny edge map,
        ``y`` a list with, per detected line, its y-coordinates at ``x = 0``
        and ``x = image width``, ``angle`` the voted angle in degrees and
        ``fixed_angle`` equal to ``angle - 90``.
        NOTE(review): ``mode(...)[0]`` is a scalar or a one-element array
        depending on the installed SciPy version - confirm before relying on
        the exact type of ``angle``/``fixed_angle``.
    """
    image = imread(image_path)
    if image.ndim != 2:
        # skimage's imread returns colour images in RGB(A) order, so the
        # conversion must use the RGB code; the BGR code swapped the red and
        # blue luminance weights.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = canny(image)

    # Only a narrow band of angles around 90 degrees is tested, which is both
    # faster and less prone to picking up unrelated lines. np.arange uses its
    # default step of 1 degree and excludes the stop value.
    start_deg = 90.0 + angle_range[0] - offset
    stop_deg = 90.0 + angle_range[1] + offset
    tested_angles = np.deg2rad(np.arange(start_deg, stop_deg))

    h, theta, d = hough_line(edges, theta=tested_angles)

    # x-coordinates (left and right image border) at which each detected line
    # is evaluated.
    origin = np.array((0, image.shape[1]))

    angles = []
    y = []
    for _, peak_angle, dist in zip(*hough_line_peaks(h, theta, d)):
        y.append((dist - origin * np.cos(peak_angle)) / np.sin(peak_angle))
        angles.append(peak_angle)

    # Vote: the most frequent peak angle wins. Computed once and reused.
    angle = np.rad2deg(mode(angles)[0])
    fixed_angle = -(90.0 - angle)

    return image, edges, y, angle, fixed_angle

def Hough_Transform(image_path, angle_range, offset):
    """Estimate the dominant line angle of the image stored at *image_path*.

    Same pipeline as the display variant (Canny edges, then a straight-line
    Hough transform restricted to angles around 90 degrees, then a vote among
    the peak angles), but only the angles are returned.

    Args:
        image_path: Path of the image file to analyse.
        angle_range: Two-item sequence ``(low, high)`` of expected skew, in
            degrees relative to 90.
        offset: Extra margin in degrees added on both sides of *angle_range*.

    Returns:
        Tuple ``(angle, fixed_angle)``: the voted Hough angle in degrees and
        ``angle - 90``.
        NOTE(review): ``mode(...)[0]`` is a scalar or a one-element array
        depending on the installed SciPy version - confirm before relying on
        the exact return type.
    """
    image = imread(image_path)
    if image.ndim != 2:
        # skimage's imread returns colour images in RGB(A) order, so the
        # conversion must use the RGB code; the BGR code swapped the red and
        # blue luminance weights.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = canny(image)

    # Only a narrow band of angles around 90 degrees is tested, which is both
    # faster and less prone to picking up unrelated lines. np.arange uses its
    # default step of 1 degree and excludes the stop value.
    start_deg = 90.0 + angle_range[0] - offset
    stop_deg = 90.0 + angle_range[1] + offset
    tested_angles = np.deg2rad(np.arange(start_deg, stop_deg))

    h, theta, d = hough_line(edges, theta=tested_angles)
    _, angles, _ = hough_line_peaks(h, theta, d)

    # Vote: the most frequent peak angle wins.
    angle = np.rad2deg(mode(angles)[0])
    fixed_angle = -(90.0 - angle)

    return angle, fixed_angle

def cd_Hough_Transform(img, angle_range, offset, step):
    """Estimate the dominant line angle of an in-memory image.

    Args:
        img: Image array; anything that is not 2-D is first converted to a
            single channel.
        angle_range: Two-item sequence ``(low, high)`` of expected skew, in
            degrees relative to 90.
        offset: Extra margin in degrees added on both sides of *angle_range*.
        step: Angular resolution, in degrees, of the tested angles.

    Returns:
        Tuple ``(angle, fixed_angle)``: the voted Hough angle in degrees and
        ``angle - 90``.
    """
    # NOTE(review): the BGR conversion code presumes OpenCV channel order for
    # colour input - confirm against the callers.
    gray = img if len(img.shape) == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edge_map = canny(gray)

    # Candidate angles: a band around 90 degrees sampled every ``step``.
    lower = 90.0 + angle_range[0] - offset
    upper = 90.0 + angle_range[1] + offset
    candidates = np.deg2rad(np.arange(lower, upper, step))

    accumulator, thetas, distances = hough_line(edge_map, theta=candidates)
    peak_angles = hough_line_peaks(accumulator, thetas, distances)[1]

    # Vote among the peak angles, then express the result relative to 90.
    angle = np.rad2deg(mode(peak_angles)[0])
    return angle, -(90.0 - angle)

###-----------------------------------------------------------------------###
# This section is for Tilt Correction
def display_Hough_Transform_tilt(image_path, shear_values):
    """Estimate the dominant tilt value of an image and return plotting data.

    The image is read, reduced to a single channel, passed through the Canny
    edge detector and then through a straight-line Hough transform whose
    candidate ``theta`` values are exactly *shear_values*. The most frequent
    value among the Hough peaks is returned.

    Args:
        image_path: Path of the image file to analyse.
        shear_values: Sequence of candidate values handed unchanged to
            ``hough_line`` as ``theta``.

    Returns:
        Tuple ``(image, edges, y, angle, fixed_angle)``:
        ``image`` is the single-channel image, ``edges`` the Canny edge map,
        ``y`` a list with, per detected line, its y-coordinates at ``x = 0``
        and ``x = image width``, and ``angle``/``fixed_angle`` both hold the
        voted value (no unit conversion is applied).
        NOTE(review): ``mode(...)[0]`` is a scalar or a one-element array
        depending on the installed SciPy version - confirm before relying on
        the exact type.
    """
    image = imread(image_path)
    if image.ndim != 2:
        # skimage's imread returns colour images in RGB(A) order, so the
        # conversion must use the RGB code; the BGR code swapped the red and
        # blue luminance weights.
        image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    edges = canny(image)

    tested_angles = np.array(shear_values)
    h, theta, d = hough_line(edges, theta=tested_angles)

    # x-coordinates (left and right image border) at which each detected line
    # is evaluated.
    origin = np.array((0, image.shape[1]))

    angles = []
    y = []
    for _, peak_angle, dist in zip(*hough_line_peaks(h, theta, d)):
        y.append((dist - origin * np.cos(peak_angle)) / np.sin(peak_angle))
        angles.append(peak_angle)

    # Vote: the most frequent peak value wins and is used as-is.
    angle = mode(angles)[0]
    fixed_angle = angle

    return image, edges, y, angle, fixed_angle


def cd_Hough_Transform_tilt(img, shear_values):
    """Estimate the dominant tilt value of an in-memory image.

    Args:
        img: Image array; anything that is not 2-D is first converted to a
            single channel.
        shear_values: Sequence of candidate values handed unchanged to
            ``hough_line`` as ``theta``.

    Returns:
        Tuple ``(angle, fixed_angle)``; both entries hold the voted value.
    """
    # NOTE(review): the BGR conversion code presumes OpenCV channel order for
    # colour input - confirm against the callers.
    gray = img if len(img.shape) == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    edge_map = canny(gray)

    # The candidates are exactly the supplied values; no band is generated.
    candidates = np.array(shear_values)
    accumulator, thetas, distances = hough_line(edge_map, theta=candidates)
    peak_angles = hough_line_peaks(accumulator, thetas, distances)[1]

    # Vote among the peaks; the winner is returned twice, without conversion.
    voted = mode(peak_angles)[0]
    return voted, voted

In [None]:
def display_corrected_img(path=None, title=None, fixed_angle=0.0):
    """Load an image from disk and rotate it about its centre.

    Args:
        path: Path of the image file, read with ``cv2.imread``.
        title: Caption that is passed through unchanged.
        fixed_angle: Rotation angle handed to ``cv2.getRotationMatrix2D``
            (scale factor 1).

    Returns:
        Tuple ``(corrected_image, title)``. The output keeps the input's
        width and height; uncovered border pixels are filled with white.

    Raises:
        OSError: If ``cv2.imread`` cannot load an image from *path*.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the path instead of an AttributeError on ``.shape``.
        raise OSError(f"cv2.imread could not load an image from {path!r}")

    height, width = image.shape[:2]

    # Rotate around the image centre without scaling.
    rotation_matrix = cv2.getRotationMatrix2D(
        (width / 2, height / 2), fixed_angle, 1
    )
    corrected_image = cv2.warpAffine(
        image,
        rotation_matrix,
        (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )

    return corrected_image, title



# Function to accept image and return corrected image
def cd_skewness_correction(img, angle_range, offset, step):
    """Detect the skew of *img* with the Hough transform and rotate it back.

    Args:
        img: Image array to correct.
        angle_range: Two-item sequence forwarded to ``cd_Hough_Transform``.
        offset: Margin forwarded to ``cd_Hough_Transform``.
        step: Angular resolution forwarded to ``cd_Hough_Transform``.

    Returns:
        The rotated image, same width and height as the input, with
        uncovered border pixels filled with white.
    """
    # Only the correction angle is needed; the raw Hough angle is discarded.
    _, fixed_angle = cd_Hough_Transform(img, angle_range, offset, step)

    rows, cols = img.shape[:2]
    centre = (cols / 2, rows / 2)

    # Rotate about the image centre, no scaling.
    matrix = cv2.getRotationMatrix2D(centre, fixed_angle, 1)
    return cv2.warpAffine(
        img,
        matrix,
        (cols, rows),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )

# Function to accept image and return corrected image
def cd_skewness_correction_cnn(img, fixed_angle, offset, step):
    """Rotate *img* about its centre by an externally supplied angle.

    Args:
        img: Image array to correct.
        fixed_angle: Rotation angle handed to ``cv2.getRotationMatrix2D``.
        offset: Not used by this function.
        step: Not used by this function.

    Returns:
        The rotated image, same width and height as the input, with
        uncovered border pixels filled with white.
    """
    rows, cols = img.shape[:2]
    centre = (cols / 2, rows / 2)

    # Rotate about the image centre, no scaling.
    matrix = cv2.getRotationMatrix2D(centre, fixed_angle, 1)
    return cv2.warpAffine(
        img,
        matrix,
        (cols, rows),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )



### ------------------------------------------------------------------   ###
# Function to display corrected tilted image
def display_corrected_tilt_img(path=None, title=None, fixed_angle=0.0):
    """Load an image from disk and apply a horizontal shear to it.

    Args:
        path: Path of the image file, read with ``cv2.imread``.
        title: Caption that is passed through unchanged.
        fixed_angle: Shear factor placed in the affine matrix, i.e. the
            amount by which x shifts per unit of y.

    Returns:
        Tuple ``(corrected_image, title)``. The output keeps the input's
        width and height; uncovered border pixels are filled with white.

    Raises:
        OSError: If ``cv2.imread`` cannot load an image from *path*.
    """
    image = cv2.imread(path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the path instead of an AttributeError on ``.shape``.
        raise OSError(f"cv2.imread could not load an image from {path!r}")

    height, width = image.shape[:2]

    # Shear matrix. For a non-negative factor the image is additionally
    # translated by ``-fixed_angle * height`` along x; for a negative factor
    # no translation is applied.
    if fixed_angle < 0:
        shear_matrix = np.array(
            [[1, fixed_angle, 0], [0, 1, 0]], dtype=np.float32
        )
    else:
        shear_matrix = np.array(
            [[1, fixed_angle, -fixed_angle * height], [0, 1, 0]],
            dtype=np.float32,
        )

    corrected_image = cv2.warpAffine(
        image,
        shear_matrix,
        (width, height),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )

    return corrected_image, title


# Function to accept image and return corrected image
def cd_tilt_correction(img, shear_values):
    """Detect the tilt of *img* with the Hough transform and shear it back.

    Args:
        img: Image array to correct.
        shear_values: Candidate values forwarded to
            ``cd_Hough_Transform_tilt``.

    Returns:
        The sheared image, same width and height as the input, with
        uncovered border pixels filled with white.
    """
    # Only the correction value is needed; the first entry is discarded.
    _, fixed_angle = cd_Hough_Transform_tilt(img, shear_values)

    rows, cols = img.shape[:2]

    # x-translation: none for a negative factor, otherwise shift by
    # ``-fixed_angle * rows``.
    x_shift = 0 if fixed_angle < 0 else -fixed_angle * rows
    matrix = np.array([[1, fixed_angle, x_shift], [0, 1, 0]], dtype=np.float32)

    return cv2.warpAffine(
        img,
        matrix,
        (cols, rows),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )


# Function to accept image and return corrected image
def cd_tilt_correction_cnn(img, fixed_angle):
    """Apply a horizontal shear to *img* using an externally supplied factor.

    Args:
        img: Image array to correct.
        fixed_angle: Shear factor placed in the affine matrix.

    Returns:
        The sheared image, same width and height as the input, with
        uncovered border pixels filled with white.
    """
    rows, cols = img.shape[:2]

    # x-translation: none for a negative factor, otherwise shift by
    # ``-fixed_angle * rows``.
    x_shift = 0 if fixed_angle < 0 else -fixed_angle * rows
    matrix = np.array([[1, fixed_angle, x_shift], [0, 1, 0]], dtype=np.float32)

    return cv2.warpAffine(
        img,
        matrix,
        (cols, rows),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(255, 255, 255),
    )