### Import Libraries

In [None]:
!pip uninstall fitz
!pip uninstall PyMuPDF

[0m

In [None]:
!pip install PyMuPDF

Collecting PyMuPDF
  Downloading PyMuPDF-1.23.5-cp310-none-manylinux2014_x86_64.whl (4.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.3/4.3 MB[0m [31m27.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting PyMuPDFb==1.23.5 (from PyMuPDF)
  Downloading PyMuPDFb-1.23.5-py3-none-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (30.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.6/30.6 MB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: PyMuPDFb, PyMuPDF
Successfully installed PyMuPDF-1.23.5 PyMuPDFb-1.23.5


In [None]:
import fitz  # PyMuPDF
from PIL import Image
import cv2
import os
import random
import numpy as np
import math
import csv
from google.colab.patches import cv2_imshow

import json
import matplotlib.pyplot as plt
import pandas as pd

from skimage.transform import hough_line, hough_line_peaks, rotate
from skimage.feature import canny
from skimage.io import imread, imsave
from skimage.color import rgb2gray
from scipy.stats import mode

### Connect to Google Drive

*Google Drive holds training data for this research*
* PDF files
* PDF files created using images

In [None]:
# Connect Google Drive so the data folders under /content/gdrive/ are reachable.
# force_remount=True requests a fresh mount even if the drive is already mounted
# in this session.
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


### Global Parameters

In [None]:
# Global Parameters
## Ground Truth Parameter details
# Google Drive locations: the source PDF (cd_file) and the folders that hold the
# page images rendered from it (tmp_input_path) and the generated input sets.
Source_location = '/content/gdrive/MyDrive/OWR/source'
cd_file = '/content/gdrive/MyDrive/OWR/data/files/2023 Programme Handbook.pdf'
input_path = '/content/gdrive/MyDrive/OWR/data/input'
tmp_input_path = '/content/gdrive/MyDrive/OWR/data/tmp_input'
tmp_tilt_path = '/content/gdrive/MyDrive/OWR/data/tmp_tilt'
tmp_skewed_path = '/content/gdrive/MyDrive/OWR/data/tmp_skewed'
# 0-based, end-exclusive page range given to convert_pdf_to_images(); each page is
# saved under its 1-based number, i.e. page_40.png .. page_130.png.
start_page = 39
end_page   = 130
# 1-based page numbers that convert_pdf_to_images() skips.
exception_pages = [120]
# (min, max) rotation in degrees; skewed pages get a uniformly sampled angle in this range.
angle_range = (-5, 5)
# Discrete shear factors; one is chosen at random for every tilted image.
shear_values=(-0.2, -0.1, 0, 0.1, 0.2)
# File name of the label CSV (FILE_NAME, TYPE, VALUE) written next to each image set.
csv_file_name = 'input.csv'



## Parameters for Training Data
## for Skewness and Tilt

# Kaggle dataset that od.download() fetches into kaggle_destination.
kaggle_url = 'https://www.kaggle.com/datasets/sthabile/noisy-and-rotated-scanned-documents?select=scan_doc_rotation'
kaggle_destination = '/content/gdrive/MyDrive/OWR/data/Kaggle'
# skewed_dataset: the downloaded (rotated) scans; tilted_dataset: destination of the
# sheared images derived from them.
skewed_dataset = '/content/gdrive/MyDrive/OWR/data/Kaggle/noisy-and-rotated-scanned-documents'
tilted_dataset = '/content/gdrive/MyDrive/OWR/data/Kaggle/noisy-and-rotated-scanned-documents-tilted'
# Top-level folder inside the dataset directory.
root = 'scan_doc_rotation'


# Sub-folders relative to a dataset directory; joined onto skewed_dataset /
# tilted_dataset where they are used.
image_paths = os.path.join(root, 'images')
imageT_paths = os.path.join(root, 'images_T')
label_paths = os.path.join(root, 'labels')


### Convert PDF to images

In [None]:
def convert_pdf_to_images(pdf_path, image_path, start_page=0, end_page=0 ):
  """Render a range of PDF pages to PNG files at 2x zoom.

  Args:
    pdf_path: Path of the PDF file to open with PyMuPDF.
    image_path: Directory that receives the PNG files; created if missing.
    start_page: 0-based index of the first page to render. Negative values
      are treated as 0.
    end_page: 0-based, exclusive index of the last page to render. Values
      <= 0 or beyond the end of the document mean "through the last page".

  Pages whose 1-based number appears in the global ``exception_pages`` are
  skipped. Each output file is named ``page_<1-based page number>.png``.
  """
  os.makedirs(image_path, exist_ok=True)

  # The context manager closes the document even if rendering fails part-way.
  with fitz.open(pdf_path) as pdf_doc:
    page_count = len(pdf_doc)

    # Clamp the requested range to the pages that actually exist.
    start_page = max(start_page, 0)
    if end_page <= 0 or end_page > page_count:
      end_page = page_count

    zoom = fitz.Matrix(2.0, 2.0)

    for page_number in range(start_page, end_page):
      # exception_pages holds 1-based page numbers
      if page_number + 1 in exception_pages:
        continue

      # alpha=False guarantees 3 bytes per pixel, matching the "RGB" mode below
      pix = pdf_doc[page_number].get_pixmap(matrix=zoom, alpha=False)

      # Create a Pillow image from the PyMuPDF pixmap and save it
      img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
      img.save(f'{image_path}/page_{page_number + 1}.png', 'PNG')



In [None]:
# Call PDF conversion: render pages start_page..end_page of cd_file into
# tmp_input_path as page_<n>.png (pages listed in exception_pages are skipped)
convert_pdf_to_images(cd_file, tmp_input_path, start_page, end_page)

### Introduce Skewness to the image

In [None]:
def rotate_images_with_skewness(img, angle):
  """Rotate an image about its centre on a canvas large enough to avoid cropping.

  Args:
    img: Image array of shape (rows, cols) or (rows, cols, channels), e.g.
      the result of ``cv2.imread``.
    angle: Rotation in degrees, passed to ``cv2.getRotationMatrix2D``
      (positive values rotate counter-clockwise).

  Returns:
    The rotated image; areas not covered by the source are filled with white.
  """
  # shape[:2] works for both colour and single-channel images
  rows, cols = img.shape[:2]

  # Bounding box of the rotated rectangle
  theta = math.radians(angle)
  abs_cos = abs(math.cos(theta))
  abs_sin = abs(math.sin(theta))
  new_width = int(cols * abs_cos + rows * abs_sin)
  new_height = int(cols * abs_sin + rows * abs_cos)

  rotation_matrix = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)

  # Shift the result so the rotated content is centred in the enlarged canvas
  rotation_matrix[0, 2] += (new_width - cols) / 2
  rotation_matrix[1, 2] += (new_height - rows) / 2

  rotated_image = cv2.warpAffine(img, rotation_matrix, (new_width, new_height), borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))

  return rotated_image

### Introduce Tilt to the image

In [None]:
def tilt_images(img, shear_factor):
  """Apply a horizontal shear to an image without cropping it.

  Args:
    img: Image array of shape (rows, cols) or (rows, cols, channels).
    shear_factor: Horizontal shift per row, as a fraction of a pixel column;
      the sign selects the shear direction.

  Returns:
    The sheared image, widened by ``abs(shear_factor) * rows`` columns and
    padded with white.
  """
  # shape[:2] works for both colour and single-channel images
  rows, cols = img.shape[:2]

  # The canvas must grow horizontally to hold the sheared content
  new_cols = int(cols + abs(shear_factor) * rows)

  # A negative shear moves content to the left; translate it back into view
  x_shift = 0 if shear_factor >= 0 else -shear_factor * rows
  shear_matrix = np.array([[1, shear_factor, x_shift], [0, 1, 0]], dtype=np.float32)

  # Apply the shear transformation while retaining the full image
  sheared_image = cv2.warpAffine(img, shear_matrix, (new_cols, rows), borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))

  return sheared_image

In [None]:
# Visual check of tilt_images(): shear one converted page by +0.1, shear the
# result by -0.1, and display the final image.
url = os.path.join(tmp_input_path, 'page_101.png')
image = cv2.imread(url)
if image is None:
  # cv2.imread returns None instead of raising when the file cannot be read
  raise FileNotFoundError(f'Could not read image: {url}')

shear_factor = 0.1
image_modified2 = tilt_images(image,shear_factor)

shear_factor3 = -0.1
image_modified3 = tilt_images(image_modified2,shear_factor3)
cv2_imshow(image_modified3)

### Generate Input Data

In [None]:
# Collect random images for each type (normal, skewed, tilted) based on the percentage given.
def assign_page_to_type(start_page, end_page, percentages):
  """Randomly split the numbers start_page..end_page (inclusive) into three groups.

  Args:
    start_page: First number of the range.
    end_page: Last number of the range (inclusive).
    percentages: Sequence whose first two entries are the percentages for the
      normal and skewed groups; all entries must be non-negative and add up
      to 100. The tilted group receives every remaining number, which also
      absorbs rounding.

  Returns:
    Tuple ``(normal_numbers, skewed_numbers, tilted_numbers)`` of lists.

  Raises:
    ValueError: If a percentage is negative or the percentages do not add up
      to 100.

  Note:
    Re-seeds the global ``random`` generator with 42 so the split is
    reproducible.
  """
  random.seed(42)

  if any(p < 0 for p in percentages):
    raise ValueError("Percentages should not be negative")
  # Tolerant comparison: float percentages rarely sum to exactly 100
  if not math.isclose(sum(percentages), 100, abs_tol=1e-6):
    raise ValueError("Percentages should add up to 100")

  numbers = list(range(start_page, end_page + 1))
  total_numbers = len(numbers)

  normal_count = int(total_numbers * (percentages[0] / 100))
  skewed_count = int(total_numbers * (percentages[1] / 100))

  normal_numbers = random.sample(numbers, normal_count)
  taken = set(normal_numbers)
  # Order-preserving filter keeps the population for the next draw unchanged
  numbers = [num for num in numbers if num not in taken]

  skewed_numbers = random.sample(numbers, skewed_count)
  taken = set(skewed_numbers)

  # All remaining numbers are tilted
  tilted_numbers = [num for num in numbers if num not in taken]

  return normal_numbers, skewed_numbers, tilted_numbers


In [None]:
def create_input():
  """Interactively build a labelled input set from the converted page images.

  Prompts for the Normal and Skewed percentages (Tilted is the remainder) and
  for the output folder. Every page index in ``range(start_page, end_page)``
  is assigned to one type, the matching distortion is applied to
  ``page_<index + 1>.png`` from ``tmp_input_path``, and the resulting image
  is written to the chosen folder together with a CSV file
  (FILE_NAME, TYPE, VALUE).

  TYPE is 'N' (unchanged, VALUE 0.0), 'S' (rotated, VALUE is the angle in
  degrees), 'T' (sheared, VALUE is the shear factor) or 'X' (index not
  assigned to any type; image saved unchanged).

  Raises:
    ValueError: If a percentage is negative (including a negative remainder
      for Tilted) or the folder option is not 1, 2 or 3.
  """
  random.seed(42)

  # Get user input for the type percentages and location
  normal_percentage = float(input("Enter percentage for Normal Images: "))
  skewed_percentage = float(input("Enter percentage for Skewed Images: "))
  tilted_percentage = 100.0 - normal_percentage - skewed_percentage

  if min(normal_percentage, skewed_percentage, tilted_percentage) < 0:
    raise ValueError("Percentages must be non-negative and must not exceed 100 in total")

  percentages = [normal_percentage, skewed_percentage, tilted_percentage ]

  location_ref = int(input("Enter 1 for \'input\' folder, 2 for \'tmp_skewed\' folder or 3 for \'tmp_tilt\' folder: "))

  # Explicit lookup so an unknown option fails here, not later with an unbound variable
  locations = {1: input_path, 2: tmp_skewed_path, 3: tmp_tilt_path}
  if location_ref not in locations:
    raise ValueError(f"Unknown folder option: {location_ref}")
  location = locations[location_ref]
  os.makedirs(location, exist_ok=True)

  print(f'\n Percentages selected are Normal: {normal_percentage}, Skewed: {skewed_percentage} and Tilted: {tilted_percentage}')
  print(f'Location of images: {location}')

  normal_page_numbers, skewed_page_numbers, tilted_page_numbers = assign_page_to_type(start_page, end_page, percentages)

  # Sets give constant-time membership tests inside the loop
  normal_pages = set(normal_page_numbers)
  skewed_pages = set(skewed_page_numbers)
  tilted_pages = set(tilted_page_numbers)

  # Open the CSV file in write mode and write the data
  with open(f'{location}/{csv_file_name}', mode="w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write the header
    csv_writer.writerow(["FILE_NAME", "TYPE", "VALUE"])

    for num in range(start_page, end_page):
      image_name = f'page_{num + 1}'
      image_path = f'{tmp_input_path}/{image_name}.png'

      if not os.path.exists(image_path):
        print(f"Image {image_path} not found. Skipping.")
        continue

      image = cv2.imread(image_path)
      if image is None:
        # cv2.imread returns None for files it cannot decode
        print(f"Image {image_path} could not be read. Skipping.")
        continue

      if num in normal_pages:
        page_type = 'N'
        value = 0.00
        image_modified = image
      elif num in skewed_pages:
        page_type = 'S'
        angle = angle_range[0] + (angle_range[1] - angle_range[0]) * random.random()
        angle = round(angle,2)
        image_modified = rotate_images_with_skewness(image, angle)
        value = angle
      elif num in tilted_pages:
        page_type = 'T'
        shear_factor = random.choice(shear_values)
        image_modified = tilt_images(image,shear_factor)
        value = shear_factor
      else:
        page_type = 'X'
        value = 0.00
        # Save the page unchanged rather than reusing the previous iteration's image
        image_modified = image

      # Save the (possibly distorted) image
      input_image_path = f'{location}/{image_name}.png'
      cv2.imwrite(input_image_path, image_modified)

      # Write the data rows
      csv_writer.writerow([image_name, page_type, value])

  print('Input Data Created')


In [None]:
# Create input data
# Function will prompt for image distribution (Normal %, Skewed %; Tilted is
# the remainder) and then for the output folder (1, 2 or 3)
create_input()

Enter percentage for Normal Images: 10
Enter percentage for Skewed Images: 45
Enter 1 for 'input' folder, 2 for 'tmp_skewed' folder or 3 for 'tmp_tilt' folder: 1

 Percentages selected are Normal: 10.0, Skewed: 45.0 and Tilted: 45.0
Location of images: /content/gdrive/MyDrive/OWR/data/input
Image /content/gdrive/MyDrive/OWR/data/tmp_input/page_120.png not found. Skipping.
Input Data Created


## Create Training Dataset for Skewness Correction and Tilt Correction

In [None]:
# Load Kaggle dataset to G Drive
# This foolder will be use to train and verify skewness correction functinality

!pip install opendatasets
import opendatasets as od

od.download(kaggle_url, data_dir = kaggle_destination)

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22
Please provide your Kaggle credentials to download this dataset. Learn more: http://bit.ly/kaggle-creds
Your Kaggle username: sachithgunawardane
Your Kaggle Key: ··········
Downloading noisy-and-rotated-scanned-documents.zip to /content/gdrive/MyDrive/OWR/data/Kaggle/noisy-and-rotated-scanned-documents


100%|██████████| 8.31M/8.31M [00:00<00:00, 38.3MB/s]





## Correct Skewness Using OpenCV Libraries

In [None]:
# Collect the file names in the dataset's image folder, in sorted order
url_path_images = os.path.join(skewed_dataset, image_paths)
names = sorted(os.listdir(url_path_images))
N = len(names)
print(names[:10])
print(N)

['scan_000.png', 'scan_001.png', 'scan_002.png', 'scan_003.png', 'scan_004.png', 'scan_005.png', 'scan_006.png', 'scan_007.png', 'scan_008.png', 'scan_009.png']
600


In [None]:
# Train List: entries of train_list.json, plus the matching label file names
train_list_path = os.path.join(skewed_dataset, root, 'train_list.json')

with open(train_list_path) as f:
    train_list = json.load(f)

# Label file = entry name up to its first '.', with a .txt extension
label_list = ['{}.txt'.format(entry.split('.')[0]) for entry in train_list]
print(label_list[:10])

['scan_000.txt', 'scan_001.txt', 'scan_002.txt', 'scan_003.txt', 'scan_004.txt', 'scan_005.txt', 'scan_006.txt', 'scan_007.txt', 'scan_008.txt', 'scan_009.txt']


In [None]:
# Read the value stored in each label file listed in label_list
url_path_labels = os.path.join(skewed_dataset,label_paths)
M = len(label_list)

labels = [np.loadtxt(os.path.join(url_path_labels, label_name)) for label_name in label_list]

# Show the first 5 labels (slicing copes with fewer than 5 entries)
print(*labels[:5], sep='\n')
print('Total number of labels %d'%len(labels))

-0.337673521065156
-2.22703348158525
-4.72505826974727
-4.79902371317565
-0.9209119766851366
Total number of labels 500


In [None]:
# source
print(url_path_images)

# destination
url_image_des = os.path.join(tilted_dataset,image_paths)
print(url_image_des)

# Create the folder structure that holds the images; no-op if it already exists
os.makedirs(url_image_des, exist_ok=True)

/content/gdrive/MyDrive/OWR/data/Kaggle/noisy-and-rotated-scanned-documents/scan_doc_rotation/images
/content/gdrive/MyDrive/OWR/data/Kaggle/noisy-and-rotated-scanned-documents-tilted/scan_doc_rotation/images


In [None]:
# Function to create tilted database for training
def create_tilted_dataset(output_dir=None):
  """Create the sheared ("tilted") training set from the rotated Kaggle scans.

  For every image in ``names`` that has a label, the labelled rotation is
  undone, a random shear from ``shear_values`` is applied, and the result is
  written to ``output_dir`` together with a CSV file
  (FILE_NAME, TYPE, VALUE) recording the shear factor.

  Args:
    output_dir: Destination folder. Defaults to the current value of the
      global ``url_image_des``.

  Note:
    Images and labels are paired by file stem rather than by list position:
    ``labels`` follows ``label_list``, while ``names`` is the sorted folder
    listing and contains more files than there are labels. Re-seeds the
    global ``random`` generator with 42.
  """
  if output_dir is None:
    output_dir = url_image_des
  os.makedirs(output_dir, exist_ok=True)

  random.seed(42)

  # file stem -> labelled rotation angle
  angle_by_stem = {
      label_name.split('.')[0]: float(angle)
      for label_name, angle in zip(label_list, labels)
  }

  # Open the CSV file in write mode and write the data
  with open(f'{output_dir}/{csv_file_name}', mode="w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write the header
    csv_writer.writerow(["FILE_NAME", "TYPE", "VALUE"])

    for name in names:
      stem = name.split('.')[0]
      if stem not in angle_by_stem:
        # No label available for this image
        continue

      # Load the image
      source_path = f'{url_path_images}/{name}'
      image = cv2.imread(source_path)
      if image is None:
        # cv2.imread returns None for files it cannot decode
        print(f'Image {source_path} could not be read. Skipping.')
        continue

      # Get the image dimensions
      height, width = image.shape[:2]

      # Rotate by the negated label to undo the recorded rotation
      rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), -angle_by_stem[stem], 1)
      corrected_image = cv2.warpAffine(image, rotation_matrix, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))

      shear_factor = random.choice(shear_values)
      image_modified = tilt_images(corrected_image,shear_factor)

      # Save the sheared image
      cv2.imwrite(f'{output_dir}/{name}', image_modified)

      # Write the data rows
      csv_writer.writerow([name,'T', shear_factor])

  print('Tilted Data Created')

In [None]:
# Write the sheared images and their CSV into the folder currently held in url_image_des
create_tilted_dataset()

Tilted Data Created


In [None]:
# source
print(url_path_images)

# destination (note: rebinds url_image_des to the images_T folder)
url_image_des = os.path.join(tilted_dataset,imageT_paths)
print(url_image_des)

# Create the folder structure that holds the images; no-op if it already exists
os.makedirs(url_image_des, exist_ok=True)

/content/gdrive/MyDrive/OWR/data/Kaggle/noisy-and-rotated-scanned-documents/scan_doc_rotation/images
/content/gdrive/MyDrive/OWR/data/Kaggle/noisy-and-rotated-scanned-documents-tilted/scan_doc_rotation/images_T


In [None]:
# Function to create tilted database for training
def _build_table_image(table_height=4, table_width=3, cell_width=100, cell_height=50, border_thickness=1):
  """Draw a white table with black grid lines and a "Cell r-c" label in every cell."""
  table_width_pixels = cell_width * table_width
  table_height_pixels = cell_height * table_height

  # Blank image with a white background
  table = np.ones((table_height_pixels, table_width_pixels, 3), dtype=np.uint8) * 255

  # Inner horizontal and vertical grid lines
  for row in range(1, table_height):
    y = row * cell_height
    cv2.line(table, (0, y), (table_width_pixels, y), (0, 0, 0), border_thickness)

  for col in range(1, table_width):
    x = col * cell_width
    cv2.line(table, (x, 0), (x, table_height_pixels), (0, 0, 0), border_thickness)

  # Outer border; the far corner is the last valid pixel so the right and
  # bottom edges are drawn inside the image instead of being clipped away
  cv2.rectangle(table, (0, 0), (table_width_pixels - 1, table_height_pixels - 1), (0, 0, 0), border_thickness)

  # Centred label in every cell
  font = cv2.FONT_HERSHEY_COMPLEX_SMALL
  font_scale = 0.5
  font_color = (0, 0, 0)  # Black text
  font_thickness = 1

  for row in range(table_height):
    for col in range(table_width):
      cell_text = f"Cell {row+1}-{col+1}"
      text_size = cv2.getTextSize(cell_text, font, font_scale, font_thickness)[0]
      text_x = col * cell_width + (cell_width - text_size[0]) // 2
      text_y = row * cell_height + (cell_height + text_size[1]) // 2
      cv2.putText(table, cell_text, (text_x, text_y), font, font_scale, font_color, font_thickness, cv2.LINE_AA)

  return table


def _paste_clipped(canvas, patch, x_offset, y_offset):
  """Copy patch into canvas at (x_offset, y_offset), dropping any part that falls outside canvas."""
  target = canvas[y_offset:y_offset + patch.shape[0], x_offset:x_offset + patch.shape[1]]
  target[...] = patch[:target.shape[0], :target.shape[1]]


def create_tilted_dataset2():
  """Create the sheared training set with a synthetic table drawn on every page.

  For every image in ``names`` that has a label, the labelled rotation is
  undone, a 4x3 table is pasted at a fixed offset, a random shear from
  ``shear_values`` is applied, and the result is written to the folder held
  in the global ``url_image_des`` together with a CSV file
  (FILE_NAME, TYPE, VALUE) recording the shear factor.

  Note:
    Images and labels are paired by file stem rather than by list position:
    ``labels`` follows ``label_list``, while ``names`` is the sorted folder
    listing and contains more files than there are labels. Re-seeds the
    global ``random`` generator with 42.
  """
  random.seed(42)

  # file stem -> labelled rotation angle
  angle_by_stem = {
      label_name.split('.')[0]: float(angle)
      for label_name, angle in zip(label_list, labels)
  }

  # The table is identical for every image, so build it once
  table = _build_table_image()

  # Position of the table's top-left corner inside each page
  x_offset = 50
  y_offset = 300

  os.makedirs(url_image_des, exist_ok=True)

  # Open the CSV file in write mode and write the data
  with open(f'{url_image_des}/{csv_file_name}', mode="w", newline="") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write the header
    csv_writer.writerow(["FILE_NAME", "TYPE", "VALUE"])

    for name in names:
      stem = name.split('.')[0]
      if stem not in angle_by_stem:
        # No label available for this image
        continue

      # Load the image
      source_path = f'{url_path_images}/{name}'
      image = cv2.imread(source_path)
      if image is None:
        # cv2.imread returns None for files it cannot decode
        print(f'Image {source_path} could not be read. Skipping.')
        continue

      # Get the image dimensions
      height, width = image.shape[:2]

      # Rotate by the negated label to undo the recorded rotation
      rotation_matrix = cv2.getRotationMatrix2D((width / 2, height / 2), -angle_by_stem[stem], 1)
      corrected_image = cv2.warpAffine(image, rotation_matrix, (width, height), flags=cv2.INTER_LINEAR, borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))

      # Insert the table; clipped if the page is too small to hold all of it
      _paste_clipped(corrected_image, table, x_offset, y_offset)

      shear_factor = random.choice(shear_values)
      image_modified = tilt_images(corrected_image,shear_factor)

      # Save the sheared image
      cv2.imwrite(f'{url_image_des}/{name}', image_modified)

      # Write the data rows
      csv_writer.writerow([name,'T', shear_factor])

  print('Tilted Data Created')


In [None]:
# Write the sheared images with the inserted table, plus their CSV, into the
# folder currently held in url_image_des
create_tilted_dataset2()

Tilted Data Created
