# Document Segmentation

This module provides the functions that support the document segmentation process and integrate the OWR models.

Segmentation is handled by the functions designed in owr_segmentation.ipynb together with functions taken from owr_segmentation_v2.ipynb, cleaned up further so that only the required OWR functionality remains.

In [1]:
# Colab-only setup: do not run this cell when the notebook is imported as a module.
#if __name__ == '__main__':
# Mount Google Drive under /content/gdrive/ so the project files stored there can be read.
from google.colab import drive
drive.mount('/content/gdrive/', force_remount=True)

Mounted at /content/gdrive/


In [2]:
#if __name__ == '__main__':
## Load all ipynb files from Google Drive to Colab environment
import os
import shutil

def find_and_copy_files(src_folder, dest_folder, file_extension):
  """Copy every file under ``src_folder`` that ends with ``file_extension`` into ``dest_folder``.

  The source tree is walked recursively, but the copies are placed flat in
  ``dest_folder``: files that share a name in different sub-folders overwrite
  each other, and the one visited last wins.

  Args:
    src_folder: Root directory to search.
    dest_folder: Directory that receives the copies; created when missing.
    file_extension: Suffix (or tuple of suffixes) handed to ``str.endswith``.

  Raises:
    OSError: If ``dest_folder`` cannot be created.

  A copy that fails is reported on stdout and does not stop the remaining
  copies.
  """
  # Without the destination directory every single copy below would fail.
  os.makedirs(dest_folder, exist_ok=True)

  for foldername, _subfolders, filenames in os.walk(src_folder):
    for filename in filenames:
      if not filename.endswith(file_extension):
        continue
      src_file = os.path.join(foldername, filename)
      dest_file = os.path.join(dest_folder, filename)
      try:
        shutil.copy2(src_file, dest_file)  # copy2 also preserves file metadata
        print(f"Copied: {src_file} to {dest_file}")
      except OSError as e:
        # shutil.Error / SameFileError are OSError subclasses, so all copy
        # failures are still reported while programming errors surface.
        print(f"Error copying {src_file}: {e}")

In [3]:
#if __name__ == '__main__':
# Notebooks are searched recursively below src_folder and copied flat into dest_folder.
src_folder = '/content/gdrive/MyDrive/OWR/source'
dest_folder = '/content'
file_extension = '.ipynb'

# Call the function to find and copy files
find_and_copy_files(src_folder, dest_folder, file_extension)

Copied: /content/gdrive/MyDrive/OWR/source/input_generator/owr_input_generator.ipynb to /content/owr_input_generator.ipynb
Copied: /content/gdrive/MyDrive/OWR/source/owr_model/owr_model_v2.ipynb to /content/owr_model_v2.ipynb
Copied: /content/gdrive/MyDrive/OWR/source/owr_model/owr_segmentation_v3.ipynb to /content/owr_segmentation_v3.ipynb
Copied: /content/gdrive/MyDrive/OWR/source/owr_model/preprocessing/owr_preprocess_skew_CNN_classification.ipynb to /content/owr_preprocess_skew_CNN_classification.ipynb
Copied: /content/gdrive/MyDrive/OWR/source/owr_model/preprocessing/owr_preprocess_skew_CNN_regression.ipynb to /content/owr_preprocess_skew_CNN_regression.ipynb
Copied: /content/gdrive/MyDrive/OWR/source/owr_model/preprocessing/owr_preprocess_skew_cv.ipynb to /content/owr_preprocess_skew_cv.ipynb
Copied: /content/gdrive/MyDrive/OWR/source/owr_model/preprocessing/owr_preprocessing.ipynb to /content/owr_preprocessing.ipynb
Copied: /content/gdrive/MyDrive/OWR/source/owr_model/preprocess

In [4]:
#if __name__ == '__main__':
# Install and load import_ipynb; the later `import owr_...` cells rely on it
# to import .ipynb notebooks as modules.
from IPython.display import clear_output as cls
!pip install import_ipynb
import import_ipynb

# Clear the installer output from the cell.
cls()

In [5]:
#if __name__ == '__main__':
# Import Colab Models
import owr_preprocessing as BPP
import owr_model_v2 as WM

importing Jupyter notebook from owr_preprocessing.ipynb
importing Jupyter notebook from owr_model_v2.ipynb


In [6]:
# Load Libraries
import os
import json
import random
import matplotlib.pyplot as plt
import PIL.Image as Image
import numpy as np
import math
from datetime import datetime
import pandas as pd
import cv2
from google.colab.patches import cv2_imshow
from IPython.display import clear_output as cls

from tensorflow import keras
import tensorflow as tf

In [7]:
# Global Parameters
# Dilation kernel size (rows, cols) used by getLines.
# Presumably wide enough to merge all characters of one text line into a
# single blob - TODO confirm against the scan resolution.
CharHeight4Line = 6
CharWidth4Line = 185

# Dilation kernel size (rows, cols) used by getWords.
# Presumably just wide enough to merge the characters of one word - TODO confirm.
CharHeight = 6
CharWidth = 5

In [8]:
def imgZoom(img):
  """Return ``img`` enlarged to 1.5 times its height and width.

  Args:
    img: Image array whose first two axes are (height, width); a trailing
      channel axis is optional.

  Returns:
    The image resized with ``cv2.resize`` using its default interpolation.
  """
  # Only the first two axes are needed. Unpacking three values raised
  # ValueError for 2-D (grayscale) images.
  current_height, current_width = img.shape[:2]

  # Calculate the new size (3/2 times the current size)
  new_height = int(current_height * 3 / 2)
  new_width = int(current_width * 3 / 2)

  # cv2.resize expects the target size as (width, height).
  return cv2.resize(img, (new_width, new_height))

In [9]:
# The prerequisite for the function is a Binary image.
def getContours(img):
  """Find the boxes formed by ruling lines in a binary image and the boxes nested inside them.

  Vertical and horizontal lines are isolated with erode/dilate passes, merged
  into one mask, inverted and thresholded; contours are then extracted with
  their hierarchy.

  Args:
    img: Binary image (see the prerequisite noted above the function).
      NOTE(review): the `~` inversion and Otsu threshold suggest an 8-bit
      single-channel array is expected - confirm.

  Returns:
    A pair ``(outer_rectangles, child_contours)``:
      * outer_rectangles - list of ``(x1, y1, x2, y2)`` tuples for contours
        whose parent is contour 0, sorted by ``(y1, x1)``.
      * child_contours - one list per selected contour holding the
        ``(x1, y1, x2, y2)`` boxes of its direct children, each list sorted
        by ``(y1, x1)``; the outer list is ordered by the smallest child
        ``y1`` of each group.
  """

  # Define Kernel for Morphological Operations

  # Kernel length is 1/80 of the image width
  kernel_length = np.array(img).shape[1]//80

  # A vertical kernel of (1 x kernel_length)
  verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1,kernel_length))

  # A horizontal kernel of (kernel_length x 1)
  hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length,1))

  # A kernel of (3 x 3) ones
  kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))


  # Morphological operation to detect vertical lines from an image
  img_temp1 = cv2.erode(img, verticle_kernel, iterations=3)
  vertical_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)

  # Morphological operation to detect horizontal lines from an image
  img_temp2 = cv2.erode(img, hori_kernel, iterations=3)
  horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)

  # Weighting parameters: each line image contributes half of the blended result.
  alpha = 0.5
  beta = 1.0 - alpha


  # Blend the two line images with the weights above, then invert the result
  # and erode it with the 3x3 kernel.
  img_final_bin = cv2.addWeighted(vertical_lines_img, alpha, horizontal_lines_img, beta, 0.0)
  img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)

  # THRESH_OTSU is set, so the threshold is chosen automatically and 128 is ignored.
  (thresh, img_final_bin) = cv2.threshold(img_final_bin, 128,255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

  # Find contours for image, which will detect all the boxes
  contours, hierarchy = cv2.findContours(img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

  # The string below is a reference note on the layout of one hierarchy entry:
  # [next, previous, first child, parent], with -1 meaning "none".
  '''
  0 : Star [1,-1,-1,-1]
  … Next — Outer Square (1)
  … Previous — No Contour(-1)
  … Child — No Child (-1)
  … Parent — No Parent (-1)

  '''

  # Initialize lists to store outer rectangles and their child contours
  outer_rectangles = []
  child_contours = []

  # Iterate through the hierarchy to find outer rectangles and their children
  for i in range(len(contours)):

      # Select contours whose parent is contour 0 (a contour with no parent
      # would have -1 here).
      # NOTE(review): presumably contour 0 is the page-sized region of the
      # inverted mask, making its direct children the tables - confirm.
      if hierarchy[0][i][3] == 0:
          # Calculate the bounding rectangle of the outer contour
          x, y, w, h = cv2.boundingRect(contours[i])
          outer_rectangles.append((x, y, x + w, y + h))

          # Find child contours of the outer contour (full scan of all contours)
          children = []
          for j in range(len(contours)):
              if hierarchy[0][j][3] == i:  # Check if contour j is a child of contour i
                  child_contour = contours[j]
                  x, y, w, h = cv2.boundingRect(child_contour)
                  children.append((x, y, x + w, y + h))
          child_contours.append(children)

  # Sort the outer rectangles based on their top-left coordinates (y, x)
  outer_rectangles = sorted(outer_rectangles, key=lambda rect: (rect[1], rect[0]))

  # Sort the child contours based on their top-left coordinates (y, x)
  for i in range(len(child_contours)):
    child_contours[i] = sorted(child_contours[i], key=lambda rect: (rect[1], rect[0]))

  # Record, for every child group, its smallest top coordinate together with
  # the group's index; a group without children keeps the image height.
  child_min_y = []
  for i in range(len(child_contours)):
    y_min = np.array(img).shape[0]
    for j in range(len(child_contours[i])):
      if y_min > child_contours[i][j][1]:
        y_min = child_contours[i][j][1]

    child_min_y.append((y_min, i))
  child_min_y = np.array(child_min_y)
  # An empty list becomes a 1-D array and cannot be column-indexed, so it is
  # passed through unchanged.
  if child_min_y.ndim != 1:
    sorted_array = child_min_y[child_min_y[:, 0].argsort()]
  else: sorted_array = child_min_y


  # Reorder the child groups top-to-bottom by that smallest y.
  # NOTE(review): outer_rectangles is ordered by its own (y, x) while the
  # groups are ordered by their children's minimum y; callers index both
  # lists with the same position, so the two orders are assumed to agree -
  # confirm for tables without detected cells.
  tempChild = []
  for i in range(len(sorted_array)):
    tempChild.append(child_contours[sorted_array[i][1]])

  child_contours = tempChild

  # Return the contours
  return outer_rectangles, child_contours

In [10]:
def getBinarized(OrgImg, Zoom=True):
  """Return the binarized version of ``OrgImg``.

  Args:
    OrgImg: Source image.
    Zoom: When true, the image is first enlarged with ``imgZoom``.

  Returns:
    The result of ``BPP.set_Adaptive_Binarization`` applied to the
    grayscale image produced by ``BPP.set_grayscale``.
  """
  source = imgZoom(OrgImg) if Zoom else OrgImg

  # Grayscale first, then adaptive binarization.
  return BPP.set_Adaptive_Binarization(BPP.set_grayscale(source))

In [11]:
def getTableImg(orgImg, outer_rectangles, extract=-1):
  """Return one table cropped from the image, or the image with all tables blanked.

  As per the CD template there are two tables:
    1. Code and Course Title
    2. Assessments

  Args:
    orgImg: Source image; it is copied, never modified.
    outer_rectangles: Table boxes as ``(x1, y1, x2, y2)``.
    extract:
      1  - crop of the first rectangle (Code and Course Title)
      2  - crop of the second rectangle (Assessments)
      -1 - copy of the image with every rectangle filled with 0

  Returns:
    The requested crop. When the requested rectangle does not exist, or for
    any other ``extract`` value, the copy with all rectangles filled is
    returned instead.
  """
  canvas = orgImg.copy()

  # Translate the 1-based table number into a list position.
  wanted = 0 if extract == 1 else 1 if extract == 2 else None

  if wanted is not None and len(outer_rectangles) > wanted:
    left, top, right, bottom = outer_rectangles[wanted]
    return canvas[top:bottom, left:right]

  # Fallback / default: paint over every table area.
  for left, top, right, bottom in outer_rectangles:
    cv2.rectangle(canvas, (left, top), (right, bottom), 0, thickness=cv2.FILLED)
  return canvas



In [12]:
def imgCourseCode(img, outer_rectangles, child_contours):
  """Crop the first cell of the first table (the course-code cell) from ``img``.

  The cell coordinates are shifted by the top-left corner of the first outer
  rectangle before slicing, so ``img`` is expected to be the crop of that
  table rather than the full page.

  Args:
    img: Image to slice.
    outer_rectangles: Table boxes as ``(x1, y1, x2, y2)``; entry 0 is used.
    child_contours: Cell boxes per table; ``child_contours[0][0]`` is used.

  Returns:
    The cropped cell, or ``img`` unchanged when the table or cell is missing
    or malformed.
  """
  try:
    out_x1, out_y1, _, _ = outer_rectangles[0]
    child_x1, child_y1, child_x2, child_y2 = child_contours[0][0]

    # Rebase the cell onto the table's own coordinate system.
    x1 = child_x1 - out_x1
    y1 = child_y1 - out_y1
    x2 = child_x2 - out_x1
    y2 = child_y2 - out_y1

    return img[y1:y2, x1:x2]
  except (IndexError, TypeError, ValueError):
    # Best effort: missing or malformed geometry falls back to the whole
    # image. Interrupts and unrelated errors are no longer swallowed.
    return img

In [13]:
def imgCourseTitle(img, outer_rectangles, child_contours):
  """Crop the second cell of the first table (the course-title cell) from ``img``.

  The cell coordinates are shifted by the top-left corner of the first outer
  rectangle before slicing, so ``img`` is expected to be the crop of that
  table rather than the full page.

  Args:
    img: Image to slice.
    outer_rectangles: Table boxes as ``(x1, y1, x2, y2)``; entry 0 is used.
    child_contours: Cell boxes per table; ``child_contours[0][1]`` is used.

  Returns:
    The cropped cell, or ``img`` unchanged when the table or cell is missing
    or malformed.
  """
  try:
    out_x1, out_y1, _, _ = outer_rectangles[0]
    child_x1, child_y1, child_x2, child_y2 = child_contours[0][1]

    # Rebase the cell onto the table's own coordinate system.
    x1 = child_x1 - out_x1
    y1 = child_y1 - out_y1
    x2 = child_x2 - out_x1
    y2 = child_y2 - out_y1

    return img[y1:y2, x1:x2]
  except (IndexError, TypeError, ValueError):
    # Best effort: missing or malformed geometry falls back to the whole
    # image. Interrupts and unrelated errors are no longer swallowed.
    return img

In [14]:
def imgAssessments(img, outer_rectangles, child_contours, row):
  """Crop the three cells of one row of the second table from ``img``.

  The cells of the second table are read in groups of three per row. Their
  coordinates are shifted by the top-left corner of the second outer
  rectangle before slicing, so ``img`` is expected to be the crop of that
  table rather than the full page.

  Args:
    img: Image to slice.
    outer_rectangles: Table boxes as ``(x1, y1, x2, y2)``; entry 1 is used.
    child_contours: Cell boxes per table; ``child_contours[1]`` is used.
    row: Zero-based row number; values below zero are treated as row 0.

  Returns:
    ``(method, weight, learning)`` - the crops of the row's first, second
    and third cell. Any cell that cannot be located is returned as ``img``
    itself.
  """
  first_cell = 3 * max(row, 0)

  # Start with the fallback for every cell. The original handler assigned
  # only one of the three names (and read the loop variable even when the
  # loop had not started), so a failure ended in NameError/UnboundLocalError.
  cells = [img, img, img]

  try:
    out_x1, out_y1, _, _ = outer_rectangles[1]

    for offset in range(3):
      child_x1, child_y1, child_x2, child_y2 = child_contours[1][first_cell + offset]

      # Rebase the cell onto the table's own coordinate system.
      x1 = child_x1 - out_x1
      y1 = child_y1 - out_y1
      x2 = child_x2 - out_x1
      y2 = child_y2 - out_y1

      cells[offset] = img[y1:y2, x1:x2]
  except (IndexError, TypeError, ValueError):
    # Best effort: cells located before the failure keep their crop, the
    # remaining ones keep the whole-image fallback.
    pass

  method, weight, learning = cells
  return method, weight, learning

In [15]:
def getLines(img):
  """Return the bounding boxes of the text lines found in ``img``.

  The image is dilated with a (CharHeight4Line x CharWidth4Line) kernel and
  the external contours of the result are taken as lines.

  Args:
    img: Image handed to ``cv2.dilate``.
      NOTE(review): presumably a binary image with bright text - confirm.

  Returns:
    List of ``(x1, y1, x2, y2)`` tuples ordered by ``y1`` (top to bottom);
    an empty list when OpenCV rejects the input.
  """
  kernel = np.ones((CharHeight4Line, CharWidth4Line), np.uint8)
  try:
    dilated = cv2.dilate(img, kernel, iterations=1)

    contours, _hierarchy = cv2.findContours(dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    # Measure every contour once, then order the boxes by their top edge.
    boxes = sorted((cv2.boundingRect(contour) for contour in contours),
                   key=lambda box: box[1])  # box is (x, y, w, h)

    return [(x, y, x + w, y + h) for x, y, w, h in boxes]
  except Exception:
    # Deliberate best effort: an unusable image yields "no lines". Unlike a
    # bare except, KeyboardInterrupt and SystemExit still propagate.
    return []

In [16]:
def getWords(img, lines, index):
  """Return the bounding boxes of the words on one text line.

  The line region is cut from ``img``, dilated with a
  (CharHeight x CharWidth) kernel, and the external contours of the result
  are taken as words. Boxes are expressed in the coordinates of ``img`` and
  filtered with ``remove_overlapping``.

  Args:
    img: Image containing the line.
    lines: Line boxes as ``(x1, y1, x2, y2)``, e.g. from ``getLines``.
    index: Position in ``lines`` of the line to split.

  Returns:
    List of ``[x1, y1, x2, y2]`` word boxes as returned by
    ``remove_overlapping``. If processing fails, the boxes collected up to
    that point are returned unfiltered (possibly an empty list).
  """
  words_list = []
  kernel = np.ones((CharHeight, CharWidth), np.uint8)
  try:
    x, y, x_e, y_e = lines[index][:4]

    line_img = img[y:y_e, x:x_e]
    dilated = cv2.dilate(line_img, kernel, iterations=1)

    contours, _hierarchy = cv2.findContours(dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

    # Measure every contour once and visit the boxes from left to right.
    boxes = sorted((cv2.boundingRect(contour) for contour in contours),
                   key=lambda box: box[0])  # box is (x, y, w, h)

    for x2, y2, w2, h2 in boxes:
      # Shift from line-local back to image coordinates.
      words_list.append([x + x2, y + y2, x + x2 + w2, y + y2 + h2])

    # Remove overlapping rectangles
    return remove_overlapping(words_list)
  except Exception:
    # Deliberate best effort: keep whatever was collected. Unlike a bare
    # except, KeyboardInterrupt and SystemExit still propagate.
    return words_list

In [17]:
def calculate_area(coord):
    """Return the area of a rectangle given by its corners ``[x1, y1, x2, y2]``."""
    width = coord[2] - coord[0]
    height = coord[3] - coord[1]
    return width * height

def is_overlapping(coord1, coord2):
    buffer = 10
    # Check if two rectangles defined by [x, y, w, h] are overlapping
    x1, y1, x12, y12 = coord1
    x2, y2, x22, y22 = coord2
    #return (x1 < x2 + w2 and x1 + w1 > x2 and y1 < y2 + h2 and y1 + h1 > y2)
    return ((x1 - buffer <= x2 <= x12 + buffer and x1 - buffer <= x22 <= x12 + buffer
             and y1 - buffer <= y2 <= y12 + buffer and y1 - buffer <= y22 <= y12 +buffer)
          or (x2 - buffer <= x1 <= x22 + buffer and x2 - buffer <= x12 <= x22 + buffer
              and y2 - buffer <= y1 <= y22 + buffer and y2 - buffer <= y12 <= y22 + buffer))

def remove_overlapping(rectangles):
    """Drop rectangles that ``is_overlapping`` matches against a larger, already kept one.

    Rectangles are visited from largest to smallest area, so the larger
    rectangle of a matching pair survives. Note that ``rectangles`` itself
    is re-ordered in place by that area sort.

    Returns:
        The kept rectangles as a new list ordered by their left edge.
    """
    # Largest first (in place, so the caller's list is re-ordered too).
    rectangles.sort(key=calculate_area, reverse=True)

    kept = []
    for candidate in rectangles:
        clashes = any(is_overlapping(candidate, chosen) for chosen in kept)
        if not clashes:
            kept.append(candidate)

    # Hand the survivors back in left-to-right order.
    kept.sort(key=lambda rect: rect[0])
    return kept



In [18]:
# Load Word Prediction Model
def load_word_prediction_model(model_path):
    """Build the word-recognition model in 'predict' mode and load its weights.

    Args:
        model_path: Location of the weights passed to ``load_weights``.

    Returns:
        ``(model, letters, n_letters)`` - the model, the string of characters
        present in the label text, and the length of that string.
    """
    # Letters present in the Label Text
    letters = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    model = WM.Image_text_recogniser_model_1('predict')
    model.load_weights(model_path)

    return model, letters, len(letters)

In [19]:
def findWord(model, img):
  """Run the word model on one word image and return the decoded prediction.

  Args:
    model: Object exposing ``predict``.
    img: Single-channel word image (it is passed through a GRAY2BGR
      conversion).

  Returns:
    Whatever ``WM.decode_label`` produces for the model output.
  """
  # Resize to 170 wide x 32 high, expand to BGR and keep the middle channel.
  resized = cv2.resize(img, (170, 32))
  channel = cv2.cvtColor(resized, cv2.COLOR_GRAY2BGR)[:, :, 1]

  # Transpose, add batch and channel axes -> (1, 170, 32, 1), scale by 1/255.
  batch = channel.T[np.newaxis, :, :, np.newaxis] / 255

  raw_prediction = model.predict(batch, verbose=None)
  return WM.decode_label(raw_prediction)

In [20]:
def get_Words(img, Model_Word, funWord, WordClasses):
  """Recognise the text of ``img`` line by line.

  Args:
    img: Image to read; lines come from ``getLines``, words from ``getWords``.
    Model_Word: Model handed to ``funWord``.
    funWord: Callable ``funWord(Model_Word, word_image)`` returning the
      predicted text of one word image.
    WordClasses: Not used by this function.

  Returns:
    One string per detected line, its words joined by single spaces.
  """
  Lines = getLines(img)

  recognised = []
  for line_no, _line in enumerate(Lines):
    line_text = ''
    for left, top, right, bottom in getWords(img, Lines, line_no):
      # The predictor receives the inverted word crop.
      inverted = cv2.bitwise_not(img[top:bottom, left:right])
      predicted = funWord(Model_Word, inverted)

      # A separator is added only once the line already holds some text.
      line_text = line_text + ' ' + predicted if line_text else predicted

    recognised.append(line_text)

  return recognised

In [21]:
def get_courseCode(img, outer_rectangles, child_contours,
                   Model_Word, funWord, WordClasses):
  """Read the course code from the first cell of the first table.

  The recognised lines are concatenated without any spaces. The first two
  characters are kept as read; in the remainder, letters are replaced by
  digits: S->5, A->4, L->1, O->0, I->1, Z->2, E->3, B->3.

  Returns:
    The cleaned course-code string.
  """
  # NOTE(review): 'B' maps to '3' here, although '8' is the more common
  # look-alike - confirm that this is intended.
  letter_to_digit = str.maketrans('SALOIZEB', '54101233')

  # Get Course Code
  cell = imgCourseCode(img, outer_rectangles, child_contours)
  lines = get_Words(cell, Model_Word, funWord, WordClasses)

  raw_code = ''.join(text.replace(' ', '') for text in lines)

  return raw_code[:2] + raw_code[2:].translate(letter_to_digit)

def get_courseTitle(img, outer_rectangles, child_contours,
                    Model_Word, funWord, WordClasses):
  """Read the course title from the second cell of the first table.

  Returns:
    The recognised lines of the cell joined by single spaces.
  """
  # Get Course Title
  title_cell = imgCourseTitle(img, outer_rectangles, child_contours)
  title_lines = get_Words(title_cell, Model_Word, funWord, WordClasses)
  return ' '.join(title_lines)


def get_courseAssessment(img, outer_rectangles, child_contours,
                         Model_Word, funWord, WordClasses):
  """Read every row of the second (assessment) table.

  The cells of the second table are consumed three per row; cells left over
  after the last complete row are ignored.

  Returns:
    ``(word_method, word_weight, word_learning)`` - three lists holding one
    entry per row, each entry being the list of recognised lines of that
    row's first, second and third cell respectively.
  """
  # No second table means no rows.
  num_of_rows = len(child_contours[1]) // 3 if len(child_contours) > 1 else 0

  # One accumulator per column: method, weight, learning.
  columns = ([], [], [])

  # Get Assessment
  for row in range(num_of_rows):
    row_cells = imgAssessments(img, outer_rectangles, child_contours, row)
    for column, cell_img in zip(columns, row_cells):
      column.append(get_Words(cell_img, Model_Word, funWord, WordClasses))

  return columns


def get_Doc(img, Model_Word, funWord, WordClasses):
  """Recognise the text of a whole image; thin wrapper that forwards all arguments to ``get_Words``.

  Returns:
    The list returned by ``get_Words`` (one string per detected line).
  """

  return get_Words(img, Model_Word, funWord, WordClasses)