In [1]:
import cv2
import matplotlib.pyplot as plt
import os

import numpy as np

In [2]:
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Parameters
    ----------
    cnts : sequence
        Contours accepted by ``cv2.boundingRect``. Must be re-iterable and
        support ``len()`` (e.g. the list/tuple returned by
        ``cv2.findContours``).
    method : str, optional
        One of ``"left-to-right"`` (default), ``"right-to-left"``,
        ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value behaves
        like ``"left-to-right"``.

    Returns
    -------
    tuple
        ``(cnts, boundingBoxes)``: two tuples in the same sorted order, where
        each bounding box is the value ``cv2.boundingRect`` returned for the
        matching contour. Both tuples are empty when ``cnts`` is empty.
    """
    # Nothing to sort: ``zip(*[])`` yields nothing, so the two-way unpacking
    # further down would raise ValueError on an empty input.
    if len(cnts) == 0:
        return ((), ())

    # Descending order is needed for the two "reversed" directions.
    reverse = method in ("right-to-left", "bottom-to-top")

    # Index into the bounding box used as sort key: 1 (y) for the vertical
    # methods, 0 (x) otherwise.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    # Pair every contour with its bounding box and sort the pairs together so
    # both sequences stay aligned.
    boundingBoxes = [cv2.boundingRect(c) for c in cnts]
    ordered = sorted(zip(cnts, boundingBoxes),
                     key=lambda pair: pair[1][axis], reverse=reverse)
    (cnts, boundingBoxes) = zip(*ordered)

    # return the sorted contours and their bounding boxes
    return (cnts, boundingBoxes)

In [3]:
def contour_extraction(img_path):
    """Extract the contours left after isolating the ruled lines of an image.

    The image is read as grayscale, binarised and inverted, then its long
    vertical and horizontal strokes are isolated with morphological opening
    (erode followed by dilate). The two line images are blended, inverted,
    eroded and thresholded again, and contours are extracted from the result.

    Parameters
    ----------
    img_path : str
        Path of the image file to process.

    Returns
    -------
    tuple
        The detected contours sorted top-to-bottom by bounding box; an empty
        tuple when no contour is found.

    Raises
    ------
    FileNotFoundError
        If ``cv2.imread`` cannot read ``img_path``.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError("Could not read image: {}".format(img_path))

    # Binarise with Otsu's method, then invert the result.
    _, img_bin = cv2.threshold(img, 128, 255,
                               cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    img_bin = 255 - img_bin

    # Kernel length is 1/40th of the image width; clamp to 1 so images
    # narrower than 40 px do not produce a zero-sized structuring element.
    kernel_length = max(1, img.shape[1] // 40)

    # Structuring elements for the morphological operations: a 1 x N column
    # for vertical strokes, an N x 1 row for horizontal strokes and a small
    # 3 x 3 square for the final erosion.
    verticle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, kernel_length))
    hori_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (kernel_length, 1))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    # Erode then dilate with the same kernel to isolate vertical strokes.
    img_temp1 = cv2.erode(img_bin, verticle_kernel, iterations=3)
    verticle_lines_img = cv2.dilate(img_temp1, verticle_kernel, iterations=3)

    # Same operation with the horizontal kernel.
    img_temp2 = cv2.erode(img_bin, hori_kernel, iterations=3)
    horizontal_lines_img = cv2.dilate(img_temp2, hori_kernel, iterations=3)

    # Blend both line images with equal weights.
    alpha = 0.5
    beta = 1.0 - alpha
    img_final_bin = cv2.addWeighted(verticle_lines_img, alpha,
                                    horizontal_lines_img, beta, 0.0)

    # Invert the blend and erode it, then threshold again because the
    # weighted sum is no longer strictly binary.
    img_final_bin = cv2.erode(~img_final_bin, kernel, iterations=2)
    _, img_final_bin = cv2.threshold(img_final_bin, 128, 255,
                                     cv2.THRESH_BINARY | cv2.THRESH_OTSU)

    # OpenCV 3.x returns (image, contours, hierarchy) while 4.x returns
    # (contours, hierarchy); the contours are second-to-last in both.
    contours = cv2.findContours(
        img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # Nothing to sort when no contour was detected.
    if len(contours) == 0:
        return ()

    # Sort all the contours from top to bottom.
    (contours, boundingBoxes) = sort_contours(contours, method="top-to-bottom")

    return contours

### Box 1 

In [4]:
contours = contour_extraction('boxes1.jpg')

# Bounding-box coordinates of every contour, kept as parallel lists so the
# shapes can be compared with each other below.
X = []
Y = []
W = []
H = []
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    X.append(x)
    Y.append(y)
    W.append(w)
    H.append(h)

# Count a box every time the bounding-box width drops by more than 15 from
# one contour to the next. The threshold is there because, due to the
# thickness of the boundary lines, findContours might find multiple edges on
# the same line.
idx = 1  # outer boundary (original note) -- comparisons start at W[2] - W[3]
box = 0
for c in contours:
    idx += 1
    if idx <= 2:
        continue
    try:
        delta = W[idx - 1] - W[idx]
    except IndexError:
        # idx runs from 2 up to len(contours) + 1, so the last iterations
        # index past the end of W; they are reported and skipped. Only
        # IndexError is caught so that any other failure is not hidden.
        print("Exclusion", idx)
        continue
    if delta > 15:
        box += 1
print("Number of boxes :", box)

Exclusion 7
Exclusion 8
Number of boxes : 4


### Box 2

In [5]:
# Count the contours of 'boxes2.jpg' whose bounding box is large enough to be
# a table cell (wider than 80 and taller than 20).
contours = contour_extraction('boxes2.jpg')
idx = 0
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    # Skip anything at or below the approximate cell size.
    if w <= 80 or h <= 20:
        continue
    idx += 1
print("Number of boxes: ", idx)

Number of boxes:  17


### Table Extraction

In [6]:
# Count the contours of 'table1.jpg' whose bounding box is wider than 40 and
# taller than 20.
contours = contour_extraction('table1.jpg')
idx = 0
for c in contours:
    x, y, w, h = cv2.boundingRect(c)
    # Skip anything at or below the minimum size.
    if w <= 40 or h <= 20:
        continue
    idx += 1
print("Number of boxes: ", idx)

Number of boxes:  71
