# Segmentation of Rectangles from the complete form

In [107]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
# Load the scanned form as a single-channel grayscale image (flag 0).
img = cv2.imread('form01.jpg', 0)

# cv2.imread does not raise when the file is missing or unreadable; it returns
# None, which would otherwise surface later as an obscure error in cv2.threshold.
if img is None:
    raise FileNotFoundError("could not read image 'form01.jpg'")

# Binarize the image. Because THRESH_OTSU is set, the threshold is selected
# automatically by Otsu's method and the 128 passed here is not used as the
# cut-off; `thresh` receives the value that was actually chosen.
thresh, img_bin = cv2.threshold(img, 128, 255,cv2.THRESH_BINARY|cv2.THRESH_OTSU)

# Invert the binary image so that pixels at or below the threshold (the darker
# ones) become 255 and the brighter ones become 0.
img_bin = 255-img_bin 
cv2.imwrite("binary_image.jpg",img_bin)

True

In [108]:
# Structuring elements for the morphological line detection that follows.

# Length of the line kernels: the image width (second axis of the array)
# integer-divided by 413.
kernel_length = np.array(img).shape[1]//413

# Small 3x3 rectangular kernel.
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(3, 3))

# Horizontal line kernel: kernel_length wide, 1 pixel tall.
hori_kernel = cv2.getStructuringElement(
    shape=cv2.MORPH_RECT, ksize=(kernel_length, 1)
)

# Vertical line kernel: 1 pixel wide, kernel_length tall.
verticle_kernel = cv2.getStructuringElement(
    shape=cv2.MORPH_RECT, ksize=(1, kernel_length)
)

In [109]:
# Vertical line extraction: erode the inverted binary image with the vertical
# kernel, then dilate the result with the same kernel and iteration count
# (i.e. a morphological opening along the vertical direction).
img_temp1 = cv2.erode(src=img_bin, kernel=verticle_kernel, iterations=10)
verticle_lines_img = cv2.dilate(
    src=img_temp1, kernel=verticle_kernel, iterations=10
)

# Save the detected vertical lines for inspection.
cv2.imwrite(filename="binary_verticle_lines.jpg", img=verticle_lines_img)

True

In [110]:
# Horizontal line extraction: erode the inverted binary image with the
# horizontal kernel, then dilate the result with the same kernel and iteration
# count (i.e. a morphological opening along the horizontal direction).
img_temp2 = cv2.erode(src=img_bin, kernel=hori_kernel, iterations=10)
horizontal_lines_img = cv2.dilate(
    src=img_temp2, kernel=hori_kernel, iterations=10
)

# Save the detected horizontal lines for inspection.
cv2.imwrite(filename="binary_horizonal_image.jpg", img=horizontal_lines_img)

True

In [111]:
# Build the box mask by blending the vertical-lines and horizontal-lines images.
# Both images get the same weight (alpha == beta == 0.5).
alpha = 0.5
beta = 1.0 - alpha

# Equal-weight blend of the two line images, inverted bitwise in the same step.
img_final_bin = cv2.bitwise_not(
    cv2.addWeighted(verticle_lines_img, alpha, horizontal_lines_img, beta, 0.0)
)

# Erode the inverted blend three times with the 3x3 kernel.
img_final_bin = cv2.erode(img_final_bin, kernel, iterations=3)

# Re-binarize the result; with THRESH_OTSU the threshold is picked automatically.
(thresh, img_final_bin) = cv2.threshold(
    img_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
)
cv2.imwrite("final_binary_image.jpg", img_final_bin)

True

In [112]:
# Defining a function for sorting out the contours that are detected from the image.
# Contours can be sorted in 4 ways:
#     left to right
#     right to left
#     top to bottom
#     bottom to top

import numpy as np
import argparse
import imutils
import cv2
 
def sort_contours(cnts, method="left-to-right"):
    """Sort contours by the position of their bounding boxes.

    Args:
        cnts: iterable of contours accepted by ``cv2.boundingRect``.
        method: one of ``"left-to-right"``, ``"right-to-left"``,
            ``"top-to-bottom"`` or ``"bottom-to-top"``. Any other value
            behaves like ``"left-to-right"``.

    Returns:
        A ``(cnts, boundingBoxes)`` pair of tuples in sorted order, where
        ``boundingBoxes[k]`` is the ``(x, y, w, h)`` box of ``cnts[k]``.
        Both tuples are empty when ``cnts`` is empty.
    """
    # Descending order for the two "reverse" directions.
    reverse = method in ("right-to-left", "bottom-to-top")

    # Index into the (x, y, w, h) box: 1 sorts on y, 0 sorts on x.
    axis = 1 if method in ("top-to-bottom", "bottom-to-top") else 0

    boundingBoxes = [cv2.boundingRect(c) for c in cnts]

    # zip(*sorted(...)) yields nothing for an empty input, so the two-name
    # unpack below would raise ValueError; return empty results instead.
    if not boundingBoxes:
        return ((), ())

    # Sort contour/box pairs together on the chosen box coordinate.
    ordered = sorted(zip(cnts, boundingBoxes),
                     key=lambda pair: pair[1][axis], reverse=reverse)
    (cnts, boundingBoxes) = zip(*ordered)

    # return the sorted contours and their bounding boxes
    return (cnts, boundingBoxes)


In [113]:
# Find contours in the final binary image, using retrieval mode RETR_TREE and
# contour approximation CHAIN_APPROX_SIMPLE.
# cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x/4.x; taking the last two items works on all of them.
contours, hierarchy = cv2.findContours(img_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Order the contours by the y-coordinate of their bounding boxes.
contours, boundingBoxes = sort_contours(contours, method="top-to-bottom")

In [114]:
# Crop every sufficiently large, wide rectangle out of the grayscale form and
# save each crop as "<running number>.jpg".
idx = 0
for c in contours:
    # Bounding box of the contour: top-left corner plus width and height.
    x, y, w, h = cv2.boundingRect(c)

    # Skip boxes that are too small or not at least three times wider than tall.
    if w <= 80 or h <= 20 or w <= 3*h:
        continue

    idx += 1
    new_img = img[y:y+h, x:x+w]
    cv2.imwrite("{}.jpg".format(idx), new_img)


# Once all rectangles are extracted, we extract the individual letter boxes (squares) inside each segmented rectangle

In [129]:
# Read one of the saved rectangle crops as a grayscale image (flag 0).
image = cv2.imread('3.jpg',0)

# cv2.imread returns None instead of raising when the file cannot be read
# (e.g. no '3.jpg' was written earlier); fail here with a clear message.
if image is None:
    raise FileNotFoundError("could not read image '3.jpg'")

# Binarize; with THRESH_OTSU the threshold is chosen automatically and the 128
# passed here is not used as the cut-off.
thresh, image_bin = cv2.threshold(image,128,255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)

# Invert so that pixels at or below the threshold become 255.
image_bin = 255 - image_bin
cv2.imwrite("manish.jpg",image_bin)

True

In [130]:
# Structuring elements for the per-rectangle pass.
# Note: this rebinds `kernel_length` and `kernel` that were set for the full form.
kernel_length = 5

# 3x3 rectangular kernel.
kernel = cv2.getStructuringElement(shape=cv2.MORPH_RECT, ksize=(3, 3))

# Horizontal line kernel: kernel_length wide, 1 pixel tall.
horizontal_kernel = cv2.getStructuringElement(
    shape=cv2.MORPH_RECT, ksize=(kernel_length, 1)
)

# Vertical line kernel: 1 pixel wide, kernel_length tall.
vertical_kernel = cv2.getStructuringElement(
    shape=cv2.MORPH_RECT, ksize=(1, kernel_length)
)


In [131]:
# Vertical lines of the cropped rectangle: erode, then dilate, with the same
# vertical kernel and iteration count.
image_temp1 = cv2.erode(src=image_bin, kernel=vertical_kernel, iterations=10)
vertical_lines_image = cv2.dilate(
    src=image_temp1, kernel=vertical_kernel, iterations=10
)
cv2.imwrite(filename="manish_vertical_lines.jpg", img=vertical_lines_image)


True

In [132]:
# Horizontal lines of the cropped rectangle: erode, then dilate, with the same
# horizontal kernel and iteration count.
image_temp2 = cv2.erode(image_bin, horizontal_kernel, iterations=10)
horizontal_lines_image = cv2.dilate(image_temp2, horizontal_kernel, iterations = 10)

# Save the result computed in this cell. The original wrote
# `horizontal_lines_img`, which is the full-form image from the earlier pass.
cv2.imwrite("manish_horizontal_lines.jpg", horizontal_lines_image)

True

In [133]:
# Blend weights: both line images contribute equally (alpha == beta == 0.5).
alpha = 0.5
beta = 1 - alpha

# Equal-weight blend of the vertical and horizontal line images, inverted
# bitwise in the same step.
image_final_bin = cv2.bitwise_not(
    cv2.addWeighted(vertical_lines_image, alpha, horizontal_lines_image, beta, 0.0)
)

# Erode the inverted blend three times with the 3x3 kernel.
image_final_bin = cv2.erode(image_final_bin, kernel, iterations=3)

# Re-binarize; with THRESH_OTSU the threshold is picked automatically.
thresh, image_final_bin = cv2.threshold(
    image_final_bin, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
)
cv2.imwrite("manish_image_final_bin.jpg", image_final_bin)

True

In [134]:
# Find contours in the per-rectangle binary image.
# cv2.findContours returns (image, contours, hierarchy) on OpenCV 3.x but
# (contours, hierarchy) on 2.x/4.x; taking the last two items works on all of them.
contours2, hierarchy2 = cv2.findContours(image_final_bin, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

# Order the contours by the x-coordinate of their bounding boxes.
contours2, boundingBoxes2 = sort_contours(contours2, method = "left-to-right")

In [135]:
# Crop every box larger than 30x30 pixels out of the rectangle image and save
# each crop as "manish_<running number>.jpg".
idx = 0
for c in contours2:
    x, y, w, h = cv2.boundingRect(c)

    # Skip boxes that are not strictly larger than 30 px in both dimensions.
    if w <= 30 or h <= 30:
        continue

    idx += 1
    new_image = image[y:y+h, x:x+w]
    cv2.imwrite("manish_{}.jpg".format(idx), new_image)

# -----------------------------------------------------------------------------------------------------------