In [2]:
import os
from glob import glob

import cv2 
import numpy as np 
from matplotlib.pyplot import plot as plt

In [3]:
# %load wintech_contour.py
def process_save_img(imgpath,outpath , debug =1 ):
    """Load an image and binarise its grayscale version with Otsu's threshold.

    Parameters
    ----------
    imgpath : str
        Path of the image to read.
    outpath : str
        Destination of the binarised image; only used when ``debug == 1``.
    debug : int, optional
        ``1`` writes the binarised image to ``outpath``; any other value
        skips the write.

    Returns
    -------
    tuple
        ``(gray, color_image)`` where ``gray`` is the Otsu-thresholded
        single-channel image and ``color_image`` is the image as returned by
        ``cv2.imread`` with its default flags.

    Raises
    ------
    IOError
        If OpenCV cannot read ``imgpath``.
    """
    # The file is decoded twice on purpose: flag 0 asks the decoder for
    # grayscale directly, which keeps the pixel values identical to the
    # original pipeline.
    image = cv2.imread(imgpath,0)
    color_image = cv2.imread(imgpath)
    # cv2.imread signals a missing or undecodable file by returning None
    # instead of raising, which previously surfaced later as an
    # AttributeError on `.shape`.
    if image is None or color_image is None:
        raise IOError('could not read image: %r' % (imgpath,))

    # THRESH_OTSU picks the threshold itself; the 0 passed as threshold is ignored.
    gray = cv2.threshold(image, 0, 255,cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    if debug == 1:
        cv2.imwrite(outpath, gray)
    return gray , color_image

def edge_kernel(size , kernel_type, value = -1 ):
    """Build a 2-D filter kernel filled with ``value`` and a balancing centre.

    Every cell is set to ``value``; the cell at ``(size[0] // 2, size[1] // 2)``
    is then overwritten with ``-(size[0] * size[1] - 1) * value`` so that an
    ``'edge'`` kernel sums to zero. For ``'sharp'`` the centre is lowered by a
    further 2. Any other ``kernel_type`` yields the same kernel as ``'edge'``.

    Parameters
    ----------
    size : tuple
        ``(rows, cols)`` of the kernel.
    kernel_type : str
        ``'edge'`` or ``'sharp'``.
    value : number, optional
        Fill value for the non-centre cells (default ``-1``).

    Returns
    -------
    numpy.ndarray
        Floating-point array of shape ``size``.
    """
    kernel = value * np.ones(size)
    rows, cols = size[0], size[1]

    # Centre weight that cancels the sum of all the other cells.
    centre = (1 - rows * cols) * value
    if kernel_type == 'sharp':
        centre -= 2

    kernel[rows // 2, cols // 2] = centre
    return kernel

# Parameters used for the 80 images of wintech ferrites:
#   edge_kernel value: -3 for horizontal lines, -2 for vertical lines
#   cv2.adaptiveThreshold (blockSize, C): (13, 3) for horizontal, (19, 2) for vertical

def line(img , ksize , outimgpath , line_type,open_kernel ,close_kernel , debug = 0 ):
    """Extract a binary image of horizontal or vertical ruling lines.

    The source image is Gaussian-blurred, convolved with an ``edge_kernel`` of
    shape ``ksize``, converted to grayscale, adaptively thresholded and then
    cleaned with a morphological close followed by an open.

    Parameters
    ----------
    img : numpy.ndarray
        Image to process. A 3-D (colour) array is used directly. For any other
        input the function falls back to the notebook-level ``c_img``, because
        the original implementation ignored ``img`` and always read that
        global; the fallback keeps existing callers' results unchanged.
    ksize : tuple
        ``(rows, cols)`` of the edge kernel.
    outimgpath : str
        Path prefix for the debug image; only used when ``debug == 1``.
    line_type : str
        ``'horizontal'`` or ``'verticle'`` (spelling kept for compatibility).
    open_kernel, close_kernel : tuple
        Sizes handed to ``cv2.getStructuringElement`` for the opening and
        closing steps.
    debug : int, optional
        ``1`` prints a message and writes ``outimgpath + line_type + '.jpg'``.

    Returns
    -------
    numpy.ndarray
        The thresholded and morphologically cleaned image.

    Raises
    ------
    ValueError
        If ``line_type`` is not one of the two supported values (this used to
        fail later with an UnboundLocalError).
    """
    # line_type -> (edge-kernel fill value, adaptive-threshold block size,
    #               adaptive-threshold constant C, median-blur the threshold?)
    settings = {
        'horizontal': (-3, 13, 3, True),
        'verticle': (-2, 19, 2, False),
    }
    if line_type not in settings:
        raise ValueError("line_type must be 'horizontal' or 'verticle', got %r" % (line_type,))
    kernel_value, block_size, offset, despeckle = settings[line_type]

    # NOTE(review): legacy fallback to the global `c_img`; pass the colour
    # image as `img` to avoid depending on notebook state.
    source = img if getattr(img, 'ndim', 0) == 3 else c_img

    #img_median = cv2.medianBlur(c_img,3)
    blurred = cv2.GaussianBlur(source,(5,5),0)
    filtered = cv2.filter2D(blurred,-1,edge_kernel(ksize , 'edge' , value = kernel_value))
    img_grey = cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)

    th = cv2.adaptiveThreshold(img_grey,255,cv2.ADAPTIVE_THRESH_GAUSSIAN_C,cv2.THRESH_BINARY,block_size,offset)
    if despeckle:
        th = cv2.medianBlur(th,3)

    # Close first, then open, both with cross-shaped structuring elements.
    closing = cv2.getStructuringElement(cv2.MORPH_CROSS, close_kernel)
    edged = cv2.morphologyEx(th, cv2.MORPH_CLOSE, closing)
    opening = cv2.getStructuringElement(cv2.MORPH_CROSS, open_kernel)
    edged = cv2.morphologyEx(edged, cv2.MORPH_OPEN, opening)

    if debug == 1:
        print(line_type + ' line saved')
        cv2.imwrite(outimgpath + line_type + '.jpg' , edged)
    return edged

# Parameters for the 80 bills of wintech ferrites:
#   horizontal: close_kernel = (17,1), open_kernel = (3,1)
#   vertical:   close_kernel = (1,1),  open_kernel = (3,1)
# (mask() below currently passes close_kernel = (5,1) for horizontal lines.)
def mask(img , outimgpath = None, debug = 0):
    """Combine the horizontal and vertical line images into a single mask.

    ``line`` is run once per orientation. The two results are inverted, added
    with ``cv2.add`` and the sum is inverted again.

    Parameters
    ----------
    img : numpy.ndarray
        Image forwarded to ``line``.
    outimgpath : str, optional
        Path prefix for debug images; needed when ``debug == 1``.
    debug : int, optional
        ``1`` writes ``outimgpath + 'mask.jpg'`` and prints a message; the
        flag is also forwarded to ``line``.

    Returns
    -------
    numpy.ndarray
        The combined mask.
    """
    horizontal = line(img , (3,1) , outimgpath , 'horizontal' ,
                      open_kernel = (3,1) , close_kernel = (5,1) , debug = debug)
    vertical = line(img , (1,3) , outimgpath , 'verticle' ,
                    open_kernel = (3,1) , close_kernel = (1,1) , debug = debug)

    inverted_sum = cv2.add(cv2.bitwise_not(vertical), cv2.bitwise_not(horizontal))
    combined = cv2.bitwise_not(inverted_sum)

    if debug == 1:
        cv2.imwrite(outimgpath+'mask.jpg' , combined)
        print('mask saved')
    return combined

def draw_contours(mask , c_img,outimgpath , debug = 0):
    """Find contours in ``mask`` and box the large ones on ``c_img``.

    A contour is kept when its bounding rectangle is taller than 50 px and
    wider than 70 px. For each kept contour a green rectangle is drawn onto
    ``c_img``, and the annotated image is then written to disk.

    Parameters
    ----------
    mask : numpy.ndarray
        Image handed to ``cv2.findContours``.
    c_img : numpy.ndarray
        Image the rectangles are drawn on.
    outimgpath : str
        Output path when ``debug == 0``; path prefix (``'contours.jpg'`` is
        appended) when ``debug == 1``.
    debug : int, optional
        Selects the output naming above; ``1`` also prints a message. Any
        other value writes nothing.

    Returns
    -------
    list
        The contours that passed the size filter.
    """
    # findContours returns (image, contours, hierarchy) on OpenCV 3.x but
    # (contours, hierarchy) on 2.x and 4.x. The contours are always the
    # second-to-last item, so index from the end instead of unpacking.
    contours = cv2.findContours(mask,cv2.RETR_TREE,cv2.CHAIN_APPROX_SIMPLE)[-2]

    contour = []
    for cnt in contours:
        x,y,w,h = cv2.boundingRect(cnt)
        if h>50 and w >70:
            contour.append(cnt)
            cv2.rectangle(c_img,(x,y),(x+w,y+h),(0,255,0),2)

    if debug == 0 :
        cv2.imwrite(outimgpath , c_img)
    elif debug == 1:
        cv2.imwrite(outimgpath+'contours.jpg' , c_img)
        print('contours saved')
    return contour

# Processing Code - Process all image Contours

In [4]:
# %load gen_xml.py
from xml.etree.ElementTree import Element , SubElement , Comment
import xml.etree.ElementTree as et 
from xml.dom import minidom

def prettify(elem):
    """Serialise ``elem`` to an indented XML string (two spaces per level)."""
    serialized = et.tostring(elem, 'utf-8')
    return minidom.parseString(serialized).toprettyxml(indent="  ")

def cmn_xml(folder_name , img_name , path, img_size):
    """Create the ``<annotation>`` root element with the per-image header.

    The root receives a ``contours`` comment followed by ``folder``,
    ``filename``, ``path``, ``source/database`` (always ``'Unknown'``) and
    ``size`` (``width``, ``height``, ``depth``) children.

    Parameters
    ----------
    folder_name : str
        Text of the ``folder`` element.
    img_name : str
        Text of the ``filename`` element.
    path : str
        Text of the ``path`` element.
    img_size : sequence
        ``(height, width)`` or ``(height, width, depth)``, e.g. an array's
        ``shape``. A two-item size (single-channel image) is recorded with
        depth 1 instead of raising IndexError.

    Returns
    -------
    xml.etree.ElementTree.Element
        The populated ``annotation`` element.
    """
    top = et.Element('annotation')
    top.append(et.Comment('contours'))

    for tag, text in (('folder', folder_name), ('filename', img_name), ('path', path)):
        SubElement(top, tag).text = text

    src = SubElement(top , 'source')
    SubElement(src , 'database').text = 'Unknown'

    # Grayscale shapes have no channel entry; treat them as one channel.
    depth = img_size[2] if len(img_size) > 2 else 1

    size = SubElement(top , 'size')
    for tag, value in (('width', img_size[1]), ('height', img_size[0]), ('depth', depth)):
        SubElement(size, tag).text = str(value)
    return top

def xml_object(top , i , contour):
    """Append one ``<object>`` entry with its bounding box to ``top``.

    Parameters
    ----------
    top : xml.etree.ElementTree.Element
        Element that receives the new ``object`` child.
    i : object
        Value written (via ``str``) into the ``name`` element.
    contour : sequence
        Indexable as ``[xmin, ymin, xmax, ymax]``.

    Returns
    -------
    xml.etree.ElementTree.Element
        ``top`` itself, after the child has been appended.
    """
    obj = SubElement(top , 'object')

    header = (
        ('name', str(i)),
        ('pose', 'Unspecified'),
        ('truncated', str(0)),
        ('difficult', str(0)),
    )
    for tag, text in header:
        SubElement(obj, tag).text = text

    box = SubElement(obj , 'bndbox')
    for position, tag in enumerate(('xmin', 'ymin', 'xmax', 'ymax')):
        SubElement(box, tag).text = str(contour[position])

    return top

def gen_xml(contours ,folder, img_name , file_path , img_size):
    """Build the annotation tree for one image from its contours.

    The header comes from ``cmn_xml``; each contour then contributes one
    ``object`` entry, named by its position in ``contours``, whose box is the
    contour's ``cv2.boundingRect`` expressed as ``(x, y, x + w, y + h)``.

    Returns
    -------
    xml.etree.ElementTree.Element
        The ``annotation`` root element.
    """
    top = cmn_xml(folder , img_name , file_path ,img_size)
    for index, cnt in enumerate(contours):
        left, upper, width, height = cv2.boundingRect(cnt)
        corners = (left, upper, left + width, upper + height)
        top = xml_object(top , index , corners)
    return top

In [8]:
# Batch run: for every .jpg in input_dir write the line mask, the image with
# contour boxes drawn on it, and an XML annotation file.
input_dir= '../resources/Bill2/kartik/'
outdir=input_dir+'mask/'
outdir_contour = input_dir+'contours/'
outdir_xml = input_dir+'xml/'

# cv2.imwrite only returns False when the target directory is missing and
# ElementTree.write raises, so make sure the output folders exist first.
for directory in (outdir, outdir_contour, outdir_xml):
    os.makedirs(directory, exist_ok=True)

counter=0
# sorted(): glob order is filesystem-dependent; sorting makes runs repeatable.
for imgpath in sorted(glob(input_dir+'*.jpg')):
    print(imgpath)
    imgname = os.path.basename(imgpath)
    # splitext keeps dots inside the stem ('a.b.jpg' -> 'a.b'), so two images
    # can no longer collapse onto the same XML file name.
    xml_name = os.path.splitext(imgname)[0]
    img,c_img =process_save_img(imgpath,None , debug = 0)
    # The line filters blur the colour image and convert it to grayscale
    # themselves, so hand mask() the colour image rather than the Otsu result.
    m = mask(c_img)
    cv2.imwrite(outdir+imgname , m)
    outfilepath = outdir_contour+imgname
    contours = draw_contours(m,c_img,outfilepath)
    #print(type(contours))
    x = gen_xml(contours=contours , file_path=imgpath , folder='invoices' , img_name=imgname , img_size=c_img.shape)
    tree = et.ElementTree(x)
    tree.write(outdir_xml+xml_name+'.xml')
    counter+=1

../resources/Bill2/kartik/3.jpg
../resources/Bill2/kartik/4.jpg
../resources/Bill2/kartik/2.jpg
../resources/Bill2/kartik/1.jpg


In [None]:
# Re-export the XML for the image left over from the batch loop.
# NOTE(review): this cell depends on `imgpath`, `contours` and `c_img` still
# holding the values from the last loop iteration of the previous cell.
imgname = os.path.basename(imgpath)
xml_name = os.path.splitext(imgname)[0]

xml_dir = '../resources/Bill2/xml/'
os.makedirs(xml_dir, exist_ok=True)

# Use the colour image's shape (height, width, depth), as the batch loop does;
# the thresholded `img` is 2-D and carries no depth entry.
x = gen_xml(contours=contours , file_path=imgpath , folder='invoices' , img_name=imgname , img_size=c_img.shape)
tree = et.ElementTree(x)
tree.write(xml_dir+xml_name+'.xml')