In [1]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage.filters import threshold_local
from ipynb.fs.full.PhotoScaner import *
import imutils

In [2]:
def binarization_inv(img):
    """Return an inverted, locally thresholded binary version of ``img``.

    Parameters
    ----------
    img : array
        Image to binarize. An array with more than two dimensions is
        converted to grayscale with ``cv2.COLOR_BGR2GRAY``; a
        two-dimensional array is used as-is.

    Returns
    -------
    array of uint8
        255 where the gray value is at or below the local threshold
        (``threshold_local`` with block size 101 and offset 25), 0 elsewhere.
    """
    has_channels = len(img.shape) > 2
    gray = cv2.cvtColor(img.copy(), cv2.COLOR_BGR2GRAY) if has_channels else img
    threshold_map = threshold_local(gray, 101, offset=25)
    # Pixels above the local threshold become 255 here ...
    above_threshold = (gray > threshold_map).astype("uint8") * 255
    # ... and the inversion turns them into 0, leaving the rest at 255.
    return cv2.bitwise_not(above_threshold)

In [3]:
def get_lines_for_roi(orig_im, roi):
    """Find the external contours inside ``roi``, ordered left to right.

    Parameters
    ----------
    orig_im : array
        Not used by this function; kept so existing callers keep working.
    roi : array
        Image region that is binarized with ``binarization_inv`` and then
        searched for contours.

    Returns
    -------
    list
        External contours of the binarized region, sorted by the x
        coordinate of their bounding rectangle. Coordinates are relative
        to ``roi``.
    """
    binary_local = binarization_inv(roi)
    # cv2.findContours returns (contours, hierarchy) in OpenCV 2.4/4.x but
    # (image, contours, hierarchy) in 3.x. The contour list is always the
    # second-to-last element, so index it instead of unpacking two values.
    ctrs = cv2.findContours(binary_local, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    return sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])

In [4]:
def count_avg(line):
    """Return the mean vertical centre of the boxes in ``line``.

    Each item is indexed as ``item[1]`` (y) and ``item[3]`` (height); its
    centre is ``y + height / 2``.

    Raises
    ------
    ZeroDivisionError
        If ``line`` is empty.
    """
    centre_total = 0
    n_boxes = 0
    # enumerate leaves n_boxes at the number of items seen (0 if none).
    for n_boxes, box in enumerate(line, start=1):
        centre_total += box[1] + box[3] / 2
    return centre_total / n_boxes

In [5]:
def connect_in_lines(sorted_bounds, max_center_offset=30):
    """Group bounding boxes into text lines.

    Boxes are consumed in the order given. A box joins the current line
    while its vertical centre is at most ``max_center_offset`` below the
    running average centre of that line (computed by ``count_avg``);
    otherwise the current line is closed and the box starts a new one.

    Parameters
    ----------
    sorted_bounds : iterable of [x, y, w, h]
        Bounding boxes; the only caller in this file passes them sorted
        by ``y``.
    max_center_offset : number, optional
        Largest distance a box centre may lie below the line average and
        still belong to the line. Defaults to 30.

    Returns
    -------
    list of list
        One list of boxes per line, each sorted by ``x``. For empty input
        the result is ``[[]]`` (a single empty line).
    """
    lines = []
    line = []
    line_y_avg = 0
    for bound in sorted_bounds:
        center_y = bound[1] + bound[3] / 2
        # line_y_avg == 0 doubles as the "no line started yet" marker.
        if line_y_avg == 0 or line_y_avg >= center_y - max_center_offset:
            line.append(bound)
        else:
            lines.append(sorted(line, key=lambda word: word[0]))
            line = [bound]
        line_y_avg = count_avg(line)
    # The trailing line needs the same left-to-right ordering as the
    # lines closed inside the loop.
    lines.append(sorted(line, key=lambda word: word[0]))
    return lines

In [6]:
def get_avg(ctrs):
    """Return the mean bounding-rectangle height of ``ctrs``.

    Parameters
    ----------
    ctrs : iterable
        Contours accepted by ``cv2.boundingRect``.

    Returns
    -------
    number
        Average of the rectangle heights, or 0 when ``ctrs`` is empty.
    """
    # Heights must be summed; assigning the latest height instead would
    # return "last height / count" rather than the average.
    heights = [cv2.boundingRect(ctr)[3] for ctr in ctrs]
    if not heights:
        return 0
    return sum(heights) / len(heights)

In [7]:
def delete_overlaped_contours(j, contours, current_contour):
    """Remove, in place, contours from index ``j`` on that ``current_contour`` covers.

    A contour counts as covered when the vertical midpoint of its bounding
    rectangle lies within the vertical extent of ``current_contour``'s
    rectangle and its horizontal extent lies entirely within that
    rectangle's horizontal extent.

    Parameters
    ----------
    j : int
        First index of ``contours`` to examine.
    contours : list
        Contours to prune; modified in place.
    current_contour :
        Contour whose bounding rectangle is the reference.
    """
    left, top, width, height = cv2.boundingRect(current_contour)
    right = left + width
    bottom = top + height
    pos = j
    while pos < len(contours):
        cand_x, cand_y, cand_w, cand_h = cv2.boundingRect(contours[pos])
        cand_mid_y = cand_y + cand_h / 2
        mid_inside = top <= cand_mid_y and bottom >= cand_mid_y
        width_inside = left <= cand_x and right >= cand_x + cand_w
        if mid_inside and width_inside:
            # The next candidate slides into this index, so do not advance.
            del contours[pos]
        else:
            pos += 1

In [8]:
def get_internal_boundings(contours, img, i=0):
    """Prune ``contours`` in place and collect boxes found inside tall contours.

    Starting at index ``i``, each contour's bounding rectangle is checked:

    * height or width below 6: the contour is removed;
    * height above 64: the contour is removed and the matching region of
      ``img`` is searched with ``get_lines_for_roi``; every contour found
      there contributes an ``[x, y, w, h]`` box shifted back into ``img``
      coordinates;
    * otherwise: later contours covered by this one are removed via
      ``delete_overlaped_contours`` and the scan moves on.

    Parameters
    ----------
    contours : list
        Contours to examine; modified in place.
    img : array
        Image the contours come from, indexed as ``img[rows, cols]``.
    i : int, optional
        Index to start from. Defaults to 0.

    Returns
    -------
    list of [x, y, w, h]
        Boxes discovered inside the tall contours.
    """
    nested_boxes = []
    while i < len(contours):
        x, y, w, h = cv2.boundingRect(contours[i])
        if h < 6 or w < 6:
            del contours[i]
        elif h > 64:
            region = img[y:y + h, x:x + w]
            for inner in get_lines_for_roi(img, region):
                in_x, in_y, in_w, in_h = cv2.boundingRect(inner)
                # Inner coordinates are relative to the region; offset them.
                nested_boxes.append([in_x + x, in_y + y, in_w, in_h])
            del contours[i]
        else:
            delete_overlaped_contours(i + 1, contours, contours[i])
            i += 1
    return nested_boxes

In [9]:
def filter_contours(internal_boundings, sorted_ctrs, i=0):
    """Remove overlapping entries from both lists, in place.

    For every box in ``internal_boundings`` (from index ``i`` on):

    1. the box is dropped if its height or width is below 6;
    2. it is compared with each contour in ``sorted_ctrs``: if the
       contour's rectangle covers the box, the box is dropped; if the box
       covers the contour's rectangle, the contour is dropped;
    3. if the box survived, every later box it covers is dropped.

    "Covers" is the pair of vertical/horizontal tests written out below,
    not strict containment.

    Parameters
    ----------
    internal_boundings : list of [x, y, w, h]
        Boxes to filter; modified in place.
    sorted_ctrs : list
        Contours accepted by ``cv2.boundingRect``; modified in place.
    i : int, optional
        Index of the first box to examine. Defaults to 0.

    Returns
    -------
    None
    """
    while(i<len(internal_boundings)):
        x, y, w, h = internal_boundings[i]
        j = 0
        is_deleted = False
        # Discard boxes that are too small; the next box slides into index i.
        if(h<6 or w<6):
            del(internal_boundings[i])
            continue
        while(j<len(sorted_ctrs)):
            x1,y1,w1,h1=cv2.boundingRect(sorted_ctrs[j])
            # Contour covers the box: its vertical span contains the box's
            # bottom edge or the point h/1.75 below the box's top ...
            if((y1<y+h and y1+h1>=y+h) or (y1<y+h/1.75 and y1+h1>=y+h/1.75)):
                # ... and its horizontal span contains the whole box or the
                # point w/1.75 right of the box's left edge.
                if((x1<=x and (x1+w1)>=(x+w)) or (x1<=x+w/1.75 and (x1+w1)>=(x+w/1.75))):
                    del(internal_boundings[i])
                    is_deleted = True
                    break
            # Box covers the contour: same tests with the roles swapped.
            # NOTE(review): the vertical test uses h1/2 here while every
            # other test in this function uses /1.75 - confirm intended.
            if((y<y1+h1 and y+h>=y1+h1) or (y<y1+h1/2 and y+h>=y1+h1/2)):
                if((x<=x1 and (x+w)>=(x1+w1)) or (x<=x1+w1/1.75 and (x+w)>=(x1+w1/1.75))):
                    # Do not advance j: the next contour now sits at index j.
                    del(sorted_ctrs[j])
                    continue
            j+=1
        # Box i was removed, so index i already refers to the next box.
        # Contours deleted before the break stay deleted.
        if(is_deleted):
            continue
        # Drop later boxes that the surviving box i covers.
        q = i+1
        while(q<len(internal_boundings)):
            x1, y1, w1, h1 = internal_boundings[q]
            if((y<=y1+h1 and (y+h)>=(y1+h1)) or (y<=y1+h1/1.75 and (y+h)>=(y1+h1/1.75))):
                if((x<=x1 and (x+w)>=(x1+w1)) or (x<=x1+w1/1.75 and (x+w)>=(x1+w1/1.75))):
                    del(internal_boundings[q])
                    # Offsets the q+=1 below so the same index is rechecked.
                    q=q-1
            q+=1
        i+=1

In [10]:
def merge_boundings(contours, internal_boundings, i=0):
    """Append the bounding rectangles of ``contours`` to ``internal_boundings``.

    Parameters
    ----------
    contours : sequence
        Contours accepted by ``cv2.boundingRect``.
    internal_boundings : list
        Receives one ``[x, y, w, h]`` list per contour; modified in place.
    i : int, optional
        Index of the first contour to add. Defaults to 0.
    """
    for idx in range(i, len(contours)):
        left, top, width, height = cv2.boundingRect(contours[idx])
        internal_boundings.append([left, top, width, height])

In [11]:
def preprocess(IMG_PATH):
    """Detect text boxes in the image at ``IMG_PATH`` and group them into lines.

    Steps:

    1. ``final_transform`` and ``edge_detection`` prepare the image
       (both presumably come from the ``PhotoScaner`` notebook via the
       star import - confirm there what they return).
    2. The edge image is adaptively thresholded and its contours are
       extracted and sorted by the ``y`` of their bounding rectangle.
    3. ``get_internal_boundings`` prunes the contours and collects boxes
       found inside tall contours; ``filter_contours`` removes overlaps
       between the two sets; ``merge_boundings`` adds the remaining
       contours' rectangles to the box list.
    4. The boxes are sorted by ``y`` and grouped with ``connect_in_lines``.

    Parameters
    ----------
    IMG_PATH :
        Passed unchanged to ``final_transform``.

    Returns
    -------
    tuple
        ``(lines, transformed)`` where ``lines`` is the result of
        ``connect_in_lines`` and ``transformed`` is the image returned by
        ``final_transform``.
    """
    transformed = final_transform(IMG_PATH)
    edged = edge_detection(transformed.copy())
    binary = cv2.adaptiveThreshold(edged, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY_INV, 11, 2)
    # cv2.findContours returns (contours, hierarchy) in OpenCV 2.4/4.x but
    # (image, contours, hierarchy) in 3.x; the contours are always the
    # second-to-last element.
    ctrs = cv2.findContours(binary, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)[-2]
    # sorted() yields a list, which the in-place pruning below relies on.
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[1])
    internal_boundings = get_internal_boundings(sorted_ctrs, transformed)
    internal_boundings = sorted(internal_boundings, key=lambda bound: bound[1])
    filter_contours(internal_boundings, sorted_ctrs)
    merge_boundings(sorted_ctrs, internal_boundings)
    internal_boundings = sorted(internal_boundings, key=lambda bound: bound[1])
    return (connect_in_lines(internal_boundings), transformed)