In [8]:
import numpy as np
import cv2
import os

In [9]:
# Features defined as global variables.
# Each one is overwritten by the extraction function that computes it
# (e.g. straighten() sets BASELINE_ANGLE, extractLines() sets LETTER_SIZE,
# LINE_SPACING and TOP_MARGIN).
BASELINE_ANGLE = 0.0
TOP_MARGIN = 0.0
LETTER_SIZE = 0.0
LINE_SPACING = 0.0
WORD_SPACING = 0.0
PEN_PRESSURE = 0.0
SLANT_ANGLE = 0.0

# Tuning thresholds referenced by the extraction functions below.
# They were used but never defined, which raised NameError at call time.

# Minimum bounding-box height (pixels) for a contour to count as a handwritten line.
MIN_HANDWRITING_HEIGHT_PIXEL = 20
# Horizontal-projection level that marks the start/end of a text line inside a contour.
# NOTE(review): no value is stated anywhere in this notebook; 6000 is an assumed
# starting point -- tune and confirm against sample images.
ANCHOR_POINT = 6000
# Horizontal-projection level above which a pixel row is counted as letter mid-zone.
MIDZONE_THRESHOLD = 15000

# Bilateral filtering: smooths the image to remove noise.

def bilateralFilter(image,d):
    """Return `image` after cv2.bilateralFilter.

    image -- input image, handed to OpenCV unchanged.
    d     -- diameter of the pixel neighbourhood used by the filter.

    The colour and spatial sigmas are both fixed at 50.
    """
    sigma_color = 50
    sigma_space = 50
    return cv2.bilateralFilter(image, d, sigma_color, sigma_space)


In [10]:
# Median filtering for noise removal (salt-and-pepper noise).

def MedianFilter(image, d):
    """Return `image` after cv2.medianBlur.

    image -- input image, handed to OpenCV unchanged.
    d     -- aperture size passed straight to cv2.medianBlur
             (OpenCV expects an odd integer greater than 1).
    """
    return cv2.medianBlur(image, d)

In [11]:
# Inverted binary threshold: a pixel whose gray value is greater than the
# threshold becomes 0, every other pixel becomes the max value (255).

def threshold(image, t):
    """Convert `image` to grayscale and binarise it with THRESH_BINARY_INV.

    image -- input image; a 3-dimensional array is converted with
             COLOR_BGR2GRAY (so it is presumably in OpenCV's BGR order),
             a 2-dimensional array is treated as already grayscale.
    t     -- threshold value; pixels > t map to 0, pixels <= t map to 255.

    Returns the binarised single-channel image.
    """
    # The colour-conversion code is cv2.COLOR_BGR2GRAY; cv2.BGR2GRAY does not exist.
    if image.ndim == 3:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    else:
        # cvtColor rejects single-channel input for BGR2GRAY, so skip the conversion.
        gray = image
    _, binary = cv2.threshold(gray, t, 255, cv2.THRESH_BINARY_INV)
    return binary

In [12]:
# dilate: grows the bright (foreground) regions of the image, thickening strokes and merging nearby ones.
# erode: shrinks the bright (foreground) regions of the image, thinning strokes and removing small specks.

def dilate(image, kernelSize):
    """Dilate `image` once with an all-ones rectangular kernel.

    image      -- input image, handed to cv2.dilate unchanged.
    kernelSize -- shape tuple given to np.ones to build the structuring
                  element, e.g. (5, 100).

    Returns the dilated image.
    """
    # The dtype must be spelled np.uint8; a bare `uint8` is an undefined name.
    kernel = np.ones(kernelSize, np.uint8)
    image  = cv2.dilate(image, kernel, iterations = 1)
    return image

def erode(image, kernelSize):
    """Erode `image` once with an all-ones rectangular kernel.

    image      -- input image, handed to cv2.erode unchanged.
    kernelSize -- shape tuple given to np.ones to build the structuring
                  element, e.g. (3, 3).

    Returns the eroded image.
    """
    # The dtype must be spelled np.uint8; a bare `uint8` is an undefined name.
    kernel = np.ones(kernelSize, np.uint8)
    # This function previously called cv2.dilate, which did the opposite of its name.
    image  = cv2.erode(image, kernel, iterations = 1)
    return image

In [1]:
# Finding contours and straightening them horizontally

def straighten(image):
    """Straighten each handwritten line in `image` and record the baseline angle.

    The image is filtered, binarised and dilated with a wide kernel so that
    each text line becomes one contour. Every contour that looks like a full
    line is rotated by its minAreaRect angle and written back into `image`.

    image -- image array; it is MODIFIED IN PLACE. threshold() converts it
             from BGR, so it is presumably a colour page with dark ink on a
             light background -- confirm against the caller.

    Side effect: sets the global BASELINE_ANGLE to the mean rotation angle of
    the straightened contours (0.0 when no contour qualifies).

    Returns the same (modified) image.
    """
    global BASELINE_ANGLE

    # Honour a notebook-level MIN_HANDWRITING_HEIGHT_PIXEL if one is defined,
    # otherwise fall back to the (arbitrary) 20 pixels.
    min_height = globals().get('MIN_HANDWRITING_HEIGHT_PIXEL', 20)

    angle_sum = 0.0
    contour_count = 0

    # applying bilateral filter
    filtered = bilateralFilter(image, 3)

    # convert to grayscale and convert to binary image using inverted binary
    thresh = threshold(filtered, 120)

    # dilate the handwritten lines in image with a suitable kernel for contour operation
    dilated = dilate(thresh, (5, 100))

    # findContours returns (image, contours, hierarchy) on OpenCV 3.x and
    # (contours, hierarchy) on other versions; [-2] is the contour list in both.
    ctrs = cv2.findContours(dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    for ctr in ctrs:
        x, y, w, h = cv2.boundingRect(ctr)

        # The contour cannot be a text line if it is taller than wide or too short.
        if h > w or h < min_height:
            continue

        # If the line is shorter than half of the document width (typically the
        # last line), blank it out: it may yield an inaccurate baseline angle
        # which would then distort the features extracted afterwards.
        if w < image.shape[1] / 2:
            image[y:y+h, x:x+w] = 255
            continue

        # We extract the region of interest/contour to be straightened.
        roi = image[y:y+h, x:x+w]

        # minAreaRect is necessary for straightening; element [2] is the angle.
        angle = cv2.minAreaRect(ctr)[2]
        # Fold the angle into [-45, 45] so it measures tilt from the horizontal.
        # The first branch is the original handling for negative angles.
        if angle < -45.0:
            angle += 90.0
        elif angle > 45.0:
            # NOTE(review): added for newer OpenCV releases, which are assumed
            # to report this angle in (0, 90] -- confirm against the reference.
            angle -= 90.0

        # Rotate about the centre of the ROI. The matrix is applied to the
        # cropped ROI, so the centre must be in ROI coordinates, not page ones.
        rot = cv2.getRotationMatrix2D((w / 2.0, h / 2.0), angle, 1)
        extract = cv2.warpAffine(roi, rot, (w, h), borderMode = cv2.BORDER_CONSTANT, borderValue = (255,255,255))

        # image is overwritten with the straightened contour
        image[y:y+h, x:x+w] = extract

        angle_sum += angle
        contour_count += 1

    # mean angle of the contours (not lines) is found; guard against a page
    # on which no contour qualified as a line.
    if contour_count:
        mean_angle = angle_sum / contour_count
    else:
        mean_angle = 0.0
    BASELINE_ANGLE = mean_angle
    return image

# function to calculate horizontal projection of the image pixel rows and return it
def horizontalProjection(img):
    """Return a list with the sum of the pixel values in each row of `img`.

    img -- array with rows on axis 0; any further axes (columns, channels)
           are summed into the row total.

    Returns a Python list with one number per row, top to bottom.
    """
    # The parameter is `img`; the old body read an undefined name `image`.
    pixels = np.asarray(img)
    # Collapse every axis except the row axis in a single vectorised sum.
    row_axes = tuple(range(1, pixels.ndim))
    return np.sum(pixels, axis=row_axes).tolist()

# function to calculate vertical projection of the image pixel columns and return it
def verticalProjection(img):
    """Return a list with the sum of the pixel values in each column of `img`.

    img -- array with rows on axis 0 and columns on axis 1; rows and any
           further axes (channels) are summed into the column total.

    Returns a Python list with one number per column, left to right.
    """
    # The parameter is `img`; the old body read an undefined name `image`.
    pixels = np.asarray(img)
    # Collapse every axis except the column axis in a single vectorised sum.
    column_axes = (0,) + tuple(range(2, pixels.ndim))
    return np.sum(pixels, axis=column_axes).tolist()

# function to extract lines of handwritten text from the image using horizontal projection

def _scanProjectionRuns(hpList, minLineHeight):
    """Split the horizontal projection into ink runs (contours) and blank runs.

    Returns (lines, space_zero): `lines` is a list of [top, bottom) row ranges
    of the accepted contours, `space_zero` the heights of the blank runs.
    """
    lineTop = 0
    spaceTop = 0
    setLineTop = True
    setSpaceTop = True
    includeNextSpace = True
    space_zero = []          # stores the amount of space between lines
    lines = []               # [top, bottom) row indices of each contour
    lastIndex = len(hpList) - 1

    for i, rowSum in enumerate(hpList):
        if rowSum == 0:
            # blank row
            if setSpaceTop:
                spaceTop = i
                setSpaceTop = False      # set once at the start of each blank run
            spaceBottom = i + 1
            if i < lastIndex and hpList[i+1] == 0:
                continue                 # still inside the same blank run
            if includeNextSpace:
                space_zero.append(spaceBottom - spaceTop)
            else:
                # The preceding contour was too thin to be a line: merge the
                # previous space, the thin contour and this space into one.
                previous = space_zero.pop() if space_zero else 0
                space_zero.append(previous + spaceBottom - lineTop)
            setSpaceTop = True
        else:
            # ink row: part of a contour
            if setLineTop:
                lineTop = i
                setLineTop = False       # set once at the start of each contour
            lineBottom = i + 1
            if i < lastIndex:
                if hpList[i+1] > 0:
                    continue             # still inside the same contour
                # a contour thinner than minLineHeight rows is ignored
                if lineBottom - lineTop < minLineHeight:
                    includeNextSpace = False
                    setLineTop = True
                    continue
            includeNextSpace = True      # contour accepted, the next space counts on its own
            lines.append([lineTop, lineBottom])
            setLineTop = True

    return lines, space_zero


def _splitContourIntoLines(hpList, line, anchorPoint, minLineHeight):
    """Cut one contour into individual text lines at the valleys of the projection.

    Rows where the projection rises to `anchorPoint` (start of a line) and
    falls back to it (end of a line) are recorded alternately; the contour is
    cut midway between the end of one line and the start of the next.
    Returns a list of [top, bottom) row ranges (empty if the contour is ignored).
    """
    top, bottom = line
    anchorRows = []
    upHill = True            # True: looking for a line start, False: for a line end
    for row in range(top, bottom):
        rowSum = hpList[row]
        if upHill:
            if rowSum >= anchorPoint:
                anchorRows.append(row)
                upHill = False
        elif rowSum <= anchorPoint:
            anchorRows.append(row)
            upHill = True

    # we can ignore the contour here: no complete rise-and-fall was found
    if len(anchorRows) < 2:
        return []

    pieces = []
    lineTop = top
    # anchorRows[x] is the end of one line, anchorRows[x+1] the start of the next
    for x in range(1, len(anchorRows) - 1, 2):
        # 'lineMid' is the row where the segmentation is done; integer
        # division keeps it usable as a slice index.
        lineMid = (anchorRows[x] + anchorRows[x+1]) // 2
        if lineMid - lineTop < minLineHeight:
            continue         # too thin to stand alone; leave it joined to the next piece
        pieces.append([lineTop, lineMid])
        lineTop = lineMid
    if bottom - lineTop >= minLineHeight:
        pieces.append([lineTop, bottom])
    return pieces


def extractLines(img):
    """Segment `img` into text lines and extract the line-based features.

    img -- page image; it is passed through bilateralFilter() and threshold(),
           so it is presumably a BGR colour image -- confirm against the caller.

    Side effects: sets the globals LETTER_SIZE (average number of mid-zone
    rows per line), LINE_SPACING (average spacing rows per line divided by the
    letter size) and TOP_MARGIN (top blank rows divided by the letter size).

    Returns the list of [top, bottom) row ranges of the individual lines.
    """
    global LETTER_SIZE
    global LINE_SPACING
    global TOP_MARGIN

    # Honour notebook-level tuning constants if they are defined.
    # NOTE(review): the 6000 fallback for ANCHOR_POINT is an assumption (no
    # value is stated in this notebook); 15000 is the documented mid-zone value.
    anchorPoint = globals().get('ANCHOR_POINT', 6000)
    midzoneThreshold = globals().get('MIDZONE_THRESHOLD', 15000)
    minLineHeight = 20       # contours/lines thinner than this many rows are ignored

    # apply bilateral filter
    filtered = bilateralFilter(img, 5)

    # convert to grayscale and binarize the image by INVERTED binary thresholding
    thresh = threshold(filtered, 160)

    # list holding the horizontal projection (sum of each pixel row)
    hpList = horizontalProjection(thresh)

    # Extracting 'Top Margin' feature: leading rows holding at most one ink
    # pixel (a row sum of 0 or 255).
    topMarginCount = 0
    for rowSum in hpList:
        if rowSum > 255:
            break
        topMarginCount += 1

    # FIRST we extract the straightened contours by looking at runs of 0's
    # in the horizontal projection.
    lines, space_zero = _scanProjectionRuns(hpList, minLineHeight)

    # SECOND we extract the individual lines from the contours found above.
    fineLines = []           # [top, bottom) row indices of each individual line
    for line in lines:
        fineLines.extend(_splitContourIntoLines(hpList, line, anchorPoint, minLineHeight))

    # LINE SPACING and LETTER SIZE are extracted here.
    # Rows of a line below midzoneThreshold belong to its upper/lower zones and
    # count as spacing (together with the blank runs in space_zero); rows at or
    # above the threshold belong to the mid-zone and count towards letter size.
    # Both totals are divided by the number of lines that have a mid-zone.
    space_nonzero_row_count = 0
    midzone_row_count = 0
    lines_having_midzone_count = 0

    for top, bottom in fineLines:
        segment = hpList[top:bottom]
        midzone_rows = 0
        for rowSum in segment:
            if rowSum >= midzoneThreshold:
                midzone_rows += 1
        midzone_row_count += midzone_rows
        space_nonzero_row_count += len(segment) - midzone_rows
        # This line has contributed at least one mid-zone pixel row
        if midzone_rows:
            lines_having_midzone_count += 1

    # error prevention: avoid dividing by zero when no line has a mid-zone
    if lines_having_midzone_count == 0:
        lines_having_midzone_count = 1

    # excluding first and last entries of space_zero: top and bottom margins
    total_space_row_count = space_nonzero_row_count + np.sum(space_zero[1:-1])

    average_line_spacing = float(total_space_row_count) / lines_having_midzone_count
    average_letter_size = float(midzone_row_count) / lines_having_midzone_count

    # letter size is the height of the letter; width is not considered
    LETTER_SIZE = average_letter_size

    # error prevention: the ratios below divide by the letter size
    if average_letter_size == 0:
        average_letter_size = 1

    # Line spacing is taken relative to the average letter size.
    relative_line_spacing = average_line_spacing / average_letter_size
    LINE_SPACING = relative_line_spacing

    # Top margin is also taken relative to the average letter size.
    relative_top_margin = float(topMarginCount) / average_letter_size
    TOP_MARGIN = relative_top_margin

    return fineLines
                
    

    
    

