In [2]:
import os as os
import numpy as np
import cv2 as cv2
import math
from matplotlib import pyplot as plt

In [3]:
os.chdir("images")
files = [f for f in os.listdir('.') if os.path.isfile(f)]
os.chdir("..")


In [4]:
ANCHOR_POINT = 6000
MIDZONE_THRESHOLD = 15000
MIN_HANDWRITING_HEIGHT_PIXEL = 20

BASELINE_ANGLE = 0.0
TOP_MARGIN = 0.0
LETTER_SIZE = 0.0
LINE_SPACING = 0.0
WORD_SPACING = 0.0
PEN_PRESSURE = 0.0
SLANT_ANGLE = 0.0

In [5]:
'''image = cv2.imread('images/010-0.png', cv2.IMREAD_GRAYSCALE)

# Calculate the histogram
histogram = cv2.calcHist([image], [0], None, [256], [0, 256])

# Plot the histogram
plt.plot(histogram)
plt.title('Histogram')
plt.xlabel('Pixel Intensity')
plt.ylabel('Frequency')
plt.show()'''

"image = cv2.imread('images/010-0.png', cv2.IMREAD_GRAYSCALE)\n\n# Calculate the histogram\nhistogram = cv2.calcHist([image], [0], None, [256], [0, 256])\n\n# Plot the histogram\nplt.plot(histogram)\nplt.title('Histogram')\nplt.xlabel('Pixel Intensity')\nplt.ylabel('Frequency')\nplt.show()"

In [6]:
def dilate(image, kernalSize):
    #  dilation of objects in the image
    kernel = np.ones(kernalSize, np.uint8)
    image = cv2.dilate(image, kernel, iterations=1)
    return image

In [7]:
def barometer(image):
    # extract average pen pressure of the handwriting
    global PEN_PRESSURE
    #convert to grayscale
    gray_image= cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # invert image
    inverted = cv2.bitwise_not(gray_image)

    # bilateral filtering
    filtered = cv2.bilateralFilter(inverted, 3, 50, 50)

    # binary thresholding
    ret, thresh = cv2.threshold(filtered, 100, 255, cv2.THRESH_TOZERO)

    # add up all the non-zero pixel values in the image and divide by the number of them to find the average pixel value in the whole image
    non_zero_pixels = thresh[thresh > 0]
    if len(non_zero_pixels) > 0:
        average_intensity = np.sum(non_zero_pixels) / len(non_zero_pixels)
    else:
        average_intensity = 0  # Default to 0 if no non-zero pixels are found

    PEN_PRESSURE = average_intensity
    # print ("PEN_PRESSURE: "+str(average_intensity))
    return

#barometer(cv2.imread('images/'+"000-0.png"))


In [8]:
def straighten(image):
    global BASELINE_ANGLE
    
    angle = 0.0
    angle_sum = 0.0
    contour_count = 0

    # Apply bilateral filter
    filtered = cv2.bilateralFilter(image, d=3, sigmaColor=50, sigmaSpace=50)

    # Convert to grayscale and binarize with inverted thresholding
    gray_image = cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)
    ret, thresh = cv2.threshold(gray_image, 120, 255, cv2.THRESH_BINARY_INV)

    # Dilate the lines in the image with a suitable kernel
    dilated = dilate(thresh, (5, 100))

    # Find contours
    ctrs, _ = cv2.findContours(dilated.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    

    for i, ctr in enumerate(ctrs):
        x, y, w, h = cv2.boundingRect(ctr)

        # Skip contours that do not resemble lines
        if h > w or h < MIN_HANDWRITING_HEIGHT_PIXEL:
            continue

        # Extract the region of interest
        roi = image[y:y+h, x:x+w]

        if w < image.shape[1] / 2:
            # If the contour is too narrow, treat it as blank
            image[y:y+h, x:x+w] = 255
            continue

        # Get angle from minAreaRect to straighten the contour
        rect = cv2.minAreaRect(ctr)
        center = rect[0]
        angle = rect[2]

        # Adjust angle if it's less than -45 degrees
        if angle < -45.0:
            angle += 90.0
        elif angle > 45:
            angle -= 90
        

        # Rotate the region of interest based on the calculated angle
        rot = cv2.getRotationMatrix2D(((x + w) / 2, (y + h) / 2), angle, 1)
        straightened = cv2.warpAffine(roi, rot, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=(255, 255, 255))

        

        # Place the straightened contour back into the original image
        image[y:y+h, x:x+w] = straightened

        # Accumulate angles to compute average later
        angle_sum += angle
        contour_count += 1

        
    
    # if image is not None:  
    #         cv2.imshow('image', image)
    #         cv2.waitKey(0) 
    #         cv2.destroyAllWindows()
    
    # print("Angle Sum: ",angle_sum, " Contour Count: ",contour_count)
    # Calculate the mean angle of all contours, ensuring no division by zero
    mean_angle = angle_sum / contour_count if contour_count > 0 else angle_sum
    BASELINE_ANGLE = mean_angle
    # print("BASELINE_ANGLE:", mean_angle)

    return image

#straighten(cv2.imread('images/'+"000-10.png"))


In [24]:
def extractLines(img):
    # extract lines of handwritten text from the image using horizontal projection

    global LETTER_SIZE
    global LINE_SPACING
    global TOP_MARGIN

    # apply bilateral filter
    filtered = cv2.bilateralFilter(img, 5, 50, 50)

    #convert to grayscale
    gray_image= cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)

    # convert to grayscale and binarize the image by INVERTED binary thresholding
    ret, thresh = cv2.threshold(gray_image, 160, 255, cv2.THRESH_BINARY_INV)
    
    # if thresh is not None:  
    #         cv2.imshow('image', thresh)
    #         cv2.waitKey(0) 
    #         cv2.destroyAllWindows()
    # extract a python list containing values of the horizontal projection of the image into 'hp'
    #hpList = np.sum(thresh, axis=1).tolist()
    hpList = np.sum(thresh , axis=1).tolist()
    # plt.plot(hpList)
    # plt.xlabel('Row Index')
    # plt.ylabel('Pixel Intensity Sum')
    # plt.title('Horizontal Projection')
    # plt.show()
    
    # Extracting 'Top Margin' feature.
    topMarginCount = 0
    for row_sum in hpList:
        # If row_sum is 0, we are still in the top margin
        if row_sum < 100:
            topMarginCount += 1
        else:
            break
    #print(f"Top Margin: {topMarginCount} pixels")

    # print "(Top margin row count: "+str(topMarginCount)+")"

    # extract the straightened contours from the image by looking at occurance of 0's in the horizontal projection.
    lineTop = 0
    lineBottom = 0
    spaceTop = 0
    spaceBottom = 0
    indexCount = 0
    setLineTop = True
    setSpaceTop = True
    includeNextSpace = True
    space_zero = []  # stores the amount of space between lines
    lines = []  # a 2D list storing the vertical start index and end index of each contour

    # scanning the whole horizontal projection now
    for i, row_sum in enumerate(hpList):
        # row_sum being 0 means blank space
        if (row_sum == 0):
            if (setSpaceTop):
                spaceTop = indexCount
                setSpaceTop = False  # spaceTop will be set once for each start of a space between lines
            indexCount += 1
            spaceBottom = indexCount
            
            if (i < len(hpList) - 1):  # avoid array index out of bounds error
                # If the next horizontal projection is 0, keep counting; it's still blank space
                if (hpList[i + 1] == 0):
                    continue
            
            # Use this condition if the previous contour is very thin and possibly not a line
            if (includeNextSpace):
                space_zero.append(spaceBottom - spaceTop)
            else:
                previous = space_zero.pop() if space_zero else 0
                space_zero.append(previous + spaceBottom - lineTop)
            
            # Next time we encounter 0, it's the beginning of another space, set new spaceTop
            setSpaceTop = True

        # row_sum greater than 0 means contour
        if (row_sum > 0):
            if (setLineTop):
                lineTop = indexCount
                setLineTop = False  # lineTop will be set once for each start of a new line/contour
            indexCount += 1
            lineBottom = indexCount

            if (i < len(hpList) - 1):  # necessary to avoid array index out of bounds error
                # If the next horizontal projection is > 0, keep counting; it's still in contour
                if (hpList[i + 1] > 0):
                    continue

                # If the line/contour is too thin (< 20 pixels in height), we ignore it.
                if (lineBottom - lineTop < 20):  # Thin contour check
                    includeNextSpace = False
                    setLineTop = True  # Prepare for next line detection
                    continue

            # The line/contour is accepted; new space following it will be accepted
            includeNextSpace = True

            # Append the top and bottom horizontal indices of the line/contour in 'lines'
            lines.append([lineTop, lineBottom])
            setLineTop = True  # Prepare for the next line detection


    # extract the very individual lines from the lines/contours we extracted above.
    fineLines = []  # a 2D list storing the horizontal start index and end index of each individual line
    for i, line in enumerate(lines):

        # anchor will locate the horizontal indices where horizontal projection is > ANCHOR_POINT for uphill or < ANCHOR_POINT for downhill
        anchor = line[0]
        anchorPoints = []  # python list where the indices obtained by 'anchor' will be stored
        # it implies that we expect to find the start of an individual line (vertically), climbing up the histogram
        upHill = True
        # it implies that we expect to find the end of an individual line (vertically), climbing down the histogram
        downHill = False
        # we put the region of interest of the horizontal projection of each contour here
        segment = hpList[line[0]:line[1]]

        for j, sum in enumerate(segment):
            if (upHill):
                if (sum < ANCHOR_POINT):
                    anchor += 1
                    continue
                anchorPoints.append(anchor)
                upHill = False
                downHill = True
            if (downHill):
                if (sum > ANCHOR_POINT):
                    anchor += 1
                    continue
                anchorPoints.append(anchor)
                downHill = False
                upHill = True


        # we can ignore the contour here
        if (len(anchorPoints) < 2):
            continue

        # len(anchorPoints) > 3 meaning contour composed of multiple lines
        lineTop = line[0]
        for x in range(1, len(anchorPoints)-1, 2):
            # 'lineMid' is the horizontal index where the segmentation will be done
            lineMid = (anchorPoints[x]+anchorPoints[x+1])/2
            lineBottom = lineMid
            # line having height of pixels <20 is considered defects, so we just ignore it
            # this is a weakness of the algorithm to extract lines (anchor value is ANCHOR_POINT, see for different values!)
            if (lineBottom-lineTop < 20):
                continue
            fineLines.append([lineTop, lineBottom])
            lineTop = lineBottom
        if (line[1]-lineTop < 20):
            continue
        fineLines.append([lineTop, line[1]])

    # LINE SPACING and LETTER SIZE will be extracted here
    # We will count the total number of pixel rows containing upper and lower zones of the lines and add the space_zero/runs of 0's(excluding first and last of the list ) to it.
    # We will count the total number of pixel rows containing midzones of the lines for letter size.
    # For this, we set an arbitrary (yet suitable!) threshold MIDZONE_THRESHOLD = 15000 in horizontal projection to identify the midzone containing rows.
    # These two total numbers will be divided by number of lines (having at least one row>MIDZONE_THRESHOLD) to find average line spacing and average letter size.

    space_nonzero_row_count = 0
    midzone_row_count = 0
    lines_having_midzone_count = 0
    flag = False

    for i, line in enumerate(fineLines):
        # Convert indices to integers if they are not already
        line_start = int(line[0])  # Convert start index to integer
        line_end = int(line[1])    # Convert end index to integer

        # Check if indices are within bounds
        if line_start < 0 or line_end >= len(hpList):
            print(f"Warning: Line indices {line_start}:{line_end} are out of bounds for hpList.")
            continue  # Skip this iteration if indices are invalid

        segment = hpList[line_start:line_end]  # Extract the relevant segment from hpList
        for j, row_sum in enumerate(segment):
            if (row_sum < MIDZONE_THRESHOLD):
                space_nonzero_row_count += 1
            else:
                midzone_row_count += 1
                flag = True

        # This line has contributed at least one count of pixel row of midzone
        if flag:
            lines_having_midzone_count += 1
            flag = False  # Reset flag for the next line

    # Error prevention
    if (lines_having_midzone_count == 0):
        lines_having_midzone_count = 1  # Ensure at least 1 if none found



    # excluding first and last entries: Top and Bottom margins
    total_space_row_count = space_nonzero_row_count + np.sum(space_zero[1:-1])
    # the number of spaces is 1 less than number of lines but total_space_row_count contains the top and bottom spaces of the line
    average_line_spacing = float(
        total_space_row_count) / lines_having_midzone_count
    average_letter_size = float(midzone_row_count) / lines_having_midzone_count
    # letter size is actually height of the letter and we are not considering width
    LETTER_SIZE = average_letter_size
    # error prevention ^-^
    if (average_letter_size == 0):
        average_letter_size = 1
    # We can't just take the average_line_spacing as a feature directly. We must take the average_line_spacing relative to average_letter_size.
    # Let's take the ratio of average_line_spacing to average_letter_size as the LINE SPACING, which is perspective to average_letter_size.
    relative_line_spacing = average_line_spacing / average_letter_size
    LINE_SPACING = relative_line_spacing

    # Top marging is also taken relative to average letter size of the handwritting
    relative_top_margin = float(topMarginCount) / average_letter_size
    TOP_MARGIN = relative_top_margin

    print("LETTER_SIZE: ",LETTER_SIZE)
    print("LINE_SPACING: ",LINE_SPACING)
    print("TOP_MARGIN: ",TOP_MARGIN)

    
    return fineLines

extractLines(straighten(cv2.imread('images/'+"000-0.png")))

LETTER_SIZE:  12.142857142857142
LINE_SPACING:  4.435294117647059
TOP_MARGIN:  2.635294117647059


[[70, 102],
 [154, 182],
 [221, 246.5],
 [298, 329],
 [371, 401],
 [440, 467],
 [515, 538]]

In [49]:
def extractWords(image, lines):

    global LETTER_SIZE
    global WORD_SPACING

    # apply bilateral filter
    filtered = cv2.bilateralFilter(image, 5, 50, 50)

    #convert to grayscale
    gray_image= cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)

    # convert to grayscale and binarize the image by INVERTED binary thresholding
    ret, thresh = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY_INV)

    # Width of the whole document is found once.
    width = thresh.shape[1]
    space_zero = []  # stores the amount of space between words
    words = []  # a 2D list storing the coordinates of each word: y1, y2, x1, x2

    # Isolated words or components will be extacted from each line by looking at occurance of 0's in its vertical projection.
    for i, line in enumerate(lines):
        # Ensure line values and width are integers
        y1, y2 = int(line[0]), int(line[1])  # Use int() instead of float()
        extract = thresh[y1:y2, 0:int(width)]  # y1:y2, x1:x2

        # Compute vertical projection on the extracted segment
        vp = np.sum(extract, axis=0).tolist()
        

        wordStart = 0
        wordEnd = 0
        spaceStart = 0
        spaceEnd = 0
        indexCount = 0
        setWordStart = True
        setSpaceStart = True
        includeNextSpace = True
        spaces = []

        # we are scanning the vertical projection
        for j, sum in enumerate(vp):
            # sum being 0 means blank space
            if (sum == 0):
                if (setSpaceStart):
                    spaceStart = indexCount
                    # spaceStart will be set once for each start of a space between lines
                    setSpaceStart = False
                indexCount += 1
                spaceEnd = indexCount
                if (j < len(vp)-1):  # this condition is necessary to avoid array index out of bound error
                    # if the next vertical projectin is 0, keep on counting, it's still in blank space
                    if (vp[j+1] == 0):
                        continue

                # we ignore spaces which is smaller than half the average letter size
                if ((spaceEnd-spaceStart) > int(LETTER_SIZE/2)):
                    spaces.append(spaceEnd-spaceStart)

                # next time we encounter 0, it's begining of another space so we set new spaceStart
                setSpaceStart = True

            # sum greater than 0 means word/component
            if (sum > 0):
                if (setWordStart):
                    wordStart = indexCount
                    setWordStart = False  # wordStart will be set once for each start of a new word/component
                indexCount += 1
                wordEnd = indexCount
                if (j < len(vp)-1):  # this condition is necessary to avoid array index out of bound error
                    # if the next horizontal projectin is > 0, keep on counting, it's still in non-space zone
                    if (vp[j+1] > 0):
                        continue

                # append the coordinates of each word/component: y1, y2, x1, x2 in 'words'
                # we ignore the ones which has height smaller than half the average letter size
                # this will remove full stops and commas as an individual component
                count = 0
                line_start = int(line[0])
                line_end = int(line[1])

                for k in range(line_end - line_start):
                    row = thresh[line_start + k:line_start + k + 1, wordStart:wordEnd]  # y1:y2, x1:x2
                    if np.sum(row):
                        count += 1

                if count > int(LETTER_SIZE / 2):
                    words.append([line_start, line_end, wordStart, wordEnd])

                # Next time we encounter value > 0, it's the beginning of another word/component, so we set a new wordStart
                setWordStart = True


        space_zero.extend(spaces[1:-1])

    # print space_zero
    space_columns = np.sum(space_zero)
    space_count = len(space_zero)
    if (space_count == 0):
        space_count = 1
    average_word_spacing = float(space_columns) / space_count
    if LETTER_SIZE == 0.0:
        relative_word_spacing = average_word_spacing
    else:
        relative_word_spacing = average_word_spacing / LETTER_SIZE
    # used to be divideed but LETTER_SIZE
    WORD_SPACING = relative_word_spacing
    # print ("Average word spacing: "+str(average_word_spacing))
    # print ("Average word spacing relative to average letter size: "+str(relative_word_spacing))

    return words

In [54]:
def extractSlant(img, words):

    global SLANT_ANGLE

    if not words:
        return 0  # or an appropriate error value

    # 9 different values of angle
    
    theta = [-0.785398, -0.523599, -0.261799, -0.0872665,
             0.01, 0.0872665, 0.261799, 0.523599, 0.785398]

    # Corresponding index of the biggest value in s_function will be the index of the most likely angle in 'theta'
    s_function = [0.0] * 9
    count_ = [0]*9

    # apply bilateral filter
    filtered = cv2.GaussianBlur(img, (3, 3), 0)

    #convert to grayscale
    gray_image= cv2.cvtColor(filtered, cv2.COLOR_BGR2GRAY)

    # convert to grayscale and binarize the image by INVERTED binary thresholding
    ret, thresh = cv2.threshold(gray_image, 100, 255, cv2.THRESH_BINARY_INV)

    # loop for each value of angle in theta
    for i, angle in enumerate(theta):
        s_temp = 0.0  # overall sum of the functions of all the columns of all the words!
        count = 0  # just counting the number of columns considered to contain a vertical stroke and thus contributing to s_temp

        # loop for each word
        for j, word in enumerate(words):
            original = thresh[word[0]:word[1], word[2]:word[3]]  # y1:y2, x1:x2
            
            height, width = original.shape
            # height = word[1]-word[0]
            # width = word[3]-word[2]

            # the distance in pixel we will shift for affine transformation
            # it's divided by 2 because the uppermost point and the lowermost points are being equally shifted in opposite directions
            shift = (math.tan(angle) * height) / 2

            # the amount of extra space we need to add to the original image to preserve information
            # yes, this is adding more number of columns but the effect of this will be negligible
            pad_length = abs(int(shift))

            # create a new image that can perfectly hold the transformed and thus widened image
            # blank_image = np.zeros((height, width+pad_length*2, 3), np.uint8)
            # new_image = cv2.cvtColor(blank_image, cv2.COLOR_BGR2GRAY)
            # new_image[:, pad_length:width+pad_length] = original

            new_image = np.pad(original, ((0, 0), (pad_length, pad_length)), mode='constant', constant_values=0)

            # points to consider for affine transformation
            (height, width) = new_image.shape[:2]
            x1 = width/2
            y1 = 0
            x2 = width/4
            y2 = height
            x3 = 3*width/4
            y3 = height

            pts1 = np.float32([[x1, y1], [x2, y2], [x3, y3]])
            pts2 = np.float32([[x1+shift, y1], [x2-shift, y2], [x3-shift, y3]])

            
            M = cv2.getAffineTransform(pts1, pts2)
            #deslanted = cv2.warpAffine(new_image, M, (width, height))
            deslanted = cv2.warpAffine(new_image, M, (new_image.shape[1], height), borderMode=cv2.BORDER_CONSTANT, borderValue=0)

            # find the vertical projection on the transformed image
            vp = np.sum(deslanted, axis=0)
            

            # loop for each value of vertical projection, which is for each column in the word image
            for k, col_value in enumerate(vp):
                # the columns is empty
                if (col_value == 0):
                    continue

                # this is the number of foreground pixels in the column being considered
                num_fgpixel = col_value / 255

                # if number of foreground pixels is less than onethird of total pixels, it is not a vertical stroke so we can ignore
                if (num_fgpixel < height/3):
                    continue

                # the column itself is extracted, and flattened for easy operation
                # column = deslanted[0:height, k:k+1]
                # column = column.flatten()

                # # now we are going to find the distance between topmost pixel and bottom-most pixel
                # # l counts the number of empty pixels from top until and upto a foreground pixel is discovered
                # for l, pixel in enumerate(column):
                #     if (pixel == 0):
                #         continue
                #     break
                # # m counts the number of empty pixels from bottom until and upto a foreground pixel is discovered
                # for m, pixel in enumerate(column[::-1]):
                #     if (pixel == 0):
                #         continue
                #     break

                # # the distance is found as delta_y, I just followed the naming convention in the research paper I followed
                # delta_y = height - (l+m)

                top_pixel = next(i for i, px in enumerate(deslanted[:, k]) if px != 0)
                bottom_pixel = next(i for i, px in enumerate(deslanted[:, k][::-1]) if px != 0)
                
                delta_y = height - (top_pixel + bottom_pixel)

                # h_sq = (float(num_fgpixel)/delta_y)**2

                # # multiplying by a factor of num_fgpixel/height to the above function to yeild better result
                # # this will also somewhat negate the effect of adding more columns and different column counts in the transformed image of the same word
                # h_wted = (h_sq * num_fgpixel) / height


                # # add up the values from all the loops of ALL the columns of ALL the words in the image
                # s_temp += h_wted

                h_sq = (num_fgpixel / delta_y) ** 2
                s_temp += h_sq * num_fgpixel / height
                count += 1

                count += 1

        s_function[i] = s_temp / count if count > 0 else 0
        count_[i] = count

    # finding the largest value and corresponding index
    max_value = 0.0
    max_index = 4
    # for index, value in enumerate(s_function):
    #     # print str(index)+" "+str(value)+" "+str(count_[index])
    #     if (value > max_value):
    #         max_value = value
    #         max_index = index
    
    for index, value in enumerate(s_function):
        if value > max_value:
            max_value = value
            max_index = index  # Capture the index of the new max value

    if max_index != -1:
        angle = math.degrees(theta[max_index])

    print("Max Index: ",max_index, "Angle: ",angle)

    # add another value 9 manually to indicate irregular slant behaviour.
    # This will be seen as value 4 (no slant) but 2 corresponding angles of opposite sign will have very close values.
    if (max_index == 0):
        angle = 45
        result = " : Extremely right slanted"
    elif (max_index == 1):
        angle = 30
        result = " : Above average right slanted"
    elif (max_index == 2):
        angle = 15
        result = " : Average right slanted"
    elif (max_index == 3):
        angle = 5
        result = " : A little right slanted"
    elif (max_index == 5):
        angle = -5
        result = " : A little left slanted"
    elif (max_index == 6):
        angle = -15
        result = " : Average left slanted"
    elif (max_index == 7):
        angle = -30
        result = " : Above average left slanted"
    elif (max_index == 8):
        angle = -45
        result = " : Extremely left slanted"
    elif (max_index == 4):
        if s_function[3] == 0.0:
            p = s_function[4]  # / s_function[3]
            q = s_function[4]  # / s_function[5]
        else:
            p = s_function[4] / s_function[3]
            q = s_function[4] / s_function[5]
        # print 'p='+str(p)+' q='+str(q)
        if ((p <= 1.2 and q <= 1.2) or (p > 1.4 and q > 1.4)):
            angle = 0
            result = " : No slant"
        elif ((p <= 1.2 and q-p > 0.4) or (q <= 1.2 and p-q > 0.4)):
            angle = 0
            result = " : No slant"
        else:
            max_index = 9
            angle = 180
            result = " : Irregular slant behaviour"

        if angle == 0:
            print("\n************************************************")
            print("Slant determined to be straight.")
        else:
            print("\n************************************************")
            print("Slant determined to be irregular.")
        # type = input("Press enter if okay, else enter c to change: ")
        # if type == 'c':
        #     if angle == 0:
        #         angle = 180
        #         result = " : Irregular Slant"
        #         print("Set as"+result)
        #         print("************************************************\n")
        #     else:
        #         angle = 0
        #         result = " : Straight/No Slant"
        #         print("Set as"+result)
        #         print("************************************************\n")
        # else:
        #     print("No Change!")
        #     print("************************************************\n")

    SLANT_ANGLE = angle
    return

In [58]:
def start(file_name):
    global BASELINE_ANGLE
    global TOP_MARGIN
    global LETTER_SIZE
    global LINE_SPACING
    global WORD_SPACING
    global PEN_PRESSURE
    global SLANT_ANGLE
    
    image = cv2.imread('images/'+file_name) 

    #Pen Pressure
    barometer(image)

    # straightened image without filtration and binarization
    straightened = straighten(image)

    # extract lines of handwritten text from the image using the horizontal projection
    lineIndices = extractLines(straightened)

    # extract words from each line using vertical projection
    wordCoordinates = extractWords(straightened, lineIndices)

    # extract average slant angle of all the words containing a long vertical stroke
    extractSlant(straightened, wordCoordinates)

    BASELINE_ANGLE = round(BASELINE_ANGLE, 2)
    TOP_MARGIN = round(TOP_MARGIN, 2)
    LETTER_SIZE = round(LETTER_SIZE, 2)
    LINE_SPACING = round(LINE_SPACING, 2)
    WORD_SPACING = round(WORD_SPACING, 2)
    PEN_PRESSURE = round(PEN_PRESSURE, 2)
    SLANT_ANGLE = round(SLANT_ANGLE, 2)

    return [BASELINE_ANGLE, TOP_MARGIN, LETTER_SIZE, LINE_SPACING, WORD_SPACING, PEN_PRESSURE, SLANT_ANGLE]

start("000-34.png")


LETTER_SIZE:  13.666666666666666
LINE_SPACING:  3.048780487804878
TOP_MARGIN:  3.073170731707317
Max Index:  6 Angle:  14.99997778074544


[0.19, 3.07, 13.67, 3.05, 1.98, 164.55, -15]

In [13]:
page_ids = []
if os.path.isfile("raw_feature_list"):
    print("Info: raw_feature_list already exists.")
    with open("raw_feature_list", "r") as label:
        for line in label:
            content = line.split()
            page_id = content[-1]
            page_ids.append(page_id)

with open("raw_feature_list", "a") as label:
    count = len(page_ids)
    for file_name in files:
        if (file_name in page_ids):
            continue
        features = start(file_name)
        features.append(file_name)
        for i in features:
            label.write(f"{i}\t")
        label.write("\n")
        count += 1
        progress = (count*100)/len(files)
        print (str(count)+' '+file_name+' '+str(progress)+'%')
    print("Done!")

Info: raw_feature_list already exists.
Done!
