# Initialize

In [None]:
#imports
import cv2
from matplotlib import pyplot as plt

import os

# Hyperparameters shared by the processing steps defined in this file.
# Input and output directories (relative paths, written with a trailing "/").
input_path = "../image-data/"
output_path = "../HWR_results/"
# Size of the rectangular structuring element used for the pre-processing
# dilation in read_and_process.
# NOTE(review): a 1x1 rectangle presumably leaves the image unchanged --
# confirm this is intentional.
Dilate_pre = (1, 1)
# Size of the rectangular structuring element used for the line-connecting
# dilation in segment_lines.
Dilate_line = (250, 1)
# Gaussian blur kernel size used by segment_lines before thresholding.
Line_blur = (301, 51)
# Vertical padding, in pixels, added above and below every line box.
Line_pad = 25
# Threshold value passed to cv2.threshold (used with THRESH_BINARY_INV).
Thresh_lb = 200
# Smallest bounding-box area (w * h) kept for segmented lines; anything
# smaller is filtered out.
min_line_area = 50000
# Smallest bounding-box area (w * h) kept for segmented chars; anything
# smaller is filtered out.
min_char_area = 500

# Resolution, in dots per inch, that matplotlib uses for displayed figures
# ('figure.dpi') and for figures written with savefig ('savefig.dpi').
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.dpi'] = 100

# utility function to crop a rectangular region out of an image
def extract(img, x, y, w, h):
    """Return the part of ``img`` spanning rows y..y+h and columns x..x+w.

    Args:
        img: Array indexed as ``img[row, column]``.
        x: Column index of the left edge of the region.
        y: Row index of the top edge of the region.
        w: Width of the region in columns.
        h: Height of the region in rows.

    Returns:
        The result of slicing ``img`` with those row and column ranges.
        Ranges that run past the array are truncated by normal slicing rules.
    """
    row_range = slice(y, y + h)
    col_range = slice(x, x + w)
    return img[row_range, col_range]

# Images

In [None]:
# File names of all input images to process. Each entry is a bare file name
# (no directory part); it is appended to input_path when the image is read and
# reused, minus its extension, to name the output files.
imgs = [
    "P21-Fg006-R-C01-R01-binarized.jpg",
    "P22-Fg008-R-C01-R01-binarized.jpg",
    "P106-Fg002-R-C01-R01-binarized.jpg",
    "P123-Fg001-R-C01-R01-binarized.jpg",
    "P123-Fg002-R-C01-R01-binarized.jpg",
    "P166-Fg002-R-C01-R01-binarized.jpg",
    "P166-Fg007-R-C01-R01-binarized.jpg",
    "P168-Fg016-R-C01-R01-binarized.jpg",
    "P172-Fg001-R-C01-R01-binarized.jpg",
    "P342-Fg001-R-C01-R01-binarized.jpg",
    "P344-Fg001-R-C01-R01-binarized.jpg",
    "P423-1-Fg002-R-C01-R01-binarized.jpg",
    "P423-1-Fg002-R-C02-R01-binarized.jpg",
    "P513-Fg001-R-C01-R01-binarized.jpg",
    "P564-Fg003-R-C01-R01-binarized.jpg",
    "P583-Fg002-R-C01-R01-binarized.jpg",
    "P583-Fg006-R-C01-R01-binarized.jpg",
    "P632-Fg001-R-C01-R01-binarized.jpg",
    "P632-Fg002-R-C01-R01-binarized.jpg",
    "P846-Fg001-R-C01-R01-binarized.jpg",
]

# reads an image and does very initial pre-processing
def read_and_process(path):
    """Load the image at ``path`` and apply the initial clean-up steps.

    The image is inverted, dilated with a ``Dilate_pre``-sized rectangle,
    inverted back, and dilated once more with the same kernel.

    Args:
        path: Path of the image file to read.

    Returns:
        The pre-processed image as returned by OpenCV.

    Raises:
        OSError: If OpenCV could not read the file (missing, unreadable or
            not a supported image format).
    """
    img = cv2.imread(path)
    # cv2.imread signals failure by returning None instead of raising; fail
    # here with the offending path rather than deep inside a later cv2 call.
    if img is None:
        raise OSError("could not read image: " + str(path))
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, Dilate_pre)
    # Dilate the inverted image, then flip back to the original polarity.
    img = cv2.bitwise_not(img)
    img = cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel)
    img = cv2.bitwise_not(img)
    # Second dilation, this time on the original polarity.
    img = cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel)
    return img

# Draws bounding boxes on an image

In [None]:
def draw_boxes(img, bboxes):
    """Return a copy of ``img`` with a rectangle outlined for every box.

    Args:
        img: Image to annotate; it is copied, not modified.
        bboxes: Iterable of ``(x, y, w, h)`` boxes, where ``(x, y)`` is the
            top-left corner and ``w``/``h`` are the width and height.

    Returns:
        The annotated copy of ``img``.
    """
    canvas = img.copy()
    # (0, 0, 255) is red when the image is in OpenCV's BGR channel order.
    outline_colour = (0, 0, 255)
    outline_thickness = 2
    for left, top, width, height in bboxes:
        top_left = (left, top)
        bottom_right = (left + width, top + height)
        cv2.rectangle(canvas, top_left, bottom_right, outline_colour, outline_thickness)
    return canvas

# Line segmentation

In [None]:
# full line segmentation function
def segment_lines(img):
    """Find bounding boxes of text lines in ``img``.

    The image is blurred (more horizontally than vertically), thresholded
    and dilated so that the characters of one line merge into a single blob;
    the external contours of those blobs give the line boxes. Each box is
    padded vertically by ``Line_pad`` and boxes whose padded area is below
    ``min_line_area`` are dropped.

    The intermediate threshold and mask images are shown with ``plt.imshow``.

    Args:
        img: Colour image (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        List of ``(x, y, w, h)`` tuples. The vertical extent of every box is
        clamped to the rows of ``img``, so the boxes can be used directly for
        slicing.
    """
    img_height = img.shape[0]
    # step 1: blur, more horizontally than vertically
    blur = cv2.GaussianBlur(img, Line_blur, 0)
    # step 2: threshold
    img_gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    _, thresh2 = cv2.threshold(img_gray, Thresh_lb, 255, cv2.THRESH_BINARY_INV)
    plt.imshow(thresh2)
    # step 3: dilate
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, Dilate_line)
    mask = cv2.morphologyEx(thresh2, cv2.MORPH_DILATE, kernel)
    plt.imshow(mask)
    # step 4: identify contours
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version;
    # pick the contour list in either case.
    contours = contours[0] if len(contours) == 2 else contours[1]
    # step 5: build padded bounding boxes
    bboxes = []
    for cntr in contours:
        x, y, w, h = cv2.boundingRect(cntr)
        # The area filter uses the full padded height, before clamping, so a
        # line touching the image border is judged like any other line.
        if w * (h + Line_pad * 2) < min_line_area:
            continue
        # Clamp the padded box to the image: a negative y would wrap around
        # when the box is used as a slice and yield an empty or wrong crop.
        top = max(y - Line_pad, 0)
        bottom = min(y + h + Line_pad, img_height)
        bboxes.append((x, top, w, bottom - top))
    return bboxes

# Character segmentation

In [None]:
# simple approach for now, warrants more research
def segment_chars(img):
    """Find bounding boxes of character candidates in a line image.

    The image is inverse-thresholded at ``Thresh_lb``, converted to
    grayscale, and the bounding rectangle of every external contour is
    collected. Rectangles whose area is below ``min_char_area`` are dropped.

    Args:
        img: Colour image of a single line (it is converted with
            ``COLOR_BGR2GRAY`` after thresholding).

    Returns:
        List of ``(x, y, w, h)`` tuples, in the order OpenCV reports the
        contours.
    """
    _, binary = cv2.threshold(img, Thresh_lb, 255, cv2.THRESH_BINARY_INV)
    binary_gray = cv2.cvtColor(binary, cv2.COLOR_BGR2GRAY)
    found = cv2.findContours(binary_gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    if len(found) == 2:
        outlines = found[0]
    else:
        outlines = found[1]
    rects = (cv2.boundingRect(outline) for outline in outlines)
    return [(x, y, w, h) for x, y, w, h in rects if w * h >= min_char_area]

# Gather results

In [None]:
# creates folder ../HWR_results and writes a LOT of images:
#   line_boxes/  each input image with its line boxes drawn on it
#   lines/       one cropped image per detected line
#   char_boxes/  each line image with its character boxes drawn on it
#   chars/       one cropped image per detected character
line_boxes_dir = output_path + "line_boxes/"
lines_dir = output_path + "lines/"
char_boxes_dir = output_path + "char_boxes/"
chars_dir = output_path + "chars/"
# makedirs with exist_ok creates output_path itself when needed and does
# nothing for folders that already exist.
for out_dir in (line_boxes_dir, lines_dir, char_boxes_dir, chars_dir):
    os.makedirs(out_dir, exist_ok=True)

for path in imgs:
    # file name without its extension, used as the prefix of every output
    stem = os.path.splitext(path)[0]

    img = read_and_process(input_path + path)
    bboxes = segment_lines(img)
    linebox_img = draw_boxes(img, bboxes)
    cv2.imwrite(line_boxes_dir + stem + ".bmp", linebox_img)

    for line_n, (x, y, w, h) in enumerate(bboxes):
        line_name = stem + "-line" + str(line_n)
        line_img = extract(img, x, y, w, h)
        cv2.imwrite(lines_dir + line_name + ".bmp", line_img)

        bboxes_chars = segment_chars(line_img)
        charbox_img = draw_boxes(line_img, bboxes_chars)
        cv2.imwrite(char_boxes_dir + line_name + ".bmp", charbox_img)

        # character boxes are relative to the line image, not the full page
        for char_n, (x2, y2, w2, h2) in enumerate(bboxes_chars):
            char_img = extract(line_img, x2, y2, w2, h2)
            cv2.imwrite(chars_dir + line_name + "-char" + str(char_n) + ".bmp", char_img)