# Initialize

In [None]:
#imports
import cv2
from matplotlib import pyplot as plt

# hyperparameters
# (width, height) of the rectangular kernel used for the pre-processing dilation
Dilate_pre = (1, 1)
# (width, height) of the rectangular kernel that joins characters into lines;
# wide and flat so blobs grow horizontally only
Dilate_line = (250, 1)
# (width, height) of the Gaussian blur kernel used before line detection;
# cv2.GaussianBlur requires both values to be odd
Line_blur = (301, 51)
# pixels of padding added above and below every detected line box
Line_pad = 25
# threshold value handed to cv2.threshold; with THRESH_BINARY_INV, pixels at or
# below this value become foreground (255)
Thresh_lb = 200
# minimum bounding-box area (w * h) for a line; smaller boxes are dropped
min_line_area = 50000
# minimum bounding-box area (w * h) for a character; smaller boxes are dropped
min_char_area = 500

# raise matplotlib's figure resolution (dots per inch) for both displayed and
# saved figures, so images shown in the notebook are larger and sharper
plt.rcParams.update({
    'figure.dpi': 150,
    'savefig.dpi': 150,
})

# utility function to extract a segment of an image
def extract(img, x, y, w, h):
    """Return the region of ``img`` described by the box ``(x, y, w, h)``.

    Args:
        img: array indexed as ``img[row, column]`` (e.g. an OpenCV image).
        x: column of the box's left edge.
        y: row of the box's top edge.
        w: box width in columns.
        h: box height in rows.

    Returns:
        The slice ``img[y:y+h, x:x+w]``, with the part of the box that lies
        above or left of the image cut off.
    """
    # A negative slice bound means "count from the end" in Python/NumPy, so a
    # box starting above or left of the image would wrap around and come back
    # empty or wrong. Clamp both bounds at 0 instead; bounds past the far edge
    # are already truncated by slicing itself.
    x0, y0 = max(x, 0), max(y, 0)
    x1, y1 = max(x + w, 0), max(y + h, 0)
    return img[y0:y1, x0:x1]

In [None]:
# read the image and very initial pre-processing
image_path = "../image-data/P123-Fg002-R-C01-R01-binarized.jpg"
img = cv2.imread(image_path)
# cv2.imread does not raise on failure, it returns None; fail here with a clear
# message instead of an opaque OpenCV error in the next call
if img is None:
    raise FileNotFoundError("image is missing or unreadable: " + image_path)
# dilate the inverted image, invert back, then dilate again with the same
# kernel (presumably dark ink on a light background, so the first pass thickens
# strokes and the second thins them again -- TODO confirm)
img = cv2.bitwise_not(img)
kernel = cv2.getStructuringElement(cv2.MORPH_RECT, Dilate_pre)
img = cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel)
img = cv2.bitwise_not(img)
img = cv2.morphologyEx(img, cv2.MORPH_DILATE, kernel)
plt.imshow(img)

# line segmentation

In [None]:
# full line segmentation function
def segment_lines(img):
    """Find the bounding boxes of text lines in a page image.

    The image is blurred (more horizontally than vertically), thresholded and
    dilated with a wide, flat kernel; every external contour of the resulting
    mask yields one candidate box, which is padded vertically by ``Line_pad``.
    A copy of the image with the accepted boxes drawn on it is displayed with
    matplotlib.

    Args:
        img: 3-channel image (it is converted with ``cv2.COLOR_BGR2GRAY``).

    Returns:
        List of ``(x, y, w, h)`` tuples. Candidates whose padded area is below
        ``min_line_area`` are dropped; the remaining boxes are clipped
        vertically so they never extend above or below the image.
    """
    img_height = img.shape[0]
    # step 1: blur, more horizontally than vertically
    blur = cv2.GaussianBlur(img, Line_blur, 0)
    # step 2: threshold
    img_gray = cv2.cvtColor(blur, cv2.COLOR_BGR2GRAY)
    _, thresh2 = cv2.threshold(img_gray, Thresh_lb, 255, cv2.THRESH_BINARY_INV)
    plt.imshow(thresh2)
    # step 3: dilate
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, Dilate_line)
    mask = cv2.morphologyEx(thresh2, cv2.MORPH_DILATE, kernel)
    plt.imshow(mask)
    # step 4: identify contours
    bboxes = []
    bboxes_img = img.copy()
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns (contours, hierarchy) or (image, contours, hierarchy)
    # depending on the OpenCV version; pick the contour list in either case
    contours = contours[0] if len(contours) == 2 else contours[1]
    # step 5: draw bounding boxes
    for cntr in contours:
        x, y, w, h = cv2.boundingRect(cntr)
        # the size filter looks at the padded, unclipped box
        if w * (h + Line_pad * 2) < min_line_area:
            continue
        # pad vertically, but keep the box inside the image: a negative y or a
        # bottom edge past the last row would make later slicing wrap around
        # or silently shrink
        top = max(y - Line_pad, 0)
        bottom = min(y + h + Line_pad, img_height)
        y, h = top, bottom - top
        cv2.rectangle(bboxes_img, (x, y), (x + w, y + h), (0, 0, 255), 2)
        bboxes.append((x, y, w, h))
    plt.imshow(bboxes_img)
    plt.show()
    return bboxes
# run line segmentation on the pre-processed page; bboxes is a list of
# (x, y, w, h) line boxes that the following cells iterate over
bboxes = segment_lines(img)

In [None]:
# display every segmented line of img as its own figure
for line_box in bboxes:
    x, y, w, h = line_box
    line_img = extract(img, x, y, w, h)
    plt.imshow(line_img)
    plt.show()

# character segmentation

In [None]:
# simple approach for now, warrants more research
def segment_chars(img):
    """Find bounding boxes of character candidates in a line image.

    The image is thresholded with ``Thresh_lb`` (inverted binary), converted
    to grayscale, and its external contours are turned into bounding
    rectangles. A preview with the accepted rectangles drawn on it is shown
    with matplotlib.

    Args:
        img: 3-channel line image (it goes through ``cv2.COLOR_BGR2GRAY``).

    Returns:
        List of ``(x, y, w, h)`` tuples, relative to ``img``, for every
        contour whose bounding-box area is at least ``min_char_area``.
    """
    _, binary = cv2.threshold(img, Thresh_lb, 255, cv2.THRESH_BINARY_INV)
    gray = cv2.cvtColor(binary, cv2.COLOR_BGR2GRAY)
    found = cv2.findContours(gray, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # the contour list sits at index 0 or 1 depending on the OpenCV version
    contours = found[0] if len(found) == 2 else found[1]

    # keep only rectangles that are large enough to be a character
    rects = (cv2.boundingRect(contour) for contour in contours)
    char_boxes = [(x, y, w, h) for x, y, w, h in rects if w * h >= min_char_area]

    # draw the accepted boxes on the re-inverted thresholded image
    preview = cv2.bitwise_not(binary)
    for x, y, w, h in char_boxes:
        cv2.rectangle(preview, (x, y), (x + w, y + h), (0, 0, 255), 2)
    plt.imshow(preview)
    plt.show()
    return char_boxes

In [None]:
# segment every line into characters (segment_chars displays the boxes it
# finds) and gather the cropped character images in char_imgs
char_imgs = []
for x, y, w, h in bboxes:
    line_img = extract(img, x, y, w, h)
    bboxes_chars = segment_chars(line_img)
    # character boxes are relative to the line image, so crop from line_img
    char_imgs.extend(
        [extract(line_img, x2, y2, w2, h2) for x2, y2, w2, h2 in bboxes_chars]
    )