# Lab 6: Сегментация текста 

### Phrase: еңбек етсең ерінбей, тояды қарның тіленбей
[ If you work hard without being lazy, your belly will be full without begging ]
[ Если будешь трудиться не ленясь, будешь сыт, не попрошайничая ]

In [29]:
import os
import numpy as np
from PIL import Image, ImageOps, ImageDraw, ImageFont
import matplotlib.pyplot as plt

In [30]:
def generate_phrase_profile(img: np.ndarray):
    """Save bar charts of the ink profiles of a phrase image.

    A pixel counts as ink when its value differs from 255. Two charts are
    written into ``output/profile/``:

    * ``horizontal_profile.png`` - ink count per column (sum over axis 0),
      drawn as vertical bars indexed from 1;
    * ``vertical_profile.png`` - ink count per row (sum over axis 1),
      drawn as horizontal bars indexed from 1.

    Args:
        img: 2-D array of pixel values.
    """
    profile_dir = "output/profile"
    # savefig does not create missing directories, so make sure the target
    # exists even when this function is called on its own.
    os.makedirs(profile_dir, exist_ok=True)

    ink = img != 255

    horizontal_profile = np.sum(ink, axis=0)
    plt.bar(
        x=np.arange(start=1, stop=ink.shape[1] + 1).astype(int),
        height=horizontal_profile,
        width=0.9,
    )
    plt.savefig(os.path.join(profile_dir, "horizontal_profile.png"))
    # Clear the current figure so the next chart starts from a blank canvas.
    plt.clf()

    vertical_profile = np.sum(ink, axis=1)
    plt.barh(
        y=np.arange(start=1, stop=ink.shape[0] + 1).astype(int),
        width=vertical_profile,
        height=0.9,
    )
    plt.savefig(os.path.join(profile_dir, "vertical_profile.png"))
    plt.clf()

In [31]:
def simple_binarization(img, threshold):
    """Binarize an image with a single global threshold.

    Args:
        img: Array-like of pixel intensities (any numeric or boolean dtype).
        threshold: Pixels strictly greater than this value become 255;
            all other pixels become 0.

    Returns:
        A ``uint8`` array with the same shape as ``img`` that contains only
        the values 0 and 255.
    """
    pixels = np.asarray(img)
    # Build the result directly as uint8: writing 255 into a buffer that
    # shares the input dtype would overflow for narrow dtypes (bool, int8).
    return np.where(pixels > threshold, np.uint8(255), np.uint8(0))

In [32]:
def generate_phrase_image(input_phrase, phrase_font, threshold):
    """Render a phrase glyph by glyph, binarize it and save the results.

    Characters whose font mask is falsy are skipped. Each remaining glyph
    is drawn on a white "L" canvas, advancing by the glyph's mask width
    plus a fixed 5-pixel gap. The canvas is then thresholded with
    ``simple_binarization`` and saved as ``output/original_phrase.bmp``;
    its profiles are plotted with ``generate_phrase_profile`` and the
    inverted image is saved as ``output/inverted_phrase.bmp``.

    Args:
        input_phrase: Text to render.
        phrase_font: Font object providing ``getmask`` and accepted by
            ``ImageDraw.text``.
        threshold: Threshold forwarded to ``simple_binarization``.

    Returns:
        The binarized phrase image as a numpy array.
    """
    gap = 5
    top_offset = 30

    # Measure every drawable glyph once: (character, mask width, mask height).
    glyphs = []
    for symbol in input_phrase:
        glyph_mask = phrase_font.getmask(symbol)
        if not glyph_mask:
            continue
        bbox = glyph_mask.getbbox()
        glyphs.append((symbol, bbox[2], bbox[3]))

    # The gap is budgeted between every pair of characters of the phrase,
    # including the characters that were skipped above.
    canvas_width = sum(w for _, w, _ in glyphs) + gap * (len(input_phrase) - 1)
    tallest = max([0, *(h for _, _, h in glyphs)])

    canvas = Image.new("L", (canvas_width, tallest + 40), color="white")
    pen = ImageDraw.Draw(canvas)

    cursor_x = 0
    for symbol, glyph_w, glyph_h in glyphs:
        pen.text((cursor_x, top_offset - glyph_h), symbol, "black", font=phrase_font)
        cursor_x += glyph_w + gap

    binary_pixels = simple_binarization(np.array(canvas), threshold)
    binary_image = Image.fromarray(binary_pixels)
    binary_image.save("output/original_phrase.bmp")

    np_img = np.array(binary_image)
    generate_phrase_profile(np_img)
    ImageOps.invert(binary_image).save("output/inverted_phrase.bmp")
    return np_img

In [33]:
def segment_phrase(input_image):
    """Split a binarized phrase image into runs of ink columns.

    A column counts as ink when at least one of its pixels equals 0. Each
    maximal run of consecutive ink columns yields one ``(left, right)``
    pair:

    * ``left`` is the column just before the first ink column of the run,
      clamped to 0 so that a run touching the left edge never produces a
      negative index;
    * ``right`` is the first blank column after the run, or the image
      width when the run reaches the right edge.

    Args:
        input_image: 2-D array-like of pixel values where 0 marks ink.

    Returns:
        List of ``(left, right)`` integer tuples, ordered left to right.
    """
    profile = np.sum(np.asarray(input_image) == 0, axis=0)
    boundaries = []

    run_start = None  # first ink column of the current run, if any
    for column, ink_count in enumerate(profile):
        if ink_count > 0:
            if run_start is None:
                run_start = column
        elif run_start is not None:
            boundaries.append((max(run_start - 1, 0), column))
            run_start = None

    # A run that is still open at the right edge gets the same one-column
    # left margin as every other run.
    if run_start is not None:
        boundaries.append((max(run_start - 1, 0), len(profile)))

    return boundaries

In [34]:
def create_boxes(img, bounds):
    """Draw a full-height box around every segment and save the result.

    The image is converted to RGB before drawing: on a single-channel "L"
    image the "red" outline would be reduced to a gray level and would not
    appear red. The annotated image is written to
    ``output/segmented_phrase.bmp``.

    Args:
        img: 2-D numpy array holding the phrase image.
        bounds: Iterable of ``(start, end)`` column pairs, such as the
            list returned by ``segment_phrase``.
    """
    image = Image.fromarray(img).convert("RGB")
    draw = ImageDraw.Draw(image)

    top, bottom = 0, img.shape[0]
    for left, right in bounds:
        draw.rectangle([left, top, right, bottom], outline="red", width=2)

    # Make sure the target directory exists when this is called on its own.
    os.makedirs("output", exist_ok=True)
    image.save("output/" + "segmented_phrase.bmp")

In [35]:
def main():
    """Render the sample phrase, segment it into letters and save all outputs."""
    original_phrase = "еңбек етсең ерінбей, тояды қарның тіленбей"
    font_size = 52

    font = ImageFont.truetype("Times New Roman.ttf", font_size)

    # generate_phrase_profile saves its charts into output/profile/, so the
    # nested directory has to exist, not just output/ itself.
    os.makedirs(os.path.join("output", "profile"), exist_ok=True)

    generated_img = generate_phrase_image(original_phrase, font, 75)
    letter_bounds = segment_phrase(generated_img)
    create_boxes(generated_img, letter_bounds)

# Run the pipeline only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()

<Figure size 640x480 with 0 Axes>