In [54]:
def pad_with(vector, pad_width, iaxis, kwargs):
    pad_value = kwargs.get('padder', 0)
    vector[:pad_width[0]] = pad_value
    vector[-pad_width[1]:] = pad_value

In [58]:
def split_in_lines(gray_img, bin_img, min_line_height = 10):
    '''
    function that split an image into lines
    '''

    # count
    ones = np.sum(bin_img,1)

    # histogram
    mean = np.mean(ones * 1.0) / 5
    histo = (ones > mean) * 1

    # get rising and falling edges from histogram
    shifted = np.roll(histo, -1, 0)
    shifted[-1] = histo[-1]
    edges = histo - shifted
    print(type((edges == -1).nonzero()))
    rising_indices = np.array((edges == -1).nonzero()).flatten()
    falling_indices = np.array((edges == 1).nonzero()).flatten()
    if len(falling_indices) < 2 or len(rising_indices) < 2:
        return np.array([gray_img]), np.array([bin_img])
    # make starting with rising not falling 
    if falling_indices[0] < rising_indices[0]:
        falling_indices = falling_indices[1:]
    # make ending with falling not rising 
    if rising_indices[-1] > falling_indices[-1]:
        rising_indices = rising_indices[:-1]

    # cut image on histo
    gray_lines = []
    bin_lines = []
    line_count = min(rising_indices.shape[0], falling_indices.shape[0])
    for i in range(line_count):
        line_height = falling_indices[i] - rising_indices[i]
        # split gray with padding
        start_split = max(rising_indices[i] - line_height//3, 0)
        end_split = min(falling_indices[i] + line_height//3, gray_img.shape[0])
        gray_line = gray_img[start_split:end_split]
        # split binary
        bin_line = bin_img[rising_indices[i]:falling_indices[i]]
        # pad binary
        bin_line = np.pad(bin_line, line_height//3, pad_with)
        # filter if less than 10 pixels
        if line_height > min_line_height:
            gray_lines.append(gray_line)
            bin_lines.append(1-bin_line)
    gray_lines = np.array(gray_lines)
    bin_lines = np.array(bin_lines)
    return gray_lines, bin_lines
    

In [59]:
from skimage import io
import os
import torch
import numpy as np
from skimage.color import rgb2gray
import time

img =  rgb2gray(io.imread('./1.png'))
# clip
gray_img = img[int(0.25*img.shape[0]):int(0.7*img.shape[0]), int(0.1*img.shape[1]):int(0.9*img.shape[1])]

# binary
bin_img = 1 - ((gray_img > 128) * 1)

start_time = time.time()
gray_lines, bin_lines = split_in_lines_2(gray_img,bin_img)
print("--- %s seconds taken ---" % (time.time() - start_time))

for i in range(gray_lines.shape[0]):
    io.imsave("lines/gray_line" + str(i) + ".png", gray_lines[i])
    io.imsave("lines/bin_line" + str(i) + ".png", bin_lines[i])


  img =  rgb2gray(io.imread('./1.png'))
  gray_lines = np.array(gray_lines)
  bin_lines = np.array(bin_lines)
  io.imsave("lines/bin_line" + str(i) + ".png", bin_lines[i])


<class 'tuple'>
--- 0.038521528244018555 seconds taken ---


  io.imsave("lines/bin_line" + str(i) + ".png", bin_lines[i])
  io.imsave("lines/bin_line" + str(i) + ".png", bin_lines[i])
  io.imsave("lines/bin_line" + str(i) + ".png", bin_lines[i])
  io.imsave("lines/bin_line" + str(i) + ".png", bin_lines[i])
  io.imsave("lines/bin_line" + str(i) + ".png", bin_lines[i])


In [None]:
# torch
def split_in_lines(img, min_line_height = 10):
    '''
    function that split an image into lines
    '''
    # clip
    img = img[int(0.25*img.shape[0]):int(0.7*img.shape[0]), int(0.1*img.shape[1]):int(0.9*img.shape[1])]

    # binary
    img = 1 - ((img > 128) * 1)

    # torch tensor
    img_tensor = torch.tensor(img)

    # count
    ones = torch.sum(img_tensor,1)

    # histogram
    mean = torch.mean(ones * 1.0) / 5
    histo = (ones > mean) * 1

    # get rising and falling edges from histogram
    shifted = torch.roll(histo, -1, 0)
    shifted[-1] = histo[-1]
    edges = histo - shifted
    rising_indices = torch.flatten((edges == -1).nonzero())
    falling_indices = torch.flatten((edges == 1).nonzero())
    if len(falling_indices) < 2 or len(rising_indices) < 2:
        return img
    # make starting with rising not falling 
    if falling_indices[0] < rising_indices[0]:
        falling_indices = falling_indices[1:]
    # make ending with falling not rising 
    if rising_indices[-1] > falling_indices[-1]:
        rising_indices = rising_indices[:-1]

    # cut image on histo
    lines = []
    line_count = min(rising_indices.size()[0], falling_indices.size()[0])
    for i in range(line_count):
        line = img[rising_indices[i]:falling_indices[i]]
        line_height = line.shape[0]
        line = np.pad(line, line.shape[0]//3, pad_with)
        # filter if less than 10 pixels
        if line_height > min_line_height:
            lines.append(1-line)
    return lines
    

In [None]:
from skimage import io
import os
import torch
import numpy as np
paths = os.listdir('../data/formsA-D')

for p in paths:
    img = io.imread('../data/formsA-D/' + p)
    lines = split_in_lines(img)

    for i in range(len(lines)):
        io.imsave("lines/line" + str(i) + '_' + p + ".png", lines[i])