In [3]:
import glob
import os
import subprocess
import sys

import cv2
import numpy as np
import scipy.interpolate
from matplotlib import interactive
from matplotlib import pyplot as plt

In [5]:
def show_rescaled(img, lab = 'this is a test', myscale = 0.7, cmap = 'gray'):
    """
    Show an image in an OpenCV window, rescaled so that it fits the screen.

    The call blocks until a key is pressed in the window, then destroys
    that window.

    Parameters
    ----------
    img : numpy.ndarray
        Image to display; only its first two dimensions (rows, columns)
        are used to compute the display size.
    lab : str
        Title of the OpenCV window.
    myscale : float
        Scale factor applied to both width and height.
    cmap : str
        Unused; kept so existing callers that pass it keep working.
    """
    rows, cols = img.shape[:2]
    # cv2.resize expects the target size as (width, height), i.e.
    # (columns, rows) -- the reverse of numpy's shape order.
    target_size = (max(1, int(cols * myscale)), max(1, int(rows * myscale)))
    imS = cv2.resize(img, target_size)
    cv2.imshow(lab, imS)
    # Without waitKey the HighGUI event loop never runs, so the window
    # would neither render nor react to the key press.
    cv2.waitKey(0)
    cv2.destroyWindow(lab)

In [None]:
def extract_pages(filename):
    """
    Burst a PDF into one single-page PDF per page using ``pdftk``.

    The pages are written as ``pg_0001.pdf``, ``pg_0002.pdf``, ... into a
    ``<name>_pages/`` directory created next to the input file.

    Parameters
    ----------
    filename : str
        Path to the PDF file to split.

    Returns
    -------
    tuple of (str, str, str)
        ``(musicname, dirpages, dirdebug)``: the file name without its
        extension, the directory holding the burst pages, and the path
        reserved for debug output. Both directory paths end with ``/``.
        The debug directory is NOT created here.

    Raises
    ------
    subprocess.CalledProcessError
        If ``pdftk`` exits with a non-zero status.
    FileNotFoundError
        If the ``pdftk`` executable is not installed.
    """
    # extract relevant info from filename
    pdfname = os.path.basename(filename)
    dirname = os.path.dirname(filename)
    musicname = os.path.splitext(pdfname)[0]
    # os.path.join keeps the result relative when `filename` has no
    # directory part (plain concatenation with "/" would point at the
    # filesystem root in that case).
    dirpages = os.path.join(dirname, musicname + "_pages") + "/"
    dirdebug = os.path.join(dirname, musicname + "_debug") + "/"  # to store files for debug
    # create directory (no error if it is already there from a previous run)
    os.makedirs(dirpages, exist_ok=True)
    # burst pdf straight into the pages directory; passing an argument
    # list (no shell) keeps paths with spaces or shell metacharacters intact
    subprocess.run(
        ["pdftk", filename, "burst", "output", os.path.join(dirpages, "pg_%04d.pdf")],
        check=True,
    )
    return (musicname, dirpages, dirdebug)

In [None]:
def _deskew_angle(rect_angle):
    """Map the angle reported by cv2.minAreaRect to the rotation (degrees) that deskews the page."""
    # Legacy OpenCV builds report the angle in [-90, 0); newer builds
    # (4.5 series onward) report it in (0, 90]. The two conventions differ
    # by 90 degrees, so the outer branches fold both onto a small
    # correction around zero.
    if rect_angle < -45:
        return -(90 + rect_angle)
    if rect_angle > 45:
        return 90 - rect_angle
    return -rect_angle


def rotate_page(patharg):
    """
    Deskew the image stored at `patharg` and overwrite it in place.

    The skew is estimated from the minimum-area rotated rectangle that
    encloses all foreground pixels (after inversion and Otsu thresholding).

    Parameters
    ----------
    patharg : str
        Path of the image file; it is read and then overwritten.

    Returns
    -------
    int
        Always 0.

    Raises
    ------
    OSError
        If the image cannot be read.
    """
    image = cv2.imread(patharg)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising
        raise OSError("could not read image: " + patharg)
    # convert the image to grayscale and flip the foreground
    # and background to ensure foreground is now "white" and
    # the background is "black"
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.bitwise_not(gray)
    # threshold the image, setting all foreground pixels to
    # 255 and all background pixels to 0
    thresh = cv2.threshold(gray, 0, 255,
        cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    # grab the coordinates of all pixel values that are greater than
    # zero, then use these coordinates to compute a rotated bounding
    # box that contains all of them
    coords = np.column_stack(np.where(thresh > 0))
    if coords.shape[0] == 0:
        # blank page: there is nothing to estimate a skew from, so the
        # file is left untouched
        return 0
    angle = _deskew_angle(cv2.minAreaRect(coords)[-1])
    # rotate the image to deskew it
    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    rotated = cv2.warpAffine(image, M, (w, h),
        flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_REPLICATE)
    # report the applied angle
    print("rotating" + patharg + " [INFO] angle: {:.3f}".format(angle), end = "\r")
    # overwrite image
    cv2.imwrite(patharg, rotated)
    return 0

In [None]:
def convert_page(ff):
    """
    Convert a single-page PDF to a 300 dpi JPEG with ImageMagick's
    ``convert`` and delete the PDF afterwards.

    The JPEG is written next to the PDF, with the same name and a
    ``.jpg`` extension.

    Parameters
    ----------
    ff : str
        Path of the PDF file to convert.

    Returns
    -------
    int
        Always 0.

    Raises
    ------
    subprocess.CalledProcessError
        If ``convert`` exits with a non-zero status; the PDF is kept.
    FileNotFoundError
        If the ``convert`` executable is not installed.
    """
    print("converting to jpg file " + ff + "...", end = "\r")
    jpg_path = os.path.splitext(ff)[0] + ".jpg"
    # argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through unchanged
    subprocess.run(["convert", "-density", "300", ff, jpg_path], check=True)
    # remove the pdf as we don't need it anymore -- only reached when the
    # conversion succeeded, so a failed run never destroys the source
    os.remove(ff)
    return 0

In [None]:
def clean_page(page):
    """
    Binarise the image at `page`, apply one erosion and one dilation,
    and overwrite the file with the result.

    Parameters
    ----------
    page : str
        Path of the image file; it is read as grayscale and overwritten.

    Returns
    -------
    int
        Always 0.
    """
    grayscale = cv2.imread(page, cv2.IMREAD_GRAYSCALE)
    # Note: 100 as threshold is quite generous
    _, binary = cv2.threshold(grayscale, 100, 255, cv2.THRESH_BINARY)
    # Gentle clean-up: a 3x3 erosion followed by a 2x2 dilation
    # (possibly too gentle).
    erode_kernel = np.ones((3, 3), np.uint8)
    dilate_kernel = np.ones((2, 2), np.uint8)
    cleaned = cv2.dilate(
        cv2.erode(binary, erode_kernel, iterations=1),
        dilate_kernel,
        iterations=1,
    )
    # overwrite previous image
    cv2.imwrite(page, cleaned)
    return 0

In [None]:
def _row_bands(black_rows, minlineheight, maxlineheight):
    """Return [start, end] pairs for each run of consecutive True rows whose height lies strictly between the two limits."""
    bands = []
    start = None
    for i, is_black in enumerate(black_rows):
        if is_black:
            if start is None:
                # open a new band
                start = i
        elif start is not None:
            # close the band exactly once, on the first non-black row
            if minlineheight < (i - start) < maxlineheight:
                bands.append([start, i])
            start = None
    return bands


def _white_extent(whiteness, cutoff=0.995, step=0.005):
    """Return (first, last) indices of the rows whiter than `cutoff`, lowering the cutoff until at least one row qualifies."""
    # `whiteness` must be non-empty: its values are >= 0, so the loop is
    # guaranteed to stop once the cutoff drops below zero.
    while True:
        white_idx = np.flatnonzero(whiteness > cutoff)
        if white_idx.size > 0:
            return int(white_idx[0]), int(white_idx[-1])
        cutoff = cutoff - step


def find_lines(patharg,
               consider_row_black = 0.4,
               minlineheight = 15,
               maxlineheight = 100,
               debug = False,
               saveplot = True):
    """
    Locate horizontal bands (runs of mostly-black rows after isolating long
    horizontal strokes) in a page image and return their vertical extent.

    Each band is then widened into the surrounding gaps: its start moves up
    to the last near-white row of the gap above it, and its end moves down
    to the first near-white row of the gap below it.

    Parameters
    ----------
    patharg : str
        Path of the page image.
    consider_row_black : float
        A row counts as black when more than this fraction of its pixels
        are black in the morphology-filtered image.
    minlineheight, maxlineheight : int
        Bands whose height is not strictly between these values are
        discarded (too narrow / too large).
    debug : bool
        If True, show every gap with the chosen boundary rows drawn on it.
    saveplot : bool
        If True, write a copy of the page with the band boundaries drawn on
        it to the current working directory, under the base name of
        `patharg`.

    Returns
    -------
    list of [int, int]
        ``[start_row, end_row]`` for every detected band, top to bottom.

    Raises
    ------
    OSError
        If the image cannot be read.
    """
    original = cv2.imread(patharg)
    if original is None:
        # cv2.imread signals failure by returning None instead of raising
        raise OSError("could not read image: " + patharg)
    # 1) extract horizontal lines
    gray = cv2.cvtColor(original, cv2.COLOR_BGR2GRAY)
    (_, horizontal) = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    horizontal = cv2.adaptiveThreshold(cv2.bitwise_not(horizontal), 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, -2)
    # prepare structure
    # note: if I use a value > 30, I capture also some notes;
    # if much lower, I lose some lines
    horizontalsize = max(1, int(horizontal.shape[1] / 20))
    horizontalStructure = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                    (horizontalsize, 1))
    # Apply morphology operations. The anchor is left at its default
    # (kernel centre): the third positional parameter of cv2.erode and
    # cv2.dilate is `dst`, so a tuple placed there is not an anchor.
    horizontal = cv2.erode(horizontal, horizontalStructure)
    horizontal = cv2.dilate(horizontal, horizontalStructure)
    kernel = np.ones((6, 6), np.uint8)
    horizontal = cv2.dilate(horizontal, kernel, iterations = 1)
    horizontal = cv2.bitwise_not(horizontal)
    # find beginning and end of all lines
    nrows, ncols = horizontal.shape
    # count the black pixels in each row
    black_pixels = np.sum(horizontal == 0, 1)
    black_rows = (black_pixels > consider_row_black * ncols)
    begin_end = _row_bands(black_rows, minlineheight, maxlineheight)
    # now refine beginning and end; copy the inner lists too so that the
    # raw boundaries in `begin_end` stay untouched while refining
    begin_end_refined = [list(band) for band in begin_end]
    rowsums = np.sum(gray, 1)
    for i in range(len(begin_end) + 1):
        # gap i is the space above band i (and below band i - 1)
        mystart = 0 if i == 0 else begin_end[i - 1][1]
        myend = nrows if i == len(begin_end) else begin_end[i][0]
        if myend <= mystart:
            # empty gap (e.g. the first band starts on row 0): nothing to
            # refine, and searching it would never terminate
            continue
        # fraction of full white per row: 1.0 means a completely white row
        myrs = rowsums[mystart:myend] / (255 * ncols)
        above, below = _white_extent(myrs)
        if i < len(begin_end):
            begin_end_refined[i][0] = mystart + below
        if i > 0:
            begin_end_refined[i - 1][1] = mystart + above
        if debug:
            # `z` is a view into `gray`; drawing on it is harmless because
            # `rowsums` has already been computed
            z = gray[mystart:myend,]
            cv2.line(z, (0, above), (ncols, above), (0,0,255), 2)
            cv2.line(z, (0, below), (ncols, below), (0,0,255), 2)
            plt.figure()
            plt.imshow(z, cmap = "Spectral")
            plt.show()
    if saveplot:
        # draw the lines on the original
        withlines = original.copy()
        for z in begin_end_refined:
            cv2.line(withlines, (0, z[0]), (ncols, z[0]), (255,0,0), 2)
            cv2.line(withlines, (0, z[1]), (ncols, z[1]), (0,0,255), 2)
        # NOTE: written to the current working directory under the same
        # base name as the input
        cv2.imwrite(os.path.split(patharg)[1], withlines)
    return begin_end_refined


In [None]:
# Input score. Note: this needs to be chosen by user via GUI
filename = "../test/Beethoven5.pdf"
# Step 1: split the PDF into one file per page
musicname, dirpages, dirdebug = extract_pages(filename)
# Step 2: each page is converted to jpg, cleaned and deskewed
# (finding boxes/lines on each page is not wired in yet)
pages = glob.glob(dirpages + "/*.pdf")
for page in pages:
    page_jpg = os.path.splitext(page)[0] + ".jpg"
    convert_page(page)
    clean_page(page_jpg)
    rotate_page(page_jpg)


In [6]:
# Load one processed page as a grayscale image and preview it
page = "../test/Beethoven5_pages/pg_0006.jpg"
img = cv2.imread(page, cv2.IMREAD_GRAYSCALE)
show_rescaled(img)

In [None]:
# 3) Horizontal lines
horizontal = img.copy()
# prepare structure: a 1-pixel-high kernel, 1/20 of the page width
# note: if I use a value > 30, I capture also some notes;
# if much lower, I lose some lines
horizontalsize = max(1, int(horizontal.shape[1] / 20))
horizontalStructure = cv2.getStructuringElement(cv2.MORPH_RECT,
                                                (horizontalsize, 1))
## Apply morphology operations
# The anchor is left at its default (kernel centre): the third positional
# parameter of cv2.erode / cv2.dilate is `dst`, so a tuple placed there is
# not an anchor.
horizontal = cv2.erode(horizontal, horizontalStructure)
horizontal = cv2.dilate(horizontal, horizontalStructure)

# thicken what is left with a 5x5 dilation, then invert
kernel = np.ones((5,5), np.uint8)
horizontal = cv2.dilate(horizontal, kernel, iterations = 1)
horizontal = cv2.bitwise_not(horizontal)



# In[ ]:

# 4) Vertical stuff
vertical = img.copy()
# prepare structure: a 1-pixel-wide kernel, 1/650 of the page height
# (never smaller than 1 pixel, which would be an invalid kernel size)
verticalsize = max(1, int(vertical.shape[0] / 650))
verticalStructure = cv2.getStructuringElement(cv2.MORPH_RECT,
                                              (1, verticalsize))
## Apply morphology operations
# The anchor is left at its default (kernel centre): the third positional
# parameter of cv2.erode / cv2.dilate is `dst`, so a tuple placed there is
# not an anchor.
vertical = cv2.erode(vertical, verticalStructure)
vertical = cv2.dilate(vertical, verticalStructure)


# x and y axis of the row profile: row index vs. number of black pixels
# in that row of `horizontal`.
# (`1 - horizontal` on a uint8 image wraps around -- 255 becomes 2 -- so the
# black pixels are counted explicitly instead.)
rs = np.sum(horizontal == 0, axis = 1)
cl = list(range(horizontal.shape[0]))

interactive(True)
plt.axhline(y = 6)
# cubic interpolation of the profile, drawn over the raw points
f = scipy.interpolate.interp1d(cl, rs, kind = 'cubic')
plt1 = plt.plot(cl, rs, 'o', cl, f(cl), '--')
