In [122]:
import matplotlib.pyplot as plt  
# iscrtavanje slika i plotova unutar samog browsera
%matplotlib inline 

import matplotlib.pylab as pylab
# prikaz vecih slika 
pylab.rcParams['figure.figsize'] = 21,15

import numpy as np
import cv2 # OpenCV biblioteka

def show_in_window_and_below(img, below=True):
    if (below):
        plt.imshow(img, 'gray')
    cv2.imshow('image', img)
    cv2.waitKey(0)
    cv2.destroyAllWindows()

# current image extension
ext = '.JPG'

BLACK = 0 # black pixel's color
WHITE = 255 # white pixel's color

In [123]:
# runs calculations
def calculate_runs(img):
    runs = [[1] for x in xrange(img.shape[1])] # each column starts with 1 black pixel
    black_runs_flat, white_runs_flat = [], []
    for col in xrange(img.shape[1]): # iterate through all columns
        img[0,col] = 0 # PAINT THE FIRST PIXEL IN PREDEFINED COLOR, to make all columns start the same
        run_index = 0 # start the run
        for row in xrange(1, img.shape[0]): # for each pixel/row in current column
            if (img[row-1 ,col] != img[row, col]):  # if they are not the same,
                # memorize the old run in corresponding array
                if (run_index % 2 == 0): # black run
                    black_runs_flat.append(runs[col][run_index])
                else:
                    white_runs_flat.append(runs[col][run_index])
                # start a new run
                run_index += 1
                runs[col].append(0)
            runs[col][run_index] += 1     # add a pixel to the current run
        # the column ended ==> save the last run for the ended column
        if (run_index % 2 == 0): # black run
            black_runs_flat.append(runs[col][run_index])
        else:
            white_runs_flat.append(runs[col][run_index])
    return runs, black_runs_flat, white_runs_flat

In [124]:
# line thickness and spacings (black and white runs analysis)
from collections import Counter

def calculate_line_thickness(black_flat):
    num_top = 4 # number of top/most common runs
    black_count = Counter(black_flat) # Counter({1: 3, 8: 1, 3: 1, ...})
    m_c_black = black_count.most_common(num_top)
    m_c_black1, m_c_black2 = m_c_black[0][0], m_c_black[1][0]
    print 'Top', num_top, 'most common black runs:', m_c_black
    if (m_c_black1*3 < m_c_black2): # kind of a sanity check
        line_thickness = m_c_black1
    else:
        line_thickness = (m_c_black1 + m_c_black2) / 2.
    print '>>> line thickness:  ', line_thickness
    return line_thickness

def calculate_line_spacing(white_flat, image_height):
    num_top = 4 # number of top/most common runs
    white_count = Counter(white_flat) # print white_count.most_common(50)
    m_c_white = white_count.most_common(num_top)
    m_c_white1, m_c_white2 = m_c_white[0][0], m_c_white[1][0]
    print 'Top', num_top, 'most common white runs', m_c_white
    
    if (m_c_white1 > image_height*0.5): # sanity check
        line_spacing = m_c_white2
    else:
        if (m_c_white2 > image_height*0.5):
            line_spacing = m_c_white1
        else:
            line_spacing = (m_c_white1 + m_c_white2) / 2.
    print 'line spacing: ', line_spacing
    return line_spacing

In [125]:
def remove_staff_lines(img, runs, line_height, staff_thickness_multiplier):
    # copy the image.. python is pass-by-object-reference so it is necessary!
    p = img.copy() # pass-by-object-reference: https://stackoverflow.com/a/33066581/2101117
    # NOTE: copying is NOT NECESSARY if we won't use the passed `img` after this function returns
    #edit the image
    for c in xrange(len(runs)):        # for every column
        cumulative = 0 # initialize the number of passed pixels
        for r in xrange(len(runs[c])): # for every run
            run_length = runs[c][r]
            if (r % 2 == 0): # black runs # every black run longer than 2 * line_height is deleted/whitened
                if (run_length < line_height * staff_thickness_multiplier):
                    # ++ AKO JE SLJEDEĆI/PRETHODNI %% BIJELI %% RUN = VISINA PRAZNINE +-1
                    # ++ AKO JE SLJEDEĆI/PRETHODNI %%  CRNI  %% RUN = VISINA LINIJE +-1
                    p[cumulative:cumulative + run_length, c] = [255]*(run_length)
            #else: # white runs
            #    do something maybe ?
            cumulative += run_length
    return p

In [126]:
def remove_staff_lines_with_lines_only(img, runs, line_height, staff_thickness_multiplier, lines_only_img):
    # copy the image.. python is pass-by-object-reference so it is necessary!
    p = img.copy() # pass-by-object-reference: https://stackoverflow.com/a/33066581/2101117
    # NOTE: copying is NOT NECESSARY if we won't use the passed `img` after this function returns
    #edit the image
    for c in xrange(len(runs)):        # for every column
        cumulative = 0 # initialize the number of passed pixels
        for r in xrange(len(runs[c])): # for every run
            run_length = runs[c][r]
            if (r % 2 == 0): # black runs # every black run longer than 2 * line_height is erased
                if (run_length < line_height * staff_thickness_multiplier):
                    for compare_px in xrange(cumulative, cumulative + run_length):
                        if (lines_only_img[compare_px, c] == BLACK):
                            p[compare_px, c] = 255
                        # p[cumulative:cumulative + run_length, c] = [255]*(run_length)
            cumulative += run_length
    return p

In [127]:
#########
# USE:
#    = in lines-only image, to LOCATE the lines AND/OR check if there is a line on the current location/run
#        - easier to find lines, since there are no other elements
#        - takes more time, since we need to generate the lines-only image,
#           ,BUT THAT IS NOT A PROBLEM SINCE WE WILL NEED IT TO LOCATE THE LINES
#
#    = in binary image, to check if there is a line on the current location/run
#
# da se utvrde linije treba samo odrediti visinu, a ne 
# TIP: mozda za svaki linijski sistem (ili cak liniju) cuvati vise x, koordinata,
#   recimo na svaku petinu sirine slike provjeravati lokacije
#   linijskih sistema (ili pojedinacnih linija) na vise mijesta u slici:
#       |       |        |      |
#       V       V        V      V
# -----..____..--------------------  <== curved line, others are ok
# --------------------------------- 
# ---------------------------------
# ---------------------------------
# ---------------------------------
######
# x je niz od onoliko crnih piksela koliko je prosjecna debljina linije (+-1 ili 2)
# x = [0] * (int(thickness)-1)
# y je niz od onoliko crnih piksela koliko je prosjecna debljina linije + 2 ili 3 ||| INT!
#   ~ pikseli iz y niza pocinju od posljednje tacke posmatrane linije (tj.kandidata za liniju)
# y = current_pos + int(spacing * 0.9 ili 0.8)
# print x_in_y(x, y)
###
def x_in_y(x, y):
#     print 'checking', x, 'in', y
    try:
        x_len = len(x)
    except TypeError:
        x_len = 1
        x = type(y)((x,))

    for i in xrange(len(y)):
        if (y[i : i+x_len] == x):
            return True
    return False

# a = [0,0,1,1,1,0,0,0,0]
# b = [1,1,1]
# b = 0 # works also
# print x_in_y(b, a)

In [128]:
# # # # # # # # # # # # # # # # # # # # # # # # # # #
# ALG 2 - look at white runs around the line candidate
# 2*d . . . 1*d . . . X . . . 1*d . . . 2*d
#  0         1        -        2         3
# In order for X to be whitened (erased):
#

def spacing_is_ok(run_length, line_spacing):
#     print '>>> spacing_is_ok <<< run_length: {}, line_spacing: {}'.format(run_length, line_spacing)
    return (run_length < (line_spacing * 1.15)) and (run_length > (line_spacing * 0.85))

def find_lines_up_or_down(column, row_start, black, line_thickness, line_spacing):
#     print 'column', column
#     print 'row_start', row_start, 'black', black, 'line_thickness', line_thickness, 'line_spacing', line_spacing
    scaled_spacing = int(line_spacing*0.9) + 1 # <-- might be the cause of all evil :)
    search_position = row_start + black + scaled_spacing
    # determine the search thickness we are looking for
    search_for_thickness = 1 if (line_thickness <= 3) else 2  # CAUTION <-- might be the cause of all evil :)
    search_for_array = [0] * search_for_thickness # [0]*5 --> [0, 0, 0, 0, 0]
    # determine the search space (exact pixels through which we will search for a line)
    search_height = int(line_thickness)*2 + 1 # thick*2 if (thick <= 3) else thick*1.5 ILI 2.5 ???
    lowest_pixel = search_position + search_height
#     print 'lowest pixel to look at: {} | number of rows: {}'.format(lowest_pixel, len(column))
    if (len(column) > lowest_pixel): # if we have 10 elements, maximum valid index is 9 !
        search_space = column[search_position : lowest_pixel]
        line_below = x_in_y(search_for_array, search_space)
        if (line_below):
            text = 'FOUND a line BELOW\n\tposition: {}|search_thickness: {}|search_height: {}'
#             print text.format(search_position, search_for_thickness, search_height)
            return True
    
    search_position = row_start - scaled_spacing - search_height
#     print 'search position for lines above:', search_position
    lowest_pixel = row_start - scaled_spacing
    if (search_position >= 0):
        search_space = column[search_position : lowest_pixel]
        line_above = x_in_y(search_for_array, search_space)
        if line_above:
            text = 'Found a line ABOVE\n\tposition: {}|search_thickness: {}|search_height: {}'
#             print text.format(search_position, search_for_thickness, search_height)
            return True
    
    return False
# end of find_lines_up_or_down

def run_has_lines_up_or_down(runs_in_column, current_index, line_thickness, line_spacing):
    '''check if there is a line on `line_spacing` above or below the current index '''
    lines = [255,255,255,255]
    return 1

def rm_staff_lines_up_down_neighbours(img, runs, line_thickness, line_spacing, staff_thickness_multiplier):
    '''Removes staff lines by looking at every black run's neighbours, above and below.'''
    # Original image is being changed + Python passes by obj-ref, so it's necessary
    p = img.copy() # pass-by-object-reference: https://stackoverflow.com/a/33066581/2101117
    # NOTE: copying is NOT NECESSARY if we won't use the passed `img` after this function returns
    for c in xrange(len(runs)):        # for every column
        cumulative = 0 # initialize the number of passed pixels
        for r in xrange(len(runs[c])): # for every run
            run_length = runs[c][r]
            deleted = False # did we delete the run
            if (r % 2 == 0): # check every black run, shorter than `thickness * multiplier`
                if (run_length < line_thickness * staff_thickness_multiplier):
                    # this is not the last run === check the run AFTER this one
                    if (r + 1 < (len(runs[c]))):
                        if spacing_is_ok(runs[c][r+1], line_spacing): # we found a line - delete it
                            p[cumulative:cumulative + run_length, c] = [255]*(run_length)
                            deleted = True
                        # else: # more conditions to add.. like..
                                # check if there is a black run above or below the current one,
                                # on distance that is equal to line_spacing +-1,
                                # whose lenght is = line_thickness +-1
                            
                    # this is not the first run === check the run BEFORE this one
                    if (not deleted and (r - 1 >= 0)):
                        if spacing_is_ok(runs[c][r-1], line_spacing): # we found a line - delete it
                            p[cumulative:cumulative + run_length, c] = [255]*(run_length)
                            deleted = True
                    # run_length > 1  ILI_DA_BUDE cumulative > 0  ? ? ? ?? ? ? ? ?? ?
                    if (not deleted): # and run_length > 1)
                        column = img[0:img.shape[0], c].tolist() # convert numpy array to python list
                        if find_lines_up_or_down(column, cumulative, run_length, line_thickness, line_spacing):
                            p[cumulative:cumulative + run_length, c] = [255]*(run_length)
                            deleted = True
#                     if run_has_lines_up_or_down(runs[c], r, line_thickness, line_spacing)):
#                 else: # run is thicker than the average line
#                     if (run_length < line_thickness * 2.2): # ASSUMPTION --- for empty note-heads
                        
            cumulative += run_length # !!! # we advanced down for `run_length` pixels !
    return p

In [129]:
# # # # # # # # # # # # # # # # # # # # # # # # # # #
# ALG 3 - compare neighbour points on distance `d`,
# to the left and right, from the observed point X:
# 2*d . . . 1*d . . . X . . . 1*d . . . 2*d
#  0         1        -        2         3
# In order for X to be whitened (erased):
#   - points[1] and points[2] should be black, or
#   - points[0] and points[1] should be black, or
#   - points[2] and points[3] should be black.

def initialize_points(c, distance, img, compare_point, runs):
    '''Gets the neighbours located '''
    points = [255,255,255,255] # initially, pixels are white (maybe -1 if not accessible)
    # 2*left , 1*left , 1*right, 2*right
    if (c - distance >= 0): # we can get the 1*left pixel
        points[1] = img[compare_point, c - distance]
        if (c - 2*distance >= 0): # we can get the 2*left pixel
            points[0] = img[compare_point, c - 2*distance]
    if (c + distance < len(runs)): # we can get the right pixel
        points[2] = img[compare_point, c + distance]
        if (c + 2*distance < len(runs)):
            points[3] = img[compare_point, c + 2*distance]
    return points

def rm_staff_lines_side_neighbours(img, runs, thickness, spacing, thickness_mul, distance, lines_only_img = []):
    if distance > len(runs)/2:
        print 'WHOA! Distance is: {} and there are only {} columns'.format(distance, len(runs))
        return img
    int_thickness = int(thickness)+1
    p = img.copy() # we will erase someimg lines, so copy the image
    # NOTE: copying is NOT NECESSARY if we won't use the passed `img` after this function returns
    if (lines_only_img != []): # check neighbours on the lines_only image, if we passed it
        img = lines_only_img
    for c in xrange(len(runs)): # for every column
        cumulative = 0 # number of passed pixels
        for r in xrange(len(runs[c])):
            run_length = runs[c][r]
            if (r % 2 == 0):# for every black run
#                 if (run_length < thickness * thickness_mul): # ADD THIS BACK ! ! ! !! !
#                     p[cumulative:cumulative + run_length, c] = [255]*(run_length)
#                     cumulative += run_length # !!!
#                     continue
#                 else:
                pixels_to_remove = int_thickness # <=== this goes to ELSE
                # we need to check if we passed the image height!
                compare_point = min(img.shape[0]-1, cumulative + pixels_to_remove/2) # + run_length/2 # maybe later
                # 4 values of neighbour pixels, some of them must be black,
                #   for deletion of the observed pixel to happen.
                points = initialize_points(c, distance, img, compare_point, runs)
                if (points[1] == 0):
                    if (points[2] == 0):
                        p[cumulative: cumulative + pixels_to_remove, c] = [255]*pixels_to_remove
                    else: # out of bounds or white # !! !!  ASSUMPTION !!  !!!
                    # if (points[2] == -1): # more robust? Needs points=[-1,-1,-1,-1]
                        # two to the left is black?
                        if (points[0] == 0):
                            p[cumulative: cumulative + pixels_to_remove, c] = [255]*pixels_to_remove
                        # else: NIJE LINIJA :D
                # no black point on the left
                else:
                    if (points[2] == 0 and points[3] == 0):
                        p[cumulative: cumulative + pixels_to_remove, c] = [255]*pixels_to_remove
            cumulative += run_length # !!!
    # return the new, processed image
    return p

In [79]:
# # # # # # # #
# SKIP THIS !!!  This was waaay back then... it's old testing
# # # # #
def parse_image(img, threshold_type, block_size, c_value, staff_thickness_multiplier, params=[]):
    t_t, b_s, s_t_m = threshold_type, block_size, staff_thickness_multiplier
    print('=========\nthreshold_type: {}, block_size: {}, c_value: {}, staff_thickness_multiplier: {}'.format(t_t, b_s, c_value, s_t_m))
    img_ada = cv2.adaptiveThreshold(img, 255, threshold_type, cv2.THRESH_BINARY, block_size, c_value)
    
    dilate_kernel = np.ones((1,30), dtype=np.int) # np.ones((kernel_w, kernel_h), dtype=np.int);
    # staff lines LOCATIONS, along with lines-only image
    lines_only_img, locations = cv2.dilate(img_ada, kernel, iterations=1)
    
    # runs calculation
    runs, black_runs_flat, white_runs_flat = calculate_runs(img_ada)
    line_thickness = calculate_line_thickness(black_runs_flat)
    line_spacing = calculate_line_spacing(white_runs_flat, img_ada.shape[0]) # needs image height
    result = rm_staff_lines_up_down_neighbours(img_ada, runs, line_thickness, line_spacing, staff_thickness_multiplier)
#     distance = int(line_spacing * 0.5)
#     result = rm_staff_lines_side_neighbours(img_ada, runs, line_thickness, line_spacing, staff_thickness_multiplier, distance)
#     result = rm_s(img_ada, runs, line_thickness, line_spacing, staff_thickness_multiplier, distance)
    
    cv2.imwrite('./images/dataset/run_X/params_'+str(t_t)+'_'+str(b_s)+'_'+str(c_value)+'_'+str(s_t_m)+ext, result)
    #cv2.imwrite('./images/dataset/run_Y/params_ORIGINAL.jpg', img_ada)

img = cv2.imread('images/dataset/muzikanti'+ext, 0) #  0 -->  read as grayscale
# parse_image(img, cv2.ADAPTIVE_THRESH_MEAN_C, 33, 35, 1.5)

In [249]:
# param values
threshold_types = [cv2.ADAPTIVE_THRESH_MEAN_C]# , cv2.ADAPTIVE_THRESH_GAUSSIAN_C # const values are 0 and 1
block_sizes = [33,35,37,39,43]# [11, 19, 27, 35, 43] # [11,15,19,23,27,31,35, 39, 43]
c_values = [29,33,35,37] # [17,19,21] # [11, 19, 27, 35, 43] # [11,15,19,23,27,31,35, 39, 43]
staff_thickness_multipliers = [1.3, 1.5] # [1., 1.5, 2., 2.5]

In [68]:
for thresh_type in threshold_types:
    for block_size in block_sizes:
        for c_val in c_values:
            for s_t_mul in staff_thickness_multipliers:
                parse_image(img, thresh_type, block_size, c_val, s_t_mul)

threshold_type: 0, block_size: 33, c_value: 29, staff_thickness_multiplier: 1.3
Top two most common black runs: [(4, 36510), (3, 33167)]
>>> line height:   3.5
Top two most common white runs [(21, 24794), (20, 15403)]
space height:  20.5
threshold_type: 0, block_size: 33, c_value: 29, staff_thickness_multiplier: 1.5
Top two most common black runs: [(4, 36510), (3, 33167)]
>>> line height:   3.5
Top two most common white runs [(21, 24794), (20, 15403)]
space height:  20.5
threshold_type: 0, block_size: 33, c_value: 33, staff_thickness_multiplier: 1.3
Top two most common black runs: [(3, 38242), (4, 31349)]
>>> line height:   3.5
Top two most common white runs [(21, 26952), (20, 12360)]
space height:  20.5
threshold_type: 0, block_size: 33, c_value: 33, staff_thickness_multiplier: 1.5
Top two most common black runs: [(3, 38242), (4, 31349)]
>>> line height:   3.5
Top two most common white runs [(21, 26952), (20, 12360)]
space height:  20.5
threshold_type: 0, block_size: 33, c_value: 35, 

Top two most common black runs: [(4, 36534), (3, 32881)]
>>> line height:   3.5
Top two most common white runs [(21, 25559), (20, 15173)]
space height:  20.5
threshold_type: 0, block_size: 43, c_value: 33, staff_thickness_multiplier: 1.3
Top two most common black runs: [(3, 37651), (4, 31786)]
>>> line height:   3.5
Top two most common white runs [(21, 27641), (20, 12117)]
space height:  20.5
threshold_type: 0, block_size: 43, c_value: 33, staff_thickness_multiplier: 1.5
Top two most common black runs: [(3, 37651), (4, 31786)]
>>> line height:   3.5
Top two most common white runs [(21, 27641), (20, 12117)]
space height:  20.5
threshold_type: 0, block_size: 43, c_value: 35, staff_thickness_multiplier: 1.3
Top two most common black runs: [(3, 39893), (4, 29339)]
>>> line height:   3.5
Top two most common white runs [(21, 28482), (20, 10654)]
space height:  20.5
threshold_type: 0, block_size: 43, c_value: 35, staff_thickness_multiplier: 1.5
Top two most common black runs: [(3, 39893), (4,

In [185]:
# Test various kernel dimensions on  binarized images with
# morph. operations: erosion, dilation, opening  and  closing
kernel_widths = [1, 2]
kernel_heights = [7, 11, 15]

block_sizes = [51, 51]
c_values = [35, 45]
ada_method = cv2.ADAPTIVE_THRESH_MEAN_C #  GAUS GAUS GAUS GAUS GAUS GAUS !!!
thresh = cv2.THRESH_BINARY
for i in range(1):
    img_path = './images/dataset/viva{}_up.jpg'.format(i+1)
    img = cv2.imread(img_path, 0) # grayscale
    img = cv2.adaptiveThreshold(img, 255, ada_method, thresh, block_sizes[i], c_values[i])
    for kernel_width in kernel_widths:
        for kernel_height in kernel_heights:
            #kernel_width = kernel_height # ONLY FOR TESTING!!!
            kernel = np.ones((kernel_width, kernel_height), dtype=np.int)
            eroded = cv2.erode(img, kernel, iterations=1)
            dilated = cv2.dilate(img, kernel, iterations=1)
            er_b4_dil = cv2.dilate(eroded, kernel, iterations=1)
#             dil_b4_er = cv2.erode(dilated, kernel, iterations=1) # useles...
            
            cv2.imwrite('./images/kernel_2/ER_b_{}_c_{}_kw_{}_kh_{}.jpg'\
                        .format(block_sizes[i], c_values[i], kernel_width, kernel_height),  eroded)
#             cv2.imwrite('./images/kernel_2/DIL_b_{}_c_{}_kw_{}_kh_{}.jpg'\
#                         .format(block_sizes[i], c_values[i], kernel_width, kernel_height),  dilated) # no need to save dil.
            cv2.imwrite('./images/kernel_2/ER_B4_DIL_b_{}_c_{}_kw_{}_kh_{}.jpg'\
                        .format(block_sizes[i], c_values[i], kernel_width, kernel_height),  er_b4_dil)
#             cv2.imwrite('./images/kernel_2/DIL_B4_ER_b_{}_c_{}_kw_{}_kh_{}.jpg'\
#                         .format(block_sizes[i], c_values[i], kernel_width, kernel_height),  dil_b4_er)
# end

In [None]:
# Use all three algorithms on the input image
#   1 - simple:  with no parameters (with optional erosion before the alg.)
#   2 - up-down: basic call, + 3 calls with dilation (kernel_1 1 and 2 iterations, kernel_2)
#   3 - side:    with two distances: line_spacing*A and line_spacing*B. A and B are from [0.1, 2.0]
block_sizes = [51]#, 51] #  ]#
c_values = [45]#, 45]    #  ]#
thresh = cv2.ADAPTIVE_THRESH_MEAN_C
method = cv2.THRESH_BINARY
# img = cv2.imread('images/dataset/muzikanti'+ext, 0) #  0 => read as grayscale
img = cv2.imread('./images/dataset/viva1_up.jpg', 0)

for i in range(len(block_sizes)):
    block = block_sizes[i]
    c = c_values[i]
    img = cv2.adaptiveThreshold(img, 255, thresh, method, block, c)
    path_regular = './images/e_viva/b_{}_c_{}_LINES.jpg'
    cv2.imwrite(path_regular.format(block, c), img)
    # PREPARE image for lines detection  --  ER_B4_DIL_b_51_c_35_kw_2_kh_7
    kernel = np.ones((2, 7), dtype=np.int)
    eroded = cv2.erode(img, kernel)
    er_pa_dil = cv2.dilate(eroded, kernel)
    path_er_pa_dil = './images/e_viva/b_{}_c_{}_ER_B4_DIL_k_2x7.jpg'
    cv2.imwrite(path_er_pa_dil.format(block, c), er_pa_dil)
    img = er_pa_dil
    
    runs, black_runs_flat, white_runs_flat = calculate_runs(img) # runs calculation, for thickness and spacing
    line_thickness = calculate_line_thickness(black_runs_flat)
    line_spacing = calculate_line_spacing(white_runs_flat, img.shape[0]) # needs image height
    
    thickness_mul = 1.5 # staff_thickness_multiplier
    
    # Remove staff lines - RUNS algorithm ~~Simplest~~ (Aleksandar's)
    rm_s_l = remove_staff_lines(img, runs, line_thickness, thickness_mul)
#     path_regular = './images/e_viva/ER_{}_{}_b_{}_c_{}_RM_S_L.jpg'
#     cv2.imwrite(path_regular.format(k_w, k_h, block, c), rm_s_l)
    path_regular = './images/e_viva/b_{}_c_{}_RM_S_L.jpg'
    cv2.imwrite(path_regular.format(block, c), rm_s_l)
    
    # Remove staff lines - UP-DOWN algorithm (Aleksandar's)
    rm_s_l_up_down = rm_staff_lines_up_down_neighbours(img, runs, line_thickness, line_spacing, thickness_mul)
    path_up_down = './images/e_viva/b_{}_c_{}_RM_S_L_UP_DOWN.jpg'
    cv2.imwrite(path_up_down.format(block, c), rm_s_l_up_down)
    
    up_down_dilated = cv2.dilate(rm_s_l_up_down, kernel)
    path_up_down_dil = './images/e_viva/DIL_{}_{}_b_{}_c_{}_RM_S_L_UP_DOWN.jpg'
    cv2.imwrite(path_up_down_dil.format(k_w, k_h, block, c), up_down_dilated)
    
    up_down_dilated_x2 = cv2.dilate(rm_s_l_up_down, np.ones((3, 1), dtype=np.int), iterations=2)
    path_up_down_dil_x2 = './images/e_viva/DIL_3_1_TWO_iters_b_{}_c_{}_RM_S_L_UP_DOWN.jpg'
    cv2.imwrite(path_up_down_dil_x2.format(block, c), up_down_dilated_x2)
    
    k_w, k_h = 5, 2 # above kernel is 5 1. This is 5 2
    kernel = np.ones((k_w, k_h), dtype=np.int)
    up_down_dilated = cv2.dilate(rm_s_l_up_down, kernel)
    path_up_down_dil = './images/e_viva/DIL_{}_{}_b_{}_c_{}_RM_S_L_UP_DOWN.jpg'
    cv2.imwrite(path_up_down_dil.format(k_w, k_h, block, c), up_down_dilated)
    
    # Remove staff lines - SIDE algorithm (Filip's)
    distance = int(line_spacing * 0.8) ### check for: * 1, * 1.3, * 1.5
    rm_s_l_side = rm_staff_lines_side_neighbours(img, runs, line_thickness, line_spacing, thickness_mul, distance)
    path_side = './images/e_viva/b_{}_c_{}_RM_S_L_SIDE_dist_{}.jpg'
    cv2.imwrite(path_side.format(block, c, distance), rm_s_l_side)
    
    distance = int(line_spacing * 1) ### check for: * 1, * 1.3, * 1.5
    rm_s_l_side = rm_staff_lines_side_neighbours(img, runs, line_thickness, line_spacing, thickness_mul, distance)
    path_side = './images/e_viva/b_{}_c_{}_RM_S_L_SIDE_dist_{}.jpg'
    cv2.imwrite(path_side.format(block, c, distance), rm_s_l_side)
# end


< # # # #  # # # #  # # # #  # # # #  # # # #  # # # >
### (RLE) run-based approach to  LOCATING  STAVES
< # # # #  # # # #  # # # #  # # # #  # # # #  # # # >

In [211]:
# Adds "help" lines and spaces (eng. "ledger lines/spaces") to `locaions` dictionary
def add_helper_spaces_and_lines(locations, line_thickness, line_spacing, cumulative):
    line_thickness = int(round(line_thickness)) # round(2.4) = 2 ||| round(2.6) = 3  :)
    line_spacing = int(round(line_spacing))
    
    help_space_3_up_y = cumulative - (2 * line_thickness) - (3 * line_spacing) 
    locations[help_space_3_up_y] = (0, line_spacing)
    help_line_2_up_y = cumulative - (2 * line_thickness) - (2 * line_spacing)
    locations[help_line_2_up_y] = (1, line_thickness)
    help_space_2_up_y = cumulative - (1 * line_thickness) - (2 * line_spacing)
    locations[help_space_2_up_y] = (2, line_spacing)
    help_line_1_up_y = cumulative - (1 * line_thickness) - (1 * line_spacing)
    locations[help_line_1_up_y] = (3, line_thickness)
    help_space_1_up_y = cumulative - (0 * line_thickness) - (1 * line_spacing)
    locations[help_space_1_up_y] = (4, line_spacing)
    
    lowest_line = max(locations.keys()) # MAX because the lower the LINE, the bigger the Y-coordinate
    lowest_line_end = lowest_line + locations[lowest_line][1]
    
    help_space_1_down_y = lowest_line_end + (0 * line_thickness) + (0 * line_spacing)
    locations[help_space_1_down_y] = (14, line_spacing)
    help_line_1_down_y = lowest_line_end + (0 * line_thickness) + (1 * line_spacing)
    locations[help_line_1_down_y] = (15, line_thickness)
    help_space_2_down_y = lowest_line_end + (1 * line_thickness) + (1 * line_spacing)
    locations[help_space_2_down_y] = (16, line_spacing)
    help_line_2_down_y = lowest_line_end + (1 * line_thickness) + (2 * line_spacing)
    locations[help_line_2_down_y] = (17, line_thickness)
    help_space_3_down_y = lowest_line_end + (2 * line_thickness) + (2 * line_spacing) 
    locations[help_space_3_down_y] = (18, line_spacing)
    return 0 # everything is OK

def run_is_spacing_candidate(run_length, line_spacing):
    return (line_spacing * 0.85 <= run_length <= line_spacing * 1.15)

def run_is_line_candidate(run_length, line_thickness):
#     lower_bound = line_thickness * 0.4 if line_thickness > 3.6 else 1 # Helps with thin average lines
    return (line_thickness * 0.4 <= run_length <= line_thickness * 1.6)
    
# Checks if there is a staff and returns a tuple (found, start_pixel, locations, end_pixel)
#   1.1) True,  or  False
#   1.2) The y-coordinate of the stave's START (start_pixel),  or  -1
#   1.3) Dictionary of pairs: (line_or_space_start_y, (line_space_CODE, line_thickness))
#   1.4) The y-coordinate of the stave's END (end_pixel),  or  -1
def get_staff_with_spaces(run_index, runs, image_column, cumulative, line_thickness, line_spacing):
#     print ' ~~ get_staff_with_spaces ~~\n  checking', image_column[cumulative:cumulative+150]
    print ' ~~ get_staff_with_spaces ~~\n  checking:\n' #, image_column#[:cumulative], image_column[cumulative:]
    print '\t run_index,', run_index, '/', len(runs), '\t runs:', runs
    # initialize the return dictionary with a line number 1, which has the CODE=5. Why 5? See the next line
    # >> help_space3_up (CODE=0), help_line2_up, help_space2_up, help_line1_up, help_space1_up, FIRST_LINE
    locations = {cumulative: (5, runs[run_index])} # first line starts at the current index
    line_space_counter = 6 # first line is saved, now go for other SPACES AND LINES
    # cumulative -> how low (Y) are we in this image column - Make_first_row_black_in_runs_calc idea will be discarded!
    end_pixel = cumulative + runs[run_index] # determines the y-coordinate where this the last-found element end
    stave_found = True
    
    for i in range(1, 9):
        run_length = runs[run_index + i]
        if i%2 == 1: # i is odd for line_spacings, since for i=0 we get first LINE's index (run_index)
            if not run_is_spacing_candidate(run_length, line_spacing):
                stave_found = False
                break
        else: # i%2 == 0
            if not run_is_line_candidate(run_length, line_thickness):
                stave_found = False
                break
        # we came to execute this line, so this run (space/line) is ok
        locations[end_pixel] = (line_space_counter, run_length) # add the SPACE/LINE
        line_space_counter += 1 # look for the next SPACES AND LINES
        end_pixel += run_length # advance down the column
    
    if not stave_found:
        return False, -1, {}, -1 # not_found, no_start, no_locations, no_end
    
    # prepare the return values
    add_helper_spaces_and_lines(locations, line_thickness, line_spacing, cumulative)
    start_pixel = min(locations.keys())
    # get the last line's y_coordinate and add her height to it -- that's where this stave ends
    lowest_space = max(locations.keys()) # MAX because the lower the SPACE, the bigger the Y-coordinate
    end_pixel = lowest_space + locations[lowest_space][1] # lowest SPACE at `lowest_space`, height: locations[lowest_space][1]
    return True, start_pixel, locations, end_pixel

# IS beginning of the first stave close to the beginning of the second?
def staves_are_close(stave1, stave2, line_thickness):
    return abs(stave1[1][0] - stave2[1][0]) < line_thickness

def find_staves_in_runs(runs, image_column, line_thickness, line_spacing):
    run_index = 0 # current run index
    cumulative = 0 # how many pixels did we pass - for getting run's color# The paint_the_first_row_black idea will be discarded!!!
    
    staff_counter = 0
    staves = {}
    while (run_index < len(runs)):# WHILE, because we need more control over the index vlue
        run_length = runs[run_index]
#         print 'cumulative is now:',cumulative,'| runs[',run_index,'] =',run_length,'| len(runs):',len(runs)
        if (image_column[cumulative] == BLACK and run_is_line_candidate(run_length, line_thickness)):
#             print 'checking a BLACK run of size', run_length
            staff_is_found, start_pixel, staff, end_pixel = get_staff_with_spaces(run_index, runs, image_column, cumulative, line_thickness, line_spacing)
            if (staff_is_found): # YEA!
                print 'we got them linez! [8)'
                staff_counter += 1
                staves[start_pixel] = (staff_counter, staff, end_pixel)
                # A staff was found, so we need to go down 8 runs until the last line of this staff
                #   0 ,    1   ,   2 ,    3   ,   4 ,    5   ,   6 ,    7   ,  8
                # line, spacing, line, spacing, line, spacing, line, spacing, line
                for r_i in range(run_index, run_index+9): # OR 8??
                    cumulative += runs[r_i] # print 'adding', runs[r_i], 'to cumulative'
                run_index += 8
            else: # staff was NOT found
                cumulative += run_length
        else:
            cumulative += run_length
        run_index += 1
    
    return len(staves), staves # return the size and the staves dictionary

# check that NO staves from the `staves` dictionary are close to this `stave`
def no_staves_are_close(staves, stave, staff_start):
#     close_range = int(line_thickness * 1.5 + 1) / 2
    print 'close_range = (', stave[2], '-', staff_start,') / 2'
    close_range = (stave[2] - staff_start)/2
    starts_list = range(staff_start - close_range, staff_start + close_range + 1) # + 1 because range() doesn't include the last element
    starts_list.remove(staff_start)
#     starts_list = [staff_start - 2, staff_start - 1, staff_start + 1, staff_start + 2] #%% S I M P L E S T  :D  %%%%
    intersection = [start for start in starts_list if start in staves]
    return intersection == [] # Return value can "easily" be changed to list of matches
    # EXAMPLE: return [-3, 1, 5] if there are staves beginning 3px ABOVE, 1px BELOW, or 5px BELOW this staff
    # OR: return a list of staves that are close: [staves[start] for start in starts_list if start in staves]

def locate_lines_with_runs(lines_only_img):
    print 'lines_only_img.shape', lines_only_img.shape
    img_height, img_width = lines_only_img.shape # unpack values
    
    # runs calculation, for thickness and spacing
    runs, black_runs_flat, white_runs_flat = calculate_runs(lines_only_img)
    line_thickness = calculate_line_thickness(black_runs_flat)
    line_spacing = calculate_line_spacing(white_runs_flat, img_height)
    
    # get the columns that we will check for lines
    # columns = [img_width/5, img_width/5+1, img_width/5+2, img_width/3, img_width/3+1, img_width/3+2]
    columns_to_check = [img_width/5, img_width/4, img_width/3, img_width/2, img_width/3*2, img_width/4*3]
    # w/5, w/4, w/3, w/2, w/3*2, w/4*3 # for a 2400px wide image, values are: 480 600 800 1200 1600 1800
    print 'columns_to_check', columns_to_check
    
    search_width = 3 # how wide will we look arround each pixel - to BOTH sides!
    resulting_staves = {} # DICTIONARY!
    
    for col_to_check in columns_to_check:
        print '\n -- -- -- ~~ -- -- ~~ -- ~~ -- ~~\nchecking around column', col_to_check
        start_column = col_to_check - search_width
        end_column = col_to_check + search_width
        column_range = range(start_column, end_column + 1) # + 1 because range() doesn't include the last element
        print 'range(start_column, end_column)', column_range
        for col in column_range:
            print '\n ~~~~~~~~\n  checking column >>>', col, '<<<'
            image_column = lines_only_img[0:img_height, col].tolist() # convert numpy array to python list
            num_of_staves, staves = find_staves_in_runs(runs[col], image_column, line_thickness, line_spacing)
            if (num_of_staves > 0):
                for staff_start in staves: # iterate through keys # or something like that
                    # IF there is NO staffs that are close to this one (on y-axis)
                    if (no_staves_are_close(resulting_staves, staves[staff_start], staff_start)):
                        resulting_staves[staff_start] = staves[staff_start]
                break # !!! no need to check arround the `col_to_check` any more - go to next part of the image
    return resulting_staves # , stave_possibilities


In [212]:
def dilate_and_save(img, kernel_w, kernel_h):
    '''Dilates a binary image with kernel of specified dimensions.
    saves the image to hard drive and returns the saved image'''
    kernel = np.ones((kernel_w, kernel_h), dtype=np.int)
    lines_only_img = cv2.dilate(img, kernel, iterations=1)
    lines_only_img_path = './images/locate_lines/dil_{}_{}.jpg'.format(kernel_w, kernel_h)
    cv2.imwrite(lines_only_img_path, lines_only_img)
    return lines_only_img

def erode_and_save(img, kernel_w_e, kernel_h_e, kernel_w_d=0, kernel_h_d=0):
    '''Erodes a binary image with kernel of specified dimensions.
    saves the image to hard drive and returns the saved image'''
    if (kernel_w_d == 0):
        dil_str = ''
    else:
        dil_str = '_dil_{}_{}'.format(kernel_w_d, kernel_h_d)
    kernel = np.ones((kernel_w_e, kernel_h_e), dtype=np.int)
    lines_only_img = cv2.erode(img, kernel, iterations=1)
    lines_only_img_path = './images/locate_lines/er_{}_{}{}.jpg'.format(kernel_w_e, kernel_h_e, dil_str)
    cv2.imwrite(lines_only_img_path, lines_only_img)
    return lines_only_img

### Two approaches on getting the lines-only image:
####  |__ 1: dilate and then erode with a very wide kernel (written right below this heading)
####  |__ 2: use the previous approach's image + remove all non-staff-line symbols (implementation started at the end of this file)

In [213]:
def get_lines_only_img(img):
    threshold = cv2.ADAPTIVE_THRESH_MEAN_C
    # # # # # # # # # # # # # # # # # # # # #
    #  ...................................  # ##  TRY WITH BLOCK_C VALUES: ##
    # /|| TRY WITH  35_13  45_11  55_9  ||\ # ##     35_13 45_11 55_9      ##
    # ''''''''''''''''''''''''''''''''''''' #
    method, block, c = cv2.THRESH_BINARY, 55, 9 # 35, 13  || 45, 11
    img_ada = cv2.adaptiveThreshold(img, 255, threshold, method, block, c)

    # dilate with 1x50 --> erode with 2x50  | this has the best results
    kernel_w, kernel_h = 1, 50
    dilated = dilate_and_save(img_ada, kernel_w, kernel_h)
    kernel_w_e, kernel_h_e = 2, 50
#     lines_only_img = erode_and_save(dilated, kernel_w, kernel_h) # same kernel as dilation
    return erode_and_save(dilated, kernel_w_e, kernel_h_e, kernel_w, kernel_h)

def print_staff_locations(staff_locations):
    staves_keylist = staff_locations.keys()
    staves_keylist.sort()
    print '\nSORT %d staves and their lines by y-coordinates' % (len(staff_locations))
    # print staff_locations
    for staves_key in staves_keylist:
        print "STAVE AT Y = %s" % (staves_key)
        staff = staff_locations[staves_key][1]
        staff_keylist = staff.keys()
        staff_keylist.sort()
        for staff_key in staff_keylist:
            print '\tstaff_line %s: %s' % (staff_key, staff[staff_key])
        print ''
    return 0
# # #%>> SAME CODE BELOW <<%# ## ## almost the same??
#     print staff_locations
#     staves_keylist = staff_locations.keys()
#     staves_keylist.sort()
#     print '\nSORT BY y-coordinates'
#     for staves_key in staves_keylist:
#         print "STAVE AT Y = %s" % (staves_key)
#         staff = staff_locations[staves_key][1]
#         staff_keylist = staff.keys()
#         staff_keylist.sort()
#         for staff_key in staff_keylist:
#             print '\tstaff_line %s: %s' % (staff_key, staff[staff_key])
#         print ''

def get_line_locations(img, print_result=True):
    '''Locate lines.
    Returns: staff lines LOCATIONS and the lines-only image'''
    lines_only_img = get_lines_only_img(img)
    # lines_only_img = get_lines_only_img_CLEAR(img) # approach 2 - maybe in the future
    
    # use the lines-only image to locate the staves
    staff_locations = locate_lines_with_runs(lines_only_img)
    
    if (print_result):
        print_staff_locations(staff_locations)
    return staff_locations
    
img = cv2.imread('./images/dataset/chic/chicago_white_up.jpg', 0) # 0 --> read as grayscale
staf_locs = get_line_locations(img)

lines_only_img.shape (3264L, 2448L)
Top 4 most common black runs: [(4, 26066), (3, 5436), (5, 3897), (1, 1156)]
>>> line thickness:   3.5
Top 4 most common white runs [(16, 18485), (15, 8382), (17, 1890), (1910, 786)]
line spacing:  15.5
columns_to_check [489L, 612L, 816L, 1224L, 1632L, 1836L]

 -- -- -- ~~ -- -- ~~ -- ~~ -- ~~
checking around column 489
range(start_column, end_column) [486, 487, 488, 489, 490, 491, 492]

 ~~~~~~~~
  checking column >>> 486 <<<
 ~~ get_staff_with_spaces ~~
  checking:

	 run_index, 0 / 42 	 runs: [5, 517, 5, 15, 4, 16, 3, 17, 4, 16, 4, 294, 4, 15, 4, 16, 4, 16, 4, 15, 4, 289, 4, 16, 4, 15, 4, 16, 3, 16, 4, 286, 5, 15, 4, 15, 4, 16, 4, 15, 4, 1543]
 ~~ get_staff_with_spaces ~~
  checking:

	 run_index, 2 / 42 	 runs: [5, 517, 5, 15, 4, 16, 3, 17, 4, 16, 4, 294, 4, 15, 4, 16, 4, 16, 4, 15, 4, 289, 4, 16, 4, 15, 4, 16, 3, 16, 4, 286, 5, 15, 4, 15, 4, 16, 4, 15, 4, 1543]
we got them linez! [8)
 ~~ get_staff_with_spaces ~~
  checking:

	 run_index, 12 / 42 

In [None]:
# # # # # # # ########
# testing for methods
# find_four_lines_below(...) and find_lines_up_or_down(...)

# column = [0]*4 + [255]*20 + [0]*5 + [255]*21 + [0]*4 + [255]*20 # [0,0,0,0,255,19_more_255, 0,0...]
# row_start, black, line_thickness, line_spacing = 23, 5, 4, 20
# row_start, black, line_thickness, line_spacing = 49, 4, 4, 20
# find_lines_up_or_down(column, row_start, black, line_thickness, line_spacing)
# row_start, black, line_thickness, line_spacing = 0, 4, 4, 20
# find_four_lines_below(column, row_start, black, line_thickness, line_spacing)

In [None]:
runs = [[1,2,3], [4,5,6], [7,8,9], [10,11,12], [13,14,15]]
iiii = 2
print runs[iiii-1 : iiii+2] # gets the runs on the left and right
xxx = np.ones((5,5),dtype=np.int)
xxx[0:5, 2].tolist()

In [27]:
def last_test():
#     img = cv2.imread('./images/dataset/sviraj_up.jpg', 0) # read the image as GRAYcscale
    img_name = 'composicion'
    root_path = './images/dataset/' + img_name + '/'
    img = cv2.imread(root_path + img_name + '_up.jpg', 0) # read the image as GRAYcscale
    
    adaptiveMethod, thresholdType, blockSize, C = cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 35
    img_ada = cv2.adaptiveThreshold(img, 255, adaptiveMethod, thresholdType, blockSize, C)
#     img_ada = cv2.erode(img_ada, np.ones((1,7), dtype=np.int)) # thickens the lines
    
    runs, black_runs_flat, white_runs_flat = calculate_runs(img_ada) # runs calculation, for thickness and spacing
    line_thickness = calculate_line_thickness(black_runs_flat)
    line_spacing = calculate_line_spacing(white_runs_flat, img_ada.shape[0]) # calculation needs image height
    
    lines_only_img = get_lines_only_img(img) # ...
    # line_thickness = 3 # CHEAT LINE
    erode_width = int(line_thickness+1) # set to be 4 allways?
    lines_only_img = cv2.erode(lines_only_img, np.ones((erode_width, 1), dtype=np.int)) # thickens the lines
    
    thickness_mul = 1.5 # staff_thickness_multiplier
#     thickness_mul = 2 # staff_thickness_multiplier
    
#     we can send the grayscale image, also :D  Just send `img` instead of `img_ada`
    result = remove_staff_lines_with_lines_only(img_ada, runs, line_thickness, thickness_mul, lines_only_img)
    result_path = root_path + 'RM_S_L_LINES_ONLY_b{}_c{}_ada{}_er_kw{}.jpg'.format(blockSize, C, adaptiveMethod, erode_width)
    cv2.imwrite(result_path, result)
    
    runs, black_runs_flat, white_runs_flat = calculate_runs(img_ada) # runs calculation, for thickness and spacing
    line_thickness = calculate_line_thickness(black_runs_flat)
    line_spacing = calculate_line_spacing(white_runs_flat, img_ada.shape[0]) # needs image height
    
    thickness_mul = 1.5 # staff_thickness_multiplier
    
#     # Remove staff lines - RUNS algorithm ~~Simplest~~ (Aleksandar's)
#     rm_s_l = remove_staff_lines(img_ada, runs, line_thickness, thickness_mul)
#     path_regular = root_path + 'b_{}_c_{}_RM_S_L.jpg'
#     cv2.imwrite(path_regular.format(blockSize, C), rm_s_l)
    
#     # Remove staff lines - UP-DOWN algorithm (Aleksandar's)
#     rm_s_l_up_down = rm_staff_lines_up_down_neighbours(img_ada, runs, line_thickness, line_spacing, thickness_mul)
#     path_up_down = root_path + 'b_{}_c_{}_RM_S_L_UP_DOWN.jpg'
#     cv2.imwrite(path_up_down.format(blockSize, C), rm_s_l_up_down)
    
#     # Remove staff lines - SIDE algorithm (Filip's) # -----%%%%%%====%%%%  ADDED  LINES-ONLY-IMAGE %%%%%%%%==%%%==%%-
#     distance = int(line_spacing * 2.5)+1 ### check for: * 1, * 1.3, * 1.5
#     rm_s_l_side = rm_staff_lines_side_neighbours(img_ada, runs, line_thickness, line_spacing, thickness_mul, distance, lines_only_img)
#     path_side = root_path + 'ADA_b_{}_c_{}_RM_S_L_SIDE_dist_{}.jpg'
#     cv2.imwrite(path_side.format(blockSize, C, distance), rm_s_l_side)
    
#     distance = int(line_spacing * 1.7)+1 ### check for: * 1, * 1.3, * 1.5
#     rm_s_l_side = rm_staff_lines_side_neighbours(img_ada, runs, line_thickness, line_spacing, thickness_mul, distance, lines_only_img)
#     path_side = root_path + 'ADA_b_{}_c_{}_RM_S_L_SIDE_dist_{}.jpg'
#     cv2.imwrite(path_side.format(blockSize, C, distance), rm_s_l_side)

last_test()

Top 4 most common black runs: [(3, 11529), (4, 5652), (1, 3340), (2, 1486)]
>>> line thickness:   3.5
Top 4 most common white runs [(17, 8749), (16, 3693), (18, 1154), (297, 961)]
line spacing:  16.5
Top 4 most common black runs: [(3, 11529), (4, 5652), (1, 3340), (2, 1486)]
>>> line thickness:   3.5
Top 4 most common white runs [(17, 8749), (16, 3693), (18, 1154), (297, 961)]
line spacing:  16.5




In [85]:
qqq = {5:3} # 0 1 1 2 3
test_list = [6,7,8,9]
intersection = [i for i in test_list if i in qqq]
print(intersection)

[]
True
1.4 5.6


In [118]:
import os

def read_images(folder_path):
    if not (folder_path.endswith('/')):
        folder_path += '/'
    print os.listdir(folder_path)
    images = []
    image_names = os.listdir(folder_path)
    for image_name in image_names:
        images.append(cv2.imread(folder_path + image_name, 0)) # `0` --> read images as grayscale
    return image_names, images

# print len(read_images('./images/dataset/newest'))

In [None]:
def process_images_in_folder(folder_path):
    image_names, images = read_images(folder_path)
    for i in range(len(images)):
        img = images[i]
        img_name = image_names[i]
        
        adaptiveMethod, thresholdType, blockSize, C = cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 55, 35
        
        img_ada = cv2.adaptiveThreshold(img, 255, adaptiveMethod, thresholdType, blockSize, C)
    #     img_ada = cv2.erode(img_ada, np.ones((1,7), dtype=np.int)) # thickens the lines

        runs, black_runs_flat, white_runs_flat = calculate_runs(img_ada) # runs calculation, for thickness and spacing
        line_thickness = calculate_line_thickness(black_runs_flat)
        line_spacing = calculate_line_spacing(white_runs_flat, img_ada.shape[0]) # calculation needs image height

        lines_only_img = get_lines_only_img(img) # ...
        # line_thickness = 3 # CHEAT LINE
        erode_width = int(line_thickness+1) # set to be 4 allways?
        lines_only_img = cv2.erode(lines_only_img, np.ones((erode_width, 1), dtype=np.int)) # thickens the lines

        thickness_mul = 1.5 # staff_thickness_multiplier
    #     thickness_mul = 2 # staff_thickness_multiplier

    #     we can send the grayscale image, also :D  Just send `img` instead of `img_ada`
        result = remove_staff_lines_with_lines_only(img_ada, runs, line_thickness, thickness_mul, lines_only_img)
        result_path = folder_path + img_name + 'RM_S_L_LINES_ONLY_b{}_c{}_ada{}_er_kw{}.jpg'
        cv2.imwrite(result_path.format(blockSize, C, adaptiveMethod, erode_width), result)
    return 0

process_images_in_folder('./images/dataset/newest/')

## Second approach in generating the lines-only image  --==> (*more robust*) 

In [None]:
# Removes any black runs from the input image that are bigger
# than the average line_thickness * multiplier (currently = 1.7)
def rm_everything_but_lines(lines_only_img):
    img_height, img_width = lines_only_img.shape # unpack values
    
    # runs calculation, for thickness and spacing
    runs, black_runs_flat, white_runs_flat = calculate_runs(lines_only_img)
    line_thickness = calculate_line_thickness(black_runs_flat)
    line_spacing = calculate_line_spacing(white_runs_flat, img_height)
    
    staff_thickness_multiplier = 1.7 # 1.6, 1.8 ??
    
    clear_img = lines_only_img.copy()
    #edit the image
    for c in xrange(len(runs)):        # for every column
        cumulative = 0 # initialize the number of passed pixels
        for r in xrange(len(runs[c])): # for every run
            run_length = runs[c][r]
            if (r % 2 == 0): # black runs # every black run longer than 2 * line_height is deleted/whitened
                if (run_length > line_thickness * staff_thickness_multiplier):
                    clear_img[cumulative:cumulative + run_length, c] = [255]*(run_length)
            cumulative += run_length
    
    #%% % % % % % % % % % % %%
    #  R E C O N S T R U C T   T H E   I M A G E  with  MY_KERNEL !!!! !!   <<< GREAT idea! >>>
    #%% % % % % % % % % % % %%
    
    # dilate with 1x KW --> erode with 2x KW   ||||||  ==~~--- BAD IDEA CURRENTLY ~~--=== |||||
    kernel_w, kernel_h = 1, 71
    dilated = dilate_and_save(clear_img, kernel_w, kernel_h)
    kernel_w_e, kernel_h_e = 2, 71
    clear_lines_only_img = erode_and_save(dilated, kernel_w, kernel_h) # same kernel as dilation
    
    return clear_lines_only_img

def get_lines_only_img_CLEAR(img):
    threshold = cv2.ADAPTIVE_THRESH_MEAN_C
    method, block, c = cv2.THRESH_BINARY, 55, 9
    img_ada = cv2.adaptiveThreshold(img, 255, threshold, method, block, c)

    # dilate with 1x50 --> erode with 2x50  | this has the best results
    kernel_w, kernel_h = 1, 50
    dilated = dilate_and_save(img_ada, kernel_w, kernel_h)
    kernel_w_e, kernel_h_e = 2, 50
    lines_only_img = erode_and_save(dilated, kernel_w, kernel_h) # same kernel as dilation
    
    clear_lines_only_img = rm_everything_but_lines(lines_only_img)
    
    return clear_lines_only_img

In [185]:
nnn = range(-3, 3)
nnn.remove(0) ## returns None, but `nnn` is -3,-2,-1,1,2,3
mmm = {2:4, 3:6} # 2 in mmm True, 4 in mmm False
for x in mmm:
    print x

2
3
