In [3]:
%matplotlib inline
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Image loading, binarization, inversion and display
def load_image(path):
    return cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
def image_gray(image):
    return cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
def image_bin_otsu(image_gs):
    ret,image_bin = cv2.threshold(image_gs, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)
    return image_bin
def invert(image):
    return 255-image
def display_image(image, color= False):
    plt.figure()
    if color:
        plt.imshow(image)
    else:
        plt.imshow(image, 'gray')
    plt.show()
    
# Image morphological operations
def dilate(image, kernel, iterations = 1):
    return cv2.dilate(image, kernel, iterations)
def erode(image, kernel, iterations = 1):
    return cv2.erode(image, kernel, iterations)
def open_image(image, kernel = None):
    if kernel is None:
        kernel = np.ones((1, 100))
    return dilate(erode(image, kernel), kernel)

# Horizontal projection
def horizontal_projection(image):
    hor_proj = []
    for i in range(len(image)):
        row_sum = 0
        for j in range(len(image[i])):
            row_sum += image[i][j] == 255
        hor_proj.append([255] * row_sum + [0] * (len(image[0]) - row_sum))

    return hor_proj

# Image crop
def crop_image(image, crop_start = None, crop_width = None):
    if crop_width is None:
        crop_width = len(image[0]) // 10
        
    if crop_start is None:
        end = 0
        for row in image:
            s = sum(row) / 255
            if s > end:
                end = s

        crop_start = end - crop_width
        
    cutoff = image[:]
    
    for i in range(len(cutoff)):
        cutoff[i] = cutoff[i][crop_start : crop_start + crop_width] 

    cutoff = np.array(cutoff, dtype = np.uint8)
    return cutoff

# Find Y coordinates of white pixels
def find_y(image):
    y = []
    for i in range(len(image)):
        for j in range(len(image[i])):
            if (image[i][j] == 255) and (i not in y):
                y.append(i)
    return sorted(y)

# Intersect two lists
def intersect_lists(first, second):
    ret_val = []
    for val in first:
        if val in second:
            ret_val += [val]
    return ret_val

# Group points and get distances
def label_y(y_list):
    labels = [[]]
    line_distances = []
    prev_y = None
    for y in y_list:
        if prev_y is not None:
            if y - prev_y > 1:
                labels.append([])
                line_distances += [y - prev_y]
        labels[-1] += [y]
        prev_y = y
    return labels, line_distances

# Find lines
def find_lines(image):
    first = find_y(crop_image(horizontal_projection(image)))
    second = find_y(open_image(image))
    
    
    lines, distances = label_y(intersect_lists(first, second))
    staff_spacings = [distances[i] for i in range(len(distances)) if (i+1) % 5 != 0 ]
    staff_spacing = sum(staff_spacings) * 1./len(staff_spacings)
    return lines, distances, staff_spacing

# Remove lines
def remove_lines(org_image, tolerance = 0, lines = None, topBotPixelRemoval = True, widthBasedRemoval = True):
    image = org_image.copy()
    
    if lines == None:
        lines, distances, staff_spacing = find_lines(org_image)
    
    if topBotPixelRemoval:
        for line in lines:
            top = line[0]
            bot = line[-1]
            for j in range(len(image[top])):
                remove = True
                is_line = False
                for row in image[top:bot+1]:
                    if row[j] == 255:
                        is_line = True
                        break
                if not is_line:
                    continue
                # check 2 pixels above and below
                diff = 2
                for row in image[top - diff : top]:
                    if row[j] == 255:
                        remove = False
                        break
                if remove:
                    for row in image[bot + 1: bot + diff + 1]:
                        if row[j] == 255:
                            remove = False
                            break
                if remove:
                    for row in image[top:bot+1]:
                        row[j] = 0
    
    if widthBasedRemoval:
        avg_thickness = lines[:]
        for i, line in enumerate(avg_thickness):
            avg_thickness[i] = len(line)
        avg_thickness = sum(avg_thickness) * 1./len(avg_thickness)

        for j in range(len(image[0])):
            white = False
            for i in range(len(image)):
                if image[i][j] == 255:
                    if not white:
                        start = i
                    white = True
                else:
                    if white:
                        thickness = i - start
                        if thickness <= (avg_thickness + tolerance):
                            for row in image[start : i]:
                                row[j] = 0
                    white = False
    return image

# Vertical lines recognition
def add_region(image, row, col, regions):
    coords = [(row, col)]
    idx = 0
    while (idx < len(coords)):
        row, col = coords[idx]
        for dr in range(-1,2):
            for dc in range(-1,2):
                r = row + dr
                c = col + dc
                if r >= 0 and c >= 0 and r < len(image) and c < len(image[r]):
                    if image[r][c] == 255 and ((r,c) not in coords):
                        for region in regions:
                            if (r,c) in region:
                                for coord in coords:
                                    region.append((r,c))
                                    return
                        coords += [(r,c)]
        idx += 1
    regions.append(coords)

def find_vertical_lines(image):
    # Find lines, distances
    lines, distances, staff_spacing = find_lines(image)

    # Find vertical objects
    img_open = open_image(remove_lines(image), np.ones((1.5 * staff_spacing, 1)))
    return img_open

def find_vertical_objects(image, image_vert_lines):
    # Label regions of interest
    regions = []
    for row in range(len(image_vert_lines)):
        for col in range(len(image_vert_lines[row])):
            if image_vert_lines[row][col] == 0:
                continue
            isFound = False
            for region in regions:
                if (row,col) in region:
                    isFound = True
                    break
            if isFound:
                continue
            add_region(image, row, col, regions)
    
    img_regions = image.copy()
    for row in range(len(img_regions)):
        for col in range(len(img_regions[row])):
            img_regions[row][col] = 0

    for region in regions:
        for row, col in region:
            img_regions[row, col] = 255
            
    return img_regions, regions

def split_image(image, regions):
    split_images = []
    for region in regions:
        minr = min([r for r,c in region])
        maxr = max([r for r,c in region])
        minc = min([c for r,c in region])
        maxc = max([c for r,c in region])
        sub_image = []
        for row in range(minr,maxr+1):
            sub_image.append([])
            for col in range(minc,maxc+1):
                sub_image[-1] += [image[row][col]]
        sub_image = np.array(sub_image)
        sub_image = np.uint8(sub_image)
        split_images.append(sub_image)
    return split_images

In [34]:
def create_ann(output_dim):
    
    ann = Sequential()
    # Postavljanje slojeva neurona mreže 'ann'
    ann.add(Dense(input_dim=5000, output_dim=output_dim,init="glorot_uniform"))
    ann.add(Activation("sigmoid"))
    return ann
    
def train_ann(ann, X_train, y_train):
    X_train = np.array(X_train, np.float32)
    y_train = np.array(y_train, np.float32)
   
    # definisanje parametra algoritma za obucavanje
    sgd = SGD(lr=0.01, momentum=0.9)
    ann.compile(loss='mean_squared_error', optimizer=sgd)

    # obucavanje neuronske mreze
    ann.fit(X_train, y_train, nb_epoch=500, batch_size=1, verbose = 0, shuffle=False, show_accuracy = False) 
      
    return ann


In [5]:
def resize_image(tmp_img, new_width, new_height):
    height, width = tmp_img.shape[:2]
    return cv2.resize(tmp_img, (new_width, new_height), interpolation = cv2.INTER_CUBIC)

In [6]:
from os import listdir

alphabet = []
templateNames = []
templates = []
for listedFile in listdir("templates"):
    for innerFile in listdir("templates/%s" % listedFile):
        templateNames += ["templates/%s/%s" % (listedFile, innerFile)]
        template = load_image("templates/%s/%s" % (listedFile, innerFile))
        template = resize_image(template,50,100)
        template = image_gray(template)
        template = image_bin_otsu(template)
        template = invert(template)
        templates += [template]
        alphabet += [innerFile[:innerFile.rfind('_')]]

In [14]:
def scale_to_range(image):
    return image / 255
def matrix_to_vector(image):
    return image.flatten()
def prepare_for_ann(regions):
    ready_for_ann = []
    for region in regions:
        ready_for_ann.append(matrix_to_vector(scale_to_range(region)))
    return ready_for_ann

def convert_output(outputs):
    return np.eye(len(outputs))

In [2]:
from keras.models import Sequential
from keras.layers.core import Dense,Activation
from keras.optimizers import SGD

In [15]:
training_input = prepare_for_ann(templates)

In [35]:
training_input = prepare_for_ann(templates)
training_output = convert_output(alphabet)
ann = create_ann(len(training_output))
ann = train_ann(ann, training_input, training_output)

In [36]:
org_image = load_image("test_images/staff-with-notes.jpg")
img_gray = image_gray(org_image)
img_otsu = image_bin_otsu(img_gray)
inv_img = invert(img_otsu)
img_wo_lines = remove_lines(inv_img)
img_vert_lines = find_vertical_lines(inv_img)
img_vert_objects, regions = find_vertical_objects(img_wo_lines, img_vert_lines)
objects = split_image(img_vert_objects, regions)

In [37]:
inputs_test = prepare_for_ann([resize_image(objects[0],50,100)])
results_test = ann.predict(np.array(inputs_test, np.float32))
print(results_test)
ind = np.argmax(results_test[0])
print(sum(results_test[0]))
print(ind)
print(results_test[0][ind])
print(alphabet[ind])

[[ 0.01638367  0.02309984  0.01775421  0.02247672  0.01516505  0.03768541
   0.01607686  0.01781115  0.01288471  0.01631068  0.03652634  0.62073183
   0.17389762  0.04419015  0.0061994   0.03286391  0.01645125  0.02150239
   0.01138061  0.00826756  0.0075313   0.05898105  0.00613209  0.01568887
   0.00678507  0.02647189  0.03325041  0.01608105  0.01936741  0.04791012
   0.0233353   0.01036937  0.01023917  0.02485711  0.0086211   0.01073089
   0.01256814  0.01564521  0.03845161  0.00175654  0.00728172  0.00653694
   0.02514989  0.00724032  0.04337647  0.01170169  0.01254442  0.00992273
   0.01999038  0.01003421  0.02223123  0.0195278   0.01309082  0.03089092
   0.01465366  0.01219434  0.00938048  0.01327282  0.03678793  0.02809142
   0.04320168  0.02612955  0.01930018  0.01546378  0.02236164  0.01366942
   0.00962469  0.01760738]]
2.08369159955
11
0.620731830597
g_clef
