# Optical Music Recognition
**By: Sarah Alabdulwahab & Asma Althakafi**
> Optical Music Recognition (OMR) is software that essentially teaches a machine to read sheet music. The aim of this project is to develop a sheet music reader that classifies each musical note and then produces an audio file of the music it has read.

In [1]:
#suppress warnings
import warnings
warnings.filterwarnings('ignore')

from tqdm import tqdm
import numpy as np
from pre_processing import *
from scipy.ndimage import binary_fill_holes
from skimage.morphology import thin

#Predefined models from github.com/aashrafh
from commonfunctions import *
from staff import coordinator
from segmenter import Segmenter
from connected_componentes import *
from fit import predict
from box import Box

In [2]:
# Note-name lookup used as label_map[line_idx][p], where (line_idx, p) is the
# pair returned by estim(); entry 0 is the 'N0' label used for estim()'s
# (0, 0) "no match" result.
label_map = {
    0: {0: 'N0'},
    1: {0: 'b2', 1: 'a2'},
    2: {0: 'g2', 1: 'f2'},
    3: {0: 'e2', 1: 'd2'},
    4: {0: 'c2', 1: 'b1'},
    5: {0: 'a1', 1: 'g1'},
    6: {0: 'f1', 1: 'e1'},
    7: {0: 'd1', 1: 'c1'},
}

In [3]:
def read_all_images(num_of_images):
    """Load and binarise the files ``Notes/1.png`` .. ``Notes/<num_of_images>.png``.

    Each file is read with ``io.imread``, passed through ``gray_img`` and then
    through ``get_thresholded`` using the value returned by ``threshold_otsu``.

    Parameters
    ----------
    num_of_images : int
        How many consecutively numbered files to read, starting at 1.

    Returns
    -------
    list
        The thresholded images, in file-number order.
    """
    def _load_binary(number):
        # Files are numbered from 1, not 0.
        gray = gray_img(io.imread(f'Notes/{number}.png'))
        return get_thresholded(gray, threshold_otsu(gray))

    return [_load_binary(number) for number in range(1, num_of_images + 1)]

In [4]:
def segmenting(img):
    """Split an image into staff regions and run ``coordinator`` on each one.

    The image is thresholded (Otsu) and handed to ``Segmenter``; every entry
    of ``segmenter.regions_with_staff`` is then passed to ``coordinator``
    together with ``IsHorizontal(region)``.

    Returns
    -------
    tuple
        ``(segmenter, imgs_spacing, imgs_rows, coord_imgs, imgs_with_staff)``
        where the three middle lists hold, per region, the three values
        returned by ``coordinator`` (named spacing, rows and no_staff_img
        here) and ``imgs_with_staff`` is ``segmenter.regions_with_staff``.
    """
    binary = get_thresholded(img, threshold_otsu(img))  # binary image
    segmenter = Segmenter(binary)
    imgs_with_staff = segmenter.regions_with_staff

    imgs_spacing, imgs_rows, coord_imgs = [], [], []
    for region in imgs_with_staff:
        spacing, rows, no_staff_img = coordinator(region, IsHorizontal(region))
        imgs_spacing.append(spacing)
        imgs_rows.append(rows)
        coord_imgs.append(no_staff_img)

    return segmenter, imgs_spacing, imgs_rows, coord_imgs, imgs_with_staff

In [5]:
def estim(c, idx, imgs_spacing, imgs_rows):
    """Locate coordinate ``c`` relative to the line positions of region ``idx``.

    With ``margin = 1 + imgs_spacing[idx] / 4``, the lines in
    ``imgs_rows[idx]`` are scanned in order and the first match wins:

    * ``line - margin <= c <= line + margin``       -> ``(n, 0)``
    * ``line + margin <= c <= line + 3 * margin``   -> ``(n, 1)``

    where ``n`` is the 1-based position of the line in ``imgs_rows[idx]``.

    Returns
    -------
    tuple
        ``(n, flag)`` as described above, or ``(0, 0)`` when no line matches.
    """
    tolerance = 1 + imgs_spacing[idx] / 4
    for line_no, line in enumerate(imgs_rows[idx], start=1):
        # Within one margin either side of the line itself.
        if line - tolerance <= c <= line + tolerance:
            return line_no, 0
        # In the band directly after the line (larger coordinate values).
        if line + tolerance <= c <= line + 3 * tolerance:
            return line_no, 1
    return 0, 0

In [6]:
def filter_beams(prims, prim_with_staff, bounds):
    """Drop every component whose second dimension is at least twice its first.

    The test ``prim.shape[1] >= 2 * prim.shape[0]`` is applied to each entry
    of ``prims``; the entries at the same positions in ``prim_with_staff`` and
    ``bounds`` are dropped along with it. (Presumably this removes wide,
    beam-like shapes, going by the function name.)

    Returns
    -------
    tuple
        ``(prims, prim_with_staff, bounds)`` restricted to the kept positions,
        each as a new list in the original order.
    """
    kept = [
        pos for pos, prim in enumerate(prims)
        if prim.shape[1] < 2 * prim.shape[0]
    ]
    return (
        [prims[pos] for pos in kept],
        [prim_with_staff[pos] for pos in kept],
        [bounds[pos] for pos in kept],
    )

In [7]:
def get_labeled_data(img):
    """Extract labelled samples from one sheet image.

    The image is split with ``segmenting``; every connected component of each
    staff-removed region is classified with ``predict`` and, depending on the
    predicted symbol, one or more positions are passed to ``estim`` to look up
    a name in ``label_map``.

    Returns
    -------
    tuple
        ``(labels_list, images_list)``: two parallel lists. Each label is a
        ``label_map`` value and the matching image is ``prim_with_staff[j]``
        for the component it came from. A component may contribute zero, one
        or several entries.
    """
    labels_list, images_list = [], []
    segmenter, imgs_spacing, imgs_rows, coord_imgs, imgs_with_staff = segmenting(img)
    # Classifier labels handled by the dilation-based branch below
    # (presumably the filled-head symbols, going by the name - TODO confirm).
    black_names = ['4', '8', '8_b_n', '8_b_r', '16', '16_b_n', '16_b_r', '32', '32_b_n', '32_b_r', 'a_4', 
                   'a_8', 'a_16', 'a_32', 'chord']
    # Structuring-element radius shared by the dilation and closing steps.
    disk_size = segmenter.most_common / 4
    for i, img in enumerate(coord_imgs):
        primitives, prim_with_staff, boundary = get_connected_components(img, imgs_with_staff[i])
        for j, prim in enumerate(primitives):
            prim = binary_opening(prim, square(segmenter.most_common-imgs_spacing[i]))
            # The classifier is fed the inverted primitive scaled to 0/255 uint8.
            label = predict((255*(1 - prim)).astype(np.uint8))[0]
            if label in black_names:
                test_img = binary_dilation(np.copy(prim_with_staff[j]), disk(disk_size))
                comps, comp_w_staff, bounds = get_connected_components(test_img, prim_with_staff[j])
                comps, comp_w_staff, bounds = filter_beams(comps, comp_w_staff, bounds)
                # Shift every box coordinate by (disk_size - 2); presumably this
                # compensates for the dilation above - TODO confirm.
                bounds = [np.array(bound)+disk_size-2 for bound in bounds]
                # Only label when at most one component is left, or the symbol is one
                # of the '_b_' variants; otherwise nothing is appended for this primitive.
                if len(bounds) <= 1 or label in ['8_b_n', '8_b_r', '16_b_n', '16_b_r', '32_b_n', '32_b_r']:
                    for bbox in bounds:
                        # bbox[2] is offset by boundary[j][0] before the lookup
                        # (presumably converting to region coordinates - verify).
                        line_idx, p = estim(int(bbox[2]+boundary[j][0]), i, imgs_spacing, imgs_rows)
                        labels_list.append(label_map[line_idx][p])
                        images_list.append(prim_with_staff[j])
            elif label in ['2', 'a_2']:
                # Fill holes in the inverted primitive, invert back, then close with a disk.
                head_img = binary_closing(1-binary_fill_holes(1-prim), disk(disk_size))
                comps, comp_w_staff, bounds = get_connected_components(head_img, prim_with_staff[j])
                for bbox in bounds:
                    line_idx, p = estim(int(bbox[2]+boundary[j][0]), i, imgs_spacing, imgs_rows)
                    labels_list.append(label_map[line_idx][p])
                    images_list.append(prim_with_staff[j])
            elif label in ['1', 'a_1']:
                # No sub-component search here: the primitive's own boundary value is used.
                line_idx, p = estim(int(boundary[j][2]), i, imgs_spacing, imgs_rows)
                labels_list.append(label_map[line_idx][p])
                images_list.append(prim_with_staff[j])
    return labels_list, images_list

## Data Collection

In [8]:
# Try the pipeline on a small subset first: only Notes/1.png .. Notes/50.png
all_images = read_all_images(num_of_images=50)
len(all_images)

50

In [9]:
# Label every loaded image and remember which source image each sample came from.
all_images_labels, all_images_arr, all_images_ids = [], [], []
# tqdm wraps the list itself rather than the enumerate object: enumerate has
# no len(), so wrapping it hides the total and tqdm cannot show a percentage/ETA.
for i, image in enumerate(tqdm(all_images)):
    labels_list, images_list = get_labeled_data(image)
    all_images_labels += labels_list
    all_images_arr += images_list
    # One id per sample so rows can be traced back to their source image.
    all_images_ids += [i] * len(labels_list)

50it [05:35,  6.72s/it]


In [10]:
#view some labels
all_images_labels[:10]

['b2', 'e2', 'b2', 'N0', 'b2', 'a2', 'a2', 'a2', 'g2', 'a2']

In [11]:
#check that all lengths match
len(all_images_labels) == len(all_images_arr) == len(all_images_ids)

True

## Store The Data

In [12]:
import pandas as pd

In [13]:
# One row per extracted sample: source image id, image array, and its label
df = pd.DataFrame({
    'ID': all_images_ids,
    'img': all_images_arr,
    'label': all_images_labels,
})
df.head()

Unnamed: 0,ID,img,label
0,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2
1,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",e2
2,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2
3,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",N0
4,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2


In [14]:
df.shape

(419, 3)

In [15]:
# Add one indicator column per distinct label, initialised to 0 for every row
# (the matching entries are set to 1 in the next cell).
unique_labels = df.label.unique()
for label in unique_labels:
    df[label] = 0
df.head()

Unnamed: 0,ID,img,label,b2,e2,N0,a2,g2,d2,c2,b1,e1,g1,f2,f1,a1,d1,c1
0,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",e2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",N0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [16]:
# One-hot encode: set the indicator column that matches each row's label to 1.
# A single .loc assignment is used instead of chained indexing
# (df[label][i] = 1): the chained form writes through a temporary Series, so it
# is not guaranteed to update df and is a silent no-op under pandas Copy-on-Write.
# Row labels come from df.index so the lookup stays label-based throughout.
for i, label in zip(df.index, df.label):
    df.loc[i, label] = 1
df.head()

Unnamed: 0,ID,img,label,b2,e2,N0,a2,g2,d2,c2,b1,e1,g1,f2,f1,a1,d1,c1
0,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",e2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",N0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0
4,0,"[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...",b2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0


## Unify the shape

In [17]:
def unify_shape(df):
    """Pad every image in ``df['img']`` with ones to one common shape.

    The target shape is the largest row count and the largest column count
    found among the images. Each image is extended at the bottom and then on
    the right with integer ones.

    The padded images are built separately and written back with a single
    column assignment. Chained indexing (``df['img'][i] = ...``) assigns
    through a temporary Series and is not guaranteed to update ``df``; it
    also addressed rows by label ``i``, which only works for a default
    0..n-1 index.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have an ``img`` column of 2-D numpy arrays. The column is
        replaced on this same object.

    Returns
    -------
    tuple
        ``(df, rows, cols)``: the same DataFrame object and the common
        image height and width.

    Raises
    ------
    ValueError
        If ``df`` has no rows (``max`` of an empty sequence).
    """
    images = list(df['img'])
    rows = max(image.shape[0] for image in images)
    cols = max(image.shape[1] for image in images)

    # A pre-allocated object array keeps each 2-D image as one cell value
    # instead of letting numpy/pandas try to stack them into a 3-D block.
    padded = np.empty(len(images), dtype=object)
    for pos, image in enumerate(images):
        add_rows = np.ones((rows - image.shape[0], image.shape[1]), dtype=int)
        image = np.vstack((image, add_rows))

        add_cols = np.ones((rows, cols - image.shape[1]), dtype=int)
        padded[pos] = np.hstack((image, add_cols))

    df['img'] = padded
    return df, rows, cols

In [18]:
# Pad all images to one common shape; rows/cols hold that shape and are read by reshape_x below.
df, rows, cols = unify_shape(df)

In [19]:
# Sanity check: print the shape of the first five images after unify_shape
for x in df.img[:5]:
    print(x.shape)

(139, 171)
(139, 171)
(139, 171)
(139, 171)
(139, 171)


## Training

In [20]:
from sklearn import preprocessing
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, GlobalAveragePooling2D, InputLayer

In [21]:
# Features are the image arrays; targets are the per-label indicator columns
# (everything except the ID, img and label columns).
X = np.asarray(df['img'])
y = np.asarray(df.drop(columns=['ID', 'img', 'label']))

In [22]:
X.shape

(419,)

In [23]:
#reshaping
def reshape_x(X):
    """Stack the images in ``X`` into one array with a trailing channel axis.

    Every element is reshaped to ``(rows, cols, 1)``, where ``rows`` and
    ``cols`` are the notebook-level values returned by ``unify_shape``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(len(X), rows, cols, 1)``.
    """
    target_shape = (rows, cols, 1)
    return np.asarray([np.reshape(x, target_shape) for x in X])

In [24]:
X = reshape_x(X)
X.shape

(419, 139, 171, 1)

In [25]:
y.shape

(419, 15)

In [26]:
# Hold out 20% of the samples as a test set; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [27]:
# Small CNN: convolution/pooling stages, global average pooling, then a softmax
# layer with one unit per distinct label.
NN = Sequential([
    InputLayer(input_shape=X.shape[1:]),
    Conv2D(filters=10, kernel_size=3, activation='relu', padding='same'),
    MaxPooling2D(),
    Conv2D(filters=20, kernel_size=3, activation='relu', padding='same'),
    Conv2D(filters=16, kernel_size=3, activation='relu', padding='same'),
    MaxPooling2D(),
    Conv2D(filters=30, kernel_size=3, activation='relu', padding='same'),
    GlobalAveragePooling2D(),
    Dense(df.label.nunique(), activation='softmax'),
])

# compile() only configures training (loss, optimizer, reported metrics);
# the weights are not updated until fit() is called.
NN.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
NN.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 139, 171, 10)      100       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 69, 85, 10)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 69, 85, 20)        1820      
                                                                 
 conv2d_2 (Conv2D)           (None, 69, 85, 16)        2896      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 34, 42, 16)       0         
 2D)                                                             
                                                                 
 conv2d_3 (Conv2D)           (None, 34, 42, 30)        4

In [28]:
# Train for 150 epochs, using 25% of the training split for validation;
# the trailing ';' suppresses the returned History repr in the cell output.
NN.fit(X_train, y_train, epochs=150, verbose=1, validation_split=0.25);

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150


Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150
Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150


Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150
Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150
