In [None]:
from omrdatasettools.Downloader import Downloader
from omrdatasettools.OmrDataset import OmrDataset
from omrdatasettools.MuscimaPlusPlusSymbolImageGenerator import MuscimaPlusPlusSymbolImageGenerator

The downloader module is used to download the raw dataset files and extract them into the ./data folder.

In [None]:
# Create the Downloader instance that the next cell uses to fetch the dataset
downloader = Downloader()

In [None]:
# Fetch the MUSCIMA++ v2 dataset and unpack it under data/muscima_pp_v2
downloader.download_and_extract_dataset(OmrDataset.MuscimaPlusPlus_V2, "data/muscima_pp_v2")

The generated raw files are then used to draw the images of the dataset on canvases and store them in different folders.

In [None]:
# Render the symbol masks of the raw dataset in ./data/muscima_pp_v2/ as images
# under ./data/muscima_pp_v2_symbols/.
# NOTE(review): the later cells treat every sub-folder of the output folder as
# one symbol class - presumably the generator writes one folder per class; confirm.
muscima_generator = MuscimaPlusPlusSymbolImageGenerator()
muscima_generator.extract_and_render_all_symbol_masks("./data/muscima_pp_v2/", "./data/muscima_pp_v2_symbols/")


Before training the models, the images first have to be resized to be a standard size.

First, let's find out what the maximum height and width are, and use those values to resize all the images.

In [15]:
import os
from PIL import Image

# Scan every rendered symbol image and record the largest width and height,
# so that all images can afterwards be brought to one common size.

# Root folder holding one sub-folder per symbol class. Built with os.path.join
# so the notebook also runs outside Windows, where a literal '.\\data\\...'
# would be read as a single (non-existent) file name.
directory = os.path.join('.', 'data', 'muscima_pp_v2_symbols')

# Keep only sub-directories: a stray file at the top level would make the
# os.listdir() call in the loop below raise NotADirectoryError.
files_and_dirs = [
    path
    for path in (os.path.join(directory, name) for name in os.listdir(directory))
    if os.path.isdir(path)
]
print(files_and_dirs)

# Both maxima start at 0, so the running max needs no special case for the
# first image.
max_width, max_height = 0, 0
for class_dir in files_and_dirs:
    for inner_file in os.listdir(class_dir):
        inner_file_path = os.path.join(class_dir, inner_file)
        if not os.path.isfile(inner_file_path):
            continue
        # The context manager closes the file handle even if reading fails.
        with Image.open(inner_file_path) as img:
            width, height = img.size
        max_width = max(max_width, width)
        max_height = max(max_height, height)

print("Maximum width:", max_width, "Maximum height:", max_height)

['.\\data\\muscima_pp_v2_symbols\\arpeggio', '.\\data\\muscima_pp_v2_symbols\\articulationAccent', '.\\data\\muscima_pp_v2_symbols\\articulationStaccato', '.\\data\\muscima_pp_v2_symbols\\articulationTenuto', '.\\data\\muscima_pp_v2_symbols\\augmentationDot', '.\\data\\muscima_pp_v2_symbols\\breathMark', '.\\data\\muscima_pp_v2_symbols\\characterCapitalA', '.\\data\\muscima_pp_v2_symbols\\characterCapitalC', '.\\data\\muscima_pp_v2_symbols\\characterCapitalE', '.\\data\\muscima_pp_v2_symbols\\characterCapitalF', '.\\data\\muscima_pp_v2_symbols\\characterCapitalL', '.\\data\\muscima_pp_v2_symbols\\characterCapitalM', '.\\data\\muscima_pp_v2_symbols\\characterCapitalP', '.\\data\\muscima_pp_v2_symbols\\characterCapitalR', '.\\data\\muscima_pp_v2_symbols\\characterCapitalS', '.\\data\\muscima_pp_v2_symbols\\characterCapitalT', '.\\data\\muscima_pp_v2_symbols\\characterCapitalV', '.\\data\\muscima_pp_v2_symbols\\characterDot', '.\\data\\muscima_pp_v2_symbols\\characterOther', '.\\data\\mus

We see that the maximum width is 1153, and the maximum height is 861. Now we have to bring all images in the dataset to this size, by padding them with white, to normalize the dataset for use in a Convolutional Neural Network.

In [17]:
import cv2
import os
import numpy as np

# Pad every symbol image, in place, to max_width x max_height: the original
# image is pasted into the centre of a white canvas and written back to the
# same path.

# White canvas of the target size that each image is pasted onto.
blank_image = np.full((max_height, max_width, 3), (255, 255, 255), dtype=np.uint8)

# Class folders are taken from `directory` (the folder that is listed), rather
# than from a second hard-coded path; plain files at the top level are ignored.
files_and_dirs = [
    path
    for path in (os.path.join(directory, name) for name in os.listdir(directory))
    if os.path.isdir(path)
]

for class_dir in files_and_dirs:
    for inner_file in os.listdir(class_dir):
        inner_file_path = os.path.join(class_dir, inner_file)
        if not os.path.isfile(inner_file_path):
            continue

        img = cv2.imread(inner_file_path)
        if img is None:
            # cv2.imread reports an unreadable or non-image file by returning
            # None instead of raising.
            print("Skipping unreadable file:", inner_file_path)
            continue

        old_image_height, old_image_width = img.shape[:2]
        if old_image_height == max_height and old_image_width == max_width:
            # Already at the target size (e.g. the cell was run before).
            continue
        if old_image_height > max_height or old_image_width > max_width:
            raise ValueError(
                f"{inner_file_path} is {old_image_width}x{old_image_height}, larger than "
                f"the target {max_width}x{max_height}; re-run the size scan first"
            )

        temp = blank_image.copy()
        # Offsets that centre the original image on the canvas
        x_center = (max_width - old_image_width) // 2
        y_center = (max_height - old_image_height) // 2
        temp[y_center:y_center + old_image_height, x_center:x_center + old_image_width] = img
        # Overwrite the original file with the padded version
        cv2.imwrite(inner_file_path, temp)

Now that the data is normalised, it is time to split the dataset and train the Convolutional Neural Network (CNN) and the Support Vector Machine (SVM).

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator

First we need to split the data into training, test and validation datasets.

In [None]:
# Build the training, validation and test generators for the CNN.
# The images are split 80/20 by Keras; the 20% hold-out is then split in half
# into a validation set and a test set.

# Root folder with one sub-folder per symbol class (portable spelling of
# '.\\data\\muscima_pp_v2_symbols').
data_dir = os.path.join('.', 'data', 'muscima_pp_v2_symbols')

# Size handed to Keras as target_size.
# NOTE(review): Keras documents target_size as (height, width), while the
# padded images are 1153 wide and 861 high, so this ordering makes Keras
# resize (and distort) every image. Left unchanged here because the CNN's
# input shape has to match it - swap both together once confirmed.
input_size = (1153, 861)

# Number of images per batch
batch_size = 32

# Augmenting generator - used for the training images only.
datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 20% of the data is held out for validation/test
)

# Rescale-only generator for the held-out images, so validation and test
# scores are measured on un-augmented data. It uses the same validation_split
# so that its 'validation' subset is the same 20% that `datagen` holds out.
eval_datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'
)

# Number of classes as seen by Keras (one per sub-folder); unlike counting
# os.listdir(data_dir) this is not inflated by stray files.
num_classes = train_generator.num_classes

# List the held-out 20% (file paths + class indices) without shuffling.
holdout_listing = eval_datagen.flow_from_directory(
    data_dir,
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation',
    shuffle=False
)

# Class names ordered by their index, so the generators created below use the
# same class -> index mapping as train_generator.
class_names = sorted(train_generator.class_indices, key=train_generator.class_indices.get)

# train_test_split needs per-sample data (the generator's `samples` attribute
# is only a count), so split a table of file paths and class names.
holdout_frame = pd.DataFrame({
    'filename': [os.path.abspath(path) for path in holdout_listing.filepaths],
    'class': [class_names[index] for index in holdout_listing.classes],
})
val_frame, test_frame = train_test_split(
    holdout_frame,
    test_size=0.5,  # 50% of the held-out data becomes the test set
    random_state=42
)

# Generators that read the images listed in each table. No `subset` is given,
# so every row of the table is used.
validation_generator = eval_datagen.flow_from_dataframe(
    val_frame,
    x_col='filename',
    y_col='class',
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=class_names,
    shuffle=False
)

test_generator = eval_datagen.flow_from_dataframe(
    test_frame,
    x_col='filename',
    y_col='class',
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical',
    classes=class_names,
    shuffle=False
)

First, let's build and compile the CNN model.

In [None]:
# Small CNN classifier: two Conv2D + MaxPooling2D stages followed by a dense
# head with one softmax output per symbol class.
cnn = Sequential([
    # The input shape is derived from input_size (plus 3 colour channels) so it
    # always matches the target_size the data generators produce.
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(*input_size, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])

# categorical_crossentropy matches the one-hot labels produced by
# class_mode='categorical' in the data generators.
cnn.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])


After compilation, we can train the model using the training and validation sets. The training results are then stored in cnn_history. After this, the model is evaluated using the test set.

In [None]:

# Fit the CNN for 10 epochs, checking it against the validation generator
# after every epoch; the per-epoch metrics end up in cnn_history.
cnn_history = cnn.fit(train_generator, validation_data=validation_generator, epochs=10)

# Score the trained model on the test generator and report loss and accuracy
test_results = cnn.evaluate(test_generator)
test_loss, test_accuracy = test_results[0], test_results[1]
print("Test loss:", test_loss)
print("Test accuracy:", test_accuracy)

After this, the model can be saved for later reference.

In [None]:
# Write the trained model to 'cnn_test1.h5' (path is relative to the working directory)
cnn.save('cnn_test1.h5')

We can also use matplotlib to visualise the training progress by using the "cnn_history" variable to plot the training and validation accuracy and loss over time.

In [None]:
# Draw one figure per recorded metric, each comparing the training curve with
# the validation curve over the epochs.
for metric, title, y_label in (
    ('accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'Model loss', 'Loss'),
):
    plt.plot(cnn_history.history[metric])
    plt.plot(cnn_history.history['val_' + metric])
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()