In [None]:
from omrdatasettools.Downloader import Downloader
from omrdatasettools.OmrDataset import OmrDataset
from omrdatasettools.MuscimaPlusPlusSymbolImageGenerator import MuscimaPlusPlusSymbolImageGenerator

The downloader module is used to extract the raw information about the datasets into the ./data folder

In [None]:
downloader = Downloader()

In [None]:
downloader.download_and_extract_dataset(OmrDataset.MuscimaPlusPlus_V2, "data/muscima_pp_v2")

The generated raw files are then used to draw the images of the dataset on canvases and store them in different folders.

In [None]:
muscima_generator = MuscimaPlusPlusSymbolImageGenerator()
muscima_generator.extract_and_render_all_symbol_masks("./data/muscima_pp_v2/", "./data/muscima_pp_v2_symbols/")


Before training the models, the images first have to be resized to be a standard size.

First, let's find out what the max height and width is, and use those values to resize all the images

In [8]:
from PIL import Image
import os

# Specify the root folder containing your images
root_folder = ".\\data\\muscima_pp_v2_symbols"

# Desired width to search for
desired_width = 861

# Initialize a list to store image file paths that match the desired width
matching_images = []

# Iterate through all subfolders and their images
for foldername, subfolders, filenames in os.walk(root_folder):
    for filename in filenames:
        if filename.endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp")):
            image_path = os.path.join(foldername, filename)
            image = Image.open(image_path)
            _, height = image.size
            if height == desired_width:
                matching_images.append(image_path)

# Print the paths of images with the desired width
if matching_images:
    print("Images with a width of 1153 pixels:")
    for image_path in matching_images:
        print(image_path)
else:
    print("No images with a width of 1153 pixels found.")

Images with a width of 1153 pixels:
.\data\muscima_pp_v2_symbols\repeat\MUSCIMA-pp_2.0___CVC-MUSCIMA_W-32_N-12_D-ideal___580.png


In [9]:
from PIL import Image
import os

# Specify the root folder containing subfolders with images
root_folder = ".\\data\\muscima_pp_v2_symbols"

# Initialize variables to store maximum width and height
max_width = 0
max_height = 0

# Iterate through all subfolders and their images to find maximum width and height
for foldername, subfolders, filenames in os.walk(root_folder):
    for filename in filenames:
        if filename.endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp")):
            image_path = os.path.join(foldername, filename)
            image = Image.open(image_path)
            width, height = image.size
            max_width = max(max_width, width)
            max_height = max(max_height, height)

# Iterate through all subfolders and their images again and pad them
for foldername, subfolders, filenames in os.walk(root_folder):
    for filename in filenames:
        if filename.endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp")):
            image_path = os.path.join(foldername, filename)
            image = Image.open(image_path)
            
            # Calculate padding dimensions
            padding_width = max_width - image.width
            padding_height = max_height - image.height
            
            # Create a new image with white padding
            padded_image = Image.new("RGB", (max_width, max_height), (255, 255, 255))
            
            # Calculate the position to paste the original image (centered)
            paste_x = padding_width // 2
            paste_y = padding_height // 2
            
            # Paste the original image onto the padded image
            padded_image.paste(image, (paste_x, paste_y))
            
            # Save the padded image, overwriting the original
            padded_image.save(image_path)

print("All images in subfolders have been resized to the maximum width and height.")

All images in subfolders have been resized to the maximum width and height.


At this stage, the images are quite large. To aid in training, we can reduce the size by 50%.

In [27]:
from PIL import Image
import os

# Specify the root folder containing the padded images
root_folder = ".\\data\\muscima_pp_v2_symbols"

# Initialize the output folder
output_root_folder = ".\\data\\muscima_pp_v2_symbols_resized"

# Create the output root folder if it doesn't exist
if not os.path.exists(output_root_folder):
    os.makedirs(output_root_folder)

# Specify the scale factor for resizing (50% in this case)
scale_factor = 0.5

# Iterate through all subfolders and their images
for foldername, subfolders, filenames in os.walk(root_folder):
    # Create the corresponding subfolder structure in the output directory
    relative_folder = os.path.relpath(foldername, root_folder)
    output_folder = os.path.join(output_root_folder, relative_folder)
    os.makedirs(output_folder, exist_ok=True)

    for filename in filenames:
        if filename.endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp")):
            image_path = os.path.join(foldername, filename)
            image = Image.open(image_path)

            # Calculate the new dimensions while preserving aspect ratio
            width, height = image.size
            new_width = int(width * scale_factor)
            new_height = int(height * scale_factor)

            # Resize the image
            resized_image = image.resize((new_width, new_height))

            # Save the resized image to the corresponding output subfolder
            output_path = os.path.join(output_folder, filename)
            resized_image.save(output_path)

print("All images in subfolders have been resized to 50% of their size while preserving aspect ratio and stored in corresponding output subfolders.")

All images in subfolders have been resized to 50% of their size while preserving aspect ratio and stored in corresponding output subfolders.


We can also try to supplement the dataset by rotating the images 10 degrees and -10 degrees.

In [28]:
import os

root_folder = ".\\data\\muscima_pp_v2_symbols_resized"
rotate_degrees = [10, -10]

for foldername, subfolders, filenames in os.walk(root_folder):
    for filename in filenames:
        if filename.endswith((".jpg", ".jpeg", ".png", ".gif", ".bmp")):
            # Get the full image path
            image_path = os.path.join(foldername, filename)
            image = Image.open(image_path)

            # Get image dimensions
            height, width = image.size

            for degree in rotate_degrees:
                new_image = image.rotate(degree, fillcolor=(255, 255, 255))

                # Get the original file extension
                file_extension = os.path.splitext(filename)[-1]

                # Save the rotated image with a new filename in the output folder
                new_filename = filename.replace(file_extension, f"_rotated{degree}{file_extension}")
                new_image_path = os.path.join(foldername, new_filename)
                new_image.save(new_image_path)

Now the data is normalised, it is time to split the dataset and train the Convolutional Neural Network (CNN) and the Support Vector Machine (SVM).

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
import numpy as np

First we need to split the data into training, test and validation datasets.

In [29]:
from PIL import Image
import numpy as np

# Define the path to your dataset directory
data_dir = '.\\data\\muscima_pp_v2_symbols_resized'

# Define the size of your input images
input_size = (344, 126)

# Define the batch size
batch_size = 32

# Define the number of classes in your dataset
num_classes = len(os.listdir(data_dir))
print(f"Number of classes: {num_classes}")

# Create an ImageDataGenerator for data augmentation
datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    validation_split=0.2  # 20% of the data will be used for validation
)

# Load and split the data into train, validation, and test sets
train_generator = datagen.flow_from_directory(
    data_dir,
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training'  # Specify training subset
)

validation_generator = datagen.flow_from_directory(
    data_dir,
    target_size=input_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'  # Specify validation subset
)

validation_data = []
validation_labels = []

# Collect batches in smaller groups and resize images
for i in range(len(validation_generator)):
    batch_data, batch_labels = validation_generator[i]
    
    # Ensure the data type and range are appropriate
    batch_data = (batch_data * 255).astype(np.uint8)  # Normalize to [0, 255] and convert to uint8
        
    validation_data.extend(batch_data)
    validation_labels.extend(batch_labels)

# Convert the lists to arrays
x_val = np.array(validation_data)
y_val = np.array(validation_labels)

# Now, you can split x_val and y_val into validation and test sets
x_val, x_test, y_val, y_test = train_test_split(
    x_val,
    y_val,
    test_size=0.5,  # 50% of validation data becomes the test set
    random_state=42
)

# Create separate generators for validation and test sets
validation_generator = datagen.flow(
    x_val,
    y_val,
    batch_size=batch_size,
    shuffle=False
)

test_generator = datagen.flow(
    x_test,
    y_test,
    batch_size=batch_size,
    shuffle=False
)


Number of classes: 75
Found 31242 images belonging to 75 classes.
Found 7770 images belonging to 75 classes.


First, let's build and compile the CNN model.

In [30]:
cnn = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(344, 126, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(75, activation='softmax') 
])

cnn.compile(optimizer='adam',
            loss='categorical_crossentropy',
            metrics=['accuracy'])


After compilation, we can train the model using the test and validation sets. These results are then stored in cnn_history. After this, the model is evaluated using the test set.

In [31]:

# Train the model
cnn_history = cnn.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator
)

#Evaluate the model using the test set
test_results = cnn.evaluate(test_generator)
print("Test loss:", test_results[0])
print("Test accuracy:", test_results[1])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10

After this, the model can be saved for later reference.

In [None]:
cnn.save('cnn_test1.h5')

We can also use matplotlib to visualise the training progress by using the "cnn_history" variable to plot the training and validation accuracy and loss over time.

In [None]:
# Plot training & validation accuracy values
plt.plot(cnn_history.history['accuracy'])
plt.plot(cnn_history.history['val_accuracy'])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(cnn_history.history['loss'])
plt.plot(cnn_history.history['val_loss'])
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()