In [2]:
pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.16.2-cp312-cp312-macosx_10_15_x86_64.whl.metadata (4.1 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Using cached absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Using cached astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=23.5.26 (from tensorflow)
  Using cached flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Using cached gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Using cached google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting h5py>=3.10.0 (from tensorflow)
  Downloading h5py-3.12.1-cp312-cp312-macosx_10_13_x86_64.whl.metadata (2.5 kB)
Collecting libclang>=13.0.0 (from tensorflow)
  Using cached libclang-18.1.1-py2.py3-none-macosx_10_9_x86_64.whl.metadata (5.2 kB)
Collecting ml-dtypes~=0.3.1 (from tenso

In [10]:
# Import locally so this cell also works on a fresh kernel (no earlier cell imports os).
import os

# The project-local imports in the following cells are resolved relative to this directory.
print(os.getcwd())

/Users/kimiyashahamat/Desktop/DL_CV/Final Project/Sheet-Music-Parser


In [11]:
from tensorflow.keras.layers import Activation, BatchNormalization, Convolution2D, MaxPooling2D, \
    GlobalAveragePooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import plot_model

from models.TrainingConfiguration import TrainingConfiguration


class Vgg4ConvOnlyConfiguration(TrainingConfiguration):
    """ A simplified VGG network that uses no fully-connected layer, but instead a Convolutional Layer + Global Average Pooling """

    def __init__(self, optimizer, width, height, training_minibatch_size, number_of_classes):
        """ Creates the configuration for three-channel input images of the given size.

        :param optimizer: forwarded unchanged to the base configuration
        :param width: width of the input images; becomes the second entry of the data shape
        :param height: height of the input images; becomes the first entry of the data shape
        :param training_minibatch_size: forwarded unchanged to the base configuration
        :param number_of_classes: number of filters of the final 1x1 convolution, i.e. number of output classes
        """
        super().__init__(optimizer=optimizer, data_shape=(height, width, 3),
                         training_minibatch_size=training_minibatch_size, number_of_classes=number_of_classes)

    def classifier(self) -> Sequential:
        """ Returns the model of this configuration.

        The network consists of five groups of 3x3 convolutions (32, 64, 128, 256 and 512 filters) with
        max-pooling between the groups, followed by a 1x1 convolution with one filter per class, global
        average pooling and a softmax activation. The returned model is already compiled.
        """
        model = Sequential()

        # Only the very first layer carries the input shape
        self.add_convolution(model, 32, 3, self.weight_decay, input_shape=self.data_shape)
        self.add_convolution(model, 32, 3, self.weight_decay)
        model.add(MaxPooling2D())

        self.add_convolution(model, 64, 3, self.weight_decay)
        self.add_convolution(model, 64, 3, self.weight_decay)
        model.add(MaxPooling2D())

        self.add_convolution(model, 128, 3, self.weight_decay)
        self.add_convolution(model, 128, 3, self.weight_decay)
        self.add_convolution(model, 128, 3, self.weight_decay)
        model.add(MaxPooling2D())

        self.add_convolution(model, 256, 3, self.weight_decay)
        self.add_convolution(model, 256, 3, self.weight_decay)
        self.add_convolution(model, 256, 3, self.weight_decay)
        model.add(MaxPooling2D())

        # No pooling after the last group: the global average pooling below collapses the spatial axes
        self.add_convolution(model, 512, 3, self.weight_decay)
        self.add_convolution(model, 512, 3, self.weight_decay)
        self.add_convolution(model, 512, 3, self.weight_decay)

        # Classification head without fully-connected layers: one 1x1 feature map per class,
        # averaged over all positions and normalized with softmax
        model.add(Convolution2D(self.number_of_classes, kernel_size=(1, 1), padding='same'))
        model.add(GlobalAveragePooling2D())
        model.add(Activation('softmax', name='output_class'))

        model.compile(self.get_optimizer(), loss="categorical_crossentropy", metrics=["accuracy"])
        return model

    def add_convolution(self, model, filters, kernel_size, weight_decay, strides=(1, 1), input_shape=None):
        """ Appends a convolution, followed by batch normalization and a ReLU activation, to the model.

        :param model: the model that the three layers are appended to
        :param filters: number of filters of the convolution
        :param kernel_size: kernel size of the convolution
        :param weight_decay: factor of the L2 kernel regularizer
        :param strides: strides of the convolution
        :param input_shape: shape of the input data; only passed on to the layer if it is not None
        """
        layer_arguments = dict(strides=strides, padding='same', kernel_regularizer=l2(weight_decay))
        if input_shape is not None:
            # Previously the strides were silently dropped whenever an input shape was given;
            # building the arguments once keeps both cases consistent
            layer_arguments['input_shape'] = input_shape

        model.add(Convolution2D(filters, kernel_size, **layer_arguments))
        model.add(BatchNormalization())
        model.add(Activation('relu'))

    def name(self) -> str:
        """ Returns the name of this configuration """
        return "vgg4_conv_only"

    def performs_localization(self) -> bool:
        """ Returns False, because this configuration only classifies and does not localize objects """
        return False


if __name__ == "__main__":
    configuration = Vgg4ConvOnlyConfiguration("Adadelta", 96, 96, 16, 32)
    # Build and compile the network only once and reuse it for both the textual summary and the plot;
    # every call to classifier() constructs a completely new model
    classifier = configuration.classifier()
    classifier.summary()
    # plot_model needs the optional pydot package, otherwise it only reports that pydot is missing
    plot_model(classifier, to_file="vgg4_conv_only.png")
    print(configuration.summary())

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


You must install pydot (`pip install pydot`) for `plot_model` to work.
Training for 200 epochs ...
Additional parameters: Initialization: glorot_uniform, Weight-decay of 0.0001, Minibatch-size: 16, Early stopping after 20 epochs without improvement
Data-Shape: (96, 96, 3), Reducing learning rate by factor to 0.5 respectively if not improved validation accuracy after 8 epochs
Data-augmentation: Zooming 20.0% randomly, rotating 10° randomly
Optimizer: Adadelta, with parameters {'name': 'adadelta', 'learning_rate': 0.0010000000474974513, 'weight_decay': None, 'clipnorm': None, 'global_clipnorm': None, 'clipvalue': None, 'use_ema': False, 'ema_momentum': 0.99, 'ema_overwrite_frequency': None, 'loss_scale_factor': None, 'gradient_accumulation_steps': None, 'rho': 0.95, 'epsilon': 1e-07}
Performing object localization: False


In [12]:
import argparse
import datetime
import fnmatch
import os
import pickle
import re
from time import time
from typing import List

import tensorflow.keras
import numpy
import numpy as np
from keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from ClassWeightCalculator import ClassWeightCalculator
from reporting import TelegramNotifier, GoogleSpreadsheetReporter, sklearn_reporting
from reporting.TrainingHistoryPlotter import TrainingHistoryPlotter
from datasets.TrainingDatasetProvider import TrainingDatasetProvider
from datasets.DirectoryIteratorWithBoundingBoxes import DirectoryIteratorWithBoundingBoxes
from models.ConfigurationFactory import ConfigurationFactory


def _steps_per_epoch(data_generator) -> int:
    """ Returns the number of batches that are needed to iterate once over all samples of the generator """
    # np.math was only an alias of the standard math module and no longer exists in NumPy 2.0
    return int(np.ceil(data_generator.samples / data_generator.batch_size))


def _initial_epoch_from_checkpoint(checkpoint_path: str) -> int:
    """ Parses the epoch to resume from out of the file name of a checkpoint, returns 0 if that fails """
    # Checkpoint files written by this program are of the form <start>_<configname>-<epoch>.h5.
    # The regular expression assumes there are no dashes in configname, otherwise
    # the initial epoch will just be 0. That is harmless unless you have parameters that
    # depend on the epoch.
    # Only the file name is matched, so that checkpoints inside of other directories are recognized as well.
    match = re.match(r'[\d-]+_[^-]+-(\d+)\.h5', os.path.basename(checkpoint_path))
    if match is None:
        return 0
    # NOTE(review): ModelCheckpoint appears to format {epoch} one-based, in which case the "+ 1" skips
    # one epoch when resuming - kept as it was, please confirm the intended behaviour.
    return int(match.group(1)) + 1


def _find_latest_checkpoint(checkpoint_prefix: str) -> str:
    """ Returns the most recently modified file <checkpoint_prefix>*.h5 from the current working directory """
    pattern = checkpoint_prefix + '*.h5'
    candidates = [file_name for file_name in os.listdir('.') if fnmatch.fnmatch(file_name, pattern)]
    if not candidates:
        raise FileNotFoundError("No checkpoint matching {0} found in {1}".format(pattern, os.getcwd()))
    # os.path.getmtime does not leave an open file descriptor behind for every inspected file
    return max(candidates, key=os.path.getmtime)


def train_model(dataset_directory: str, model_name: str, stroke_thicknesses: List[int],
                width: int, height: int,
                staff_line_vertical_offsets: List[int], training_minibatch_size: int,
                optimizer: str, dynamic_learning_rate_reduction: bool, use_fixed_canvas: bool, datasets: List[str],
                class_weights_balancing_method: str,
                save_after_every_epoch: bool,
                resume_from_checkpoint: str,
                send_telegram_messages: bool):
    """ Trains the selected model, evaluates the stored checkpoint on the test set and reports the results.

    The images are read from <dataset_directory>/images/{training,validation,test}, with one sub-directory
    per class. Checkpoints, the TensorBoard logs (./logs) and the plot of the training history are written
    relative to the current working directory. The result is printed or sent via Telegram and appended to
    the Google spreadsheet.

    :param dataset_directory: directory that contains the "images" folder and, for models that perform
        localization, the pickled "bounding_boxes.txt" cache
    :param model_name: name of the configuration that is requested from the ConfigurationFactory
    :param stroke_thicknesses: only used for the spreadsheet report
    :param width: image width that is handed to the configuration
    :param height: image height that is handed to the configuration
    :param staff_line_vertical_offsets: only used for the spreadsheet report
    :param training_minibatch_size: minibatch size that is handed to the configuration
    :param optimizer: name of the optimizer that is handed to the configuration
    :param dynamic_learning_rate_reduction: whether the ReduceLROnPlateau callback is used
    :param use_fixed_canvas: only used for the spreadsheet report
    :param datasets: names of the datasets, only used for the notification and the spreadsheet report
    :param class_weights_balancing_method: method that is handed to the ClassWeightCalculator, may be None
    :param save_after_every_epoch: if True, the checkpoint file names contain the epoch and the most
        recently written checkpoint is evaluated, otherwise the single best checkpoint is evaluated
    :param resume_from_checkpoint: optional path to a checkpoint whose weights are loaded before training
    :param send_telegram_messages: whether the final notification is sent via Telegram instead of printed
    :raises FileNotFoundError: if save_after_every_epoch is set, but no checkpoint file can be found
    """
    image_dataset_directory = os.path.join(dataset_directory, "images")
    training_directory = os.path.join(image_dataset_directory, "training")
    validation_directory = os.path.join(image_dataset_directory, "validation")
    test_directory = os.path.join(image_dataset_directory, "test")
    bounding_boxes_cache = os.path.join(dataset_directory, "bounding_boxes.txt")

    print("Loading configuration and data-readers...")
    start_time = time()

    number_of_classes = len(os.listdir(training_directory))
    training_configuration = ConfigurationFactory.get_configuration_by_name(model_name, optimizer, width, height,
                                                                            training_minibatch_size, number_of_classes)
    performs_localization = training_configuration.performs_localization()

    # Bounding boxes are only required (and only loaded) for models that perform localization
    bounding_boxes = None
    if performs_localization:
        # NOTE: unpickling executes arbitrary code from the file - only use caches that were created locally
        with open(bounding_boxes_cache, "rb") as cache:
            bounding_boxes = pickle.load(cache)

    target_size = (training_configuration.input_image_rows, training_configuration.input_image_columns)

    # Only the training images are augmented
    train_generator = ImageDataGenerator(rotation_range=training_configuration.rotation_range,
                                         zoom_range=training_configuration.zoom_range)
    training_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=training_directory,
        image_data_generator=train_generator,
        target_size=target_size,
        batch_size=training_configuration.training_minibatch_size,
        bounding_boxes=bounding_boxes,
    )
    training_steps_per_epoch = _steps_per_epoch(training_data_generator)

    validation_generator = ImageDataGenerator()
    validation_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=validation_directory,
        image_data_generator=validation_generator,
        target_size=target_size,
        batch_size=training_configuration.training_minibatch_size,
        bounding_boxes=bounding_boxes)
    validation_steps_per_epoch = _steps_per_epoch(validation_data_generator)

    # The test data must not be shuffled, because the predictions are compared with the classes by position
    test_generator = ImageDataGenerator()
    test_data_generator = DirectoryIteratorWithBoundingBoxes(
        directory=test_directory,
        image_data_generator=test_generator,
        target_size=target_size,
        batch_size=training_configuration.training_minibatch_size,
        shuffle=False,
        bounding_boxes=bounding_boxes)
    test_steps_per_epoch = _steps_per_epoch(test_data_generator)

    model: Model = training_configuration.classifier()
    model.summary()

    print("Model {0} created.".format(training_configuration.name()))

    initial_epoch = 0
    if resume_from_checkpoint:
        initial_epoch = _initial_epoch_from_checkpoint(resume_from_checkpoint)
        # This will not restore parameters that are adapted dynamically during training since
        # afaik not all of them get saved to the model checkpoint.
        model.load_weights(resume_from_checkpoint)
        print("Model {0} weights loaded from checkpoint {1}. Training will resume from epoch {2}".format(
            training_configuration.name(),
            resume_from_checkpoint,
            initial_epoch))

    print(training_configuration.summary())

    start_of_training = datetime.date.today()

    monitor_variable = 'val_output_class_accuracy' if performs_localization else 'val_accuracy'

    best_model_path = "{0}_{1}".format(start_of_training, training_configuration.name())
    if save_after_every_epoch:
        model_checkpoint = ModelCheckpoint(best_model_path + "-{epoch:02d}.h5", monitor=monitor_variable,
                                           save_best_only=True, verbose=1, save_freq='epoch')
    else:
        model_checkpoint = ModelCheckpoint(best_model_path + ".h5", monitor=monitor_variable,
                                           save_best_only=True, verbose=1)
    early_stop = EarlyStopping(monitor=monitor_variable,
                               patience=training_configuration.number_of_epochs_before_early_stopping,
                               verbose=1)
    learning_rate_reduction = ReduceLROnPlateau(monitor=monitor_variable,
                                                patience=training_configuration.number_of_epochs_before_reducing_learning_rate,
                                                verbose=1,
                                                factor=training_configuration.learning_rate_reduction_factor,
                                                min_lr=training_configuration.minimum_learning_rate)
    tensorboard_callback = TensorBoard(
        log_dir="./logs/{0}_{1}/".format(start_of_training, training_configuration.name()))

    callbacks = [model_checkpoint, early_stop, tensorboard_callback]
    if dynamic_learning_rate_reduction:
        callbacks.append(learning_rate_reduction)
    else:
        print("Learning-rate reduction on Plateau disabled")

    class_weight_calculator = ClassWeightCalculator()
    class_weights = class_weight_calculator.calculate_class_weights(image_dataset_directory,
                                                                    method=class_weights_balancing_method,
                                                                    class_indices=training_data_generator.class_indices)
    if class_weights_balancing_method is not None:
        print("Using {0} method for obtaining class weights to compensate for an unbalanced dataset.".format(
            class_weights_balancing_method))

    print("Training on dataset...")
    history = model.fit(
        x=training_data_generator,
        steps_per_epoch=training_steps_per_epoch,
        epochs=training_configuration.number_of_epochs,
        callbacks=callbacks,
        validation_data=validation_data_generator,
        validation_steps=validation_steps_per_epoch,
        class_weight=class_weights,
        initial_epoch=initial_epoch
    )

    if not save_after_every_epoch:
        print("Loading best model from check-point and testing...")
        best_model = tensorflow.keras.models.load_model(best_model_path + '.h5')
    else:
        print("Loading latest model from check-point and testing...")
        latest_file = _find_latest_checkpoint(best_model_path)
        print('latest model file is {0}'.format(latest_file))
        best_model = tensorflow.keras.models.load_model(latest_file)

    test_data_generator.reset()
    file_names = test_data_generator.filenames
    # The labels are ordered by the class index that the generator assigned to them, so that a predicted
    # index can be translated back into the name of its class. os.listdir returns the directories in
    # arbitrary order, which does not necessarily correspond to these indices.
    # Assumes class_indices maps class names to indices, as for the Keras directory iterators.
    class_indices = test_data_generator.class_indices
    class_labels = sorted(class_indices, key=class_indices.get)
    # Notice that some classes have so few elements, that they are not present in the test-set and do not
    # appear in the final report. To obtain the correct classes, we have to enumerate all non-empty class
    # directories inside the test-folder and use them as labels
    names_of_classes_with_test_data = [
        class_name for class_name in class_labels
        if os.listdir(os.path.join(test_directory, class_name))]
    true_classes = test_data_generator.classes
    # predict/evaluate accept generators directly; the *_generator variants are deprecated and
    # no longer available in current Keras versions
    predictions = best_model.predict(test_data_generator, steps=test_steps_per_epoch)
    if performs_localization:
        # Models that perform localization have several outputs, the first one contains the classes
        predicted_classes = numpy.argmax(predictions[0], axis=1)
    else:
        predicted_classes = numpy.argmax(predictions, axis=1)

    test_data_generator.reset()
    # A dictionary is requested, so that the metrics can be looked up by name instead of by position
    evaluation = best_model.evaluate(test_data_generator, steps=test_steps_per_epoch, return_dict=True)
    classification_accuracy = 0

    print("Reporting classification statistics with micro average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='micro')
    print(report)

    print("Reporting classification statistics with macro average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='macro')
    print(report)

    print("Reporting classification statistics with weighted average")
    report = sklearn_reporting.classification_report(true_classes, predicted_classes, digits=3,
                                                     target_names=names_of_classes_with_test_data, average='weighted'
                                                     )
    print(report)

    print("Misclassified files:")
    for file_name, true_class, predicted_class in zip(file_names, true_classes, predicted_classes):
        if true_class != predicted_class:
            print("\t{0} is incorrectly classified as {1}".format(file_name, class_labels[predicted_class]))

    for current_metric, metric_value in evaluation.items():
        print("{0}: {1:.5f}".format(current_metric, metric_value))
        # Depending on the Keras version the metric of the class output is abbreviated or spelled out
        if current_metric in ('accuracy', 'output_class_acc', 'output_class_accuracy'):
            classification_accuracy = metric_value
    print("Total Accuracy: {0:0.5f}%".format(classification_accuracy * 100))
    print("Total Error: {0:0.5f}%".format((1 - classification_accuracy) * 100))

    end_time = time()
    execution_time_in_seconds = round(end_time - start_time)
    print("Execution time: {0:.1f}s".format(end_time - start_time))

    training_result_image = "{1}_{0}_{2:.1f}p.png".format(training_configuration.name(), start_of_training,
                                                          classification_accuracy * 100)
    TrainingHistoryPlotter.plot_history(history, training_result_image)

    datasets_string = ",".join(datasets)
    notification_message = "Training on {0} dataset with model {1} finished. " \
                           "Accuracy: {2:0.5f}%".format(datasets_string, model_name, classification_accuracy * 100)
    if send_telegram_messages:
        TelegramNotifier.send_message_via_telegram(notification_message, training_result_image)
    else:
        print(notification_message)

    dataset_size = training_data_generator.samples + validation_data_generator.samples + test_data_generator.samples
    stroke_thicknesses_string = ",".join(map(str, stroke_thicknesses))
    staff_line_vertical_offsets_string = ",".join(map(str, staff_line_vertical_offsets))
    image_sizes = "{0}x{1}px".format(training_configuration.input_image_rows,
                                     training_configuration.input_image_columns)
    data_augmentation = "{0}% zoom, {1}° rotation".format(int(training_configuration.zoom_range * 100),
                                                          training_configuration.rotation_range)
    today = "{0:02d}.{1:02d}.{2}".format(start_of_training.day, start_of_training.month, start_of_training.year)
    balancing_method = "None" if class_weights_balancing_method is None else class_weights_balancing_method

    GoogleSpreadsheetReporter.append_result_to_spreadsheet(dataset_size=dataset_size, image_sizes=image_sizes,
                                                           stroke_thicknesses=stroke_thicknesses_string,
                                                           staff_lines=staff_line_vertical_offsets_string,
                                                           model_name=model_name, data_augmentation=data_augmentation,
                                                           optimizer=optimizer,
                                                           early_stopping=training_configuration.number_of_epochs_before_early_stopping,
                                                           reduction_patience=training_configuration.number_of_epochs_before_reducing_learning_rate,
                                                           learning_rate_reduction_factor=training_configuration.learning_rate_reduction_factor,
                                                           minibatch_size=training_minibatch_size,
                                                           initialization=training_configuration.initialization,
                                                           initial_learning_rate=training_configuration.get_initial_learning_rate(),
                                                           accuracy=classification_accuracy,
                                                           date=today,
                                                           use_fixed_canvas=use_fixed_canvas,
                                                           datasets=datasets_string,
                                                           execution_time_in_seconds=execution_time_in_seconds,
                                                           balancing_method=balancing_method)


if __name__ == "__main__":
    # Command line entry point: parses the arguments, optionally recreates the dataset and starts the training
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument("--dataset_directory", type=str, default="data",
                        help="The directory, that is used for storing the images during training")
    parser.add_argument("--model_name", type=str, default="res_net_4",
                        help="The model used for training the network. Run ListAvailableConfigurations.ps1 or "
                             "models/ConfigurationFactory.py to get a list of all available configurations")

    # The dataset is recreated by default, the flag switches that off
    parser.add_argument("--use_existing_dataset_directory", dest="delete_and_recreate_dataset_directory",
                        action='store_false',
                        help="Whether to delete and recreate the dataset-directory (by downloading the appropriate "
                             "files from the internet, extracting and generating images) or simply use whatever data "
                             "currently is inside of that directory.")
    parser.set_defaults(delete_and_recreate_dataset_directory=True)

    parser.add_argument("--minibatch_size", default=16, type=int,
                        help="Size of the minibatches for training, typically one of 8, 16, 32, 64 or 128")
    parser.add_argument("--optimizer", default="Adadelta", type=str,
                        help="The optimizer used for the training, can be SGD, Adam or Adadelta")

    # The learning rate reduction is enabled by default, the flag switches it off
    parser.add_argument("--no_dynamic_learning_rate_reduction", dest="dynamic_learning_rate_reduction",
                        action="store_false",
                        help="True, if the learning rate should not be scheduled to be reduced on a plateau.")
    parser.set_defaults(dynamic_learning_rate_reduction=True)
    # Adjacent string literals are concatenated without a separator, so each fragment has to end with a space
    parser.add_argument("--class_weights_balancing_method", default=None, type=str,
                        help="The optional weight balancing method for handling unbalanced datasets. If provided, "
                             "valid choices are simple or skBalance. 'simple' uses 1/sqrt(#samples_per_class) as "
                             "weights for samples from each class to compensate for classes that are underrepresented. "
                             "'skBalance' uses the Python SkLearn module to calculate more sophisticated weights.")
    parser.add_argument("--telegram_messages", dest="send_telegram_messages", action="store_true",
                        help="Send messages via telegram")
    parser.set_defaults(send_telegram_messages=False)
    parser.add_argument("--save_after_every_epoch", dest="save_after_every_epoch", action="store_true",
                        help="Write a checkpoint after every epoch")
    parser.set_defaults(save_after_every_epoch=False)
    parser.add_argument("--resume_from_checkpoint", dest="resume_from_checkpoint", default=None, type=str,
                        help="Load checkpoint from file specified.")

    # Registers the arguments that describe the dataset; the flags read below (offsets, stroke_thicknesses,
    # datasets, width, height, ...) that are not defined above are expected to come from this call
    TrainingDatasetProvider.add_arguments_for_training_dataset_provider(parser)

    # Unknown arguments are tolerated instead of aborting
    flags, _unparsed = parser.parse_known_args()

    offsets = []
    if flags.offsets != "":
        offsets = [int(o) for o in flags.offsets.split(',')]
    stroke_thicknesses_for_generated_symbols = [int(s) for s in flags.stroke_thicknesses.split(',')]

    if flags.datasets == "":
        raise ValueError("No dataset selected. Specify the dataset for the training via the --dataset parameter")
    datasets = flags.datasets.split(',')

    if flags.delete_and_recreate_dataset_directory:
        training_dataset_provider = TrainingDatasetProvider(flags.dataset_directory)
        training_dataset_provider.recreate_and_prepare_datasets_for_training(
            datasets=datasets, width=flags.width,
            height=flags.height,
            use_fixed_canvas=flags.use_fixed_canvas,
            stroke_thicknesses_for_generated_symbols=stroke_thicknesses_for_generated_symbols,
            staff_line_spacing=flags.staff_line_spacing,
            staff_line_vertical_offsets=offsets,
            random_position_on_canvas=flags.random_position_on_canvas)
        training_dataset_provider.resize_all_images_to_fixed_size(flags.width, flags.height)
        training_dataset_provider.split_dataset_into_training_validation_and_test_set()

    train_model(dataset_directory=flags.dataset_directory,
                model_name=flags.model_name,
                stroke_thicknesses=stroke_thicknesses_for_generated_symbols,
                width=flags.width,
                height=flags.height,
                staff_line_vertical_offsets=offsets,
                training_minibatch_size=flags.minibatch_size,
                optimizer=flags.optimizer,
                dynamic_learning_rate_reduction=flags.dynamic_learning_rate_reduction,
                use_fixed_canvas=flags.use_fixed_canvas,
                datasets=datasets,
                class_weights_balancing_method=flags.class_weights_balancing_method,
                save_after_every_epoch=flags.save_after_every_epoch,
                resume_from_checkpoint=flags.resume_from_checkpoint,
                send_telegram_messages=flags.send_telegram_messages)

    # To run it in a Python console
    # dataset_directory = 'data'
    # model_name = 'res_net_3_small'
    # delete_and_recreate_dataset_directory = True
    # stroke_thicknesses = [3]
    # width = 96
    # height = 192
    # staff_line_vertical_offsets = None
    # staff_line_spacing = 14
    # training_minibatch_size = 32
    # optimizer = 'Adadelta'
    # dynamic_learning_rate_reduction = True

  m = re.match('[\d-]+_[^-]+-(\d+).h5', resume_from_checkpoint)
  m = re.match('[\d-]+_[^-]+-(\d+).h5', resume_from_checkpoint)


ModuleNotFoundError: No module named 'ClassWeightCalculator'