Preprocessing MIDI data into a piano-roll time-series format.

The X axis represents time, in steps of 0.25 beats (quarter lengths).
The Y axis represents the MIDI pitch number of the note (one row per pitch, 0-127).

In [2]:
import music21 as m21
import os
import json
from torch.utils.data import Dataset, DataLoader
import numpy as np
import torch

import torch.backends.cudnn as cudnn
# Ask PyTorch's CUDA caching allocator to release any cached, unused GPU memory.
torch.cuda.empty_cache()
cudnn.benchmark = True  # Optimise for hardware

# Run on the first CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Show which device was selected.
print(device)
"""
Preprocessing file used to transform piano midi files into one hot encoded tensors. Uses music21 library to translate the 
midi files into music21 objects where the note pitches and durations can be extracted. 
PyTorch is used to one hot encode and transform into output tensors. 
"""
# Constants for datapaths of each file and other key values.
# Root folder that is walked for midi files (passed to load_data / preprocess below).
PIANO_DATAPATH = "Dataset"
# Width of one piano-roll column, in quarter lengths; durations are divided by this value.
TIME_STEP = 0.25
# NOTE(review): not referenced by any function in the visible code -- confirm whether it is still needed.
SEQUENCE_LENGTH = 256
# JSON file the label -> integer mapping is written to and read back from.
# NOTE(review): "Lable" looks like a misspelling of "Label", but it is the real
# file name used at runtime, so renaming it would change where the file is written.
LABEL_MAPPING_PATH = "Lable_Mapping.json"
# NOTE(review): not referenced by any function in the visible code -- confirm whether it is still needed.
OUTPUT_FILES = "Output_files"
# Default number of columns (time steps) of each piano-roll matrix built by preprocess.
SIZE = 256

# Note/rest durations (in quarter lengths) that are kept by acceptable_note_durations:
# every multiple of 0.25 from 0.25 up to 4.
acceptable_durations = [
    0.25, 
    0.5,
    0.75,
    1, 
    1.25,
    1.5, 
    1.75, 
    2, 
    2.25,
    2.5,
    2.75,
    3,
    3.25,
    3.5, 
    3.75,
    4
]

cuda:0


In [4]:
def load_data(data_path):
    """
    Walk ``data_path`` and parse every midi file found into a music21 object.

    Every directory below ``data_path`` is treated as one label (its base name),
    and only the first ``Part`` of each parsed score is kept as the melody.

    :params data_path: directory path to a folder whose sub-folders contain midi files
    :returns: a tuple ``(songs, labels, no_songs_in_dir)`` where ``songs`` is the list of
        music21 parts in traversal order, ``labels`` is the list of directory names and
        ``no_songs_in_dir[k]`` is the number of songs loaded from directory ``labels[k]``.
        All three are empty when ``data_path`` does not exist or contains nothing.
    """
    songs = []
    labels = []
    no_songs_in_dir = []

    # Iterates through each file in every directory from the input file path
    for depth, (path, subdirs, filenames) in enumerate(os.walk(data_path)):
        # os.walk yields entries in an arbitrary, OS-dependent order. Sorting the
        # sub-directories in place (honoured by the top-down walk) and the files makes
        # the song order, and therefore the song <-> label alignment, reproducible.
        subdirs.sort()
        count = 0
        for file in sorted(filenames):

            # For every midi file, convert to a music21 object
            if file.endswith("mid"):
                song = m21.converter.parse(os.path.join(path, file))
                # Take just the melody of the song
                song = song.getElementsByClass(m21.stream.Part)[0]
                songs.append(song)
                count += 1

        # The first directory yielded by os.walk is data_path itself. It is only a
        # container and does not represent a label, so it is left out whatever it is
        # called (the original only handled a root literally named 'Dataset').
        # NOTE(review): midi files placed directly in data_path are still parsed into
        # ``songs`` (as before) but have no label/count entry -- keep the root free of
        # midi files, or the songs and counts will not line up.
        if depth == 0:
            continue

        labels.append(os.path.basename(path))
        no_songs_in_dir.append(count)

    return songs, labels, no_songs_in_dir

songs, labels, no_songs_in_dir = load_data(PIANO_DATAPATH)

# Report how many songs were loaded for each label.
for composer, n_songs in zip(labels, no_songs_in_dir):
    print(f"Number of songs for {composer}: {n_songs}")




Number of songs for albeniz: 14
Number of songs for bach: 3
Number of songs for balakir: 1
Number of songs for beeth: 29
Number of songs for borodin: 7
Number of songs for brahms: 9
Number of songs for burgm: 9
Number of songs for chopin: 48
Number of songs for debussy: 7
Number of songs for granados: 3
Number of songs for grieg: 16
Number of songs for haydn: 21
Number of songs for liszt: 16
Number of songs for mendelssohn: 15
Number of songs for mozart: 21
Number of songs for muss: 8
Number of songs for schumann: 24
Number of songs for tschai: 12


In [5]:
def transpose(song):
    """Transposes song to C maj/A min

    The key is taken from the fifth element of the first measure when that element
    is a music21 ``Key`` with a major or minor mode; otherwise the key is estimated
    with music21's key analysis.

    :param song (m21 stream): Piece to transpose
    :return transposed_song (m21 stream): the piece transposed so that its tonic is
        C (major keys) or A (minor keys)
    :raises ValueError: if the resulting key is neither major nor minor
    """

    # get key from the song
    measures_part0 = song.getElementsByClass(m21.stream.Measure)
    try:
        # NOTE(review): index 4 is where the key happens to sit in the first measure
        # of the parsed files; it is kept for compatibility but is not guaranteed.
        key = measures_part0[0][4]
    except IndexError:
        # No measures at all, or a first measure with fewer than five elements.
        key = None

    # estimate key using music21 when no usable key was found in the score
    if not isinstance(key, m21.key.Key) or key.mode not in ("major", "minor"):
        key = song.analyze("key")

    # get interval for transposition. E.g., Bmaj -> Cmaj
    if key.mode == "major":
        target = m21.pitch.Pitch("C")
    elif key.mode == "minor":
        target = m21.pitch.Pitch("A")
    else:
        # Previously this fell through and failed with an unbound local variable.
        raise ValueError(f"Cannot transpose a song in mode {key.mode!r}")
    interval = m21.interval.Interval(key.tonic, target)

    # transpose song by calculated interval
    transposed_song = song.transpose(interval)
    return transposed_song



In [20]:
def piano_roll(cols, song, stride=50):
    """ 
    This function takes in a single song and column dimension and outputs the pianoroll 2D matrices that represent the
    midi notes at every time step.

    The flattened notes, rests and chords of the song are read in order. A new window (matrix) is started every
    ``stride`` symbols, and each window is filled symbol by symbol until ``cols`` time steps have been covered. A window
    that runs out of symbols before it is full is discarded, and no further windows are produced.

    :params cols: number of columns (time steps of TIME_STEP quarter lengths) of each matrix
    :params song: the input music21 stream to convert
    :params stride: number of symbols between the starts of consecutive windows (default 50)
    :returns a list of 2D pianoroll matrices of shape (128, cols); row = midi pitch, column = time step.
    :raises ValueError: if stride is smaller than 1 (the windows would never advance)
    """
    if stride < 1:
        raise ValueError("stride must be at least 1")

    # Materialise the symbols once: indexing the music21 iterator repeatedly is slow.
    symbols = list(song.flatten().notesAndRests)
    num_sym = len(symbols)

    rolls = []
    window_start = 0
    flat_idx = 0

    # Create piano rolls for segments that start `stride` note objects apart
    while flat_idx < num_sym - 1:
        roll = np.zeros((128, cols))
        index = 0
        flat_idx = window_start
        ran_out = False

        while index < cols:
            # Stop when the song ends before this window has been filled.
            if flat_idx >= num_sym:
                ran_out = True
                break
            symbol = symbols[flat_idx]

            if isinstance(symbol, m21.note.Note):
                # Converts the duration to the step size. If duration = 0.5 this equates to two time steps.
                steps = int(symbol.duration.quarterLengthNoTuplets // TIME_STEP)

                # Set the value of this midi note to 1 for its time steps.
                # NOTE(review): the slice covers steps + 1 columns, so every note also marks the first column
                # of the following symbol. Kept as in the original -- confirm whether this overlap is intended.
                roll[int(symbol.pitch.midi), index: index + steps + 1] = 1

            elif isinstance(symbol, m21.note.Rest):
                steps = int(symbol.quarterLength // TIME_STEP)

            else:
                # Anything else is treated as a chord: every note of it is written to the same time steps.
                # The duration is divided by TIME_STEP *before* truncating to an integer; truncating first
                # (as was done previously) turned a 0.5 chord into 0 steps and a 1.5 chord into 4 instead of 6.
                steps = int(symbol[0].duration.quarterLengthNoTuplets // TIME_STEP)
                for chord_note in symbol:
                    roll[int(chord_note.pitch.midi), index: index + steps + 1] = 1

            # Move to the next time position and the next symbol.
            index += steps
            flat_idx += 1

        # An incomplete final window is dropped.
        if ran_out:
            break

        rolls.append(roll)
        window_start += stride

    return rolls



In [21]:
def acceptable_note_durations(song):
    """ 
    Removes the notes, rests and chords that have unacceptable durations.

    A symbol is unacceptable when its quarter length is not listed in ``acceptable_durations``.

    :params song: the song (music21 stream) to remove the notes from; it is modified in place
    :returns the same song object with the notes removed.
    """
    # Collect first, remove afterwards, so the stream is not modified while it is being read.
    unacceptable = [
        symbol
        for symbol in song.flatten().notesAndRests
        if symbol.duration.quarterLength not in acceptable_durations
    ]

    # The symbols usually live inside measures, not directly in the part, so the removal has
    # to recurse into the sub-streams; a plain song.remove(symbol) leaves nested symbols in place.
    for symbol in unacceptable:
        song.remove(symbol, recurse=True)

    return song



In [None]:
def acceptable_note_durations(song):
    """ 
    Removes the notes, rests and chords that have unacceptable durations.

    A symbol is unacceptable when its quarter length is not listed in ``acceptable_durations``.

    NOTE(review): this cell defines the same function as the previous cell; one of the
    two definitions can be deleted.

    :params song: the song (music21 stream) to remove the notes from; it is modified in place
    :returns the same song object with the notes removed.
    """
    # Collect first, remove afterwards, so the stream is not modified while it is being read.
    unacceptable = [
        symbol
        for symbol in song.flatten().notesAndRests
        if symbol.duration.quarterLength not in acceptable_durations
    ]

    # The symbols usually live inside measures, not directly in the part, so the removal has
    # to recurse into the sub-streams; a plain song.remove(symbol) leaves nested symbols in place.
    for symbol in unacceptable:
        song.remove(symbol, recurse=True)

    return song



In [6]:
def labels_mapping(labels, path):
    """
    Generate a mapping of the labels and save it as a .json file.

    Each distinct label gets an integer in order of first appearance, so the mapping is
    the same on every run and label ``k`` of a duplicate-free list maps to ``k``.
    (Building it from a ``set`` gave an arbitrary order that could change between runs.)

    :params labels: iterable of label names (duplicates are allowed and ignored)
    :params path: path of the .json file the mapping is written to
    :returns mappings: the dictionary that was written, label -> integer
    """
    # dict.fromkeys removes duplicates while keeping the first-seen order
    mappings = {label: i for i, label in enumerate(dict.fromkeys(labels))}

    # Writes the dictionary to a new .json file
    with open(path, "w") as fp:
        json.dump(mappings, fp, indent=4)

    return mappings

In [7]:
def convert_labels_to_int(labels, path=None):
    """ 
    Converts the labels to an integer list using the saved label mapping.

    Every entry of ``labels`` is looked up in the mapping, so the result is aligned with
    ``labels`` regardless of the order the mapping was written in. (Previously the
    ``labels`` argument was ignored and the values of the file were returned as stored.)

    :params labels: iterable of label names to convert
    :params path: path of the .json mapping file; defaults to LABEL_MAPPING_PATH
    :returns label_ints: list with the integer of every label, in the order of ``labels``
    :raises KeyError: if a label is not present in the mapping file
    """
    if path is None:
        path = LABEL_MAPPING_PATH

    # Open the json file containing the labels represented in a dictionary
    with open(path, "r") as fp:
        mappings = json.load(fp)

    return [mappings[label] for label in labels]



In [8]:
def preprocess(data_path, songs, labels, no_songs_in_dir, size=SIZE):
    """ 
    This function encodes the loaded midi songs as piano-roll matrices and saves them, with their labels, as tensors.

    Every song is filtered with acceptable_note_durations, transposed with transpose and cut into piano-roll
    windows with piano_roll. Each window receives the integer label of the directory its song was loaded from.
    The tensors are written to "Input_tensors.pt" and "Labels_tensors.pt" in the working directory, and the
    label mapping is (re)written to LABEL_MAPPING_PATH.

    :params data_path: path containing the input midi files (unused here: the songs are passed in already loaded)
    :params songs: list of music21 songs, grouped by directory in the order of ``labels``
    :params labels: list with the label (directory name) of every directory
    :params no_songs_in_dir: number of songs of every directory, aligned with ``labels``
    :params size: number of columns of every piano-roll matrix
    :returns inputs_out, labels_out: the tensor of piano-roll matrices and the tensor of their labels
    :raises ValueError: if the song counts do not add up to the number of songs
    """
    # Initialize the label mapping dictionaries
    labels_mapping(labels, LABEL_MAPPING_PATH)
    labels_as_ints = convert_labels_to_int(labels)

    # Work out the label of every song up front. The previous running counter switched label
    # only after the first song of the next directory had been appended (so that song got the
    # previous directory's label) and could not cope with directories that contain no songs.
    song_labels = [
        label
        for label, n_songs in zip(labels_as_ints, no_songs_in_dir)
        for _ in range(n_songs)
    ]
    if len(song_labels) != len(songs):
        raise ValueError(
            f"Expected {len(song_labels)} songs from the directory counts but got {len(songs)}"
        )

    # Create empty label/matrix lists
    label_list = []
    matrix_list = []

    # Iterate through every song , applying the transpose, acceptable duration and piano roll functions to each song.
    for i, (song, label) in enumerate(zip(songs, song_labels)):

        # Filter the songs by transposing to C maj/A minor and ensuring only acceptable duration notes are in the song
        song = acceptable_note_durations(song)
        transposed_song = transpose(song)

        prolls = piano_roll(size, transposed_song)

        # Add each matrix into the training sequences list, with one label per matrix
        matrix_list.extend(prolls)
        label_list.extend([label] * len(prolls))

        print(f"Song {i} completed")

    # Convert to numpy arrays before creating tensors
    input_array = np.array(matrix_list)
    label_array = np.array(label_list)

    # Create tensors of the training sequences and the labels
    inputs_out = torch.from_numpy(input_array)
    labels_out = torch.from_numpy(label_array)

    # Save the tensors as outputs
    torch.save(inputs_out, "Input_tensors.pt")
    torch.save(labels_out, "Labels_tensors.pt")

    print("Input tensor dimensions: ", inputs_out.shape)
    print("Label tensor dimensions: ", labels_out.shape)
    return inputs_out, labels_out


# Run the preprocessing on the songs loaded above; this also saves both tensors to disk.
inputs_out, labels_out = preprocess(PIANO_DATAPATH, songs, labels, no_songs_in_dir)

Song 0 completed
Song 1 completed
Song 2 completed
Song 3 completed
Song 4 completed
Song 5 completed
Song 6 completed
Song 7 completed
Song 8 completed
Song 9 completed
Song 10 completed
Song 11 completed
Song 12 completed
Song 13 completed
Song 14 completed
Song 15 completed
Song 16 completed
Song 17 completed
Song 18 completed
Song 19 completed
Song 20 completed
Song 21 completed
Song 22 completed
Song 23 completed
Song 24 completed
Song 25 completed
Song 26 completed
Song 27 completed
Song 28 completed
Song 29 completed
Song 30 completed
Song 31 completed
Song 32 completed
Song 33 completed
Song 34 completed
Song 35 completed
Song 36 completed
Song 37 completed
Song 38 completed
Song 39 completed
Song 40 completed
Song 41 completed
Song 42 completed
Song 43 completed
Song 44 completed
Song 45 completed
Song 46 completed
Song 47 completed
Song 48 completed
Song 49 completed
Song 50 completed
Song 51 completed
Song 52 completed
Song 53 completed
Song 54 completed
Song 55 completed
So