In [43]:
import music21 as m21
import os
import json
from tensorflow import keras
import numpy as np
# Directory that is walked for MIDI files (passed to load_data / preprocess below).
PIANO_DATAPATH = "Dataset"
# Quantisation step in quarter lengths. NOTE(review): not referenced in the visible
# code; presumably meant to match encode_songs' time_step default (0.25) - confirm.
MUSIC_DURATION = 0.25
# Directory where preprocess() writes one encoded "<index>.txt" file per song.
SAVE_MUSIC_PATH = "Processed_data"
# Number of time steps per training sequence (the pipeline below uses 128 both for
# the song delimiter and for the training windows).
SEQUENCE_LENGTH = 128
# File that receives all encoded songs merged into a single string.
SINGLE_FILE_PATH = "single_file_dataset.txt"
# JSON file holding the symbol -> integer vocabulary.
MAPPING_PATH = "Mapping.json"



In [44]:
def load_data(data_path):
    """Parse every MIDI file under a directory and keep the first part of each.

    :param data_path (str): Root directory that is walked recursively
    :return songs (list): First ``m21.stream.Part`` of every parsed score, in
        sorted directory/file order
    """
    songs = []

    for path, subdirs, filenames in os.walk(data_path):
        # os.walk yields entries in arbitrary order; sorting (in place for the
        # sub-directories) makes the song order reproducible between runs.
        subdirs.sort()

        for file in sorted(filenames):
            # Match the real extension, case-insensitively. A bare
            # endswith("mid") would also accept names such as "pyramid".
            if not file.lower().endswith((".mid", ".midi")):
                continue

            score = m21.converter.parse(os.path.join(path, file))

            # Take just the melody of the song (taken to be the first part)
            parts = score.getElementsByClass(m21.stream.Part)
            if len(parts) == 0:
                # Nothing to index at the top level; skip instead of raising.
                continue
            songs.append(parts[0])

    return songs

# Parse the whole dataset once.
# NOTE(review): preprocess() calls load_data() again on the same path, and `songs`
# is reassigned later by create_single_file(), so this parse looks redundant.
songs = load_data(PIANO_DATAPATH)



In [46]:
def encode_songs(song, time_step=0.25):
    """Encode a melody as a space-separated time-series string.

    Every note or rest occupies ``int(duration / time_step)`` steps: the first
    step carries the symbol (MIDI pitch number for a note, "r" for a rest) and
    every following step carries "_" to mark that the symbol is held.
    An element shorter than ``time_step`` yields zero steps and is dropped.
    Chords, and any other element that is neither a note nor a rest, are
    skipped together with their duration.

    :param song: music21 stream; its flattened ``notesAndRests`` are encoded
    :param time_step (float): Duration of one step in quarter lengths
    :return encoded_melody (str): Encoded symbols joined by single spaces
    """
    encoded_melody = []

    # NOTE(review): newer music21 releases reportedly prefer `.flatten()` over
    # `.flat`; kept as-is so the installed version keeps working - confirm.
    for element in song.flat.notesAndRests:

        # handle notes
        if isinstance(element, m21.note.Note):
            symbol = element.pitch.midi  # e.g. 60
        # handle rests
        elif isinstance(element, m21.note.Rest):
            symbol = "r"
        # Chords and every other element type are skipped explicitly, so an
        # unexpected element can never reuse the previous iteration's symbol.
        else:
            continue

        # convert the note/rest into time series notation
        steps = int(element.duration.quarterLength / time_step)
        if steps > 0:
            # first step encodes the symbol, the rest mark it as held
            encoded_melody.append(symbol)
            encoded_melody.extend(["_"] * (steps - 1))

    # cast encoded song to str
    return " ".join(map(str, encoded_melody))



In [47]:
def preprocess(data_path):
    """Encode every song under ``data_path`` and write one text file per song.

    Songs are loaded with ``load_data``, encoded with ``encode_songs`` and
    written to ``SAVE_MUSIC_PATH`` as ``<index>.txt``, where the index is the
    song's position in the loaded list.

    :param data_path (str): Directory containing the MIDI files
    :return: None
    """
    # Load the songs:
    songs = load_data(data_path)

    # The output directory may not exist yet on a fresh checkout.
    os.makedirs(SAVE_MUSIC_PATH, exist_ok=True)

    for i, song in enumerate(songs):

        # Encode the song
        encoded_mel = encode_songs(song)

        # Create the filename from the directory and index of each song
        save_path = os.path.join(SAVE_MUSIC_PATH, f"{i}.txt")

        # The context manager closes the file; no explicit close is needed
        # (and none is attempted when there are no songs at all).
        with open(save_path, "w") as fp:
            fp.write(encoded_mel)


# Encode the dataset and write the per-song text files into SAVE_MUSIC_PATH.
preprocess(PIANO_DATAPATH)

  return self.iter().getElementsByClass(classFilterList)


In [48]:
def load(file_path):
    """Read a text file and return its entire content.

    :param file_path (str): Path of the file to read
    :return (str): Content of the file
    """
    with open(file_path, "r") as handle:
        return handle.read()

In [52]:
def create_single_file(dataset_path, single_file_path, sequence_length):
    """Merge all encoded songs under a directory into one delimited string.

    Every file found under ``dataset_path`` is read with ``load`` and followed
    by a space plus ``sequence_length`` "/" symbols, which mark the end of a
    song. The merged string, without its trailing space, is written to
    ``single_file_path`` and returned.

    :param dataset_path (str): Directory holding the encoded song files
    :param single_file_path (str): Path of the merged output file
    :param sequence_length (int): Number of "/" delimiter symbols per song
    :return songs (str): All songs and delimiters as one string
    """
    new_song_delimiter = "/ " * sequence_length
    pieces = []

    # Load the data piece by piece; sorting makes the song order reproducible,
    # since os.walk yields entries in arbitrary order.
    for path, subdirs, files in os.walk(dataset_path):
        subdirs.sort()

        for file in sorted(files):
            song = load(os.path.join(path, file))
            pieces.append(song + " " + new_song_delimiter)

    # join once, then remove the empty space left by the last delimiter
    songs = "".join(pieces)[:-1]

    # save string that contains all the dataset
    with open(single_file_path, "w") as fp:
        fp.write(songs)

    return songs

# Merge the encoded songs into one string. The delimiter length comes from the
# SEQUENCE_LENGTH constant (128) instead of a repeated literal.
songs = create_single_file(SAVE_MUSIC_PATH, SINGLE_FILE_PATH, SEQUENCE_LENGTH)

263


In [53]:

def create_mapping(songs, mapping_path):
    """Creates a json file that maps the symbols in the song dataset onto integers

    Symbols are numbered in sorted order, so the same dataset always produces
    the same mapping regardless of set iteration order.

    :param songs (str): String with all songs
    :param mapping_path (str): Path where to save mapping
    :return: None
    """
    # identify the vocabulary; sorting keeps the ids stable between runs
    vocabulary = sorted(set(songs.split()))

    # create mappings
    mappings = {symbol: i for i, symbol in enumerate(vocabulary)}

    # save vocabulary to a json file
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)

# Build the vocabulary from the merged song string and store it at MAPPING_PATH.
create_mapping(songs, MAPPING_PATH)

def convert_songs_to_int(songs):
    """Translate a whitespace-separated symbol string into a list of integers.

    The symbol -> integer table is read from the JSON file at ``MAPPING_PATH``.

    :param songs (str): String with all songs
    :return int_songs (list): Mapped value of every symbol, in order
    """
    # read the lookup table
    with open(MAPPING_PATH, "r") as mapping_file:
        symbol_to_id = json.load(mapping_file)

    # split on whitespace and look every symbol up; a symbol that is missing
    # from the table raises KeyError
    return [symbol_to_id[token] for token in songs.split()]

# Integer-encode the merged songs.
# NOTE(review): `ints` is not used in the visible code;
# generate_training_sequences() recomputes the same list from SINGLE_FILE_PATH.
ints = convert_songs_to_int(songs)


def generate_training_sequences(sequence_length):
    """Create input and output data samples for training. Each sample is a sequence.

    The merged dataset at ``SINGLE_FILE_PATH`` is integer-encoded, then a window
    of ``sequence_length`` symbols slides over it one step at a time; each
    window is an input and the symbol right after it is the target.

    :param sequence_length (int): Length of each sequence. With a quantisation at 16th notes, 64 notes equates to 4 bars

    :return inputs (ndarray): One-hot training inputs, shaped
        (# of sequences, sequence length, vocabulary size)
    :return targets (ndarray): Integer training targets, one per sequence
    """

    # load songs and map them to int
    songs = load(SINGLE_FILE_PATH)
    int_songs = convert_songs_to_int(songs)

    # generate the training sequences (none if the data is shorter than a window)
    num_sequences = len(int_songs) - sequence_length
    inputs = [int_songs[i:i + sequence_length] for i in range(num_sequences)]
    targets = [int_songs[i + sequence_length] for i in range(num_sequences)]

    # one-hot encode the sequences
    # The class count must cover the largest id, not just the number of distinct
    # ids present: if some id is absent from the data, counting distinct values
    # would be too small and the encoding would index out of range.
    vocabulary_size = max(int_songs, default=-1) + 1
    # inputs size: (# of sequences, sequence length, vocabulary size)
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabulary_size)
    targets = np.array(targets)

    print(f"There are {len(inputs)} sequences.")

    return inputs, targets

# Build the training windows with the same SEQUENCE_LENGTH (128) that sized the
# song delimiter, instead of repeating the literal.
inputs, targets = generate_training_sequences(SEQUENCE_LENGTH)

