The melody dataset we will use is the KERN dataset, available at https://kern.humdrum.org/cgi-bin/browse?l=essen%2Feuropa%2Fdeutschl

The same site also hosts folk songs from other countries.

ESAC is the original site: http://www.esac-data.org/

You will notice that the songs are saved in the **KERN** format, a popular music encoding format used by the Humdrum software package. More details are available at https://www.humdrum.org/

In [1]:
"""
Music21 is a package that allows you to manipulate symbolic music data. Also allows you to load files in krn format
"""
import os
import json
import music21 as m21

In [2]:
# Root folder of the kern files to preprocess (this test folder only has a few files in it)
KERN_DATASET_PATH = "deutschl/erk/"
# Folder that receives one encoded text file per accepted song
SAVE_DIR = "dataset"
# Path of the single file that holds every encoded song joined together
SINGLE_FILE_DATASET = "file_dataset"
# Path of the JSON file that stores the symbol -> integer mapping
MAPPING_PATH = "mapping.json"

# When training our model, we need to pass sequences that are fixed in length
SEQUENCE_LENGTH = 64

# Note/rest lengths, expressed in quarter lengths, that a song may contain to be kept
ACCEPTABLE_DURATIONS = [
    0.25, # sixteenth note
    0.5, # eighth note
    0.75, # dotted eighth note
    1, # quarter note
    1.5, # dotted quarter note
    2, # half note
    3, # dotted half note
    4 # whole note
]

In [3]:
def load_songs_in_kern(dataset_path):
    """Parse every ``.krn`` file found under ``dataset_path`` with music21.

    The directory tree is walked recursively in sorted order, so the
    returned list (and anything keyed by a song's position in it) is the
    same from run to run.

    Args:
        dataset_path: Root directory to search for kern files.

    Returns:
        A list with one parsed music21 object per ``.krn`` file. The list
        is empty when the directory is missing or holds no kern files.
    """
    songs = []

    for path, subdirs, files in os.walk(dataset_path):
        # Sorting in place makes os.walk descend into folders in a stable order
        subdirs.sort()
        for file in sorted(files):
            # Match the real extension: comparing only the last three
            # characters also accepted names such as "notkrn"
            if file.endswith(".krn"):
                songs.append(m21.converter.parse(os.path.join(path, file)))

    return songs

In [4]:
def has_acceptable_durations(song, acceptable_durations):
    """Tell whether every note and rest of ``song`` has an allowed length.

    Args:
        song: Object exposing ``flat.notesAndRests``, whose items carry a
            ``duration.quarterLength`` value.
        acceptable_durations: Container of allowed quarter lengths.

    Returns:
        True when all durations are in ``acceptable_durations`` (also for a
        song without notes or rests), False as soon as one is not.
    """
    return all(
        event.duration.quarterLength in acceptable_durations
        for event in song.flat.notesAndRests
    )

In [5]:
def transpose(song):
    """Transpose ``song`` to C major (major keys) or A minor (minor keys).

    The key is taken from the first measure of the first part when one is
    notated there; otherwise it is estimated with music21's key analysis.

    Args:
        song: music21 score to transpose.

    Returns:
        A transposed copy of ``song``.

    Raises:
        ValueError: If the key's mode is neither "major" nor "minor" even
            after key analysis.
    """
    # Look for a notated key anywhere in the first measure instead of
    # assuming it sits at a fixed index, which fails on short measures
    key = None
    first_part = next(iter(song.getElementsByClass(m21.stream.Part)), None)
    if first_part is not None:
        first_measure = next(
            iter(first_part.getElementsByClass(m21.stream.Measure)), None
        )
        if first_measure is not None:
            key = next(
                (el for el in first_measure if isinstance(el, m21.key.Key)),
                None,
            )

    # In some songs the key is not notated (or uses another mode), so
    # estimate it using music21
    if key is None or key.mode not in ("major", "minor"):
        key = song.analyze("key")

    # Pick the target tonic. E.g. Bmaj -> Cmaj, Emin -> Amin
    if key.mode == "major":
        target = m21.pitch.Pitch("C")
    elif key.mode == "minor":
        target = m21.pitch.Pitch("A")
    else:
        raise ValueError(f"Cannot transpose a song in mode {key.mode!r}")

    interval = m21.interval.Interval(key.tonic, target)

    # Transpose song by calculated interval
    return song.transpose(interval)

In [6]:
def encode_song(song, time_step=0.25):
    """Encode ``song`` as a space-separated time series string.

    Every item of the series covers ``time_step`` quarter lengths (0.25 is
    a sixteenth note). An event starts with its symbol and is held with
    "_" for the remaining steps, e.g. pitch 60 lasting 1.0 becomes
    ``60 _ _ _``.

    Symbols:
        * notes  -> MIDI pitch number
        * chords -> MIDI number of the highest pitch (the melody line)
        * rests and any other event -> "r"

    Args:
        song: music21 stream to encode.
        time_step: Length of one series item, in quarter lengths.

    Returns:
        The encoded song as a single string.
    """
    encoded_song = []

    for event in song.flat.notesAndRests:

        if isinstance(event, m21.note.Note):
            symbol = event.pitch.midi  # e.g. 60
        elif isinstance(event, m21.chord.Chord) and event.pitches:
            # Without this branch a chord left ``symbol`` unbound or
            # silently reused the previous event's symbol
            symbol = max(pitch.midi for pitch in event.pitches)
        else:
            # Rests, and anything unpitched, keep their slot as silence so
            # the time grid stays aligned
            symbol = "r"

        # Convert the event into time series notation; events shorter than
        # one step contribute nothing, as before
        steps = int(event.duration.quarterLength / time_step)
        if steps > 0:
            encoded_song.append(symbol)
            encoded_song.extend(["_"] * (steps - 1))

    # Cast encoded song to a str
    return " ".join(map(str, encoded_song))

In [7]:
def preprocess(dataset_path):
    """Load, filter, transpose and encode the kern songs under ``dataset_path``.

    Each accepted song is written to its own text file inside ``SAVE_DIR``.
    The file is named after the song's index in the loaded list, so indices
    of rejected songs are skipped.

    Args:
        dataset_path: Root directory holding the ``.krn`` files.
    """
    # Load folk songs
    print("Loading songs...")
    songs = load_songs_in_kern(dataset_path)
    print(f"Loaded {len(songs)} songs.")

    # open(..., "w") does not create missing folders, so make sure the
    # output directory exists before the first song is saved
    os.makedirs(SAVE_DIR, exist_ok=True)

    for i, song in enumerate(songs):

        # Filter out songs that have non-acceptable durations (to make it
        # easier for the AI to pick up the different parts)
        if not has_acceptable_durations(song, ACCEPTABLE_DURATIONS):
            continue

        # Transpose songs to Cmaj/Amin
        song = transpose(song)

        # Encode songs with music time series representation
        encoded_song = encode_song(song)

        # Save songs to text file
        save_path = os.path.join(SAVE_DIR, str(i))
        with open(save_path, "w") as fp:
            fp.write(encoded_song)
            

In [8]:
def load(file_path):
    """Return the whole text content of the file at ``file_path``."""
    with open(file_path, "r") as handle:
        return handle.read()

# It will be easier to manipulate that file and create sequences for training the model
def create_single_file_dataset(dataset_path, file_dataset_path, sequence_length):
    """Merge every encoded song under ``dataset_path`` into one string and file.

    Songs are separated by ``sequence_length`` "/" symbols. Files are read
    in sorted order so the merged dataset is reproducible from run to run.

    Args:
        dataset_path: Directory containing the encoded song files.
        file_dataset_path: Path of the merged file to write.
        sequence_length: Number of "/" delimiter symbols after each song.

    Returns:
        The merged dataset string (without the trailing space).
    """
    new_song_delimiter = "/ " * sequence_length

    # Collect the pieces and join once; repeated string concatenation in
    # the loop is quadratic in the size of the dataset
    chunks = []
    for path, subdirs, files in os.walk(dataset_path):
        subdirs.sort()
        for file in sorted(files):
            song = load(os.path.join(path, file))
            chunks.append(song + " " + new_song_delimiter)

    songs = "".join(chunks)[:-1]  # Just to remove space at end of string

    # save string that contains all dataset
    with open(file_dataset_path, "w") as fp:
        fp.write(songs)

    return songs

In [9]:
# This will be for the translation later. We know that our model will only take in integers
def create_mapping(songs, mapping_path):
    """Build the symbol -> integer vocabulary of ``songs`` and save it as JSON.

    Symbols are numbered in sorted order. Iterating a plain ``set`` of
    strings can yield a different order on every interpreter run, which
    would produce a mapping that disagrees with a model trained earlier.

    Args:
        songs: Encoded dataset, symbols separated by whitespace.
        mapping_path: Path of the JSON file to write.
    """
    # Identify the vocabulary
    vocabulary = sorted(set(songs.split()))

    # Create mappings
    mappings = {symbol: index for index, symbol in enumerate(vocabulary)}

    # Save vocabulary to a json file
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)  # indent puts each entry on its own line

In [16]:
# Run the preprocessing pipeline: encode the kern songs into SAVE_DIR,
# merge them into a single dataset file, then save the vocabulary mapping
preprocess(KERN_DATASET_PATH)
songs = create_single_file_dataset(SAVE_DIR, SINGLE_FILE_DATASET, SEQUENCE_LENGTH)
create_mapping(songs, MAPPING_PATH)

In [19]:
#songs

In [12]:
def convert_song_to_int(songs):
    """Translate an encoded songs string into a list of integers.

    The symbol -> integer table is read from the JSON file at
    ``MAPPING_PATH``.

    Args:
        songs: Encoded songs, symbols separated by whitespace.

    Returns:
        A list with the integer of every symbol, in order.

    Raises:
        KeyError: If a symbol is absent from the mapping.
    """
    with open(MAPPING_PATH, "r") as mapping_file:
        symbol_to_int = json.load(mapping_file)

    return [symbol_to_int[token] for token in songs.split()]

In [13]:
import tensorflow.keras as keras
import numpy as np

In [14]:
def generate_training_sequences(sequence_length):
    """Build the (inputs, targets) pairs used to train the network.

    A window of ``sequence_length`` symbols slides over the integer-encoded
    dataset one step at a time; the symbol right after each window is its
    target:
    [11, 12, 13, 14, ...] -> i: [11, 12], t: 13; i: [12, 13], t: 14; etc

    Args:
        sequence_length: Number of symbols in every input window.

    Returns:
        A tuple ``(inputs, targets)``: ``inputs`` is the one-hot encoded
        windows, ``targets`` the array of integer symbols that follow them.
    """
    # Read the merged dataset and turn its symbols into integers
    encoded = convert_song_to_int(load(SINGLE_FILE_DATASET))

    # E.g. 100 symbols with windows of 64 give 100 - 64 = 36 sequences
    window_starts = range(len(encoded) - sequence_length)
    inputs = [encoded[start:start + sequence_length] for start in window_starts]
    targets = [encoded[start + sequence_length] for start in window_starts]

    # One-hot encode the windows:
    # (# of sequences, sequence length) -> (# of sequences, sequence length, vocabulary size)
    # [ [0, 1, 2], [1, 1, 2] ] => [ [ [1, 0, 0], [0, 1, 0], [0, 0, 1] ], [...] ]
    vocabulary_size = len(set(encoded))
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabulary_size)
    targets = np.array(targets)

    return inputs, targets

In [15]:
# Build the one-hot encoded input windows and their next-symbol targets
inputs, targets = generate_training_sequences(SEQUENCE_LENGTH)