In [1]:
import os
import json
import numpy as np
import music21 as m21
import tensorflow.keras as keras

# Where the raw **kern songs live and where each derived artefact is written.
KERN_DATASET_PATH = "/home/pain/proj/music-generation/deutschl/essen/europa/deutschl/erk"
SAVE_DIR = "/home/pain/proj/music-generation/dataset"  # one encoded song per file
SINGLE_FILE_DATASET = "/home/pain/proj/music-generation/file_dataset"  # all songs, delimiter-separated
MAPPING_FILE = "/home/pain/proj/music-generation/mapping.json"  # symbol -> integer vocabulary

# Note/rest lengths, expressed in quarter notes, that a song may contain.
# Songs using any other duration are skipped during preprocessing.
ACCEPTABLE_DURATIONS = [0.25, 0.5, 0.75, 1.0, 1.5, 2, 3, 4]

2022-01-12 08:08:41.815380: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-01-12 08:08:41.816067: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [5]:
def load_songs(dataset_path):
    """Parse every ``.krn`` file found under ``dataset_path`` with music21.

    Directories and files are visited in sorted order so that the returned
    list (and therefore any index derived from it) is the same on every run
    and on every file system.

    :param dataset_path (str): Root folder that is searched recursively
    :return songs (list): One parsed music21 score per ``.krn`` file
    """
    songs = []

    for path, dirs, files in os.walk(dataset_path):
        # sorting in place also fixes the order in which os.walk descends
        dirs.sort()
        for file in sorted(files):
            # require the real extension; a bare "krn" suffix is not enough
            if file.endswith(".krn"):
                songs.append(m21.converter.parse(os.path.join(path, file)))

    return songs

In [6]:
# Keep only songs whose notes and rests all use one of the acceptable durations.
def has_acceptable_durations(song, acceptable_durations):
    """Tell whether every note and rest of ``song`` has an allowed length.

    :param song: music21 score to inspect
    :param acceptable_durations (list): Allowed durations in quarter lengths
    :return (bool): True when no event falls outside ``acceptable_durations``
    """
    return all(
        event.duration.quarterLength in acceptable_durations
        for event in song.flat.notesAndRests
    )

In [7]:
# Transpose every song to C major / A minor so the model only has to learn one major and one minor key instead of all 24.
def _first_measure_key(song):
    """Return the Key object notated in the first measure of the first part, or None."""
    first_part = next(iter(song.getElementsByClass(m21.stream.Part)), None)
    if first_part is None:
        return None

    first_measure = next(iter(first_part.getElementsByClass(m21.stream.Measure)), None)
    if first_measure is None:
        return None

    # look at every element instead of assuming the key sits at a fixed index
    for element in first_measure:
        if isinstance(element, m21.key.Key):
            return element
    return None


def transpose(song):
    """Transpose ``song`` to C major (major keys) or A minor (minor keys).

    The key is taken from the first measure of the first part when it is
    notated there; otherwise, or when the notated key is neither major nor
    minor, it is estimated with music21's key analysis.

    :param song: music21 score to transpose
    :return transposed_song: New score transposed to C major / A minor
    """
    key = _first_measure_key(song)

    # no usable key in the score, so estimate it using music21
    if key is None or key.mode not in ("major", "minor"):
        key = song.analyze("key")

    # get interval for transposition, e.g. B major -> C major
    target_tonic = "C" if key.mode == "major" else "A"
    interval = m21.interval.Interval(key.tonic, m21.pitch.Pitch(target_tonic))

    return song.transpose(interval)

In [8]:
def encode_song(song, time_step=0.25):
    """Encode a song as a space-separated time-series string.

    Every event occupies ``duration / time_step`` slots: the first slot holds
    the symbol (MIDI pitch number, or ``"r"`` for a rest) and the remaining
    slots hold the prolongation sign ``"_"``.
    Example: pitch 60 lasting 1.0 with ``time_step=0.25`` -> ``60 _ _ _``.

    Chords are reduced to their highest pitch. Events that are neither a
    note, a rest nor a non-empty chord are skipped, so they can never reuse
    the symbol of the previous event.

    :param song: music21 score to encode
    :param time_step (float): Length of one slot in quarter lengths
    :return encoded_song (str): Symbols separated by single spaces
    """
    encoded_song = []

    for event in song.flat.notesAndRests:
        if isinstance(event, m21.note.Note):
            symbol = event.pitch.midi  # e.g. 60
        elif isinstance(event, m21.note.Rest):
            symbol = "r"
        elif isinstance(event, m21.chord.Chord) and event.pitches:
            # keep the top voice so the melody line and the timing survive
            symbol = max(pitch.midi for pitch in event.pitches)
        else:
            continue

        # convert the event into time-series notation
        steps = int(event.duration.quarterLength / time_step)
        if steps > 0:
            encoded_song.append(symbol)
            encoded_song.extend(["_"] * (steps - 1))

    return " ".join(map(str, encoded_song))


In [9]:
def preprocess(dataset_path):
    """Load, filter, transpose and encode every song, one output file per song.

    Songs containing a duration outside ACCEPTABLE_DURATIONS are skipped.
    Each remaining song is written to SAVE_DIR under its index in the loaded
    list, so skipped songs leave gaps in the numbering.

    :param dataset_path (str): Root folder of the kern dataset
    """
    print("Loading songs...")
    songs = load_songs(dataset_path)
    print(f"Loaded {len(songs)} songs.")

    # the output folder does not exist on a fresh checkout
    os.makedirs(SAVE_DIR, exist_ok=True)

    for i, song in enumerate(songs):
        # filter out songs that have non-acceptable durations
        if not has_acceptable_durations(song, ACCEPTABLE_DURATIONS):
            continue

        encoded_song = encode_song(transpose(song))

        save_path = os.path.join(SAVE_DIR, str(i))
        with open(save_path, "w") as fp:
            fp.write(encoded_song)

In [10]:
def load(file_path):
    """Return the complete text content of ``file_path``.

    :param file_path (str): File to read
    :return (str): Everything the file contains
    """
    with open(file_path, "r") as handle:
        return handle.read()

def create_single_file_dataset(dataset_path, file_dataset_path, sequence_length=64):
    """Merge all encoded songs into one string and save it to a single file.

    Every song is followed by ``sequence_length`` delimiter symbols ``/``.
    Files are read in sorted order so the merged dataset is identical on
    every run.

    :param dataset_path (str): Folder holding one encoded song per file
    :param file_dataset_path (str): Destination of the merged dataset
    :param sequence_length (int): Number of ``/`` symbols appended per song
    :return songs (str): The merged dataset that was written
    """
    new_song_delimiter = "/ " * sequence_length

    # collect the pieces and join once instead of growing a string in a loop
    pieces = []
    for path, dirs, files in os.walk(dataset_path):
        dirs.sort()
        for file in sorted(files):
            song = load(os.path.join(path, file))
            pieces.append(song + " " + new_song_delimiter)

    # drop the trailing space left by the last delimiter
    songs = "".join(pieces)[:-1]

    # save string that contains the whole dataset
    with open(file_dataset_path, "w") as fp:
        fp.write(songs)

    return songs


In [11]:
def create_mapping(songs, mapping_path):
    """Build the symbol -> integer vocabulary and save it as JSON.

    Symbols are numbered in sorted order, so the same dataset always yields
    the same mapping; a model trained with one mapping file stays usable
    after the file is regenerated. Tokens are split on any whitespace, the
    same way ``convert_songs_to_int`` splits them, so no empty symbol can
    enter the vocabulary.

    :param songs (str): Whole dataset as one whitespace-separated string
    :param mapping_path (str): Where the JSON mapping is written
    """
    # identify the vocabulary in a reproducible order
    vocabulary = sorted(set(songs.split()))

    # create mappings
    mappings = {symbol: i for i, symbol in enumerate(vocabulary)}

    # save vocabulary to a json file
    with open(mapping_path, "w") as fp:
        json.dump(mappings, fp, indent=4)


In [12]:
def convert_songs_to_int(songs):
    """Translate a whitespace-separated symbol string into integer ids.

    The symbol -> integer table is read from MAPPING_FILE.

    :param songs (str): Encoded dataset
    :return int_songs (list of int): One id per symbol, in order
    """
    with open(MAPPING_FILE, "r") as fp:
        symbol_to_int = json.load(fp)

    return [symbol_to_int[token] for token in songs.split()]

In [14]:
def generating_training_sequences(sequence_length=64):
    """Create one-hot encoded input windows and their next-symbol targets.

    A window of ``sequence_length`` symbols slides over the dataset one step
    at a time; the symbol following each window is its target.
    Example with length 2: [11, 12, 13, 14] -> input [11, 12], target 13.

    :param sequence_length (int): Number of symbols in each input window
    :return inputs: Array of shape (# sequences, sequence_length, vocabulary size)
    :return targets: Array of shape (# sequences,) holding integer symbol ids
    :raises ValueError: If the dataset is not longer than ``sequence_length``
    """
    # load songs and map them to int
    songs = load(SINGLE_FILE_DATASET)
    int_songs = convert_songs_to_int(songs)

    num_sequences = len(int_songs) - sequence_length
    if num_sequences <= 0:
        raise ValueError(
            f"Dataset has {len(int_songs)} symbols; more than "
            f"{sequence_length} are needed to build a training sequence."
        )

    # sliding window
    inputs = [int_songs[i:i + sequence_length] for i in range(num_sequences)]
    targets = [int_songs[i + sequence_length] for i in range(num_sequences)]

    # The one-hot width must be the size of the saved vocabulary, which is
    # also what MelodyGenerator uses at generation time. Counting the
    # distinct ids in the dataset is only right when every mapped symbol
    # occurs in it.
    with open(MAPPING_FILE, "r") as fp:
        vocabulary_size = len(json.load(fp))

    # inputs: (# of sequences, sequence length) -> (..., vocabulary size)
    # [[0,1,2], [1,1,2]] -> [[[1,0,0], [0,1,0], [0,0,1]], [[0,1,0], [0,1,0], [0,0,1]]]
    inputs = keras.utils.to_categorical(inputs, num_classes=vocabulary_size)
    targets = np.array(targets)

    return inputs, targets


In [15]:
# Size of the output layer and of the one-hot input.
# NOTE(review): must equal the number of symbols in the mapping file - confirm after regenerating it.
OUTPUT_UNITS = 38
# Keras loss identifier for integer targets (the previous spelling "...crossentrophy" is not a valid loss name).
LOSS = "sparse_categorical_crossentropy"
LEARNING_RATE = 0.0001
NUM_UNITS = [256]  # number of units in each LSTM layer
EPOCHS = 50
BATCH_SIZE = 64
SAVE_MODEL_PATH = "model.h5"  # trained model is saved in Keras HDF5 format

def build_model(output_units, num_units, loss, learning_rate):
    """Build and compile the LSTM network that predicts the next symbol.

    One LSTM layer is created per entry of ``num_units``; all but the last
    return full sequences so the layers can be stacked. A single-entry list
    gives the same architecture as before: LSTM -> Dropout -> Dense softmax.

    :param output_units (int): Vocabulary size (one-hot width and output size)
    :param num_units (list of int): Units of each LSTM layer
    :param loss (str): Keras loss identifier
    :param learning_rate (float): Learning rate of the Adam optimizer
    :return model: Compiled Keras model
    :raises ValueError: If ``num_units`` is empty
    """
    if not num_units:
        raise ValueError("num_units must contain at least one LSTM layer size.")

    # create the model architecture; None allows any sequence length
    inputs = keras.layers.Input(shape=(None, output_units))
    x = inputs
    last_layer = len(num_units) - 1
    for depth, units in enumerate(num_units):
        x = keras.layers.LSTM(units, return_sequences=depth != last_layer)(x)

    # dropout to limit overfitting
    x = keras.layers.Dropout(0.2)(x)

    output = keras.layers.Dense(output_units, activation="softmax")(x)
    model = keras.Model(inputs, output)

    # compile model; `learning_rate` replaces the deprecated `lr` argument
    model.compile(loss=loss,
                  optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
                  metrics=["accuracy"])

    model.summary()
    return model

def train(output_units=OUTPUT_UNITS, num_units=NUM_UNITS, loss=LOSS, learning_rate=LEARNING_RATE):
    """Prepare the training data, fit a freshly built network and save it.

    The number of epochs, the batch size and the destination file come from
    the module constants EPOCHS, BATCH_SIZE and SAVE_MODEL_PATH.

    :param output_units (int): Vocabulary size passed to ``build_model``
    :param num_units (list of int): LSTM layer sizes passed to ``build_model``
    :param loss (str): Keras loss identifier
    :param learning_rate (float): Optimizer learning rate
    """
    # training windows and their next-symbol targets
    x_train, y_train = generating_training_sequences()

    # network to be fitted
    network = build_model(output_units, num_units, loss, learning_rate)

    network.fit(x_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE)

    # persist the trained weights and architecture
    network.save(SAVE_MODEL_PATH)

# Start training with the default hyper-parameters as soon as this cell runs.
train()


In [7]:
import json

class MelodyGenerator:
    """A class that wraps the LSTM model and offers utilities to generate melodies."""

    def __init__(self, model_path="model.h5"):
        """Load the trained Keras model and the symbol -> integer mappings.

        :param model_path (str): Path of the saved Keras model
        """

        self.model_path = model_path
        self.model = keras.models.load_model(model_path)

        with open(MAPPING_FILE, "r") as fp:
            self._mappings = json.load(fp)

        # A run of song delimiters is placed in front of every seed; 64 is
        # the default sequence length used when the dataset was built.
        self._start_symbols = ["/"] * 64


    def generate_melody(self, seed, num_steps, max_sequence_length, temperature):
        """Generate a melody with the model, starting from a seed.

        :param seed (str): Melody seed with the notation used to encode the dataset
        :param num_steps (int): Maximum number of steps to be generated
        :param max_sequence_length (int): Max number of steps in seed to be considered for generation
        :param temperature (float): Numbers closer to 0 make the model more deterministic.
            A number closer to 1 makes the generation more unpredictable.
        :return melody (list of str): Seed symbols followed by the generated symbols
        :raises KeyError: If the seed contains a symbol missing from the mappings
        """

        # build the reverse table once instead of scanning the mappings at every step
        int_to_symbol = {index: symbol for symbol, index in self._mappings.items()}

        # create seed with start symbols
        seed_symbols = seed.split()
        melody = list(seed_symbols)

        # map seed to int
        seed_ints = [self._mappings[symbol] for symbol in self._start_symbols + seed_symbols]

        for _ in range(num_steps):

            # limit the seed to max_sequence_length
            seed_ints = seed_ints[-max_sequence_length:]

            # one-hot encode the seed
            onehot_seed = keras.utils.to_categorical(seed_ints, num_classes=len(self._mappings))
            # (1, max_sequence_length, num of symbols in the vocabulary)
            onehot_seed = onehot_seed[np.newaxis, ...]

            # make a prediction
            probabilities = self.model.predict(onehot_seed)[0]
            # [0.1, 0.2, 0.1, 0.6] -> 1
            output_int = int(self._sample_with_temperature(probabilities, temperature))

            # update seed
            seed_ints.append(output_int)

            # map int to our encoding
            output_symbol = int_to_symbol[output_int]

            # check whether we're at the end of a melody
            if output_symbol == "/":
                break

            # update melody
            melody.append(output_symbol)

        return melody


    def _sample_with_temperature(self, probabilites, temperature):
        """Samples an index from a probability array reapplying softmax using temperature

        :param probabilites (nd.array): Array containing probabilities for each of the possible outputs.
        :param temperature (float): Numbers closer to 0 make the model more deterministic.
            A number closer to 1 makes the generation more unpredictable.
            A value of 0 or below selects the most probable output.
        :return index (int): Selected output symbol
        """
        # float64 keeps the rescaled probabilities summing to 1 within the
        # tolerance np.random.choice checks
        probabilites = np.asarray(probabilites, dtype=np.float64)

        # dividing by a zero temperature is undefined; its limit is argmax
        if temperature <= 0:
            return int(np.argmax(probabilites))

        # log(0) is -inf, which correctly becomes probability 0 again below
        with np.errstate(divide="ignore"):
            predictions = np.log(probabilites) / temperature

        # subtracting the maximum prevents overflow in exp at low temperatures
        weights = np.exp(predictions - np.max(predictions))
        probabilites = weights / np.sum(weights)

        choices = range(len(probabilites)) # [0, 1, 2, 3]
        index = np.random.choice(choices, p=probabilites)

        return int(index)


    def save_melody(self, melody, step_duration=0.25, format="midi", file_name="mel.mid"):
        """Converts a melody into a MIDI file

        :param melody (list of str): Symbols: MIDI pitch numbers, "r" for rests, "_" for prolongation
        :param step_duration (float): Duration of each time step in quarter length
        :param format (str): Output format passed to music21's ``write``
        :param file_name (str): Name of midi file
        :return:
        """

        # create a music21 stream
        stream = m21.stream.Stream()

        current_symbol = None  # note/rest currently being prolonged
        current_steps = 0      # time steps collected for it so far

        def flush():
            # turn the pending symbol into a music21 event of the right length
            if current_symbol is None:
                return
            quarter_length_duration = step_duration * current_steps # 0.25 * 4 = 1
            if current_symbol == "r":
                m21_event = m21.note.Rest(quarterLength=quarter_length_duration)
            else:
                m21_event = m21.note.Note(int(current_symbol), quarterLength=quarter_length_duration)
            stream.append(m21_event)

        # parse all the symbols in the melody and create note/rest objects
        for symbol in melody:

            # prolongation sign "_": extend the pending event, if there is one
            if symbol == "_":
                if current_symbol is not None:
                    current_steps += 1
                continue

            # a new note/rest closes the previous event
            flush()
            current_symbol = symbol
            current_steps = 1

        # the last event has no successor to close it, so close it here;
        # this keeps its full length whether the melody ends on "_" or on a note
        flush()

        # write the m21 stream to a midi file
        stream.write(format, file_name)

# Generate a melody from the first seed.
# The result is a list of string symbols in the same encoding that is used
# for preprocessing and for training the model.
# It is also saved as a MIDI file, which can be opened in MuseScore.
# Even from a very short seed, the model generates a reasonably good melody.
# Demo: load the saved model, continue a short seed and write the result to a MIDI file.
if __name__ == "__main__":
    mg = MelodyGenerator()
    # seeds use the dataset encoding: MIDI pitch numbers and "_" prolongation signs
    seed = "67 _ 67 _ 67 _ _ 65 64 _ 64 _ 64 _ _"
    # alternative seed; it is not used below
    seed2 = "67 _ _ _ _ _ 65 _ 64 _ 62 _ 60 _ _ _"
    # up to 500 generated steps, at most 64 steps of context, temperature 0.3
    melody = mg.generate_melody(seed, 500, 64, 0.3)
    print(melody)
    # written with save_melody's defaults (file name "mel.mid")
    mg.save_melody(melody)

['67', '_', '67', '_', '67', '_', '_', '65', '64', '_', '64', '_', '64', '_', '_', '62', '60', '_', '60', '_', '64', '_', '_', '62', '60', '_', '_', '_', 'r', '_', '60', '_', '64', '_', '67', '_', '67', '_', '_', '_', '67', '_', '65', '_', '65', '_', '_', '_', '64', '_', '_', '_', '60', '_', '_', '_', '64', '_', '_', '_', '67', '_', '_', '_', '67', '_', '65', '_', '64', '_', '_', '_', '65', '_', '_', '_', '64', '_', '_', '62', '62', '_', '_', '_', '60', '_', '_', '_']


In [12]:
# Data preparation: encode the songs, merge them into one file and build the vocabulary.
# The single-file dataset and the mapping file written here are read by
# generating_training_sequences(), so this must run before train() on a fresh setup.
if __name__ == "__main__":
    m21.environment.set("musescoreDirectPNGPath", "/usr/bin/mscore")
    preprocess(KERN_DATASET_PATH)
    songs = create_single_file_dataset(SAVE_DIR, SINGLE_FILE_DATASET)
    create_mapping(songs, MAPPING_FILE)
    # inputs, targets = generating_training_sequences()

    # Manual check kept for reference: show one song before and after transposition.
    # songs = load_songs(KERN_DATASET_PATH)
    # # print(len(songs))
    # song = songs[0]
    # song.show()
    # transposed_song = transpose(song)
    # transposed_song.show()

Loading songs...
Loaded 1700 songs.
