# Irish Folk Jig Generation using an LSTM Recurrent Neural Network

1. Importing Libraries

In [None]:
# Regular Imports
from music21 import converter, instrument, note, chord, stream, midi #Music21 Libary for Music Proccesing
import glob
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Tensorflow Imports
import tensorflow.keras as keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras.layers import LSTM, Dropout
from tensorflow.keras.layers import Embedding
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.models import load_model

2. Required Functions for Creating the Note-Sequence Database

In [None]:
# Event codes reserved just above the MIDI pitch range (0-127).
# 128 marks the end of the sounding note; 129 marks a time step where nothing changes.
MELODY_NOTE_OFF = 128 
MELODY_NO_EVENT = 129

def streamToNoteArray(stream):
    """Convert a music21 stream into a 1-D integer array of melody events.

    Time is quantised to sixteenth notes (0.25 quarter lengths per step).
    Values 0-127 are note-ons at that MIDI pitch, MELODY_NOTE_OFF (128) marks
    the step where the sounding note ends and MELODY_NO_EVENT (129) fills every
    step where nothing changes. When several notes start on the same step
    (including chords), only the highest pitch is kept.

    Args:
        stream: a music21 stream; it is flattened before being scanned.

    Returns:
        A numpy int16 array of length ``total_length + 1`` where
        ``total_length`` is the stream's highest time in sixteenth-note steps.
        A stream without notes or chords yields an array of MELODY_NO_EVENT.
    """
    flat = stream.flat
    # Builtin int replaces np.int, which was removed in NumPy 1.24.
    total_length = int(np.round(flat.highestTime / 0.25))  # in 1/16th of a whole note

    # Part one: collect [position, duration, pitch] for every note and chord.
    events = []
    for element in flat:
        if isinstance(element, note.Note):
            pitch = element.pitch.midi
        elif isinstance(element, chord.Chord):
            # A chord is represented by its top note only.
            pitch = element.sortAscending().pitches[-1].midi
        else:
            continue
        events.append([np.round(element.offset / 0.25),
                       np.round(element.quarterLength / 0.25),
                       pitch])

    # Array full of "no event" by default.
    output = np.full(total_length + 1, MELODY_NO_EVENT, dtype=np.int16)
    if not events:
        # Nothing to place; avoids indexing into an empty event table.
        return output

    np_events = np.array(events, dtype=int)
    df = pd.DataFrame({'pos': np_events[:, 0], 'dur': np_events[:, 1], 'pitch': np_events[:, 2]})
    # Highest pitch first within each position, so drop_duplicates keeps it.
    df = df.sort_values(['pos', 'pitch'], ascending=[True, False])
    df = df.drop_duplicates(subset=['pos'])

    # Part two: write note-on / note-off events in ascending position order,
    # so a later note-on overwrites an earlier note's note-off on the same step.
    for pos, dur, pitch in zip(df['pos'], df['dur'], df['pitch']):
        if not 0 <= pos < total_length:
            continue  # only steps before the final one carry note-ons
        output[pos] = pitch  # set note on
        off = pos + dur
        if off < output.size:
            # Rounding can push the end of a note past the array; skip it then.
            output[off] = MELODY_NOTE_OFF
    return output


def noteArrayToDataFrame(note_array):
    """Turn a melody event array into a table of events with durations.

    Steps holding MELODY_NO_EVENT are dropped. Each remaining event gets a
    duration equal to the distance to the next remaining event, expressed in
    quarter lengths (one step = 0.25). The final event, which has no
    successor, receives 0.25.

    Returns:
        A DataFrame with columns ``code`` and ``duration``, indexed by the
        original step positions of the kept events.
    """
    frame = pd.DataFrame({"code": note_array})
    # Seed the duration column with the step index; differences follow below.
    frame["duration"] = frame.index
    events = frame[frame["code"] != MELODY_NO_EVENT]
    # diff(-1) is (this - next), so negate it to get the steps until the next event.
    steps_to_next = events["duration"].diff(-1) * -1
    events = events.assign(duration=steps_to_next * 0.25)
    # The last event has no successor and is left as NaN by diff; give it one step.
    events = events.fillna(0.25)
    return events[["code", "duration"]]


def noteArrayToStream(note_array):
    """Convert a melody event array back into a music21 stream.

    The array is first reduced to (code, duration) events by
    noteArrayToDataFrame. MELODY_NO_EVENT and MELODY_NOTE_OFF codes become
    rests; every other code becomes a note built from that code. Each element
    is given the event's duration in quarter lengths and appended in order.
    """
    events = noteArrayToDataFrame(note_array)
    rest_codes = (MELODY_NO_EVENT, MELODY_NOTE_OFF)
    result = stream.Stream()
    for _, event in events.iterrows():
        if event.code in rest_codes:
            # Silence instead of a held note, so long notes are not produced.
            element = note.Rest()
        else:
            element = note.Note(event.code)
        element.quarterLength = event.duration
        result.append(element)
    return result



3. Define Training Variables for the Model

In [None]:
VOCABULARY_SIZE = 130 # 0-127 notes | 128 note_off | 129 no_event
SEQ_LEN = 128 # Length of each input note sequence fed to the model
BATCH_SIZE = 256 # Batch size passed to model.fit
# NOTE(review): the model.fit call in the training cell passes epochs=200 as a literal
# rather than this constant — confirm which value is intended.
EPOCHS = 30 # Epochs to train for

4. Loading Music Database from NPZ format + Preparing Dataset for training

In [None]:

# Load the pre-built melody corpus; the archive's 'train' entry is used as the training set.
# NOTE(review): allow_pickle=True lets NumPy unpickle object arrays, and unpickling can
# execute arbitrary code — only load this file from a trusted source.
with np.load('melody_training_dataset.npz', allow_pickle=True) as data:
    train_set = data['train']

print("Training melodies:", len(train_set))


def slice_sequence_examples(sequence, num_steps):
    """Slice a sequence into every contiguous window of ``num_steps`` items.

    Windows overlap and advance one item at a time, so a sequence of length
    ``n`` yields ``n - num_steps + 1`` windows. The previous range bound
    (``n - num_steps - 1``) silently discarded the last two valid windows,
    which meant a sequence of exactly ``num_steps`` items produced nothing.

    Args:
        sequence: any sliceable sequence with a length (list, numpy array, ...).
        num_steps: length of each window.

    Returns:
        A list of slices of ``sequence``, each of length ``num_steps``; empty
        when the sequence is shorter than ``num_steps``.
    """
    window_count = len(sequence) - num_steps + 1  # range() treats <= 0 as empty
    return [sequence[start:start + num_steps] for start in range(window_count)]


def seq_to_singleton_format(examples):
    """Split each example into its inputs and its single final target.

    For every example, all items except the last form the input and the last
    item is the target.

    Returns:
        A tuple ``(xs, ys)`` of two lists, aligned by example.
    """
    # One pass over `examples`, so one-shot iterables are consumed only once.
    pairs = [(example[:-1], example[-1]) for example in examples]
    inputs = [prefix for prefix, _ in pairs]
    targets = [target for _, target in pairs]
    return (inputs, targets)

# Cut every training melody into overlapping windows of SEQ_LEN inputs plus one target item.
slices = []
for seq in train_set:
    slices.extend(slice_sequence_examples(seq, SEQ_LEN + 1))

# Separate inputs from targets, then turn both into numpy arrays.
X, y = seq_to_singleton_format(slices)
X, y = np.array(X), np.array(y)

print("Total Training Corpus:")
for label, array in (("X:", X), ("y:", y)):
    print(label, array.shape)
print()

5. Building LSTM Model

In [None]:
# Training network: token embedding -> four stacked LSTMs -> softmax over the vocabulary.
model = Sequential()
model.add(Embedding(VOCABULARY_SIZE, 256, input_length=SEQ_LEN))

# Every recurrent layer except the last returns the full sequence so the next LSTM can consume it.
for hidden_units in (256, 312, 412):
    model.add(LSTM(hidden_units, return_sequences=True))
model.add(LSTM(512))

# Project the final LSTM output back onto the vocabulary.
model.add(Dense(VOCABULARY_SIZE, activation='softmax'))
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model.summary()

# Training loss recorded at the end of every epoch by CustomSaver.
losses = []


class CustomSaver(keras.callbacks.Callback):
    """Record the loss after every epoch and checkpoint the model every 10th epoch.

    Keras invokes ``on_epoch_end(epoch, logs)`` with the metrics dict as the
    second argument. The earlier signature named that slot ``loss``, so the
    whole dict was appended to ``losses`` instead of the loss value.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Save a checkpoint on epochs divisible by 10 and append the epoch loss.

        Args:
            epoch: zero-based epoch index supplied by Keras.
            logs: metrics dict supplied by Keras; its 'loss' entry is recorded
                (None is recorded if the entry is missing).
        """
        logs = logs or {}  # avoid a shared mutable default argument
        if epoch % 10 == 0:
            self.model.save(r'./v5-ckpt/model_{}.h5'.format(epoch))
        losses.append(logs.get('loss'))

6. Training

In [None]:
# Training loss recorded at the end of every epoch by CustomSaver.
losses = []


class CustomSaver(keras.callbacks.Callback):
    """Record the loss after every epoch and checkpoint the model every 10th epoch.

    Keras invokes ``on_epoch_end(epoch, logs)`` with the metrics dict as the
    second argument. The earlier signature named that slot ``loss``, so the
    whole dict was appended to ``losses`` instead of the loss value.
    """

    def on_epoch_end(self, epoch, logs=None):
        """Save a checkpoint on epochs divisible by 10 and append the epoch loss.

        Args:
            epoch: zero-based epoch index supplied by Keras.
            logs: metrics dict supplied by Keras; its 'loss' entry is recorded
                (None is recorded if the entry is missing).
        """
        logs = logs or {}  # avoid a shared mutable default argument
        if epoch % 10 == 0:
            self.model.save(r'./v6-ckpt/model_{}.h5'.format(epoch))
        losses.append(logs.get('loss'))


# NOTE(review): the epoch count is the literal 200, not the EPOCHS constant; the generation
# cell loads the epoch-180 checkpoint, so the literal is kept as is.
hist1 = model.fit(X, y, batch_size=BATCH_SIZE, epochs=200, callbacks=[CustomSaver()])

7. Generating Tunes

In [None]:
# Decoding network: same layer stack as the training model, but fed one token at a time
# with a fixed batch of 1 (batch_input_shape=(1, 1)).
model_dec = Sequential()
model_dec.add(Embedding(VOCABULARY_SIZE, 256, input_length=1, batch_input_shape=(1,1)))

# LSTM part. These layers were previously added to `model` (the training network), which
# left model_dec with only Embedding + Dense and no recurrent weights to load.
# stateful=True keeps the LSTM state between successive single-step predictions; without
# it each prediction sees only one token and model_dec.reset_states() has nothing to reset.
for hidden_units in (256, 312, 412):
    model_dec.add(LSTM(hidden_units, return_sequences=True, stateful=True))
model_dec.add(LSTM(512, stateful=True))

# Project back to vocabulary
model_dec.add(Dense(VOCABULARY_SIZE, activation='softmax'))
model_dec.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
model_dec.summary()

# Set weights from a saved training checkpoint.
# NOTE(review): this is a machine-specific absolute path; the training callback writes its
# checkpoints to './v6-ckpt/' — confirm the location before running elsewhere.
model_dec.load_weights(r"C:\Users\Vikas\OneDrive\Artash Python\A.I\Special Projects\Irish Music\v6-ckpt\model_180.h5")


# Generate 10000 tunes and write each one to ./gen2/<index>.mid.
# NOTE(review): tqdm, random and sample_model are not imported or defined in the visible
# part of this file — they must be in scope before this cell runs.
for i in tqdm(range(10000)):
    model_dec.reset_states() # Start with LSTM state blank
    # A fresh sampling temperature is drawn uniformly from [1.8, 2.5] for every tune.
    o = sample_model(60, model_dec, length=200, temperature=random.uniform(1.8,2.5))
    melody_stream = noteArrayToStream(o)
    mf = midi.translate.streamToMidiFile(melody_stream)
    mf.open('./gen2/'+str(i)+'.mid', 'wb')
    try:
        mf.write()
    finally:
        # Release the file handle even if writing fails part-way through.
        mf.close()