In [1]:
################## Imports ##################
from music21 import converter, instrument, note, chord, stream, tempo
from tensorflow.python.keras.utils import to_categorical
from tensorflow.python.keras.callbacks import ModelCheckpoint
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Dropout, CuDNNLSTM, Reshape
from tqdm import tnrange
import numpy as np
import os
import glob

In [2]:
################## Pre Processing ##################

######### PARAMETERS #########
# preprocessing
music_directories = ('../../chopin', )
sequence_length = 50
# was trained with 100

# training
training_epochs = 300
training_batch_size = 1024

# inference
generated_notes_length = 300
file_name = 'test_gen_chopin.mid'

In [3]:
################## Pre Processing ##################
######### FUNCTIONS #########

# Load music file in given directories. Merge multiple hands and return music dictionary. This dictionary contains all notes for both hands at each global timestep
def load_music(data_dirs):
    music = {}
    total_offset = 0
    # load all files in all dirs
    for path in data_dirs:
        for file in glob.glob(path + '/*.mid'):
            print('parsing file ' + file)
            midi = converter.parse(file)
            notes_to_parse = midi.recurse()
            # falls mindestens 1 element in music, get totaloffset as biggest element of music
            if len(music.keys()) > 0:
                total_offset= sorted(music.keys())[-1]
            #go through all nodes
            for el in notes_to_parse:
                #check if offset is defined as float already 
                offset = float(el.offset)
                offset += total_offset
                #check if offset already occured before, no merge both the part of left and right hand
                if offset not in music:
                    music[offset] = []
                if isinstance(el, note.Note):
                    music[offset].append(el)
                elif isinstance(el, chord.Chord):
                    for element in el.notes:
                        music[offset].append(element)
                else:
                    if offset in music: del music[offset]
    return music

# If a given note (pitch) is flat, transform into sharp (requires flat_to_sharp mapping)
def to_sharp(pitch):
    # Mapping from flat to sharp notes in order to decrease dimensionality
    flat_to_sharp={
        'D-':'C#',
        'E-':'D#',
        'G-':'F#',
        'A-':'G#',
        'B-':'A#'
    }
    #pitch=e.g. 'E-4'
    if pitch not in ['-1']:
        if '-' in pitch:
            #flat!
            #get octave:
            octave = pitch[-1]
            return flat_to_sharp[pitch[:2]] + octave#0 to 1
        else:
            return pitch
    else:
        return pitch
    
#sort notes: 1. octave 2. pitch =>actual hight, not character
def sort_notes(unique_note_freqs):
#define all possible pitches
    possible_pitches = ['C','C#','D','D#','E','F','F#','G', 'G#','A','A#','B']
    all_pitches=[]
    for i in range(0,8):#1 to 7
        for pitch in possible_pitches:
            all_pitches.append(pitch+str(i))
    valid_pitches=[]
    for all_pitch in all_pitches:
        if all_pitch in unique_note_freqs:
            valid_pitches.append(all_pitch)
    return valid_pitches

# Convert music dictionary to music list
def music_dict_to_list(music_dict):
    music_list=[]
    for key in sorted(music_dict):
        music_list.append([key, music_dict[key]])
    return music_list

# get unqiue list of notes from given music list
def get_unique_notes(music_list):
    unique_notes = []
    for _, notes in music_list:
        #for each note      
        for _, current_note in enumerate(notes):
            current_pitch = to_sharp(str(current_note.pitch))
            #get all unique notes:
            if current_pitch not in unique_notes:
                unique_notes.append(current_pitch)
    return unique_notes

# get unique list of durations form given music list
def get_unique_durations(music_list):
    unique_durations = []
    for _, notes in music_list:
        #for each note      
        for _, current_note in enumerate(notes):
            current_duration = float(current_note.quarterLength)
            #get all unique durations:
            if current_duration not in unique_durations:
                unique_durations.append(current_duration)
    return unique_durations

# get unique list of durations form given music list
def get_unique_temporal_differences(music_list):
    unique_temporal_differences = [0.0]
    i = 0
    for _, notes in music_list:
        #define temporal difference
        #temporal difference: time until next note in list
        #e.g. if it's 0 it play simultaneously with next note
        #check if next note at i+1 exists to calculate time until next note
        if len(music_list) > i+1:
            #temporal differnce = duration from current timestamp to next timestamp
            temporal_difference = music_list[i+1][0] - music_list[i][0]
        else:
            #for last note
            #temporal differnce = duration of note
            temporal_difference = notes[0].quarterLength
        if temporal_difference not in unique_temporal_differences:
            unique_temporal_differences.append(temporal_difference)
        i+=1
    return unique_temporal_differences

# transform a music list into training data. We select 3 features for each note.
def create_training_data(music_list):
    training_data = []
    i=0
    for timestamp, notes in music_list:
        if len(music_list) > i+1:
            #temporal differnce = duration from current timestamp to next timestamp
            temporal_difference = music_list[i+1][0] - music_list[i][0]
        else:
            #for last note
            #temporal differnce = duration of note
            temporal_difference = notes[0].quarterLength
        #for each note      
        for idx, current_note in enumerate(notes):
            current_pitch = to_sharp(str(current_note.pitch))#to_sharp translates notes with 2 names => reduce input space
            current_duration = float(current_note.quarterLength)
            #append to training_data 
            if (idx+1) == len(notes):
                #last note in timestamp
                training_data.append([temporal_difference, current_pitch, current_duration])
            else:
                #temporal difference of 0 for simultaneous notes
                training_data.append([0, current_pitch, current_duration])
        i += 1
    return training_data

# create training sequences and the correpsonding outputs for given training data
def create_sequences(training_data, sequence_length):
    X = []
    y = []
    for i in range(0, len(training_data) - sequence_length):
        sequence_in = training_data[i:i + sequence_length]
        sequence_out = training_data[i + sequence_length]
        X.append(sequence_in)
        y.append(sequence_out)
    return X, y

# normalize input sequences
def normalize_input(X, unique_note_to_int, unique_duration_to_int, unique_temporal_difference_to_int):
    normalized_X=[]
    for sequence in X:
        normalized_sequence_X = []
        for data in sequence:
            normalized_single_X=[]
            # normalize temporal_diff:
            normalized_single_X.append(unique_temporal_difference_to_int[data[0]]/len(unique_temporal_difference_to_int.keys()))
            # normalize pitch
            normalized_single_X.append(unique_note_to_int[data[1]]/len(unique_note_to_int.keys()))
            # normalize duration
            normalized_single_X.append(unique_duration_to_int[data[2]]/len(unique_duration_to_int.keys()))
            # append normalized feature set to sequence
            normalized_sequence_X.append(normalized_single_X)
        normalized_X.append(normalized_sequence_X)
    return normalized_X

# normalize output feature set
def normalize_output(y, unique_note_to_int, unique_duration_to_int, unique_temporal_difference_to_int):
    normalized_y = []
    for data in y:
        normalized_single_y=[]
        # normalize temporal_diff:
        normalized_single_y.append(unique_temporal_difference_to_int[data[0]])
        # normalize pitch
        normalized_single_y.append(unique_note_to_int[data[1]])
        # normalize duration
        normalized_single_y.append(unique_duration_to_int[data[2]])
        # append normalized feature set to y
        normalized_y.append(normalized_single_y)
    normalized_y = to_categorical(normalized_y)
    return normalized_y

# predict output feature sets with trained model
def predict_notes(model, pattern, generated_notes_length, unique_temporal_differences_length, unique_notes_length, unique_durations_length):
    prediction_output = []
    for note_index in tnrange(generated_notes_length, desc='generating notes'):
        # predict next feature set with given pattern
        prediction_input = np.reshape(pattern, (1, pattern.shape[0], pattern.shape[1]))
        
        #print("prediction input:\n",prediction_input)
        
        prediction = model.predict(prediction_input, verbose=0)
        
        # get max predictions
        index_temporal_difference = np.argmax(prediction[0][0])
        index_note = np.argmax(prediction[0][1])
        index_duration = np.argmax(prediction[0][2])
        
        #print("prediction 0", prediction[0][0])
        #print("prediction 1", prediction[0][1])
        #print("prediction 2", prediction[0][2])
        
        #print("prediction index 0", index_temporal_difference)
        #print("prediction index 1", index_note)
        #print("prediction index 2", index_duration)

        # transform prediction at max index
        result_temporal_difference = int_to_unique_temporal_difference[index_temporal_difference]
        result_note = int_to_unique_note[index_note]
        result_duration = int_to_unique_duration[index_duration]
        
        # Normalize predictions to append it to the sequence
        result_temporal_difference_normalized = index_temporal_difference/unique_temporal_differences_length
        result_note_normalized = index_note/unique_notes_length
        result_duration_normalized = index_duration/unique_durations_length
        
        #print("result 0", result_temporal_difference)
        #print("result 1", result_note)
        #print("result 2", result_duration)

        # append result to output
        prediction_output.append([result_temporal_difference, result_note, result_duration])
        
        # add new pattern in order to generate next feature set
        #pattern = np.vstack((pattern, (result_temporal_difference,result_note_normalized,result_duration)))
        pattern = np.vstack((pattern, (result_temporal_difference_normalized,result_note_normalized,result_duration_normalized)))
        pattern = np.delete(pattern, 0, 0)
    return prediction_output

# convert 
def convert_musiclist_to_music(prediction_output):
    offset = 0
    output_notes = []
    # create note and chord objects based on the values generated by the model
    for feature_set in prediction_output:  
        # get single values from generated feature set
        gen_offset = feature_set[0]
        gen_note = feature_set[1]
        gen_duration = feature_set[2]

        # create note
        new_note = note.Note(gen_note, quarterLength=gen_duration)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
        output_notes.append(new_note)

        # increase offset each iteration
        offset += gen_offset
    return output_notes

# play generated music
def play_music(output_notes, file_name):
    midi_stream = stream.Stream(output_notes) #output_notes
    #mm1 = tempo.MetronomeMark('slow')
    #midi_stream.append(mm1)
    #midi_stream.append(output_notes)
    midi_stream.write('midi', fp=file_name)
    midi_stream.show('midi')
    #midi_stream.show('text')

In [4]:
################## Pre Processing ##################
######### LOGIC #########

# load music files
music = load_music(data_dirs=music_directories)
print("# Parsed music length:",len(music))

# convert music dictionary to list
music_list = music_dict_to_list(music)

# get unique notes from music list
unique_notes = get_unique_notes(music_list)
#sort unique notes by actual pitch
unique_notes = sort_notes(unique_notes)
print("# Unique notes:\n", unique_notes)

# get unique durations from music list 
unique_durations = get_unique_durations(music_list)
print("# Unique durations:", unique_durations)
# get max duration from music list
max_duration = max(unique_durations)
print("# Max duration:", max_duration)

# get unqiue temporal difference
unique_temporal_differences = get_unique_temporal_differences(music_list)
print("# Unique temporal differences", unique_temporal_differences)
# get max temporal difference
max_temporal_difference = max(unique_temporal_differences)
print("# Max temporal difference:", max_temporal_difference)

# create training data by extracting 3 required features for each note
training_data = create_training_data(music_list)
print("# First element of training data (temporal_difference, note, duration):\n",training_data[0])

## create Mappings for all features
# create a dictionary to map notes to integers
unique_note_to_int = {note: number for number, note in enumerate(unique_notes)}
print("# Unique note to int:\n", unique_note_to_int)
int_to_unique_note = {number: note for number, note in enumerate(unique_notes)}
print("# Int to unique note:\n", int_to_unique_note)

# create a dictionary to map durations to integers
unique_duration_to_int = {duration: number for number, duration in enumerate(unique_durations)}
print("# Unique duration to int:\n", unique_duration_to_int)
int_to_unique_duration = {number: duration for number, duration in enumerate(unique_durations)}
print("# Int to unique duration:\n", int_to_unique_duration)

# create a dictionary to map temporal differences to integers
unique_temporal_difference_to_int = {temporal_difference: number for number, temporal_difference in enumerate(unique_temporal_differences)}
print("# Unique temporal difference to int:\n", unique_temporal_difference_to_int)
int_to_unique_temporal_difference = {number: temporal_difference for number, temporal_difference in enumerate(unique_temporal_differences)}
print("# Int to unique temporal difference:\n", int_to_unique_temporal_difference)

# create input sequences and the corresponding outputs
X, y = create_sequences(training_data, sequence_length)
print("# First X\n", X[0])
print("# First y\n:", y[0])

# normalize input and output
X = normalize_input(X, unique_note_to_int, unique_duration_to_int, unique_temporal_difference_to_int)
y = normalize_output(y, unique_note_to_int, unique_duration_to_int, unique_temporal_difference_to_int)
print("# First normalized X\n", X[0])
print("# First normalized y\n:", y[0])

# reshape the input into a format compatible with LSTM layers
X = np.reshape(X, (-1, sequence_length, 3))
print("# Shape of X (after reshape):", X.shape)
print("# Shape of y (after reshape):", y.shape)

parsing file ../../chopin/chpn_op10_e01.mid
parsing file ../../chopin/chpn-p6.mid
parsing file ../../chopin/chpn_op35_2.mid
parsing file ../../chopin/chpn-p23.mid
parsing file ../../chopin/chpn-p4.mid
parsing file ../../chopin/chpn_op25_e4.mid
parsing file ../../chopin/chpn-p14.mid
parsing file ../../chopin/chpn_op7_2.mid
parsing file ../../chopin/chpn-p2.mid
parsing file ../../chopin/chpn-p17.mid
parsing file ../../chopin/chpn_op53.mid
parsing file ../../chopin/chpn_op27_1.mid
parsing file ../../chopin/chpn-p12.mid
parsing file ../../chopin/chpn-p21.mid
parsing file ../../chopin/chpn_op35_1.mid
parsing file ../../chopin/chpn-p10.mid
parsing file ../../chopin/chpn_op7_1.mid
parsing file ../../chopin/chpn_op27_2.mid
parsing file ../../chopin/chpn-p13.mid
parsing file ../../chopin/chpn_op33_4.mid
parsing file ../../chopin/chpn-p16.mid
parsing file ../../chopin/chpn_op25_e11.mid
parsing file ../../chopin/chpn-p1.mid
parsing file ../../chopin/chpn_op25_e1.mid
parsing file ../../chopin/chpn

# First X
 [[0.25, 'G3', 0.25], [0.25, 'C4', 0.25], [0.25, 'E4', 0.25], [0.25, 'C4', 0.25], [0.25, 'G4', 0.25], [0.25, 'C5', 0.25], [0.25, 'E5', 0.25], [0.25, 'C5', 0.25], [0.25, 'G5', 0.25], [0.25, 'C6', 0.25], [0.25, 'E6', 0.25], [0.25, 'C6', 0.25], [0.25, 'G6', 0.25], [0.25, 'C7', 0.25], [0.25, 'E7', 0.25], [0.25, 'C7', 0.25], [0.25, 'G6', 0.25], [0.25, 'C6', 0.25], [0.25, 'E6', 0.25], [0.25, 'C6', 0.25], [0.25, 'G5', 0.25], [0.25, 'C5', 0.25], [0.25, 'E5', 0.25], [0.25, 'C5', 0.25], [0.25, 'G4', 0.25], [0.25, 'C4', 0.25], [0.25, 'E4', 0.25], [0.25, 'C4', 0.25], [0.25, 'G3', 0.25], [0.5, 'C3', 0.25], [0.25, 'C3', 0.25], [0.25, 'A3', 0.25], [0.25, 'C4', 0.25], [0.25, 'F4', 0.25], [0.25, 'C4', 0.25], [0.25, 'A4', 0.25], [0.25, 'C5', 0.25], [0.25, 'F5', 0.25], [0.25, 'C5', 0.25], [0.25, 'A5', 0.25], [0.25, 'C6', 0.25], [0.25, 'F6', 0.25], [0.25, 'C6', 0.25], [0.25, 'A6', 0.25], [0.25, 'C7', 0.25], [0, 'E7', 0.25], [0, 'F#1', 1.0], [0.25, 'F#2', 1.0], [0.25, 'C7', 0.25], [0.25, 'A6', 0.

In [5]:
################## Model ##################
model = Sequential()
model.add(CuDNNLSTM(
    1024,
    return_sequences=True, 
    input_shape=(X.shape[1], X.shape[2])))
model.add(Dropout(rate=0.3))
model.add(CuDNNLSTM(512, return_sequences=True))
model.add(Dropout(rate=0.3))
model.add(CuDNNLSTM(512))
model.add(Dropout(rate=0.3))
model.add(Dense(units=y.shape[1] * y.shape[2], activation='softmax'))
model.add(Reshape((y.shape[1] ,y.shape[2])))
model.load_weights('checkpoint_overnight_training')
model.compile(loss='categorical_crossentropy', optimizer='adam')

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm (CuDNNLSTM)       (None, 50, 1024)          4214784   
_________________________________________________________________
dropout (Dropout)            (None, 50, 1024)          0         
_________________________________________________________________
cu_dnnlstm_1 (CuDNNLSTM)     (None, 50, 512)           3149824   
_________________________________________________________________
dropout_1 (Dropout)          (None, 50, 512)           0         
_________________________________________________________________
cu_dnnlstm_2 (CuDNNLSTM)     (None, 512)               2101248   
_________________________________________________________________
dropout_2 (Dropout)  

In [6]:
################## Training ##################
"""
checkpoint = ModelCheckpoint(
    'newchpts',
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min'
)

callbacks_list = [checkpoint]
model.fit(X, y, epochs=training_epochs, batch_size=training_batch_size, callbacks=callbacks_list)
"""

"\ncheckpoint = ModelCheckpoint(\n    'newchpts',\n    monitor='loss',\n    verbose=0,\n    save_best_only=True,\n    mode='min'\n)\n\ncallbacks_list = [checkpoint]\nmodel.fit(X, y, epochs=training_epochs, batch_size=training_batch_size, callbacks=callbacks_list)\n"

In [8]:
################## Inference ##################

# get random notes from training data
start = np.random.randint(0, len(X)-1)
seed = X[start]
# predict model output of specified length, starting with the previously created seed
prediction_output = predict_notes(model, seed, generated_notes_length, len(unique_temporal_differences), len(unique_notes), len(unique_durations)) 

# Convert feature set back to music
output_notes = convert_musiclist_to_music(prediction_output)

# Finally: Play music!
play_music(output_notes, file_name)

HBox(children=(IntProgress(value=0, description='generating notes', max=300, style=ProgressStyle(description_w…


