### Set Up

#### Standard library imports

In [1]:
import copy
import datetime
import os
import pdb
import random
import time
from tempfile import TemporaryFile

#### Third party imports 

In [2]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.contrib.rnn import BasicLSTMCell, LSTMStateTuple

#### Local imports

In [3]:
import modules.nikhil.midi_related as midi
import modules.nikhil.batch as batch

from modules.nikhil.MyFunctions import (
    alignXy,
    Conditional_Probability_Layer,
    Input_Kernel, 
    getNumberOfBatches,
    Loss_Function_1,
    Loss_Function_2,
    LSTM_Cell,
    LSTM_Layer
)

#### Extensions and autoreload

In [4]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

#### Setting relative directories

In [6]:
Working_Directory = os.getcwd()
Project_Directory = os.path.abspath(os.path.join(Working_Directory,'..'))
Music_In_Directory = Project_Directory + "/data/" 
Output_Directory = Project_Directory + "/outputs/"
Model_Directory = Output_Directory + "models/"
Music_Out_Directory = Output_Directory + "midi/"
Music_Out_Train_Directory = Music_Out_Directory + "train/"
Checkpoint_Directory = Model_Directory + "ckpt/"
Numpy_Directory = Model_Directory + "np/"

Midi_Directories = [
    "albeniz", 
    "beeth",
    "borodin",
    "brahms",
    "burgm",
    "chopin", 
    "chopin_midi",
    "debussy", 
    "granados", 
    "grieg", 
    "haydn", 
    "liszt", 
    "mendelssohn", 
    "mozart", 
    "muss", 
    "schubert", 
    "schumann", 
    "tschai"
] 

### Data preperation

#### Load pieces (i.e. import midi files)

In [7]:
# First checkt that importing single midi (i.e. Beethoven's Fuer Elise) works
elise = midi.midiToNoteStateMatrix(Music_In_Directory + "beeth/elise.mid")

In [8]:
# Import all Midi data
min_time_steps = 128 # only files with at least this many 48th note steps are saved
lowerBound = 21
upperBound = 109

all_pieces = {}
chopin_only_pieces = {}
piano_midi_only_pieces = {}

start_time_loading = time.time()
time_loading_old = start_time_loading

# Gather the pieces from the specified directory
for f in range(len(Midi_Directories)):
    Training_Midi_Folder = Music_In_Directory + Midi_Directories[f]
    if Midi_Directories[f] == 'chopin_midi':
        chopin_only_pieces = {**chopin_only_pieces, **midi.loadPieces(Training_Midi_Folder,
                                                                      min_time_steps,
                                                                      lowerBound, 
                                                                      upperBound,
                                                                      verbose=False,
                                                                      verbose_name=True)}
    else: 
        piano_midi_only_pieces = {**piano_midi_only_pieces, **midi.loadPieces(Training_Midi_Folder,
                                                                              min_time_steps,
                                                                              lowerBound, 
                                                                              upperBound,
                                                                              verbose=False,
                                                                              verbose_name=True)}
    time_loading_new = time.time()
    duration = time_loading_new - time_loading_old
    time_loading_old = time_loading_new
    print('Loading directory ' + Midi_Directories[f] + ' took ' + str(round(duration, 3)) + 's' )

all_pieces = {**chopin_only_pieces, **piano_midi_only_pieces}
end_time_loading = time.time()

print('Number of total pieces = ', len(all_pieces))    
print('Loading all pieces took ' +  str(round(end_time_loading - start_time_loading, 3)) + 's')

Loaded alb_esp1
Loaded alb_esp2
Loaded alb_esp3
Loaded alb_esp4
Loaded alb_esp5
Loaded alb_se1
Loaded alb_se2
Loaded alb_se3
Loaded alb_se4
Loaded alb_se5
Loaded alb_se6
Loaded alb_se7
Loaded alb_se8
Loading directory albeniz took 11.888s
Loaded appass_1
Loaded appass_2
Loaded appass_3
Loaded beethoven_hammerklavier_1
Loaded beethoven_hammerklavier_2
Loaded beethoven_hammerklavier_3
Loaded beethoven_hammerklavier_4
Loaded beethoven_les_adieux_1
Loaded beethoven_les_adieux_2
Loaded beethoven_les_adieux_3
Loaded beethoven_opus10_1
Loaded beethoven_opus10_2
Loaded beethoven_opus10_3
Loaded beethoven_opus22_1
Loaded beethoven_opus22_2
Loaded beethoven_opus22_3
Loaded beethoven_opus22_4
Loaded beethoven_opus90_1
Loaded beethoven_opus90_2
Loaded elise
Loaded mond_1
Loaded mond_2
Loaded mond_3
Loaded pathetique_1
Loaded pathetique_2
Loaded pathetique_3
Loaded waldstein_1
Loaded waldstein_2
Loaded waldstein_3
Loading directory beeth took 73.984s
Loaded bor_ps1
Loaded bor_ps2
Loaded bor_ps3
Loa

#### Filtering

In [15]:
even_time_signatures = [16]
odd_time_signatures = [12]
sel_time_signautres = even_time_signatures
#sel_time_signautres = odd_time_signatures

#####  'incorrect' Chopin pieces 

In [16]:
print('Number of Chopin pieces loaded = ', len(chopin_only_pieces))

Number of Chopin pieces loaded =  95


In [17]:
#Check time signature occurences
time_signatures = []
verbose = False #True
keys = list(chopin_only_pieces.keys())
for k in keys:
    piece = chopin_only_pieces[str(k)]
    time_signature = max([b[0][3] for b in  piece])
    if str(k) in midi.EXACT_FILES:
        time_signatures.append(time_signature)
    if verbose:
        print("Piece: {}".format(k) + "  Time signature: {}".format(time_signature))
time_signatures = np.array(time_signatures)
unique, counts = np.unique(time_signatures, return_counts=True)
dict(zip(unique, counts))

{4: 1, 6: 3, 8: 2, 12: 37, 16: 31, 17: 1, 19: 1, 24: 6, 28: 1, 36: 1, 40: 1}

In [18]:
#Only include pieces which were not recoreded (i.e. which MIDI files are exact) and are in (2/4 and 4/4) or in (3/4)
time_signatures = []
verbose = False #True
chopin_pieces_filtered = chopin_only_pieces.copy()
keys = list(chopin_pieces_filtered.keys())

for k in keys:
        piece = chopin_pieces_filtered[str(k)]
        time_signature = max([b[0][3] for b in  piece])
        if not ((time_signature in sel_time_signautres) and str(k) in midi.EXACT_FILES) :
            chopin_pieces_filtered.pop(k)
        else:
            time_signatures.append(time_signature)
        if verbose:
            print("Piece: {}".format(k) + "  Time signature: {}".format(time_signature))
time_signatures = np.array(time_signatures)
unique, counts = np.unique(time_signatures, return_counts=True)
dict(zip(unique, counts))

{16: 31}

In [19]:
print('Number of pieces by Chopin left after filtering = ', len(chopin_pieces_filtered))

Number of pieces by Chopin left after filtering =  31


##### Piano Midi pieces by 3/4 or 4/4 measure

In [20]:
print('Number of Piano Midi pieces loaded = ', len(piano_midi_only_pieces))

Number of Piano Midi pieces loaded =  279


In [21]:
#Check time signature occurences
time_signatures = []
verbose = False #True
keys = list(piano_midi_only_pieces.keys())
for k in keys:
    piece = piano_midi_only_pieces[str(k)]
    time_signature = max([b[0][3] for b in  piece])
    time_signatures.append(time_signature)
    if verbose:
        print("Piece: {}".format(k) + "  Time signature: {}".format(time_signature))
time_signatures = np.array(time_signatures)
unique, counts = np.unique(time_signatures, return_counts=True)
dict(zip(unique, counts))

{4: 2,
 6: 16,
 8: 47,
 9: 1,
 12: 92,
 16: 89,
 18: 6,
 24: 10,
 28: 1,
 32: 6,
 36: 3,
 40: 4,
 48: 1,
 56: 1}

In [22]:
#filter training pieces by selected time signature
piano_midi_pieces_filtered = piano_midi_only_pieces.copy()
piano_midi_pieces_remaining = piano_midi_only_pieces.copy()
keys = list(piano_midi_only_pieces.keys())
verbose = False

for k in keys:
    piece = piano_midi_only_pieces[str(k)]
    time_signature = int(max([b[0][3] for b in  piece]))
    if not (time_signature in sel_time_signautres):
        piano_midi_pieces_filtered.pop(k)
    else:
        piano_midi_pieces_remaining.pop(k)
    if verbose:
        print("Piece: {}".format(k) + "  Time signature: {}".format(time_signature))

In [23]:
print('Number of Piano Midi pieces left after filtering = ', len(piano_midi_pieces_filtered))

Number of Piano Midi pieces left after filtering =  89


#### Train/Validation pieces split

In [24]:
# Define pieces relevant for training and validation
pieces_tmp = piano_midi_pieces_filtered.copy()
#pieces_tmp = chopin_pieces_filtered.copy()

In [25]:
# Free up memory
def free_memory():
    all_pieces.clear()
    chopin_only_pieces.clear()
    piano_midi_only_pieces.clear()
    piano_midi_pieces_filtered.clear()
    piano_midi_pieces_remaining.clear()
    pieces_tmp.clear()

In [26]:
# Either select one validation and one training piece or 
# set aside a random set of pieces for validation purposes
n_subset = 30
#selection = 'single_piece' 
#selection = 'subset'
selection = 'all'

random.seed(1337)
if selection == 'single_piece':
    validation_pieces = copy.deepcopy({'chop2803' : all_pieces['chop2803']})
    training_pieces   = copy.deepcopy({'chop2804' : all_pieces['chop2804']})
    free_memory()
elif selection == 'subset' or selection == 'all':
    if selection == 'subset':
        training_pieces = copy.deepcopy({k: pieces_tmp[k] for k in random.sample(list(pieces_tmp.keys()), n_subset)})
        free_memory()
    elif selection == 'all':
        training_pieces = pieces_tmp.copy()
    num_validation_pieces = len(training_pieces) // 10

    validation_pieces={}
    for v in range(num_validation_pieces):
        index = random.choice(list(training_pieces.keys()))
        validation_pieces[index] = training_pieces.pop(index)

In [27]:
print('Number of training   pieces = ', len(training_pieces))    
print('Number of validation pieces = ', len(validation_pieces))     

Number of training   pieces =  81
Number of validation pieces =  8


#### Check that features (X) and lables (y) generation work

In [28]:
# Generate sample Note State Matrix for dimension measurement and numerical checking purposes

y = tf.convert_to_tensor(batch.getPieceBatch(training_pieces, # 16
                                             batch_size = 16,
                                             num_time_steps = 16*3*2), 
                         dtype=tf.float32) 
X = Input_Kernel(y, Midi_low = lowerBound, Midi_high = upperBound - 1)
X, y = alignXy(X,y)

print('Dimensions y: (sample_size, num_notes, num_timesteps, play_articulate_velocity) = ', y.shape)
print('Dimensions X: (sample_size, num_notes, num_timesteps, feature_dim             ) = ', X.shape)

Dimensions y: (sample_size, num_notes, num_timesteps, play_articulate_velocity) =  (16, 88, 96, 4)
Dimensions X: (sample_size, num_notes, num_timesteps, feature_dim             ) =  (16, 88, 96, 108)


### Model architecture

#### Parameters

In [29]:
Midi_low = lowerBound
Midi_high = upperBound - 1
num_notes = Midi_high + 1 - Midi_low # X.shape[1] = Midi_high + 1 - Midi_low 
num_timesteps = 16*3*2 
input_size = 4
keep_prob = 0.5

num_t_units = [128, 128] # [200, 200]
num_n_units = [64, 64] # [100, 100]
dense_units = 3

#### Start building of model graph 

In [30]:
# Build the Model Graph:
tf.reset_default_graph()
print('Building Graph...')

Building Graph...


#### Inputs

In [31]:
# Graph Input Placeholders
Note_State_Batch = tf.placeholder(dtype=tf.float32, shape=[None, num_notes, None, input_size], name= "Note_State_Batch")
output_keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name= "output_keep_prob")

#Generate expanded tensor from batch of note state matrices
Note_State_Expand = Input_Kernel(Note_State_Batch, 
                                 Midi_low=Midi_low, 
                                 Midi_high=Midi_high #,
                                 #time_init=time_init
                                )
Note_State_Expand_aligned, Note_State_Batch_aligned = alignXy(Note_State_Expand, Note_State_Batch)

print('Note_State_Expand shape = ', Note_State_Expand.get_shape())
print('Note_State_Batch shape = ',  Note_State_Batch.get_shape())

Note_State_Expand shape =  (?, 88, ?, 108)
Note_State_Batch shape =  (?, 88, ?, 4)


#### Timewise LSTM

In [32]:
# Generate initial state (at t=0) placeholder
timewise_state=[]
for i in range(len(num_t_units)):
    timewise_c=tf.placeholder(dtype=tf.float32, shape=[None, num_t_units[i]]) #None = batch_size * num_notes
    timewise_h=tf.placeholder(dtype=tf.float32, shape=[None, num_t_units[i]])
    timewise_state.append(LSTMStateTuple(timewise_h, timewise_c))

timewise_state=tuple(timewise_state)

timewise_cell = LSTM_Cell(num_t_units, output_keep_prob)

timewise_out, timewise_state_out = LSTM_Layer(input_data=Note_State_Expand_aligned,
                                              state_init=timewise_state,
                                              cell = timewise_cell,
                                              time_or_note="time")

print('Time-wise output shape = ', timewise_out.get_shape())
# print('Time-wise state shape = ', timewise_state_out)

Time-wise output shape =  (?, 88, ?, 128)


#### Notewise LSTM

In [33]:
#LSTM Note Wise Graph

# Generate initial state (at n=0) placeholder
notewise_state=[]
for i in range(len(num_n_units)):
    notewise_c=tf.placeholder(dtype=tf.float32, shape=[None, num_n_units[i]]) #None = batch_size * num_timesteps
    notewise_h=tf.placeholder(dtype=tf.float32, shape=[None, num_n_units[i]])
    notewise_state.append(LSTMStateTuple(notewise_h, notewise_c))

notewise_state=tuple(notewise_state)

notewise_cell = LSTM_Cell(num_n_units, output_keep_prob)

notewise_out, notewise_state_out =  LSTM_Layer(input_data=timewise_out,
                                               state_init=notewise_state,
                                               cell=notewise_cell,
                                               time_or_note="note")

print('Note-wise output shape = ', notewise_out.get_shape())
# print('Note-wise state shape = ', notewise_state_out)

Note-wise output shape =  (?, 88, ?, 64)


#### Simulate conditional probabilty using dense layers to generate Outputs

In [34]:
output_1, output_2, output_3 = Conditional_Probability_Layer(notewise_out, dense_units=dense_units)

print('play_articulate_logit output shape = ', output_1.get_shape())
print('velocity output shape = ', output_2.get_shape()) 
print('play_articulate_sampled output shape = ', output_3.get_shape())

play_articulate_logit output shape =  (?, 88, ?, 2)
velocity output shape =  (?, 88, ?, 1)
play_articulate_sampled output shape =  (?, 88, ?, 2)


#### Finished building of model graph 

In [35]:
print('Graph Building Complete')

Graph Building Complete


### Model training

In [36]:
# Loss Function and Optimizer
alpha = 0.01 
loss_p_a, log_likelihood = Loss_Function_1(Note_State_Batch_aligned, output_1)
loss_velocity = Loss_Function_2(Note_State_Batch_aligned, output_2)
loss = loss_p_a + alpha * 1 / 127 * tf.sqrt(loss_velocity)
optimizer = tf.train.AdadeltaOptimizer(learning_rate = 1, epsilon=1e-04).minimize(loss)

In [37]:
# TF1 specific parameters 
restore_model_name = None
save_model_name = 'Long_Train_256'

In [40]:
current_time_str = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

# Training parameters

epochs = 16
batch_size = 2
epoch_save_list = [1, 2, 4, 8, 16]



n_train_batches = getNumberOfBatches(training_pieces, batch_size, num_timesteps)
n_val_batches = getNumberOfBatches(validation_pieces, batch_size, num_timesteps)

# Values for loss, metric and confusion matrix
train_loss_p_a_array   = np.full((epochs, n_train_batches), 10.0)
train_loss_vel_array   = np.full((epochs, n_train_batches), 10.0)
train_metric_p_a_array = np.full((epochs, n_train_batches), 10.0)
train_metric_vel_array = np.full((epochs, n_train_batches), 10.0)
val_loss_p_a_array     = np.full((epochs, n_val_batches), 10.0)
val_loss_vel_array     = np.full((epochs, n_val_batches), 10.0)
val_metric_p_a_array   = np.full((epochs, n_val_batches), 10.0)
val_metric_vel_array   = np.full((epochs, n_val_batches), 10.0)

In [41]:
(n_train_batches, n_val_batches)

(4333, 605)

In [42]:
start_time = time.time()
time_old = start_time

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    
    # try to restore the pre_trained
    if restore_model_name is not None:
        Load_Directory = Checkpoint_Directory + restore_model_name
               
        print("Load the model from: {}".format(restore_model_name))
        saver.restore(sess, Load_Directory + '/{}'.format(restore_model_name))
        
    
    # Initial States
    timewise_state_val = []
    for i in range(len(num_t_units)):
        c_t = np.zeros((batch_size * num_notes, num_t_units[i])) 
        h_t = np.zeros((batch_size * num_notes, num_t_units[i]))
        timewise_state_val.append(LSTMStateTuple(h_t, c_t))
        
    notewise_state_val = []
    for i in range(len(num_n_units)):
        c_n = np.zeros((batch_size * num_timesteps, num_n_units[i])) 
        h_n = np.zeros((batch_size * num_timesteps, num_n_units[i]))
        notewise_state_val.append(LSTMStateTuple(h_n, c_n))
        

    # Training Loop
    for epoch in range(epochs):
        print('\rStart of Epoch [%d/%d]'% (epoch + 1, epochs))
        print('\n')

        
        # Generate batch of training data   
        n = 0
        for k in training_pieces.keys():
            start_old = 0
            piece = training_pieces[str(k)]
            while start_old < (len(piece) - num_timesteps):
                print('\rTraining batch: %d/%d' % (n + 1, n_train_batches), end='\r')
                batch_input_state_train, start_old = batch.getPieceBatch2(piece, 
                                                                    num_time_steps = num_timesteps, 
                                                                    batch_size = batch_size,
                                                                    start_old = start_old)    

            
                # Run Session
                feed_dict = {Note_State_Batch: batch_input_state_train, 
                             output_keep_prob: keep_prob, 
                             timewise_state: timewise_state_val, 
                             notewise_state: notewise_state_val}


                l_1, l_2, log_likelihood_run, _, velocity_gen_out_run ,note_gen_out_run, Note_State_Batch_result = sess.run(
                    [loss_p_a, loss_velocity, log_likelihood, optimizer, output_2, output_3, Note_State_Batch_aligned], 
                    feed_dict = feed_dict)
                
                train_loss_p_a_array[epoch, n] = l_1
                train_loss_vel_array[epoch, n] = l_2
                
                n += 1
        print('')
        
        # Generate batch of validation data   
        n = 0
        for k in validation_pieces.keys():
            start_old = 0
            piece = validation_pieces[str(k)]
            while start_old < (len(piece)- num_timesteps):
                print('Validation batch: %d/%d' % (n + 1, n_val_batches), end='\r')
                batch_input_state_val, start_old = batch.getPieceBatch2(piece, 
                                                                    num_time_steps = num_timesteps, 
                                                                    batch_size = batch_size, 
                                                                    start_old = start_old)    

                # Run Session
                feed_dict = {Note_State_Batch: batch_input_state_val, 
                             output_keep_prob: keep_prob, 
                             timewise_state: timewise_state_val, 
                             notewise_state: notewise_state_val}


                l_1, l_2, log_likelihood_run, = sess.run(
                    [loss_p_a, loss_velocity, log_likelihood], 
                    feed_dict = feed_dict)

                val_loss_p_a_array[epoch, n] = l_1
                val_loss_vel_array[epoch, n] = l_2
                n += 1
        print('')


        time_new = time.time()
        duration = time_new - time_old
        time_old = time_new
        print('Training   Loss p_a: '     + str(np.mean(train_loss_p_a_array[epoch,:])))
        print('Validation Loss p_a: '     + str(np.mean(val_loss_p_a_array[epoch,:])))
        print('Training   Loss vel: '     + str(np.mean(train_loss_vel_array[epoch,:])))
        print('Validation Loss vel: '     + str(np.mean(val_loss_vel_array[epoch,:])))
        print('')
        print('Time: ' +  str(round(duration, 3)) + 's')

        # Periodically save model and loss histories
        if (epoch + 1) in epoch_save_list:

            model_save_path = Checkpoint_Directory + current_time_str[:-7] + '/{}'.format(save_model_name)
            np_save_path = Numpy_Directory + current_time_str[:-7] + '/' 
            save_path = saver.save(sess, model_save_path)            
            try:
                os.mkdir(np_save_path) 
            except:
                pass
            np.savez(np_save_path + save_model_name + '_array', 
                     train_loss_p_a_array, 
                     train_loss_vel_array,
                     val_loss_p_a_array, 
                     val_loss_vel_array,
                    ) 
            for i in range(batch_size):
                midi.generate_audio(batch_input_state_train[i:(i+1),:,:,:], 
                                    Music_Out_Train_Directory + current_time_str[:-7] + '/',
                                    'train' + '_epoch_' + str(epoch + 1) + '_batch_' + str(i) + '_true', 
                                    verbose = False)
            prediction = np.concatenate([note_gen_out_run, velocity_gen_out_run, Note_State_Batch_result[:,:,:,3:4]], axis=-1)
            for i in range(batch_size):
                midi.generate_audio(prediction[i:(i+1),:,:,:], 
                                    Music_Out_Train_Directory + current_time_str[:-7] + '/',
                                    'train' + '_epoch_' + str(epoch + 1) + '_batch_' + str(i) + '_predict', 
                                    verbose = False)

end_time = time.time()

print('Training time = ', end_time - start_time, ' seconds')

Start of Epoch [1/16]


Training batch: 4333/4333
Validation batch: 605/605
Training   Loss p_a: 0.06812044371336659
Validation Loss p_a: 0.045973980568410934
Training   Loss vel: 741.1412369472009
Validation Loss vel: 629.5551560583193

Time: 4312.516s
Start of Epoch [2/16]


Training batch: 1423/4333

KeyboardInterrupt: 