# ECBM 4040 Fall 2020 FINAL PROJECT 

### Author:

Wenjun Yang (wy2347)   

Qihang Yang (qy2231)

In [20]:
# Autoreload
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# # You do not need to run this
# import sys
# sys.path.append("/home/ecbm4040/anaconda3/envs/envTF22/lib/python3.7/site-packages")
# sys.path

In [21]:
import numpy as np
import tensorflow as tf

In [22]:
from prep import *
from model import *

### Step 1: Extract the midi data from zip file.

* The data used in this project comes from the [Classical Piano Midi Page](http://www.piano-midi.de/).
* Every piece is a MIDI file containing two piano tracks, one for the left hand and one for the right hand.


In [None]:
## you don't have to run this block

# import zipfile
# with zipfile.ZipFile('data/midifile.zip', 'r') as zip_ref:
#     zip_ref.extractall('data/')

### Step 2: Prepare your data for training

#### A little insight about the input data structure
The input data proposed by the original paper is a little bit complex and requires some knowledge about music. \
For more information on background knowledge of music, you can refer to the following two links:
* [MIDI_events](https://www.mixagesoftware.com/en/midikit/help/HTML/midi_events.html#:~:text=The%20Note%20Off%20Event%20is,hard%20the%20key%20was%20released.) 
* [MIDI Tutorial](http://www.music-software-development.com/midi-tutorial.html) 

We spent quite a lot of time figuring out the logic behind this representation and provide the following glossary for your information.\
For consistency, we stick to the naming convention used in the original paper.

* stateMatrix: matrix of state, for state definition see below
* note: index 0-77 of a key, covering MIDI pitches from lower_bound=24 up to (but not including) upper_bound=102, i.e. 78 notes
* part_position(1) = note
* pitchclass: one of the 12 half steps of an octave (C, D, E, F, G, A, B and their sharps/flats)
* part_pitchclass(12): one-hot pitchclass 
* state: a (played, articulated) pair for each note; the values (1,0), (1,1) and (0,0) distinguish holding a note, repeating (re-striking) a note, and not playing it
* context: the count of each pitchclass played in last timestep 
* part_context(12): rearranged context
* part_prev_vicinity(50):

#### Note:
* input for model: part_position + part_pitchclass + part_prev_vicinity + part_context + beat + [0] 
* total number of arguments: 1 + 12 + 50 + 12 + 4 + 1 = 80
* each of the 78 notes is described by the 80 values in the structure above
* we only use sequences of 128 timesteps for training
* so one input sample has the shape 128 x 78 x 80

Please refer to prep.py for the details of implementation of data preprocessing part.

The overall data preparation process is shown in the following flowchart:
<img src="image/Data Prep.png" />


In [6]:
training_data = load_data('data/music')

load liz_et2
load scn16_3
load haydn_43_1
load mendel_op30_3
load beethoven_les_adieux_1
load br_rhap
load grieg_zwerge
load haydn_7_1
load muss_8
load schu_143_2
load muss_4
load scn68_12
load clementi_opus36_1_1
load waldstein_3
load liz_et_trans5
load ty_maerz
load ty_juli
load rac_op3_2
load chpn-p3
load gra_esp_4
load scn15_5
load beethoven_opus10_2
load grieg_wedding
load mz_330_1
load haydn_35_2
load chpn_op27_1
load schubert_D935_3
load mendel_op19_4
load schub_d960_1
load haydn_9_1
load mz_545_1
load rac_op33_6
load liz_rhap15
load alb_esp2
load grieg_berceuse
load rac_op23_2
load bach_850
load beethoven_opus22_1
load burg_quelle
load debussy_cc_4
load schubert_D850_4
load chpn-p4
load mendel_op30_1
load pathetique_3
load clementi_opus36_6_1
load mendel_op62_3
load schu_143_1
load chpn_op25_e11
load scn15_4
load schub_d960_4
load mz_570_3
load clementi_opus36_5_1
load scn15_13
load grieg_waechter
load chpn-p8
load brahms_opus1_2
load rac_op32_1
load liz_et_trans4
load haydn_8_

### (Optional) Step 3: Train the theano-based model

In [159]:
from Original.ori_prep import *
import Original.ori_multi_training
import Original.ori_model

In [None]:
import pickle

# Optional baseline: train the original Theano-based model and persist its
# learned configuration to disk.
#
# NOTE(review): `multi_training` and `model` must already be bound when this
# cell runs. The cell above uses `import Original.ori_multi_training` /
# `import Original.ori_model`, which only bind the name `Original`, so these
# names presumably come from the wildcard import of Original.ori_prep or from
# an earlier session - confirm on a fresh kernel before relying on this cell.
pcs = multi_training.loadPieces("music")

# Presumably [300, 300] / [100, 50] are the hidden sizes of the time-axis and
# note-axis layers (they match the LSTM sizes used in the Keras re-implementation
# later in this notebook) - TODO confirm against Original/ori_model.py.
m = model.Model([300, 300], [100, 50], dropout=0.5)

multi_training.trainPiece(m, pcs, 10000)

# Use a context manager so the file handle is flushed and closed even if
# pickling fails; the original passed a bare open() that was never closed.
with open("output/final_learned_config.p", "wb") as config_file:
    pickle.dump(m.learned_config, config_file)

### Step 4: Train our model

description

* flowchart

Try #2

In [41]:
model = music_gen()

In [42]:
model.compile(optimizer=tf.keras.optimizers.Adam(5e-5),loss= my_loss)

In [44]:
# Build the model for batches of 127-timestep windows over 78 notes with 82
# values per note: the 80 input features concatenated with the 2 state values
# (this is the shape yielded by update_input_batch_generator).
model.build(input_shape = (None, 127, 78, 82)) # Training shape
# model.summary()
# Checkpoint file pattern; Keras fills in the epoch number, zero-padded to two digits.
filepath="model/model-epoch-{epoch:02d}.h5"
# Save weights only (not the full model), regardless of the loss value
# (save_best_only=False), once every 50 epochs (period=50).
# NOTE(review): `period` is deprecated in newer tf.keras releases in favour of
# `save_freq` - confirm against the installed TensorFlow version.
save_callback=tf.keras.callbacks.ModelCheckpoint(filepath,  
                                              verbose=0, save_best_only=False, 
                                              save_weights_only=True, period=50)



In [45]:
# Endless generator of (inputs, target) training batches built from the loaded pieces.
data_gen = update_input_batch_generator(training_data)
# Model.fit accepts Python generators directly; fit_generator is deprecated in
# TensorFlow 2.x. steps_per_epoch must stay explicit because the generator
# never terminates: 20 batches make up one "epoch", and save_callback writes a
# weights checkpoint on its own schedule.
model.fit(data_gen, epochs=600, steps_per_epoch=20, callbacks=[save_callback])

Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 12/600
Epoch 13/600
Epoch 14/600
Epoch 15/600
Epoch 16/600
Epoch 17/600
Epoch 18/600
Epoch 19/600
Epoch 20/600
Epoch 21/600
Epoch 22/600
Epoch 23/600
Epoch 24/600
Epoch 25/600
Epoch 26/600
Epoch 27/600
Epoch 28/600
Epoch 29/600
Epoch 30/600
Epoch 31/600
Epoch 32/600
Epoch 33/600
Epoch 34/600
Epoch 35/600
Epoch 36/600
Epoch 37/600
Epoch 38/600
Epoch 39/600
Epoch 40/600
Epoch 41/600
Epoch 42/600
Epoch 43/600
Epoch 44/600
Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 50/600
Epoch 51/600
Epoch 52/600
Epoch 53/600
Epoch 54/600
Epoch 55/600
Epoch 56/600
Epoch 57/600
Epoch 58/600
Epoch 59/600
Epoch 60/600
Epoch 61/600
Epoch 62/600
Epoch 63/600
Epoch 64/600
Epoch 65/600
Epoch 66/600
Epoch 67/600
Epoch 68/600
Epoch 69/600
Epoch 70/600
Epoch 71/600
Epoch 72/600
Epoch 73/600
Epoch 74/600
Epoch 75/600
Epoch 76/600
Epoch 77/600
Epoch 78

Epoch 101/600
Epoch 102/600
Epoch 103/600
Epoch 104/600
Epoch 105/600
Epoch 106/600
Epoch 107/600
Epoch 108/600
Epoch 109/600
Epoch 110/600
Epoch 111/600
Epoch 112/600
Epoch 113/600
Epoch 114/600
Epoch 115/600
Epoch 116/600
Epoch 117/600
Epoch 118/600
Epoch 119/600
Epoch 120/600
Epoch 121/600
Epoch 122/600
Epoch 123/600
Epoch 124/600
Epoch 125/600
Epoch 126/600
Epoch 127/600
Epoch 128/600
Epoch 129/600
Epoch 130/600
Epoch 131/600
Epoch 132/600
Epoch 133/600
Epoch 134/600
Epoch 135/600
Epoch 136/600
Epoch 137/600
Epoch 138/600
Epoch 139/600
Epoch 140/600
Epoch 141/600
Epoch 142/600
Epoch 143/600
Epoch 144/600
Epoch 145/600
Epoch 146/600
Epoch 147/600
Epoch 148/600
Epoch 149/600
Epoch 150/600
Epoch 151/600
Epoch 152/600
Epoch 153/600
Epoch 154/600
Epoch 155/600
Epoch 156/600
Epoch 157/600
Epoch 158/600
Epoch 159/600
Epoch 160/600
Epoch 161/600
Epoch 162/600
Epoch 163/600
Epoch 164/600
Epoch 165/600
Epoch 166/600
Epoch 167/600
Epoch 168/600
Epoch 169/600
Epoch 170/600
Epoch 171/600
Epoch 

Epoch 297/600
Epoch 298/600
Epoch 299/600
Epoch 300/600
Epoch 301/600
Epoch 302/600
Epoch 303/600
Epoch 304/600
Epoch 305/600
Epoch 306/600
Epoch 307/600
Epoch 308/600
Epoch 309/600
Epoch 310/600
Epoch 311/600
Epoch 312/600
Epoch 313/600
Epoch 314/600
Epoch 315/600
Epoch 316/600
Epoch 317/600
Epoch 318/600
Epoch 319/600
Epoch 320/600
Epoch 321/600
Epoch 322/600
Epoch 323/600
Epoch 324/600
Epoch 325/600
Epoch 326/600
Epoch 327/600
Epoch 328/600
Epoch 329/600
Epoch 330/600
Epoch 331/600
Epoch 332/600
Epoch 333/600
Epoch 334/600
Epoch 335/600
Epoch 336/600
Epoch 337/600
Epoch 338/600
Epoch 339/600
Epoch 340/600
Epoch 341/600
Epoch 342/600
Epoch 343/600
Epoch 344/600
Epoch 345/600
Epoch 346/600
Epoch 347/600
Epoch 348/600
Epoch 349/600
Epoch 350/600
Epoch 351/600
Epoch 352/600
Epoch 353/600
Epoch 354/600
Epoch 355/600
Epoch 356/600
Epoch 357/600
Epoch 358/600
Epoch 359/600
Epoch 360/600
Epoch 361/600
Epoch 362/600
Epoch 363/600
Epoch 364/600
Epoch 365/600
Epoch 366/600
Epoch 367/600
Epoch 

Epoch 493/600
Epoch 494/600
Epoch 495/600
Epoch 496/600
Epoch 497/600
Epoch 498/600
Epoch 499/600
Epoch 500/600
Epoch 501/600
Epoch 502/600
Epoch 503/600
Epoch 504/600
Epoch 505/600
Epoch 506/600
Epoch 507/600
Epoch 508/600
Epoch 509/600
Epoch 510/600
Epoch 511/600
Epoch 512/600
Epoch 513/600
Epoch 514/600
Epoch 515/600
Epoch 516/600
Epoch 517/600
Epoch 518/600
Epoch 519/600
Epoch 520/600
Epoch 521/600
Epoch 522/600
Epoch 523/600
Epoch 524/600
Epoch 525/600
Epoch 526/600
Epoch 527/600
Epoch 528/600
Epoch 529/600
Epoch 530/600
Epoch 531/600
Epoch 532/600
Epoch 533/600
Epoch 534/600
Epoch 535/600
Epoch 536/600
Epoch 537/600
Epoch 538/600
Epoch 539/600
Epoch 540/600
Epoch 541/600
Epoch 542/600
Epoch 543/600
Epoch 544/600
Epoch 545/600
Epoch 546/600
Epoch 547/600
Epoch 548/600
Epoch 549/600
Epoch 550/600
Epoch 551/600
Epoch 552/600
Epoch 553/600
Epoch 554/600
Epoch 555/600
Epoch 556/600
Epoch 557/600
Epoch 558/600
Epoch 559/600
Epoch 560/600
Epoch 561/600
Epoch 562/600
Epoch 563/600
Epoch 

<tensorflow.python.keras.callbacks.History at 0x7f3cbe99a750>

In [28]:
# Compose a new piece with the trained model and save it as a MIDI file.
# Parameters:
#   training_data: training data
#   length: the length of the music to be composed
#           (NOTE(review): presumably counted in timesteps - confirm in model.compose)
#   prob: threshold; the statematrix entry is set to 1 when the predicted probability > prob
#   name: file name (name="sample" is reported as "sample.mid saved")

model.compose(training_data, length = 100, prob = 0.1, name = "sample")

sample.mid saved


midi.Pattern(format=1, resolution=220, tracks=\
[midi.Track(\
  [midi.NoteOnEvent(tick=0, channel=0, data=[43, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[50, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[54, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[57, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[62, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[66, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[69, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[79, 60]),
   midi.NoteOnEvent(tick=165, channel=0, data=[60, 60]),
   midi.NoteOffEvent(tick=55, channel=0, data=[60, 0]),
   midi.NoteOnEvent(tick=55, channel=0, data=[60, 60]),
   midi.NoteOnEvent(tick=165, channel=0, data=[67, 60]),
   midi.NoteOnEvent(tick=440, channel=0, data=[55, 60]),
   midi.NoteOnEvent(tick=275, channel=0, data=[74, 60]),
   midi.NoteOnEvent(tick=55, channel=0, data=[71, 60]),
   midi.NoteOffEvent(tick=55, channel=0, data=[71, 0]),
   midi.NoteOnEvent(tick=55, channel=0, data=[

In [37]:
def generate_music(model_epoch_num=600,prob=0.5,length=100):
    """Restore a checkpointed model and compose a piece with it.

    A fresh ``music_gen`` model is built for the training input shape, the
    weights saved at ``model/model-epoch-<model_epoch_num>.h5`` are loaded
    into it, and ``compose`` is called on the notebook-level ``training_data``.

    Parameters
    ----------
    model_epoch_num : int
        Epoch number of the checkpoint to load.
    prob : float
        Probability threshold forwarded to ``compose``.
    length : int
        Length of the piece to compose, forwarded to ``compose``. The original
        implementation ignored this argument and always composed 100 steps.

    Returns
    -------
    None
        The result is whatever ``compose`` writes; its output name is
        ``sample_<epoch>_<prob without the dot>``, e.g. ``sample_600_04``.
    """
    model_epoch = music_gen()
    # Same input shape as used for training: 127 timesteps x 78 notes x 82 values.
    model_epoch.build(input_shape=(None, 127, 78, 82))
    model_epoch.load_weights("model/model-epoch-" + str(model_epoch_num) + ".h5")

    # Encode epoch and threshold in the name so different runs do not overwrite each other.
    sample_name = "sample_" + str(model_epoch_num) + "_" + str(prob).replace('.', '')
    model_epoch.compose(training_data, length=length, prob=prob, name=sample_name)

In [38]:
generate_music(600,prob=0.4)



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

sample_600_04.mid saved


In [35]:
# 600 epoch model
# Re-create the architecture and restore the weights checkpointed at epoch 600.
model_epoch_600=music_gen()
# NOTE(review): compile() is probably not needed when the model is only used
# for compose(); the 400-epoch cell further down skips it and still generates output.
model_epoch_600.compile(optimizer=tf.keras.optimizers.Adam(5e-5),loss= my_loss)
# Build for the training input shape (127 timesteps x 78 notes x 82 values)
# before loading - presumably so the variables exist for load_weights to fill.
model_epoch_600.build(input_shape = (None, 127, 78, 82)) 
model_epoch_600.load_weights("model/model-epoch-600.h5")

In [36]:
model_epoch_600.compose(training_data, length = 100, prob = 0.5, name = "sample_600_050")



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

sample_600_050.mid saved


midi.Pattern(format=1, resolution=220, tracks=\
[midi.Track(\
  [midi.NoteOnEvent(tick=0, channel=0, data=[67, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[69, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[73, 40]),
   midi.NoteOffEvent(tick=55, channel=0, data=[67, 0]),
   midi.NoteOffEvent(tick=0, channel=0, data=[73, 0]),
   midi.NoteOnEvent(tick=55, channel=0, data=[61, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[67, 40]),
   midi.NoteOnEvent(tick=55, channel=0, data=[57, 40]),
   midi.NoteOffEvent(tick=0, channel=0, data=[61, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[64, 40]),
   midi.NoteOffEvent(tick=0, channel=0, data=[67, 0]),
   midi.NoteOffEvent(tick=0, channel=0, data=[69, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[72, 40]),
   midi.NoteOffEvent(tick=55, channel=0, data=[57, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[61, 40]),
   midi.NoteOffEvent(tick=0, channel=0, data=[64, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[64, 40]),

In [19]:
# 500 epoch model
# Re-create the architecture and restore the weights checkpointed at epoch 500.
model_epoch_500=music_gen()
# NOTE(review): compile() is probably not needed when the model is only used
# for compose(); the 400-epoch cell further down skips it and still generates output.
model_epoch_500.compile(optimizer=tf.keras.optimizers.Adam(5e-5),loss= my_loss)
# Build for the training input shape (127 timesteps x 78 notes x 82 values)
# before loading - presumably so the variables exist for load_weights to fill.
model_epoch_500.build(input_shape = (None, 127, 78, 82)) 
model_epoch_500.load_weights("model/model-epoch-500.h5")

# Compose 100 steps with a 0.3 probability threshold; the name encodes epoch and threshold.
model_epoch_500.compose(training_data, length = 100, prob = 0.3, name = "sample_500_030")



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

sample_500_030.mid saved


midi.Pattern(format=1, resolution=220, tracks=\
[midi.Track(\
  [midi.NoteOnEvent(tick=0, channel=0, data=[60, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[67, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[79, 60]),
   midi.NoteOffEvent(tick=55, channel=0, data=[79, 0]),
   midi.NoteOnEvent(tick=55, channel=0, data=[79, 60]),
   midi.NoteOnEvent(tick=55, channel=0, data=[55, 60]),
   midi.NoteOffEvent(tick=0, channel=0, data=[60, 0]),
   midi.NoteOffEvent(tick=0, channel=0, data=[79, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[79, 60]),
   midi.NoteOnEvent(tick=55, channel=0, data=[59, 60]),
   midi.NoteOffEvent(tick=0, channel=0, data=[79, 0]),
   midi.NoteOffEvent(tick=55, channel=0, data=[59, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[65, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[72, 60]),
   midi.NoteOnEvent(tick=55, channel=0, data=[70, 60]),
   midi.NoteOnEvent(tick=55, channel=0, data=[53, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[60, 60

In [14]:
# 400 epoch model
# Re-create the architecture and restore the weights checkpointed at epoch 400.
model_epoch_400=music_gen()
# compile() is intentionally left out here: the model is only used for compose().
# model_epoch_400.compile(optimizer=tf.keras.optimizers.Adam(5e-5),loss= my_loss)
# Build for the training input shape (127 timesteps x 78 notes x 82 values)
# before loading - presumably so the variables exist for load_weights to fill.
model_epoch_400.build(input_shape = (None, 127, 78, 82)) 
model_epoch_400.load_weights("model/model-epoch-400.h5")

# Compose 100 steps with a 0.5 probability threshold; the name encodes epoch and threshold.
model_epoch_400.compose(training_data, length = 100, prob = 0.5, name = "sample_400_050")



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



midi.Pattern(format=1, resolution=220, tracks=\
[midi.Track(\
  [midi.NoteOnEvent(tick=0, channel=0, data=[50, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[58, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[59, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[62, 40]),
   midi.NoteOffEvent(tick=55, channel=0, data=[58, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[58, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[61, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[70, 40]),
   midi.NoteOffEvent(tick=55, channel=0, data=[61, 0]),
   midi.NoteOffEvent(tick=0, channel=0, data=[70, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[60, 40]),
   midi.NoteOnEvent(tick=55, channel=0, data=[70, 40]),
   midi.NoteOnEvent(tick=55, channel=0, data=[46, 40]),
   midi.NoteOffEvent(tick=55, channel=0, data=[58, 0]),
   midi.NoteOffEvent(tick=0, channel=0, data=[70, 0]),
   midi.NoteOnEvent(tick=0, channel=0, data=[58, 40]),
   midi.NoteOnEvent(tick=0, channel=0, data=[70, 40])

# -------------------------------
# Former works

In [6]:
# Unsolved Problem
############################################################
# 1. Is this generally the right architecture
# 2. Probably need change all the 128 into None? But this raises error
# 3. 'mask' function in original code, does Keras take care of that for us?
# 4. What is the target to validate model? The whole state sequence [batch,128,78,2] 
#                                          or the state of only the next timestep
# 5. The function to predict next note to play?
# 6. Way too many parameters in the last dense layer! Not quite sure it is what it should be

# Former attempt: biaxial LSTM written with the Keras functional API.
# It takes two inputs (note features and note states) for 127 timesteps x 78
# notes and predicts the (played, articulated) probabilities of the next timestep.
# NOTE(review): this cell rebinds the notebook-level name `model`, shadowing
# the `music_gen()` instance created earlier in the notebook.
inputs = tf.keras.Input(shape=(127,78,80))

# For why we permute dimensions and use TimeDistributed layers,
# please refer to https://keras.io/api/layers/recurrent_layers/time_distributed/

# Swap the time and note axes so that each note's time series is one LSTM sequence.
inputs_rotate= tf.keras.backend.permute_dimensions(inputs,(0,2,1,3)) # (batch,78,127,80)

# Time-axis stack: two LSTMs that run along the time dimension.
time_lstm1 = tf.keras.layers.LSTM(300,return_sequences=True,dropout=0.5)
time_lstm2 = tf.keras.layers.LSTM(300,return_sequences=True,dropout=0.5)

# TimeDistributed applies the same LSTM independently to each of the 78 notes.
inter1 = tf.keras.layers.TimeDistributed(time_lstm1)(inputs_rotate) # (batch,78,127,300)
inter2 = tf.keras.layers.TimeDistributed(time_lstm2)(inter1) # (batch,78,127,300)

# Note-axis stack: two LSTMs that run along the note dimension.
note_lstm1 = tf.keras.layers.LSTM(100,return_sequences=True,dropout=0.5)
note_lstm2 = tf.keras.layers.LSTM(50,return_sequences=True,dropout=0.5)

# Swap the axes back so that, per timestep, the 78 notes form one LSTM sequence.
inter2_rotate= tf.keras.backend.permute_dimensions(inter2,(0,2,1,3)) # (batch,127,78,300)

# The input of the note-axis part of the model is
# 1) the note-state vector from the previous LSTM stack (batch,127,78,300)
# 2) whether the previous note was chosen to be played (batch,127,78,1)
# 3) whether the previous note was chosen to be articulated (batch,127,78,1)
# That is why we pad here and concatenate the three together.
# Please see https://www.tensorflow.org/api_docs/python/tf/pad
# and https://www.tensorflow.org/api_docs/python/tf/concat
# for reference.

state_inputs = tf.keras.Input(shape=(127,78,2))

# Pad one zero entry at the front of axis 2 (the note axis) only.
paddings=[[0,0],[0,0],[1,0],[0,0]]

# Drop the last note and pad at the front: position k now holds the state of
# note k-1, and the first note sees zeros.
prev_note_state=tf.pad(state_inputs[:,:,:-1,:], paddings, 'CONSTANT', constant_values=0)   # (batch,127,78,2)

inter_input1=tf.concat((inter2_rotate,prev_note_state),axis=-1) # (batch,127,78,302)

inter3 = tf.keras.layers.TimeDistributed(note_lstm1)(inter_input1) # (batch,127,78,100)

inter_input2=tf.concat((inter3,prev_note_state),axis=-1) # (batch,127,78,102)

inter4 = tf.keras.layers.TimeDistributed(note_lstm2)(inter_input2) # (batch,127,78,50)

# Flatten everything into one vector per sample: 127*78*50 = 495300 values.
# This is what makes the Dense layer below so large (see "Unsolved Problem" 6).
outputs = tf.keras.layers.Flatten()(inter4) # (batch,495300)

outputs = tf.keras.layers.Dropout(.5)(outputs)

# 156 = 78 notes x 2 probabilities (played, articulated).
outputs = tf.keras.layers.Dense(156, activation='sigmoid')(outputs) # (batch,156)

# output the final result, i.e., probability of playing or articulating certain notes
outputs = tf.keras.layers.Reshape((78,2))(outputs) # (batch,78,2)

model=tf.keras.Model(inputs=[inputs,state_inputs],outputs=outputs)

In [7]:
# custom loss function
# the output of model is the same shape with the sample's state matrix
# that is (time,note(78),state(2))
# the 2 values for each time and note denote the probability of the note being played and articulated, respectively, in the last step
# we use the negative log likelihood to denote the loss, the log function can avoid the numbers being too small

def my_loss(y_true, y_pred):
    """Negative log-likelihood of the true states under the predicted probabilities.

    For every entry the likelihood is ``y_pred`` where ``y_true`` is 1 and
    ``1 - y_pred`` where ``y_true`` is 0. The logs of these likelihoods are
    summed over all entries and negated, giving one scalar loss.
    """
    # Smallest float32 increment above 1.0; keeps log() finite when a likelihood is exactly 0.
    epsilon = np.spacing(np.float32(1.0))
    likelihood = y_pred*y_true + (1-y_pred)*(1-y_true)
    log_likelihood = tf.math.log(likelihood + epsilon)
    return -tf.keras.backend.sum(log_likelihood)

In [26]:
model.compile(optimizer=tf.keras.optimizers.Adam(5e-5),loss= my_loss)

In [19]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 127, 78, 80) 0                                            
__________________________________________________________________________________________________
tf_op_layer_Transpose (TensorFl [(None, 78, 127, 80) 0           input_1[0][0]                    
__________________________________________________________________________________________________
time_distributed (TimeDistribut (None, 78, 127, 300) 457200      tf_op_layer_Transpose[0][0]      
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 127, 78, 2)] 0                                            
______________________________________________________________________________________________

In [195]:
### new generator
def update_input_batch_generator(statemat_dict):
    """Yield training batches forever.

    Each iteration draws a new batch with ``build_input_batch`` and turns it
    into an ``(inputs, target)`` pair:

    * inputs: element 0 and element 1 of the batch, each without its last
      step along axis 1, concatenated along the last axis
    * target: element 1 of the batch at the last step of axis 1

    In this notebook the shapes come out as (10, 127, 78, 82) and (10, 78, 2).
    NOTE(review): element 0 is presumably the input features and element 1 the
    state matrix - confirm against build_input_batch in prep.py.
    """
    while True:
        sampled = build_input_batch(statemat_dict)
        features = sampled[0]
        states = sampled[1]
        # Everything except the final step is the model input ...
        history = tf.concat([features[:, :-1], states[:, :-1]], axis=-1)
        # ... and the state at the final step is what the model has to predict.
        next_state = states[:, -1]
        yield (history, next_state)

In [189]:
# Sanity check: print the input and target shapes of the first two batches.
gen = update_input_batch_generator(training_data)
num = 0
for num, i in enumerate(gen, start=1):
    print(i[0].shape)
    print(i[1].shape)
    # The generator never ends, so stop explicitly after two batches.
    if num >= 2:
        break

(10, 127, 78, 82)
(10, 78, 2)
(10, 127, 78, 82)
(10, 78, 2)


In [27]:
# Endless generator of (inputs, target) training batches.
data_gen = update_input_batch_generator(training_data)
# Model.fit accepts Python generators directly; fit_generator is deprecated in
# TensorFlow 2.x. steps_per_epoch stays explicit because the generator never ends.
model.fit(data_gen, epochs=20, steps_per_epoch=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20

KeyboardInterrupt: 

# ----------------------------------------

In [78]:


def music_composition(model, starting_data, length, threshold=0.25):
    """Compose ``length`` new timesteps by feeding predictions back into the model.

    Parameters
    ----------
    model
        Object whose ``predict([input_data, state])`` returns the predicted
        probabilities for the next timestep, reshapeable to (1, 78, 2).
    starting_data
        Pair ``[input_data, statematrix]``. The last step of each element is
        dropped and the rest is reshaped to (1, 127, 78, 80) and
        (1, 127, 78, 2) respectively to seed the composition.
    length : int
        Number of timesteps to generate.
    threshold : float, optional
        A predicted probability strictly greater than this value becomes 1,
        anything else becomes 0. Defaults to 0.25, the value that used to be
        hard-coded.

    Returns
    -------
    numpy.ndarray
        The generated binary states, one (78, 2) slice per timestep, i.e.
        shape (length, 78, 2) for ``length > 0``.
    """
    seed_features = np.asarray(starting_data[0][:-1]).reshape(1, 127, 78, 80)
    seed_states = np.asarray(starting_data[1][:-1]).reshape(1, 127, 78, 2)
    new_input = [seed_features, seed_states]  # [(1, 127, 78, 80), (1, 127, 78, 2)]

    output_state = []

    for _ in range(length):
        probabilities = np.asarray(model.predict(new_input))  # (1, 78, 2)

        # Vectorised thresholding; builds a new array instead of mutating the
        # model's output in place and keeps the prediction dtype.
        pred_state = np.where(probabilities > threshold, 1, 0).astype(probabilities.dtype)
        output_state.append(pred_state[0])

        # Slide the window: drop the oldest timestep, append the new prediction.
        new_state = np.concatenate(
            (np.asarray(new_input[1][0][1:]).reshape(1, 126, 78, 2),
             pred_state.reshape(1, 1, 78, 2)),
            axis=1,
        )  # (1, 127, 78, 2)

        # Statematrix -> input data for the shifted window, (1, 127, 78, 80).
        new_data = np.asarray(build_input_data(new_state[0])).reshape(1, 127, 78, 80)
        new_input = [new_data, new_state]

    return np.array(output_state)



In [79]:

# Select a starting sample to seed the composition.
# NOTE(review): music_composition drops the last step of each element and
# reshapes the rest to 127 timesteps, so each element presumably holds 128
# timesteps here rather than the (1, 127, ...) shapes noted on the next line - confirm.
starting_data = build_single_input(training_data) # [(1, 127, 78, 80), (1, 127, 78, 2)]

# NOTE(review): `model_20` is not defined anywhere in the visible notebook; it
# must come from an earlier session, so this cell fails on a fresh kernel.
output_state = music_composition(model_20, starting_data, length = 100)

# One (78, 2) state slice per generated timestep.
output_state.shape

(100, 78, 2)

In [80]:
output_state

array([[[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       ...,

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]],

       [[0., 0.],
        [0., 0.],
        [0., 0.],
        ...,
        [0., 0.],
        [0., 0.],
        [0., 0.]]], dtype=float32)

In [81]:
statematrix_to_midi(output_state)

midi.Pattern(format=1, resolution=220, tracks=\
[midi.Track(\
  [midi.NoteOnEvent(tick=0, channel=0, data=[42, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[49, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[52, 60]),
   midi.NoteOnEvent(tick=330, channel=0, data=[37, 60]),
   midi.NoteOnEvent(tick=55, channel=0, data=[57, 60]),
   midi.NoteOnEvent(tick=55, channel=0, data=[64, 60]),
   midi.NoteOffEvent(tick=55, channel=0, data=[64, 0]),
   midi.NoteOnEvent(tick=55, channel=0, data=[64, 60]),
   midi.NoteOnEvent(tick=220, channel=0, data=[40, 60]),
   midi.NoteOnEvent(tick=0, channel=0, data=[48, 60]),
   midi.NoteOffEvent(tick=55, channel=0, data=[48, 0]),
   midi.NoteOnEvent(tick=110, channel=0, data=[69, 60]),
   midi.NoteOnEvent(tick=275, channel=0, data=[44, 60]),
   midi.NoteOnEvent(tick=110, channel=0, data=[47, 60]),
   midi.NoteOffEvent(tick=55, channel=0, data=[47, 0]),
   midi.NoteOnEvent(tick=55, channel=0, data=[47, 60]),
   midi.NoteOffEvent(tick=55, channel=0, 

In [58]:
test_dt=build_single_input(training_data)
test_dt = [np.asarray(test_dt[0][:-1]).reshape(1, 127, 78, 80), \
                  np.asarray(test_dt[1][:-1]).reshape(1, 127, 78, 2)]
test_pred = model.predict(test_dt)


In [59]:
test_pred

array([[[2.13447674e-05, 3.33141179e-06],
        [2.85910555e-05, 1.61102810e-06],
        [1.14372451e-05, 1.71526221e-06],
        [9.07939193e-06, 2.31358649e-06],
        [3.45558037e-05, 1.60782929e-06],
        [4.92890598e-04, 7.97427583e-06],
        [6.59059221e-03, 1.20087920e-04],
        [1.21153775e-03, 1.77290294e-05],
        [5.57307899e-03, 2.22813214e-05],
        [9.90594490e-05, 2.41834982e-06],
        [3.27689992e-03, 2.65911749e-05],
        [1.33357281e-02, 4.64830100e-06],
        [3.13579105e-03, 1.41584067e-04],
        [8.40216968e-03, 1.32070463e-02],
        [3.59997526e-03, 4.41489770e-04],
        [2.51671323e-03, 4.02670048e-05],
        [5.53600788e-02, 8.26758151e-06],
        [1.75525015e-03, 6.67826607e-05],
        [4.72218990e-02, 9.96029703e-04],
        [4.15141806e-02, 4.13033785e-03],
        [4.57139947e-02, 9.83341131e-04],
        [9.82572604e-03, 8.26598553e-05],
        [4.09033196e-03, 2.70972698e-04],
        [2.43507195e-02, 1.5296326

In [60]:
# Binarise the first (only) prediction in place: probabilities above 0.1 become 1,
# everything else becomes 0.
test_pred[0] = np.where(test_pred[0] > 1e-1, 1, 0)

In [61]:
test_pred[0]

array([[0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 0.],
       [1., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [1., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.],
       [0., 0.

In [104]:
test_data = build_input_data(test_state)
np.array(test_data[-1]).shape

(78, 80)

In [49]:
np.asarray(test_dt[0][:-1]).shape, np.asarray(test_dt[1][:-1]).shape

((127, 78, 80), (127, 78, 2))

In [51]:
test_dt1 = np.asarray(test_dt[0][:-1]).reshape(1, 127, 78, 80)
test_dt2 = np.asarray(test_dt[1][:-1]).reshape(1, 127, 78, 2)
test_input = [test_dt1, test_dt2]

In [55]:
model.input_shape, model.output_shape

([(None, 127, 78, 80), (None, 127, 78, 2)], (None, 78, 2))

In [52]:
test_pred=model.predict(test_input)
test_pred

array([[[nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],
        [nan, nan],


In [21]:
np.sum(np.square(test_pred.reshape(128,78,2)-np.asarray(test_dt[1])))

418.4551392232534

In [22]:
out=statematrix_to_midi(test_pred.reshape(128,78,2))

* Tensorboard

### Step 5: Difference in outcome

### Step 6: Compare between the models (Discussion: The advantage of biaxial LSTM)

Time-axis Model

In [34]:
# Ablation: a model that only has the time-axis LSTM stack (no note-axis LSTMs).
t_inputs = tf.keras.Input(shape=(128,78,80))

# Swap the time and note axes so that each note's time series is one LSTM sequence.
t_inputs_rotate= tf.keras.backend.permute_dimensions(t_inputs,(0,2,1,3)) # (batch,78,128,80)

t_time_lstm1 = tf.keras.layers.LSTM(300,return_sequences=True)
t_time_lstm2 = tf.keras.layers.LSTM(300,return_sequences=True)

# TimeDistributed applies the same LSTM independently to each of the 78 notes.
t_inter1 = tf.keras.layers.TimeDistributed(t_time_lstm1)(t_inputs_rotate) # (batch,78,128,300)
t_inter2 = tf.keras.layers.TimeDistributed(t_time_lstm2)(t_inter1) # (batch,78,128,300)

# Swap the axes back and map the 300 features of every (time, note) position to 2 probabilities.
t_inter2_rotate= tf.keras.backend.permute_dimensions(t_inter2,(0,2,1,3)) # (batch,128,78,300)
t_outputs = tf.keras.layers.Dense(2,activation='sigmoid')(t_inter2_rotate) # (batch,128,78,2)

time_model=tf.keras.Model(inputs=t_inputs,outputs=t_outputs)

In [35]:
time_model.summary()

Model: "model_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         [(None, 128, 78, 80)]     0         
_________________________________________________________________
tf_op_layer_Transpose_5 (Ten [(None, 78, 128, 80)]     0         
_________________________________________________________________
time_distributed_9 (TimeDist (None, 78, 128, 300)      457200    
_________________________________________________________________
time_distributed_10 (TimeDis (None, 78, 128, 300)      721200    
_________________________________________________________________
tf_op_layer_Transpose_6 (Ten [(None, 128, 78, 300)]    0         
_________________________________________________________________
dense_3 (Dense)              (None, 128, 78, 2)        602       
Total params: 1,179,002
Trainable params: 1,179,002
Non-trainable params: 0
_________________________________________________

In [36]:
time_model.compile(optimizer=tf.keras.optimizers.Adam(),loss=my_loss)

In [37]:
# Batch generator for the ablation model.
# NOTE(review): input_batch_generator is presumably provided by prep.py - confirm.
data_gen = input_batch_generator(training_data)
# Model.fit accepts Python generators directly; fit_generator is deprecated in
# TensorFlow 2.x. steps_per_epoch bounds each epoch to 20 batches.
time_model.fit(data_gen, epochs=10, steps_per_epoch=20)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7ca4673850>

Note_axis Model

In [46]:
# Ablation: a model that only has the note-axis LSTM stack (no time-axis LSTMs).
n_inputs = tf.keras.Input(shape=(128,78,80))

n_note_lstm1 = tf.keras.layers.LSTM(100,return_sequences=True)
n_note_lstm2 = tf.keras.layers.LSTM(50,return_sequences=True)

# TimeDistributed applies the same LSTM to every timestep; within a timestep
# the 78 notes form the LSTM sequence.
n_inter3 = tf.keras.layers.TimeDistributed(n_note_lstm1)(n_inputs) # (batch,128,78,100)
n_inter4 = tf.keras.layers.TimeDistributed(n_note_lstm2)(n_inter3) # (batch,128,78,50)

# Map the 50 features of every (time, note) position to 2 probabilities.
n_outputs = tf.keras.layers.Dense(2,activation='sigmoid')(n_inter4) # (batch,128,78,2)

note_model=tf.keras.Model(inputs=n_inputs,outputs=n_outputs)

In [47]:
note_model.summary()

Model: "model_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         [(None, 128, 78, 80)]     0         
_________________________________________________________________
time_distributed_13 (TimeDis (None, 128, 78, 100)      72400     
_________________________________________________________________
time_distributed_14 (TimeDis (None, 128, 78, 50)       30200     
_________________________________________________________________
dense_5 (Dense)              (None, 128, 78, 2)        102       
Total params: 102,702
Trainable params: 102,702
Non-trainable params: 0
_________________________________________________________________


In [48]:
note_model.compile(optimizer=tf.keras.optimizers.Adam(),loss=my_loss)

In [49]:
# Batch generator for the ablation model.
# NOTE(review): input_batch_generator is presumably provided by prep.py - confirm.
data_gen = input_batch_generator(training_data)
# Model.fit accepts Python generators directly; fit_generator is deprecated in
# TensorFlow 2.x. steps_per_epoch bounds each epoch to 20 batches.
note_model.fit(data_gen, epochs=10, steps_per_epoch=20)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f7ca50d6fd0>

### Step 7: Improvement

### Step 8: Unsolved problems and Drawbacks

1. The model doesn't include factors like velocity and tempo, which makes the generated music somewhat plain and lacking in style.
2. The model requires a lot of hand-picked arguments based on empirical knowledge about music.
3. The model only deals with a single instrument. If we simply ran the model on different instruments and then combined the tracks, this surely wouldn't give us a good melody. How to make these instruments sound good together could be an interesting task.