# Training

In [1]:
# !pip install music21
from music21 import *
import numpy as np
import os
from collections import Counter
from fractions import Fraction

# !pip install tensorflow
from keras.layers import *
from keras.models import *
from keras.callbacks import *
import keras.backend as K
import tensorboard
import tensorflow as tf
from datetime import datetime

# !pip install sklearn
from sklearn.model_selection import train_test_split

In [None]:
%load_ext tensorboard

In [152]:
inst = 'Violin'
artist = 'Vivaldi'

$ instrument \in \{Piano, Violin, Flute, Horn, Vocal\} $ <br><br>
$ instrument = Piano \rightarrow artist \in \{Bach, Chopin, Debussy, Ellington, Corea, Anime\}$<br>
$ instrument = Violin \rightarrow artist \in \{Mozart, Paganini, Vivaldi\}$ <br>
$ instrument = Flute \rightarrow artist \in \{Brahms, Wagner\}$ <br>
$ instrument = Horn \rightarrow artist \in \{Mozart, Strauss\}$ <br>
$ instrument = Vocal \rightarrow artist \in \{DaftPunk, Jazz, Pop\}$

In [153]:
# file references
path = "train_music/" + inst + artist + "/"
model_name = inst + artist + ".keras"

unique_x_file = inst + artist + "_unique.txt"
xval_file = inst + artist + "_x.txt"

In [154]:
# hyperparameters
epochs=100 # number of times model runs through train data during training
frequentNoteMin = 10 # model only counts "frequent" notes, specify number times a note/rhythm must appear in train music to be counted
validation=False # include val loss as metric for training - only set to True if overfitting is a clear issue
no_of_timesteps = 32 # SENSITIVE - DO NOT MODIFY, HERE FOR EXPERIMENTAL REASONS, length of sequence to determine next note/rhythm in sequence

In [155]:
def read_midi(file, inst):
    
    print("Loading Music File:",file)
    
    notes=[]
    notes_to_parse = None
    
    #parsing a midi file
    midi = converter.parse(file)
    #midi.show()
  
    #grouping based on different instruments
    s2 = instrument.partitionByInstrument(midi)

    #Looping over all the instruments
    for part in s2.parts:
    
        #select elements of only the selected instrument
        if inst in str(part): 
        
            notes_to_parse = part.recurse() 
      
            #finding whether a particular element is note or a chord
            for element in notes_to_parse:
                
                #note
                if isinstance(element, note.Note):
                    notes.append((str(element.pitch), element.quarterLength))
                
                #chord
                elif isinstance(element, chord.Chord):
                    notes.append(('.'.join(str(n) for n in element.normalOrder), element.quarterLength))
                    
                #rest
                elif isinstance(element, note.Rest):
                    notes.append(('rest', element.quarterLength))

    return notes

### Extract Note Sequences

In [156]:
#read all the filenames
files=[i for i in os.listdir(path) if i.endswith(".mid")]

#reading each midi file
print("Loading notes...")
notes_array = [read_midi(path+i, inst) for i in files]

Loading notes...
Loading Music File: train_music/ViolinVivaldi/autumn_no3_allegro_gp.mid
Loading Music File: train_music/ViolinVivaldi/gp_v04.mid
Loading Music File: train_music/ViolinVivaldi/gp_v10.mid
Loading Music File: train_music/ViolinVivaldi/391violcon5.mid
Loading Music File: train_music/ViolinVivaldi/gp_v11.mid
Loading Music File: train_music/ViolinVivaldi/gp_v05.mid
Loading Music File: train_music/ViolinVivaldi/1539mando2.mid
Loading Music File: train_music/ViolinVivaldi/gp_v07.mid
Loading Music File: train_music/ViolinVivaldi/1532avautunno1.mid
Loading Music File: train_music/ViolinVivaldi/3336gmrv433n3.mid




Loading Music File: train_music/ViolinVivaldi/spring_no1_allegro_gp.mid
Loading Music File: train_music/ViolinVivaldi/gp_v06.mid
Loading Music File: train_music/ViolinVivaldi/gp_v12.mid
Loading Music File: train_music/ViolinVivaldi/grosso3.mid




Loading Music File: train_music/ViolinVivaldi/390cello.mid
Loading Music File: train_music/ViolinVivaldi/3770piccolafluteconclargop79.mid
Loading Music File: train_music/ViolinVivaldi/gp_v02.mid
Loading Music File: train_music/ViolinVivaldi/gp_v03.mid
Loading Music File: train_music/ViolinVivaldi/gp_v01.mid
Loading Music File: train_music/ViolinVivaldi/355summer1.mid
Loading Music File: train_music/ViolinVivaldi/2205winter3.mid
Loading Music File: train_music/ViolinVivaldi/3340vivc2.mid
Loading Music File: train_music/ViolinVivaldi/1538mando1.mid
Loading Music File: train_music/ViolinVivaldi/1535hvivaldg.mid
Loading Music File: train_music/ViolinVivaldi/summer_no3_presto_gp.mid
Loading Music File: train_music/ViolinVivaldi/3341vivc1.mid
Loading Music File: train_music/ViolinVivaldi/3338grmv433n1.mid
Loading Music File: train_music/ViolinVivaldi/427autumn3.mid
Loading Music File: train_music/ViolinVivaldi/winter_no3_allegro_gp.mid
Loading Music File: train_music/ViolinVivaldi/354winter3



Loading Music File: train_music/ViolinVivaldi/1537magnsicutoc.mid
Loading Music File: train_music/ViolinVivaldi/autumn_no2_adagio_gp.mid
Loading Music File: train_music/ViolinVivaldi/1540mando3.mid
Loading Music File: train_music/ViolinVivaldi/viv_2t.mid
Loading Music File: train_music/ViolinVivaldi/1534hvivalda.mid
Loading Music File: train_music/ViolinVivaldi/winter_no2_largo_gp.mid
Loading Music File: train_music/ViolinVivaldi/2231con8Gminor.mid
Loading Music File: train_music/ViolinVivaldi/1530seasonwinter1.mid
Loading Music File: train_music/ViolinVivaldi/816bminor.mid
Loading Music File: train_music/ViolinVivaldi/1533avautunno2.mid
Loading Music File: train_music/ViolinVivaldi/summer_no5_adagio_gp.mid
Loading Music File: train_music/ViolinVivaldi/3337grmv433n2.mid
Loading Music File: train_music/ViolinVivaldi/gp_v08.mid
Loading Music File: train_music/ViolinVivaldi/viv_pic.mid
Loading Music File: train_music/ViolinVivaldi/3769vierjaargetijdenlentedeeln1.mid
Loading Music File: tr

#### Sample Train Music
**** you do not have to run this part for training

In [7]:
sampleIndex = 0
notes_list = []
offset = 0
for note_tuple in notes_array[sampleIndex]:
    if ('.' in note_tuple[0]) or note_tuple[0].isdigit(): #chord
        notes_in_chord = note_tuple[0].split('.')
        notes = []
        for current_note in notes_in_chord: 
            cn=int(current_note)
            new_note = note.Note(cn)
            new_note.quarterLength = note_tuple[1]
            notes.append(new_note)
            
        new_chord = chord.Chord(notes)
        new_chord.offset = offset
        notes_list.append(new_chord)
    elif 'rest' in note_tuple[0]: #rest
        new_rest = note.Rest(quarterLength=note_tuple[1])
        notes_list.append(new_rest)
    else:
        new_note = note.Note(note_tuple[0])
        new_note.quarterLength = note_tuple[1]
        notes_list.append(new_note)

s = stream.Score()
s.insert(0, metadata.Metadata())
s.metadata.title = "Sample Train Music"
s.append(notes_list)

In [9]:
#s.show()

In [10]:
s.show('midi')

### Data Preparation

In [157]:
# every note in a 1d array
notes_ = [element for note_ in notes_array for element in note_]

In [158]:
# frequency of each note
freq = dict(Counter(notes_))

In [159]:
# list of frequent notes, adjustable by count
frequent_notes = [note_ for note_, count in freq.items() if count>=frequentNoteMin]
print("# of frequent notes:", len(frequent_notes))

# of frequent notes: 299


In [160]:
# new_music = notes - nonfrequent notes
new_music=[]

for notes in notes_array:
    temp=[]
    for note_ in notes:
        if note_ in frequent_notes:
            temp.append(note_)            
    new_music.append(temp)

In [161]:
# prepare input and output sequences
x = []
y = []

for note_ in new_music:
    for i in range(0, len(note_) - no_of_timesteps, 1):
        
        #preparing input and output sequences
        input_ = note_[i:i + no_of_timesteps]
        output_ = note_[i + no_of_timesteps]
        
        x.append(input_)
        y.append(output_)
        

print("input shape:\t", np.asarray(x).shape)
print("output shape:\t", np.asarray(y).shape)

input shape:	 (30820, 32, 2)
output shape:	 (30820, 2)


In [162]:
# unique note --> integer
unique_x=[]
for i in range(len(x)):
    for j in range(len(x[i])):
        unique_x.append(x[i][j])
unique_x = list(set(unique_x))
x_note_to_int = dict((note_, number) for number, note_ in enumerate(unique_x))

with open(unique_x_file, "w") as output:
    for element in unique_x:
        output.write(str(element)+'\n')

In [163]:
len(unique_x)

299

In [164]:
# x_seq <--> model input
x_seq=[]
for i in x:
    temp=[]
    for j in i:
        #assigning unique integer to every note
        temp.append(x_note_to_int[j])
    x_seq.append(temp)
    
x_seq = np.array(x_seq)

In [165]:
# same procedure as x_seq but simpler
unique_y = list(set(y))
y_note_to_int = dict((note_, number) for number, note_ in enumerate(unique_y)) 
y_seq = np.array([y_note_to_int[i] for i in y])

In [166]:
# train/test partitioning, test_size=0.2 --> 20% of data is test
x_tr, x_val, y_tr, y_val = train_test_split(x_seq,y_seq,test_size=0.2,random_state=0)
np.savetxt(xval_file, x_val)

In [167]:
# metadata
print("Shapes:")
print("x_tr:", x_tr.shape)
print("y_tr:", y_tr.shape)
print("x_val:", x_val.shape)
print("y_val:", y_val.shape)

Shapes:
x_tr: (24656, 32)
y_tr: (24656,)
x_val: (6164, 32)
y_val: (6164,)


### Neural Network

In [168]:
def lstm(input_shape, output_shape):
    K.clear_session()
    model = Sequential()
    
    #embedding layer
    model.add(Embedding(input_shape, 100, input_length=32,trainable=True))
    
    model.add(LSTM(128,return_sequences=True))
    model.add(LSTM(128))
    model.add(Dense(256))
    model.add(Activation('relu'))
    model.add(Dense(output_shape))
    model.add(Activation('softmax'))
    model.compile(loss='sparse_categorical_crossentropy', optimizer='adam')
    return model

model = lstm(len(unique_x), len(unique_y))
model.summary()

In [169]:
logdir="logs/fit/" + datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

In [170]:
# training
if validation:
    mc = ModelCheckpoint(model_name, mode='min', verbose=1, save_best_only=True)
    history = model.fit(np.array(x_tr),np.array(y_tr),batch_size=128,epochs=epochs, validation_data=(np.array(x_val),np.array(y_val)),verbose=1, callbacks=[mc])
else:
    mc = ModelCheckpoint(model_name, mode='min', verbose=1)
    history = model.fit(np.array(x_seq),np.array(y_seq),batch_size=128,epochs=epochs, verbose=1, callbacks=[mc, tensorboard_callback])
    
# can stop kernel if loss does not consistently decrease (converges)

Epoch 1/100
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step - loss: 4.8421
Epoch 1: saving model to ViolinVivaldi.keras
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 69ms/step - loss: 4.8407
Epoch 2/100
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 3.8019
Epoch 2: saving model to ViolinVivaldi.keras
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 71ms/step - loss: 3.8014
Epoch 3/100
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 3.3311
Epoch 3: saving model to ViolinVivaldi.keras
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 71ms/step - loss: 3.3309
Epoch 4/100
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step - loss: 3.0638
Epoch 4: saving model to ViolinVivaldi.keras
[1m241/241[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 71ms/step - loss: 3.0636
Epoch 5/100
[1m241/241[0m [32m━━━━━━━

KeyboardInterrupt: 

In [None]:
%tensorboard --logdir logs