## Import libraries

In [1]:
# The star import stays first so the explicit imports below win on any name clash.
from music21 import *

import glob
import random
from collections import Counter

import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import LSTM, Dense, Input, Dropout
from tensorflow.keras.models import Sequential, Model, load_model
from tqdm import tqdm

## Reading and Parsing the MIDI File

In [2]:
def read_files(file):
    """Extract the piano notes and chords of one MIDI file as strings.

    Parameters
    ----------
    file : str
        Path of the MIDI file, passed to ``converter.parse``.

    Returns
    -------
    list of str
        One entry per matching element, in iteration order. A single note is
        stored as ``str(element.pitch)``; a chord is stored as its
        ``normalOrder`` values joined with ``'.'``. The list is empty when no
        part whose string form contains ``'Piano'`` is found.
    """
    notes = []
    notes_to_parse = None

    # parse the midi file
    midi = converter.parse(file)

    # separate all instruments from the file
    instrmt = instrument.partitionByInstrument(midi)

    # NOTE(review): some music21 versions return None here when the file has
    # no instrument information; treat that as "no parts" instead of crashing.
    parts = instrmt.parts if instrmt is not None else []

    # fetch data only of the Piano instrument; if several parts match,
    # the last one wins (same as before)
    for part in parts:
        if 'Piano' in str(part):
            notes_to_parse = part.recurse()

    # no piano part: iterating None would raise TypeError, so return early
    if notes_to_parse is None:
        return notes

    # walk every element of the piano part; keep notes and chords only
    for element in notes_to_parse:
        # exact type match, as before, so subclasses are not picked up
        if type(element) is note.Note:
            notes.append(str(element.pitch))
        elif type(element) is chord.Chord:
            # a chord is flattened to its normal-order values joined by '.'
            notes.append('.'.join(str(n) for n in element.normalOrder))

    # return the list of notes
    return notes

#folder that holds the MIDI files
file_path=['schubert']
print(file_path[0]+'/*.mid')
#NOTE(review): recursive=True only matters when the pattern contains '**',
#so this matches the .mid files directly inside the folder, not in sub-folders
all_files=glob.glob(file_path[0]+'/*.mid',recursive=True)


#reading each midi file
#kept as a plain list of per-file note lists: the files have different lengths,
#and wrapping ragged lists in np.array is deprecated (an error in newer NumPy)
notes_array = [read_files(i) for i in tqdm(all_files,position=0,leave=True)]

schubert/*.mid


100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [11:16<00:00, 23.33s/it]
  notes_array = np.array([read_files(i) for i in tqdm(all_files,position=0,leave=True)])


## Exploring the dataset

In [3]:
#flatten the per-file note lists into one list (linear, unlike sum(lists, []))
notess= [n for piece in notes_array for n in piece]

#unique notes, sorted so their order is identical on every run;
#plain set order changes between interpreter sessions for strings
unique_notes= sorted(set(notess))
print("Unique Notes:", len(unique_notes))

#notes with their frequency, counted in a single pass over the data
note_counts= Counter(notess)
freq= {n: note_counts[n] for n in unique_notes}

#how many notes reach each candidate threshold frequency
for i in range (30, 100, 20):
    print(i,":", sum(1 for count in freq.values() if count>=i))

Unique Notes: 95
30 : 17
50 : 10
70 : 5
90 : 3


In [4]:
#keep only the notes whose frequency is at least the threshold, i.e. 50
freq_notes= {name: count for name, count in freq.items() if count>=50}

#rebuild each piece with the frequent notes only, preserving their order
new_notes=[[name for name in piece if name in freq_notes] for piece in notes_array]

In [5]:
#lookup table: note index -> note
ind2note = {idx: name for idx, name in enumerate(freq_notes)}

#inverse lookup table: note -> note index
note2ind= {name: idx for idx, name in ind2note.items()}

## Input and Output Sequence for model

In [6]:
#length of every input window
timesteps= 50

#collected input windows and their target notes (as note indices)
x=[]; y=[]

for piece in new_notes:
    #slide a window of `timesteps` notes over the piece, one note at a time
    for start in range(len(piece)-timesteps):
        stop= start+timesteps

        #input: the notes inside the window; output: the note right after it
        x.append([note2ind[name] for name in piece[start:stop]])
        y.append(note2ind[piece[stop]])

x_new= np.array(x)
y_new= np.array(y)
    

## Training and Testing sets

In [7]:
#inputs become (samples, timesteps, 1); targets become a single column
x_new= x_new.reshape((x_new.shape[0], timesteps, 1))
y_new= y_new.reshape((-1, 1))

#hold out 20% of the samples for testing, train on the remaining 80%
#random_state fixes the shuffle so the split is repeatable
x_train,x_test,y_train,y_test= train_test_split(
    x_new,
    y_new,
    test_size=0.2,
    random_state=42,
)

## Building the model

In [8]:
#window shape fed to the first layer: (timesteps, features)
window_shape= (x_new.shape[1], x_new.shape[2])

#two stacked LSTM layers (256 units each), each followed by dropout,
#then a dense hidden layer and a softmax output with one unit per note
model= Sequential([
    LSTM(256, return_sequences=True, input_shape=window_shape),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dense(len(note2ind), activation='softmax'),
])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 50, 256)           264192    
                                                                 
 dropout (Dropout)           (None, 50, 256)           0         
                                                                 
 lstm_1 (LSTM)               (None, 256)               525312    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense (Dense)               (None, 256)               65792     
                                                                 
 dense_1 (Dense)             (None, 10)                2570      
                                                                 
Total params: 857,866
Trainable params: 857,866
Non-trai

## Train the model

In [9]:
#targets are integer note indices, hence the sparse categorical loss
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

#fit on the training split and validate on the held-out test split
model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=80,
    validation_data=(x_test, y_test),
)

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80


Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80


<keras.callbacks.History at 0x1e02a868190>

## Save model

In [10]:
import pickle

In [11]:
#NOTE(review): this relies on Keras' pickle support; model.save() is the
#documented way to persist a model - confirm which format the loader expects
#the context manager closes the file even if pickling fails
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)



INFO:tensorflow:Assets written to: ram://eefe88cc-ed9a-4d1a-9243-7160e10ce1d1/assets


INFO:tensorflow:Assets written to: ram://eefe88cc-ed9a-4d1a-9243-7160e10ce1d1/assets
