# Colab Block
I usually run my notebooks in Colaboratory, which provides a free 12GB GPU to connect to and use. Files are most easily accessed via Google Drive, but the Drive needs to be connected and authenticated twice via this block. Also, some packages need to be installed again every runtime. That is what this first block does; remove it if you are not using a Colab notebook.

In [None]:
# Colab-only setup: authenticate the user, install and authorize
# google-drive-ocamlfuse, attach Google Drive at ./gdrive, then install the
# Python packages this notebook needs.
from google.colab import auth
auth.authenticate_user()
# System packages required to add the PPA that ships google-drive-ocamlfuse.
# NOTE(review): `2>&1 > /dev/null` silences stdout only; stderr still reaches
# the cell output. `> /dev/null 2>&1` would silence both — confirm intent.
!apt-get install -y -qq software-properties-common python-software-properties module-init-tools
!add-apt-repository -y ppa:alessandro-strada/ppa 2>&1 > /dev/null
!apt-get update -qq 2>&1 > /dev/null
!apt-get -y install -qq google-drive-ocamlfuse fuse
# Reuse the application-default credentials' client id/secret for the FUSE tool.
from oauth2client.client import GoogleCredentials
creds = GoogleCredentials.get_application_default()
import getpass
# First pass prints the authorization URL (only the line containing "URL" is kept).
!google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret} < /dev/null 2>&1 | grep URL
# Paste the verification code from that URL; getpass keeps it out of the cell output.
vcode = getpass.getpass()
# Second pass feeds the verification code to the tool on stdin.
!echo {vcode} | google-drive-ocamlfuse -headless -id={creds.client_id} -secret={creds.client_secret}
# Create the mount point and attach the Drive there; later cells read from gdrive/.
!mkdir -p gdrive
!google-drive-ocamlfuse gdrive
# Python dependencies, reinstalled on every fresh runtime.
# NOTE(review): versions are unpinned, so results may drift between runtimes.
!pip install -q keras
!pip install numba
!pip install tqdm
!pip install opencv-python
!apt update && apt install -y libsm6 libxext6
!pip install music21

# Imports

In [3]:
import glob
import pickle
import numpy as np
import keras as K

from music21 import converter, instrument, note, chord, stream
from keras.models import Sequential
from keras.layers import Dense, MaxPooling1D, Conv1D
from keras.layers import Dropout, Bidirectional, SpatialDropout1D
from keras.layers import LSTM, GRU, Flatten,CuDNNLSTM, CuDNNGRU
from keras.layers import Activation
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint

import tensorflow as tf

import os
import subprocess
# import mido
# from mido import MidiFile

# mid = MidiFile('C:/Users/Valkling/Dropbox/my_personal_programs/Music_Bot/midicsv-1.1.tar/midicsv-1.1/test.mid')

# DirPath = "C:\Users\Valkling\Dropbox\my_personal_programs\Music_Bot\midicsv-1.1.tar\midicsv-1.1"

Using TensorFlow backend.


# Read in Midi Files
Place any MIDI files you would like to read into an attached midi_songs folder and uncomment this block. Alternatively, just load the pre-packed .npy file in the next block to get an already-parsed dataset.

In [0]:
# %%time
# notes = []

# for file in glob.glob("../midi_songs/*.mid"):
#     try:
#         midi = converter.parse(file)

#         notes_to_parse = None

#         parts = instrument.partitionByInstrument(midi)
# #         print(file)
#         if parts: # file has instrument parts
#             notes_to_parse = parts.parts[0].recurse()
#         else: # file has notes in a flat structure
#             notes_to_parse = midi.flat.notes
#     #     print("here")
#         for element in notes_to_parse:
#             if isinstance(element, note.Note):
#                 notes.append(str(element.pitch))
#             elif isinstance(element, chord.Chord):
#                 notes.append('.'.join(str(n) for n in element.normalOrder))
# #         print("here")
#     except:
#         print("file failed")
# # with open('../data/notes', 'wb') as filepath:
# #     pickle.dump(notes, filepath)

CPU times: user 1e+03 µs, sys: 0 ns, total: 1e+03 µs
Wall time: 639 ms


In [0]:
# Load the pre-parsed note sequence from the gdrive/ folder set up by the Colab block.
# Presumably a 1-D array of note/chord name strings — TODO confirm against the file.
notes = np.load("gdrive/Music_Bot/midiset.npy")

# Notes to sequences
Cuts the notes into a series of 100-note arrays, with the next (i.e. the 101st) note placed in a separate array to use as the Y variable in the model. Basically, we are using the last 100 notes to predict the next note.

In [0]:
# Turn the flat note list into supervised training data:
#   network_input  -> (n_patterns, sequence_length, 1) floats scaled by 1 / n_vocab
#   network_output -> (n_patterns, n_vocab) one-hot encoding of the following note

# Second reference to the loaded dataset (same object, not a copy).
notesbackup = notes

sequence_length = 100

# Sorted vocabulary of every distinct note/chord token, and its size.
pitchnames = sorted(set(notes))
n_vocab = len(pitchnames)

# Token -> integer id lookup.
note_to_int = {name: number for number, name in enumerate(pitchnames)}

# Encode the whole dataset once; each window below is just a slice of it.
encoded_notes = [note_to_int[token] for token in notes]

network_input = []
network_output = []

for i in range(len(encoded_notes) - sequence_length):
    # 100 consecutive notes are the features, the note right after is the label.
    network_input.append(encoded_notes[i:i + sequence_length])
    network_output.append(encoded_notes[i + sequence_length])

n_patterns = len(network_input)
assert n_patterns > 0, "need more than sequence_length notes to build any training pattern"

network_input = np.reshape(network_input, (n_patterns, sequence_length, 1))

# Scale the integer ids by the vocabulary size.
network_input = network_input / float(n_vocab)

# Pin the one-hot width to the vocabulary size. Without num_classes the width
# is inferred as max(label) + 1, which is smaller than n_vocab whenever the
# highest id never occurs as a label, and then no longer matches the model's
# n_vocab-wide softmax layer.
network_output = np_utils.to_categorical(network_output, num_classes=n_vocab)

In [6]:
# Sanity check: expected to be (n_patterns, sequence_length, 1) after the reshape above.
network_input.shape

(57077, 100, 1)

# Set up Model
The model is 3 LSTMs stacked on top of each other. Using 3 LSTMs causes the model to hang at over 4.0 loss for the first 40 minutes or so of training time before taking off. Note that the CuDNNLSTMs are a special NVIDIA layer that automatically optimizes the LSTMs to work around twice as fast, but they need to be used with certain GPUs. Colab's GPU is compatible and all set for it, but replace them with regular LSTMs if the layers won't work for you. (Still, only try this code with a good GPU; it would take too long on a CPU or even an underpowered GPU.)

In [13]:
# Three stacked CuDNN LSTM layers (512 units each) feed a small dense head
# that ends in a softmax over the note vocabulary. Only the last recurrent
# layer collapses the sequence to a single vector.
model = Sequential([
    CuDNNLSTM(512, return_sequences=True, input_shape=network_input.shape[1:]),
    Dropout(0.1),
    CuDNNLSTM(512, return_sequences=True),
    Dropout(0.1),
    CuDNNLSTM(512),
    Dense(256, activation='elu'),
    Dropout(0.1),
    Dense(n_vocab, activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=["accuracy"])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_4 (CuDNNLSTM)     (None, 100, 512)          1054720   
_________________________________________________________________
dropout_2 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
cu_dnnlstm_5 (CuDNNLSTM)     (None, 100, 512)          2101248   
_________________________________________________________________
dropout_3 (Dropout)          (None, 100, 512)          0         
_________________________________________________________________
cu_dnnlstm_6 (CuDNNLSTM)     (None, 512)               2101248   
_________________________________________________________________
dense_3 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
__________

# Checkpoint Functions

In [0]:
# Write a checkpoint to `filepath` each time the training loss hits a new
# minimum; the resulting callback list is handed to model.fit.
filepath = "../best_weights.hdf5"
checkpoint = ModelCheckpoint(filepath, monitor='loss', mode='min', save_best_only=True, verbose=1)
callbacks_list = [checkpoint]

In [0]:
# model = K.models.load_model("gdrive/Music_Bot/best_weights.hdf5")

# Train the Model
Run until you are satisfied with the loss. You need a loss under 1.0, at least, for something kind of like music. 0.5 is pretty good, and around 0.2 is about as good as it will get. It can be run longer than that, but it will take a long time, and getting the loss too low might result in too much overfitting, at which point the midi files are just being repeated in the most inefficient way possible. Basically, the program is going to try to predict the music perfectly and we want it to make *just* enough errors in its prediction to create new music, but not so many that it no longer sounds like music.

In [None]:
# Train on the prepared sequences; the checkpoint callback saves the model
# whenever the loss improves. The original bare `tf.initialize_all_variables`
# reference was dropped: it was never called, so it did nothing, and on
# TensorFlow builds without that symbol it raised AttributeError before
# training could start.
model.fit(
    network_input,
    network_output,
    epochs=1000,
    batch_size=64,
    callbacks=callbacks_list,
    verbose=2,
)

In [0]:
# model.load_weights(filepath)
# model.save_weights("../Best_Overall_Weights.hdf5")

In [0]:
# model.save("../Best_Overall_model.hdf5")

# Generate Music
We create the music by giving the model a random sample from our data set to predict on. Then we append the newly predicted note to the end of our pattern, drop the first note, and use that pattern to predict the next note, and so on. It is set to 1000 notes right now, or around a 4-minute midi file. The first 20 seconds of an output tend to sound iffy while the computer finds its rhythm.


In [0]:
%%time
start = np.random.randint(0, len(network_input)-1)
int_to_note = dict((number, note) for number, note in enumerate(pitchnames))
pattern = network_input[start]
prediction_output = []
for note_index in range(1000):
    prediction_input = np.reshape(pattern, (1, len(pattern), 1))
    prediction_input = prediction_input / float(n_vocab)
    prediction = model.predict(prediction_input, verbose=0)
    index = np.argmax(prediction)
    result = int_to_note[index]
    prediction_output.append(result)
    pattern = np.append(pattern,index)
    pattern = pattern[1:len(pattern)]

This block translates our predicted notes into a midi format. Raising or lowering the `offset +=` value at the bottom will change the notes' playback speed.

In [10]:
%%time
offset = 0
output_notes = []
for pattern in prediction_output:
    if ('.' in pattern) or pattern.isdigit():
        notes_in_chord = pattern.split('.')
        notes = []
        for current_note in notes_in_chord:
            new_note = note.Note(int(current_note))
            new_note.storedInstrument = instrument.Piano()
            notes.append(new_note)
        new_chord = chord.Chord(notes)
        new_chord.offset = offset
        output_notes.append(new_chord)
    else:
        new_note = note.Note(pattern)
        new_note.offset = offset
        new_note.storedInstrument = instrument.Piano()
        output_notes.append(new_note)
    offset += 0.6

CPU times: user 256 ms, sys: 6 ms, total: 262 ms
Wall time: 256 ms


# Output the file

In [None]:
# Collect the generated notes/chords into a music21 stream and write it out
# as a MIDI file.
output_path = '../test_output.mid'
midi_stream = stream.Stream(output_notes)
midi_stream.write('midi', fp=output_path)