# Music Generation using RNN

In [1]:
# Mount Google Drive inside the Colab VM so that files under /content/drive/
# (the source MIDI file is loaded from there later) are readable.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
import tensorflow as tf

# Fail fast when the runtime has no GPU attached: training the LSTM for
# hundreds of epochs on CPU is impractically slow.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
    raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

# NOTE: a bare `tf.device(device_name)` call was removed from the end of this
# cell. It only constructs a device context manager without entering it, so it
# never affected op placement. TensorFlow already prefers the GPU when one is
# visible; use `with tf.device(device_name):` around specific code if explicit
# pinning is ever required.

Found GPU at: /device:GPU:0


<tensorflow.python.eager.context._EagerDeviceContext at 0x7fd596bd2148>

## Importing Packages

In [3]:
pip install mido

Collecting mido
[?25l  Downloading https://files.pythonhosted.org/packages/20/0a/81beb587b1ae832ea6a1901dc7c6faa380e8dd154e0a862f0a9f3d2afab9/mido-1.2.9-py2.py3-none-any.whl (52kB)
[K     |██████▎                         | 10kB 23.4MB/s eta 0:00:01[K     |████████████▌                   | 20kB 30.0MB/s eta 0:00:01[K     |██████████████████▊             | 30kB 16.4MB/s eta 0:00:01[K     |█████████████████████████       | 40kB 11.6MB/s eta 0:00:01[K     |███████████████████████████████▏| 51kB 7.9MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 3.8MB/s 
[?25hInstalling collected packages: mido
Successfully installed mido-1.2.9


In [4]:
from mido import MidiFile, MidiTrack, Message
from tensorflow.keras.layers import LSTM, Dense, Activation, Dropout, Flatten
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mido

## Loading Data
Our Sound data is in MIDI (.mid) format. 

In [5]:
# Parse the source MIDI file (a Mozart piece) from the mounted Drive folder.
mid = MidiFile('/content/drive/MyDrive/Study/DL/RNN/Music Generation/allegroconspirito.mid')

## Data Preprocessing

Here we extract the note value, velocity and the timing from the midi file and form an array of all the musical notes present in the music file.

### Extracting Data from MIDI file

In [6]:
def extract_notes(midi_file, channel=0):
    """Collect every note_on event of one channel as [note, velocity, delta_seconds].

    Parameters
    ----------
    midi_file : iterable of MIDI messages
        Iterating a mido ``MidiFile`` yields messages whose ``time`` attribute
        is the delay since the previous message, in seconds (not ticks).
    channel : int, default 0
        Zero-based MIDI channel to keep (0 is the piano part in this piece).

    Returns
    -------
    list of [int, int, float]
        One entry per kept note_on message: note number, velocity, and the
        seconds elapsed since the previously kept note.
    """
    extracted = []
    now = 0.0             # running clock over *all* messages, meta included
    last_note_time = 0.0  # clock value at the previously kept note

    for msg in midi_file:
        # Meta messages are skipped as notes, but their time still has to be
        # accumulated so the gap between consecutive notes stays correct.
        now += msg.time

        # Check the type before the channel: non-meta messages such as sysex
        # carry no `channel` attribute, so reading it first would raise
        # AttributeError.
        if msg.is_meta or msg.type != 'note_on' or msg.channel != channel:
            continue

        # msg.bytes() is [status, note, velocity]; drop the status byte and
        # append the time elapsed since the previous kept note.
        note = msg.bytes()[1:]
        note.append(now - last_note_time)
        last_note_time = now
        extracted.append(note)

    return extracted


# list of all the note values, velocity and timing
notes = extract_notes(mid)

### Scaling data into range of 0, 1

In [7]:
# Rescale every [note, velocity, time] triple in place.
# The offsets/divisors for note number and velocity are fixed, domain-chosen
# constants; a generic alternative would be min-max scaling over the data.
for note in notes:
    note[0] = (note[0] - 24) / 88   # note number: shift by 24, divide by 88
    note[1] = note[1] / 127         # velocity: divide by 127

# Timing uses the generic approach: the minimum is 0, so dividing by the
# largest observed gap between notes is enough (no subtraction needed).
t = [note[2] for note in notes]
max_t = max(t)
for note in notes:
    note[2] = note[2] / max_t

### Creating Data Labels

* We will now build an array of musical-note sequences in X and the corresponding next musical note in Y.
* n_prev is the number of consecutive musical notes used to predict the next musical note.
* We will also save a seed sequence to generate the music later.

In [8]:
# Sliding windows: each run of n_prev consecutive notes (X) is paired with
# the note that immediately follows it (Y).
n_prev = 30
X = [notes[start:start + n_prev] for start in range(len(notes) - n_prev)]
Y = [notes[start + n_prev] for start in range(len(notes) - n_prev)]

# The first window doubles as the seed used for generation later on.
seed = notes[0:n_prev]

## Model Building

* We are going to make a Sequential model.
* We will use two LSTM layers.
* After each LSTM we are using a dropout layer.
* Finally the output will be given by a dense layer

In [9]:
# Two stacked LSTM layers, each followed by dropout, ending in a 3-unit dense
# layer that regresses the next [note, velocity, time] triple.
model = Sequential([
    # First LSTM returns the full sequence so the second LSTM can consume it.
    LSTM(128, input_shape=(n_prev, 3), return_sequences=True),
    Dropout(0.2),
    # Second LSTM returns only its final state: shape (batch, 64).
    LSTM(64, return_sequences=False),
    Dropout(0.2),
    # Input here is already 2-D, so Flatten leaves the shape unchanged.
    Flatten(),
    Dense(3),
])

In [10]:
# Print each layer's output shape and parameter count as a sanity check.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 30, 128)           67584     
_________________________________________________________________
dropout (Dropout)            (None, 30, 128)           0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                49408     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
flatten (Flatten)            (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 3)                 195       
Total params: 117,187
Trainable params: 117,187
Non-trainable params: 0
__________________________________________________

## Model Training

In [11]:
# Training hyperparameters, both passed to model.fit:
# batch_size = samples per batch, epochs = passes over the training set.
batch_size, epochs = 256, 400

In [12]:
# Regression on [note, velocity, time], so mean-squared error is the loss.
model.compile(loss='mse', optimizer='adam')

# X and Y are nested Python lists. Convert them to float32 arrays explicitly,
# with shapes (samples, n_prev, 3) and (samples, 3): not every Keras version
# accepts nested lists as array input, and doing it here makes the conversion
# happen exactly once.
X_train = np.asarray(X, dtype='float32')
Y_train = np.asarray(Y, dtype='float32')

model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs, verbose=1)

Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400
Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7fd5206a5588>

## Making Predictions

* prediction is a list that will contain our generated notes
* seed is the seed that we saved earlier to generate music
* no_of_predictions is the number of musical notes we want to generate
------
We generate music in a loop: we make a prediction from the seed, then update the seed by removing the note at the earliest time step and appending the generated note at the latest time step. The updated seed is then used to generate the next note. This process carries on in a loop to produce a sequence of notes.

In [13]:
# Autoregressive generation: predict one note from the current window, then
# slide the window forward by dropping its oldest note and appending the
# prediction.
no_of_predictions = 3000

prediction = []
x = np.expand_dims(seed, axis=0)  # add the batch axis: (1, n_prev, 3)

for _ in range(no_of_predictions):
    preds = model.predict(x)  # shape (1, 3)

    # Drop the batch axis, append the new note, discard the oldest one.
    window = np.concatenate((np.squeeze(x), preds))[1:]
    x = np.expand_dims(window, axis=0)

    preds = np.squeeze(preds)
    prediction.append(preds)

In [14]:
# Map the model output (roughly in the 0..1 range) back to MIDI units by
# inverting the scaling applied before training, then clamp each field to
# the range accepted downstream. Each prediction array is modified in place.
for pred in prediction:
    # Undo (note - 24) / 88 and velocity / 127, truncating to whole numbers.
    pitch = int(88 * pred[0] + 24)
    velocity = int(127 * pred[1])

    # Clamp out-of-range values; the bounds are domain-chosen MIDI limits.
    pred[0] = min(max(pitch, 24), 102)
    pred[1] = min(max(velocity, 0), 127)

    # Undo time / max_t; a negative delay is meaningless, so floor it at 0.
    pred[2] = max(pred[2] * max_t, 0)

## Saving Generated Music

In [15]:
# Write the generated notes to a new single-track MIDI file.
# NOTE: this rebinds `mid`, which previously held the loaded source file;
# re-run the loading cell before re-running the extraction cell.

# The status byte 147 (0x93) used previously encodes note_on on zero-based
# channel 3; spell that out instead of assembling raw bytes.
NOTE_ON_CHANNEL = 3
# With no set_tempo message in the file, MIDI playback assumes 500000
# microseconds per beat (120 BPM), so use that for the seconds -> ticks step.
DEFAULT_TEMPO = 500000

mid = MidiFile()
track = MidiTrack()
mid.tracks.append(track)

for note in prediction:
    # Each prediction is [note number, velocity, delay in seconds].
    pitch = int(note[0])
    velocity = int(note[1])
    delta_seconds = float(note[2])

    # MIDI files store delays as integer ticks; convert using this file's
    # resolution rather than a hand-tuned constant.
    delta_ticks = int(round(
        mido.second2tick(delta_seconds, mid.ticks_per_beat, DEFAULT_TEMPO)
    ))

    track.append(Message('note_on', channel=NOTE_ON_CHANNEL, note=pitch,
                         velocity=velocity, time=delta_ticks))

mid.save('generated_music.mid')