In [1]:
!pip3 install tensorflow
!pip3 install music21
!pip3 install tqdm

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Embedding, LSTM, Dense, Layer
from tensorflow.keras import optimizers

import numpy as np
import os
import re
from music21 import converter, abcFormat, midi
import time
import functools
from IPython import display as ipythondisplay
from tqdm import tqdm

!apt-get install abcmidi timidity timidity-interfaces-extra freepats > /dev/null 2>&1

#assert len(tf.config.list_physical_devices('GPU')) > 0



In [2]:
import requests, zipfile, io
r = requests.get("http://ifdo.ca/~seymour/nottingham/nottingham_database.zip")
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("./")
!cat ./nottingham_database/*.abc > ./nottingham_database/nottingham.abc

cat: ./nottingham_database/nottingham.abc: input file is output file


In [3]:
dataset = "./nottingham_database/nottingham.abc"

In [4]:
with open(dataset) as f:
    musics_string = re.sub(r'(?m)^\%.*\n?', '', f.read())
    musics_string = re.sub(r'(?m)^F: http.*\n?', '', musics_string)
    musics = re.split('\n\n+', musics_string)
    
print(musics[5])

X: 6
T:Bonnie Kate
S:Mick Peat
M:4/4
L:1/4
K:G
f|"G"g3/2a/2 "D"gf|"G"gd2e|"C"dc "D"BA|"G"BG2A|"G"B" Em"G2A/2B/2|\
"Am"c" D"A2B/2c/2|"G"Bd "C"cB| [1"D"Ad ef:|
 [2"D"A2 A2||"G"BG2A/2B/2|"D"cA "G7"dB|"C"ec2d/2e/2|"D"fzde/2f/2|
"G"g3/2a/2 "Em"ge|"Bm"df "Em"gB|"Am"ce "D"d/2e/2d/2c/2|"G"BG G2:|


In [5]:
abcScore = converter.parse(musics[5])

In [6]:
# Play test music
#sp = midi.realtime.StreamPlayer(abcScore)
#sp.play()

In [7]:
musical_alphabet = sorted(set(musics_string))
print(len(musical_alphabet))

char_to_scal =  {u:i for i, u in enumerate(musical_alphabet)}
scal_to_char = list(musical_alphabet)

def vectorize_string(string):
  return np.array([char_to_scal[c] for c in string])

vectorized_songs = vectorize_string(musics_string)

92


In [8]:
def make_random_batch(seq_len = 5, batch_size = 1):
    inputs = [vectorized_songs[i:i + seq_len] for i in range(len(musics_string) - seq_len)]
    outputs = [vectorized_songs[i:i + seq_len] for i in range(1, len(musics_string) - seq_len + 1)]
    batch_indexes = np.random.choice(len(inputs), batch_size)
    batch_input = np.reshape([inputs[i] for i in batch_indexes], [batch_size, seq_len])
    batch_output = np.reshape([outputs[i] for i in batch_indexes], [batch_size, seq_len])
    return batch_input, batch_output

batch_input, batch_output = make_random_batch()

In [9]:
print(batch_input[:5])
print(batch_output[:5])

[[31 89  0  3 34]]
[[89  0  3 34  3]]


In [10]:
def my_LSTM(rnn_units): 
      return tf.keras.layers.LSTM(
        rnn_units, 
        return_sequences=True, 
        recurrent_initializer='glorot_uniform',
        recurrent_activation='sigmoid',
        stateful=True,
      )

In [11]:
class musicGenRNN(Model):
    def __init__(self, musical_alphabet_size, embedding_dim, rnn_units, batch_size):
        super().__init__()
        
        self.embedding = Embedding(musical_alphabet_size, embedding_dim, batch_input_shape=[batch_size, None])
        self.lstm = my_LSTM(rnn_units)
        self.classifier = Dense(musical_alphabet_size)
        
    def call(self, inputs):
        x = self.embedding(inputs)
        x = self.lstm(x)
        y = self.classifier(x)
        return y

In [12]:
seq_len=10
batch_size=32

input_shape = [len(musical_alphabet)]
music_gen_rnn = musicGenRNN(len(musical_alphabet), embedding_dim=256, rnn_units=1024, batch_size=batch_size)
music_gen_rnn.build((batch_size, *input_shape))
music_gen_rnn.summary()

Model: "music_gen_rnn"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  23552     
_________________________________________________________________
lstm (LSTM)                  multiple                  5246976   
_________________________________________________________________
dense (Dense)                multiple                  94300     
Total params: 5,364,828
Trainable params: 5,364,828
Non-trainable params: 0
_________________________________________________________________


In [13]:
x, y = make_random_batch(seq_len, batch_size)
pred = music_gen_rnn(x)

In [14]:
num_training_iterations = 2000 
batch_size = 10
seq_length = 100
learning_rate = 1e-3 

# Model parameters: 
musical_alphabet_size = len(musical_alphabet)
embedding_dim = 256 
rnn_units = 1024  # Experiment between 1 and 2048

# Checkpoint location: 
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "my_ckpt")

In [15]:
music_gen_rnn = musicGenRNN(musical_alphabet_size, embedding_dim, rnn_units, batch_size)
music_gen_rnn.build((batch_size, *input_shape))
music_gen_rnn.summary()

Model: "music_gen_rnn_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      multiple                  23552     
_________________________________________________________________
lstm_1 (LSTM)                multiple                  5246976   
_________________________________________________________________
dense_1 (Dense)              multiple                  94300     
Total params: 5,364,828
Trainable params: 5,364,828
Non-trainable params: 0
_________________________________________________________________


In [16]:
def compute_loss(labels, logits):
    loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)
    return loss

optimizer = tf.keras.optimizers.Adam(learning_rate)

In [None]:
import matplotlib.pyplot as plt

@tf.function
def train_step(x, y):
    with tf.GradientTape() as tape:

        y_pred = music_gen_rnn(x)
        loss = compute_loss(y, y_pred)
        grads = tape.gradient(loss, music_gen_rnn.trainable_variables)

        optimizer.apply_gradients(zip(grads, music_gen_rnn.trainable_variables))
    return loss

##################
# Begin training!#
##################

history = []

for iter in tqdm(range(num_training_iterations)):
    x_batch, y_batch = make_random_batch(seq_length, batch_size)
    loss = train_step(x_batch, y_batch)

    # Update the progress bar
    history.append(loss.numpy().mean())
    plt.plot(history)

    # Update the model with the changed weights!
    if iter % 100 == 0:
        music_gen_rnn.save_weights(checkpoint_prefix)
    
# Save the trained model and the weights
music_gen_rnn.save_weights(checkpoint_prefix)

  3%|▎         | 54/2000 [03:12<1:48:27,  3.34s/it]

In [None]:
music_gen_rnn = musicGenRNN(len(musical_alphabet), embedding_dim=256, rnn_units=1024, batch_size=1)
music_gen_rnn.build((1, *input_shape))
# Restore the model weights for the last checkpoint after training
music_gen_rnn.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
music_gen_rnn.build(tf.TensorShape([1, None]))

music_gen_rnn.summary()

In [None]:
def predict_new_music(model, start_string, music_length):
    vectorised_input = np.array([char_to_scal[c] for c  in start_string])
    text_generated = []
    
    model.reset_states()
    
    vectorised_input = tf.expand_dims(vectorised_input, 0)
    
    for i in range(music_length):
        y_pred = model(vectorised_input)
        y_pred = tf.squeeze(y_pred, 0)
        y_id = tf.random.categorical(y_pred, 1)[-1,0].numpy()
        vectorised_input = tf.expand_dims([y_id], 0)
        text_generated.append(scal_to_char[y_id])
    
    return (start_string + ''.join(text_generated))

In [None]:
music = predict_new_music(music_gen_rnn, 'X', 1000)


In [None]:
music

In [None]:
# Play test music
abcScore = converter.parse(music)
sp = midi.realtime.StreamPlayer(abcScore)
sp.play()