In [121]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import ModelCheckpoint
from IPython.display import clear_output
import numpy as np
import json

%run midi_utils.ipynb

# Turn on memory growth for every GPU TensorFlow can see (per the TensorFlow docs,
# GPU memory is then allocated as needed instead of being reserved up front).
# When no GPU is visible the list is empty and the loop does nothing.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)

In [2]:
# https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence
# Builds one-hot encoded batches on demand, since the fully encoded dataset wouldn't fit into memory all at once.

class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that one-hot encodes training samples one batch at a time.

    Each entry of ``training_data`` is a string of the form
    ``"<int> <int> ... <int>, <int>"``: the space-separated input token ids,
    then ``", "``, then the id of the target token.

    Args:
        training_data: Mutable sequence of sample strings in the format above.
            It is shuffled in place after every epoch when ``shuffle`` is true.
        batch_size: Number of samples per batch.
        num_classes: Number of classes used for the one-hot encoding.
        shuffle: Whether to reshuffle ``training_data`` at the end of each epoch.
    """

    def __init__(self, training_data, batch_size, num_classes, shuffle=True):
        self.training_data = training_data
        self.batch_size = batch_size
        self.num_classes = num_classes
        self.shuffle = shuffle

    def __len__(self):
        """Return the number of full batches per epoch.

        Samples that do not fill a complete batch are left out of the count.
        """
        # Use the instance's own data, not whatever global happens to be
        # named `training_data` in the notebook.
        return len(self.training_data) // self.batch_size

    def __getitem__(self, idx):
        """Return batch ``idx`` as a pair ``(inputs, targets)`` of one-hot arrays."""
        start = idx * self.batch_size
        batch = self.training_data[start:start + self.batch_size]

        X = []
        y = []
        for line in batch:
            fields = line.split(", ")
            # fields[0]: space-separated input token ids, fields[1]: target token id
            X.append([int(integer) for integer in fields[0].split(" ")])
            y.append(int(fields[1]))

        # presumably (batch, sequence_length, num_classes) and (batch, num_classes),
        # assuming every sample has the same number of input ids — TODO confirm
        return to_categorical(X, num_classes=self.num_classes), to_categorical(y, num_classes=self.num_classes)

    def on_epoch_end(self):
        """Shuffle the samples in place so the next epoch sees a different order."""
        if self.shuffle:
            np.random.shuffle(self.training_data)

In [23]:
# Load vocabulary (token -> integer id); the context manager closes the file afterwards
with open("./dictionary.json") as vocabulary_file:
    vocabulary = {token: int(token_int) for token, token_int in json.load(vocabulary_file).items()}

# Read all preprocessed training samples (one per line) into memory and shuffle them once
with open("./training_data_preprocessed.txt") as f:
    training_data = f.read().splitlines()
np.random.shuffle(training_data)

# FOR TESTING THE LEARNING CAPABILITY OF THE MODEL
#training_data = training_data[:100000]

# Instantiate generator with batch size 128, shuffling the data each epoch
training_generator = DataGenerator(training_data, 128, len(vocabulary), True)

In [37]:
# Checkpointing: after an epoch, write the full model (not only its weights) to disk,
# but only if that epoch's training loss is lower than in every previous epoch.
# The epoch number and the loss value are substituted into the file name.
filepath = "../models/deep_whole_dataset_opt_fix/DeepLSTM-{epoch:02d}-{loss:.4f}.hdf5"
checkpoint = ModelCheckpoint(
    filepath=filepath,
    monitor='loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=0,
)

In [41]:
# If you want to continue training an existing model, load it here.
# NOTE(review): the "start training from scratch" cell also assigns `model`; running it
# after this cell replaces the model loaded here, so run only one of the two.
model = tf.keras.models.load_model("../models/deep_whole_dataset_opt_fix/DeepLSTM-58-0.9498.hdf5")

In [39]:
# If you want to start training from scratch, instantiate the model here.
# Architecture: three stacked 256-unit LSTM layers, each followed by 20% dropout,
# feeding a softmax layer with one output per vocabulary entry.
optimizer = tf.keras.optimizers.Adam(clipvalue=5.0)

model = Sequential([
    # Input: sequences of 100 steps, each a one-hot vector over the vocabulary
    LSTM(256, input_shape=(100, len(vocabulary)), return_sequences=True),
    Dropout(0.2),
    LSTM(256, return_sequences=True),
    Dropout(0.2),
    LSTM(256),
    Dropout(0.2),
    Dense(len(vocabulary), activation='softmax'),
])
model.compile(optimizer=optimizer, loss='categorical_crossentropy')

In [40]:
# Train on the batches produced by training_generator; the checkpoint callback saves the
# model whenever the training loss improves. epochs=1000 is only an upper bound: the run
# recorded below was interrupted by hand during epoch 59.
model.fit(training_generator,
          epochs=1000,
          # use_multiprocessing=True is left out on purpose: it can't be used in a jupyter notebook
          callbacks=[checkpoint],
          workers=6)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
 3170/15884 [====>.........................] - ETA: 17:57 - loss: 1.1217

KeyboardInterrupt: 

In [126]:
def print_loading(i, n_tokens_to_generate, stop_at_EOS):
    """Replace the current cell output with a one-line progress report.

    Args:
        i: Number of tokens generated so far.
        n_tokens_to_generate: Total printed after the slash when ``stop_at_EOS`` is false.
        stop_at_EOS: When true, the total is printed as "?" instead of a number.
    """
    clear_output(wait=True)
    if stop_at_EOS:
        print(str(i), "/ ?", "generated")
    else:
        print(str(i), "/", str(n_tokens_to_generate), "generated")

def sample(preds, temperature=1.0):
    """Draw one class index at random from a temperature-scaled distribution.

    The probabilities are moved to log space, divided by ``temperature``,
    exponentiated and renormalised; a single multinomial draw then picks the index.

    Args:
        preds: Array-like of class probabilities.
        temperature: Divisor applied to the log-probabilities before renormalising.

    Returns:
        Index of the drawn class.
    """
    scaled_log_probs = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_log_probs)
    distribution = weights / np.sum(weights)
    # One trial, one experiment: a one-hot row marking the drawn class
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)
    
def generate_music(model, vocab_size, vocabulary, starting_input, n_tokens_to_generate, temperature = 1.0, stop_at_EOS = False):
    """Generate tokens one at a time by repeatedly sampling the model's next-token prediction.

    The model input is a sliding window of token ids: after every prediction the
    sampled id is appended and the oldest id is dropped, so the window keeps its length.
    [0, 1, 2] -predict-> [3]
    [1, 2, 3] -predict-> [4]
    [2, 3, 4] and so on

    Args:
        model: Callable applied to the one-hot encoded window (a batch of one sequence);
            the first element of its result is used as the next-token distribution.
        vocab_size: Number of classes used for the one-hot encoding.
        vocabulary: Mapping token -> integer id; inverted here to decode predictions.
        starting_input: Sequence of token ids used as the initial window, or ``None``
            to start from 99 random ids followed by the id 159.
        n_tokens_to_generate: Generation stops once exactly this many tokens were produced.
        temperature: Sampling temperature forwarded to ``sample``.
        stop_at_EOS: If true, generation also stops as soon as "<EOS>" is produced.

    Returns:
        List of the generated tokens, in generation order.
    """
    # Keeps track of the number of tokens generated so far
    i = 0

    # `is None` instead of `== None`: comparing a numpy array with `==` is elementwise
    # and cannot be used as an `if` condition.
    if starting_input is None:
        sliding_window = np.random.randint(0, vocab_size, size=99).tolist()
        # NOTE(review): 159 is a hard-coded token id; its meaning is not visible here —
        # confirm against dictionary.json
        sliding_window.append(159)
    else:
        # Copy, so the caller's sequence is never modified
        sliding_window = list(starting_input)

    # Inverse of the vocabulary, because the tokens in integer form need to be converted back to tokens
    int_to_token_dict = {token_int: token for token, token_int in vocabulary.items()}

    # List that holds the final output. Grows by each prediction.
    prediction_output = []

    while True:
        # Convert to the same format as the one the model saw during training (batch of one)
        prediction_input = to_categorical([sliding_window], num_classes = vocab_size)

        # Predict next token depending on the current sequence
        prediction = model(prediction_input)[0]
        i += 1

        # Sample the integer variant of the next token
        index = int(sample(prediction, temperature))

        # Grab the token variant of the integer and append the resulting token to prediction output
        result = int_to_token_dict[index]
        prediction_output.append(result)

        # Slide the window one step: drop the oldest id, append the new one
        sliding_window = sliding_window[1:] + [index]

        # A loading bar for the impatient
        print_loading(i, n_tokens_to_generate, stop_at_EOS)

        if (stop_at_EOS and result == "<EOS>") or (i == n_tokens_to_generate):
            break

    return prediction_output

In [129]:
# Generate 1000 tokens at temperature 0.7, starting from a random window (starting_input=None),
# then convert the tokens with convert_tokens_to_midi and show the result as MIDI.
generated_tokens = generate_music(model, len(vocabulary), vocabulary, None, 1000, 0.7)
convert_tokens_to_midi(generated_tokens).show("midi")

1000 / 1000 generated


In [122]:
# Check the generated tokens with validate_tokens (not defined in this notebook — presumably
# provided by midi_utils.ipynb); the returned value is shown as the cell output.
validate_tokens(generated_tokens)

(True, [])

In [123]:
# Generate ten samples of 3000 tokens each (temperature 0.7, random starting window)
# and write every one of them to its own numbered MIDI file.
for i in range(10):
    generated_tokens = generate_music(model, len(vocabulary), vocabulary, None, 3000, 0.7)
    generated_midi_stream = convert_tokens_to_midi(generated_tokens)

    midi_filepath = f"../generated_samples/final_deep_sample{i}.mid"
    generated_midi_stream.write('midi', fp=midi_filepath)

3000 / 3000 generated
