In [2]:
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from IPython.display import clear_output
import numpy as np
import json

%run midi_utils.ipynb

# Ask TensorFlow to enable memory growth on every visible GPU.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth has to be configured before the GPUs are initialized, so
    # re-running this cell in a live kernel raises. Report it instead of
    # aborting the whole cell.
    print(err)

music21: Certain music21 functions might need the optional package matplotlib;
                  if you run into errors, install it by following the instructions at
                  http://mit.edu/music21/doc/installing/installAdditional.html


In [87]:
def print_loading(i, n_tokens_to_generate, n_unique, stop_at_EOS):
    """Redraw the single-line progress readout of the generation loop.

    Args:
        i: Number of tokens generated so far.
        n_tokens_to_generate: Token budget; only printed when ``stop_at_EOS``
            is falsy.
        n_unique: Number of distinct token ids currently in the sliding window.
        stop_at_EOS: When truthy the total is shown as ``?`` instead of
            ``n_tokens_to_generate``.
    """
    # Wipe the previous readout so the output area shows one line, not one per token.
    clear_output(wait=True)

    if stop_at_EOS:
        fields = (str(i), "/ ?", "generated. Unique tokens in sliding seq:", str(n_unique))
    else:
        fields = (str(i), "/", str(n_tokens_to_generate), "generated. Unique tokens in sliding seq:", str(n_unique))
    print(*fields)

def sample(preds, temperature=1.0):
    """Draw one class index from a probability vector after temperature scaling.

    The probabilities are converted to log space, divided by ``temperature``,
    renormalized with a softmax, and a single index is drawn from the result.

    Args:
        preds: Array-like of class probabilities (anything ``np.asarray``
            accepts).
        temperature: Scaling factor applied to the log-probabilities. Values
            below 1 sharpen the distribution, values above 1 flatten it.
            ``0`` selects the most probable class deterministically.

    Returns:
        The sampled class index, as returned by ``np.argmax``.
    """
    preds = np.asarray(preds).astype('float64')

    # Zero temperature is the greedy limit; dividing by it would only yield
    # inf/nan, so pick the most likely class directly.
    if temperature == 0:
        return np.argmax(preds)

    # log(0) == -inf is the wanted result for zero-probability classes (they
    # end up with probability 0 again), so suppress the divide warning.
    with np.errstate(divide='ignore'):
        logits = np.log(preds) / temperature

    # Shift by the maximum before exponentiating. The softmax is unchanged, but
    # the largest term becomes exp(0) == 1, so a very small temperature can no
    # longer underflow every term to 0 and produce 0/0.
    logits = logits - np.max(logits)
    exp_preds = np.exp(logits)
    scaled = exp_preds / np.sum(exp_preds)

    # One multinomial trial yields a one-hot row; argmax recovers the drawn index.
    probas = np.random.multinomial(1, scaled, 1)
    return np.argmax(probas)
    
def generate_music(model, vocab_size, vocabulary, starting_input, n_tokens_to_generate, temperature=1.0, stop_at_EOS=False):
    """Generate a list of tokens by repeatedly predicting the next token.

    The model is fed a fixed-length sliding window of token ids. After each
    prediction the sampled id is appended and the oldest id is dropped:
        [0, 1, 2] -predict-> [3]
        [1, 2, 3] -predict-> [4]
        [2, 3, 4] and so on

    Args:
        model: Callable applied to the one-hot encoded window; the first
            element of its result is passed to ``sample`` as the probability
            vector for the next token.
        vocab_size: Number of classes used for the one-hot encoding.
        vocabulary: Dict mapping token -> integer id.
        starting_input: Sequence of token ids used as the initial window, or
            ``None`` to use the built-in default window.
        n_tokens_to_generate: Generation stops once this many tokens have been
            produced.
        temperature: Forwarded to ``sample``. Lower values make the model more
            confident in its predictions.
        stop_at_EOS: When truthy, generation also stops as soon as the token
            ``"<EOS>"`` is produced.

    Returns:
        List of generated tokens, in generation order.
    """
    # Without EOS stopping the only exit test is ``i == n_tokens_to_generate``,
    # which a non-positive budget can never satisfy. Return nothing instead of
    # looping forever.
    if not stop_at_EOS and n_tokens_to_generate <= 0:
        return []

    # Keeps track of the number of tokens generated so far
    i = 0

    if starting_input is None:
        # randint's upper bound is exclusive, so this is 99 copies of token id
        # 159, not a random mix of tokens.
        # NOTE(review): 99 and 159 are hard-coded; presumably they match the
        # training sequence length and a valid id in the vocabulary - confirm.
        sliding_window = np.random.randint(159, 160, size=99)
    else:
        # ``is None`` above (rather than ``== None``) keeps array seeds working.
        sliding_window = np.asarray(starting_input).ravel()

    # Inverse of the vocabulary, to turn predicted integer ids back into tokens
    int_to_token_dict = {token_int: token for token, token_int in vocabulary.items()}

    # List that holds the final output. Grows by one token per prediction.
    prediction_output = []

    while True:
        # One-hot encode the window as a batch of one sequence
        prediction_input = to_categorical(sliding_window[np.newaxis, :], num_classes=vocab_size)

        # Predict the next token from the current window
        prediction = model(prediction_input)[0]
        i += 1

        # Sample the integer id of the next token
        index = sample(prediction, temperature)

        # Convert the id back to its token and record it
        result = int_to_token_dict[index]
        prediction_output.append(result)

        # Slide the window: drop the oldest id and append the new one, so the
        # sequence length stays the same
        sliding_window = np.append(sliding_window[1:], index)

        # A loading bar for the impatient
        print_loading(i, n_tokens_to_generate, len(np.unique(sliding_window)), stop_at_EOS)

        if (stop_at_EOS and result == "<EOS>") or (i == n_tokens_to_generate):
            break

    return prediction_output

In [9]:
# Load the token -> integer id mapping; the context manager closes the file
# handle as soon as the JSON has been parsed.
with open("./dictionary.json") as vocabulary_file:
    vocabulary = {token: int(token_int) for token, token_int in json.load(vocabulary_file).items()}

# Restore the saved Keras model used for generation.
model = tf.keras.models.load_model("../best_model.hdf5")

In [95]:
# Generate one 700-token piece at temperature 0.7 from the default seed window,
# convert it with convert_tokens_to_midi and render the result via .show("midi").
generated_tokens = generate_music(
    model,
    vocab_size=len(vocabulary),
    vocabulary=vocabulary,
    starting_input=None,
    n_tokens_to_generate=700,
    temperature=0.7,
)
generated_midi_stream = convert_tokens_to_midi(generated_tokens)
generated_midi_stream.show("midi")

700 / 700 generated. Unique tokens in sliding seq: 24


In [99]:
# Batch run: generate 50 pieces of 3000 tokens each (temperature 0.7) and write
# every piece to its own numbered MIDI file under ../generated_samples/.
for i in range(50):
    generated_tokens = generate_music(
        model,
        vocab_size=len(vocabulary),
        vocabulary=vocabulary,
        starting_input=None,
        n_tokens_to_generate=3000,
        temperature=0.7,
    )
    generated_midi_stream = convert_tokens_to_midi(generated_tokens)
    midi_filepath = f"../generated_samples/LSTM_objective{i}.mid"
    generated_midi_stream.write('midi', fp=midi_filepath)

3000 / 3000 generated. Unique tokens in sliding seq: 32


In [97]:
# Check the most recently assigned `generated_tokens`; on a top-to-bottom run
# that is the last piece produced by the batch loop above.
# NOTE(review): validate_tokens is not defined in this notebook - presumably it
# comes from midi_utils.ipynb via %run; confirm what the returned tuple means.
validate_tokens(generated_tokens)

(True, [])