In [1]:
RUNNING_IN_COLAB = False

if RUNNING_IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

In [2]:
# tensorflow/keras imports
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Bidirectional, Lambda, Embedding
from tensorflow.keras.callbacks import TensorBoard
from keras.preprocessing.sequence import pad_sequences

# Python libraries
from copy import deepcopy
import random
import pickle
import os
import time
import itertools

# miscellaneous imports (visualization, plotting, music21, etc)
import numpy as np
import seaborn as sb  # Heatmaps
import matplotlib.pyplot as plt
import util_plotting  # for plotting loss over time
from tqdm import tqdm  # for a progress bar
from music21 import *

Using TensorFlow backend.


In [3]:
MODELS_DIRECTORY = '../models'
PICKLES_DIRECTORY = 'pickles'
COMPOSITIONS_DIRECTORY = '../outputs/compositions'
TENSORBOARD_LOGS_DIRECTORY = '../tensorboard_logs'

if RUNNING_IN_COLAB:  # access the shared drive instead
    MODELS_DIRECTORY = '/content/drive/Shared drives/melodyAI/outputs/models'
    PICKLES_DIRECTORY = '/content/drive/Shared drives/melodyAI/pickles'
    COMPOSITIONS_DIRECTORY = '/content/drive/Shared drives/melodyAI/outputs/compositions'
    TENSORBOARD_LOGS_DIRECTORY = '/content/drive/Shared drives/melodyAI/tensorboard_logs'


if not os.path.exists(MODELS_DIRECTORY):
    os.makedirs(MODELS_DIRECTORY)
if not os.path.exists(PICKLES_DIRECTORY):
    os.makedirs(PICKLES_DIRECTORY)
if not os.path.exists(COMPOSITIONS_DIRECTORY):
    os.makedirs(COMPOSITIONS_DIRECTORY)
if not os.path.exists(TENSORBOARD_LOGS_DIRECTORY):
    os.makedirs(TENSORBOARD_LOGS_DIRECTORY)

In [4]:
assert os.path.exists(MODELS_DIRECTORY)
assert os.path.exists(PICKLES_DIRECTORY)
assert os.path.exists(COMPOSITIONS_DIRECTORY)
assert os.path.exists(TENSORBOARD_LOGS_DIRECTORY)

# Retrieving data

In [5]:
# load the datasets
with open(f"{PICKLES_DIRECTORY}/short_seqs_duration.pickle", 'rb') as short_duration,\
     open(f"{PICKLES_DIRECTORY}/short_seqs_pitch.pickle", 'rb') as short_pitch:
    short_seqs_duration = pickle.load(short_duration)
    short_seqs_pitch = pickle.load(short_pitch)

with open(f"{PICKLES_DIRECTORY}/medium_seqs_duration.pickle", 'rb') as medium_duration,\
     open(f"{PICKLES_DIRECTORY}/medium_seqs_pitch.pickle", 'rb') as medium_pitch:
    medium_seqs_duration = pickle.load(medium_duration)
    medium_seqs_pitch = pickle.load(medium_pitch)
    
with open(f"{PICKLES_DIRECTORY}/long_seqs_duration.pickle", 'rb') as long_duration,\
     open(f"{PICKLES_DIRECTORY}/long_seqs_pitch.pickle", 'rb') as long_pitch:
    long_seqs_duration = pickle.load(long_duration)
    long_seqs_pitch = pickle.load(long_pitch)

In [6]:
# retrieve the pitches and durations that were used to build the data set
with open(f'{PICKLES_DIRECTORY}/duration_vocab.pickle', 'rb') as d, open(f'{PICKLES_DIRECTORY}/pitch_vocab.pickle', 'rb') as p:
    duration_vocab = sorted(pickle.load(d))
    pitch_vocab = sorted(pickle.load(p))
    
# retrieve the likelihood of starting with a given pitch/duration
# these are used in sampling to find the starting pitch/duration of each composition
with open(f'{PICKLES_DIRECTORY}/starting_pitch_likelihood.pickle', 'rb') as p, open(f'{PICKLES_DIRECTORY}/starting_duration_likelihood.pickle', 'rb') as d:
    starting_pitch_likelihood = pickle.load(p)
    starting_duration_likelihood = pickle.load(d)
    
duration_vocab_size = len(duration_vocab)
pitch_vocab_size = len(pitch_vocab)
pitch_vocab = sorted(pitch_vocab)
print(f"There are {pitch_vocab_size} unique pitches in the dataset")
duration_vocab = sorted(duration_vocab)
print(f"There are {duration_vocab_size} unique duration values in the dataset")

There are 51 unique pitches in the dataset
There are 8 unique duration values in the dataset


# Experimenting with how temperature affects a softmax distribution

In [7]:
def softmax(x):
    """
    Simply performs the softmax operation on an input vector.
    
    Parameters:
        x: a vector of logits.
        
    Returns: 
        np.array: array representing the softmax distribution of the input logits.
    """
    return np.exp(x) / np.sum(np.exp(x), axis=0)

logits = np.array([1,2,3,4])
logits_temp_p0 = logits / 0.01
logits_temp_p2 = logits / 0.2
logits_temp_p4 = logits / 0.4
logits_temp_p6 = logits / 0.6
logits_temp_p8 = logits / 0.8
logits_temp_p10 = logits / 1.0
logits_temp_p15 = logits / 1.5
logits_temp_p150 = logits / 150

print(f"RAW: {softmax(logits)}")
print(f"TEMP 0.0: {softmax(logits_temp_p0)}")
print(f"TEMP 0.2: {softmax(logits_temp_p2)}")
print(f"TEMP 0.4: {softmax(logits_temp_p4)}")
print(f"TEMP 0.6: {softmax(logits_temp_p6)}")
print(f"TEMP 0.8: {softmax(logits_temp_p8)}")
print(f"TEMP 1.0: {softmax(logits_temp_p10)}")
print(f"TEMP 1.5: {softmax(logits_temp_p15)}")
print(f"TEMP 150: {softmax(logits_temp_p150)}")

RAW: [0.0320586  0.08714432 0.23688282 0.64391426]
TEMP 0.0: [5.14820022e-131 1.38389653e-087 3.72007598e-044 1.00000000e+000]
TEMP 0.2: [3.03841168e-07 4.50940275e-05 6.69254708e-03 9.93262055e-01]
TEMP 0.4: [5.07707490e-04 6.18514343e-03 7.53504725e-02 9.17956677e-01]
TEMP 0.6: [0.00547228 0.02897292 0.15339683 0.81215798]
TEMP 0.8: [0.01689363 0.05896455 0.20580651 0.71833531]
TEMP 1.0: [0.0320586  0.08714432 0.23688282 0.64391426]
TEMP 1.5: [0.07076911 0.13783941 0.26847452 0.52291696]
TEMP 150: [0.24750558 0.24916113 0.25082776 0.25250553]


# Model

### Helper functions and data structures

In [8]:
# Map from MIDI pitch => unique index.
# E.g., index of MIDI pitch `69` can be retrieved by evaluating `midi2index[69]` 
# A rest `-1` would be evaluated by `char2index[-1]`.
midi2index = {u:i for i, u in enumerate(pitch_vocab)}

# Map from unique index => MIDI pitch; inverse mapping of midi2index
index2midi = np.array(pitch_vocab)

# Map from duration value => unique index.
# E.g., index of duration `1.5` can be retrieved by evaluating `duration2index[1.5]` 
duration2index = {u:i for i, u in enumerate(duration_vocab)}

# Map from unique index => duration value; inverse mapping of midi2index
index2duration = np.array(duration_vocab)

def timestep_to3d(x):
    """
    Takes a 1D vector (a vector representing a single timestep) and converts it to a 3D vector (which is required by an LSTM network).
    
    Parameters:
        x: a 1D vector which is the one-hot encoding of a value at a single timestep.
        
    Returns: 
        np.array: a 3D vector corresponding to a one-hot encoding of a single timestep.
    """
    return np.reshape(x, (1, 1, x.shape[0]))

def vectorize_seqs(seqs, feature):
    """
    Mass conversion of MIDI pitches or duration values to their corresponding one
    """
    vectorized_pieces = []
    for seq in seqs:
        vectorized_seqs = []
        if feature == "pitch":
            for p in seq:
                vectorized_seqs.append(midi2index[p])
        elif feature == "duration":
            for d in seq:
                vectorized_seqs.append(duration2index[d])
        else:
            raise ValueError(f"\"{feature}\" is not a valid feature name.")
        vectorized_pieces.append(vectorized_seqs)
        
    return np.array(vectorized_pieces)

### Data preprocessing

In [9]:
# vectorize all of the data sets
vec_short_seqs_duration = vectorize_seqs(short_seqs_duration, "duration")
vec_short_seqs_pitch = vectorize_seqs(short_seqs_pitch, "pitch")
vec_medium_seqs_duration = vectorize_seqs(medium_seqs_duration, "duration")
vec_medium_seqs_pitch = vectorize_seqs(medium_seqs_pitch, "pitch")
vec_long_seqs_duration = vectorize_seqs(long_seqs_duration, "duration")
vec_long_seqs_pitch = vectorize_seqs(long_seqs_pitch, "pitch")

In [10]:
# pad the sequences with trailing 0's to make them all the same length
padded_short_duration = pad_sequences(vec_short_seqs_duration, padding="post", dtype='int32')
padded_short_pitch = pad_sequences(vec_short_seqs_pitch, padding="post", dtype='int32')
padded_medium_duration = pad_sequences(vec_medium_seqs_duration, padding="post", dtype='int32')
padded_medium_pitch = pad_sequences(vec_medium_seqs_pitch, padding="post", dtype='int32')
padded_long_duration = pad_sequences(vec_long_seqs_duration, padding="post", dtype='int32')
padded_long_pitch = pad_sequences(vec_long_seqs_pitch, padding="post", dtype='int32')

In [11]:
# sanity check (pitch/duration seqs should have same # of time steps for short/medium/long seqs)
assert padded_short_duration.shape[1] == padded_short_pitch.shape[1]
assert padded_medium_duration.shape[1] == padded_medium_pitch.shape[1]
assert padded_long_duration.shape[1] == padded_long_pitch.shape[1]

In [12]:
print(f'All short sequences now encompass {padded_short_duration.shape[1]} time steps.')
print(f'All medium sequences now encompass {padded_medium_duration.shape[1]} time steps.')
print(f'All long sequences now encompass {padded_long_duration.shape[1]} time steps.')

All short sequences now encompass 49 time steps.
All medium sequences now encompass 59 time steps.
All long sequences now encompass 69 time steps.


In [13]:
def get_batch(seqs, batch_size, bidirectional=True):
    """
    Batch definition to return training examples X and targets Y
    
    Parameters:
        seqs (np.array): 2D array of shape (num_seqs, seq_length) for which to choose batch samples.
        num_sequences (int): How many sequences (pieces) we want to return for a single training batch.
        bidirectional (boolean): Whether the batches are used in a bidirectional LSTM.    
    Returns:
        x_batch (np.array): Input sequence for training.
        y_batch (np.array): Output (label) sequence for training.
    """
    
    # the length of the vectorized melodies (number of pieces to choose from) 
    n = seqs.shape[0] - 1
    # randomly choose the pieces for this training batch
    seq_choice = np.random.choice(n, batch_size)

    # x_batch, y_batch provide the true inputs and targets for network training
    if bidirectional:
        # need all time steps for x and y, since network flows in both directions
        x_batch = np.array([seqs[seq] for seq in seq_choice])
        y_batch = deepcopy(x_batch)
    else:
        # don't need the final time step for x because the final time step should be predicted
        x_batch = np.array([seqs[mel][0:-1] for seq in seq_choice])
        # don't need the first time step for y because we don't predict the starting time step
        y_batch = np.array([seqs[mel][1:] for seq in seq_choice])  

    return x_batch, y_batch

# Composing with the model

In [14]:
def sample_distribution(dist_vec, num_categories):
    """
    Sample a softmax distribution vector.
    
    Parameters:
        dist_vec (np.array): A logit vector from which to take a sample from.
        num_categories (int): Number of categories to sample from (used for reshaping).
        
    Returns:
        int: The index of the output value which was sampled.
    """
    return tf.random.categorical(dist_vec.reshape(1, num_categories), 1).numpy().flatten()[0]

def compose(pitch_model, duration_model, pitch_prompt, duration_prompt, length=50):
    '''
    Composes a piece of music (in the format of a music21.stream.Stream object).
    
    Arguments:
        pitch_model (tf.keras.Model): The trained model for pitch predictions.
        duration_model (tf.keras.Model): The trained model for duration predictions.
        pitch_prompt (int): The first pitch of the piece (index of the one-hot encoded pitch vector).
        duration_prompt (int): The first duration of the piece (index of the one-hot encoded duration vector).
        length (int): How many time steps to generate.
       
       Returns:
           music21.stream.Stream(): The composed piece.
    '''    
    
    # the lists that hold the indices of the values to index in to pitches/durations lists
    generated_pitches, generated_durations = [pitch_prompt], [duration_prompt]
    
    current_pitch, current_duration = pitch_prompt, duration_prompt
    for t in range(length):
        # model only accepts 3D inputs
        pitch_vec = timestep_to3d(vectorize_pitch(current_pitch))
        duration_vec = timestep_to3d(vectorize_duration(current_duration))
        
        # predict the output distributions
        pitch_pred = pitch_model.predict(pitch_vec)
        duration_pred = duration_model.predict(duration_vec)
        # sample the distributions (returns the index of the one-hot vectors)
        next_pitch = sample_distribution(pitch_pred, pitch_vocab_size)
        next_duration = sample_distribution(duration_pred, duration_vocab_size)
        generated_pitches.append(next_pitch)
        generated_durations.append(next_duration)
        
        # get ready for next iteration
        current_pitch, current_duration = next_pitch, next_duration
        
    
    composed_stream = stream.Stream()
    for pair in list(zip(generated_pitches, generated_durations)):
        p = pitch.Pitch(midi=pitches[pair[0]])
        d = duration.Duration(durations[pair[1]])
        n = note.Note()
        n.pitch = p
        n.duration = d
        composed_stream.append(n)
    
    return composed_stream

# Generation infrastructure

In [15]:
bidirectional_vals = [True, False]
lstm_cell_vals = [500, 1000, 2000]
temperature_vals = [0.2, 0.5, 1.0, 10.0]

def generate_param_sets(start=None, end=None):
    """
    Creates sets of possible parameter combinations that will be tested over.
    Optionally allows to pass in indices to return a sub set of param sets in case training in to be done at separate times.
    
    Parameters:
        start (integer): The starting index of the range of sets to return from all possible sets.
        end (int): The ending index (inclusive) of the renage of sets to return from all possible sets.
        *NOTE that start and end must either both be None or integers; only suppling one argument causes an exception to be raised.
    """
    sets = []
    for bidirectional in bidirectional_vals:
        for lstm_cells in lstm_cell_vals:
            for temperature in temperature_vals:
                s = {
                    "bidirectional": bidirectional,
                    "lstm_cells": lstm_cells,
                    "temperature": temperature,
                }
                sets.append(s)
                    
    if start is None:
      start = 0
    if end is None or end > len(sets):
      end = len(sets) - 1  # subtract 1 because of the +1 in the end slice

    return sets[start:end+1]  # +1 to make the index passed in inclusive

In [26]:
def add_training_durations_string(dur1, dur2):
    """
    Adds the training duration of two models and returns them in a string format.
    
    dur1/dur2 (float): floating point representation of training time in seconds
    """
    
    total_time = dur1 + dur2
    hours = int(total_time // 3600)  # 3600 seconds/hour
    minutes = int((total_time - (3600 * hours)) // 60) # subtract the hours from remaining time; 60 sec/min
    seconds = (total_time - (3600 * hours)) - (60 * minutes)
    return "{}h {}m {:.2f}s".format(hours, minutes, seconds)

class MetadataModel:
    """
    A wrapper class for LSTM models that is used to gather metadata from training.
    """
    
    def __init__(self, vocab_size, embedding_dim, batch_size, lstm_cells, bidirectional, temperature, verbose=0, id=None):
        self._vocab_size = vocab_size
        self._embedding_dim = embedding_dim
        self._batch_size = batch_size
        self._lstm_cells = lstm_cells
        self._bidirectional = bidirectional
        self._temperature = temperature
        self.verbose = verbose # no underscore because this should be mutable
        self._name = '_'.join(["bidir" if bidirectional else "unidir", str(lstm_cells), str(temperature)])
        if id is not None:
            self.name += f"_{id}"
        self._total_training_time = 0.0 # in seconds; will be updated after training, obviously
        self._model = self._get_model()
        
    
    # ---- setting properties so that these attributes are immutable ----
    @property
    def name(self):
        return self._name
    
    @property
    def model(self):
        return self._model
        
    @property
    def vocab_size(self):
        return self._vocab_size
    
    @property
    def embedding_dim(self):
        return self._embedding_dim
    
    @property
    def batch_size(self):
        return self._batch_size
    
    @property
    def lstm_cells(self):
        return self._lstm_cells
    
    @property
    def bidirectional(self):
        return self._bidirectional
    
    @property
    def temperature(self):
        return self._temperature
        
        
    @property
    def total_training_time(self):
        return self._total_training_time
    
    
    # single layer Unidirectional or Bidirectional LSTM; will easily allow us to test various configurations
    def _get_model(self, optimizer="adam"):
        """
        Creates and compiles the LSTM model.
    
        Parameters:
            embedding_dim (int): How many different dimensions to encode values over.
            lstm_cells (int): The number of LSTM cells in the model.
            bidirectional (boolean): Whether to construct a bidirectional LSTM (as opposed to unidirectional).
            temperature (float): Value by the Lambda layer to divide output logits by.
            optimizer (string | tf.keras.optimizers.Optimizer): Which optimization algorith to use.
        
        Returns:
            tf.keras.Model: A compiled (ready to be trained) LSTM model.
        """
    
        model = Sequential()
        # NOTE: MIT tutorial used 2d input shape but Keras documentation shoes using 3, so if errors occur, check that out
        model.add(Embedding(self.vocab_size, self.embedding_dim, batch_input_shape=[self.batch_size, self.vocab_size]))
        # only dif. betwn. bi. LSTM and uni. LSTM is the presence/absence of Bidirectional wrapper
        # hidden layer 1; 20  units; input (# timesteps, # features); return a sequence of each time step's outputs
        # input_shape first value None makes it variable (we don't have fixed length sequences)
        # output of LSTM cell uses tanh activation, recurrent connections use sigmoid
        if self.bidirectional:
            model.add(Bidirectional(LSTM(self.lstm_cells, input_shape=(None, self.vocab_size), return_sequences=True)))
        else:
            model.add(LSTM(self.lstm_cells, input_shape=(None, self.vocab_size), return_sequences=True))

        # so that we can divibe by temperature before feeding through softmax
        model.add(Lambda(lambda x: x / self.temperature))

        # TimeDistributed is a wrapper allowing one output per time step; 
        # ...requires hidden layer to have return_sequences == True
        # TODO: Maybe TimeDistributed isn't necessary (MIT didn't use it)
        model.add(TimeDistributed(Dense(self.vocab_size, activation='softmax')))
        model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy', 'categorical_crossentropy'])
        return model
    
    
    def _train_model(self, X, bidirectional=True, epochs=5, batch_size=32, verbose=1, model_name='NULL_model_name', model_dataset='unknown_dataset', model_no='NULL_model_no'):
        """
        Trains an LSTM model.

        Parameters:
            model (tf.keras.Model): The model which is to be trained.
            X (np.array): A 3D vector (samples, timesteps, values) to train the network on.
            bidirectional (boolean): Indicates whether the model is a bidirectional.
            epochs (int): The number of epochs over which to train the model.
            batch_size (int): The number of samples tito show the odel before updating weights.
            verbose (int): The amount of information to print while training.

            Returns: None
        """
        if not os.path.exists(TENSORBOARD_LOGS_DIRECTORY+f'/model_{model_no}'):
            os.makedirs(TENSORBOARD_LOGS_DIRECTORY+f'/model_{model_no}')

        # Initialize TensorBoard object for logging\n",
        tensorboard = TensorBoard(log_dir=f'{TENSORBOARD_LOGS_DIRECTORY}/model_{model_no}/{model_name}-{model_dataset}')

        Y = deepcopy(X)
        if not bidirectional:
            X = X[0:-1] # do not input the final time step in unidirectional LSTM
            Y = Y[1:] # labels include all time steps but the first one in unidir. LSTM
        self.model.fit(X, Y, epochs=epochs, batch_size=batch_size, verbose=verbose, callbacks=[tensorboard])
    
    
    # returns a string version of training time
    @property
    def training_duration_string(self):
        total_time = self._total_training_time
        hours = int(total_time // 3600)  # 3600 seconds/hour
        minutes = int((total_time - (3600 * hours)) // 60) # subtract the hours from remaining time; 60 sec/min
        seconds = (total_time - (3600 * hours)) - (60 * minutes)
        return "{}h {}m {:.2f}s".format(hours, minutes, seconds)
            
    def train(self, X, dataset_name, model_no):
        start_time = time.time()
        self._train_model(self.model, X, self.bidirectional, self.verbose, model_name=self.name, model_dataset=dataset_name, model_no=model_no)
        end_time = time.time()
        self._total_training_time += end_time - start_time

In [27]:
def sample_start_pitch():
    """
    Sample from the dictionary containing the probabilities of starting a piece with a given pitch.
    
    Parameters:
        dictionary (dict(int: float)): The dictionary whose values are probabilities, and whose keys are the MIDI pitch.
                                       This dictionary must be sorted by key.
        
    Returns:
        int: The index of the MIDI pitch in the pitches list.
    """
    logits = np.array(list(starting_pitch_likelihood.values()))
    return np.argmax(np.random.multinomial(1, logits))
    
def sample_start_duration():
    """
    Sample from the dictionary containing the probabilities of starting a piece with a given duration.
    
    Parameters:
        dictionary (dict(float: float)): The dictionary whose values are probabilities, and whose keys are the quarter length duration.
                                         This dictionary must be sorted by key.
        
    Returns:
        np.array: The index of the sampled duration in the durations list.
    """
    logits = np.array(list(starting_duration_likelihood.values()))
    return np.argmax(np.random.multinomial(1, logits))

In [28]:
def run_tests(param_set_start=None, param_set_end=None,\
              num_compositions=5, verbose=0):
    """
    Creates, trains, and composes with networks of a multitude of parameter combinations.
    
    Parameters:
        param_set_start (int): The first of the generated param. sets to be tested on.
        param_set_end (int): The last of the generated param. sets to be tested on.
        num_compositions (int): How many pieces to compose for each model being tested.
        verbose (int): The amount of information to print throughout the test.
        
        Returns: None
    """
    
    param_sets = generate_param_sets(param_set_start, param_set_end)
    pitch_models, duration_models = [], []
    model_no = 0 # which iteration of model we are on
    if param_set_start is not None:
        model_no = param_set_start
            
    test_start_time=time.time()            
    for params in param_sets:
        bidirectional = params["bidirectional"]
        lstm_cells = params["lstm_cells"]
        temperature = params["temperature"]
        
        if verbose:
            print(f"Model {model_no}:\n\tbidirectional – {bidirectional}\n\tlstm cells – {lstm_cells}\
            \n\ttemperature – {temperature}")
        
        # TODO: determine embedding dimensions to test
        embedding_dim = 32
        # TODO: determine batch_sizes to test
        batch_size=16
        #TODO: try different parameter sets for the duration/pitch networks, since they are really learning
        #      very different things
        pitch_model = MetadataModel(pitch_vocab_size, embedding_dim, batch_size, lstm_cells, bidirectional, temperature, verbose=verbose)
        duration_model = MetadataModel(duration_vocab_size, embedding_dim, batch_size, lstm_cells, bidirectional, temperature, verbose=verbose)
        
        # train the pitch network
        if verbose:
            print('\n\tTraining pitch generation network...')
            print('\t\tOn short data set:')
        pitch_model.train(short_seqs_pitch, 'pitch_short', model_no)
        if verbose:
            print('\t\tOn medium data set:')
        pitch_model.train(medium_seqs_pitch, 'pitch_medium', model_no)
        if verbose:
            print('\t\tOn long data set:')
        pitch_model.train(long_seqs_pitch, 'pitch_long', model_no)
        pitch_model.model.save(f"{MODELS_DIRECTORY}/{model_no}_pitch_{pitch_model.name}.h5")
        if verbose:
            print(f"\t\tPitch model training complete: saved at {MODELS_DIRECTORY}/{model_no}_pitch_{pitch_model.name}.h5")
            print(f"\t\tTotal training time: {pitch_model.training_duration_string}'")
        
        # train the duration network
        if verbose:
            print('\n\tTraining rhythm (duration) generation network...')
            print('\t\tOn short data set:')
        duration_model.train(short_seqs_duration, 'duration_short', model_no)
        if verbose:
            print('\t\tOn medium data set:')
        duration_model.train(medium_seqs_duration, 'duration_medium', model_no)
        if verbose:
            print('\t\tOn long data set:')
        duration_model.train(long_seqs_duration, 'duration_long', model_no)
        duration_model.model.save(f"{MODELS_DIRECTORY}/{model_no}_duration_{duration_model.name}.h5")
        if verbose:
            print(f"\t\tDuration model training complete: saved at {MODELS_DIRECTORY}/{model_no}_duration_{duration_model.name}.h5")
            print(f"\t\tTotal training time: {duration_model.training_duration_string}")
        if verbose:
            print(f'\n\tModel {model_no} training complete')
            print(f'\tTotal time to train pitch and duration models: {add_training_durations_string(duration_model.total_training_time, pitch_model.total_training_time)}')
        
        # compose outputs and save them
        if not os.path.exists(f"{COMPOSITIONS_DIRECTORY}/model_{model_no}"):
            os.makedirs(f"{COMPOSITIONS_DIRECTORY}/model_{model_no}")
        comp_count = 1
        for c in range(num_compositions):
            pitch_prompt = sample_start_pitch()
            duration_prompt = sample_start_duration()
            composition = compose(pitch_model.model, duration_model.model, pitch_prompt, duration_prompt, length=100)
            composition.write('musicxml', f'{COMPOSITIONS_DIRECTORY}/model_{model_no}/composition_{comp_count}.mxl')
            composition.write('midi', f'{COMPOSITIONS_DIRECTORY}/model_{model_no}/composition_{comp_count}.mid')
            comp_count += 1
        print(f"\t\tCompositions successfully written to {COMPOSITIONS_DIRECTORY}/model_{model_no}")
            
        model_no = model_no + 1
        print('–' * 50 + "\n\n")
        
        
    test_end_time=time.time()
    total_time = test_end_time - test_start_time
    hours = int(total_time // 3600)  # 3600 seconds/hour
    minutes = int((total_time - (3600 * hours)) // 60) # subtract the hours from remaining time; 60 sec/min
    seconds = (total_time - (3600 * hours)) - (60 * minutes)
    print("TRAINING COMPLETE – elapsed time: {}h {}m {:.2f}s".format(hours, minutes, seconds))
    

In [29]:
run_tests(verbose=1)

Model 0:
	bidirectional – True
	lstm cells – 500            
	temperature – 0.2

	Training pitch generation network...
		On short data set:


TypeError: can't pickle _thread._local objects