# Notes and Resources
* Towards data science tutorial
    * https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5
* Encompassing tutorial
    * https://www.datacamp.com/community/tutorials/using-tensorflow-to-compose-music
* LSTM for Time Series
    * https://machinelearningmastery.com/reshape-input-data-long-short-term-memory-networks-keras/
* Model Checkpoint and Early Stopping
    * https://machinelearningmastery.com/how-to-stop-training-deep-neural-networks-at-the-right-time-using-early-stopping/
* Tips for Improving RNNs
    * https://danijar.com/tips-for-training-recurrent-neural-networks/
* Alternative form of labeling for efficiency and scalability?
    * https://datascience.stackexchange.com/questions/24729/one-hot-encoding-alternatives-for-large-categorical-values

# Framing the problem
Music generation is framed here as a multiclass classification problem: given a sequence of preceding notes/chords, the model predicts which note/chord comes next. Each unique note or chord is one class, represented by a one-hot vector whose length is the size of the 'musical space' (the vocabulary) of the dataset.

In [27]:
## Imports

# MIDI processing
from music21 import *

# Tensorflow and Keras
import tensorflow.keras as keras
from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping
%load_ext tensorboard
import tensorflow as tf

# Sklearn
from sklearn.model_selection import train_test_split

# Wrangling
import numpy as np

# Visual
import matplotlib.pyplot as plt

# Misc
import pickle
import os
from datetime import datetime

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [28]:
def parse_string_to_chord(concatenated_chord):
    """Split a concatenated chord string into its individual note names.

    Every digit character is treated as the end of one note name, so
    'C4E4A4' becomes ['C4', 'E4', 'A4']. Characters that follow the
    final digit are not returned.

    :param concatenated_chord: <class 'str'> Note names joined with no
        separator, e.g. 'A3C4F#4'.
    :return: <class 'list'> The note names as strings, in their original
        order, for music21
    """
    # Exclusive end offset of each note: one past every digit character
    note_ends = [
        position + 1
        for position, symbol in enumerate(concatenated_chord)
        if symbol.isdigit()
    ]

    # Each note begins where the previous one stopped (the first at 0)
    note_starts = [0] + note_ends[:-1]

    # Cut the string at the paired offsets
    return [
        concatenated_chord[begin:end]
        for begin, end in zip(note_starts, note_ends)
    ]


def generate_music21_stream_from_int_chords(chord_list, mapping, instrument_part=None):
    """Convert a list of integer-encoded chords to a music21 stream.

    Each integer is looked up in ``mapping`` to get its string form. The
    string is first tried as a single note; if constructing the note
    fails, the string is split with ``parse_string_to_chord`` and added
    as a chord instead.

    :param chord_list: Array like list of integer chords to be converted
        to a music21 stream.
    :param mapping: <class 'dict'> that maps integer chords to
        string chords.
    :param instrument_part: <class 'music21.instrument.Instrument'> placed
        at the start of the stream. Defaults to KeyboardInstrument().
    :return: <class 'music21.stream.Part'>
    :raises KeyError: if an element of ``chord_list`` is not a key of
        ``mapping``.
    """

    # Default instrument (explicit None check: only "not supplied" triggers it)
    if instrument_part is None:
        instrument_part = instrument.KeyboardInstrument()

    # Map to string list
    chord_names = [mapping[encoded_chord] for encoded_chord in chord_list]

    ## Make stream
    # Generate stream with the chosen instrument as its first element
    generated_stream = stream.Part()
    generated_stream.append(instrument_part)

    # Append notes to stream
    for chord_name in chord_names:
        try:
            # Only the note construction is guarded, not the append
            element = note.Note(chord_name)
        except Exception:
            # Not usable as a single note: treat it as concatenated note names.
            # `Exception` (not a bare except) so Ctrl-C / SystemExit still propagate.
            element = chord.Chord(parse_string_to_chord(chord_name))
        generated_stream.append(element)

    # Return the music21 stream
    return generated_stream

# Loading and preprocessing data

In [29]:
# Location of the pickled feature/encoding dictionary
path_to_pickled_data = '../../pickled_data/pickled_tentatively_transposed_feature_and_encoding_dict'

# NOTE: unpickling can execute arbitrary code; only load files you trust
with open(path_to_pickled_data, 'rb') as fobj:
    data_dict = pickle.load(fobj)

# Show which entries the file provides
print(data_dict.keys())

# Bind each entry of the dictionary to a name of its own
(
    chords_ds,
    durations_ds,
    chord_to_int,
    duration_to_int,
    int_to_chord,
    int_to_duration,
) = (
    data_dict[entry_name]
    for entry_name in (
        'chords_ds',
        'durations_ds',
        'chord_to_int',
        'duration_to_int',
        'int_to_chord',
        'int_to_duration',
    )
)

dict_keys(['chords_ds', 'durations_ds', 'chord_to_int', 'duration_to_int', 'int_to_chord', 'int_to_duration'])


In [30]:
# Concatenate the nested lists of chords_ds into one flat list
chords_ds_flattened = []
for nested_chords in chords_ds:
    chords_ds_flattened.extend(nested_chords)

# Sanity checks on the flattened data
print(chords_ds_flattened[:10])
every_item_is_str = all(isinstance(x, str) for x in chords_ds_flattened)
print("Flattend chords_ds type is <class 'str'>?", every_item_is_str)
print('Flattend chords_ds length:', len(chords_ds_flattened))
print('Unique notes/chords', len(chord_to_int))

['C4E4A4', 'C4E4G4', 'A3C4F#4', 'E3A3', 'E3B3', 'A3C4', 'A3D4', 'A3C4E4', 'C3F3A3', 'F3A3E4']
Flattend chords_ds type is <class 'str'>? True
Flattend chords_ds length: 25140
Unique notes/chords 1759


In [31]:
%%time

# Data preprocessing

## Defining training data -- each sample is a window of consecutive chords

# Dimensions for LSTM
n_sequence_patterns = None  # number of samples, set once the windows are built
n_vocab = len(np.unique(chords_ds_flattened))  # number of distinct chord/note strings
sequence_length = 100  # number of time steps in a sample
num_features = 1  # values per time step (a single integer-encoded chord/note)

## Sequence construction
# The window starting at position k covers k .. k + sequence_length - 1 and
# its target is the element at position k + sequence_length.
window_starts = range(len(chords_ds_flattened) - sequence_length)

# Integer-encoded input windows
network_input = [
    [chord_to_int[name] for name in chords_ds_flattened[start:start + sequence_length]]
    for start in window_starts
]

# Integer-encoded chord that follows each window
network_output = [
    chord_to_int[chords_ds_flattened[start + sequence_length]]
    for start in window_starts
]

# Number of samples actually produced
n_sequence_patterns = len(network_input)

Wall time: 243 ms


In [32]:
## LSTM input layout is (samples, timesteps, features); here there is 1 feature
lstm_input_shape = (n_sequence_patterns, sequence_length, 1)

# Reshape, then scale every integer code by the vocabulary size (n_vocab)
network_input = np.reshape(network_input, lstm_input_shape) / float(n_vocab)

# Inspect the input
display('Input shape:', network_input.shape)
display(
    f'Sample input sequence (i.e. of shape (1 sample, t=5 out of {sequence_length} timesteps, 1 feature -- the chord)):',
    network_input[0][:5],
)

## One-hot encode the targets with n_vocab classes
network_output = keras.utils.to_categorical(network_output, num_classes=n_vocab)

# Inspect output
display('Output shape:', network_output.shape)
display(
    f'The expected note after the preceding {sequence_length} notes:',
    network_output[0],
)

'Input shape:'

(25040, 100, 1)

'Sample input sequence (i.e. of shape (1 sample, t=5 out of 100 timesteps, 1 feature -- the chord)):'

array([[0.40250142],
       [0.40534395],
       [0.14837976],
       [0.72143263],
       [0.72768619]])

'Output shape:'

(25040, 1759)

'The expected note after the preceding 100 notes:'

array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)

In [33]:
### Divide sets
# Both splits use the same settings: no shuffling, 20% set aside
split_options = dict(random_state=0, shuffle=False, test_size=0.2)

## First split: X_holdout_validation and y_holdout_validation are not used
## until a model is optimized
X_train_test, X_holdout_validation, y_train_test, y_holdout_validation = train_test_split(
    network_input, network_output, **split_options
)

# Inspect after sklearn function
print('Train-test/Holdout validation array:')
for split_array in (X_train_test, y_train_test, X_holdout_validation, y_holdout_validation):
    print(split_array.shape)

## Second split: train/test (test will be used for validation=[] for hyperparam optimization)
X_train, X_test, y_train, y_test = train_test_split(
    X_train_test, y_train_test, **split_options
)

# Inspect now
print()
print('Train/Test Arrays:')
for split_array in (X_train, y_train, X_test, y_test):
    print(split_array.shape)

# Check n_vocab
print()
print('n_vocab:',n_vocab)

Train-test/Holdout validation array:
(20032, 100, 1)
(20032, 1759)
(5008, 100, 1)
(5008, 1759)

Train/Test Arrays:
(16025, 100, 1)
(16025, 1759)
(4007, 100, 1)
(4007, 1759)

n_vocab: 1759


In [34]:
## Build the model
# Assemble and compile a Sequential network of three stacked LSTM layers,
# batch normalization, dropout and a softmax Dense layer with n_vocab outputs.

# On LSTM crashing
# https://github.com/tensorflow/tensorflow/issues/37942

# Hyperparameters
# lstm_hidden_units is shared by all three LSTM layers; dense_hidden_units is
# only referenced by the commented-out dense block further down in this cell.
lstm_hidden_units = 512
dense_hidden_units = 256
batch_size = 128
epochs = 256

# Save current time (used in the model name and in output file names)
now = datetime.now().strftime('%Y%m%d_%H-%M-%S')

# Instantiate the sequential model
name = f'{now}_lstm'
model = keras.models.Sequential(name=name)

# Input layer
# input_shape is (timesteps, features), read from the prepared input array.
# return_sequences=True so the next LSTM layer receives the full sequence.
# NOTE(review): a non-zero recurrent_dropout presumably prevents Keras from
# using its cuDNN LSTM kernel -- confirm in the Keras LSTM docs if training
# speed matters.
model.add(
    keras.layers.LSTM(
        lstm_hidden_units,
        input_shape=(network_input.shape[1], network_input.shape[2]),
        recurrent_dropout=0.3,
        return_sequences=True,
        name='input_lstm'
))

# LSTM hidden 0 (also returns the full sequence for the layer after it)
model.add(keras.layers.LSTM(lstm_hidden_units, return_sequences=True, recurrent_dropout=0.3))

# LSTM hidden 1 (return_sequences is left at its default and no
# recurrent_dropout is set on this layer)
model.add(keras.layers.LSTM(lstm_hidden_units))

# Batch norm
model.add(keras.layers.BatchNormalization())

# Dropout
model.add(keras.layers.Dropout(0.3))

# Optional dense block, currently disabled:
# ## Dense
# model.add(keras.layers.Dense(dense_hidden_units,))
# model.add(keras.layers.Activation('relu'))

# # Batch norm
# model.add(keras.layers.BatchNormalization())

# # Dropout
# model.add(keras.layers.Dropout(0.3,))

## Output: one softmax unit per vocabulary entry, matching the one-hot targets
model.add(keras.layers.Dense(n_vocab, activation='softmax', name='dense_output'))

# Compile it (no metrics are requested, only the loss)
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

# Display it 
# NOTE(review): presumably the ./figures directory must already exist for
# plot_model to write the image -- confirm.
keras.utils.plot_model(model, to_file=f'./figures/{now}_lstm.png', expand_nested=True, show_shapes=True)
model.summary()

In [25]:
# Save the untrained model
#model.save(f'./untrained_models/{now}_lstm')

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


ValueError: Attempted to save a function b'__inference_input_lstm_layer_call_fn_13637' which references a symbolic Tensor Tensor("dropout/mul_1:0", shape=(None, 512), dtype=float32) that is not a simple constant. This is not supported.

In [26]:
%%time

### Fitting the model
# Train the model on X_train / y_train when `fit` is True; the returned
# History object is kept in `history`.
## Callbacks
fit = True
if (fit):
    # Both callbacks monitor 'loss' (the training loss).
    # NOTE(review): they are constructed here but the `callbacks=` argument
    # below is commented out, so neither checkpointing nor early stopping is
    # passed to this fit call. No validation data is passed either.
    checkpoint = ModelCheckpoint(f'./saved_models_h5/{now}_best_model.h5', monitor='loss', mode='min', save_best_only=True, verbose=1)
    early_stopping = EarlyStopping(monitor='loss', verbose=1, patience=32)
    #log_dir = "logs\\fit\\" + datetime.now().strftime("%Y%m%d-%H%M%S")
    #tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

    ## Training
    history = model.fit(
        X_train,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        #callbacks=[checkpoint, early_stopping],
    )

Train on 16025 samples
Epoch 1/256
  128/16025 [..............................] - ETA: 11:52

InternalError:  Blas GEMM launch failed : a.shape=(128, 1), b.shape=(1, 512), m=128, n=512, k=1
	 [[{{node 20210424_10-34-07_lstm/input_lstm/while/body/_1/MatMul}}]] [Op:__inference_distributed_function_24813]

Function call stack:
distributed_function


In [None]:
# Save model history
# `history` only exists when the fit cell actually trained a model
if fit:
    # Pickle the plain per-epoch metrics dict rather than the History callback
    # object itself, which carries a reference to the model
    with open(f'./history/pickled_{now}_history', 'wb') as fobj:
        pickle.dump(history.history, fobj)

In [10]:
# When training was skipped, use a previously saved model instead.
# NOTE(review): the checkpoint callback in the fit cell is configured with
# f'./saved_models_h5/{now}_best_model.h5', so this fixed file name has to be
# provided separately -- confirm which file is intended.
if not fit:
    model = keras.models.load_model('./saved_models_h5/best_model.h5')

In [15]:
# Take a random starting sequence from the test set (not the holdout validation set)
random_ix_of_sequence_elem_in_x_test = np.random.randint(0, X_test.shape[0])
chord_sequence = X_test[random_ix_of_sequence_elem_in_x_test]

# Keep an untouched copy; chord_sequence is modified during generation
original_sequence = chord_sequence.copy()

# Inspect chord_sequence
print(original_sequence.shape)

# Round before casting: code / n_vocab * n_vocab can land just below the
# integer in floating point, and astype(int) alone would truncate it to the
# wrong chord code
print(np.rint(original_sequence * n_vocab).astype(int))

(100, 1)
[[ 629]
 [1164]
 [1164]
 [1164]
 [1399]
 [1164]
 [1164]
 [1164]
 [1164]
 [1164]
 [1164]
 [1399]
 [ 629]
 [ 629]
 [1059]
 [ 629]
 [1399]
 [1059]
 [1399]
 [1403]
 [1403]
 [1164]
 [1164]
 [1164]
 [1399]
 [ 629]
 [ 629]
 [ 629]
 [1164]
 [1164]
 [1164]
 [1399]
 [1164]
 [1164]
 [1164]
 [1164]
 [1164]
 [1164]
 [1399]
 [ 629]
 [ 629]
 [1059]
 [ 629]
 [1399]
 [1059]
 [1399]
 [1403]
 [1403]
 [1164]
 [1059]
 [1164]
 [1399]
 [ 629]
 [1059]
 [1399]
 [ 629]
 [1164]
 [1399]
 [ 629]
 [1059]
 [1399]
 [1399]
 [1164]
 [1059]
 [1164]
 [1399]
 [ 629]
 [1059]
 [1399]
 [ 629]
 [1164]
 [1399]
 [ 629]
 [1059]
 [1399]
 [1399]
 [1164]
 [1059]
 [1164]
 [1399]
 [ 629]
 [1059]
 [1399]
 [ 629]
 [1164]
 [1399]
 [ 629]
 [1059]
 [1399]
 [1399]
 [1399]
 [1059]
 [1059]
 [1399]
 [1059]
 [1059]
 [1399]
 [1059]
 [1059]
 [1399]]


In [16]:
%%time

# Predicted chord strings, in the order they are generated
prediction_output = []

# Generate 64 notes/chords, feeding every prediction back into the window
for note_index in range(64):

    # Present the current window as a single sample: (1, sequence_length, 1)
    prediction_input = np.reshape(chord_sequence, (1, sequence_length, 1))

    # Output of the softmax layer for this window
    prediction = model.predict(prediction_input, verbose=0)

    # Greedy choice: the position with the highest predicted value
    index = np.argmax(prediction)

    # Look up the chord string for that position and record it
    result = int_to_chord[index]
    prediction_output.append(result)

    # Slide the window: drop the oldest value and append the new one, scaled
    # by n_vocab like the training input. np.append flattens its inputs, so
    # chord_sequence is 1-D with sequence_length values from here on.
    chord_sequence = np.append(chord_sequence[1:], index / float(n_vocab))

In [19]:
# Convert the chord_sequence back to integers
# Round before casting: code / n_vocab * n_vocab is not always exactly the
# original integer in floating point, and astype(int) alone truncates, which
# would silently select the neighbouring chord code
chord_sequence = np.rint(chord_sequence * n_vocab).astype(int)
chord_sequence

array([115, 172, 172,  42, 172,  42, 172, 115, 172, 172,  42,  42, 172,
       115, 172, 172,  42, 172,  42, 172, 115, 115, 115, 163, 163, 163,
       163, 163, 163, 163, 163, 163, 163, 163, 163, 163, 189, 189, 189,
       189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
       189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
       189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
       189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189, 189,
       189, 189, 189, 189, 189, 189, 189, 189, 189])

In [20]:
# Build a music21 stream from the integer chord codes in chord_sequence
# (nothing is written to disk in this cell)
generated_song = generate_music21_stream_from_int_chords(chord_sequence, mapping=int_to_chord)

# Save the original song

In [22]:
# Save the generated song
if (not fit):
    # No model was trained in this run, so stamp the file with the current
    # time, using the same '%Y%m%d_%H-%M-%S' layout as the model name
    now = datetime.now().strftime('%Y%m%d_%H-%M-%S')

# Show the timestamp that is actually used in the file name
print(now)
generated_song.write('midi', f'./generated_songs/{now}_generated_song.mid')

20210422_23-18-57


'./generated_songs/20210422_23-18-57_generated_song.mid'