In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Bidirectional, Lambda
from sklearn.model_selection import train_test_split
import numpy as np
from music21 import *
from copy import deepcopy
import random
import pickle

# Retrieving data

In [None]:
# load the datasets and then split in to train/test data sets
# note that we don't need label sets, only input sets for bidirectional LSTM (labels == inputs)
# unidirectional label set is the same as the inputs but without the first time step
# NOTE(review): pickle.load can execute arbitrary code; only load pickle files you created yourself.
TRAIN_FRACTION = 0.95
SPLIT_SEED = 42  # fixed so that Restart & Run All reproduces the same train/test partition


def load_and_split(pickle_path, train_size=TRAIN_FRACTION, random_state=SPLIT_SEED):
    """Load one pickled sequence collection and split it into train and test parts.

    pickle_path: path of the pickle file to read
    train_size: fraction of the sequences that goes in to the training part
    random_state: seed handed to train_test_split so the shuffle is repeatable

    returns the [train, test] list produced by train_test_split
    """
    with open(pickle_path, 'rb') as pickle_file:
        sequences = pickle.load(pickle_file)
    return train_test_split(sequences, train_size=train_size, random_state=random_state)


short_seqs_duration_train, short_seqs_duration_test = load_and_split("pickles/short_sequences_duration.pickle")
short_seqs_pitch_train, short_seqs_pitch_test = load_and_split("pickles/short_sequences_pitch.pickle")

medium_seqs_duration_train, medium_seqs_duration_test = load_and_split("pickles/medium_sequences_duration.pickle")
medium_seqs_pitch_train, medium_seqs_pitch_test = load_and_split("pickles/medium_sequences_pitch.pickle")

long_seqs_duration_train, long_seqs_duration_test = load_and_split("pickles/long_sequences_duration.pickle")
long_seqs_pitch_train, long_seqs_pitch_test = load_and_split("pickles/long_sequences_pitch.pickle")

In [None]:
# the pitch and duration vocabularies that were used to build the data set;
# an index in a one-hot output vector is turned back in to a real value through these
with open('pickles/durations.pickle', 'rb') as durations_file, \
     open('pickles/pitches.pickle', 'rb') as pitches_file:
    durations = pickle.load(durations_file)
    pitches = pickle.load(pitches_file)

# vocabulary sizes
num_durations, num_pitches = len(durations), len(pitches)

# axis 1 of each training array is taken as that data set's sequence length
short_seq_len, medium_seq_len, long_seq_len = (
    seqs.shape[1]
    for seqs in (short_seqs_duration_train, medium_seqs_duration_train, long_seqs_duration_train)
)

# Experimenting with how temperature affects a distribution

In [None]:
def softmax(x):
    """Return the softmax of x computed along axis 0.

    The maximum along axis 0 is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps np.exp from overflowing to inf
    (and the quotient from becoming NaN) when the logits are large, e.g. after
    dividing by a very small temperature.

    x: array-like of logits
    returns an array of the same shape as x whose entries along axis 0 sum to 1
    """
    x = np.asarray(x)
    if x.size == 0:
        # nothing to normalise; np.max would raise on an empty array
        return np.exp(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

# the same logits divided by a range of temperatures:
# a divisor below 1 stretches the logits apart, a divisor above 1 pulls them together
logits = np.array([1,2,3,4])
logits_temp_p0 = logits / 0.01
logits_temp_p2 = logits / 0.2
logits_temp_p4 = logits / 0.4
logits_temp_p6 = logits / 0.6
logits_temp_p8 = logits / 0.8
logits_temp_p10 = logits / 1.0
logits_temp_p15 = logits / 1.5
logits_temp_p150 = logits / 150

print(f"RAW: {softmax(logits)}")
# label corrected: the divisor used for logits_temp_p0 is 0.01, not 0.0
print(f"TEMP 0.01: {softmax(logits_temp_p0)}")
print(f"TEMP 0.2: {softmax(logits_temp_p2)}")
print(f"TEMP 0.4: {softmax(logits_temp_p4)}")
print(f"TEMP 0.6: {softmax(logits_temp_p6)}")
print(f"TEMP 0.8: {softmax(logits_temp_p8)}")
print(f"TEMP 1.0: {softmax(logits_temp_p10)}")
print(f"TEMP 1.5: {softmax(logits_temp_p15)}")
print(f"TEMP 150: {softmax(logits_temp_p150)}")

# Model

### Helper functions

In [None]:
# takes a 1D vector (a single time step) and converts it to 3D for input to the network
def timestep_to3d(x):
    """Reshape a 1D array of length n in to an array of shape (1, 1, n)."""
    num_features = x.shape[0]
    return x.reshape((1, 1, num_features))

# build the one-hot vector for a time step from the index position of its encoded value
def vectorize(index, vec_size):
    """Return a float32 vector of length vec_size that is 1.0 at int(index) and 0.0 elsewhere."""
    one_hot = np.zeros(vec_size, dtype=np.float32)
    one_hot[int(index)] = 1.0
    return one_hot

# inverse of vectorize: recover the index of the one-hot encoded value
def unvectorize(x):
    """Return the position of the largest entry of x (over the flattened array)."""
    return np.asarray(x).argmax()

### Model building and training

In [None]:
# single layer Unidirectional or Bidirectional LSTM; will easily allow us to test various configurations
def get_model(num_features, num_layers=20, bidirectional=True, temperature=1.0, optimizer="adam"):
    """Build and compile a one-hidden-layer LSTM that emits a softmax distribution per time step.

    num_features: width of the one-hot vectors; used for both the input and the output
    num_layers: number of units in the LSTM layer (despite the name, not a count of layers)
    bidirectional: when True the LSTM is wrapped in Bidirectional
    temperature: strictly positive divisor applied to the output logits before the softmax
    optimizer: forwarded to model.compile

    returns the compiled Sequential model
    raises ValueError when temperature is not strictly positive
    """
    if temperature <= 0:
        raise ValueError(f"temperature must be > 0, got {temperature}")

    model = Sequential()
    # only dif. betwn. bi. LSTM and uni. LSTM is the presence/absence of Bidirectional wrapper
    # hidden layer 1; num_layers units; input (# timesteps, # features); return a sequence of each time step's outputs
    # input_shape first value None makes it variable (we don't have fixed length sequences)
    # output of LSTM cell uses tanh activation, recurrent connections use sigmoid
    recurrent = LSTM(num_layers, input_shape=(None, num_features), return_sequences=True)
    model.add(Bidirectional(recurrent) if bidirectional else recurrent)

    # TimeDistributed is a wrapper allowing one output per time step;
    # ...requires hidden layer to have return_sequences == True
    # the Dense layer is left linear so that it produces logits
    model.add(TimeDistributed(Dense(num_features)))

    # divide the logits by temperature right before the softmax; scaling the LSTM
    # outputs ahead of the Dense layer (as before) is not a temperature, because the
    # Dense weights and bias sit between the division and the softmax
    model.add(Lambda(lambda logits: logits / temperature))
    model.add(tf.keras.layers.Activation('softmax'))

    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy', 'categorical_crossentropy'])
    return model

In [None]:
# train LSTM
def train_model(model, X, bidirectional=True, epochs=30, batch_size=32, verbose=1, validation_split=0.2):
    """Fit model on the sequences in X, deriving the labels from X itself.

    model: object with a Keras-style fit(inputs, labels, ...) method
    X: sequences indexed as (sequence, time step, feature); axis 1 is treated as time
    bidirectional: True  -> labels are the inputs themselves
                   False -> next-step prediction: inputs drop the last time step of every
                            sequence and labels drop the first one
    epochs, batch_size, verbose, validation_split: forwarded to model.fit
    """
    if bidirectional:
        # fit only reads its arguments, so the same array can serve as inputs and labels
        inputs, labels = X, X
    else:
        sequences = np.asarray(X)
        # shift along the TIME axis (axis 1); slicing axis 0 would drop whole sequences
        # and pair every sequence with its neighbour instead of with its own next step
        inputs = sequences[:, :-1]
        labels = sequences[:, 1:]
    model.fit(inputs, labels, epochs=epochs, batch_size=batch_size, verbose=verbose, validation_split=validation_split)

# Composing with the model

In [None]:
# return a single index from a sample of a softmax distribution vector
def sample_distribution(dist_vec, num_categories):
    """Draw one category index at random according to the probabilities in dist_vec.

    The previous implementation handed the probabilities to tf.random.categorical,
    which interprets its argument as logits; the softmax output was therefore
    soft-maxed a second time and the draw was close to uniform. The probabilities
    are now used directly.

    dist_vec: array holding exactly num_categories probabilities (any shape with that many entries)
    num_categories: number of categories in the distribution

    returns the sampled index as an integer in range(num_categories)
    raises ValueError when the probabilities are not finite or do not have a positive sum
    """
    probs = np.asarray(dist_vec, dtype=np.float64).reshape(num_categories)
    total = probs.sum()
    if not np.isfinite(total) or total <= 0.0:
        raise ValueError("dist_vec must contain finite probabilities with a positive sum")
    # float32 network outputs rarely sum to exactly 1, which np.random.choice rejects,
    # so renormalise in float64 first
    return np.random.choice(num_categories, p=probs / total)

# use the trained models to compose new music from a single starting input and a desired length
def compose(pitch_model, duration_model, pitch_prompt, duration_prompt, length=50):
    '''Generate a piece one time step at a time, feeding each sampled value back in as the next input.

       pitch_model: the trained model for pitch predictions
       duration_model: trained model for duration predictions
       pitch_prompt: the first pitch of the piece (index of the one-hot encoded pitch vector)
       duration_prompt: the first duration of the piece (index of the one-hot encoded duration vector)
       length: how many time steps to generate after the prompt

       returns a music21.stream.Stream object representing the composition
    '''
    # indices in to the pitches/durations lists, starting with the prompt
    pitch_indices = [pitch_prompt]
    duration_indices = [duration_prompt]

    for _ in range(length):
        # the latest indices, one-hot encoded and made 3D (model only accepts 3D inputs)
        pitch_input = timestep_to3d(vectorize(pitch_indices[-1], num_pitches))
        duration_input = timestep_to3d(vectorize(duration_indices[-1], num_durations))

        # predicted output distributions for this step
        pitch_dist = pitch_model.predict(pitch_input)
        duration_dist = duration_model.predict(duration_input)

        # sample an index from each distribution and report what it decodes to
        sampled_pitch = sample_distribution(pitch_dist, num_pitches)
        print(f'Sampled pitch index {sampled_pitch}: MIDI no. {pitches[sampled_pitch]}')
        sampled_duration = sample_distribution(duration_dist, num_durations)
        print(f'Sampled duration index {sampled_duration}: quarter length {durations[sampled_duration]}')

        # the samples are both part of the piece and the next step's input
        pitch_indices.append(sampled_pitch)
        duration_indices.append(sampled_duration)

    # decode every (pitch index, duration index) pair in to a music21 note
    melody = stream.Stream()
    for pitch_index, duration_index in zip(pitch_indices, duration_indices):
        note_pitch = pitch.Pitch(midi=pitches[pitch_index])
        note_duration = duration.Duration(durations[duration_index])
        new_note = note.Note()
        new_note.pitch = note_pitch
        new_note.duration = note_duration
        melody.append(new_note)

    return melody

# Generation infrastructure

In [None]:
def build_train_compose(pitch_train, duration_train, bidirectional=True, temperature=1.0, optimizer="adam", num_layers=20,\
                       epochs=15, batch_size=32, validation_split=0.0, verbose=1,\
                       pitch_seed=None, duration_seed=None, composition_length=75):
    """Build a pitch model and a duration model, train both, then compose a piece with them.

    pitch_train, duration_train: training sequences for the pitch / duration model
    bidirectional, temperature, optimizer, num_layers: forwarded to get_model for both models
    bidirectional, epochs, batch_size, validation_split, verbose: forwarded to train_model for both models
    pitch_seed, duration_seed: starting indices in to pitches / durations handed to compose;
        when None a random valid index is drawn on every call
    composition_length: number of time steps compose generates

    returns whatever compose returns (a music21 stream)

    Fixes over the previous version: get_model now receives its required num_features,
    each model is trained on its own data (train_model used to be called once, with the
    data in the model slot), and the default seeds are indices chosen at call time rather
    than vocabulary values frozen when the function was defined.
    """
    if pitch_seed is None:
        pitch_seed = random.randrange(num_pitches)
    if duration_seed is None:
        duration_seed = random.randrange(num_durations)

    model_options = dict(bidirectional=bidirectional, temperature=temperature, optimizer=optimizer, num_layers=num_layers)
    pitch_model = get_model(num_pitches, **model_options)
    duration_model = get_model(num_durations, **model_options)

    fit_options = dict(bidirectional=bidirectional, epochs=epochs, batch_size=batch_size,
                       validation_split=validation_split, verbose=verbose)
    train_model(pitch_model, pitch_train, **fit_options)
    train_model(duration_model, duration_train, **fit_options)

    return compose(pitch_model, duration_model, pitch_seed, duration_seed, length=composition_length)

In [None]:
# one bidirectional duration model, fitted on the short, then the medium, then the long sequences
duration_bidirectional = get_model(num_durations, bidirectional=True)
for duration_seqs in (short_seqs_duration_train, medium_seqs_duration_train, long_seqs_duration_train):
    train_model(duration_bidirectional, duration_seqs, bidirectional=True)

In [None]:
# one bidirectional pitch model, fitted on the short, then the medium, then the long sequences
pitch_bidirectional = get_model(num_pitches, bidirectional=True)
for pitch_seqs in (short_seqs_pitch_train, medium_seqs_pitch_train, long_seqs_pitch_train):
    train_model(pitch_bidirectional, pitch_seqs, bidirectional=True)

In [None]:
# show the duration vocabulary; a position in this list is what compose() takes as duration_prompt
durations

In [None]:
# show the pitch vocabulary as one comma-separated string
', '.join(map(str, pitches))

In [None]:
# first composition: start from pitch index 3 and duration index 4, then generate 100 further steps
piece1 = compose(pitch_bidirectional, duration_bidirectional, pitch_prompt=3, duration_prompt=4, length=100)

In [None]:
# output piece1 in music21's 'midi' format (presumably an inline player in the notebook; depends on the music21 setup)
piece1.show('midi')

In [None]:
# output piece1 in music21's default format (presumably rendered notation; depends on the music21 setup)
piece1.show()

In [None]:
# second composition from the same prompt; sampling is random, so it differs from piece1
piece2 = compose(pitch_bidirectional, duration_bidirectional, pitch_prompt=3, duration_prompt=4, length=100)

In [None]:
# output piece2 in music21's 'midi' format (presumably an inline player in the notebook; depends on the music21 setup)
piece2.show('midi')

In [None]:
# output piece2 in music21's default format (presumably rendered notation; depends on the music21 setup)
piece2.show()

# Results

In [None]:
# Results: piece1 again in music21's 'midi' format
piece1.show('midi')

In [None]:
# Results: piece1 again in music21's default format
piece1.show()

In [None]:
# Results: piece2 again in music21's 'midi' format
piece2.show('midi')

In [None]:
# Results: piece2 again in music21's default format
piece2.show()