In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, TimeDistributed, Bidirectional, Lambda
import numpy as np
from music21 import *
from copy import deepcopy
import random
import pickle
import os
import time

# Whether the networks should be retrained when the notebook runs (the
# alternative being to load in previously saved weights).
# NOTE(review): no cell visible in this notebook reads this flag, so the
# training cells currently always run -- confirm whether it should gate them.
TRAIN_MODELS = True

# Retrieving data

In [None]:
# Load the training sequences, one duration array and one pitch array per
# length group (short / medium / long). Each pickle is opened in its own
# `with` block so the handle is closed as soon as it has been read.
# NOTE: pickle can execute arbitrary code while loading; only load trusted files.

# short sequences
with open("pickles/short_seqs_duration.pickle", 'rb') as short_duration:
    short_seqs_duration = pickle.load(short_duration)
with open("pickles/short_seqs_pitch.pickle", 'rb') as short_pitch:
    short_seqs_pitch = pickle.load(short_pitch)

# medium sequences
with open("pickles/medium_seqs_duration.pickle", 'rb') as medium_duration:
    medium_seqs_duration = pickle.load(medium_duration)
with open("pickles/medium_seqs_pitch.pickle", 'rb') as medium_pitch:
    medium_seqs_pitch = pickle.load(medium_pitch)

# long sequences
with open("pickles/long_seqs_duration.pickle", 'rb') as long_duration:
    long_seqs_duration = pickle.load(long_duration)
with open("pickles/long_seqs_pitch.pickle", 'rb') as long_pitch:
    long_seqs_pitch = pickle.load(long_pitch)

In [None]:
# The pitch and duration values that were used to build the data set.
with open('pickles/durations.pickle', 'rb') as d:
    durations = pickle.load(d)
with open('pickles/pitches.pickle', 'rb') as p:
    pitches = pickle.load(p)

# The mapping from pitch/duration values to one-hot vector indices.
with open('pickles/duration_indices.pickle', 'rb') as d:
    duration_indices = pickle.load(d)
with open('pickles/pitch_indices.pickle', 'rb') as p:
    pitch_indices = pickle.load(p)

# Vocabulary sizes; used elsewhere in the notebook as the one-hot vector widths.
num_durations, num_pitches = len(durations), len(pitches)

# Axis 1 of each sequence array is taken as that group's sequence length.
short_seq_len = short_seqs_duration.shape[1]
medium_seq_len = medium_seqs_duration.shape[1]
long_seq_len = long_seqs_duration.shape[1]

# Experimenting with how temperature affects a softmax distribution

In [None]:
def softmax(x):
    """Return the softmax of `x`, normalised along axis 0.

    x: array-like of logits
    returns: numpy array with the same shape as `x` whose entries along
             axis 0 are non-negative and sum to 1

    The per-column maximum is subtracted before exponentiating. This leaves
    the result mathematically unchanged but keeps np.exp from overflowing
    (inf / inf = nan) when the logits are large, e.g. after dividing by a
    very small temperature.
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=0, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0, keepdims=True)

# Base logits, plus the same logits divided by a range of temperatures.
logits = np.array([1,2,3,4])
logits_temp_p0 = logits / 0.01  # stands in for "temperature near 0" (0 itself would divide by zero)
logits_temp_p2 = logits / 0.2
logits_temp_p4 = logits / 0.4
logits_temp_p6 = logits / 0.6
logits_temp_p8 = logits / 0.8
logits_temp_p10 = logits / 1.0
logits_temp_p15 = logits / 1.5
logits_temp_p150 = logits / 150

# Print the resulting distribution for every temperature. The first scaled
# entry is labelled with the temperature that was actually used (0.01).
for label, scaled_logits in (
    ("RAW", logits),
    ("TEMP 0.01", logits_temp_p0),
    ("TEMP 0.2", logits_temp_p2),
    ("TEMP 0.4", logits_temp_p4),
    ("TEMP 0.6", logits_temp_p6),
    ("TEMP 0.8", logits_temp_p8),
    ("TEMP 1.0", logits_temp_p10),
    ("TEMP 1.5", logits_temp_p15),
    ("TEMP 150", logits_temp_p150),
):
    print(f"{label}: {softmax(scaled_logits)}")

# Model

### Helper functions

In [None]:
# takes a 1D vector (a single time step) and converts it to 3D for input to the network
def timestep_to3d(x):
    """Reshape the 1D array `x` to shape (1, 1, len(x)).

    x: 1D numpy array
    returns: an array holding the same values with two leading axes of size 1
    """
    num_features = x.shape[0]
    return x.reshape(1, 1, num_features)

# build the one-hot vector for a time step from the index position of the encoded value
def vectorize(index, vec_size):
    """Return a float32 vector of length `vec_size` that is 1.0 at `index` and 0.0 elsewhere.

    index: position of the hot entry; converted with int() before use
    vec_size: length of the vector
    """
    hot_position = int(index)
    one_hot = np.zeros(vec_size, dtype=np.float32)
    one_hot[hot_position] = 1.0
    return one_hot

# recovers the index that a one-hot vector encodes
def unvectorize(x):
    """Return the position of the largest entry of `x` (the hot index of a one-hot vector)."""
    hot_index = np.argmax(x)
    return hot_index

### Model building and training

In [None]:
# single layer unidirectional or bidirectional LSTM; makes it easy to test various configurations
def get_model(num_features, lstm_cells=500, bidirectional=True, temperature=1.0, optimizer="adam"):
    """Build and compile an LSTM that outputs one softmax distribution per time step.

    num_features: size of each input vector and of each output distribution
    lstm_cells: number of units in the LSTM layer
    bidirectional: wrap the LSTM in keras' Bidirectional wrapper when True
    temperature: strictly positive value the output logits are divided by
                 before the softmax
    optimizer: passed through to model.compile

    returns the compiled keras Sequential model
    raises ValueError if temperature is not strictly positive
    """
    # dividing by zero (or by a negative/NaN value) would silently produce a
    # meaningless distribution, so reject it up front
    if not temperature > 0:
        raise ValueError(f"temperature must be > 0, got {temperature!r}")

    model = Sequential()
    # the only difference between the bidirectional and unidirectional model is
    # the presence/absence of the Bidirectional wrapper
    # input_shape's first value is None so the number of time steps is variable
    # return_sequences=True makes the layer emit an output for every time step
    recurrent = LSTM(lstm_cells, input_shape=(None, num_features), return_sequences=True)
    model.add(Bidirectional(recurrent) if bidirectional else recurrent)

    # TimeDistributed is a wrapper allowing one output per time step;
    # ...requires the hidden layer to have return_sequences == True.
    # The Dense layer is left linear so that it produces raw logits.
    model.add(TimeDistributed(Dense(num_features)))

    # divide the logits by the temperature *before* feeding them through softmax;
    # scaling the LSTM activations instead (ahead of the Dense layer) does not
    # give a temperature-scaled softmax
    model.add(Lambda(lambda logits: logits / temperature))
    model.add(keras.layers.Activation('softmax'))

    # NOTE(review): the sparse loss expects integer class labels while the
    # 'categorical_crossentropy' metric expects one-hot labels -- confirm which
    # label format the training data actually uses.
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy', 'categorical_crossentropy'])
    return model

In [None]:
# train LSTM
def train_model(model, X, bidirectional=True, epochs=15, batch_size=32, verbose=1):
    """Fit `model` on the sequences in X, deriving the labels from X itself.

    model: compiled model exposing a keras-style fit(x, y, ...) method
    X: array whose axis 0 indexes sequences and whose axis 1 indexes time steps
    bidirectional: True  -> the labels are the inputs themselves
                   False -> every time step is trained to predict the next one
    epochs, batch_size, verbose: forwarded to model.fit
    """
    if bidirectional:
        inputs, labels = X, X
    else:
        # The shift must happen along the TIME axis (axis 1). Slicing axis 0
        # would drop whole sequences and pair each sequence with a different one.
        inputs = X[:, :-1]  # do not input the final time step
        labels = X[:, 1:]   # labels include all time steps but the first one
    model.fit(inputs, labels, epochs=epochs, batch_size=batch_size, verbose=verbose)

# Composing with the model

In [None]:
# return a single index sampled from a softmax distribution vector
def sample_distribution(dist_vec, num_categories):
    """Draw one category index at random, weighted by the probabilities in dist_vec.

    dist_vec: array-like holding exactly num_categories probabilities (any
              shape with that many elements, e.g. (1, 1, num_categories))
    num_categories: number of categories in the distribution

    returns the sampled index (an integer in [0, num_categories))
    raises ValueError if dist_vec has the wrong number of elements or does not
           describe a usable distribution

    The probabilities are sampled directly with numpy. tf.random.categorical
    must not be given probabilities: it interprets its input as unnormalised
    log-probabilities (logits), which flattens a softmax output into an
    almost uniform distribution.
    """
    probs = np.asarray(dist_vec, dtype=np.float64).reshape(num_categories)
    total = probs.sum()
    if not np.isfinite(total) or total <= 0:
        raise ValueError("dist_vec must contain finite probabilities with a positive sum")
    # renormalise in float64: float32 softmax outputs rarely sum to exactly 1,
    # which np.random.choice would reject
    return np.random.choice(num_categories, p=probs / total)

# use the trained models to compose new music from a one-note prompt and a desired length
def compose(pitch_model, duration_model, pitch_prompt, duration_prompt, length=50, verbose=True):
    '''pitch_model: the trained model for pitch predictions
       duration_model: trained model for duration predictions
       pitch_prompt: the first pitch of the piece (index of the one-hot encoded pitch vector)
       duration_prompt: the first duration of the piece (index of the one-hot encoded duration vector)
       length: how many time steps to generate after the prompt
       verbose: print every sampled pitch/duration when True (the default)

       returns a music21.stream.Stream object representing the composition;
       it holds length + 1 notes (the prompt followed by the generated notes)
    '''
    # the lists that hold the indices of the values to index in to pitches/durations lists
    generated_pitches, generated_durations = [pitch_prompt], [duration_prompt]

    for _ in range(length):
        # Feed everything generated so far, one-hot encoded with shape
        # (1, time steps, features). The models keep no state between predict
        # calls, so passing only the newest note would make every prediction
        # depend on that single note alone.
        pitch_history = np.stack([vectorize(i, num_pitches) for i in generated_pitches])[np.newaxis]
        duration_history = np.stack([vectorize(i, num_durations) for i in generated_durations])[np.newaxis]

        # the models emit one distribution per time step; keep the last one
        pitch_pred = pitch_model.predict(pitch_history)[0, -1]
        duration_pred = duration_model.predict(duration_history)[0, -1]

        # sample the distributions (returns the index of the one-hot vectors)
        next_pitch = sample_distribution(pitch_pred, num_pitches)
        if verbose:
            print(f'Sampled pitch index {next_pitch}: MIDI no. {pitches[next_pitch]}')
        next_duration = sample_distribution(duration_pred, num_durations)
        if verbose:
            print(f'Sampled duration index {next_duration}: quarter length {durations[next_duration]}')

        generated_pitches.append(next_pitch)
        generated_durations.append(next_duration)

    # turn the sampled indices back into music21 notes
    composed_stream = stream.Stream()
    for pitch_idx, duration_idx in zip(generated_pitches, generated_durations):
        new_note = note.Note()
        new_note.pitch = pitch.Pitch(midi=pitches[pitch_idx])
        new_note.duration = duration.Duration(durations[duration_idx])
        composed_stream.append(new_note)

    return composed_stream

# Generation infrastructure

In [None]:
def build_train(pitch_train, duration_train, bidirectional=True, temperature=1.0, optimizer="adam", num_layers=20,
                epochs=15, batch_size=32, validation_split=0.0, verbose=1,
                pitch_seed=0, duration_seed=0, composition_length=50):
    """Build a pitch model and a duration model, train both, and compose a piece.

    pitch_train / duration_train: training sequences for the respective model
    bidirectional, temperature, optimizer: forwarded to get_model
    num_layers: number of LSTM units; forwarded to get_model as `lstm_cells`
                (the name is kept so existing keyword callers keep working)
    epochs, batch_size, verbose: forwarded to train_model
    validation_split: must be 0; train_model has no way to hold out
                      validation data
    pitch_seed / duration_seed: one-hot indices of the first pitch/duration
                                handed to compose
    composition_length: number of time steps compose should generate

    returns the music21 stream produced by compose
    raises ValueError if validation_split is non-zero
    """
    if validation_split:
        raise ValueError("validation_split is not supported: train_model does not hold out validation data")

    # get_model needs the one-hot width of each vocabulary as its first argument
    pitch_model = get_model(num_pitches, lstm_cells=num_layers, bidirectional=bidirectional,
                            temperature=temperature, optimizer=optimizer)
    duration_model = get_model(num_durations, lstm_cells=num_layers, bidirectional=bidirectional,
                               temperature=temperature, optimizer=optimizer)

    # train_model takes the model first and the data second
    train_model(pitch_model, pitch_train, bidirectional=bidirectional,
                epochs=epochs, batch_size=batch_size, verbose=verbose)
    train_model(duration_model, duration_train, bidirectional=bidirectional,
                epochs=epochs, batch_size=batch_size, verbose=verbose)

    return compose(pitch_model, duration_model, pitch_seed, duration_seed, length=composition_length)

In [None]:
# hyperparameter values to sweep over
bidirectional_vals = [True, False]
lstm_cell_vals = [500, 1000, 2000]
temperature_vals = [0.2, 0.5, 1.0, 10.0]
epoch_batch_vals = [(1, 1), (5, 8), (10, 16), (30, 32)] # [(epochs, batch_size), ...]

# enumerate every combination of the values above, for building/training test models
def generate_param_sets():
    """Return one dict per hyperparameter combination.

    The combinations are ordered with `bidirectional` varying slowest and the
    (epochs, batch_size) pair varying fastest. Every dict has the keys
    "bidirectional", "lstm_cells", "temperature", "epochs" and "batch_size".
    """
    return [
        {
            "bidirectional": bidirectional,
            "lstm_cells": lstm_cells,
            "temperature": temperature,
            "epochs": epochs,
            "batch_size": batch_size,
        }
        for bidirectional in bidirectional_vals
        for lstm_cells in lstm_cell_vals
        for temperature in temperature_vals
        for epochs, batch_size in epoch_batch_vals
    ]

In [None]:
class MetadataModel:
    """Pairs a model with bookkeeping about how it was trained.

    model: the network to wrap; it is handed to train_model by train()
    bidirectional: whether the model is bidirectional (forwarded to train_model)
    """

    def __init__(self, model, bidirectional):
        self.model = model
        self.bidirectional = bidirectional
        self.trained = False
        self._training_duration = -1  # seconds; stays -1 until train() has run

    @property
    def name(self):
        """Short human-readable label for the wrapped model."""
        # TODO: include the remaining hyperparameters once the wrapper stores them
        return "bidirectional-lstm" if self.bidirectional else "unidirectional-lstm"

    @property
    def training_duration(self):
        """Wall-clock seconds spent in the most recent train() call.

        raises RuntimeError if the model has not been trained yet
        """
        if not self.trained:
            raise RuntimeError("Model has not been trained yet")
        return self._training_duration

    def train(self, X, epochs, batch_size, verbose=0):
        """Train the wrapped model on X via train_model and record how long it took."""
        start_time = time.time()
        train_model(self.model, X, self.bidirectional, epochs, batch_size, verbose)
        end_time = time.time()
        self.trained = True
        self._training_duration = end_time - start_time
        
        
    
        
    

In [None]:
def run():
    """Build and train a pitch model and a duration model for every parameter set.

    returns a list of (name, model) tuples, two per parameter set

    NOTE(review): the original cell was unfinished (it ended in a dangling
    `start_time =`). Training each model on the short, then medium, then long
    sequences is an assumption about the intended behaviour -- confirm.
    Every trained model is kept in the returned list, so a full sweep can
    need a lot of memory.
    """
    # (label, one-hot width, training sets) for each musical dimension
    dimensions = (
        ("pitch", num_pitches, (short_seqs_pitch, medium_seqs_pitch, long_seqs_pitch)),
        ("duration", num_durations, (short_seqs_duration, medium_seqs_duration, long_seqs_duration)),
    )
    models = [] # [(name, model), ...]

    start_time = time.time()
    for params in generate_param_sets():
        bidirectional = params["bidirectional"]
        lstm_cells = params["lstm_cells"]
        temperature = params["temperature"]
        epochs = params["epochs"]
        batch_size = params["batch_size"]

        for label, num_features, seq_sets in dimensions:
            model = get_model(num_features, lstm_cells=lstm_cells, bidirectional=bidirectional,
                              temperature=temperature)
            for seqs in seq_sets:
                train_model(model, seqs, bidirectional=bidirectional, epochs=epochs,
                            batch_size=batch_size, verbose=0)

            name = (f"{label}_{'bi' if bidirectional else 'uni'}_cells{lstm_cells}"
                    f"_temp{temperature}_epochs{epochs}_batch{batch_size}")
            models.append((name, model))

    print(f"Trained {len(models)} models in {time.time() - start_time:.1f} seconds")
    return models

In [None]:
# Build one model per musical dimension. get_model sizes the LSTM through its
# `lstm_cells` parameter; it has no `num_layers` parameter.
pitch_model = get_model(num_pitches, lstm_cells=1024)
duration_model = get_model(num_durations, lstm_cells=1024)

# Train each model on the short, then the medium, then the long sequences.
for duration_seqs in (short_seqs_duration, medium_seqs_duration, long_seqs_duration):
    train_model(duration_model, duration_seqs, epochs=10)

for pitch_seqs in (short_seqs_pitch, medium_seqs_pitch, long_seqs_pitch):
    train_model(pitch_model, pitch_seqs, epochs=10)

In [None]:
# Compose with the freshly trained models, prompting with pitch index 26 and
# duration index 1. compose returns the prompt note plus 69 generated notes.
piece = compose(pitch_model, duration_model, 26, 1, length=69)

In [None]:
# show the composition using music21's 'midi' format
piece.show('midi')

In [None]:
# show the composition using music21's default format
piece.show()

In [None]:
# Keras' HDF5 writer does not create missing parent directories, so make sure
# the target directory exists before saving (a no-op when it already does).
os.makedirs('models', exist_ok=True)
pitch_model.save('models/pitch_test.h5')
duration_model.save('models/duration_test.h5')

In [None]:
# Reload both models from the HDF5 files to check the save/load round trip.
# NOTE(review): get_model adds a Lambda layer built from a Python lambda;
# confirm that it deserializes correctly with the Keras version in use.
recreated_pitch = keras.models.load_model('models/pitch_test.h5')
recreated_duration = keras.models.load_model('models/duration_test.h5')

In [None]:
# Compose again with the reloaded models, this time prompting with pitch
# index 38 and duration index 4. This rebinds `piece` to the new composition.
piece = compose(recreated_pitch, recreated_duration, 38, 4, length=69)

In [None]:
# show the reloaded models' composition using music21's 'midi' format
piece.show('midi')

In [None]:
# show the reloaded models' composition using music21's default format
piece.show()