In [1]:
from gensim.models import Word2Vec
from tensorflow.keras.layers.experimental import preprocessing
import time
import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from  tensorflow.keras.callbacks import ModelCheckpoint
from keras import utils as np_utils

## Parses the dataset and maps notes to unique symbols

In [2]:
def parse_songs(data_dir, sample_length):
    """Encode the note files in ``data_dir`` as one sequence of integer symbols.

    Every line of every file is treated as one note/chord token (the trailing
    newline is stripped). Each distinct token receives the next unused integer,
    in order of first appearance. Files are visited in the order returned by
    ``os.listdir``.

    Args:
        data_dir: Directory whose entries are text files with one token per line.
        sample_length: Reading stops as soon as the sequence holds at least this
            many tokens.

    Returns:
        A tuple ``(note_sequence, note_to_int)``: the encoded tokens as a list of
        ints, and the token -> int mapping that was built while reading.
    """
    note_sequence = []
    note_to_int = {}

    for data_file in os.listdir(data_dir):
        # `with` guarantees the handle is closed, including on the early return.
        with open(os.path.join(data_dir, data_file), 'r') as current_song:
            for chord in current_song:
                note_processed = chord.replace("\n", "")

                if note_processed not in note_to_int:
                    # The next free symbol is always the current vocabulary size.
                    note_to_int[note_processed] = len(note_to_int)

                note_sequence.append(note_to_int[note_processed])

                if len(note_sequence) >= sample_length:
                    return (note_sequence, note_to_int)

    return (note_sequence, note_to_int)

## Creates a model with 3 LSTM layers

In [3]:
#Taken from https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5
def build_model2(shape, n_vocab):
    """Assemble the stacked-LSTM next-note classifier.

    Args:
        shape: Input shape passed to the first LSTM layer as ``input_shape``.
        n_vocab: Width of the final Dense layer, i.e. number of output classes.

    Returns:
        An uncompiled ``keras.Sequential`` model ending in a softmax activation.
    """
    network = keras.Sequential()

    # Layers are listed in the order they are stacked.
    stack = [
        layers.LSTM(256, input_shape=shape, return_sequences=True),
        layers.Dropout(0.3),
        layers.LSTM(512, return_sequences=True),
        layers.Dropout(0.3),
        layers.LSTM(256),
        layers.Dense(256),
        layers.Dropout(0.3),
        layers.Dense(n_vocab),
        layers.Activation('softmax'),
    ]
    for layer in stack:
        network.add(layer)

    return network

## Generate pairs of note sequences and expected outputs

In [4]:
#Adapted from https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5
def generate_training_pairs(sequence, sequence_length, n_vocab):
    """Slice an encoded note sequence into (input window, next note) pairs.

    Args:
        sequence: List of integer note symbols.
        sequence_length: Number of consecutive notes in each input window.
        n_vocab: Vocabulary size; used to scale the inputs and as the width of
            the one-hot targets.

    Returns:
        A tuple ``(training_inputs, expected_outputs)`` where ``training_inputs``
        has shape ``(n_patterns, sequence_length, 1)`` with values divided by
        ``n_vocab``, and ``expected_outputs`` is the one-hot encoding of the note
        following each window, with ``n_vocab`` columns.

    Raises:
        ValueError: If ``sequence`` is not longer than ``sequence_length``, so
            no training pair can be formed.
    """
    n_patterns = len(sequence) - sequence_length
    if n_patterns <= 0:
        raise ValueError(
            "sequence must be longer than sequence_length to build training pairs "
            "(got %d notes for sequence_length=%d)" % (len(sequence), sequence_length)
        )

    training_inputs = [sequence[i:i + sequence_length] for i in range(n_patterns)]
    expected_outputs = [sequence[i + sequence_length] for i in range(n_patterns)]

    # reshape the input into a format compatible with LSTM layers
    training_inputs = np.reshape(training_inputs, (n_patterns, sequence_length, 1))

    # normalize input
    training_inputs = training_inputs / float(n_vocab)

    # Pin the one-hot width to the vocabulary size. Without num_classes the width
    # is max(label) + 1, which is too narrow whenever the highest symbol never
    # occurs as a target, and then no longer matches a Dense(n_vocab) output.
    expected_outputs = tf.keras.utils.to_categorical(expected_outputs, num_classes=n_vocab)

    return (training_inputs, expected_outputs)

## Sample an index from a probability distribution, with temperature-controlled randomness

In [5]:
#taken from https://stackoverflow.com/questions/54030842/character-lstm-keeps-generating-same-character-sequence 
# to test if this resolves the repeating character issue
def sample(preds, temperature=1.0):
    """Draw one index from ``preds`` after temperature re-weighting.

    The probabilities are moved to log space, divided by ``temperature``,
    exponentiated and renormalised; a single multinomial draw then selects
    the returned index.

    Args:
        preds: Array-like of probabilities.
        temperature: Divisor applied to the log-probabilities.

    Returns:
        The index selected by the multinomial draw.
    """
    scaled_log_probs = np.log(np.asarray(preds).astype('float64')) / temperature
    weights = np.exp(scaled_log_probs)
    distribution = weights / np.sum(weights)
    # One trial, one experiment: the result is a one-hot row.
    one_hot_draw = np.random.multinomial(1, distribution, 1)
    return np.argmax(one_hot_draw)

## Reverse the keys and values of a dictionary

In [6]:
def reverse_dict(dictionary):
    """Return a new dict mapping each value of ``dictionary`` back to its key.

    When several keys share a value, the key encountered last during
    iteration is the one kept.
    """
    return {value: key for key, value in dictionary.items()}

## Exports a list to a file

In [7]:
def export_list(sequence, file_name, dir_name):
    """Append every item of ``sequence`` to a file, one item per line.

    Args:
        sequence: Iterable of strings to write.
        file_name: Name of the target file inside ``dir_name``.
        dir_name: Directory that contains (or will contain) the file.

    The file is opened in append mode, so repeated calls with the same target
    add to the existing content rather than replacing it.
    """
    # os.path.join keeps the path valid on any OS; `with` closes the handle
    # even if a write fails part-way through.
    with open(os.path.join(dir_name, file_name), "a") as f:
        for item in sequence:
            f.write(item + "\n")

## Create a checkpoint object and the corresponding directories

In [8]:
#Taken from https://towardsdatascience.com/how-to-generate-music-using-a-lstm-neural-network-in-keras-68786834d4c5
def create_checkopint(epochs, sample_length, sequence_length):
    """Create the run's output directories and a ModelCheckpoint callback.

    The run directory is named after the three settings and receives a
    ``model_checkpoints`` sub-directory; both are created if missing.

    Args:
        epochs: Number of training epochs; also passed as the checkpoint period.
        sample_length: Sample length of the run (used in the directory name).
        sequence_length: Sequence length of the run (used in the directory name).

    Returns:
        A one-element list holding the configured ``ModelCheckpoint``, ready to
        be passed as ``callbacks`` to ``model.fit``.
    """
    dir_name = "epochs_" + str(epochs) + "_samp_length_" + str(sample_length) + "_seq_length_" + str(sequence_length)
    checkpoint_dir = os.path.join(dir_name, "model_checkpoints")

    # Creates the run directory and the sub-directory in one step and is a
    # no-op when they already exist.
    os.makedirs(checkpoint_dir, exist_ok=True)

    filepath = os.path.join(checkpoint_dir, "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.hdf5")

    # Save the model after training for the specified number of epochs.
    # NOTE(review): `period` appears to be a deprecated ModelCheckpoint argument
    # in recent TensorFlow/Keras releases; it is kept because the save interval
    # depends on it - confirm against the installed version.
    checkpoint = ModelCheckpoint(
        filepath,
        monitor='loss',
        verbose=0,
        save_best_only=False,
        save_freq='epoch',
        period=epochs,
        mode='min'
    )

    return [checkpoint]

## Use a model to generate new songs

In [9]:
def generate_song(model, sequence_length, num_notes, int_to_note, song_length=100, temperature=0.4):
    """Generate a song by repeatedly predicting the next note from a sliding window.

    The window starts as ``sequence_length`` random note indices. At each step
    the window is scaled by ``num_notes``, passed to ``model.predict``, one index
    is sampled from the returned probabilities, and that index replaces the
    oldest entry of the window.

    Args:
        model: Object exposing ``predict(inputs, verbose=0)`` that returns an
            array whose first row holds the next-note probabilities.
        sequence_length: Length of the input window.
        num_notes: Vocabulary size; note indices are ``0 .. num_notes - 1``.
        int_to_note: Mapping from note index to its string form.
        song_length: Number of notes to generate (default 100).
        temperature: Sampling temperature passed to ``sample`` (default 0.4).

    Returns:
        List of ``song_length`` note strings.
    """
    song = []

    # Randomly generate the input sequence. The upper bound of randint is
    # exclusive, so `num_notes` (not `num_notes - 1`) is needed for the last
    # note index to be reachable, and for a one-note vocabulary to work at all.
    input_sequence = np.random.randint(0, num_notes, size=sequence_length)

    for _ in range(song_length):
        # Predict the next note
        prediction_input = np.reshape(input_sequence, (1, len(input_sequence), 1))
        prediction_input = prediction_input / float(num_notes)
        prediction = model.predict(prediction_input, verbose=0)

        # Convert the note from an int to a string
        index = sample(prediction[0], temperature=temperature)
        song.append(int_to_note[index])

        # Slide the window: append the new note, then drop the oldest one
        input_sequence = np.append(input_sequence, index)[1:]

    return song

## Train a model and generate songs

In [10]:
def generate_songs(sample_length, data_dir, sequence_length,epochs, num_songs):
    """Train a model on the dataset and export ``num_songs`` generated songs.

    Steps: parse the note files into integer symbols, build training pairs,
    train the LSTM model with a checkpoint callback, generate the songs, and
    append each one to its own CSV file inside the run directory.

    Args:
        sample_length: Maximum number of notes read from the dataset.
        data_dir: Directory containing the note files.
        sequence_length: Length of the model's input window.
        epochs: Number of training epochs.
        num_songs: Number of songs to generate and export.
    """
    # Encode the dataset and keep the inverse mapping for decoding
    songs, note_to_int = parse_songs(data_dir, sample_length)
    num_notes = len(note_to_int)
    int_to_note = reverse_dict(note_to_int)

    # Input windows and the note expected after each of them
    training_inputs, expected_outputs = generate_training_pairs(songs, sequence_length, num_notes)

    # Build and train the network
    callbacks_list = create_checkopint(epochs, sample_length, sequence_length)
    model = build_model2(training_inputs.shape[1:], num_notes)
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    model.fit(training_inputs, expected_outputs, epochs=epochs, batch_size=64, callbacks=callbacks_list)

    # Generate every song before writing any of them
    generated_songs = []
    for song_number in range(num_songs):
        generated_songs.append(generate_song(model, sequence_length, num_notes, int_to_note))
        print(song_number)

    # The run directory name doubles as the prefix of every exported file name
    run_tag = "".join([
        "epochs_", str(epochs),
        "_samp_length_", str(sample_length),
        "_seq_length_", str(sequence_length),
    ])
    for song_number, notes in enumerate(generated_songs):
        export_list(notes, run_tag + "_" + str(song_number) + ".csv", run_tag)

## Driver

In [11]:
# Settings
# Location of the extracted-note files. The JAZZ_DATA_DIR environment variable
# overrides the default so the notebook can run on another machine without
# editing this cell.
data_dir = os.environ.get(
    "JAZZ_DATA_DIR",
    r"C:\Users\alext\Desktop\School\2021 Spring\CS 271\Final Project\Feature Extraction\Jazz\Jazz Extracted Notes (Solo Treble Only)",
)

num_songs = 20   # songs generated per trained model
epochs = 40      # training epochs per model

# Every (sample length, sequence length) combination trains its own model.
sample_legths = [1000, 2000, 3000]
sequence_lengths = [10, 50, 100, 200, 300]

# Generate songs
for sample_len in sample_legths:
    for seq_len in sequence_lengths:
        generate_songs(sample_len, data_dir, seq_len, epochs, num_songs)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40
0
1
2
3
