In [None]:
import os
import pickle
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from keras.layers import LSTM, Input, Dense, Activation, Embedding, Concatenate, Reshape
from keras.layers import RepeatVector, Permute
from keras.layers import Multiply, Lambda
import keras.backend as K 
from keras.models import Model
from keras.optimizers import RMSprop
from keras.utils import np_utils
from keras.callbacks import ModelCheckpoint, EarlyStopping

# Report how many GPU devices TensorFlow can see on this machine.
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

### Load data from Oskar Kolberg's Dataset
Source: <https://webesac.pcss.pl/>

In [None]:
# Accumulator for the pieces read from the pickle file(s) in the next cell.
data = []

In [None]:
# Read every pickled dataset file and append the pieces it contains to `data`.
# NOTE: pickle.load executes arbitrary code from the file; only use trusted data.
files = ["data_kolberg"]
for file in files:
    pickle_path = '../data/Oskar Kolberg\'s Dataset/{}.pkl'.format(file)
    with open(pickle_path, 'rb') as handle:
        loaded_pieces = pickle.load(handle)
    data.extend(loaded_pieces)

In [None]:
# Number of pieces loaded, before any filtering is applied.
number_of_pieces = len(data)
number_of_pieces

### Remove too long or too short phrases

Determine the range of the most common phrase lengths.

In [None]:
# Flatten the per-piece sequences into one long list per feature.
# Each piece is indexed as [0] notes, [1] durations, [2] phrase flags.
notes_data = [note for piece in data for note in piece[0]]
durations_data = [duration for piece in data for duration in piece[1]]
phrases_data = [flag for piece in data for flag in piece[2]]

In [None]:
# Histogram of phrase lengths: a flag equal to 1 closes the phrase that is
# currently being counted.
phrases_length = dict()
counter = 0
for phrase_flag in phrases_data:
    counter += 1
    if phrase_flag != 1:
        continue
    phrases_length[counter] = phrases_length.get(counter, 0) + 1
    counter = 0

# Keep only the lengths that occur more often than 5% of the number of pieces,
# ordered from the most to the least frequent.
min_count = 0.05 * number_of_pieces
phrases_length = [
    (length, count)
    for length, count in sorted(phrases_length.items(), key=lambda item: item[1], reverse=True)
    if count > min_count
]
phrases_length

Remove pieces with irregular phrases

In [None]:
# phrases_length is ordered by frequency, so its first entry is the most
# common phrase length; the bounds span all retained lengths.
common_lengths = [length for length, _ in phrases_length]
seq_len = common_lengths[0]
lower_bound = min(common_lengths)
upper_bound = max(common_lengths)

In [None]:
# The most common phrase length ==> the length of a sequence
seq_len

In [None]:
# The shortest phrase length we consider
lower_bound

In [None]:
# The longest phrase length we consider
upper_bound

In [None]:
pieces = []

for piece in data:
    # Walk the phrase flags of the piece; a flag of 1 closes a phrase.
    current_phrase_length = 0
    for digit in piece[2]:
        current_phrase_length += 1
        if digit != 1:
            continue
        if not (lower_bound <= current_phrase_length <= upper_bound):
            # Irregular phrase found: drop the whole piece.
            break
        current_phrase_length = 0
    else:
        # No closed phrase fell outside the bounds, so the piece is kept.
        pieces.append(piece)

In [None]:
# Re-count: number_of_pieces now refers to the pieces kept by the filter above.
number_of_pieces = len(pieces)
number_of_pieces

### Split the data into train and test data

In [None]:
# Train test split: hold out 20% of the pieces for testing.
# random_state is fixed so the split is the same on every run.
pieces_train, pieces_test = train_test_split(pieces, test_size=0.2, random_state=42)

In [None]:
# Sizes of the train and test splits (in pieces).
len(pieces_train), len(pieces_test)

In [None]:
# Save the train and test data to pickle files.
# The train/ and test/ sub-folders are created on demand so that open(..., 'wb')
# does not fail with FileNotFoundError when they are missing.
train_path = '../data/Oskar Kolberg\'s Dataset/train/pieces_train.pkl'
test_path = '../data/Oskar Kolberg\'s Dataset/test/pieces_test.pkl'

for split_path, split_pieces in ((train_path, pieces_train), (test_path, pieces_test)):
    os.makedirs(os.path.dirname(split_path), exist_ok=True)
    with open(split_path, 'wb') as handle:
        pickle.dump(split_pieces, handle, protocol=pickle.HIGHEST_PROTOCOL)

### Extract train data

In [None]:
notes_input, durations_input, phrases_input = [], [], []

for piece in pieces_train:
    # Every piece is prefixed with one START step before its own values:
    # note 129 (START), duration 0 and phrase flag 0.
    notes_input.extend([129, *piece[0]])
    durations_input.extend([0, *piece[1]])
    phrases_input.extend([0, *piece[2]])

In [None]:
# Summarise the three raw training streams: total length and first 20 values.
report_lines = [
    "Length of the notes_input: {}".format(len(notes_input)),
    "Twenty first elements: {}".format(notes_input[:20]),
    "Length of the durations_input: {}".format(len(durations_input)),
    "Twenty first elements: {}".format(durations_input[:20]),
    "Length of the phrases_input: {}".format(len(phrases_input)),
    "Twenty first elements: {}".format(phrases_input[:20]),
]
print("\n".join(report_lines))

### Prepare data for embedding

Map durations, notes and phrase flags to consecutive integer indices

In [None]:
# Build value <-> index lookup tables for every feature.
# The START values prepended to each training piece (duration 0, note 129,
# phrase flag 0) are added to each vocabulary explicitly, so encoding the
# *_input lists below cannot hit a missing key. Previously only durations and
# notes did this; the phrase vocabulary relied on 0 occurring in the data.
durations_to_int = {
    duration: index
    for index, duration in enumerate(sorted(set(durations_data).union({0})))
}
int_to_duration = {index: duration for duration, index in durations_to_int.items()}

notes_to_int = {
    note: index
    for index, note in enumerate(sorted(set(notes_data).union({129})))
}
int_to_notes = {index: note for note, index in notes_to_int.items()}

phrases_to_int = {
    phrase: index
    for index, phrase in enumerate(sorted(set(phrases_data).union({0})))
}
int_to_phrases = {index: phrase for phrase, index in phrases_to_int.items()}

# Encode the training streams with the lookup tables.
durations_network_input = [ durations_to_int[duration] for duration in durations_input ]
notes_network_input = [ notes_to_int[note] for note in notes_input ]
phrases_network_input = [ phrases_to_int[phrase] for phrase in phrases_input ]

In [None]:
# Before: distinct raw duration values in the training data
print(sorted(set(durations_input)))

In [None]:
# After: distinct duration indices once mapped through durations_to_int
print(sorted(set(durations_network_input)))

In [None]:
# Before: distinct raw note values in the training data (129 is the START marker)
print(sorted(set(notes_input)))

In [None]:
# After: distinct note indices once mapped through notes_to_int
print(sorted(set(notes_network_input)))

In [None]:
# Show the integer-encoded streams. The labels now name the variables that are
# actually printed (they previously said *_input, copied from the raw-data cell).
print("Length of the notes_network_input: {}".format(len(notes_network_input)))
print("Twenty first elements: {}".format(notes_network_input[:20]))
print("Length of the durations_network_input: {}".format(len(durations_network_input)))
print("Twenty first elements: {}".format(durations_network_input[:20]))
print("Length of the phrases_network_input: {}".format(len(phrases_network_input)))
print("Twenty first elements: {}".format(phrases_network_input[:20]))

In [None]:
# Vocabulary sizes; passed to create_network further down.
n_notes = len(notes_to_int)
n_durations = len(durations_to_int)
n_phrases = len(phrases_to_int)

In [None]:
# Report the vocabulary sizes (n_phrases is not printed here).
print("Number of distinct notes: {}".format(n_notes))
print("Number of distinct durations: {}".format(n_durations))

In [None]:
# Bundle all six lookup tables and persist them in a single pickle file.
dictionary = {
    "durations_to_int" : durations_to_int,
    "int_to_duration" : int_to_duration,
    "notes_to_int" : notes_to_int,
    "int_to_notes" : int_to_notes,
    "phrases_to_int" : phrases_to_int,
    "int_to_phrases" : int_to_phrases,
}

# Opening in 'wb' mode creates the file or overwrites an existing one.
with open('dictionary.pkl', 'wb') as file:
    pickle.dump(dictionary, file)

### Prepare network input and output

In [None]:
def prepare_sequences(notes, durations, phrases, seq_len = 32, n_classes = 2):
    """Slice the encoded streams into fixed-length training windows.

    Window ``i`` contains ``notes[i:i + seq_len]`` and
    ``durations[i:i + seq_len]``; its target is the phrase label at index
    ``i + seq_len`` (the step right after the window), one-hot encoded.

    Args:
        notes: sequence of integer-encoded notes.
        durations: sequence of integer-encoded durations, same length as ``notes``.
        phrases: sequence of integer phrase labels, same length as ``notes``.
        seq_len: number of steps in every input window.
        n_classes: number of phrase classes used for the one-hot targets
            (defaults to 2, the value that used to be hard-coded).

    Returns:
        ``(network_input, network_output)`` where ``network_input`` is
        ``[notes_array, durations_array]``, each of shape
        ``(n_patterns, seq_len)``, and ``network_output`` is a one-element list
        holding a float32 array of shape ``(n_patterns, n_classes)``.

    Raises:
        ValueError: if the three input sequences differ in length.
    """
    if not (len(notes) == len(durations) == len(phrases)):
        raise ValueError(
            "notes, durations and phrases must have the same length, got {}, {} and {}".format(
                len(notes), len(durations), len(phrases)
            )
        )

    # No window fits when the stream is not longer than seq_len.
    n_patterns = max(len(notes) - seq_len, 0)

    notes_input = [notes[i:i + seq_len] for i in range(n_patterns)]
    durations_input = [durations[i:i + seq_len] for i in range(n_patterns)]
    phrases_output = [phrases[i + seq_len] for i in range(n_patterns)]

    notes_input = np.reshape(notes_input, (n_patterns, seq_len))
    durations_input = np.reshape(durations_input, (n_patterns, seq_len))
    network_input = [notes_input, durations_input]

    # One-hot encode with NumPy: row k of the identity matrix is the encoding of
    # label k. The result is float32 of shape (n_patterns, n_classes), the same
    # as to_categorical produced, without needing Keras for data preparation.
    labels = np.asarray(phrases_output, dtype=int)
    phrases_output = np.eye(n_classes, dtype='float32')[labels]
    network_output = [phrases_output]

    return (network_input, network_output)

In [None]:
# Build the training windows; the window length is seq_len, i.e. the most
# common phrase length computed earlier.
network_input, network_output = prepare_sequences(notes_network_input, durations_network_input, phrases_network_input, seq_len)

In [None]:
# Show the first training sample: both input windows and the one-hot target.
print(
    'note input', network_input[0][0],
    'duration input', network_input[1][0],
    'phrases_output', network_output[0][0],
    sep='\n',
)

### Create neural network

In [None]:
def create_network(n_notes, n_durations, n_phrases, embed_size = 100, rnn_units = 256, use_attention = False):
    """Build and compile the phrase classifier.

    Notes and durations are embedded separately, concatenated per time step and
    fed through stacked LSTM layers. The head is a softmax over ``n_phrases``
    classes.

    Args:
        n_notes: size of the note vocabulary (input dimension of the note embedding).
        n_durations: size of the duration vocabulary.
        n_phrases: number of output classes of the softmax head.
        embed_size: dimension of each of the two embeddings.
        rnn_units: number of units in every LSTM layer.
        use_attention: when True, the outputs of a second sequence-returning
            LSTM are pooled with learned attention weights; otherwise the last
            state of a second LSTM is used.

    Returns:
        ``(model, att_model)``: the compiled model, and a second model mapping
        the same inputs to the attention weights ``alpha`` (``None`` when
        ``use_attention`` is False).
    """
    notes_in = Input(shape = (None,))
    durations_in = Input(shape = (None,))

    x1 = Embedding(n_notes, embed_size)(notes_in)
    x2 = Embedding(n_durations, embed_size)(durations_in)

    x = Concatenate()([x1,x2])

    x = LSTM(rnn_units, return_sequences=True)(x)

    if use_attention:

        x = LSTM(rnn_units, return_sequences=True)(x)

        # One tanh score per time step, flattened to (batch, time) and
        # normalised over the time axis.
        e = Dense(1, activation='tanh')(x)
        e = Reshape([-1])(e)
        alpha = Activation('softmax')(e)

        # Broadcast the weights to (batch, time, rnn_units) so that they can be
        # multiplied element-wise with the LSTM outputs.
        alpha_repeated = Permute([2, 1])(RepeatVector(rnn_units)(alpha))

        # Weighted sum of the LSTM outputs over the time axis.
        c = Multiply()([x, alpha_repeated])
        c = Lambda(lambda xin: K.sum(xin, axis=1), output_shape=(rnn_units,))(c)

    else:
        c = LSTM(rnn_units)(x)

    phrases_out = Dense(n_phrases, activation = 'softmax', name = 'phrase')(c)

    model = Model([notes_in, durations_in], [phrases_out])

    if use_attention:
        att_model = Model([notes_in, durations_in], alpha)
    else:
        att_model = None

    # `lr` is the deprecated spelling of this argument; use `learning_rate`.
    opti = RMSprop(learning_rate = 0.001)
    # The model has a single output, so exactly one loss is given (the previous
    # list of two losses did not match the number of outputs). For a softmax
    # head with one-hot targets categorical cross-entropy is the matching loss;
    # with two classes it is numerically equal to the binary cross-entropy
    # averaged over both outputs that was specified before.
    model.compile(loss='categorical_crossentropy', optimizer=opti)

    return model, att_model

In [None]:
# Hyper-parameters of the network built below.
embed_size, rnn_units, use_attention = 100, 256, True

model, att_model = create_network(
    n_notes,
    n_durations,
    n_phrases,
    embed_size=embed_size,
    rnn_units=rnn_units,
    use_attention=use_attention,
)
model.summary()

### Train the model

In [None]:
weights_folder = "weights"
# model.save_weights() below writes into this folder; create it up front so a
# fresh checkout does not fail on a missing directory.
os.makedirs(weights_folder, exist_ok=True)

# Writes a separate file (epoch number and loss in the name) each time the
# monitored loss improves.
checkpoint1 = ModelCheckpoint(
    os.path.join(weights_folder, "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.h5"),
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min'
)

# Keeps a single "weights.h5" that is overwritten whenever the monitored loss
# improves.
checkpoint2 = ModelCheckpoint(
    os.path.join(weights_folder, "weights.h5"),
    monitor='loss',
    verbose=0,
    save_best_only=True,
    mode='min'
)

# NOTE(review): all three callbacks monitor the training loss although fit()
# holds out a validation split; consider monitoring 'val_loss' if the goal is
# to keep the weights that generalise best.
early_stopping = EarlyStopping(
    monitor='loss'
    , restore_best_weights=True
    , patience = 10
)


callbacks_list = [
    checkpoint1
    , checkpoint2
    , early_stopping
 ]

# Store the initial (untrained) weights before training starts.
model.save_weights(os.path.join(weights_folder, "weights.h5"))
model.fit(network_input, network_output
          , epochs=20, batch_size=32
          , validation_split = 0.2
          , callbacks=callbacks_list
          , shuffle=True
         )