In [175]:
import music21
import pickle
import numpy as np
from music21 import duration

In [176]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  0


### Load data from files

https://verovio.humdrum.org/?file=essen/europa/deutschl/altdeu2/deut4207.krn

In [177]:
data = []

In [178]:
# Start from an empty list so that re-running this cell never appends the same
# pieces a second time (the cell is idempotent on its own).
data = []

# The corpus is stored as data_germany_1.pkl ... data_germany_11.pkl.
files = ["data_germany_{}".format(part) for part in range(1, 12)]
for file in files:
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only point this at files from a trusted source.
    with open('../data/data_2/{}.pkl'.format(file), 'rb') as handle:
        data.extend(pickle.load(handle))

### Extracting the data

In [179]:
from sklearn.model_selection import train_test_split

In [180]:
len(data)

5351

In [181]:
# Keep only the pieces whose phrases all have between `lower_bound` and
# `upper_bound` entries (inclusive). In piece[2] an entry equal to 1 closes the
# phrase that is currently being counted.
lower_bound = 4
upper_bound = 12
pieces = []

for piece in data:
    phrase_length = 0
    for boundary_flag in piece[2]:
        phrase_length += 1
        if boundary_flag != 1:
            continue
        if not (lower_bound <= phrase_length <= upper_bound):
            # Irregular phrase found: abandon this piece (skips the else branch).
            break
        phrase_length = 0
    else:
        # No irregular phrase was encountered.
        pieces.append(piece)

In [182]:
len(pieces)

4344

In [183]:
# Hold out 20% of the filtered pieces for testing; the fixed random_state keeps
# the split reproducible between runs.
pieces_train, pieces_test = train_test_split(
    pieces,
    test_size=0.2,
    random_state=42,
)

In [212]:
len(pieces_train), len(pieces_test)

(3475, 869)

In [184]:
# Persist both halves of the split as pickle files (train first, then test).
for target_path, subset in (
    ('../data/set_1/train/pieces_train.pkl', pieces_train),
    ('../data/set_1/test/pieces_test.pkl', pieces_test),
):
    with open(target_path, 'wb') as handle:
        pickle.dump(subset, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [185]:
# Flatten the training pieces into three parallel streams. Each piece is
# preceded by a start marker: 129 in the note stream and 0 in the duration and
# phrase streams, so the three lists always stay the same length.
notes_input, durations_input, phrases_input = [], [], []

for piece in pieces_train:
    notes, durations, phrases = piece[0], piece[1], piece[2]
    notes_input += [129, *notes]  # START = 129
    durations_input += [0, *durations]
    phrases_input += [0, *phrases]

In [186]:
# Show the size and the first 20 entries of each raw (un-encoded) stream.
for length_template, stream in (
    ("Length of the notes_input: {}", notes_input),
    ("Length of the durations_input: {}", durations_input),
    ("Length of the phrases_input: {}", phrases_input),
):
    print(length_template.format(len(stream)))
    print("Twenty first elements: {}".format(stream[:20]))

Length of the notes_input: 175648
Twenty first elements: [129, 63, 63, 65, 67, 65, 63, 62, 60, 58, 63, 63, 65, 67, 65, 67, 63, 63, 65, 67]
Length of the durations_input: 175648
Twenty first elements: [0, 2.0, 2.0, 2.0, 4.0, 2.0, 2.0, 2.0, 2.0, 6.0, 2.0, 2.0, 2.0, 4.0, 2.0, 6.0, 2.0, 2.0, 2.0, 4.0]
Length of the phrases_input: 175648
Twenty first elements: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]


In [187]:
def _build_vocabulary(values):
    """Assign a dense integer id to every distinct value, in sorted order.

    Args:
        values: iterable of hashable, mutually sortable values.

    Returns:
        A ``(value_to_int, int_to_value)`` pair of dicts that are inverses of
        each other.
    """
    value_to_int = {value: index for index, value in enumerate(sorted(set(values)))}
    int_to_value = {index: value for value, index in value_to_int.items()}
    return value_to_int, int_to_value


# One vocabulary per stream. Building them through the helper (instead of
# module-level `for ... duration in ...` loops) keeps the `duration` module
# imported from music21 from being overwritten by a loop variable.
durations_to_int, int_to_duration = _build_vocabulary(durations_input)
notes_to_int, int_to_notes = _build_vocabulary(notes_input)
phrases_to_int, int_to_phrases = _build_vocabulary(phrases_input)

# Encode every stream as integer ids for the network.
durations_network_input = [durations_to_int[value] for value in durations_input]
notes_network_input = [notes_to_int[value] for value in notes_input]
phrases_network_input = [phrases_to_int[value] for value in phrases_input]

In [188]:
int_to_duration

{0: 0,
 1: 0.00390625,
 2: 0.125,
 3: Fraction(1, 6),
 4: 0.25,
 5: Fraction(1, 3),
 6: 0.375,
 7: 0.5,
 8: Fraction(2, 3),
 9: 0.75,
 10: 1.0,
 11: Fraction(4, 3),
 12: 1.5,
 13: 2.0,
 14: 3.0,
 15: 4.0,
 16: 6.0,
 17: 8.0,
 18: 12.0}

In [189]:
# Show the size and the first 20 entries of each integer-encoded stream.
for length_template, encoded_stream in (
    ("Length of the notes_input: {}", notes_network_input),
    ("Length of the durations_input: {}", durations_network_input),
    ("Length of the phrases_input: {}", phrases_network_input),
):
    print(length_template.format(len(encoded_stream)))
    print("Twenty first elements: {}".format(encoded_stream[:20]))

Length of the notes_input: 175648
Twenty first elements: [43, 24, 24, 26, 28, 26, 24, 23, 21, 19, 24, 24, 26, 28, 26, 28, 24, 24, 26, 28]
Length of the durations_input: 175648
Twenty first elements: [0, 13, 13, 13, 15, 13, 13, 13, 13, 16, 13, 13, 13, 15, 13, 16, 13, 13, 13, 15]
Length of the phrases_input: 175648
Twenty first elements: [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0]


In [190]:
# Vocabulary sizes: the number of distinct ids in each lookup table.
n_notes, n_durations, n_phrases = map(
    len, (notes_to_int, durations_to_int, phrases_to_int)
)

In [191]:
print("Number of distinct notes: {}".format(n_notes))
print("Number of distinct durations: {}".format(n_durations))

Number of distinct notes: 44
Number of distinct durations: 19


In [192]:
# Bundle the forward and reverse lookup tables of all three vocabularies and
# store them in dictionary.pkl (the file is created or overwritten).
dictionary = {
    "durations_to_int": durations_to_int,
    "int_to_duration": int_to_duration,
    "notes_to_int": notes_to_int,
    "int_to_notes": int_to_notes,
    "phrases_to_int": phrases_to_int,
    "int_to_phrases": int_to_phrases,
}
with open('dictionary.pkl', 'wb') as file:
    pickle.dump(dictionary, file)

In [193]:
import os
import pickle
import numpy
from music21 import note, chord

from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.utils import plot_model
from RNNmodel import prepare_sequences, create_network

In [194]:
notes_network_input

[43,
 24,
 24,
 26,
 28,
 26,
 24,
 23,
 21,
 19,
 24,
 24,
 26,
 28,
 26,
 28,
 24,
 24,
 26,
 28,
 26,
 24,
 23,
 21,
 19,
 24,
 24,
 26,
 28,
 26,
 28,
 28,
 26,
 24,
 29,
 28,
 26,
 24,
 26,
 28,
 26,
 24,
 29,
 28,
 26,
 24,
 26,
 23,
 24,
 26,
 28,
 24,
 23,
 21,
 23,
 31,
 29,
 28,
 26,
 24,
 23,
 23,
 21,
 43,
 21,
 21,
 23,
 25,
 25,
 26,
 23,
 25,
 25,
 26,
 28,
 30,
 30,
 28,
 26,
 28,
 28,
 28,
 28,
 25,
 26,
 30,
 28,
 25,
 25,
 23,
 25,
 26,
 25,
 26,
 23,
 21,
 43,
 16,
 21,
 9,
 9,
 42,
 21,
 21,
 20,
 18,
 16,
 16,
 13,
 16,
 16,
 14,
 14,
 14,
 14,
 13,
 13,
 16,
 18,
 16,
 14,
 13,
 16,
 11,
 11,
 16,
 16,
 18,
 20,
 18,
 20,
 21,
 23,
 25,
 23,
 42,
 23,
 23,
 21,
 21,
 21,
 21,
 20,
 23,
 20,
 20,
 18,
 21,
 18,
 16,
 42,
 43,
 25,
 26,
 28,
 25,
 21,
 28,
 28,
 26,
 30,
 28,
 26,
 23,
 25,
 26,
 23,
 20,
 26,
 26,
 25,
 28,
 26,
 25,
 16,
 21,
 23,
 25,
 26,
 28,
 33,
 30,
 26,
 30,
 30,
 28,
 25,
 21,
 28,
 26,
 23,
 20,
 21,
 42,
 25,
 26,
 28,
 25,
 21,
 28,
 2

In [195]:
durations_network_input

[0,
 13,
 13,
 13,
 15,
 13,
 13,
 13,
 13,
 16,
 13,
 13,
 13,
 15,
 13,
 16,
 13,
 13,
 13,
 15,
 13,
 13,
 13,
 13,
 16,
 13,
 13,
 13,
 15,
 13,
 16,
 13,
 13,
 13,
 15,
 13,
 15,
 13,
 16,
 13,
 13,
 13,
 15,
 13,
 15,
 13,
 16,
 13,
 13,
 13,
 15,
 13,
 15,
 13,
 16,
 13,
 13,
 13,
 15,
 13,
 15,
 13,
 16,
 0,
 10,
 13,
 10,
 13,
 10,
 13,
 10,
 13,
 10,
 13,
 10,
 13,
 10,
 14,
 13,
 10,
 13,
 10,
 13,
 10,
 10,
 10,
 10,
 13,
 10,
 13,
 10,
 10,
 10,
 10,
 14,
 13,
 0,
 7,
 10,
 10,
 10,
 7,
 7,
 7,
 7,
 7,
 7,
 10,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 10,
 7,
 7,
 10,
 7,
 7,
 7,
 7,
 7,
 7,
 12,
 7,
 10,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 7,
 10,
 7,
 0,
 4,
 4,
 7,
 7,
 7,
 7,
 4,
 4,
 4,
 4,
 7,
 4,
 4,
 7,
 7,
 7,
 7,
 4,
 4,
 4,
 4,
 7,
 7,
 9,
 4,
 7,
 7,
 7,
 7,
 7,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 10,
 7,
 4,
 4,
 7,
 7,
 7,
 7,
 4,
 4,
 4,
 4,
 7,
 4,
 4,
 7,
 7,
 7,
 7,
 4,
 4,
 4,
 4,
 7,
 7,
 9,
 4,
 7,
 7,
 7,
 7,
 7,


In [196]:
phrases_network_input

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,


Remove phrases that are too short or too long.

In [197]:
len(notes_network_input)

175648

In [198]:
len(durations_network_input)

175648

In [199]:
len(phrases_network_input)

175648

In [200]:
phrases_network_input

[0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,


In [201]:
# NOTE(review): `select_phrases` is neither defined nor imported in the visible
# cells of this notebook (only `prepare_sequences` and `create_network` are
# imported from RNNmodel), so this call presumably relies on a definition left
# over in the kernel -- confirm where it lives and import it explicitly.
# The three `new_*` results are not used by any later visible cell; the
# sequences below are built from the unfiltered *_network_input lists.
new_phrases_network_input, new_notes_network_input, new_durations_network_input = select_phrases(phrases_network_input, notes_network_input, durations_network_input, lower_bound=4, upper_bound=12)

In [202]:
# Turn the three encoded streams into model inputs/targets.
# seq_len is passed through to prepare_sequences (presumably the window length;
# see RNNmodel.prepare_sequences to confirm).
seq_len = 5
network_input, network_output = prepare_sequences(
    notes_network_input,
    durations_network_input,
    phrases_network_input,
    seq_len,
)

In [203]:
len(network_input[0])

175643

In [204]:
network_input[0]

array([[43, 24, 24, 26, 28],
       [24, 24, 26, 28, 26],
       [24, 26, 28, 26, 24],
       ...,
       [21, 24, 28, 26, 23],
       [24, 28, 26, 23, 24],
       [28, 26, 23, 24, 21]])

In [205]:
len(network_input[1])

175643

In [206]:
network_input[1]

array([[ 0, 13, 13, 13, 15],
       [13, 13, 13, 15, 13],
       [13, 13, 15, 13, 13],
       ...,
       [ 7,  7,  7,  7,  7],
       [ 7,  7,  7,  7,  7],
       [ 7,  7,  7,  7, 10]])

In [207]:
len(network_output[0])

175643

In [208]:
network_output[0]

array([[1., 0.],
       [1., 0.],
       [1., 0.],
       ...,
       [1., 0.],
       [1., 0.],
       [0., 1.]], dtype=float32)

In [209]:
print('note input')
print(network_input[0][0])
print('duration input')
print(network_input[1][0])
print('phrases_output')
print(network_output[0][0])

note input
[43 24 24 26 28]
duration input
[ 0 13 13 13 15]
phrases_output
[1. 0.]


In [210]:
# Hyper-parameters handed to create_network (imported from RNNmodel).
embed_size = 100
rnn_units = 256
use_attention = True

model, att_model = create_network(
    n_notes,
    n_durations,
    n_phrases,
    embed_size,
    rnn_units,
    use_attention,
)
model.summary()

Model: "model_8"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_9 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 input_10 (InputLayer)          [(None, None)]       0           []                               
                                                                                                  
 embedding_8 (Embedding)        (None, None, 100)    4400        ['input_9[0][0]']                
                                                                                                  
 embedding_9 (Embedding)        (None, None, 100)    1900        ['input_10[0][0]']               
                                                                                            

  super().__init__(name, **kwargs)


In [211]:
import numpy as np
weights_folder = "weights" #os.path.join(run_folder, 'weights')
# Every checkpoint below is written into this folder, so make sure it exists
# before anything tries to save into it.
os.makedirs(weights_folder, exist_ok=True)
# model.load_weights(os.path.join(weights_folder, "weights.h5"))

# fit() holds out 20% of the samples via validation_split, so "best" weights and
# early stopping are judged on the validation loss. Monitoring the training
# loss instead would keep rewarding over-fitting and would rarely trigger
# early stopping.
monitored_metric = 'val_loss'

# Keeps one file per improvement; the file name still records the epoch and the
# training loss of that epoch.
checkpoint1 = ModelCheckpoint(
    os.path.join(weights_folder, "weights-improvement-{epoch:02d}-{loss:.4f}-bigger.h5"),
    monitor=monitored_metric,
    verbose=0,
    save_best_only=True,
    mode='min'
)

# Always overwrites weights.h5 with the best weights seen so far.
checkpoint2 = ModelCheckpoint(
    os.path.join(weights_folder, "weights.h5"),
    monitor=monitored_metric,
    verbose=0,
    save_best_only=True,
    mode='min'
)

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(
    monitor=monitored_metric
    , restore_best_weights=True
    , patience = 10
)


callbacks_list = [
    checkpoint1
    , checkpoint2
    , early_stopping
 ]

# Write the initial weights so weights.h5 exists even before the first
# checkpoint fires.
model.save_weights(os.path.join(weights_folder, "weights.h5"))
model.fit(network_input, network_output
          , epochs=20, batch_size=32
          , validation_split = 0.2
          , callbacks=callbacks_list
          , shuffle=True
         )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2e7ac3c10>