In [1]:
import csv
import os
import numpy as np
import tensorflow as tf
from random import randint
from tensorflow import keras
from keras.utils import np_utils, plot_model, to_categorical
from keras.models import Model
from keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed

In [2]:
# returns train, inference_encoder and inference_decoder models
def define_models(n_input, n_output, n_units):
	# define training encoder
	encoder_inputs = Input(shape=(None, n_input))
	encoder = LSTM(n_units, return_state=True)
	encoder_outputs, state_h, state_c = encoder(encoder_inputs)
	encoder_states = [state_h, state_c]
	# define training decoder
	decoder_inputs = Input(shape=(None, n_output))
	decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)
	decoder_outputs, _, _ = decoder_lstm(decoder_inputs, initial_state=encoder_states)
	decoder_dense = Dense(n_output, activation='softmax')
	decoder_outputs = decoder_dense(decoder_outputs)
	model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
	# define inference encoder
	encoder_model = Model(encoder_inputs, encoder_states)
	# define inference decoder
	decoder_state_input_h = Input(shape=(n_units,))
	decoder_state_input_c = Input(shape=(n_units,))
	decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
	decoder_outputs, state_h, state_c = decoder_lstm(decoder_inputs, initial_state=decoder_states_inputs)
	decoder_states = [state_h, state_c]
	decoder_outputs = decoder_dense(decoder_outputs)
	decoder_model = Model([decoder_inputs] + decoder_states_inputs, [decoder_outputs] + decoder_states)
	# return all models
	return model, encoder_model, decoder_model
 
# generate target given source sequence
def predict_sequence(infenc, infdec, source, n_steps, cardinality):
	# encode
	state = infenc.predict(source)
	# start of sequence input
	target_seq = np.array([0.0 for _ in range(cardinality)]).reshape(1, 1, cardinality)
	# collect predictions
	output = list()
	for t in range(n_steps):
		# predict next char
		yhat, h, c = infdec.predict([target_seq] + state)
		# store prediction
		output.append(yhat[0,0,:])
		# update state
		state = [h, c]
		# update target sequence
		target_seq = yhat
	return np.array(output)
 
# decode a one hot encoded string
def one_hot_decode(encoded_seq):
	return [np.argmax(vector) for vector in encoded_seq]

In [3]:
#load data
stream_list = []

for directory, subdirectories, files in os.walk('data'):
    for file in files:
        with open('data/' + file, 'r') as f: 
            reader = csv.reader(f)
            sub_list = [list(map(float,rec)) for rec in csv.reader(f, delimiter=',')]
            stream_list = stream_list + sub_list

In [4]:
#create seperate data structures for each variable (offset, quarterlength, pitch)
#normalise offset and quarterlength

ptch = []

for row in stream_list:
    ptch.append(row[2])

In [12]:
#divide the sets in sequences of specific length 
dtlngth=len(stream_list)
seq_length = 10#100 groups of 3

dataX1_p = []
dataX2_p = []

for i in range(0, dtlngth - seq_length + 1, 1):
	seq_in_p = ptch[i:i + seq_length]
	# create padded input target sequence
	target_in = [0] + seq_in_p[:-1]
	
	src_encoded = to_categorical(seq_in_p, num_classes=85+1)
	tar2_encoded = to_categorical(target_in, num_classes=85+1)

	dataX1_p.append(src_encoded)
	dataX2_p.append(tar2_encoded)

n_patterns = len(dataX1_p)
print ("Total Patterns: ", n_patterns)

Total Patterns:  11975


In [13]:
# configure problem
n_features = np.array(dataX2_p).shape[2]
n_steps_in = 10
n_steps_out = 10
# define model
train, infenc, infdec = define_models(n_features, n_features, 128)
train.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
#divide data in train and validation sets
dataX1_p_tr = dataX1_p[:-1000]
dataX1_p_v = dataX1_p[-1000:]
dataX2_p_tr = dataX2_p[:-1000]
dataX2_p_v = dataX2_p[-1000:]

In [15]:
# train model
train.fit([np.array(dataX1_p_tr), np.array(dataX2_p_tr)], np.array(dataX1_p_tr), epochs=6)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<tensorflow.python.keras.callbacks.History at 0x7f5b0b696150>

In [17]:
# evaluate LSTM
total, correct = 100, 0
for _ in range(total):
	i = randint(1, 1000) 
	X1 = np.reshape(dataX1_p_v[i], (1, 10, 86))
	target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
	if np.array_equal(one_hot_decode(dataX1_p_v[i]), one_hot_decode(target)):
		correct += 1
print('Accuracy: %.2f%%' % (float(correct)/float(total)*100.0))

Accuracy: 49.00%


In [21]:
# spot check some examples
for _ in range(10):
	i = randint(1, 1000)
	X1 = np.reshape(dataX1_p_v[i], (1, 10, 86))
	target = predict_sequence(infenc, infdec, X1, n_steps_out, n_features)
	print('X=%s, yhat=%s' % (one_hot_decode(dataX1_p_v[i]), target))

  1.54064794e-04 1.14475620e-06 4.64235285e-07 1.01173448e-03
  1.43287070e-06 2.05073263e-07 6.11309190e-07 1.99525311e-04
  6.31210720e-03 3.23752232e-04 6.27566042e-05 1.77802832e-03
  1.01341002e-01 1.71836143e-06 2.82139572e-05 1.66181941e-03
  3.25012843e-06 5.00232214e-04 9.84003350e-07 1.00881271e-05
  9.39064412e-05 3.12598022e-05 5.17015033e-05 2.00718903e-04
  8.66931260e-01 9.95974290e-08 2.25033000e-06 3.58005104e-06
  1.16574802e-06 6.55778349e-05 2.33848732e-06 2.99019064e-07
  2.77321297e-03 2.22790436e-06]
 [8.77704281e-07 7.26842757e-07 1.21285245e-06 2.87227834e-07
  5.45887190e-07 1.84432963e-06 5.72018848e-07 4.25521108e-07
  2.86755721e-07 6.23560709e-07 9.40510290e-07 4.15273263e-07
  1.04824881e-06 6.67472023e-07 1.81340567e-07 9.63027787e-07
  4.45884280e-06 4.17846707e-07 3.87972335e-07 4.47411963e-07
  6.43077954e-07 1.16545596e-06 7.23291976e-07 6.84672443e-07
  4.44353122e-04 1.42731881e-06 6.18433842e-05 7.06921185e-07
  2.14068532e-05 9.78377557e-06 2.854

In [None]:
one_hot_decode(dataY_p[-1])

In [None]:
np.array(dataX2_p_v).shape

In [None]:
max(element for element in ptch)