In [1]:
import csv
import os
import gc
import numpy as np
import tensorflow as tf
from random import randint
from tensorflow import keras
from keras.utils import np_utils, plot_model, to_categorical
from keras.models import Model
from keras.optimizers import Adam
from keras.layers import Input, LSTM, Dense, RepeatVector, TimeDistributed, concatenate

In [2]:
# Builds the training model plus the inference encoder and decoder models.
def define_models(dim_p, n_units):
    """Create the seq2seq training model and its two inference companions.

    Each time step is described by three separate inputs: an offset
    (width 1), a quarter length (width 1) and a pitch vector (width
    ``dim_p``).  The three are merged with ``concatenate`` before being fed
    to the LSTMs.

    Args:
        dim_p: width of the pitch vector (also the size of the softmax head).
        n_units: number of units of both the encoder and the decoder LSTM.

    Returns:
        A tuple ``(model, encoder_model, decoder_model)``:

        * ``model`` takes the three encoder inputs followed by the three
          decoder inputs and produces the outputs ``tr_out_o`` (relu),
          ``tr_out_q`` (sigmoid) and ``tr_out_p`` (softmax).
        * ``encoder_model`` maps the encoder inputs to ``[state_h, state_c]``.
        * ``decoder_model`` takes the decoder inputs plus ``[state_h,
          state_c]`` and returns the three outputs followed by the updated
          ``[state_h, state_c]``.

        The inference models reuse the decoder LSTM and Dense layer objects of
        the training model.
    """
    def feature_inputs():
        # Fixed order everywhere: offset, quarter length, pitch.
        return [
            Input(shape=(None, 1)),
            Input(shape=(None, 1)),
            Input(shape=(None, dim_p)),
        ]

    # Training encoder: only its final hidden/cell states are kept.
    enc_inputs = feature_inputs()
    enc_merged = concatenate(enc_inputs)
    encoder_lstm = LSTM(n_units, return_state=True)
    _, enc_h, enc_c = encoder_lstm(enc_merged)
    enc_states = [enc_h, enc_c]

    # Training decoder: starts from the encoder states and emits one output
    # per time step through three separate heads.
    dec_inputs = feature_inputs()
    dec_merged = concatenate(dec_inputs)
    decoder_lstm = LSTM(n_units, return_sequences=True, return_state=True)
    train_seq, _, _ = decoder_lstm(dec_merged, initial_state=enc_states)
    heads = [
        Dense(1, activation='relu', name='tr_out_o'),
        Dense(1, activation='sigmoid', name='tr_out_q'),
        Dense(dim_p, activation='softmax', name='tr_out_p'),
    ]
    model = Model(enc_inputs + dec_inputs, [head(train_seq) for head in heads])

    # Inference encoder: source sequence -> initial decoder states.
    encoder_model = Model(enc_inputs, enc_states)

    # Inference decoder: same LSTM and heads, but the states are explicit
    # inputs/outputs so the caller can step through the sequence.
    state_inputs = [Input(shape=(n_units,)), Input(shape=(n_units,))]
    step_seq, step_h, step_c = decoder_lstm(dec_merged, initial_state=state_inputs)
    step_outputs = [head(step_seq) for head in heads]
    decoder_model = Model(dec_inputs + state_inputs, step_outputs + [step_h, step_c])

    return model, encoder_model, decoder_model


# generate target given source sequence
def predict_sequence(infenc, infdec, src_o, src_q, src_p, n_steps, cardinality):
    """Decode ``n_steps`` time steps autoregressively for one source sequence.

    The source is encoded once with ``infenc``; the decoder ``infdec`` is then
    called one step at a time, each step receiving the previous step's outputs
    and states as inputs.

    Args:
        infenc: object whose ``predict([src_o, src_q, src_p])`` returns the
            two initial decoder states.
        infdec: object whose ``predict([o, q, p, h, c])`` returns
            ``(o, q, p, h, c)`` for the next step.
        src_o, src_q, src_p: encoder inputs (offset, quarter length, pitch).
        n_steps: number of steps to decode.
        cardinality: width of the pitch vector.

    Returns:
        A 1-D ``object`` array of length ``3 * n_steps``.  Element ``3*t`` is
        the offset vector of step ``t``, ``3*t + 1`` the quarter-length vector
        and ``3*t + 2`` the pitch vector.
    """
    # encode; list() so the states can be appended to the decoder inputs even
    # if predict() hands back a tuple
    state = list(infenc.predict([src_o, src_q, src_p]))

    # Start-of-sequence inputs.  They must match what generatorex feeds the
    # decoder at step 0 during training: -1 for offset and quarter length and
    # the one-hot encoding of pitch class 0 (not an all-zero vector).
    target_o = np.array([-1]).reshape(1, 1, 1)
    target_q = np.array([-1]).reshape(1, 1, 1)
    target_p = np.zeros((1, 1, cardinality))
    if cardinality > 0:
        target_p[0, 0, 0] = 1.0

    # collect predictions as o, q, p, o, q, p, ...
    output = []
    for _ in range(n_steps):
        o, q, p, h, c = infdec.predict([target_o, target_q, target_p] + state)
        output.extend((o[0, 0, :], q[0, 0, :], p[0, 0, :]))
        # carry states and predictions over to the next step
        state = [h, c]
        target_o, target_q, target_p = o, q, p

    # The collected vectors have different lengths (1, 1, cardinality).
    # np.array() on such a ragged list raises ValueError on NumPy >= 1.24, so
    # fill an object array explicitly instead.
    result = np.empty(len(output), dtype=object)
    for idx, vec in enumerate(output):
        result[idx] = vec
    return result

# decode a one hot encoded sequence
def one_hot_decode(encoded_seq):
    """Return, for every vector in ``encoded_seq``, the index of its largest entry."""
    decoded = []
    for vector in encoded_seq:
        decoded.append(np.argmax(vector))
    return decoded

# build all length-`window` runs along the last axis of array a
def rolling_window(a, window):
    """Return every run of ``window`` consecutive items along the last axis.

    The result has shape ``a.shape[:-1] + (a.shape[-1] - window + 1, window)``.
    It is produced with ``as_strided``, so no data is copied: the windows
    overlap in memory and share it with ``a``.  Treat the result as read-only.
    """
    n_windows = a.shape[-1] - window + 1
    view_shape = a.shape[:-1] + (n_windows, window)
    # Repeating the last stride makes each window start one item after the
    # previous one.
    view_strides = a.strides + (a.strides[-1],)
    return np.lib.stride_tricks.as_strided(a, shape=view_shape, strides=view_strides)

# generate train inputs and outputs, one-hot encoding pitch and building the
# shifted decoder inputs for seq2seq
def generatorex(features1, features2, features3, seq_length, batch_size, n_classes=128):
    """Endlessly yield ``(inputs, targets)`` batches for the seq2seq model.

    Args:
        features1: offset windows; item ``i`` holds ``seq_length`` values.
        features2: quarter-length windows, same layout as ``features1``.
        features3: pitch windows holding integer-valued class indices.
        seq_length: number of time steps per window.
        batch_size: number of windows per yielded batch.
        n_classes: width of the one-hot pitch encoding (default 128, the
            value that used to be hard-coded).

    Yields:
        ``([enc_o, enc_q, enc_p, dec_o, dec_q, dec_p], [enc_o, enc_q, enc_p])``.
        The encoder inputs double as targets.  The decoder inputs are the
        targets shifted right by one step, with start values ``-1`` (offset),
        ``-1`` (quarter length) and pitch class ``0``.  Windows are taken in
        order and wrap around to the first one after the last.

    Raises:
        ValueError: if ``features1`` is empty.
    """
    n_samples = len(features1)
    if n_samples == 0:
        raise ValueError("generatorex needs at least one sequence")

    i = 0
    while True:
        # Allocate fresh arrays for every batch.  Reusing one set of buffers
        # would overwrite a batch that a consumer (e.g. a prefetch queue)
        # still holds a reference to.
        enc_o = np.zeros((batch_size, seq_length, 1))
        enc_q = np.zeros((batch_size, seq_length, 1))
        enc_p = np.zeros((batch_size, seq_length, n_classes))
        dec_o = np.zeros((batch_size, seq_length, 1))
        dec_q = np.zeros((batch_size, seq_length, 1))
        dec_p = np.zeros((batch_size, seq_length, n_classes))

        for b in range(batch_size):
            offsets = np.reshape(features1[i], (seq_length, 1))
            durations = np.reshape(features2[i], (seq_length, 1))
            pitches = np.reshape(features3[i], (seq_length, 1))

            enc_o[b] = offsets
            enc_q[b] = durations
            enc_p[b] = to_categorical(pitches, num_classes=n_classes)

            # Decoder input = target shifted right by one step, start value first.
            dec_o[b] = np.append([-1], offsets[:-1]).reshape(seq_length, 1)
            dec_q[b] = np.append([-1], durations[:-1]).reshape(seq_length, 1)
            dec_p[b] = to_categorical(
                np.append([0], pitches[:-1]).reshape(seq_length, 1),
                num_classes=n_classes,
            )

            i = (i + 1) % n_samples

        yield [enc_o, enc_q, enc_p, dec_o, dec_q, dec_p], [enc_o, enc_q, enc_p]

In [3]:
# Load every CSV file below the dataset directory into one flat list of
# float rows.  The next cell reads columns 0, 1 and 2 of each row as offset,
# quarter length and pitch.
stream_list = []

for path, subdirectories, files in os.walk('/kaggle/input/data-rock'):
    # Visit directories and files in sorted order so the row order, and with
    # it the positional train/validation split further down, is reproducible.
    subdirectories.sort()
    for name in sorted(files):
        with open(os.path.join(path, name), 'r', newline='') as f:
            # Blank lines come back from csv.reader as empty rows; skip them,
            # otherwise indexing row[0] in the next cell fails.
            sub_list = [list(map(float, rec)) for rec in csv.reader(f, delimiter=',') if rec]
        # extend() appends in place; `stream_list + sub_list` would copy the
        # whole accumulated list once per file.
        stream_list.extend(sub_list)

In [4]:
# Create a separate list for each variable (offset, quarter length, pitch).
# Only rows with offset <= 1200 and quarter length <= 50 are kept.  Offset and
# quarter length are divided by the largest value found within their own
# bound; pitch is stored unchanged.
offsb = max(rec[0] for rec in stream_list if rec[0] <= 1200.0)
qlngthb = max(rec[1] for rec in stream_list if rec[1] <= 50.0)

kept_rows = [rec for rec in stream_list if rec[0] <= 1200.0 and rec[1] <= 50.0]

offs = [rec[0] / offsb for rec in kept_rows]
qlngth = [rec[1] / qlngthb for rec in kept_rows]
ptch = [rec[2] for rec in kept_rows]

# The filtered row list was only a stepping stone.
del kept_rows

In [5]:
# Cut each variable into overlapping windows of seq_length consecutive events.
dtlngth = len(offs)
seq_length = 4

dataX1_o, dataX1_q, dataX1_p = (
    rolling_window(np.asarray(series), seq_length)
    for series in (offs, qlngth, ptch)
)

n_patterns = len(dataX1_p)
print("Total Patterns: ", n_patterns)

Total Patterns:  4963917


In [6]:
# Add a trailing feature axis so every array is [samples, time steps, features].
windowed_shape = (dtlngth - seq_length + 1, seq_length, 1)
dataX1_o = dataX1_o.reshape(windowed_shape)
dataX1_q = dataX1_q.reshape(windowed_shape)
dataX1_p = dataX1_p.reshape(windowed_shape)

In [7]:
# Hold out the last 10% of the windows as the validation set.
split_i = n_patterns*10 // 100

# This cell rebinds dataX1_o/q/p to their training part, so running it a
# second time would split the already shortened arrays again.  Fail loudly
# instead of silently producing wrong sets.
assert len(dataX1_o) == len(dataX1_q) == len(dataX1_p) == n_patterns, \
    "data is already split - re-run the windowing and reshape cells first"

# Slice with an explicit positive boundary.  With negative slicing and
# split_i == 0, x[-0:] is the whole array and x[:-0] is empty, which would
# put everything into validation and nothing into training.
train_end = n_patterns - split_i

dataX1_o_v = dataX1_o[train_end:]
dataX1_o = dataX1_o[:train_end]

dataX1_q_v = dataX1_q[train_end:]
dataX1_q = dataX1_q[:train_end]

dataX1_p_v = dataX1_p[train_end:]
dataX1_p = dataX1_p[:train_end]

In [8]:
# Problem configuration: pitch is one-hot encoded over 128 classes (0..127)
# and the decoder emits as many steps as the encoder reads.
n_features = 128
n_steps_out = seq_length

# Build the training model and the two inference models (256 LSTM units).
train, infenc, infdec = define_models(n_features, 256)

# One loss and one metric per output head, keyed by the output layer name.
output_losses = {
    'tr_out_o': 'mse',
    'tr_out_q': 'mse',
    'tr_out_p': 'categorical_crossentropy',
}
output_metrics = {
    'tr_out_o': 'mean_squared_error',
    'tr_out_q': 'mean_squared_error',
    'tr_out_p': 'accuracy',
}
train.compile(optimizer='adam', loss=output_losses, metrics=output_metrics)

In [9]:
# Train the model on batches streamed by generatorex.
batch_size = 5400
# One epoch should be one pass over the training windows, so count the
# windows themselves (len(dataX1_o)) rather than the raw events (dtlngth),
# and never ask for zero steps on a small data set.
steps_per_epoch = max(1, len(dataX1_o) // batch_size)
train.fit(generatorex(dataX1_o, dataX1_q, dataX1_p, seq_length, batch_size=batch_size),
          epochs=10, steps_per_epoch=steps_per_epoch)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fc1e4428a90>

In [10]:
# Spot check: decode 10 random validation windows and print, per time step,
# the input (X_*) next to the decoded value (y_*).  Offsets and quarter
# lengths are scaled back with offsb / qlngthb; pitch is shown as class index.
for _ in range(10):
    # Draw from the whole validation set.  A fixed range of 1..100000 skips
    # index 0, ignores everything past 100000 and raises IndexError when the
    # validation set is smaller than that.
    i = randint(0, len(dataX1_o_v) - 1)
    X1_o = np.reshape(dataX1_o_v[i], (1, seq_length, 1))
    X1_q = np.reshape(dataX1_q_v[i], (1, seq_length, 1))
    X1_p = np.reshape(to_categorical(dataX1_p_v[i], num_classes=n_features), (1, seq_length, n_features))
    # target is laid out as o, q, p, o, q, p, ... (three entries per step)
    target = predict_sequence(infenc, infdec, X1_o, X1_q, X1_p, n_steps_out, n_features)
    for j in range(seq_length):
        print('X_o=%s, y_o=%s, X_q=%s, y_q=%s, X_p=%s, y_p=%s' % (dataX1_o_v[i][j]*offsb, target[3*j]*offsb,
                                                                  dataX1_q_v[i][j]*qlngthb, target[3*j+1]*qlngthb,
                                                                  dataX1_p_v[i][j], one_hot_decode([target[3*j+2]])))
    print()

X_o=[335.], y_o=[293.4726], X_q=[0.25], y_q=[0.76060593], X_p=[45.], y_p=[45]
X_o=[335.], y_o=[290.98416], X_q=[0.25], y_q=[0.5146682], X_p=[52.], y_p=[52]
X_o=[335.], y_o=[283.11334], X_q=[0.25], y_q=[0.6598443], X_p=[57.], y_p=[57]
X_o=[335.], y_o=[320.0026], X_q=[0.25], y_q=[0.51357746], X_p=[61.], y_p=[61]

X_o=[69.75], y_o=[25.993637], X_q=[0.75], y_q=[0.89167655], X_p=[53.], y_p=[53]
X_o=[70.25], y_o=[34.1766], X_q=[0.], y_q=[0.6193444], X_p=[41.], y_p=[41]
X_o=[70.5], y_o=[29.75795], X_q=[0.25], y_q=[0.4404664], X_p=[55.], y_p=[55]
X_o=[70.5], y_o=[47.78494], X_q=[0.33333333], y_q=[0.84058344], X_p=[41.], y_p=[41]

X_o=[92.], y_o=[49.02166], X_q=[0.5], y_q=[0.99671334], X_p=[54.], y_p=[54]
X_o=[92.], y_o=[50.075966], X_q=[0.5], y_q=[0.5840838], X_p=[30.], y_p=[30]
X_o=[93.], y_o=[40.355106], X_q=[0.5], y_q=[0.98662674], X_p=[42.], y_p=[42]
X_o=[93.], y_o=[35.601383], X_q=[0.5], y_q=[1.5752971], X_p=[49.], y_p=[49]

X_o=[326.], y_o=[304.75006], X_q=[0.25], y_q=[0.6732911], X_p=[5