In [3]:
import pandas as pd
import numpy as np
import os

In [4]:
cols=os.listdir("splitcoords/train/data")

In [5]:
# load a single file as a numpy array
def load_file(filepath):
    dataframe = pd.read_csv(filepath, index_col=0)
    return dataframe.values

In [6]:
# load a list of files into a 3D array of [samples, timesteps, features]
def load_group(filenames, prefix=''):
    loaded = list()
    for name in filenames:
        data = load_file(prefix + name)
        loaded.append(data)
    # stack group so that features are the 3rd dimension
    loaded = np.dstack(loaded)
    return loaded

In [7]:
# load a dataset group, such as train or test
def load_dataset_group(group, prefix=''):
    filepath = prefix + group + '/data/'
    # load all 9 files as a single array
    filenames = cols
    # load input data
    X = load_group(filenames, filepath)
    # load class output
    y = load_file(prefix + group + '/y_'+group+'.csv')
    return X, y

In [8]:
# load the dataset, returns train and test X and y elements
def load_dataset(prefix=''):
    # load all train
    trainX, trainy = load_dataset_group('train')
    print(trainX.shape, trainy.shape)
    # load all test
    testX, testy = load_dataset_group('val')
    print(testX.shape, testy.shape)
    # zero-offset class values
    trainy = trainy - 1
    testy = testy - 1
    # one hot encode y
    trainy = to_categorical(trainy)
    testy = to_categorical(testy)
    print(trainX.shape, trainy.shape, testX.shape, testy.shape)
    return trainX, trainy, testX, testy

In [9]:
os.chdir("splitcoords")

In [10]:
from tensorflow.keras.utils import to_categorical

In [11]:
X_train, y_train, X_test, y_test=load_dataset()

(1137, 300, 99) (1137, 1)
(298, 300, 99) (298, 1)
(1137, 300, 99) (1137, 6) (298, 300, 99) (298, 6)


In [39]:
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Activation
from keras.layers import Embedding
from keras.layers import Bidirectional
from keras.layers import Dropout
from keras.layers import TimeDistributed
from keras.layers import Conv1D
from keras.layers import MaxPooling1D
from keras.layers import Flatten
from keras.layers import ConvLSTM2D

In [13]:
batch_size=64
epochs=15

In [47]:
# fit and evaluate a model
def evaluate_model(X_train, y_train, X_test, y_test):
    verbose, epochs, batch_size = 0, 15, 64
    n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], y_train.shape[1]
    model = Sequential()
    model.add(LSTM(100, input_shape=(n_timesteps,n_features)))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    return accuracy

In [None]:
# repeat experiment
scores1 = []
for r in range(10):
    score = evaluate_model(X_train, y_train, X_test, y_test)
    score = score * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores1.append(score)

In [36]:
from numpy import mean
from numpy import std

In [37]:
def summarize_results(scores):
    print(scores)
    m, s = mean(scores), std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

In [None]:
summarize_results(scores1)

In [17]:
#1D CNN added to model
def evaluate_model_CNN(X_train, y_train, X_test, y_test):
    # define model
    verbose, epochs, batch_size = 0, 25, 64
    n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[2], y_train.shape[1]
    # reshape data into time steps of sub-sequences
    n_steps, n_length = 10, 30
    X_train = X_train.reshape((X_train.shape[0], n_steps, n_length, n_features))
    X_test = X_test.reshape((X_test.shape[0], n_steps, n_length, n_features))
    # define model
    model = Sequential()
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu'), input_shape=(None,n_length,n_features)))
    model.add(TimeDistributed(Conv1D(filters=64, kernel_size=3, activation='relu')))
    model.add(TimeDistributed(MaxPooling1D(pool_size=2)))
    model.add(TimeDistributed(Flatten()))
    model.add(LSTM(100))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
    return accuracy

In [26]:
# repeat experiment with 1D CNN added
scores2 = []
for r in range(10):
    score = evaluate_model_CNN(X_train, y_train, X_test, y_test)
    score = score * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores2.append(score)

>#1: 71.812
>#2: 71.812
>#3: 69.463
>#4: 66.107
>#5: 70.470
>#6: 68.792
>#7: 64.430
>#8: 72.148
>#9: 66.107
>#10: 69.463


In [52]:
summarize_results(scores2)

[62.41610646247864, 68.45637559890747, 64.09395933151245, 59.06040072441101, 63.08724880218506, 53.35570573806763, 65.77181220054626, 67.1140968799591, 60.40268540382385, 62.41610646247864]
Accuracy: 62.617% (+/-4.124)


In [46]:
os.chdir("../splitcoords2")

In [49]:
# repeat experiment on smoothed data
scores3 = []
for r in range(10):
    score = evaluate_model(X_train, y_train, X_test, y_test)
    score = score * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores3.append(score)

>#1: 34.564
>#2: 30.872
>#3: 32.886
>#4: 27.181
>#5: 26.846
>#6: 31.544
>#7: 30.537
>#8: 29.866
>#9: 26.846
>#10: 35.235


In [50]:
summarize_results(scores3)

[34.563758969306946, 30.87248206138611, 32.88590610027313, 27.18120813369751, 26.8456369638443, 31.54362440109253, 30.536913871765137, 29.865771532058716, 26.8456369638443, 35.23489832878113]
Accuracy: 30.638% (+/-2.895)


In [53]:
# repeat experiment with 1D CNN added on smoothed data
scores4 = []
for r in range(10):
    score = evaluate_model_CNN(X_train, y_train, X_test, y_test)
    score = score * 100.0
    print('>#%d: %.3f' % (r+1, score))
    scores2.append(score)

>#1: 65.101
>#2: 66.443
>#3: 67.785
>#4: 71.812
>#5: 69.128
>#6: 66.107
>#7: 71.141
>#8: 67.114
>#9: 62.416
>#10: 67.114
