In [None]:
# Mount Google Drive at /content/gdrive (Colab-only helper) so that files
# stored on Drive are reachable through the local filesystem.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# load dataset
from numpy import genfromtxt
import os
from pandas import DataFrame
import numpy as np
import pandas as pd
import keras
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint

# load a list of files
def load_group(filenames, prefix=''):
    """Load a list of CSV files into numpy arrays.

    Files are classified by their suffix:

    * names ending in ``x.csv`` are loaded and returned unchanged;
    * names ending in ``y.csv`` are loaded and then resampled with
      ``nn_interpolate`` so that they have as many rows as the sibling
      ``...x.csv`` file (same name with the trailing ``y.csv`` replaced).

    Names with any other suffix are skipped.

    Args:
        filenames: iterable of file names (relative to ``prefix``).
        prefix: string prepended verbatim to every file name; include the
            trailing path separator when it is a directory.

    Returns:
        list of numpy arrays, one per recognised file, in input order.
    """
    x_suffix = 'x.csv'
    y_suffix = 'y.csv'
    loaded = list()
    for name in filenames:
        # Classify on the suffix rather than on "letter appears anywhere in
        # the name": a substring test mislabels files such as 'yaw__x.csv'
        # and appends them twice.
        if name.endswith(x_suffix):
            loaded.append(genfromtxt(prefix + name, delimiter=','))
        elif name.endswith(y_suffix):
            data = genfromtxt(prefix + name, delimiter=',')
            xData = genfromtxt(prefix + name[:-len(y_suffix)] + x_suffix, delimiter=',')
            # Only the row count of the x file drives the resampling.
            interpolated_data = nn_interpolate(data, (xData.shape[0], xData.shape[1]))
            loaded.append(interpolated_data)
    return loaded


def load_dataset(group, prefix=''):
    """Load every input/label CSV pair belonging to one group.

    A file belongs to the group when its name starts with ``group``; input
    files end in ``__x.csv`` and label files end in ``__y.csv``.

    Args:
        group: file-name prefix identifying the group (e.g. a subject id).
        prefix: directory to scan, passed verbatim to ``load_group`` (so it
            should end with a path separator). An empty string means the
            current working directory.

    Returns:
        tuple ``(X, y)`` of two lists of arrays as produced by ``load_group``.
        Both lists are ordered by file stem, so ``X[k]`` and ``y[k]`` come
        from the same recording whenever every input file has a label file.
    """
    x_suffix = '__x.csv'
    y_suffix = '__y.csv'

    # os.listdir('') raises, whereas load_group resolves '' + name against the
    # current directory; use '.' so both agree.
    entries = os.listdir(prefix or '.')

    def _matching(suffix):
        # os.listdir returns names in arbitrary order; sort on the stem (name
        # without the suffix) so the x and y lists line up pairwise.
        names = [f for f in entries if f.startswith(group) and f.endswith(suffix)]
        return sorted(names, key=lambda f: f[:-len(suffix)])

    filenamesX = _matching(x_suffix)
    filenamesY = _matching(y_suffix)

    # load input data
    X = load_group(filenamesX, prefix)
    # load class output
    y = load_group(filenamesY, prefix)
    return X, y


def nn_interpolate(A, new_size):
    """Vectorized nearest-neighbour resampling of ``A`` along its first axis.

    Output row ``i`` (1-based) is taken from input row
    ``ceil(i * old_rows / new_rows) - 1``. The index is computed with exact
    integer arithmetic, so the result always has exactly ``new_rows`` rows and
    never indexes past the end of ``A``. A floating-point ratio can lose a row
    to rounding, e.g. ``int(49 * (1 / 49)) == 0``.

    Args:
        A: array-like with at least one dimension; only axis 0 is resampled.
        new_size: target size; only its first element (the row count) is used.
            Remaining elements are accepted for backward compatibility and
            ignored.

    Returns:
        numpy array with ``new_size[0]`` rows and the trailing dimensions of
        ``A``.

    Raises:
        ValueError: if the requested row count is negative, or if ``A`` has no
            rows while a positive row count is requested.
    """
    A = np.asarray(A)
    old_rows = A.shape[0]
    new_rows = int(np.atleast_1d(new_size)[0])

    if new_rows < 0:
        raise ValueError("new_size[0] must be non-negative, got %d" % new_rows)
    if new_rows == 0:
        return A[:0]
    if old_rows == 0:
        raise ValueError("cannot interpolate an array with no rows")

    # row wise interpolation: integer form of ceil(i * old_rows / new_rows) - 1
    positions = np.arange(1, new_rows + 1, dtype=np.int64)
    row_idx = (positions * old_rows + new_rows - 1) // new_rows - 1

    final_matrix = A[row_idx]

    return final_matrix


# # summarize the balance of classes in an output variable column
# def class_breakdown(data):
#     # convert the numpy array into a dataframe
#     df = DataFrame(data)
#     # group data by the class value and calculate the number of rows
#     counts = df.groupby(0).size()
#     # retrieve raw rows
#     counts = counts.values
#     # summarize
#     for i in range(len(counts)):
#         percent = counts[i] / len(df) * 100
#         print('Class=%d, total=%d, percentage=%.3f' % (i + 1, counts[i], percent))

def pre_processing(X, to_pad=70172, seq_len=60000):
    """Bring every sequence in ``X`` to a common length and stack them.

    Each sequence is first extended to ``to_pad`` steps by repeating its last
    step, then cut (or zero-padded at the end) to ``seq_len`` steps. The final
    cut/zero-pad matches ``pad_sequences(..., padding='post',
    truncating='post', dtype='float')``.

    Args:
        X: iterable of files, each an iterable of sequences. Each sequence is
            assumed to be array-like with time along axis 0 and the same
            trailing shape across all sequences (TODO confirm with callers).
        to_pad: length every sequence is extended to by repeating its last
            step.
        seq_len: final number of steps kept per sequence.

    Returns:
        float numpy array of shape ``(n_sequences, seq_len, ...)``.

    Raises:
        ValueError: if ``X`` contains no sequences, a sequence is empty, or a
            sequence is longer than ``to_pad``.
    """
    extended = []
    for one_file in X:
        for one_seq in one_file:
            one_seq = np.asarray(one_seq, dtype=float)
            len_one_seq = len(one_seq)
            if len_one_seq == 0:
                raise ValueError("cannot pad an empty sequence")
            n = to_pad - len_one_seq
            if n < 0:
                raise ValueError(
                    "sequence of length %d is longer than to_pad=%d" % (len_one_seq, to_pad)
                )
            # Repeat the last step n times; works for any number of features.
            to_concat = np.repeat(one_seq[-1:], n, axis=0)
            extended.append(np.concatenate([one_seq, to_concat]))

    if not extended:
        raise ValueError("X contains no sequences")

    # Stack once, after all files have been processed.
    stacked = np.stack(extended)

    # Post-truncate to seq_len, zero-filling the tail when to_pad < seq_len.
    keep = min(to_pad, seq_len)
    final_seq = np.zeros((stacked.shape[0], seq_len) + stacked.shape[2:], dtype=float)
    final_seq[:, :keep] = stacked[:, :keep]
    return final_seq

def data_finalize(trainX, trainy, valX, valY):
    """Flatten per-subject lists of arrays into four contiguous arrays.

    Args:
        trainX: list (one entry per subject) of lists of 2-D feature arrays.
        trainy: list (one entry per subject) of lists of 1-D label arrays.
        valX: same layout as ``trainX``, for the validation subjects.
        valY: same layout as ``trainy``, for the validation subjects.

    Returns:
        tuple ``(train, train_target, validation, val_target)`` where the
        feature arrays are stacked row-wise and the label arrays are
        concatenated end to end, subject by subject.
    """

    def _stack_rows(subjects):
        # vstack each subject's files, then vstack the subjects themselves
        per_subject = [np.vstack(files) for files in subjects]
        return np.vstack(per_subject)

    def _join_labels(subjects):
        # concatenate each subject's label arrays, then all subjects
        per_subject = [np.concatenate(files) for files in subjects]
        return np.concatenate(per_subject)

    train = _stack_rows(trainX)
    train_target = _join_labels(trainy)
    validation = _stack_rows(valX)
    val_target = _join_labels(valY)

    return train, train_target, validation, val_target

def make_model(input_shape, val):
    """Build a small 1-D convolutional classifier.

    The network is three identical blocks (Conv1D with 64 filters and kernel
    size 3, batch normalisation, ReLU), followed by global average pooling and
    a softmax output layer.

    Args:
        input_shape: shape of one input sample, passed to ``keras.layers.Input``.
        val: array whose number of distinct values sets the width of the
            output layer (presumably the class labels; verify at call sites).

    Returns:
        an uncompiled ``keras.models.Model``.
    """
    num_classes = len(np.unique(val))
    layers = keras.layers

    inputs = layers.Input(input_shape)

    # Three identical conv -> batch-norm -> ReLU stages chained one after another.
    features = inputs
    for _ in range(3):
        features = layers.Conv1D(filters=64, kernel_size=3, padding="same")(features)
        features = layers.BatchNormalization()(features)
        features = layers.ReLU()(features)

    pooled = layers.GlobalAveragePooling1D()(features)
    outputs = layers.Dense(num_classes, activation="softmax")(pooled)

    return keras.models.Model(inputs=inputs, outputs=outputs)

# Directory holding the per-subject '<subject>...__x.csv' / '...__y.csv' files.
DATA_DIR = '/content/gdrive/MyDrive/data/'

# load all train (subjects 1-5) and validation (subjects 6-8) recordings
trainX = list()
trainy = list()
validationX = list()
validationY = list()
for i in range(1, 6):
    trX, trY = load_dataset('subject_00' + str(i), DATA_DIR)
    trainX.append(trX)
    trainy.append(trY)
for i in range(6, 9):
    valX, valY = load_dataset('subject_00' + str(i), DATA_DIR)
    validationX.append(valX)
    validationY.append(valY)
train, train_target, val, val_target = data_finalize(trainX, trainy, validationX, validationY)

# Features and labels must have the same number of rows. Trim both to the
# common length instead of dropping a hard-coded number of feature rows, and
# report when trimming was needed because it points at an upstream mismatch.
n_train = min(len(train), len(train_target))
if len(train) != len(train_target):
    print('WARNING: train rows (%d) != train labels (%d); trimming to %d'
          % (len(train), len(train_target), n_train))
train, train_target = train[:n_train], train_target[:n_train]

n_val = min(len(val), len(val_target))
if len(val) != len(val_target):
    print('WARNING: validation rows (%d) != validation labels (%d); trimming to %d'
          % (len(val), len(val_target), n_val))
val, val_target = val[:n_val], val_target[:n_val]

# Add a trailing channel axis for Conv1D; validation data needs the same
# layout as the training data.
train = np.expand_dims(train, 2)
val = np.expand_dims(val, 2)
print(train.shape)
print(train_target.shape)
print(val.shape)
print(val_target.shape)

# The output layer width is the number of distinct values in `val`, so pass the
# LABELS. Passing the feature matrix makes the layer as wide as the number of
# distinct feature values and exhausts memory.
# NOTE(review): sparse_categorical_crossentropy expects labels 0..K-1 - confirm
# the label files use that encoding.
all_targets = np.concatenate([train_target, val_target])
model = make_model(input_shape=train.shape[1:], val=all_targets)
keras.utils.plot_model(model, show_shapes=True)

epochs = 500
batch_size = 256

callbacks = [
    # keep only the weights with the lowest validation loss
    keras.callbacks.ModelCheckpoint(
        "best_model.h5", save_best_only=True, monitor="val_loss"
    ),
    # halve the learning rate after 20 epochs without improvement
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
    ),
    # stop after 50 epochs without improvement
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
history = model.fit(
    train,
    train_target,
    validation_data=(val, val_target),
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    shuffle=True,
    verbose=1
)

(963420, 6, 1)
(963420,)
(378224, 6)
(378224,)
Epoch 1/500


ResourceExhaustedError: ignored