# MLP Implementation # 

In [1]:
# pip install tensorflow-addons

In [2]:
import os

import numpy as np
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import layers
from tqdm.notebook import tqdm
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_validate
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

# Load Dateset #

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [11]:
#data_path = '/content/drive/MyDrive/motionsense_dataset'
data_path = os.path.join(os.getcwd(), 'data')
save_path = os.path.join(os.getcwd(), 'temp_models')
print(os.listdir(data_path))

['dws_11', 'ups_12', '.DS_Store', 'wlk_7', 'std_14', 'wlk_15', 'wlk_8', 'dws_2', 'sit_13', 'jog_9', 'ups_3', 'ups_4', 'jog_16', 'dws_1', 'sit_5', 'std_6']


In [5]:
folders = glob(os.path.join(data_path,'*_*'))
folders = [s for s in folders if ("csv" not in s) or (".DS_Store" not in s) or (".ipynb" not in s)]
df_all_list = []
activity_codes = {'dws':0,'jog':1,'sit':2,'std':3,'ups':4,'wlk':5}
activity_decodes = {0:'dws',1:'jog',2:'sit',3:'std',4:'ups',5:'wlk'}
activity_types = list(activity_codes.keys())
print(folders)

['/Users/apple/Desktop/CG4002_Capstone/ML/data/dws_11', '/Users/apple/Desktop/CG4002_Capstone/ML/data/ups_12', '/Users/apple/Desktop/CG4002_Capstone/ML/data/wlk_7', '/Users/apple/Desktop/CG4002_Capstone/ML/data/std_14', '/Users/apple/Desktop/CG4002_Capstone/ML/data/wlk_15', '/Users/apple/Desktop/CG4002_Capstone/ML/data/wlk_8', '/Users/apple/Desktop/CG4002_Capstone/ML/data/dws_2', '/Users/apple/Desktop/CG4002_Capstone/ML/data/sit_13', '/Users/apple/Desktop/CG4002_Capstone/ML/data/jog_9', '/Users/apple/Desktop/CG4002_Capstone/ML/data/ups_3', '/Users/apple/Desktop/CG4002_Capstone/ML/data/ups_4', '/Users/apple/Desktop/CG4002_Capstone/ML/data/jog_16', '/Users/apple/Desktop/CG4002_Capstone/ML/data/dws_1', '/Users/apple/Desktop/CG4002_Capstone/ML/data/sit_5', '/Users/apple/Desktop/CG4002_Capstone/ML/data/std_6']


In [6]:
#Feature extraction for NN model e.x. MLP
window = 500
feature_n = 6
attributes = ['attitude.roll', 'attitude.pitch', 'attitude.yaw','userAcceleration.x','userAcceleration.y','userAcceleration.z']
data_all_x_list = []
data_all_y_list = []
for j in folders:
    csv_list = glob(j + '/*')
    label = j.split('/')[-1].split('_')[0]
    for i in csv_list:
        df = pd.read_csv(
            i,
            usecols = ['attitude.roll','attitude.pitch',
                       'attitude.yaw','userAcceleration.x',
                       'userAcceleration.y','userAcceleration.z']
         )
        win_count = int(df.shape[0] / (window / 2)) - 1
        data_x = np.zeros((win_count, df.shape[1] * 5))
        data_y = np.zeros(win_count)

        length = len(df)
        for c in range(win_count):
            start_idx = int(c * (window / 2))
            end_idx = start_idx + window
            temp_list = []
            for k in range(feature_n):
                means = df[start_idx:end_idx][attributes[k]].mean()
                stds = df[start_idx:end_idx][attributes[k]].mean()
                mins = df[start_idx:end_idx][attributes[k]].min()
                maxs = df[start_idx:end_idx][attributes[k]].max()
                mads = df[start_idx:end_idx][attributes[k]].mad()
                temp_list.append(means)
                temp_list.append(stds)
                temp_list.append(mins)
                temp_list.append(maxs)
                temp_list.append(mads)
            data_x[c] = temp_list
            data_y[c] = activity_codes[label]

        data_all_x_list.append(data_x)
        data_all_y_list.append(data_y)
data_all_x = np.concatenate(data_all_x_list,axis=0)
data_all_y = np.concatenate(data_all_y_list,axis=0)
data_all_y = data_all_y.astype(int)
print(data_all_x.shape)
print(data_all_y.shape)

(5102, 30)
(5102,)


## MLP Train & Test ##

In [7]:
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

In [8]:
train_x,test_x,train_y,test_y = train_test_split(data_all_x,data_all_y,test_size=0.2)
assert(len(train_x) == len(train_y))
assert(len(test_x) == len(test_y))
print(train_x.shape, test_x.shape)
print(train_y.shape, test_y.shape)

(4081, 30) (1021, 30)
(4081,) (1021,)


In [9]:
kfold = KFold(n_splits=5, shuffle=True)

In [None]:

# train_x = tf.convert_to_tensor(train_x, dtype=tf.float32)
# test_x = tf.convert_to_tensor(test_x, dtype=tf.float32)
# train_y = tf.convert_to_tensor(tf.keras.utils.to_categorical(train_y,  num_classes=6), dtype=tf.float32)
# test_y = tf.convert_to_tensor(tf.keras.utils.to_categorical(test_y,  num_classes=6), dtype=tf.float32)

In [None]:
# train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y))
# test_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y))

In [None]:
# BATCH_SIZE = 32
# SHUFFLE_BUFFER_SIZE = 100

# train_dataset = train_dataset.shuffle(SHUFFLE_BUFFER_SIZE).batch(BATCH_SIZE)
# test_dataset = test_dataset.batch(BATCH_SIZE)

In [None]:
# for i,j in train_dataset.take(1):
#     print(i.numpy().shape)
#     print(j.numpy().shape)

(30,)
(6,)


In [15]:
num_classes = 6
num_features = 30
acc_list = {}
f1_list = {}
train_acc_list = {}
fold_no = 1
epochs = 15
best_id = 1
best_acc = 0

for train, test in tqdm(kfold.split(train_x, train_y)):
    model = tf.keras.Sequential([
      tf.keras.Input(shape=train_x.shape[1]),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(32, activation='relu'),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(num_classes, activation='softmax')
  ])
    model.compile(optimizer='adam',
              loss= tf.keras.losses.SparseCategoricalCrossentropy(),
              metrics=['accuracy'])

    history = model.fit(
      train_x[train],
      train_y[train],
      epochs=epochs
    )
    train_acc = history.history['accuracy']
    y_pred = model.predict(train_x[test], verbose=0)
    # print(y_pred)
    y_pred_1 = np.argmax(y_pred, axis = 1)
    # print(y_pred_1)
    # print(train_y[test])
    train_y_1 = np.zeros((train_y[test].size, train_y[test].max()+1))
    train_y_1[np.arange(train_y[test].size),train_y[test]] = 1
    # print(train_y_1)
    f1_metric = tfa.metrics.F1Score(num_classes= num_classes, average = 'macro', threshold=None)
    acc_metric = tf.keras.metrics.Accuracy()
    acc_metric.update_state(train_y[test], y_pred_1)
    acc = acc_metric.result().numpy()
    # print(acc)
    y_pred_1 = y_pred_1.astype(np.float32)
    f1_metric.update_state(train_y_1, y_pred)
    f1_macro = f1_metric.result().numpy()
    if acc > best_acc:
        best_acc = acc
        best_id = fold_no
    # print(f1_macro)
    print(f"accuracy for fold {fold_no}: {acc}; f1_macro is {f1_macro}")
    acc_list[fold_no] = acc
    train_acc_list[fold_no] = train_acc
    f1_list[fold_no] = f1_macro
    model.save(os.path.join(save_path, f"mlp_{fold_no}.h5"))
    fold_no += 1


0it [00:00, ?it/s]

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
accuracy for fold 1: 0.9069767594337463; f1_macro is 0.8669610619544983
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
accuracy for fold 2: 0.9007353186607361; f1_macro is 0.8536238074302673
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
accuracy for fold 3: 0.8774510025978088; f1_macro is 0.8328812122344971
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15
accuracy for fold 4: 0.8786764740943909; f1_macro is 0.8120880722999573
Epoch 1/15
Epoch 2/15
Epoch 

In [16]:
print(acc_list)
print(f1_list)
print(train_acc_list)
print(best_id)

{1: 0.90697676, 2: 0.9007353, 3: 0.877451, 4: 0.8786765, 5: 0.9142157}
{1: 0.86696106, 2: 0.8536238, 3: 0.8328812, 4: 0.8120881, 5: 0.8601149}
{1: [0.4264705777168274, 0.6311274766921997, 0.717524528503418, 0.7457107901573181, 0.7720588445663452, 0.7864583134651184, 0.8134191036224365, 0.8207720518112183, 0.8305760025978088, 0.8416053652763367, 0.841911792755127, 0.8630514740943909, 0.8639705777168274, 0.8679534196853638, 0.875612735748291], 2: [0.4192955493927002, 0.6122511625289917, 0.6875957250595093, 0.7267993688583374, 0.758958637714386, 0.7785605192184448, 0.7966309189796448, 0.8064318299293518, 0.8147013783454895, 0.82756507396698, 0.8336906433105469, 0.8505359888076782, 0.8493108749389648, 0.8591117858886719, 0.8633996844291687], 3: [0.430321604013443, 0.5908116102218628, 0.7022970914840698, 0.7479326128959656, 0.784073531627655, 0.7926493287086487, 0.8125573992729187, 0.8309341669082642, 0.8260337114334106, 0.8376722931861877, 0.8382848501205444, 0.8514547944068909, 0.84471666

In [17]:
best_model = tf.keras.models.load_model(os.path.join(save_path, f"mlp_{best_id}.h5"))
y_pred = best_model.predict(test_x, verbose=0)
# print(y_pred)
y_pred_1 = np.argmax(y_pred, axis = 1)
# print(y_pred_1)
# print(train_y[test])
train_y_1 = np.zeros((test_y.size, test_y.max()+1))
train_y_1[np.arange(test_y.size),test_y] = 1
# print(train_y_1)
f1_metric = tfa.metrics.F1Score(num_classes= num_classes, average = 'macro', threshold=None)
acc_metric = tf.keras.metrics.Accuracy()
acc_metric.update_state(test_y, y_pred_1)
acc = acc_metric.result().numpy()
# print(acc)
y_pred_1 = y_pred_1.astype(np.float32)
f1_metric.update_state(train_y_1, y_pred)
f1_macro = f1_metric.result().numpy()
print(f"test accuracy is {acc}")
print(f"test f1 macro is {f1_macro}")

test accuracy is 0.9040156602859497
test f1 macro is 0.8549318909645081


In [18]:
single_sample = test_x[-1]
sample_label = activity_decodes[test_y[-1]]
prediction = activity_decodes[y_pred[-1]]
plt.figure(figsize=(15, 8))
plt.plot(single_sample)
plt.title(f"Sample Label: {sample_label}; Predict: {prediction}")
plt.show()

TypeError: unhashable type: 'numpy.ndarray'