
# LOCOMOTION Activity Recognition using LSTM Neural Network

# Reading the dataset

In [1]:
import pandas as pd
import numpy as np
import glob

import tensorflow as tf
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader

from sklearn.metrics import precision_score, recall_score, f1_score, jaccard_score,confusion_matrix, plot_confusion_matrix
import time
from typing import Any
import torch
import torch.nn as nn

import tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout, LSTM
from sklearn.model_selection import KFold, StratifiedKFold, RepeatedKFold
from sklearn.metrics import f1_score, confusion_matrix, plot_confusion_matrix
from pandas import DataFrame
from keras.models import load_model


# Per-recording CSV exports. The path is absolute and specific to the machine
# the notebook was authored on. `glob` returns matches in arbitrary order, so
# the list is sorted to make the concatenated row order reproducible.
files = sorted(glob.glob('/data/shk/dl-for-har/tutorial_notebooks/Prof_New/*.csv'))
if not files:
    # Fail early with a clear message instead of pandas' "No objects to concatenate".
    raise FileNotFoundError('No CSV files found in /data/shk/dl-for-har/tutorial_notebooks/Prof_New/')

name = [file.split('/')[-1] for file in files]
df = pd.concat(map(pd.read_csv,files),ignore_index = True)

data = df

# Keep only rows that carry a usable locomotion label. The mask is built once
# from `data` and applied to `data`, so the boolean key and the frame share the
# same index (the previous version filtered `clean_data` with a mask computed
# on the unfiltered `data`).
has_label = data['locomotion'].notna() & (data['locomotion'] != 'not_labeled')
clean_data = data[has_label].copy()

# Fold 'layup' into 'jumping'. Assigning the result back (instead of
# `replace(..., inplace=True)` on a column selection) guarantees the change
# actually lands in `clean_data`.
clean_data['locomotion'] = clean_data['locomotion'].replace({'layup':'jumping'})
print('Before_game',len(clean_data))

# Rows whose `coarse` value is 'game' are held out as a separate test set;
# everything else is used for training/validation.
is_game = clean_data['coarse'] == 'game'

# For both subsets: drop the two annotation columns, then keep the first six
# remaining columns (the last of which is `locomotion`, used as the label below).
new_data = clean_data[~is_game].drop(columns=['coarse','basketball']).iloc[:,:6].copy()
print('new_data',new_data.shape)
print('____')

game_data = clean_data[is_game].drop(columns=['coarse','basketball']).iloc[:,:6].copy()
print('game_data',game_data.shape)

2022-08-15 00:45:39.999222: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-15 00:45:39.999245: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Before_game 2710751
new_data (1982320, 6)
____
game_data (728431, 6)


# Labelling the data

In [2]:
# Label column and its class distribution (kept for inspection/plotting).
label = new_data['locomotion']
x_axis = sorted(label.unique())
y_axis = label.value_counts()

# Every column except the trailing label column. An explicit copy is taken so
# that later in-place writes to `new_data` (the labelling step assigns into the
# frame) can never leak into the feature values through a shared buffer.
X = new_data.iloc[:,:-1].copy()

# Index in this list == integer class code used throughout the notebook.
class_names = ['walking','running','standing','sitting','jumping']
num_classes = len(class_names)

def labelling(clean_data,data_y):
    """Translate activity names into integer class codes.

    Mapping: walking -> 0, running -> 1, standing -> 2, sitting -> 3,
    jumping -> 4. Values that are not one of these names are passed through
    unchanged, so a later `.astype(int)` still fails loudly on unexpected labels.

    The previous implementation assigned `clean_data[mask] = code`, which
    overwrote *every column* of the matching rows with the class code and only
    returned usable codes when `data_y` happened to share memory with
    `clean_data`. This version is a pure function of `data_y`.

    Args:
        clean_data: Frame the labels belong to. Kept for interface
            compatibility; it is neither read nor modified.
        data_y: pandas Series of activity names.

    Returns:
        A new Series, indexed like `data_y`, holding the class codes.
    """
    class_codes = {'walking': 0, 'running': 1, 'standing': 2, 'sitting': 3, 'jumping': 4}
    return data_y.map(lambda activity: class_codes.get(activity, activity))

# Integer class code for every row of `new_data`.
y = labelling(new_data,new_data.iloc[:,-1]).astype(int)

# Append the codes as the last column of the feature matrix, then drop the
# first feature column. `y` is converted to a NumPy array before adding the
# column axis: 2-D indexing directly on a pandas Series (`y[:, None]`) was
# deprecated and is rejected by current pandas releases.
data_labelled = np.concatenate((X, y.to_numpy()[:,None]), axis=1)[:,1:]



# Special preprocessing: per-class 80/20 train/validation split

In [3]:
# One sub-array per activity: the last column of `data_labelled` holds the
# integer class code (0=walking, 1=running, 2=standing, 3=sitting, 4=jumping).
(data_walking,
 data_running,
 data_standing,
 data_sitting,
 data_jumping) = [data_labelled[data_labelled[:,-1] == code] for code in range(5)]


def spilt(data, train_fraction=0.8, shuffle=True, seed=None):
    """Split one activity's rows into training and validation parts.

    Columns 0-3 of each row are returned as features and column 4 as the label.

    Args:
        data: 2-D array-like with at least five columns.
        train_fraction: Share of rows assigned to the training part
            (default 0.8, the previously hard-coded value).
        shuffle: When True (default, the original behaviour) rows are assigned
            with `torch.utils.data.random_split`, i.e. in random order. Note
            that this scrambles the sample order, so windows cut from the
            result no longer contain consecutive samples. Pass False for an
            order-preserving split (first part train, remainder validation).
        seed: Optional integer seed that makes the shuffled split reproducible.
            Ignored when `shuffle` is False.

    Returns:
        Tuple `(X, Y, X_v, y_v)`: training features, training labels,
        validation features, validation labels.
    """
    rows = np.asarray(data)
    train_size = int(train_fraction * len(rows))
    test_size = len(rows) - train_size

    if shuffle:
        if seed is None:
            train_part, valid_part = torch.utils.data.random_split(rows, [train_size, test_size])
        else:
            generator = torch.Generator().manual_seed(seed)
            train_part, valid_part = torch.utils.data.random_split(
                rows, [train_size, test_size], generator=generator)
        # Indexing a Subset with a full slice materialises its rows as an array.
        train_rows, valid_rows = train_part[:], valid_part[:]
    else:
        train_rows, valid_rows = rows[:train_size], rows[train_size:]

    X = train_rows[:,0:4]
    Y = train_rows[:,4]

    X_v = valid_rows[:,0:4]
    y_v = valid_rows[:,4]

    return X,Y,X_v,y_v

# Split each activity on its own so every class contributes the same
# train/validation proportion. The call order is kept fixed because the
# split draws from the global random state.
(X_walking, Y_walking, X_v_walking, y_v_walking) = spilt(data_walking)
(X_running, Y_running, X_v_running, y_v_running) = spilt(data_running)
(X_standing, Y_standing, X_v_standing, y_v_standing) = spilt(data_standing)
(X_sitting, Y_sitting, X_v_sitting, y_v_sitting) = spilt(data_sitting)
(X_jumping, Y_jumping, X_v_jumping, y_v_jumping) = spilt(data_jumping)

# Stack the per-class pieces back together (class after class) and drop the
# first feature column of the stacked matrices.
X = np.vstack((X_walking, X_running, X_standing, X_sitting, X_jumping))[:,1:]
Y = np.hstack((Y_walking, Y_running, Y_standing, Y_sitting, Y_jumping))

X_v = np.vstack((X_v_walking, X_v_running, X_v_standing, X_v_sitting, X_v_jumping))[:,1:]
y_v = np.hstack((y_v_walking, y_v_running, y_v_standing, y_v_sitting, y_v_jumping))

# Features with the label appended as the last column.
train_data = np.hstack((X, Y.reshape(-1, 1)))
valid_data = np.hstack((X_v, y_v.reshape(-1, 1)))

# Sliding Window

In [15]:
def sliding_window(data, samples_per_window, overlap_ratio):
    """Cut `data` into fixed-length windows along its first axis.

    Args:
        data: Array-like whose first axis is the sample axis.
        samples_per_window: Window length in samples (converted with `int`).
        overlap_ratio: Overlap between consecutive windows as a percentage of
            the window length (e.g. 25 for 25 %). `None` means no overlap.

    Returns:
        Tuple `(windows, indices)`:
        * `windows` has shape `(n_windows, win_len, *data.shape[1:])`.
        * `indices` has shape `(n_windows, 2)` with the `[start, end)` sample
          positions of each window.
        When `data` is shorter than one window both arrays are empty but keep
        these ranks.

    Raises:
        ValueError: If the window length is not positive or the overlap is not
            smaller than the window (the window start would never advance).
    """
    data = np.asarray(data)
    win_len = int(samples_per_window)
    if win_len <= 0:
        raise ValueError('Window size must be a positive number of samples.')

    # `None` previously left this variable undefined and crashed in the loop.
    if overlap_ratio is None:
        overlapping_elements = 0
    else:
        overlapping_elements = int((overlap_ratio / 100) * (win_len))
    if overlapping_elements >= win_len:
        # Previously printed a message and returned None, which the caller's
        # tuple-unpacking turned into an unrelated TypeError.
        raise ValueError('Number of overlapping elements exceeds window size.')

    step = win_len - overlapping_elements
    # `+ 1` keeps the final window when it ends exactly on the last sample.
    starts = range(0, len(data) - win_len + 1, step)

    if len(starts) == 0:
        empty_windows = np.empty((0, win_len) + data.shape[1:], dtype=data.dtype)
        return empty_windows, np.empty((0, 2), dtype=int)

    result_windows = np.stack([data[start:start + win_len] for start in starts])
    result_indices = np.array([[start, start + win_len] for start in starts])
    return result_windows, result_indices

def apply_sliding_window(data_x, data_y, sliding_window_size, sampling_rate, sliding_window_overlap):
    """Window the features and the labels with identical settings.

    Both inputs go through `sliding_window` with the same window size and
    overlap; only the window arrays are kept, the index arrays are discarded.
    `sampling_rate` is accepted but not used.

    Returns:
        Tuple `(output_x, output_y)` of windowed features and windowed labels.
    """
    return tuple(
        sliding_window(sequence, sliding_window_size, sliding_window_overlap)[0]
        for sequence in (data_x, data_y)
    )

sw_length = 50   # samples per window
sw_overlap = 25  # window overlap, in percent of the window length

# Window the training and validation streams with the same settings.
X_train, y_train = apply_sliding_window(X, Y, sw_length, sampling_rate=50,sliding_window_overlap=sw_overlap)
X_valid, y_valid = apply_sliding_window(X_v, y_v, sw_length, sampling_rate=50,sliding_window_overlap=sw_overlap)

# Features as float32, labels as uint8.
X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.uint8)
X_valid = X_valid.astype(np.float32)
y_valid = y_valid.astype(np.uint8)

print("\nShape of the X_train and Y_train datasets after windowing: ")
print(X_train.shape, y_train.shape)

print("\nShape of the X_valid and y_valid datasets after windowing: ")
print(X_valid.shape, y_valid.shape)

# Each window is labelled with the class of its first sample.
y_train, y_valid = y_train[:,0], y_valid[:,0]

# Training and validation windows stacked together.
data_X = np.concatenate([X_train, X_valid])

print("\nShape of the X after windowing: ")
print(data_X.shape)

print("\nShape of the Y after windowing: ")
data_Y = np.concatenate([y_train, y_valid])
print(data_Y.shape)


Shape of the X_train and Y_train datasets after windowing: 
(41732, 50, 3) (41732, 50)

Shape of the X_valid and y_valid datasets after windowing: 
(10432, 50, 3) (10432, 50)

Shape of the X after windowing: 
(52164, 50, 3)

Shape of the Y after windowing: 
(52164,)


# Network

In [8]:
def Network(X_train,y_train,X_valid, y_valid):
    """Build, train, evaluate and save the LSTM classifier.

    The model (one LSTM layer followed by a dense head with a 5-way softmax)
    is fitted for 4 epochs with `(X_valid, y_valid)` as Keras validation data.
    Macro-averaged metrics are printed for both splits, and the weights plus
    the full model are written below `./Pre_Trained_models/`.

    Note: the figure printed as "accuracy" is the macro-averaged Jaccard
    score, not the Keras accuracy that ends up in `loss_acc`.

    Returns:
        Tuple `(train_preds, train_gt, test_preds, test_gt, loss_acc, c_matrix)`:
        predicted and true class codes for the training and validation
        windows (integer arrays), a one-element list with the
        `model.evaluate` result on the validation windows, and a one-element
        list with the validation confusion matrix.
    """

    def macro_scores(truth, predicted):
        # Jaccard, precision, recall and F1, all macro-averaged.
        return (
            jaccard_score(truth, predicted, average='macro'),
            precision_score(truth, predicted, average='macro', zero_division=0),
            recall_score(truth, predicted, average='macro', zero_division=0),
            f1_score(truth, predicted, average='macro', zero_division=0),
        )

    def as_int_vector(values):
        # Integer copy of `values`, joined onto an empty integer vector.
        return np.concatenate((np.array([], int), np.array(values, int)))

    window_shape = [X_train.shape[1], X_train.shape[2]]
    model = Sequential([
        LSTM(units=26, input_shape=window_shape, activation='relu', return_sequences=True),
        Dropout(rate=0.1),
        Flatten(),
        Dense(units=5000, activation='relu'),
        Dropout(rate=0.2),
        Dense(units=2000, activation='relu'),
        Dense(units=800, activation='relu'),
        Dropout(rate=0.3),
        Dense(units=5, activation='softmax'),
    ])

    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        metrics=['accuracy'],
    )

    model.fit(X_train, y_train, epochs=4, validation_data=(X_valid, y_valid), verbose=1)

    # Class code with the highest softmax probability for every window.
    y_pred_train = np.argmax(model.predict(X_train), axis=-1)
    y_pred_test = np.argmax(model.predict(X_valid), axis=-1)

    # Training split
    jac, prec, rec, f1 = macro_scores(y_train, y_pred_train)
    train_preds = as_int_vector(y_pred_train)
    train_gt = as_int_vector(y_train)

    print("___Training____")
    print(f"accuracy {round(jac,3)*100} ,precision {round(prec,3)*100}, recall {round(rec,3)*100},f1_score {round(f1,3)*100} ")

    # Validation split
    jac, prec, rec, f1 = macro_scores(y_valid, y_pred_test)
    test_preds = as_int_vector(y_pred_test)
    test_gt = as_int_vector(y_valid)

    print("___Validation____")
    print(f"accuracy {round(jac,3)*100} ,precision {round(prec,3)*100}, recall {round(rec,3)*100}, f1_score {round(f1,3)*100} ")

    # Keras loss/accuracy and confusion matrix on the validation split.
    loss_acc = [model.evaluate(X_valid, y_valid)]
    c_matrix = [confusion_matrix(y_valid, y_pred_test)]

    # Persist the weights checkpoint and the full model.
    model.save_weights('./Pre_Trained_models/locomotion_without_k_fold/locomotion_without_k_fold', overwrite=True)
    model.save('./Pre_Trained_models/locomotion_without_k_fold', overwrite=True)

    return train_preds, train_gt, test_preds, test_gt, loss_acc, c_matrix

In [9]:
train_preds,train_gt,test_preds,test_gt,loss_acc,c_matrix = Network(X_train,y_train,X_valid, y_valid)

print(train_preds.shape)
print(train_gt.shape)
print()

print(test_preds.shape)
print(test_gt.shape)

# `loss_acc[-1]` is the `[loss, accuracy]` pair returned by `model.evaluate`
# on the validation windows.
val_loss,val_acc = loss_acc[-1]
# Scale to percent *before* rounding: `round(x, 3) * 100` can reintroduce
# floating-point noise such as 94.69999999999999.
print(f'validataion_loss {round(val_loss,3)}, validation_accuracy {round(val_acc*100,1)}')

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
___Training____
accuracy 85.3 ,precision 92.0, recall 91.2,f1_score 91.5 
___Validation____
accuracy 85.3 ,precision 91.4, recall 91.7, f1_score 91.5 
INFO:tensorflow:Assets written to: ./locomotion_without_k_fold/assets


INFO:tensorflow:Assets written to: ./locomotion_without_k_fold/assets


(41732,)
(41732,)

(10432,)
(10432,)
validataion_loss 0.148, validation_accuracy 94.6


# Predictions: results on the training set

In [10]:
cls = np.arange(5)
class_names = ['walking','running','standing','sitting','jumping']

# Heading -> scorer. The figure reported as "Accuracy" is the Jaccard score.
training_scorers = (
    ("Accuracy", jaccard_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
)

print('\nTraining RESULTS: ')
for position, (heading, scorer) in enumerate(training_scorers):
    # Only the first average line is preceded by a blank line.
    lead = "\n" if position == 0 else ""
    macro_value = scorer(train_gt, train_preds, average='macro')
    print("{0}Avg. {1}: {2}".format(lead, heading, macro_value))

print("\nTraining RESULTS (PER CLASS): ")
for heading, scorer in training_scorers:
    print("\n{0}:".format(heading))
    per_class = scorer(train_gt, train_preds, average=None, labels=cls)
    for activity, value in zip(class_names, per_class):
        print("   {0}: {1} %".format(activity, value*100))


Training RESULTS: 

Avg. Accuracy: 0.8530709483087472
Avg. Precision: 0.9196178390191317
Avg. Recall: 0.9118724348346505
Avg. F1: 0.9146087810258351

Training RESULTS (PER CLASS): 

Accuracy:
   walking: 90.65148946476145 %
   running: 97.73949701991569 %
   standing: 66.16782469267771 %
   sitting: 99.96527777777777 %
   jumping: 72.01138519924099 %

Precision:
   walking: 96.27057613168725 %
   running: 98.45511787963098 %
   standing: 75.71865443425077 %
   sitting: 99.96527777777777 %
   jumping: 89.39929328621908 %

Recall:
   walking: 93.95080321285141 %
   running: 99.26182918727393 %
   standing: 83.98914518317503 %
   sitting: 100.0 %
   jumping: 78.7344398340249 %

F1:
   walking: 95.09654471544717 %
   running: 98.85682778900939 %
   standing: 79.63975554840785 %
   sitting: 99.98263587428373 %
   jumping: 83.72862658576945 %


# Validation Accuracies

In [11]:
# Heading -> scorer. The figure reported as "Accuracy" is the Jaccard score.
validation_scorers = (
    ("Accuracy", jaccard_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
)

print('\nValidation RESULTS: ')
for position, (heading, scorer) in enumerate(validation_scorers):
    # Only the first average line is preceded by a blank line.
    lead = "\n" if position == 0 else ""
    macro_value = scorer(test_gt, test_preds, average='macro')
    print("{0}Avg. {1}: {2}".format(lead, heading, macro_value))

print("\nVALIDATION RESULTS (PER CLASS): ")
for heading, scorer in validation_scorers:
    print("\n{0}:".format(heading))
    per_class = scorer(test_gt, test_preds, average=None, labels=cls)
    for activity, value in zip(class_names, per_class):
        print(f'{activity}: {round(value,2)*100}')



Validation RESULTS: 

Avg. Accuracy: 0.8533236148319254
Avg. Precision: 0.9144025850680377
Avg. Recall: 0.9167798904049302
Avg. F1: 0.9148459519029203

VALIDATION RESULTS (PER CLASS): 

Accuracy:
walking: 91.0
running: 98.0
standing: 66.0
sitting: 100.0
jumping: 72.0

Precision:
walking: 96.0
running: 99.0
standing: 75.0
sitting: 100.0
jumping: 87.0

Recall:
walking: 94.0
running: 99.0
standing: 84.0
sitting: 100.0
jumping: 81.0

F1:
walking: 95.0
running: 99.0
standing: 80.0
sitting: 100.0
jumping: 84.0


In [12]:
# Train-minus-validation difference of each macro-averaged metric.
# "Accuracy" is the Jaccard score.
gap_scorers = (
    ("Accuracy", jaccard_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
)

print("\nGENERALIZATION GAP ANALYSIS: ")
for position, (heading, scorer) in enumerate(gap_scorers):
    train_value = scorer(train_gt, train_preds, average='macro')
    valid_value = scorer(test_gt, test_preds, average='macro')
    # Only the first line is preceded by a blank line.
    lead = "\n" if position == 0 else ""
    print("{0}Train-Val-{1} Difference: {2}".format(lead, heading, train_value - valid_value))


GENERALIZATION GAP ANALYSIS: 

Train-Val-Accuracy Difference: -0.00025266652317823546
Train-Val-Precision Difference: 0.005215253951094012
Train-Val-Recall Difference: -0.004907455570279717
Train-Val-F1 Difference: -0.00023717087708519102


# Testing on Game data

In [16]:
print('game_data',game_data.shape)

# Label column of the held-out game recordings and its class distribution.
label_game = game_data['locomotion']
x_axis_game = sorted(label_game.unique())
y_axis_game = label_game.value_counts()

# Every column except the trailing label column. An explicit copy is taken so
# that later in-place writes to `game_data` (the label-adjustment step assigns
# into the frame) can never leak into the feature values through a shared buffer.
X_game = game_data.iloc[:,:-1].copy()


def adjust_lables(clean_data,data_y):
    """Translate activity names into integer class codes.

    Mapping: walking -> 0, running -> 1, standing -> 2, sitting -> 3,
    jumping -> 4. Values that are not one of these names are passed through
    unchanged, so a later `.astype(int)` still fails loudly on unexpected labels.

    The previous implementation assigned `clean_data[mask] = code`, which
    overwrote *every column* of the matching rows with the class code and only
    returned usable codes when `data_y` happened to share memory with
    `clean_data`. This version is a pure function of `data_y`.

    Args:
        clean_data: Frame the labels belong to. Kept for interface
            compatibility; it is neither read nor modified.
        data_y: pandas Series of activity names.

    Returns:
        A new Series, indexed like `data_y`, holding the class codes.
    """
    class_codes = {'walking': 0, 'running': 1, 'standing': 2, 'sitting': 3, 'jumping': 4}
    return data_y.map(lambda activity: class_codes.get(activity, activity))

# Integer class code for every row of `game_data`.
y_game = adjust_lables(game_data,game_data.iloc[:,-1]).astype(int)

# Append the codes as the last column, then drop the first two feature columns.
# `y_game` is converted to a NumPy array before adding the column axis: 2-D
# indexing directly on a pandas Series (`y_game[:, None]`) was deprecated and
# is rejected by current pandas releases.
data_labelled_game = np.concatenate((X_game, y_game.to_numpy()[:,None]), axis=1)[:,2:]
print('data_labelled_game ',data_labelled_game.shape)

# Features are all but the last column; the last column is the class code.
data_labelled_X = data_labelled_game[:,:-1]
data_labelled_Y = data_labelled_game[:,-1]

print(data_labelled_X.shape,data_labelled_Y.shape)

# Same window length and overlap as for the drill data.
X_train_game, y_train_game = apply_sliding_window(data_labelled_X, data_labelled_Y, sliding_window_size=sw_length,
                                        sampling_rate=50,sliding_window_overlap=sw_overlap)


# Each window is labelled with the class of its first sample.
X_train_game, y_train_game = X_train_game.astype(np.float32), y_train_game[:,0].astype(np.uint8)

print("\nShape of the X_train_game and y_train_game datasets after splitting and windowing: ")
print(X_train_game.shape, y_train_game.shape)

game_data (728431, 6)




data_labelled_game  (728431, 4)
(728431, 3) (728431,)

Shape of the X_train_game and y_train_game datasets after splitting and windowing: 
(19168, 50, 3) (19168,)


# Loading the pretrained model

In [17]:
# Checkpoint prefix written by `Network` via `model.save_weights`.
pt = './Pre_Trained_models/locomotion_without_k_fold/locomotion_without_k_fold'

# Restore the saved model, then load the checkpoint weights on top of it.
model = load_model('./Pre_Trained_models/locomotion_without_k_fold')
model.load_weights(pt)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fa5f465cb10>

In [18]:
# Drill evaluation uses `data_X`/`data_Y`, i.e. the training and validation
# windows stacked together, so this figure includes data the model was fitted on.
X_tt = data_X
y_tt = data_Y
loss_drill,acc_drill = model.evaluate(X_tt,y_tt)

print('___DRILL___')
# Scale to percent *before* rounding: `round(x, 3) * 100` can reintroduce
# floating-point noise such as 94.69999999999999.
print(f'DRILL_loss {round(loss_drill,3)}, Drill_accuracy {round(acc_drill*100,1)}')

# Held-out game windows.
loss_game,acc_game = model.evaluate(X_train_game,y_train_game)

print('___GAME___')
print(f'GAME_loss {round(loss_game,3)}, GAME_accuracy {round(acc_game*100,1)}')

___DRILL___
DRILL_loss 0.143, Drill_accuracy 94.69999999999999
___GAME___
GAME_loss 26.824, GAME_accuracy 1.5


In [19]:
# Class code with the highest softmax probability for every game window.
y_pred_game = np.argmax(model.predict(X_train_game), axis=-1)

# Macro-averaged metrics on the game windows. Note that `acc_game` is the
# Jaccard score here and replaces the Keras accuracy stored under the same
# name by the evaluation cell.
acc_game = jaccard_score(y_train_game, y_pred_game, average='macro')
pre_game = precision_score(y_train_game, y_pred_game, average='macro', zero_division=0)
reca_game = recall_score(y_train_game, y_pred_game, average='macro', zero_division=0)
f1_game = f1_score(y_train_game, y_pred_game, average='macro', zero_division=0)

print("___GAME____")
# Scale to percent *before* rounding: `round(x, 3) * 100` can reintroduce
# floating-point noise such as 13.100000000000001.
print(f"accuracy {round(acc_game*100,1)} ,precision {round(pre_game*100,1)}, recall {round(reca_game*100,1)},f1_score {round(f1_game*100,1)} ")

___GAME____
accuracy 1.5 ,precision 2.1, recall 13.100000000000001,f1_score 2.8000000000000003 


In [20]:
# Heading -> scorer. The figure reported as "Accuracy" is the Jaccard score.
game_scorers = (
    ("Accuracy", jaccard_score),
    ("Precision", precision_score),
    ("Recall", recall_score),
    ("F1", f1_score),
)

print('\nGame RESULTS: ')
for position, (heading, scorer) in enumerate(game_scorers):
    # Only the first average line is preceded by a blank line.
    lead = "\n" if position == 0 else ""
    macro_value = scorer(y_train_game, y_pred_game, average='macro')
    print("{0}Avg. {1}: {2}".format(lead, heading, macro_value))

print("\nGAME RESULTS (PER CLASS): ")
for heading, scorer in game_scorers:
    print("\n{0}:".format(heading))
    per_class = scorer(y_train_game, y_pred_game, average=None, labels=cls)
    for activity, value in zip(class_names, per_class):
        print(f'{activity}: {round(value,2)*100}')


Game RESULTS: 

Avg. Accuracy: 0.014825852019375266
Avg. Precision: 0.020654112013889205
Avg. Recall: 0.13077782736364632
Avg. F1: 0.02818865319568421

GAME RESULTS (PER CLASS): 

Accuracy:
walking: 1.0
running: 1.0
standing: 0.0
sitting: 0.0
jumping: 6.0

Precision:
walking: 2.0
running: 2.0
standing: 0.0
sitting: 0.0
jumping: 6.0

Recall:
walking: 1.0
running: 1.0
standing: 0.0
sitting: 0.0
jumping: 64.0

F1:
walking: 1.0
running: 1.0
standing: 0.0
sitting: 0.0
jumping: 12.0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# END