In [1]:
# Different layers
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.layers import LayerNormalization, BatchNormalization, Layer, Dropout
from tensorflow.keras.layers import GlobalAveragePooling1D, Conv1D
#from tensorflow.keras.layers import MultiHeadAttention
# For miscellaneous functions
from tensorflow import convert_to_tensor
from tensorflow.keras import utils
# Keras models
from tensorflow.keras import Model, Sequential
# For evaluation
from sklearn import metrics
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
# For math/arrays
import numpy as np
# For plotting
import matplotlib.pyplot as plt
# For importing and processing HybrIK files
import os
import pickle
import scipy.optimize

from tensorflow.keras.optimizers import Adam
import time

## Data Preprocessing

In [2]:
def extract_gait_cycle(feet_distances):
    '''
    Estimate gait cycles from the rise/fall pattern of a feet-distance curve.

    The curve is scanned once. Every time it turns from falling to rising
    (feet were closest on the previous sample) a "feet closed" event is
    counted; every second event closes one cycle.

    feet_distances: indexable sequence of numbers (list or 1-D array)
    Output: a list of (start_index, end_index) tuples, one per estimated
            gait cycle. Consecutive cycles share a boundary index. The list
            is empty when no full cycle is found or when fewer than two
            samples are given.
    '''
    y = feet_distances

    # With fewer than two samples there is no slope to inspect
    # (and y[1] below would raise IndexError).
    if len(y) < 2:
        return []

    close_feet_count = 0

    if y[1] < y[0]:
        # Curve starts while the feet are closing: the first cycle only
        # starts at the first falling-to-rising turn, marked by -1 until then.
        open_feet = False
        cycle_start = -1
    else:
        open_feet = True
        cycle_start = 0

    cycle_list = []

    for i in range(1, len(y)):
        if y[i] < y[i-1] and open_feet:
            # Distance started to shrink
            open_feet = False
        elif y[i] > y[i-1] and not open_feet:
            # Distance started to grow again: a minimum was just passed
            open_feet = True
            if cycle_start == -1:
                # First minimum of a curve that began by falling: it only
                # marks the start of the first cycle and is not counted
                cycle_start = i
                continue

            close_feet_count = close_feet_count + 1
            if close_feet_count % 2 == 0:
                # Two minima passed since the cycle started: close the cycle
                # on the sample before the rise and start the next one there
                cycle_end = i - 1
                cycle_list.append((cycle_start, cycle_end))

                cycle_start = cycle_end

    return cycle_list

In [3]:
def fit_sin(tt, yy):
    '''
    Fit the model A * sin(w*t + p) + c to the points (tt, yy).

    tt: sample positions (at least two; the spacing tt[1] - tt[0] is used
        for the frequency axis of the initial guess)
    yy: sample values, same length as tt

    Output: a dict with the fitted "amp", "omega", "phase", "offset",
            the derived "freq" and "period", the fitted curve as the
            callable "fitfunc", the largest covariance entry "maxcov" and
            "rawres" = (initial guess, fitted parameters, covariance).
    '''
    times = np.array(tt)
    values = np.array(yy)

    # Initial guess: strongest non-constant FFT component gives the frequency,
    # the spread of the data gives the amplitude, the mean gives the offset
    sample_spacing = times[1] - times[0]
    frequencies = np.fft.fftfreq(len(times), sample_spacing)
    spectrum = abs(np.fft.fft(values))
    strongest_bin = np.argmax(spectrum[1:]) + 1  # bin 0 (constant term) is skipped
    guess_freq = abs(frequencies[strongest_bin])
    guess = np.array([
        np.std(values) * 2.**0.5,
        2.*np.pi*guess_freq,
        0.,
        np.mean(values),
    ])

    def sinfunc(t, A, w, p, c):
        return A * np.sin(w*t + p) + c

    popt, pcov = scipy.optimize.curve_fit(sinfunc, times, values, p0=guess)
    A, w, p, c = popt
    f = w/(2.*np.pi)

    def fitfunc(t):
        # Fitted curve evaluated at t
        return A * np.sin(w*t + p) + c

    return {
        "amp": A,
        "omega": w,
        "phase": p,
        "offset": c,
        "freq": f,
        "period": 1./f,
        "fitfunc": fitfunc,
        "maxcov": np.max(pcov),
        "rawres": (guess, popt, pcov),
    }

In [4]:
def process_single_file(data, coord_pos):
    '''
    Estimate the gait cycles of one recording from its ankle distance.

    data: all data from one single pickle file; data['pred_uvd'] is read
          frame by frame (each frame is indexed as frame[joint, coordinate])
    coord_pos: 0 for x coordinates, 1 for y coordinates, 2 for z coordinates

    Output: list of (start_index, end_index) tuples as produced by
            extract_gait_cycle; empty when no cycle could be found.
    '''
    LEFT_ANKLE = 7
    RIGHT_ANKLE = 8

    # Get distance between left and right ankles as feet distance
    feet_distances = [
        abs(frame[LEFT_ANKLE, coord_pos] - frame[RIGHT_ANKLE, coord_pos])
        for frame in data['pred_uvd']
    ]

    # Nothing to analyse (max() below would fail on an empty list)
    if not feet_distances:
        return []

    x = np.arange(len(feet_distances))
    gait_cycles = []

    # Fit the feet distances data into sine graph,
    # to remove outliers or inconsistent patterns
    try:
        f = fit_sin(x, feet_distances)["fitfunc"]
        gait_cycles = extract_gait_cycle(f(x))
    except Exception:
        # The fit can fail (e.g. too few points or no convergence);
        # the spline fallback below takes over in that case
        gait_cycles = []

    # Fit the feet distances data into a smoothing spline when it cannot
    # fit into sine graph or no gait cycles were found from the sine graph.
    # This check sits outside the except block so that both cases reach it.
    if len(gait_cycles) <= 0:
        from scipy.interpolate import splrep, splev
        try:
            max_distance = max(feet_distances)
            smoothness = max_distance ** 2
            bspl = splrep(x, feet_distances, s=smoothness)
            gait_cycles = extract_gait_cycle(splev(x, bspl))
        except Exception:
            # Spline could not be built either (e.g. too few frames):
            # report "no cycles" and let the caller decide what to do
            gait_cycles = []

    return gait_cycles

In [5]:
def extract_gait_information(file, data):
    '''
    Get gait start and end index lists according to the gait's view.

    file: file name; the view is detected from the (case-insensitive)
          keywords 'front'/'back' or 'left'/'right' in it
    data: all data from one single pickle file

    Output: list of (start_index, end_index) tuples. Front/back views are
            processed on coordinate 2, left/right views on coordinate 0.
            An empty list is returned when the name has no view keyword.
    '''
    name = file.lower()

    if 'front' in name or 'back' in name:
        return process_single_file(data, 2)
    if 'left' in name or 'right' in name:
        return process_single_file(data, 0)

    # Unknown view: return an empty list instead of falling through to None,
    # so callers can always take len() of the result
    return []

In [6]:
def extract_data(input_file, max_length):
    '''
    Extract all samples and labels from the pickle files of one folder.

    input_file: path of the folder to scan; only files ending in '.pk' are read
    max_length: number of consecutive frames taken from each file

    Output: (all_samples, all_labels, error_files)
        all_samples: one entry per file, a list of max_length frames, each
                     frame being the flattened data['pred_xyz_24_struct'] entry
        all_labels:  one int per sample, 2 * age_group + gender
        error_files: (file_name, total_frames) for every file that has fewer
                     than max_length frames and was therefore skipped
    Files whose age is reported as <= 0 are skipped silently.
    '''
    # Project helper stored on the mounted drive; imported once per call
    # instead of once per file.
    # NOTE(review): presumably returns the subject's age for a file name - confirm.
    from drive.MyDrive.model_preprocessing import process

    all_samples = []
    all_labels = []
    error_files = []

    # Sorted so the order of samples does not depend on the order in which
    # the filesystem happens to list the files
    for file in sorted(os.listdir(input_file)):

        # Process only HybrIK pickle files
        if not file.endswith('.pk'):
            continue

        # NOTE: pickle.load can execute code embedded in the file;
        # only run this on trusted data
        file_path = os.path.join(input_file, file)
        with open(file_path, 'rb') as f:
            data = pickle.load(f)

        full_keypoints = data['pred_xyz_24_struct']
        #full_keypoints = data['pred_xyz_29']
        total_frames = len(full_keypoints)

        # Treat a missing result (None) like "no cycles found"
        gait_cycles = extract_gait_information(file, data) or []

        # Take middle gait cycles if possible
        if len(gait_cycles) > 0:
            start_index = gait_cycles[len(gait_cycles) // 2][0]
        else:
            start_index = 0
        final_index = start_index + max_length - 1

        # If the window runs past the end, restart from the first frame;
        # if the file is shorter than the sequence length it cannot be used
        if final_index >= total_frames:
            if max_length <= total_frames:
                start_index = 0
                final_index = max_length - 1
            else:
                error_files.append((file, total_frames))
                continue

        # Extract label
        # Check for gender
        # NOTE(review): this matches any upper-case 'F' in the file name
        # (e.g. one in 'Front') - verify against the naming scheme
        if 'F' in file:
            gender = 1
        else:
            gender = 0

        # Check for age group
        age = process(file)
        #age = int(file.split("_")[3])
        if age <= 0:
            continue
        elif age < 15:
            group = 0 # Child group
        elif age < 65:
            group = 1 # Adult group
        else:
            group = 2 # Senior group

        # Male child = 0
        # Female child = 1
        # Male adult = 2
        # Female adult = 3
        # Male senior = 4
        # Female senior = 5
        label = 2 * group + gender

        # Extract features: max_length frames, each flattened to one list
        keypoints = [
            list(full_keypoints[i].flat)
            for i in range(start_index, final_index + 1)
        ]

        all_samples.append(keypoints)
        all_labels.append(label)

    return all_samples, all_labels, error_files

In [7]:
# Mount Google Drive, where the train, validation and test folders live
from google.colab import drive
drive.mount('/content/drive')

# Dataset locations on the mounted drive
input_folder = "/content/drive/MyDrive"
train_folder = os.path.join(input_folder, "PK_train_set")
validation_folder = os.path.join(input_folder, "PK_validation_set")
test_folder = os.path.join(input_folder, "PK_test_set")

# Number of frames taken from every file
sequence_length = 60

# Load the three splits (train, test, validation in that order) and
# gather the files that were too short to be used
all_error_files = []
X_train, y_train, error_files = extract_data(train_folder, sequence_length)
all_error_files += error_files
X_test, y_test, error_files = extract_data(test_folder, sequence_length)
all_error_files += error_files
X_val, y_val, error_files = extract_data(validation_folder, sequence_length)
all_error_files += error_files

# Report split sizes and any rejected files
print("Train set:", len(X_train), "\nTest set:", len(X_test), "\nValidation set:", len(X_val))
if all_error_files:
    print("Error in processing files:")
    for file in all_error_files:
        print(f"\t- {file[0]} ({file[1]} frames)")

Mounted at /content/drive
Train set: 877 
Test set: 274 
Validation set: 219


In [8]:
# Calculate number of output classes (distinct labels seen in the train set)
n_classes = len(np.unique(y_train))

# Size of one frame vector (plays the role of an embedding dimension):
# 24 keypoints with 3 coordinates each
num_coord = 24
xyz = 3
embed_dim = num_coord * xyz

# For balancing classes (disabled experiment, kept for reference)
#X_train_array = np.array(X_train)
#X_train_2d = X_train_array.reshape((X_train_array.shape[0], sequence_length * embed_dim))
#smote = SMOTE(random_state = 42)
#X_train_resampled, y_train_resampled = smote.fit_resample(X_train_2d, y_train)
#X_train_3d = X_train_resampled.reshape((X_train_resampled.shape[0], sequence_length, embed_dim))

# Convert list to tensor
#X_train_tensor = convert_to_tensor(X_train_3d, dtype = np.float32)
X_train_tensor = convert_to_tensor(X_train, dtype = np.float32)
X_test_tensor = convert_to_tensor(X_test, dtype = np.float32)
X_val_tensor = convert_to_tensor(X_val, dtype = np.float32)

# Convert labels using one hot encoding.
# The width is fixed to n_classes: without num_classes, to_categorical sizes
# each matrix from the largest label present in that split, so a split that
# lacks the highest class would not match the model's output layer.
#y_train_categorical = utils.to_categorical(y_train_resampled)
y_train_categorical = utils.to_categorical(y_train, num_classes=n_classes)
y_test_categorical = utils.to_categorical(y_test, num_classes=n_classes)
y_val_categorical = utils.to_categorical(y_val, num_classes=n_classes)

# Print processing results
#print(len(X_train_tensor), len(X_test_tensor), len(y_train_resampled), len(y_test))
print(len(X_train_tensor), len(X_test_tensor), len(y_train), len(y_test))
print(X_train_tensor.shape)
print(n_classes)

877 274 877 274
(877, 60, 72)
6


In [9]:
# Count how many train samples fall into each class label
label, counts = np.unique(y_train, return_counts=True)
#label, counts = np.unique(y_train_resampled, return_counts=True)
class_frequency = dict(zip(label, counts))
print(class_frequency)

{np.int64(0): np.int64(118), np.int64(1): np.int64(75), np.int64(2): np.int64(249), np.int64(3): np.int64(305), np.int64(4): np.int64(72), np.int64(5): np.int64(58)}


## Model Training

In [10]:
from tensorflow.keras.layers import LSTM

# Per-run results of the LSTM model:
# training time (s), test accuracy, precision and recall (in percent)
t = []
acc = []
pcs = []
rc = []

for i in range(5):
    print(f"-------------- Training {i + 1} ------------------")
    # Setting seed for reproducibility
    # NOTE(review): the same seed is re-applied before every run, yet the
    # recorded outputs of this cell differ from run to run, so these two
    # calls do not make the runs identical. If repeatable runs are needed,
    # consider tf.keras.utils.set_random_seed (and deterministic ops) - confirm.
    np.random.seed(42)
    tf.random.set_seed(42)

    # Start connecting the layers together:
    # two stacked LSTM layers, one hidden dense layer, softmax over the classes
    input_layer = Input(shape=(sequence_length, embed_dim))
    lstm_layer_0 = LSTM(512, return_sequences = True)(input_layer)
    lstm_layer_1 = LSTM(256)(lstm_layer_0)
    d = Dense(256, activation = "relu")(lstm_layer_1)
    output_layer = Dense(n_classes, activation="softmax")(d)

    # Construct the model
    model = Model(inputs=input_layer, outputs=output_layer)
    optimizer = Adam(learning_rate = 0.0001)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy', 'Precision', 'Recall'])

    # Train the model and record execution time
    start = time.time()
    history = model.fit(
        X_train_tensor,
        y_train_categorical,
        batch_size=8,
        epochs=175,
        validation_data=(X_val_tensor, y_val_categorical),
        verbose = 0,
    )

    # Measure once, so the recorded value is exactly the printed one
    # and does not include the time spent printing
    elapsed = time.time() - start
    print("Time elapsed:", elapsed, "s")
    t.append(elapsed)

    # Evaluate on test dataset; scores = [loss, accuracy, precision, recall]
    scores = model.evaluate(X_test_tensor, y_test_categorical, verbose = 0)
    print("Loss:", scores[0])
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    print("Precision: %.2f%%" % (scores[2] * 100))
    print("Recall: %.2f%%" % (scores[3] * 100))
    acc.append(scores[1] * 100)
    pcs.append(scores[2] * 100)
    rc.append(scores[3] * 100)

# Summary over the five runs (np.std is the population standard deviation)
print("--------------------------------------------------------------")
print("Average execution time (s):", np.mean(t))
print("SD of execution time:", np.std(t))
print("Average accuracy:", np.mean(acc))
print("SD accuracy:", np.std(acc))
print("Average precision:", np.mean(pcs))
print("SD precision:", np.std(pcs))
print("Average recall:", np.mean(rc))
print("SD recall:", np.std(rc))

-------------- Training 1 ------------------
Time elapsed: 375.3066523075104 s
Loss: 2.4791669845581055
Accuracy: 51.46%
Precision: 51.97%
Recall: 48.18%
-------------- Training 2 ------------------
Time elapsed: 380.02662110328674 s
Loss: 2.730795383453369
Accuracy: 45.62%
Precision: 46.01%
Recall: 44.16%
-------------- Training 3 ------------------
Time elapsed: 369.7431230545044 s
Loss: 2.667595863342285
Accuracy: 53.28%
Precision: 53.64%
Recall: 51.09%
-------------- Training 4 ------------------
Time elapsed: 374.88874650001526 s
Loss: 2.6005468368530273
Accuracy: 54.01%
Precision: 54.17%
Recall: 52.19%
-------------- Training 5 ------------------
Time elapsed: 367.34511160850525 s
Loss: 2.362765312194824
Accuracy: 51.46%
Precision: 52.69%
Recall: 50.00%
--------------------------------------------------------------
Average execution time (s): 373.4621747493744
SD of execution time: 4.4671920028835865
Average accuracy: 51.16788327693939
SD accuracy: 2.950566472442932
Average preci

In [11]:
from tensorflow.keras.layers import GRU

# Per-run results of the GRU model:
# training time (s), test accuracy, precision and recall (in percent)
t = []
acc = []
pcs = []
rc = []

for i in range(5):
    print(f"-------------- Training {i + 1} ------------------")
    # Setting seed for reproducibility
    # NOTE(review): the same seed is re-applied before every run, yet the
    # recorded outputs of this cell differ from run to run, so these two
    # calls do not make the runs identical. If repeatable runs are needed,
    # consider tf.keras.utils.set_random_seed (and deterministic ops) - confirm.
    np.random.seed(42)
    tf.random.set_seed(42)

    # Start connecting the layers together:
    # two stacked GRU layers, one hidden dense layer, softmax over the classes
    input_layer = Input(shape=(sequence_length, embed_dim))
    gru_layer_0 = GRU(512, return_sequences = True)(input_layer)
    gru_layer_1 = GRU(256)(gru_layer_0)
    d = Dense(256, activation = "relu")(gru_layer_1)
    output_layer = Dense(n_classes, activation="softmax")(d)

    # Construct the model
    model = Model(inputs=input_layer, outputs=output_layer)
    optimizer = Adam(learning_rate = 0.0001)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy', 'Precision', 'Recall'])

    # Train the model and record execution time
    start = time.time()
    history = model.fit(
        X_train_tensor,
        y_train_categorical,
        batch_size=8,
        epochs=200,
        validation_data=(X_val_tensor, y_val_categorical),
        verbose = 0,
    )

    # Measure once, so the recorded value is exactly the printed one
    # and does not include the time spent printing
    elapsed = time.time() - start
    print("Time elapsed:", elapsed, "s")
    t.append(elapsed)

    # Evaluate on test dataset; scores = [loss, accuracy, precision, recall]
    scores = model.evaluate(X_test_tensor, y_test_categorical, verbose = 0)
    print("Loss:", scores[0])
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    print("Precision: %.2f%%" % (scores[2] * 100))
    print("Recall: %.2f%%" % (scores[3] * 100))
    acc.append(scores[1] * 100)
    pcs.append(scores[2] * 100)
    rc.append(scores[3] * 100)

# Summary over the five runs (np.std is the population standard deviation)
print("--------------------------------------------------------------")
print("Average execution time (s):", np.mean(t))
print("SD of execution time:", np.std(t))
print("Average accuracy:", np.mean(acc))
print("SD accuracy:", np.std(acc))
print("Average precision:", np.mean(pcs))
print("SD precision:", np.std(pcs))
print("Average recall:", np.mean(rc))
print("SD recall:", np.std(rc))

-------------- Training 1 ------------------
Time elapsed: 376.8782002925873 s
Loss: 2.793826103210449
Accuracy: 48.91%
Precision: 49.62%
Recall: 47.81%
-------------- Training 2 ------------------
Time elapsed: 382.6825761795044 s
Loss: 3.3382856845855713
Accuracy: 47.08%
Precision: 47.33%
Recall: 45.26%
-------------- Training 3 ------------------
Time elapsed: 355.6174659729004 s
Loss: 2.948258876800537
Accuracy: 51.09%
Precision: 51.89%
Recall: 50.00%
-------------- Training 4 ------------------
Time elapsed: 386.61936259269714 s
Loss: 2.8996641635894775
Accuracy: 55.11%
Precision: 55.51%
Recall: 55.11%
-------------- Training 5 ------------------
Time elapsed: 380.7115013599396 s
Loss: 2.8947274684906006
Accuracy: 50.73%
Precision: 50.75%
Recall: 49.64%
--------------------------------------------------------------
Average execution time (s): 376.5020074367523
SD of execution time: 10.904987000688028
Average accuracy: 50.58394134044647
SD accuracy: 2.677946362251478
Average precis

In [12]:
from tensorflow.keras.layers import SimpleRNN

# Per-run results of the SimpleRNN model:
# training time (s), test accuracy, precision and recall (in percent)
t = []
acc = []
pcs = []
rc = []

for i in range(5):
    print(f"-------------- Training {i + 1} ------------------")
    # Setting seed for reproducibility
    # NOTE(review): the same seed is re-applied before every run, yet the
    # recorded outputs of this cell differ from run to run, so these two
    # calls do not make the runs identical. If repeatable runs are needed,
    # consider tf.keras.utils.set_random_seed (and deterministic ops) - confirm.
    np.random.seed(42)
    tf.random.set_seed(42)

    # Start connecting the layers together:
    # two stacked SimpleRNN layers, one hidden dense layer, softmax over the classes
    input_layer = Input(shape=(sequence_length, embed_dim))
    rnn_layer_0 = SimpleRNN(512, return_sequences = True)(input_layer)
    rnn_layer_1 = SimpleRNN(256)(rnn_layer_0)
    d = Dense(256, activation = "relu")(rnn_layer_1)
    output_layer = Dense(n_classes, activation="softmax")(d)

    # Construct the model
    model = Model(inputs=input_layer, outputs=output_layer)
    optimizer = Adam(learning_rate = 0.0001)
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=['accuracy', 'Precision', 'Recall'])

    # Train the model and record execution time
    start = time.time()
    history = model.fit(
        X_train_tensor,
        y_train_categorical,
        batch_size=32,
        epochs=150,
        validation_data=(X_val_tensor, y_val_categorical),
        verbose = 0,
    )

    # Measure once, so the recorded value is exactly the printed one
    # and does not include the time spent printing
    elapsed = time.time() - start
    print("Time elapsed:", elapsed, "s")
    t.append(elapsed)

    # Evaluate on test dataset; scores = [loss, accuracy, precision, recall]
    scores = model.evaluate(X_test_tensor, y_test_categorical, verbose = 0)
    print("Loss:", scores[0])
    print("Accuracy: %.2f%%" % (scores[1] * 100))
    print("Precision: %.2f%%" % (scores[2] * 100))
    print("Recall: %.2f%%" % (scores[3] * 100))
    acc.append(scores[1] * 100)
    pcs.append(scores[2] * 100)
    rc.append(scores[3] * 100)

# Summary over the five runs (np.std is the population standard deviation)
print("--------------------------------------------------------------")
print("Average execution time (s):", np.mean(t))
print("SD of execution time:", np.std(t))
print("Average accuracy:", np.mean(acc))
print("SD accuracy:", np.std(acc))
print("Average precision:", np.mean(pcs))
print("SD precision:", np.std(pcs))
print("Average recall:", np.mean(rc))
print("SD recall:", np.std(rc))

-------------- Training 1 ------------------
Time elapsed: 96.86304450035095 s
Loss: 2.2211225032806396
Accuracy: 53.28%
Precision: 55.86%
Recall: 52.19%
-------------- Training 2 ------------------
Time elapsed: 106.41248488426208 s
Loss: 2.5750386714935303
Accuracy: 54.01%
Precision: 55.21%
Recall: 52.19%
-------------- Training 3 ------------------
Time elapsed: 96.72406578063965 s
Loss: 2.556178569793701
Accuracy: 52.92%
Precision: 54.79%
Recall: 52.19%
-------------- Training 4 ------------------
Time elapsed: 100.01885747909546 s
Loss: 2.5838658809661865
Accuracy: 48.54%
Precision: 50.77%
Recall: 48.18%
-------------- Training 5 ------------------
Time elapsed: 100.38089895248413 s
Loss: 2.2688026428222656
Accuracy: 52.55%
Precision: 54.23%
Recall: 51.46%
--------------------------------------------------------------
Average execution time (s): 100.08009605407715
SD of execution time: 3.515951687696386
Average accuracy: 52.262773513793945
SD accuracy: 1.9229112272066855
Average p