In [None]:
!nvidia-smi

Sat Nov  5 16:13:15 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 512.16       Driver Version: 512.16       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:09:00.0  On |                  Off |
|  0%   37C    P8    18W / 450W |   2225MiB / 24564MiB |     11%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

# Libraries

Access to Google Drive folder

In [None]:
# Mount Google Drive when running on Colab.
# `google.colab` only exists inside a Colab runtime, so the import is guarded:
# on a local machine the mount is skipped instead of aborting "Run All".
try:
  from google.colab import drive
except ImportError:
  drive = None
  print('google.colab is not available - skipping the Google Drive mount')
else:
  drive.mount('/content/drive')

Useful libraries

In [None]:
import os
import shutil
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import random

from tqdm import tqdm

from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import LeaveOneOut

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

# Constants

In [None]:
# Column indices of the two label columns inside every sample array.
# The data-collection cell appends 'target' (exercise) and then 'subject_id',
# so they are the second-to-last and the last column respectively.
ID_EXERCISE_LABEL = -2
ID_SUBJECT_LABEL = -1

# Common number of time steps that samples are resampled to before being fed to the network
NORM_SAMPLE_LENGTH = 100
# All IMU positions that can be selected (kept for reference):
# CONSIDERED_IMU_POSITION = ['LeftShank', 'RightShank', 'LeftThigh', 'RightThigh', 'Pelvis', 'Chest', 'LeftFoot', 'RightFoot', 'LeftWrist', 'RightWrist']
# IMU positions actually used; slice_df keeps only columns whose first token is in this list
CONSIDERED_IMU_POSITION = ['RightWrist', 'Chest']
NOT_CONSIDERED_INFO = ['Time', 'Orientation', 'Magnetometer'] # signal types that are dropped by slice_df
NUM_SENSOR_INFO = 2 # gyroscope and acceleration
NUM_AX_PER_SENSOR = 3 # x-, y-, and z-

# Number of subject ids; the evaluation loop builds its ids from range(1, NUM_SUBJECT)
NUM_SUBJECT = 20

# # Model hyper-parameters
# LEARNING_RATE = 1e-4
# NUM_EPOCHS = 50
# ADAM_WEIGHT_DECAY = 1e-2
# LEARNING_RATE_REDUCTION_FACTOR = 0.5

# BATCH_SIZE = 32

# CONV_NUM_IN = 60 # temporarily hardcoded
# CONV_NUM_OUT = 128
# KERNEL_SIZE = 3
# STRIDE = 1

# POOL_SIZE = 4

In [None]:
# --- Model hyper-parameters --- #
# Fixed parameters (not part of the grid search)
LEARNING_RATE = 1e-4
NUM_EPOCHS = 30
ADAM_WEIGHT_DECAY = 1e-2 # presumably the weight_decay passed to the Adam optimizer - optimizer is created outside this cell
LEARNING_RATE_REDUCTION_FACTOR = 0.5 # presumably the factor of the LR scheduler - scheduler is created outside this cell

# CONV_NUM_IN = 60 # temporarily hardcoded
# Number of input channels of the first convolution:
# one channel per (IMU position, sensor type, axis)
num_imu = len(CONSIDERED_IMU_POSITION)
conv_num_in = num_imu*NUM_SENSOR_INFO*NUM_AX_PER_SENSOR

# Tuning parameters: every combination of these grids becomes one search-space point
BATCH_SIZE_GRID = [16, 32, 64, 128]
CONV_NUM_OUT_GRID = [32, 64, 128, 256]
KERNEL_SIZE_GRID = [4]
STRIDE_GRID = [1]

# POOL_SIZE = [2, 4, 8]
# NOTE: despite the missing _GRID suffix this is also a grid (list of candidate pool sizes)
POOL_SIZE = [2]

# Position of each tuning parameter inside a search-space point
# [batch_size, num_out, kernel_size, stride, pool_size]
ID_BATCH_SIZE = 0
ID_NUM_OUT = 1
ID_KERNEL_SIZE = 2
ID_STRIDE = 3
ID_POOL_SIZE = 4

In [None]:
# Generate the search space: one [batch_size, num_out, kernel_size, stride, pool_size]
# point per combination of the tuning grids. The element order must match the
# ID_* index constants used to unpack a point. A comprehension is used so that
# no loop temporaries are left behind in the notebook namespace.
hp_search_space = [
  [bs, no, ks, st, ps]
  for bs in BATCH_SIZE_GRID
  for no in CONV_NUM_OUT_GRID
  for ks in KERNEL_SIZE_GRID
  for st in STRIDE_GRID
  for ps in POOL_SIZE
]

print('Size of the search space: ' + str(len(hp_search_space)))

Size of the search space: 16


# Utils

**Pre-processing**

In [None]:
# all helper function here
# helper functions
def mkfolder(pth):
  """Create the directory `pth` if nothing exists at that path yet.

  Missing parent directories are created as well. If something already
  exists at `pth` (directory or file) the call does nothing.
  """
  if not os.path.exists(pth):
    # makedirs handles nested paths; exist_ok covers the case where the folder
    # appears between the existence check and the creation call.
    os.makedirs(pth, exist_ok=True)

def read(pth):
  """Read the CSV file at `pth` into a DataFrame using the pandas defaults."""
  return pd.read_csv(pth)

# Load and re-format the data file
def load_df(pth):
  """Load a parsed sensor CSV and give every column a 'Position Signal Axis' name.

  The first three columns are discarded. The CSV header plus the first two
  data rows are combined into one column name per column, e.g.
  'Pelvis Accelerometer X' or 'LeftFoot Gyroscope Z'; those two rows are then
  removed from the data.

  Returns:
    A new DataFrame holding the remaining rows (the original row index is kept,
    so it starts at 2) with the combined column names. Values are left as read;
    callers convert them to float.
  """
  dtframe = read(pth)
  dtframe = dtframe.iloc[:, 3:] # remove the first 3 columns

  # Re-formatting the column's names
  # Header part: keep the text before the first '.' (presumably this strips the
  # '.1', '.2', ... suffix pandas appends to repeated header names).
  position_names = [name.split('.')[0] for name in dtframe.columns]
  # The two name rows are read positionally; integer keys on a Series with a
  # string index rely on a deprecated pandas fallback.
  signal_names = dtframe.iloc[0, :].tolist()
  axis_names = dtframe.iloc[1, :].tolist()

  names = [
    position + ' ' + signal + ' ' + axis
    for position, signal, axis in zip(position_names, signal_names, axis_names)
  ]

  # Copy so the result is independent of the frame it was sliced from
  dtframe = dtframe.iloc[2:, :].copy() # remove the first 2 rows
  dtframe.columns = names # update new column's names

  return dtframe

def slice_df(dtframe, imu_positions=None, excluded_info=None):
  """Keep only the columns of the selected IMU positions and signal types.

  Column names are expected in the 'Position Signal Axis' form produced by
  load_df. A column is kept when its first token is one of `imu_positions`
  and its second token is not one of `excluded_info`.

  Args:
    dtframe: DataFrame with space-separated column names.
    imu_positions: positions to keep; defaults to CONSIDERED_IMU_POSITION.
    excluded_info: signal types to drop; defaults to NOT_CONSIDERED_INFO.

  Returns:
    A copy of the selected columns, ordered by sorted column name, so that
    callers can add columns without touching `dtframe`.
  """
  if imu_positions is None:
    imu_positions = CONSIDERED_IMU_POSITION
  if excluded_info is None:
    excluded_info = NOT_CONSIDERED_INFO # not use information from orientation, magnetometer or time

  req_cols = [
    col for col in sorted(dtframe.columns)
    if col.split(' ')[0] in imu_positions and col.split(' ')[1] not in excluded_info
  ]

  return dtframe.loc[:, req_cols].copy()

# One hot encoding
def one_hot_encoding(label, num_exercise):
  """Return a float vector of length `num_exercise` that is 1 at `label` and 0 elsewhere."""
  encoded = np.zeros(num_exercise)
  encoded[label] = 1
  return encoded

# One hot decoding
def one_hot_decoding(num):
  """Return the positions of the entries equal to 1 in a one-hot array.

  Args:
    num: a single one-hot vector (1-D) or a batch of one-hot rows (2-D).

  Returns:
    * 2-D, non-empty input: np.array of the per-row np.where results; shape
      (rows, 1, 1) when every row holds exactly one 1.
    * any other input (1-D vector, empty array): np.argwhere(num == 1), i.e.
      one row of indices per entry equal to 1.
  """
  num = np.asarray(num)
  # Dispatch on the number of dimensions: the previous test `num.shape[0] > 0`
  # was also true for a plain 1-D vector, which then got iterated element by
  # element instead of being decoded as a whole.
  if num.ndim > 1 and num.shape[0] > 0:
    temp = np.array([np.where(row == 1) for row in num])
  else:
    temp = np.argwhere(num == 1)

  return temp

# Normalize data to have the same sample length for training the network
def normLength(arr, maxlength):
  """Resample every column of `arr` to `maxlength` rows by linear interpolation.

  Both the original and the new sample positions are spread evenly over
  [0, 1], so the first and last value of each column are preserved.
  Returns a new float array of shape (maxlength, arr.shape[-1]).
  """
  num_cols = arr.shape[-1]
  resampled = np.zeros((maxlength, num_cols))
  target_grid = np.linspace(0, 1, maxlength)

  for col in range(num_cols):
    column = arr[:, col]
    source_grid = np.linspace(0, 1, column.shape[0])
    resampled[:, col] = np.interp(target_grid, source_grid, column)

  return resampled

**Dataset Handler**

In [None]:
class MyDataset(Dataset):
    """Dataset of length-normalised IMU samples with one-hot exercise labels.

    Each input sample is a 2-D array (time steps x columns) whose last two
    columns hold the exercise label and the subject id. A sample is resampled
    to `to_size` time steps and transposed, so every item `x` has the layout
    (channels, to_size); the two label rows are removed from `x`.

    Args:
        list_of_samples: list of 2-D numpy arrays as described above.
        to_size: number of time steps every sample is resampled to.
    """
    def __init__(self, list_of_samples, to_size):
        self.to_size = to_size

        self.X, self.Y = [], []
        for sample in list_of_samples:
            # Honour the requested size (previously the global
            # NORM_SAMPLE_LENGTH was used and `to_size` was ignored).
            resampled = normLength(sample, to_size).T

            # Drop the trailing label rows from the network input
            self.X.append(resampled[:ID_EXERCISE_LABEL, :])
            # The label row is constant over time, so its first entry is the label
            label = int(resampled[ID_EXERCISE_LABEL, 0])
            self.Y.append(one_hot_encoding(label, num_exercise))

    def __len__(self):
        return len(self.Y)

    def __getitem__(self, idx):
        """Return (x, y) as tensors: float32 features and the one-hot label."""
        x = torch.from_numpy(self.X[idx]).float()
        # Always hand back a tensor so CPU and CUDA runs return the same types
        y = torch.from_numpy(self.Y[idx])
        if device == 'cuda':
            x = x.to(device)
            y = y.to(device)
        return x, y

**Model**

Architecture

In [None]:
class CNN_One_Block(nn.Module):
    """One Conv1d -> ReLU -> MaxPool1d block followed by a linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of convolution filters.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        self.conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.relu1    = nn.ReLU()
        self.pooling  = nn.MaxPool1d(pool_size)

        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pooling(x)

        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fcl(x)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        x = self.sfmx(x)
        return x

In [None]:
class CNN_Alter_Block(nn.Module):
    """One Conv1d -> ReLU -> BatchNorm1d -> MaxPool1d block followed by a linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of convolution filters.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        self.conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.relu1    = nn.ReLU()
        self.bnorm    = nn.BatchNorm1d(num_out)
        self.pooling  = nn.MaxPool1d(pool_size)

        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.bnorm(x)   # normalisation is applied after the activation here
        x = self.pooling(x)

        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fcl(x)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        x = self.sfmx(x)
        return x

In [None]:
class CNN_One_Deep_Block(nn.Module):
    """Two stacked Conv1d -> ReLU layers, one MaxPool1d, then a linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of filters of both convolutions.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        self.conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.relu1    = nn.ReLU()
        self.conv2    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu2    = nn.ReLU()
        self.pooling  = nn.MaxPool1d(pool_size)

        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pooling(x)

        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fcl(x)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        x = self.sfmx(x)
        return x

In [None]:
class CNN_Two_Blocks(nn.Module):
    """Two Conv1d -> ReLU -> MaxPool1d blocks followed by a linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of filters of both convolutions.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        self.conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.relu1    = nn.ReLU()
        self.pooling1 = nn.MaxPool1d(pool_size)

        self.conv2    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu2    = nn.ReLU()
        self.pooling2 = nn.MaxPool1d(pool_size)

        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pooling1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pooling2(x)

        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fcl(x)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        x = self.sfmx(x)
        return x

In [None]:
class CNN_Alter_Two_Block(nn.Module):
    """Two Conv1d -> ReLU -> BatchNorm1d -> MaxPool1d blocks followed by a linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of filters of both convolutions.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        self.conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.relu1    = nn.ReLU()
        self.bnorm1   = nn.BatchNorm1d(num_out)
        self.pooling1 = nn.MaxPool1d(pool_size)

        self.conv2    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu2    = nn.ReLU()
        self.bnorm2   = nn.BatchNorm1d(num_out)
        self.pooling2 = nn.MaxPool1d(pool_size)

        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.bnorm1(x)   # normalisation is applied after the activation here
        x = self.pooling1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.bnorm2(x)
        x = self.pooling2(x)

        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fcl(x)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        x = self.sfmx(x)
        return x

In [None]:
class CNN_Two_Deep_Blocks(nn.Module):
    """Two blocks of (Conv1d -> ReLU) x 2 -> MaxPool1d followed by a linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of filters of every convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        self.conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.relu1    = nn.ReLU()
        self.conv2    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu2    = nn.ReLU()
        self.pooling1 = nn.MaxPool1d(pool_size)

        self.conv3    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu3    = nn.ReLU()
        self.conv4    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu4    = nn.ReLU()
        self.pooling2 = nn.MaxPool1d(pool_size)

        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pooling1(x)

        x = self.conv3(x)
        x = self.relu3(x)
        x = self.conv4(x)
        x = self.relu4(x)
        x = self.pooling2(x)

        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fcl(x)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        x = self.sfmx(x)
        return x

In [None]:
# Parallel model design
class CNN_Parallel_Blocks(nn.Module):
    """Two parallel feature extractors whose flattened outputs are concatenated.

    The shallow stream is one Conv1d -> ReLU -> MaxPool1d block, the deep
    stream is two such blocks. Both streams read the same input; their
    flattened features are joined and fed to a single linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of filters of every convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        # Feature extraction: Shallow stream
        self.s_conv    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.s_relu    = nn.ReLU()
        self.s_pooling = nn.MaxPool1d(pool_size)

        # Feature extraction: Deep stream
        self.d_conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)
        self.d_relu1    = nn.ReLU()
        self.d_pooling1 = nn.MaxPool1d(pool_size)

        self.d_conv2    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.d_relu2    = nn.ReLU()
        self.d_pooling2 = nn.MaxPool1d(pool_size)

        # Down-stream
        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        # Feature extraction: Shallow stream
        ss = self.s_conv(x)
        ss = self.s_relu(ss)
        ss = self.s_pooling(ss)
        ss = self.flatten(ss)

        # Feature extraction: Deep stream
        ds = self.d_conv1(x)
        ds = self.d_relu1(ds)
        ds = self.d_pooling1(ds)
        ds = self.d_conv2(ds)
        ds = self.d_relu2(ds)
        ds = self.d_pooling2(ds)
        ds = self.flatten(ds)

        # Concatenate outputs of the two feature extraction streams
        # (both are 2-D after flattening, so hstack joins along the feature axis)
        y = torch.hstack((ss, ds))

        # Down-stream task
        y = self.dropout(y)
        y = self.fcl(y)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        y = self.sfmx(y)

        return y

In [None]:
class CNN_Three_Blocks(nn.Module):
    """Three Conv1d -> ReLU -> MaxPool1d blocks followed by a linear classifier.

    Args:
        num_in: number of input channels.
        num_out: number of filters of every convolution.
        kernel_size: convolution kernel size.
        stride: convolution stride.
        pool_size: max-pooling window.
        num_classes: number of output classes (default 37, the value that was
            previously hard-coded).
    """
    def __init__(self, num_in, num_out, kernel_size, stride, pool_size, num_classes=37):
        super().__init__()
        self.conv1    = nn.Conv1d(num_in, num_out, kernel_size, stride)      # input parameters
        self.relu1    = nn.ReLU()
        self.pooling1 = nn.MaxPool1d(pool_size)

        self.conv2    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu2    = nn.ReLU()
        self.pooling2 = nn.MaxPool1d(pool_size)

        self.conv3    = nn.Conv1d(num_out, num_out, kernel_size, stride)
        self.relu3    = nn.ReLU()
        self.pooling3 = nn.MaxPool1d(pool_size)

        self.flatten  = nn.Flatten()
        self.dropout  = nn.Dropout(p = 0.5)
        # LazyLinear infers its input size on the first forward pass
        self.fcl      = nn.LazyLinear(out_features = num_classes)
        self.sfmx     = nn.Softmax(dim=1)

    def forward(self, x):
        """Map x of shape (batch, num_in, length) to class probabilities (batch, num_classes)."""
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.pooling1(x)

        x = self.conv2(x)
        x = self.relu2(x)
        x = self.pooling2(x)

        x = self.conv3(x)
        x = self.relu3(x)
        x = self.pooling3(x)

        x = self.flatten(x)
        x = self.dropout(x)
        x = self.fcl(x)
        # NOTE(review): the output is softmax probabilities, not logits. If the
        # loss used for training is nn.CrossEntropyLoss (not visible here) the
        # softmax would effectively be applied twice - confirm.
        x = self.sfmx(x)
        return x

Evaluate Prediction

In [None]:
# ---> Remember to check one more time before running the loop
def predict(some_tensor, labs):
  """Count the rows of `some_tensor` whose arg-max index equals that of the same row in `labs`.

  Both arguments are tensors; they are detached and copied to the CPU first.
  Returns the number of matching rows as a Python int.
  """
  scores = some_tensor.cpu().detach().numpy()
  truths = labs.cpu().detach().numpy()

  return sum(
    1
    for row in range(scores.shape[0])
    if np.argmax(scores[row]) == np.argmax(truths[row])
  )

Model Loop

In [None]:
val_acc, best_val_loss = 0, 1.0 # best validation accuracy / loss seen so far (updated by val_loop)
def train_loop(dataloader, model, loss_fn, optimizer):
    """Train `model` for one epoch and return the training accuracy.

    Besides updating the model this function has module-level side effects:
    the mean batch loss is appended to the global `train_losses` list and
    passed to the global `scheduler` via `scheduler.step(...)`.

    Args:
        dataloader: yields (X, y) batches, y being one-hot labels.
        model: network to train.
        loss_fn: callable taking (prediction, target).
        optimizer: optimizer holding the model parameters.

    Returns:
        Fraction of correctly classified training samples, measured on the
        predictions made during the epoch (i.e. before each weight update).
    """
    global train_mode
    train_mode = True
    # The global flag above does not affect the network; dropout and batch-norm
    # only switch behaviour through the module's own train/eval state.
    model.train()

    size        = len(dataloader.dataset)
    num_batches = len(dataloader)
    train_loss, correct = 0, 0
    for X, y in dataloader:
        # Compute prediction and loss
        pred = model(X)
        # Labels arrive as float64 one-hot vectors; match the prediction's
        # dtype/device directly instead of round-tripping through the CPU.
        y = y.to(device=pred.device, dtype=torch.float32)

        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        correct     += predict(pred, y)
        # Reuse the loss already computed above rather than evaluating loss_fn again
        train_loss  += loss.item()

    train_loss /= num_batches
    train_losses.append(train_loss)
    correct /= size

    scheduler.step(train_loss)

    return correct

def val_loop(dataloader, model, loss_fn):
    """Evaluate `model` on the validation data and checkpoint it when it improves.

    The mean batch loss is appended to the global `val_losses` list. The model
    is saved with `torch.save` when its accuracy beats the global `val_acc`,
    or ties it with a lower loss than the global `best_val_loss`; both globals
    are updated accordingly.

    Args:
        dataloader: yields (X, y) batches, y being one-hot labels.
        model: network to evaluate.
        loss_fn: callable taking (prediction, target).
    """
    global val_acc, train_mode, best_val_loss
    train_mode = False

    size        = len(dataloader.dataset)
    num_batches = len(dataloader)
    val_loss, correct = 0, 0

    # Evaluate with dropout disabled and batch-norm using its running
    # statistics; the previous mode is restored afterwards so that training
    # code calling this function is unaffected.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, y in dataloader:
                pred = model(X)
                y = y.to(device=pred.device, dtype=torch.float32)
                val_loss += loss_fn(pred, y).item()
                correct += predict(pred, y)
    finally:
        model.train(was_training)

    val_loss /= num_batches
    val_losses.append(val_loss)
    correct /= size
    print(f"Val Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {val_loss:>8f} \n")

    # NOTE(review): the checkpoint name is hard-coded, so every run/fold
    # overwrites the same file - confirm this is intended.
    model_name = 'MaxJump5050_lstm_fcn_all_32_1en4_sch_801010_wd.pt'
    better_accuracy = correct > val_acc
    same_accuracy_lower_loss = correct == val_acc and val_loss < best_val_loss
    if better_accuracy or same_accuracy_lower_loss:
      torch.save(model, model_name)
      val_acc = correct
      best_val_loss = val_loss
      print('Model saved\n')

def test_loop(dataloader, model, loss_fn):
    """Evaluate `model` on `dataloader` and return the classification accuracy.

    Args:
        dataloader: yields (X, y) batches, y being one-hot labels.
        model: network to evaluate.
        loss_fn: callable taking (prediction, target); the mean loss is
            computed but only the accuracy is returned.

    Returns:
        Fraction of correctly classified samples among those actually yielded
        by the dataloader.
    """
    global train_mode
    train_mode = False

    num_batches = len(dataloader)
    # `size` counts the samples seen, so the accuracy stays correct even if the
    # loader drops samples.
    test_loss, correct, size = 0, 0, 0

    # Evaluate with dropout disabled and batch-norm using its running
    # statistics; the previous mode is restored afterwards.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for X, y in dataloader:
                pred = model(X)
                y = y.to(device=pred.device, dtype=torch.float32)
                test_loss += loss_fn(pred, y).item()
                correct += predict(pred, y)
                size    += y.shape[0]
    finally:
        model.train(was_training)

    test_loss /= num_batches
    correct /= size

    return correct

Leave-One-Subject-Out Cross-Validation (LOSOCV)

In [None]:
def losocv_split_train_list(all_subject_id, test_subject):
  """Return the ids in `all_subject_id` that differ from `test_subject`, in their original order."""
  train_list = []
  for subject_id in all_subject_id:
    if subject_id != test_subject:
      train_list.append(subject_id)

  return train_list

# Subjects and Exercises

In [None]:
# Dataset location when running on Colab with Google Drive mounted.
root_path = 'drive/MyDrive/mbl/parsed_h5_csv/'
if os.path.isdir(root_path):
  # Exclude only a folder literally named 'merged' (when present) instead of
  # blindly dropping the last entry, which would remove a real subject
  # whenever no 'merged' folder exists.
  subs = sorted(name for name in os.listdir(root_path) if name != 'merged')
else:
  # Not on Colab / Drive not mounted: leave the subject list to the local-path cell
  print('Dataset folder not found: ' + root_path)

FileNotFoundError: ignored

In [None]:
# This is when using the local machine <------------------- NOTICE
# NOTE: the path is relative to the notebook's working directory.
root_path = 'Documents/parsed_h5_csv/'
# Keep sub-directories only, so stray files in the dataset folder are never
# mistaken for subjects. No 'merged' folder exists here, so nothing else is removed.
subs = sorted(
  name for name in os.listdir(root_path)
  if os.path.isdir(os.path.join(root_path, name))
)

In [None]:
subs

['SUB01',
 'SUB02',
 'SUB03',
 'SUB04',
 'SUB05',
 'SUB06',
 'SUB07',
 'SUB08',
 'SUB09',
 'SUB10',
 'SUB11',
 'SUB12',
 'SUB14',
 'SUB15',
 'SUB16',
 'SUB17',
 'SUB18',
 'SUB19',
 'SUB21',
 'SUB22']

In [None]:
# total should be 20
num_subject = len(subs)
num_subject

20

Obtain exercises

In [None]:
# One sorted list of exercise folder names per subject
exercises = [sorted(os.listdir(root_path + subject_dir)) for subject_dir in subs]

# Get types of exercises (i.e., physical activities):
# all per-subject lists joined into one flat list (duplicates included)
exercise_types = []
for ex in exercises:
  exercise_types += ex

In [None]:
for ex in exercises:
  print(len(ex), ex)

14 ['CMJDL', 'DropJumpDL', 'DropLandDL', 'HeelRaise', 'Lunge', 'MaxJump', 'Run', 'SqDL', 'SqHalfDL', 'StepDnH', 'StepDnL', 'StepUpH', 'StepUpL', 'Walk']
37 ['BulgSq', 'CMJDL', 'CMJSL', 'DeclineSq', 'DropJumpDL', 'DropJumpSL', 'DropLandDL', 'DropLandSL', 'FwHop', 'FwHopFast', 'FwJump', 'FwJumpFast', 'HeelRaise', 'LatHop', 'LatHopFast', 'LatJump', 'LatJumpFast', 'Lunge', 'MaxHop', 'MaxJump', 'Pose', 'Run', 'RunCut', 'RunDec', 'SpainSq', 'SplitJump', 'SportJump', 'SqDL', 'SqHalfDL', 'SqHalfSL', 'SqSL', 'StepDnH', 'StepDnL', 'StepUpH', 'StepUpL', 'SumoSq', 'Walk']
37 ['BulgSq', 'CMJDL', 'CMJSL', 'DeclineSq', 'DropJumpDL', 'DropJumpSL', 'DropLandDL', 'DropLandSL', 'FwHop', 'FwHopFast', 'FwJump', 'FwJumpFast', 'HeelRaise', 'LatHop', 'LatHopFast', 'LatJump', 'LatJumpFast', 'Lunge', 'MaxHop', 'MaxJump', 'Pose', 'Run', 'RunCut', 'RunDec', 'SpainSq', 'SplitJump', 'SportJump', 'SqDL', 'SqHalfDL', 'SqHalfSL', 'SqSL', 'StepDnH', 'StepDnL', 'StepUpH', 'StepUpL', 'SumoSq', 'Walk']
37 ['BulgSq', 'CMJD

In [None]:
# Reduce the collected names to the sorted list of distinct exercise types
exercise_types = np.unique(np.array(exercise_types)).tolist()
exercise_types

['BulgSq',
 'CMJDL',
 'CMJSL',
 'DeclineSq',
 'DropJumpDL',
 'DropJumpSL',
 'DropLandDL',
 'DropLandSL',
 'FwHop',
 'FwHopFast',
 'FwJump',
 'FwJumpFast',
 'HeelRaise',
 'LatHop',
 'LatHopFast',
 'LatJump',
 'LatJumpFast',
 'Lunge',
 'MaxHop',
 'MaxJump',
 'Pose',
 'Run',
 'RunCut',
 'RunDec',
 'SpainSq',
 'SplitJump',
 'SportJump',
 'SqDL',
 'SqHalfDL',
 'SqHalfSL',
 'SqSL',
 'StepDnH',
 'StepDnL',
 'StepUpH',
 'StepUpL',
 'SumoSq',
 'Walk']

In [None]:
# total should be 37
num_exercise = len(exercise_types)
num_exercise

37

Label Exercises

In [None]:
# Integer code of every exercise type, plus the lookup tables in both directions
exercise_code = list(range(num_exercise))
label_code = {name: code for name, code in zip(exercise_types, exercise_code)}
code_label = {code: name for code, name in zip(exercise_code, exercise_types)}

In [None]:
label_code

{'BulgSq': 0,
 'CMJDL': 1,
 'CMJSL': 2,
 'DeclineSq': 3,
 'DropJumpDL': 4,
 'DropJumpSL': 5,
 'DropLandDL': 6,
 'DropLandSL': 7,
 'FwHop': 8,
 'FwHopFast': 9,
 'FwJump': 10,
 'FwJumpFast': 11,
 'HeelRaise': 12,
 'LatHop': 13,
 'LatHopFast': 14,
 'LatJump': 15,
 'LatJumpFast': 16,
 'Lunge': 17,
 'MaxHop': 18,
 'MaxJump': 19,
 'Pose': 20,
 'Run': 21,
 'RunCut': 22,
 'RunDec': 23,
 'SpainSq': 24,
 'SplitJump': 25,
 'SportJump': 26,
 'SqDL': 27,
 'SqHalfDL': 28,
 'SqHalfSL': 29,
 'SqSL': 30,
 'StepDnH': 31,
 'StepDnL': 32,
 'StepUpH': 33,
 'StepUpL': 34,
 'SumoSq': 35,
 'Walk': 36}

In [None]:
code_label

{0: 'BulgSq',
 1: 'CMJDL',
 2: 'CMJSL',
 3: 'DeclineSq',
 4: 'DropJumpDL',
 5: 'DropJumpSL',
 6: 'DropLandDL',
 7: 'DropLandSL',
 8: 'FwHop',
 9: 'FwHopFast',
 10: 'FwJump',
 11: 'FwJumpFast',
 12: 'HeelRaise',
 13: 'LatHop',
 14: 'LatHopFast',
 15: 'LatJump',
 16: 'LatJumpFast',
 17: 'Lunge',
 18: 'MaxHop',
 19: 'MaxJump',
 20: 'Pose',
 21: 'Run',
 22: 'RunCut',
 23: 'RunDec',
 24: 'SpainSq',
 25: 'SplitJump',
 26: 'SportJump',
 27: 'SqDL',
 28: 'SqHalfDL',
 29: 'SqHalfSL',
 30: 'SqSL',
 31: 'StepDnH',
 32: 'StepDnL',
 33: 'StepUpH',
 34: 'StepUpL',
 35: 'SumoSq',
 36: 'Walk'}

Label Subjects

In [None]:
# Integer code of every subject folder, plus the lookup tables in both directions
subs_code = list(range(num_subject))
subject_code = {name: code for name, code in zip(subs, subs_code)}
code_subject = {code: name for code, name in zip(subs_code, subs)}

In [None]:
subject_code

{'SUB01': 0,
 'SUB02': 1,
 'SUB03': 2,
 'SUB04': 3,
 'SUB05': 4,
 'SUB06': 5,
 'SUB07': 6,
 'SUB08': 7,
 'SUB09': 8,
 'SUB10': 9,
 'SUB11': 10,
 'SUB12': 11,
 'SUB14': 12,
 'SUB15': 13,
 'SUB16': 14,
 'SUB17': 15,
 'SUB18': 16,
 'SUB19': 17,
 'SUB21': 18,
 'SUB22': 19}

In [None]:
code_subject

{0: 'SUB01',
 1: 'SUB02',
 2: 'SUB03',
 3: 'SUB04',
 4: 'SUB05',
 5: 'SUB06',
 6: 'SUB07',
 7: 'SUB08',
 8: 'SUB09',
 9: 'SUB10',
 10: 'SUB11',
 11: 'SUB12',
 12: 'SUB14',
 13: 'SUB15',
 14: 'SUB16',
 15: 'SUB17',
 16: 'SUB18',
 17: 'SUB19',
 18: 'SUB21',
 19: 'SUB22'}

# Collect Data Samples

In [None]:
sample_list, file_paths = [], []

# Expected number of columns of a complete sample: one column per
# (IMU position, sensor type, axis) - see CONSIDERED_IMU_POSITION - plus the
# two label columns added below. E.g. 4 IMUs with accelerometer and gyroscope
# (3 axes each) give 4*(3+3) = 24 signal columns + 2 label columns.
num_missing_dt = 0 # count number of files with missing data to remove them out
num_imu = len(CONSIDERED_IMU_POSITION)
num_col_in_dt = num_imu*NUM_SENSOR_INFO*NUM_AX_PER_SENSOR + 2 # adding exercise + subject labels

print("Number of columns that should be in the dataframe = " + str(num_col_in_dt))

no_of_samples = None
failed_folders = [] # (folder path, error) of every folder whose loading was aborted

# Loop through subjects. The first subject is skipped; the evaluation loop
# notes that subject 1 was removed due to missing IMUs.
for subject in tqdm(subs[1:]):
  print()
  print('Collecting data from subject ' + str(subject) + ' ...')

  for ex in exercise_types:
    folder_path = root_path + subject + '/' + ex + '/'

    # Not every subject performed every exercise
    if not os.path.isdir(folder_path):
      continue

    try:
      file_names = os.listdir(folder_path) # obtain all parsed data files
      no_of_samples = len(file_names) # number of files found for this exercise

      for file_name in file_names:
        sample_path = folder_path + file_name
        df = load_df(sample_path) # load and re-format the data file
        df = slice_df(df) # pick data from considered sensors and remove unnecessary information
        df['target'] = label_code[ex] # add output for the data file
        df['subject_id'] = subject_code[subject] # add subject id for the data

        sample_arr = np.array(df).astype(float) # convert to numpy array type

        # Check if the data has all considered parts
        if sample_arr.shape[1] != num_col_in_dt:
          num_missing_dt += 1
        else:
          sample_list.append(sample_arr)

    except Exception as err:
      # Best effort as before: a failure still abandons the rest of this
      # folder, but it is now recorded instead of being silently discarded
      # (and Ctrl-C is no longer swallowed).
      failed_folders.append((folder_path, repr(err)))

  print() # TBD

print('Collected ' + str(len(sample_list)) + ' samples; '
      + str(num_missing_dt) + ' files skipped because of missing sensor columns')
for failed_folder, failure in failed_folders:
  print('Loading aborted for ' + failed_folder + ': ' + failure)

Number of columns that should be in the dataframe = 14


  0%|                                                                                           | 0/19 [00:00<?, ?it/s]


Collecting data from subject SUB02 ...


  5%|████▎                                                                              | 1/19 [00:02<00:43,  2.40s/it]



Collecting data from subject SUB03 ...


 11%|████████▋                                                                          | 2/19 [00:04<00:41,  2.46s/it]



Collecting data from subject SUB04 ...


 16%|█████████████                                                                      | 3/19 [00:07<00:38,  2.42s/it]



Collecting data from subject SUB05 ...


 21%|█████████████████▍                                                                 | 4/19 [00:09<00:36,  2.41s/it]



Collecting data from subject SUB06 ...


 26%|█████████████████████▊                                                             | 5/19 [00:12<00:33,  2.41s/it]



Collecting data from subject SUB07 ...


 32%|██████████████████████████▏                                                        | 6/19 [00:14<00:31,  2.45s/it]



Collecting data from subject SUB08 ...


 37%|██████████████████████████████▌                                                    | 7/19 [00:16<00:29,  2.43s/it]



Collecting data from subject SUB09 ...


 42%|██████████████████████████████████▉                                                | 8/19 [00:19<00:26,  2.39s/it]



Collecting data from subject SUB10 ...


 47%|███████████████████████████████████████▎                                           | 9/19 [00:21<00:23,  2.34s/it]



Collecting data from subject SUB11 ...


 53%|███████████████████████████████████████████▏                                      | 10/19 [00:23<00:20,  2.33s/it]



Collecting data from subject SUB12 ...


 58%|███████████████████████████████████████████████▍                                  | 11/19 [00:25<00:18,  2.27s/it]



Collecting data from subject SUB14 ...


 63%|███████████████████████████████████████████████████▊                              | 12/19 [00:28<00:15,  2.25s/it]



Collecting data from subject SUB15 ...


 68%|████████████████████████████████████████████████████████                          | 13/19 [00:30<00:13,  2.26s/it]



Collecting data from subject SUB16 ...


 74%|████████████████████████████████████████████████████████████▍                     | 14/19 [00:32<00:11,  2.23s/it]



Collecting data from subject SUB17 ...


 79%|████████████████████████████████████████████████████████████████▋                 | 15/19 [00:35<00:09,  2.31s/it]



Collecting data from subject SUB18 ...


 84%|█████████████████████████████████████████████████████████████████████             | 16/19 [00:37<00:07,  2.46s/it]



Collecting data from subject SUB19 ...


 89%|█████████████████████████████████████████████████████████████████████████▎        | 17/19 [00:40<00:04,  2.40s/it]



Collecting data from subject SUB21 ...


 95%|█████████████████████████████████████████████████████████████████████████████▋    | 18/19 [00:42<00:02,  2.39s/it]



Collecting data from subject SUB22 ...


100%|██████████████████████████████████████████████████████████████████████████████████| 19/19 [00:44<00:00,  2.36s/it]







In [None]:
len(sample_list)

3210

In [None]:
sample_list[0].shape

(197, 14)

# The Loop

Check Device

In [None]:
# Pick the compute device once: the GPU when PyTorch can see one, else the CPU.
# `device` stays a plain string because later cells compare it to 'cuda'.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Using {} device".format(device))

Using cpu device


The Great Loop *(including both hyper-parameter tuning and performance evaluation)*

In [None]:
# Nested leave-one-subject-out cross-validation (LOSOCV).
#   Outer loop: hold out one subject as the test subject.
#   Inner loop: for every point of the hyper-parameter search space, hold out
#               each remaining subject in turn as the validation subject and
#               average the validation accuracy; keep the best point.
#   Finally:    retrain on all non-test subjects with the selected
#               hyper-parameters and record train/test accuracy.

# Subject ids used for LOSOCV.
# NOTE(review): an earlier comment here read "Removed subject 1 due to missing
# IMUs", but the range below still starts at 1 -- confirm that NUM_SUBJECT and
# the subject-id mapping already account for the removed subject.
all_subject_id = list(range(1, NUM_SUBJECT))

# Architecture used for BOTH the hyper-parameter search and the final
# evaluation, so the two stages can never drift apart. Other architectures
# that were tried with the same constructor arguments: CNN_One_Block,
# CNN_Two_Blocks, CNN_Three_Blocks, CNN_One_Deep_Block, CNN_Two_Deep_Blocks,
# CNN_Parallel_Blocks, CNN_Alter_Two_Block.
model_class = CNN_Alter_Block

# Training/testing accuracy of performance evaluation (one entry per test subject)
perf_train_acc = []
perf_test_acc = []

# Performance evaluation
# Iterates over a copy of the id list; narrow the slice to evaluate a subset.
for test_subject in all_subject_id[:]:
  print('# Working on test subject ' + str(test_subject))

  # --- Divide training/testing data according to LOSOCV --- #
  # The subject id is read from the first row of the subject-label column.
  train_list, test_list = [], []

  for sample in sample_list:
    if sample[0, ID_SUBJECT_LABEL] != test_subject: # data not from the testing subject
      train_list.append(sample)
    else: # data from the testing subject
      test_list.append(sample)

  # --- Hyper-parameter tuning loop with val_list --- #
  # Get subject id from the train list
  train_subject_id = losocv_split_train_list(all_subject_id, test_subject)

  # Initialize the selected hyper-parameters
  best_hp_point = None
  s_batch_size = 0
  s_num_out = 0
  s_kernel_size = 0
  s_stride = 0
  s_pool_size = 0

  # Start below any reachable accuracy so that a point is always selected,
  # even when every validation accuracy is exactly 0. Starting at 0 with a
  # strict '>' would leave s_batch_size at 0 and crash DataLoader later.
  hp_val_acc = float('-inf')

  # --- Loop through the search space --- #
  print('--- Start the hyper-parameters searching')
  for hp_point in hp_search_space:

    t_bs = hp_point[ID_BATCH_SIZE]
    t_no = hp_point[ID_NUM_OUT]
    t_ks = hp_point[ID_KERNEL_SIZE]
    t_st = hp_point[ID_STRIDE]
    t_ps = hp_point[ID_POOL_SIZE]

    # Final-epoch validation accuracy, one entry per validation subject
    temp_perf_val_acc = []

    for val_subject in train_subject_id:
      print('\t Val subject = ' + str(val_subject))

      # Divide training/val data according to LOSOCV
      temp_train_list, temp_val_list = [], []

      for sample in train_list:
        if sample[0, ID_SUBJECT_LABEL] != val_subject: # data not from the val subject
          temp_train_list.append(sample)
        else: # data from the val subject
          temp_val_list.append(sample)

      # Normalize data (the "test" names below hold the validation split)
      temp_train_data  = MyDataset(temp_train_list, NORM_SAMPLE_LENGTH)
      temp_test_data   = MyDataset(temp_val_list, NORM_SAMPLE_LENGTH)

      # Wrap dataloader
      temp_train_dataloader  = DataLoader(temp_train_data, batch_size = t_bs, shuffle=True)
      temp_test_dataloader   = DataLoader(temp_test_data, batch_size  = t_bs, shuffle=False)

      # Model: a fresh instance for every (hyper-parameter point, val subject) pair
      temp_model = model_class(conv_num_in, t_no, t_ks, t_st, t_ps).to(device)

      # for plotting
      # NOTE(review): not used in this cell; kept because train_loop/test_loop
      # may append to these notebook-level lists -- confirm.
      train_losses, val_losses = [], []

      # Initialize the loss function
      temp_loss_fn = nn.CrossEntropyLoss()

      # Initialize the optimisation function
      temp_optimizer = torch.optim.Adam(temp_model.parameters(), lr = LEARNING_RATE, weight_decay = ADAM_WEIGHT_DECAY)

      # Scheduler
      # NOTE(review): scheduler.step(...) is never called in this cell, so unless
      # train_loop/test_loop step the notebook-level `scheduler`, the learning
      # rate is never reduced -- confirm.
      scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(temp_optimizer, 'min', factor = LEARNING_RATE_REDUCTION_FACTOR)

      for t in range(NUM_EPOCHS):
        temp_train_acc  = train_loop(temp_train_dataloader, temp_model, temp_loss_fn, temp_optimizer)
        temp_val_acc   = test_loop(temp_test_dataloader, temp_model, temp_loss_fn)

      # Only the validation accuracy after the last epoch is kept
      temp_perf_val_acc.append(temp_val_acc)

    # Update the hyper-parameters if getting better val accuracy
    temp_avg_val_acc = sum(temp_perf_val_acc)/len(train_subject_id)
    print('Temp. avg. val. acc. = ' + str(temp_avg_val_acc))

    if temp_avg_val_acc > hp_val_acc:
      # Update accuracy
      hp_val_acc = temp_avg_val_acc

      print('\t Parameters updated, and val_acc = ' + str(hp_val_acc))
      print('\t Parameters: ' + str(hp_point))

      # Update hyper-parameters
      best_hp_point = hp_point
      s_batch_size = t_bs
      s_num_out = t_no
      s_kernel_size = t_ks
      s_stride = t_st
      s_pool_size = t_ps

  # Fail loudly instead of building a DataLoader with batch_size = 0.
  if best_hp_point is None:
    raise RuntimeError('No hyper-parameter point could be selected for test subject '
                       + str(test_subject)
                       + ' (empty search space or NaN validation accuracy)')

  print('--- Start the performance evaluation')
  # --- Make dataset --- #
  # Normalize
  train_data  = MyDataset(train_list, NORM_SAMPLE_LENGTH)
  test_data   = MyDataset(test_list, NORM_SAMPLE_LENGTH)

  # Wrap dataloader
  train_dataloader  = DataLoader(train_data, batch_size = s_batch_size, shuffle=True)
  test_dataloader   = DataLoader(test_data, batch_size  = s_batch_size, shuffle=False)

  # --- Model and training --- #
  # Model: same architecture as in the search, built with the selected hyper-parameters
  model = model_class(conv_num_in, s_num_out, s_kernel_size, s_stride, s_pool_size).to(device)

  # for plotting
  # NOTE(review): not used in this cell; see the note in the tuning loop.
  train_losses, val_losses = [], []

  # Initialize the loss function
  loss_fn = nn.CrossEntropyLoss()

  # Initialize the optimisation function
  optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE, weight_decay = ADAM_WEIGHT_DECAY)

  # Scheduler
  # NOTE(review): never stepped in this cell; see the note in the tuning loop.
  scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', factor = LEARNING_RATE_REDUCTION_FACTOR)

  for t in range(NUM_EPOCHS):
    print(f"Epoch {t+1}\n-------------------------------")
    temp_train_acc  = train_loop(train_dataloader, model, loss_fn, optimizer)
    temp_test_acc   = test_loop(test_dataloader, model, loss_fn)

  # Record the accuracies reached after the last epoch for this test subject
  perf_train_acc.append(temp_train_acc)
  perf_test_acc.append(temp_test_acc)
  print(perf_train_acc)
  print(perf_test_acc)


# Working on test subject 1
--- Start the hyper-parameters searching
	 Val subject = 2




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
-------------------------------
Epoch 20
-------------------------------
Epoch 21
-------------------------------
Epoch 22
-------------------------------
Epoch 23
-------------------------------
Epoch 24
-------------------------------
Epoch 25
-------------------------------
Epoch 26
-------------------------------
Epoch 27
-------------------------------
Epoch 28
-------------------------------
Epoch 29
-------------------------------
Epoch 30
-------------------------------
[0.8050293925538864, 0.8223900196979645, 0.809258043335522, 0.8159187684245005, 0.8206896551724138, 0.8127463863337714]
[0.6351351351351351, 0.5060975609756098, 0.5304878048780488, 0.5477707006369427, 0.6727272727272727, 0.5662650602409639]
# Working on test subject 7
--- Start the hyper-parameters searching
	 Val subject = 1
	 Val subject = 2
	 Val subject = 3
	 Val subject = 4
	 Val subject = 5
	 Val subject = 6
	 Val subject = 8
	 Val subject = 