In [107]:
# import the necessary packages
import pandas as pd
from scipy.signal import butter, lfilter, resample
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy import misc
import torch
from torch.utils.data import Dataset, DataLoader
from einops import rearrange, repeat
from einops.layers.torch import Rearrange
import numpy as np
import os

pd.options.display.float_format = '{:.2f}'.format

In [108]:
# check if cuda is available
print(torch.cuda.is_available())
torch.autograd.set_detect_anomaly(True)

True


<torch.autograd.anomaly_mode.set_detect_anomaly at 0x144b203f070>

In [109]:
data_path = '../data/VTT_ConIot_Dataset'
IMU_path = data_path + '/IMU'
Keypoint_path = data_path + '/Keypoint'
activities = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
users = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]

In [110]:
def create_keypoint_and_imu_data(subject, activity):
    # check if file exists
    if not os.path.exists(Keypoint_path + f'/Subject_{subject:02d}_Task_{activity}.m2ts_keyPoints.csv'):
        print(f'Keypoint file for Subject_{subject:02d}_Task_{activity} does not exist')
        # return two empty dataframes
        return pd.DataFrame(), pd.DataFrame()

    keypoint_data = pd.read_csv(Keypoint_path + f'/Subject_{subject:02d}_Task_{activity}.m2ts_keyPoints.csv')
    imu_data = pd.read_csv(IMU_path + f'/activity_{activity}_user_{subject}_combined.csv')
    # only keep the columns in imu data that are of accelerometer, i.e. that have _A in the name
    imu_data = imu_data[[col for col in imu_data.columns if '_A' in col]]
    # remove frame_number and timestamp columns from keypoint data
    keypoint_data = keypoint_data.drop(columns=['frame_number', 'timestamp'])
    keypoint_data['subject'] = subject
    keypoint_data['activity'] = activity
    imu_data['subject'] = subject
    imu_data['activity'] = activity
    # make sure subject and activity are of type float
    keypoint_data['subject'] = keypoint_data['subject'].astype(float)
    keypoint_data['activity'] = keypoint_data['activity'].astype(float)
    imu_data['subject'] = imu_data['subject'].astype(float)
    imu_data['activity'] = imu_data['activity'].astype(float)
    return keypoint_data, imu_data

In [111]:
def create_relative_keypoints(df_keypoints):
    '''
    "keypoints": [
            "nose","left_eye","right_eye","left_ear","right_ear",
            "left_shoulder","right_shoulder","left_elbow","right_elbow",
            "left_wrist","right_wrist","left_hip","right_hip",
            "left_knee","right_knee","left_ankle","right_ankle"
        ],
    "corresponding points":[
            "0", "1", "2", "3", "4",
            "5", "6", "7", "8",
            "9", "10", "11", "12",
            "13", "14", "15", "16"
        ]
    There are 17 points with each point having their x, y and confidence values
    :param df_keypoints:
    :return:
    '''
    # drop the column "detection_score" as they are all high
    df_keypoints = df_keypoints.drop(columns=['detection_score'])
    # create a weighted average column of the x and y of nose, left eye, right eye, left ear and right ear and call it x_head, y_head, prob_head
    # where the columns for x and y are labelled as x0,y0,prob0,x1,y1,prob1,....,x16,y16,prob16
    # get the x and y values for the nose, left eye, right eye, left ear and right ear
    x_head = (df_keypoints['x0'] * df_keypoints['prob0'] + df_keypoints['x1'] * df_keypoints['prob1'] +
              df_keypoints['x2'] * df_keypoints['prob2'] + df_keypoints['x3'] * df_keypoints['prob3'] +
              df_keypoints['x4'] * df_keypoints['prob4']) / (df_keypoints['prob0'] + df_keypoints['prob1'] +
                                                             df_keypoints['prob2'] + df_keypoints['prob3'] +
                                                             df_keypoints['prob4'])

    y_head = (df_keypoints['y0'] * df_keypoints['prob0'] + df_keypoints['y1'] * df_keypoints['prob1'] +
              df_keypoints['y2'] * df_keypoints['prob2'] + df_keypoints['y3'] * df_keypoints['prob3'] +
              df_keypoints['y4'] * df_keypoints['prob4']) / (df_keypoints['prob0'] + df_keypoints['prob1'] +
                                                             df_keypoints['prob2'] + df_keypoints['prob3'] +
                                                             df_keypoints['prob4'])
    # set prob_head as the maximum of the probabilities of the nose, left eye, right eye, left ear and right ear
    prob_head = df_keypoints[['prob0', 'prob1', 'prob2', 'prob3', 'prob4']].max(axis=1)

    # add the columns to the dataframe
    df_keypoints['x_head'] = x_head
    df_keypoints['y_head'] = y_head
    df_keypoints['prob_head'] = prob_head

    # drop the columns for the nose, left eye, right eye, left ear and right ear
    df_keypoints = df_keypoints.drop(columns=['x0', 'y0', 'prob0', 'x1', 'y1', 'prob1', 'x2', 'y2', 'prob2',
                                              'x3', 'y3', 'prob3', 'x4', 'y4', 'prob4'])

    # convert the x and y values to be relative to the head
    for i in range(5, 17):
        df_keypoints[f'x{i}'] = df_keypoints[f'x{i}'] - df_keypoints['x_head']
        df_keypoints[f'y{i}'] = df_keypoints[f'y{i}'] - df_keypoints['y_head']

    # drop the columns for the x and y values of the head
    df_keypoints = df_keypoints.drop(columns=['x_head', 'y_head', 'prob_head'])

    return df_keypoints


def processed_data():
    full_keypoint_data = pd.DataFrame()
    full_imu_data = pd.DataFrame()
    for activity in activities:
        for user in users:
            keypoint_data, imu_data = create_keypoint_and_imu_data(user, activity)
            full_keypoint_data = pd.concat([full_keypoint_data, keypoint_data])
            full_imu_data = pd.concat([full_imu_data, imu_data])

    # pre-pocess by removing the extra time stamps fom either keypoint or imu data
    # per participant and activity pair there should be 4x the number of imu data points as keypoint data points
    cropped_keypoint_data = pd.DataFrame()
    cropped_imu_data = pd.DataFrame()
    for activity in activities:
        for user in users:
            # print shape of keypoint and imu data
            keypoint_data = full_keypoint_data[
                (full_keypoint_data['subject'] == user) & (full_keypoint_data['activity'] == activity)]
            imu_data = full_imu_data[(full_imu_data['subject'] == user) & (full_imu_data['activity'] == activity)]

            # if either of the data is empty, skip
            if keypoint_data.shape[0] == 0 or imu_data.shape[0] == 0:
                continue

            if keypoint_data.shape[0] * 4 > imu_data.shape[0]:
                # remove the extra keypoint data
                keypoint_data = keypoint_data.iloc[:imu_data.shape[0] // 4]
            else:
                # remove the extra imu data
                imu_data = imu_data.iloc[:keypoint_data.shape[0] * 4]
            # remove keypoint values such that keypoint data is multiple of 25
            keypoint_data = keypoint_data.iloc[:keypoint_data.shape[0] // 25 * 25]

            # remove imu values such that imu data is multiple of 100
            imu_data = imu_data.iloc[:imu_data.shape[0] // 100 * 100]

            # make copy of the columns subject and activity before dropping
            subject_col = imu_data['subject']
            activity_col = imu_data['activity']
            imu_data = imu_data.drop(columns=['subject', 'activity'])
            # resample the imu data to be the same length as the keypoint data
            imu_resampled = resample(imu_data, keypoint_data.shape[0])
            # create a pd dataframe from the resampled data using the columns in imu_data
            imu_data = pd.DataFrame(imu_resampled, columns=imu_data.columns)
            # add the subject and activity columns back
            imu_data['subject'] = subject_col
            imu_data['activity'] = activity_col

            cropped_keypoint_data = pd.concat([cropped_keypoint_data, keypoint_data])
            cropped_imu_data = pd.concat([cropped_imu_data, imu_data])

    # reset the index keypoint
    cropped_keypoint_data = cropped_keypoint_data.reset_index(drop=True)
    # preprocess the keypoints for each point of interest from pose estimation
    cropped_keypoint_data = create_relative_keypoints(cropped_keypoint_data)

    # reset the index for imu
    cropped_imu_data = cropped_imu_data.reset_index(drop=True)

    # create a new df between 25 key-points and 100 IMU data points for each activity and user
    # with columns start, end, subject, activity
    sliding_windows = pd.DataFrame(columns=['start', 'end', 'subject', 'activity'])

    for activity in activities:
        for user in users:
            # preserve the index
            keypoint_data = cropped_keypoint_data[
                (cropped_keypoint_data['subject'] == user) & (cropped_keypoint_data['activity'] == activity)]
            # if either of the data is empty, skip
            if keypoint_data.shape[0] == 0:
                continue

            # split the data into windows of 25
            for i in range(0, keypoint_data.shape[0], 25):
                # check if the window has 25 elements
                if i + 25 > keypoint_data.shape[0]:
                    continue
                # get the start and end index of the window in cropped_keypoint_data
                start = keypoint_data.index[i]
                end = keypoint_data.index[i + 25 - 1]
                # concat to the sliding_windows dataframe
                sliding_windows = pd.concat([sliding_windows, pd.DataFrame(
                    {'start': [start], 'end': [end], 'subject': [user], 'activity': [activity]})])

    sliding_windows = sliding_windows.reset_index(drop=True)

    # normalize the keypoint data
    # create a copy of column activity and subject before dropping
    activity = cropped_keypoint_data['activity']
    subject = cropped_keypoint_data['subject']
    # create a copy of all columns starting with prob
    prob_cols = cropped_keypoint_data.filter(regex='prob').copy()
    # drop the columns activity, subject and all columns starting with prob
    cropped_keypoint_data = cropped_keypoint_data.drop(columns=['activity', 'subject'])
    cropped_keypoint_data = cropped_keypoint_data.drop(columns=cropped_keypoint_data.filter(regex='prob').columns)
    # Do a normalization
    scaler = StandardScaler()
    cropped_keypoint_data = pd.DataFrame(scaler.fit_transform(cropped_keypoint_data),
                                         columns=cropped_keypoint_data.columns)
    # add the columns activity and subject back
    cropped_keypoint_data['activity'] = activity
    cropped_keypoint_data['subject'] = subject
    # add the columns starting with prob back
    cropped_keypoint_data = pd.concat([cropped_keypoint_data, prob_cols], axis=1)

    return cropped_keypoint_data, cropped_imu_data, sliding_windows


In [112]:
keypoint_data, imu_data, sliding_windows = processed_data()

Keypoint file for Subject_06_Task_11 does not exist


In [113]:
print(keypoint_data.shape)
print(imu_data.shape)
# check if the index for keypoint data and imu data are the same
print(keypoint_data.index)
print(imu_data.index)
# check if the imu and keypoint have the same subject and activity at every row, count number of false
print((keypoint_data['subject'] == imu_data['subject']).value_counts())

print(keypoint_data['subject'].loc[0])
print(imu_data['subject'].loc[0])
print(keypoint_data.describe())
print(imu_data.describe())

(286100, 38)
(286100, 11)
RangeIndex(start=0, stop=286100, step=1)
RangeIndex(start=0, stop=286100, step=1)
subject
True    286100
Name: count, dtype: int64
1.0
1.0
             x5        y5        x6        y6        x7        y7        x8  \
count 281934.00 281934.00 281934.00 281934.00 281934.00 281934.00 281934.00   
mean       0.00      0.00     -0.00      0.00      0.00      0.00     -0.00   
std        1.00      1.00      1.00      1.00      1.00      1.00      1.00   
min       -7.25    -16.01     -7.73    -18.31    -10.03     -6.73     -9.30   
25%       -0.81     -0.37     -0.83     -0.48     -0.76     -0.50     -0.70   
50%        0.05      0.09     -0.03     -0.02      0.10      0.11     -0.02   
75%        0.85      0.62      0.81      0.67      0.80      0.64      0.67   
max       11.64     18.05      9.80      8.41      4.14     11.48      6.79   

             y8        x9        y9  ...     prob7     prob8     prob9  \
count 281934.00 281934.00 281934.00  ... 286100.0

In [114]:
class EncoderDecoder(nn.Module):
    def __init__(self, input_size, output_size):
        super(EncoderDecoder, self).__init__()
        self.encoder = nn.Sequential(
            # 1D CNN Layers, with 5 filters
            nn.Conv2d(1, 5, (3, 1), stride=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(5),
            nn.Conv2d(5, 5, (3, 1), stride=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(5),
            nn.Conv2d(5, 5, (3, 1), stride=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(5),
            nn.Conv2d(5, 5, (3, 1), stride=(1, 1)),
            nn.ReLU(),
            nn.BatchNorm2d(5),
            # output is 17 x 5 x 36
            # flatten to 17 x 180
            Rearrange('b c h w -> b h (c w)'),
            # FC
            nn.Linear(180, 100),
            nn.ReLU(),
            # LSTM
            nn.LSTM(100, 100, 1, batch_first=True),
            # output is 17 x 100
        )
        self.decoder = nn.Sequential(
            nn.Linear(100, 45),
            nn.ReLU(),
            # make 17 x 1 x 9
            Rearrange('b h (c w) -> b c h w', c=5, w=9),
            # 1D CNN Layers, with 5 filters
            nn.Conv2d(5, 5, (3, 1), stride=(1, 1), padding=(2, 0)),
            nn.ReLU(),
            nn.BatchNorm2d(5),
            nn.Conv2d(5, 5, (3, 1), stride=(1, 1), padding=(2, 0)),
            nn.ReLU(),
            nn.BatchNorm2d(5),
            nn.Conv2d(5, 5, (3, 1), stride=(1, 1), padding=(2, 0)),
            nn.ReLU(),
            nn.BatchNorm2d(5),
            nn.Conv2d(5, 1, (3, 1), stride=(1, 1), padding=(2, 0)),
            nn.ReLU(),
            nn.BatchNorm2d(1)
        )

    def forward(self, x):
        x, h = self.encoder(x)
        x = self.decoder(x)
        return x

In [115]:
class DataSet_VTT(Dataset):
    def __init__(self, keypoint_data, imu_data, sliding_windows, flag='train'):
        self.flag = flag
        # if train use all data except for user 1
        if self.flag == 'train':
            self.keypoint_data = keypoint_data[keypoint_data['subject'] != 1]
            self.imu_data = imu_data[imu_data['subject'] != 1]
            self.sliding_windows_map = sliding_windows[sliding_windows['subject'] != 1]
            # reset the index
            self.sliding_windows_map = self.sliding_windows_map.reset_index(drop=True)
        # if test use only user 1
        elif self.flag == 'test':
            self.keypoint_data = keypoint_data[keypoint_data['subject'] == 1]
            self.imu_data = imu_data[imu_data['subject'] == 1]
            self.sliding_windows_map = sliding_windows[sliding_windows['subject'] == 1]
            # reset the index
            self.sliding_windows_map = self.sliding_windows_map.reset_index(drop=True)

    def __len__(self):
        return len(self.sliding_windows_map["start"])

    def __getitem__(self, idx):
        start = self.sliding_windows_map["start"][idx]
        end = self.sliding_windows_map["end"][idx]
        user = self.sliding_windows_map["subject"][idx]
        activity = self.sliding_windows_map["activity"][idx]
        # get the keypoint data for user and activity between start and end
        keypoint = self.keypoint_data[
                       (self.keypoint_data['subject'] == user) & (self.keypoint_data['activity'] == activity)].loc[
                   start:end]
        # check if any nan values in keypoint
        # print(np.isnan(keypoint).any())
        keypoint = keypoint.drop(columns=['subject', 'activity'])
        # expand dimensions to make it 4D
        keypoint = np.expand_dims(keypoint, axis=0)
        imu = self.imu_data[(self.imu_data['subject'] == user) & (self.imu_data['activity'] == activity)].loc[
              start:end]
        imu = imu.drop(columns=['subject', 'activity'])
        # imu to numpy
        imu = imu.to_numpy()

        # check if any nan values in keypoint
        # print(np.isnan(keypoint).any())
        # print('----------------')
        return keypoint, imu

In [116]:
# replace any nan value as the mean of the value before and after it
keypoint_data = keypoint_data.fillna(keypoint_data.mean())
imu_data = imu_data.fillna(imu_data.mean())

In [117]:
# check if any nan values in keypoint
print(np.isnan(keypoint_data).any())
print(np.isnan(imu_data).any())

x5          False
y5          False
x6          False
y6          False
x7          False
y7          False
x8          False
y8          False
x9          False
y9          False
x10         False
y10         False
x11         False
y11         False
x12         False
y12         False
x13         False
y13         False
x14         False
y14         False
x15         False
y15         False
x16         False
y16         False
activity    False
subject     False
prob5       False
prob6       False
prob7       False
prob8       False
prob9       False
prob10      False
prob11      False
prob12      False
prob13      False
prob14      False
prob15      False
prob16      False
dtype: bool
trousers_Ax_g    False
trousers_Ay_g    False
trousers_Az_g    False
back_Ax_g        False
back_Ay_g        False
back_Az_g        False
hand_Ax_g        False
hand_Ay_g        False
hand_Az_g        False
subject          False
activity         False
dtype: bool


In [118]:
dataset_train = DataSet_VTT(keypoint_data, imu_data, sliding_windows, flag='train')
dataset_test = DataSet_VTT(keypoint_data, imu_data, sliding_windows, flag='test')
train_loader = DataLoader(dataset=dataset_train, batch_size=32, shuffle=True, num_workers=0)

In [119]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
input_size = (25, 36)
output_size = (25, 9)
model = EncoderDecoder(input_size, output_size)
model = model.double().cuda()
criterion = nn.CrossEntropyLoss(reduction="mean").to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [120]:
# test model input output size by giving random input
test = np.random.rand(32, 25, 36)
print(test.shape)
test = np.expand_dims(test, axis=1)
print(test.shape)
test = torch.tensor(test, dtype=torch.double).to(device)
# print(test)
y = model(test)
# remove the first dimension
print(y)
y = y.squeeze(1)
# print the output size
print(y.shape)
# print(y)

(32, 25, 36)
(32, 1, 25, 36)
tensor([[[[-1.0938e+00,  5.6767e-02, -5.4950e-01,  ...,  5.3953e-02,
           -4.0240e-01, -1.0285e-01],
          [-1.0938e+00, -3.4760e-01, -6.9826e-01,  ...,  4.6984e-01,
            8.3003e-02,  4.0965e-01],
          [-1.0401e+00, -3.5267e-01, -1.7939e-01,  ...,  6.4363e-01,
           -5.4432e-02,  5.2749e-01],
          ...,
          [-1.0938e+00,  2.1561e-01,  7.8518e-01,  ...,  2.1747e+00,
           -9.5683e-02,  1.1864e+00],
          [ 3.4238e-01, -4.4306e-01, -3.1246e-01,  ..., -6.1757e-01,
           -2.4586e-01, -3.7115e-01],
          [-1.0938e+00, -1.0938e+00, -2.8140e-01,  ..., -3.2778e-01,
           -8.4025e-01, -4.8197e-01]]],


        [[[-1.0938e+00,  4.4470e-02, -3.9756e-01,  ...,  3.5513e-02,
           -2.6956e-01, -7.4360e-02],
          [-1.0938e+00, -4.2346e-01, -1.7698e-01,  ...,  4.3826e-01,
            3.5607e-02,  6.7274e-01],
          [-9.6835e-01, -2.0615e-01, -2.9252e-02,  ...,  6.5793e-01,
            6.5318e-01,  3.

In [121]:
torch.autograd.set_detect_anomaly(False)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x144b145d2e0>

In [122]:
for epoch in range(1):
    print(f'Epoch: {epoch}')
    for i, data in enumerate(train_loader):
        keypoint, imu = data
        keypoint = keypoint.double().to(device)
        # check if keypoint has any nan values
        # print(torch.isnan(keypoint).any())
        imu = imu.double().to(device)
        output = model(keypoint)
        output = output.squeeze(1)
        loss = criterion(output, imu)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            print(f'Epoch: {epoch}, Loss: {loss.item()}')


Epoch: 0
Epoch: 0, Loss: 32.560827343581636
Epoch: 0, Loss: 3.6851020641451164
Epoch: 0, Loss: 2.6469098468286534
Epoch: 0, Loss: -7.160711051678369


In [None]:
print(imu_data[(imu_data['subject'] == 7) & (imu_data['activity'] == 7)].index)