In [64]:
import pandas as pd
import numpy as np
import os
import torch
import sklearn.preprocessing

from torch.utils.data import TensorDataset
from torch.nn.utils.rnn import pad_sequence
from tqdm.notebook import tqdm

In [3]:
# Load the dataset's Readme as a list of raw lines; the cells below parse the
# '|'-delimited tables it contains.
# The Readme is decoded as latin1. The context manager guarantees the handle
# is closed even if reading raises.
with open('MobiAct_Dataset_v2.0/Readme.txt', 'r', encoding='latin1') as file:
    strings = file.readlines()

In [4]:
# Pull table rows out of the Readme lines. Only lines containing '|' are
# considered; each is split on '|' and every cell is whitespace-stripped.
#   * 9 cells and the text 'sub' somewhere in the line -> kept in person_list
#     (cells 3 .. second-to-last)
#   * 8 cells -> kept in activity_list (cells 1 .. second-to-last)
person_list, activity_list = [], []
for line in strings:
    if '|' not in line:
        continue
    cells = [cell.strip() for cell in line.split('|')]
    n_cells = len(cells)
    if n_cells == 9 and 'sub' in line:
        person_list.append(cells[3:-1])
    elif n_cells == 8:
        activity_list.append(cells[1:-1])

In [5]:
# Activity codes treated as falls.
falls = ['FOL', 'FKL', 'BSC', 'SDL']

# Subject table: one row per parsed Readme line, with fixed column names.
columns = ['name', 'age', 'height', 'weight', 'gender']
person_info = pd.DataFrame(person_list, columns=columns)

# Activity table: the first parsed row supplies the column names.
activity_info = pd.DataFrame(activity_list)
activity_info.columns = activity_info.iloc[0]
# Row 0 is the header row that was just promoted to column names.
# NOTE(review): row 13 is presumably a second header row inside the Readme
# tables -- confirm against the Readme before changing.
activity_info = activity_info.drop([0, 13]).reset_index(drop=True)

# Re-index the table by its 'No.' column and remove that column from the data.
index = activity_info['No.']
activity_info = activity_info.drop(columns=['No.'])
activity_info.index = index

# Integer label per activity, assigned in row order: 0 .. n-1.
activity_info['label_encoded'] = list(range(len(activity_info.index)))

In [9]:
# Persist both lookup tables as CSV under mobiact_preprocessed/.
# DataFrame.to_csv does not create missing parent directories, and in file
# order this cell runs before the cell that creates the output folders, so
# make sure the target directory exists first.
os.makedirs('mobiact_preprocessed/', exist_ok=True)
activity_info.to_csv('mobiact_preprocessed/activity_info.csv')
person_info.to_csv('mobiact_preprocessed/person_info.csv')

In [6]:
# Folder holding the annotated recordings; its entries are listed below and
# later used as per-activity sub-folder names.
data_dir = 'MobiAct_Dataset_v2.0/Annotated Data/'

# Every entry found directly under data_dir (order is whatever the OS returns).
act_list = os.listdir(path=data_dir)
print(act_list)

['CHU', 'FOL', 'CSI', 'JOG', 'CSO', 'SBE', 'WAL', 'SBW', 'SLW', 'SIT', 'SCH', 'FKL', 'SRH', 'STD', 'BSC', 'JUM', 'SDL', 'SLH', 'STN', 'STU']


In [8]:
import shutil

# Output layout: mobiact_preprocessed/{train,valid,test}/
save_dir = 'mobiact_preprocessed/'
train_dir, valid_dir, test_dir = 'train/', 'valid/', 'test/'

# Create any split folder that does not exist yet.
for dir_name in (train_dir, valid_dir, test_dir):
    split_path = save_dir + dir_name
    if not os.path.exists(split_path):
        os.makedirs(split_path)

# Move every recording into a split chosen by the number in the second
# '_'-separated field of its file name:
#   <= 50 -> train, 51..56 -> valid, >= 57 -> test.
# Files are moved (not copied), so the source folders are emptied.
for act in act_list:
    file_dir = data_dir + act + '/'
    for file in os.listdir(file_dir):
        person_num = int(file.split('_')[1])
        if person_num <= 50:
            destination = train_dir
        elif person_num < 57:
            destination = valid_dir
        else:
            destination = test_dir
        shutil.move(file_dir + file, save_dir + destination)

In [9]:
# Find the largest row count among all fall recordings, across the three
# splits. A file counts as a fall when the prefix before the first '_' in
# its name is one of the codes in `falls`.
falls = ['FOL', 'FKL', 'BSC', 'SDL']
max_length = 0

for dir_name in (train_dir, valid_dir, test_dir):
    file_dir = save_dir + dir_name
    for file in os.listdir(file_dir):
        if file.split('_')[0] not in falls:
            continue
        n_rows = len(pd.read_csv(file_dir + file))
        max_length = max(max_length, n_rows)
print(max_length)

2995


In [63]:
# Display the activity lookup table (indexed by 'No.', with the added
# 'label_encoded' column) for a visual check.
activity_info

Unnamed: 0_level_0,Label,Activity,Trials,Duration,Description,label_encoded
No.,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,STD,Standing,1,5min,Standing with subtle movements,0
2,WAL,Walking,1,5min,Normal walking,1
3,JOG,Jogging,3,30s,Jogging,2
4,JUM,Jumping,3,30s,Continuous jumping,3
5,STU,Stairs up,6,10s,Stairs up (10 stairs),4
6,STN,Stairs down,6,10s,Stairs down (10 stairs),5
7,SCH,Stand to sit(sit on chair),6,6s,Transition from standing to sitting,6
8,SIT,Sitting on chair,1,1min,Sitting on a chair with subtle movements,7
9,CHU,Sit to stand(chair up),6,6s,Transition from sitting to standing,8
10,CSI,Car-step in,6,6s,Step in a car,9


ValueError: can only convert an array of size 1 to a Python scalar

In [68]:
# Inspect the shapes of the feature tensor and the label array.
# NOTE(review): X and y are only assigned by the dataset-building loop in a
# later cell, so running the notebook top to bottom on a fresh kernel raises
# NameError here. The recorded output also appears to predate the permute
# step in that loop -- TODO confirm and move this cell after it.
X.shape, y.shape

(torch.Size([2995, 2313, 6]), (2313,))

In [72]:
# Build one TensorDataset per split and save it as train.pt / valid.pt / test.pt.
#
# For every CSV in a split folder the six sensor columns are read, truncated to
# at most `max_length` rows, and zero-padded so that every split ends up with
# the same time dimension. The resulting feature tensor has shape
# (n_files, 6, max_length); labels are the file-name prefix before the first
# '_' encoded as integers.
columns = ['acc_x', 'acc_y', 'acc_z', 'gyro_x', 'gyro_y', 'gyro_z']
# NOTE(review): LabelEncoder numbers classes by sorted label string, whereas
# activity_info['label_encoded'] is numbered by table row order -- the two
# encodings may not agree; confirm which one downstream code expects.
label_encoder = sklearn.preprocessing.LabelEncoder()

save_name = ['train.pt', 'valid.pt', 'test.pt']
for idx, dir_name in enumerate([train_dir, valid_dir, test_dir]):
    X, y = [], []
    file_dir = save_dir + dir_name
    # os.listdir returns entries in arbitrary order; sort so the sample order
    # in the saved dataset is reproducible across machines and runs.
    file_list = sorted(os.listdir(file_dir))
    for file in file_list:
        label = file.split('_')[0]
        temp = pd.read_csv(file_dir + file)
        # Slicing past the end is a no-op, so shorter recordings are untouched.
        temp = temp.iloc[:max_length]
        val = torch.tensor(temp[columns].values)
        X.append(val)
        y.append(label)
    # pad_sequence stacks to (time, n_files, channels), padding with zeros only
    # up to the longest sequence in THIS split.
    X = pad_sequence(X)
    # Extend the time axis to max_length so all splits share the same shape
    # even when a split has no recording that reaches max_length.
    missing = max_length - X.shape[0]
    if missing > 0:
        # Pad spec is given last-dim first: (channels, files, time).
        X = torch.nn.functional.pad(X, (0, 0, 0, 0, 0, missing))
    # Reorder to (n_files, channels, time) so dim 0 lines up with the labels.
    X = torch.permute(X, (1, 2, 0)).contiguous()
    if idx == 0:
        # Fit the label mapping on the training split only.
        y = label_encoder.fit_transform(y)
    else:
        # Raises if a split contains a label never seen in training.
        y = label_encoder.transform(y)
    dataset = TensorDataset(X, torch.tensor(y))
    torch.save(dataset, save_dir + save_name[idx])