In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
for folder, _, entries in os.walk('/kaggle/input'):
    for file_path in (os.path.join(folder, entry) for entry in entries):
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import numpy as np
import os
import pickle
from pdb import set_trace

def save_obj(obj, name):
    """Pickle ``obj`` to the file ``name + '.pkl'``.

    Parameters
    ----------
    obj : any picklable object
        The object to serialise.
    name : str
        Output path without the ``.pkl`` extension. A relative path is
        resolved against the current working directory.

    Returns
    -------
    None

    Notes
    -----
    The file is written with ``pickle.HIGHEST_PROTOCOL`` and overwrites any
    existing file of the same name.
    """
    # The with-statement closes the file on exit (also on error), so no
    # explicit close() call is needed.
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)
def load_obj(name):
    """Return the object stored in the pickle file ``name + '.pkl'``.

    ``name`` is the path without the ``.pkl`` extension; a relative path is
    resolved against the current working directory.

    NOTE: unpickling can execute arbitrary code, so only use this on files
    you produced yourself or otherwise trust.
    """
    pickle_path = name + '.pkl'
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)
    return restored


rootdir = "/kaggle/input/toronto-robot-stroke-posture-dataset/"
# Absolute path to the per-subject folders, so nothing below depends on the
# current working directory (the original relied on os.chdir).
data_root = os.path.join(rootdir, 'data_new')

# From kaggle data card: "Each file contains three columns made by stacking many 25x3 matrices vertically. The 25 rows correspond to the Kinect joint indices."
total_joint_num = 25

# Joints to keep, given as 0-based row positions inside each 25-row frame block.
# ref: https://msdn.microsoft.com/en-us/library/microsoft.kinect.jointtype.aspx
# SH 2023-01-04: Looks like joints numbers are here because of product updates: https://learn.microsoft.com/en-us/azure/kinect-dk/body-joints
# The row position computed below (index % total_joint_num) runs from 0 to 24,
# so the previous range(1, 26) silently dropped row 0 of every frame and left
# the last column of each coordinate block unwritten (uninitialised np.empty memory).
joints_sel = range(total_joint_num)

# When True, the first coordinate column is negated for every task whose folder
# name contains 'L'. NOTE(review): presumably this mirrors left-side tasks onto
# the right side - confirm against the dataset's task naming.
invert_data = True


def _load_task_features(joint_file, mirror_x):
    """Read one Joint_Positions.csv into a (frames, 3 * len(joints_sel)) array.

    Each row of the result is one frame. The first len(joints_sel) columns hold
    CSV column 0 of the selected joints (negated when ``mirror_x`` is true), the
    next len(joints_sel) columns hold CSV column 1 and the last len(joints_sel)
    columns hold CSV column 2. The original comments call these x, z and y.

    Raises ValueError if the file has fewer than three columns or its row count
    is not a whole number of frames.
    """
    # Passing the path (not an open handle) lets numpy close the file itself.
    raw = np.loadtxt(joint_file, delimiter=",", ndmin=2)
    if raw.size == 0:
        return np.empty((0, 3 * len(joints_sel)))
    n_rows, n_cols = raw.shape
    if n_cols < 3:
        raise ValueError('{}: expected at least 3 columns, found {}'.format(joint_file, n_cols))
    if n_rows % total_joint_num != 0:
        raise ValueError('{}: {} rows is not a multiple of {} joints per frame'.format(
            joint_file, n_rows, total_joint_num))
    # (frame, joint, coordinate) view of the vertically stacked 25x3 blocks.
    frames = raw[:, :3].reshape(n_rows // total_joint_num, total_joint_num, 3)
    selected = frames[:, list(joints_sel), :]
    first_coord = -selected[:, :, 0] if mirror_x else selected[:, :, 0]
    return np.hstack((first_coord, selected[:, :, 1], selected[:, :, 2]))


def _load_task_labels(label_file):
    """Read one Labels.csv; a single-value file is returned as a length-1 array."""
    return np.atleast_1d(np.loadtxt(label_file, delimiter=","))


# Sorted so that the row order of the stacked arrays is reproducible across
# runs (os.listdir returns entries in arbitrary order).
subjects = sorted(
    name for name in os.listdir(data_root)
    if os.path.isdir(os.path.join(data_root, name))
)

HsubNames = []
Hlbl_set  = []
Hdata_set = []

PsubNames = []
Plbl_set  = []
Pdata_set = []

for sub in subjects:
    print('==========================='+sub+'============================')
    sub_dir = os.path.join(data_root, sub)
    tasks = sorted(
        name for name in os.listdir(sub_dir)
        if os.path.isdir(os.path.join(sub_dir, name))
    )

    # Per-task arrays are collected here and concatenated once per subject.
    # The empty seeds keep the result shapes (0, 3*len(joints_sel)) and (0, 1)
    # for a subject without any usable task.
    task_features = [np.empty([0, 3*len(joints_sel)])]
    task_labels = [np.empty([0, 1])]

    # Load data and labels
    for task in tasks:
        print(task)
        joint_file = os.path.join(sub_dir, task, 'Joint_Positions.csv')
        label_file = os.path.join(sub_dir, task, 'Labels.csv')

        # Skip incomplete tasks. Previously a missing file fell through to the
        # concatenation and re-appended the previous task's arrays.
        if not os.path.isfile(joint_file):
            print(task + ' doesn\'t exist')
            continue
        if not os.path.isfile(label_file):
            print(task + ' has no Labels.csv, skipping')
            continue

        print('loading '+ task + ' ')
        features = _load_task_features(joint_file, mirror_x=(invert_data and 'L' in task))

        #=================load labels ===================================
        print('loading ' + task + ' labels')
        labels = _load_task_labels(label_file)
        if len(labels) != len(features):
            # Fail loudly instead of dropping into pdb, which would hang a
            # non-interactive "Save & Run All".
            raise ValueError("lengths don't match for {}/{}: {} labels vs {} frames".format(
                sub, task, len(labels), len(features)))

        task_features.append(features)
        task_labels.append(labels.reshape(-1, 1))

    # SH 2023-01-04: Concatenate data from all tasks for the participant. Not sure why as this seems to make analysis more complicated.
    data = np.concatenate(task_features, axis=0)
    lbl = np.concatenate(task_labels, axis=0)

    # Assign data and labels to healthy or patients, keyed on the first letter
    # of the subject folder name; other folders are ignored.
    if sub[0] == 'H':
        HsubNames.append(sub)
        Hdata_set.append(data)
        Hlbl_set.append(lbl)
    elif sub[0] == 'P':
        PsubNames.append(sub)
        Pdata_set.append(data)
        Plbl_set.append(lbl)

def _per_frame_ids(sub_names, lbl_sets):
    """Return a list with each subject name repeated once per label row of that subject."""
    ids = []
    for sub_name, sub_lbl in zip(sub_names, lbl_sets):
        ids.extend([sub_name] * sub_lbl.shape[0])
    return ids


# Healthy subjects: stack the per-subject arrays and build one subject id per
# row. The previous code had the bracket in the wrong place
# ([name * n] instead of [name] * n) and produced a single n-times-repeated
# string per subject, so the ids did not line up with the rows of H_labels.
H_data = np.vstack(Hdata_set)
H_labels = np.vstack(Hlbl_set)
H_sub = _per_frame_ids(HsubNames, Hlbl_set)
H_sub_ids = np.array(H_sub, dtype=str)

# Patients: same layout as the healthy group.
P_data = np.vstack(Pdata_set)
P_labels = np.vstack(Plbl_set)
P_sub = _per_frame_ids(PsubNames, Plbl_set)
P_sub_ids = np.array(P_sub, dtype=str)

In [None]:
output_dir = '/kaggle/working/'
os.chdir(output_dir)
os.mkdir('stroke_data')
!ls

In [None]:
# Save the data as pickles
# Use the absolute folder path so the cell can be re-run: a relative
# os.chdir('stroke_data') fails once the working directory is already stroke_data.
# The cells below load the pickles by bare name, so the working directory is
# still switched to that folder.
os.chdir(os.path.join(output_dir, 'stroke_data'))
# NOTE(review): the loading loop concatenates every task of a subject and only
# negates the first coordinate for tasks whose name contains 'L'; the earlier
# "only using left hand" remarks did not match that - confirm the intent.
save_obj(H_data,"H_data")  # healthy subjects: one row of joint features per frame
save_obj(H_labels,"H_labels")  # healthy subjects: one label per row
save_obj(H_sub_ids,"H_sub_ids")  # participant number
save_obj(P_data,"P_data")  # patients: one row of joint features per frame
save_obj(P_labels,"P_labels")  # patients: one label per row
save_obj(P_sub_ids,"P_sub_ids")  # participant number

In [None]:
# Round-trip check: reload the patient feature matrix from P_data.pkl in the
# current working directory and print its shape.
p_data = load_obj('P_data')
print(np.shape(p_data))

In [None]:
# Shape of the reloaded patient label array (shown as the cell output).
np.shape(load_obj('P_labels'))

In [None]:
# Round-trip check: reload the healthy-subject feature matrix and print its
# container type followed by its shape.
h_data = load_obj('H_data')
for summary in (type(h_data), h_data.shape):
    print(summary)

In [None]:
# Shape of the reloaded healthy-subject label array (shown as the cell output).
np.shape(load_obj('H_labels'))

In [None]:
# SH 2023-01-04 20:38: Next step: Create a function to select data from a given task