# Import

In [1]:
import os
import cv2
import csv
import math
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Global Variable

In [2]:
# Root folder that holds one sub-folder per subject ID; the per-subject keypoint
# CSV is read from <KeypointsROOT>/<pid>/_yolo/<pid>_keypoints1.csv further down.
KeypointsROOT = "../1_GaitAnalysis_ver1/data/"
# Root folder under which the per-subject walking-subtask CSV files are written.
WalkingSubtaskROOT = "data/"

# Placeholders keyed by pid; each one is replaced by the result of its loader /
# generator function in a later cell.
sit_stand_frameIdx_dict = {}
turning_frameIdx_dict = {}
walking_frameIdx_dict = {}

# Load ID

In [3]:
allID_list = []

def load_id_typeString(KeypointsROOT):
    """Return the subject-ID folder names found directly under ``KeypointsROOT``.

    Parameters
    ----------
    KeypointsROOT : str
        Directory that contains one sub-folder per subject ID.

    Returns
    -------
    list[str]
        Folder names (IDs kept as strings, e.g. ``'01'``) in sorted order.
        The folder named ``"50"`` is always left out.

    Notes
    -----
    * ``os.listdir`` returns entries in arbitrary order, so the result is
      sorted to make every downstream loop reproducible.
    * Only directories are kept: stray files such as ``.DS_Store`` would
      otherwise reach the later ``int(pid)`` conversions and crash them.
    """
    return sorted(
        name
        for name in os.listdir(KeypointsROOT)
        # "50" is deliberately excluded (the reason is not recorded in this notebook).
        if name != "50" and os.path.isdir(os.path.join(KeypointsROOT, name))
    )
    
allID_list = load_id_typeString(KeypointsROOT)

In [4]:
print(f'[Info] number of pid: {len(allID_list)}\n')
print(allID_list)

[Info] number of pid: 88

['01', '02', '03', '04', '05', '06', '07', '09', '10', '100', '11', '12', '13', '16', '17', '18', '19', '20', '21', '22', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '43', '44', '45', '46', '48', '49', '51', '52', '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67', '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78', '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89', '90', '91', '92', '93', '94', '95', '96', '97', '98', '99']


# Load Sit&Stand frame index
* /5_Gait_and_Eye/frameIdx_sit&stand_subtask.csv
* [pid] start1, end1, period1, start2, end2, period2

In [5]:
sit_stand_frameIdx_dict = {}

def load_sit_and_stand_frameIdx(csv_path):
    """Load the sit/stand frame indices of every subject from a CSV file.

    Parameters
    ----------
    csv_path : str
        CSV with the columns ``pid, start1, end1, period1, start2, end2,
        period2`` (one row per subject).

    Returns
    -------
    dict
        ``{pid: {'start1': ..., 'end1': ..., 'period1': ...,
        'start2': ..., 'end2': ..., 'period2': ...}}``.
        An empty dict is returned (and a notice printed) when the file does
        not exist.
    """
    fields = ('start1', 'end1', 'period1', 'start2', 'end2', 'period2')

    if not os.path.exists(csv_path):
        print('[Notice] Sit and Stand frameIdx CSV does not exist.')
        return {}

    df = pd.read_csv(csv_path)

    frame_idx = {}
    # read_csv yields a 0..n-1 index, so label-based .loc[i, ...] addresses row i.
    for i in range(df.shape[0]):
        frame_idx[df.loc[i, 'pid']] = {name: df.loc[i, name] for name in fields}
    return frame_idx

sit_stand_frameIdx_dict = load_sit_and_stand_frameIdx('../5_Gait_and_Eye/frameIdx_sit&stand_subtask.csv')

In [6]:
print(f'number of subjects: {len(sit_stand_frameIdx_dict)}\n')

print(sit_stand_frameIdx_dict)

number of subjects: 88

{1: {'start1': 8, 'end1': 24, 'period1': 16, 'start2': 258, 'end2': 277, 'period2': 19}, 2: {'start1': 8, 'end1': 32, 'period1': 24, 'start2': 219, 'end2': 232, 'period2': 13}, 3: {'start1': 0, 'end1': 13, 'period1': 13, 'start2': 223, 'end2': 241, 'period2': 18}, 4: {'start1': 51, 'end1': 69, 'period1': 18, 'start2': 411, 'end2': 420, 'period2': 9}, 5: {'start1': 57, 'end1': 88, 'period1': 31, 'start2': 723, 'end2': 785, 'period2': 62}, 6: {'start1': 53, 'end1': 77, 'period1': 24, 'start2': 281, 'end2': 298, 'period2': 17}, 7: {'start1': 31, 'end1': 51, 'period1': 20, 'start2': 319, 'end2': 337, 'period2': 18}, 9: {'start1': 172, 'end1': 194, 'period1': 22, 'start2': 667, 'end2': 707, 'period2': 40}, 10: {'start1': 70, 'end1': 114, 'period1': 44, 'start2': 534, 'end2': 575, 'period2': 41}, 100: {'start1': 54, 'end1': 76, 'period1': 22, 'start2': 296, 'end2': 319, 'period2': 23}, 11: {'start1': 173, 'end1': 208, 'period1': 35, 'start2': 676, 'end2': 713, 'period

# Load Turning frame index
* /3_turningSubtask/turning1_refine_info.csv
* /3_turningSubtask/turning2_info.csv
* [pid] peak, start, end, period

In [7]:
turning_frameIdx_dict = {}

def load_turning_frameIdx(f1, f2):
    """Load and combine the frame indices of the first and second turning.

    Parameters
    ----------
    f1 : str
        CSV describing turning 1, with columns ``pid, peak, start, end, period``.
    f2 : str
        CSV describing turning 2, with the same columns.

    Returns
    -------
    dict
        ``{pid: {'peak1', 'start1', 'end1', 'period1',
        'peak2', 'start2', 'end2', 'period2'}}``. Values are returned exactly
        as pandas parsed them (a column that contains the marker ``'x'`` is
        read as strings). An empty dict is returned when either file is
        missing.

    Notes
    -----
    Rows of the two files are matched by ``pid`` rather than by row position,
    so a different row order (or a missing row) in ``f2`` can no longer attach
    another subject's turning-2 values to a pid. A pid of ``f1`` that has no
    row in ``f2`` is reported and skipped.
    """
    fields = ('peak', 'start', 'end', 'period')

    if not os.path.exists(f1) or not os.path.exists(f2):
        print('[Notice] Turning frameIdx CSV does not exist.')
        return {}

    df1 = pd.read_csv(f1)
    df2 = pd.read_csv(f2)

    # pid -> row number in the second file (the last row wins on duplicates).
    row_of_pid2 = {df2.loc[j, 'pid']: j for j in range(df2.shape[0])}

    frame_idx = {}
    for i in range(df1.shape[0]):
        pid = df1.loc[i, 'pid']
        j = row_of_pid2.get(pid)
        if j is None:
            print(f'[Notice] Pid is Wrong! pid {pid} has no row in {f2}; skipped.')
            continue

        entry = {}
        for name in fields:
            entry[name + '1'] = df1.loc[i, name]
        for name in fields:
            entry[name + '2'] = df2.loc[j, name]
        frame_idx[pid] = entry

    return frame_idx

turning_frameIdx_dict = load_turning_frameIdx('../3_turningSubtask/turning1_refine_info.csv', '../3_turningSubtask/turning2_info.csv')

In [8]:
print(f'number of subjects: {len(turning_frameIdx_dict)}\n')

print(turning_frameIdx_dict)

number of subjects: 88

{1: {'peak1': '138', 'start1': '127', 'end1': '151', 'period1': '24', 'peak2': 241, 'start2': 229, 'end2': 252, 'period2': 23}, 2: {'peak1': '113', 'start1': '98', 'end1': '141', 'period1': '43', 'peak2': 198, 'start2': 181, 'end2': 207, 'period2': 26}, 3: {'peak1': '113', 'start1': '97', 'end1': '149', 'period1': '52', 'peak2': 212, 'start2': 191, 'end2': 223, 'period2': 32}, 4: {'peak1': 'x', 'start1': 'x', 'end1': 'x', 'period1': 'x', 'peak2': 397, 'start2': 367, 'end2': 411, 'period2': 44}, 5: {'peak1': '397', 'start1': '369', 'end1': '417', 'period1': '48', 'peak2': 697, 'start2': 665, 'end2': 723, 'period2': 58}, 6: {'peak1': '160', 'start1': '143', 'end1': '173', 'period1': '30', 'peak2': 266, 'start2': 249, 'end2': 281, 'period2': 32}, 7: {'peak1': 'x', 'start1': 'x', 'end1': 'x', 'period1': 'x', 'peak2': 294, 'start2': 282, 'end2': 319, 'period2': 37}, 9: {'peak1': 'x', 'start1': 'x', 'end1': 'x', 'period1': 'x', 'peak2': 631, 'start2': 581, 'end2': 667

### problem list

In [9]:
t1_problem_list = []
t2_problem_list = []

def get_turning_problem_list(turning_dict):
    """Collect the pids whose turning annotation contains the marker ``'x'``.

    Parameters
    ----------
    turning_dict : dict
        ``{pid: {'peak1', 'start1', 'end1', 'period1',
        'peak2', 'start2', 'end2', 'period2'}}``.

    Returns
    -------
    tuple[list, list]
        ``(pids with an 'x' in any turning-1 field,
        pids with an 'x' in any turning-2 field)``, in dictionary order.
    """
    first_fields = ('peak1', 'start1', 'end1', 'period1')
    second_fields = ('peak2', 'start2', 'end2', 'period2')
    flagged_first, flagged_second = [], []

    for pid, record in turning_dict.items():
        # All eight fields are read before any of them is tested.
        first_values = [record[name] for name in first_fields]
        second_values = [record[name] for name in second_fields]

        if any(value == 'x' for value in first_values):
            flagged_first.append(pid)
        if any(value == 'x' for value in second_values):
            flagged_second.append(pid)

    return flagged_first, flagged_second
    

t1_problem_list, t2_problem_list = get_turning_problem_list(turning_frameIdx_dict)

In [10]:
print(f'number of problem subjects in Turning 1: {len(t1_problem_list)}')
print(f'number of problem subjects in Turning 2: {len(t2_problem_list)}')

print(f'[Turning1]: {t1_problem_list}')
print(f'[Turning2]: {t2_problem_list}')

number of problem subjects in Turning 1: 7
number of problem subjects in Turning 2: 0
[Turning1]: [4, 7, 9, 21, 32, 60, 65]
[Turning2]: []


# Generate Walking frame index

In [17]:
walking_frameIdx_dict = {}

def generate_walking_frameIdx(sitstand, turn):
    """Derive the two walking segments of every subject.

    * Walk phase 1 runs from the end of the first sit/stand segment
      (``sitstand[pid]['end1']``) to the start of turning 1.
    * Walk phase 2 runs from the end of turning 1 to the start of turning 2.

    Parameters
    ----------
    sitstand : dict
        ``{pid: {'end1': ..., ...}}`` as returned by the sit/stand loader.
    turn : dict
        ``{pid: {'peak1', 'start1', 'end1', 'period1',
        'peak2', 'start2', 'end2', 'period2'}}``; the string ``'x'`` marks a
        turning that could not be annotated.

    Returns
    -------
    dict
        ``{pid: {'start1', 'end1', 'period1', 'start2', 'end2', 'period2'}}``
        with plain ``int`` values. Subjects that have no entry in ``turn`` or
        whose turning record contains ``'x'`` in any field are left out.

    Notes
    -----
    The skip decision is taken from the ``turn`` argument itself instead of
    the module-level problem lists, so the result no longer depends on which
    cells were executed before this one.
    """
    turn_fields = ('peak1', 'start1', 'end1', 'period1',
                   'peak2', 'start2', 'end2', 'period2')

    walking = {}
    for pid in sitstand:
        turning = turn.get(pid)
        if turning is None:
            continue  # no turning annotation at all for this subject
        if any(turning[name] == 'x' for name in turn_fields):
            continue  # turning 1 or turning 2 is marked as a problem

        # [walk phase 1]
        start1 = int(sitstand[pid]['end1'])
        end1 = int(turning['start1'])

        # [walk phase 2]
        start2 = int(turning['end1'])
        end2 = int(turning['start2'])

        walking[pid] = {
            'start1': start1,
            'end1': end1,
            'period1': abs(start1 - end1),
            'start2': start2,
            'end2': end2,
            'period2': abs(start2 - end2),
        }
    return walking
    
walking_frameIdx_dict = generate_walking_frameIdx(sit_stand_frameIdx_dict, turning_frameIdx_dict)

In [18]:
print(f'number of subjects: {len(walking_frameIdx_dict)}\n')

print(walking_frameIdx_dict)

number of subjects: 81

{1: {'start1': 24, 'end1': 127, 'period1': 103, 'start2': 151, 'end2': 229, 'period2': 78}, 2: {'start1': 32, 'end1': 98, 'period1': 66, 'start2': 141, 'end2': 181, 'period2': 40}, 3: {'start1': 13, 'end1': 97, 'period1': 84, 'start2': 149, 'end2': 191, 'period2': 42}, 5: {'start1': 88, 'end1': 369, 'period1': 281, 'start2': 417, 'end2': 665, 'period2': 248}, 6: {'start1': 77, 'end1': 143, 'period1': 66, 'start2': 173, 'end2': 249, 'period2': 76}, 10: {'start1': 114, 'end1': 245, 'period1': 131, 'start2': 281, 'end2': 464, 'period2': 183}, 100: {'start1': 76, 'end1': 163, 'period1': 87, 'start2': 210, 'end2': 266, 'period2': 56}, 11: {'start1': 208, 'end1': 375, 'period1': 167, 'start2': 421, 'end2': 626, 'period2': 205}, 12: {'start1': 122, 'end1': 200, 'period1': 78, 'start2': 252, 'end2': 318, 'period2': 66}, 13: {'start1': 130, 'end1': 299, 'period1': 169, 'start2': 351, 'end2': 443, 'period2': 92}, 16: {'start1': 189, 'end1': 350, 'period1': 161, 'start2': 

## Save Walking Subtask frame index

In [19]:
# Rebuild the walking dictionary with its pids in ascending order, so that the
# CSV written in the next cell lists the subjects sorted by pid.
myKeys = sorted(walking_frameIdx_dict)
walking_frameIdx_dict_sorted = {
    pid: walking_frameIdx_dict[pid] for pid in myKeys
}

print(walking_frameIdx_dict_sorted)

{1: {'start1': 24, 'end1': 127, 'period1': 103, 'start2': 151, 'end2': 229, 'period2': 78}, 2: {'start1': 32, 'end1': 98, 'period1': 66, 'start2': 141, 'end2': 181, 'period2': 40}, 3: {'start1': 13, 'end1': 97, 'period1': 84, 'start2': 149, 'end2': 191, 'period2': 42}, 5: {'start1': 88, 'end1': 369, 'period1': 281, 'start2': 417, 'end2': 665, 'period2': 248}, 6: {'start1': 77, 'end1': 143, 'period1': 66, 'start2': 173, 'end2': 249, 'period2': 76}, 10: {'start1': 114, 'end1': 245, 'period1': 131, 'start2': 281, 'end2': 464, 'period2': 183}, 11: {'start1': 208, 'end1': 375, 'period1': 167, 'start2': 421, 'end2': 626, 'period2': 205}, 12: {'start1': 122, 'end1': 200, 'period1': 78, 'start2': 252, 'end2': 318, 'period2': 66}, 13: {'start1': 130, 'end1': 299, 'period1': 169, 'start2': 351, 'end2': 443, 'period2': 92}, 16: {'start1': 189, 'end1': 350, 'period1': 161, 'start2': 430, 'end2': 574, 'period2': 144}, 17: {'start1': 88, 'end1': 204, 'period1': 116, 'start2': 285, 'end2': 371, 'peri

In [20]:
def write_result_to_csv(data_dict, fileName):
    """Write the per-subject frame indices to a CSV file.

    Parameters
    ----------
    data_dict : dict
        ``{pid: {'start1', 'end1', 'period1', 'start2', 'end2', 'period2'}}``.
    fileName : str
        Destination path; an existing file is overwritten.

    The file starts with the header row
    ``pid,start1,end1,period1,start2,end2,period2`` followed by one row per
    entry of ``data_dict``, in dictionary order.
    """
    columns = ['pid', 'start1', 'end1', 'period1', 'start2', 'end2', 'period2']

    # All rows are assembled before the file is opened.
    rows = [
        [pid] + [record[name] for name in columns[1:]]
        for pid, record in data_dict.items()
    ]

    with open(fileName, 'w', newline='') as handle:
        writer = csv.writer(handle)
        writer.writerow(columns)
        for row in rows:
            writer.writerow(row)

write_result_to_csv(walking_frameIdx_dict_sorted, "walking_frame_index.csv")

# (X) Create new folder
* /4_Cut_Video/data/

In [36]:
def createFolder(pidList, walk_dict=None, root='data/'):
    """Create one output folder per subject that has walking frame indices.

    Parameters
    ----------
    pidList : list[str]
        Subject IDs as strings (e.g. ``'01'``); each must be convertible with
        ``int()``.
    walk_dict : dict, optional
        Walking frame-index dictionary keyed by integer pid. Defaults to the
        notebook-level ``walking_frameIdx_dict``.
    root : str, optional
        Parent folder, including the trailing slash. Defaults to ``'data/'``.

    For every pid a line is printed telling whether the folder was created,
    already existed, or was skipped because the pid has no walking frame
    index.
    """
    if walk_dict is None:
        walk_dict = walking_frameIdx_dict

    for pid in pidList:
        if int(pid) not in walk_dict:
            print(f'[pid {pid}] NO walking frame index.')
            continue

        folder_path = root + pid + '/'
        if os.path.exists(folder_path):
            print(f'{pid} Folder for [Walking] has existed.')
        else:
            print(f'[pid {pid}] {folder_path}')
            # makedirs also creates a missing parent folder, where os.mkdir
            # would raise FileNotFoundError.
            os.makedirs(folder_path, exist_ok=True)
            

createFolder(allID_list)

01 Folder for [Walking] has existed.
[pid 02] data/02/
[pid 03] data/03/
[pid 04] NO walking frame index.
[pid 05] data/05/
[pid 06] data/06/
[pid 07] NO walking frame index.
[pid 09] NO walking frame index.
[pid 10] data/10/
[pid 100] data/100/
[pid 11] data/11/
[pid 12] data/12/
[pid 13] data/13/
[pid 16] data/16/
[pid 17] data/17/
[pid 18] data/18/
[pid 19] data/19/
[pid 20] data/20/
[pid 21] NO walking frame index.
[pid 22] data/22/
[pid 24] data/24/
[pid 25] data/25/
[pid 26] data/26/
[pid 27] data/27/
[pid 28] data/28/
[pid 29] data/29/
[pid 30] data/30/
[pid 31] data/31/
[pid 32] NO walking frame index.
[pid 33] data/33/
[pid 34] data/34/
[pid 35] data/35/
[pid 36] data/36/
[pid 37] data/37/
[pid 38] data/38/
[pid 39] data/39/
[pid 40] data/40/
[pid 43] data/43/
[pid 44] data/44/
[pid 45] data/45/
[pid 46] data/46/
[pid 48] data/48/
[pid 49] data/49/
[pid 51] data/51/
[pid 52] data/52/
[pid 57] data/57/
[pid 58] data/58/
[pid 59] data/59/
[pid 60] NO walking frame index.
[pid 61

# (X) Generate Walking Subtask Coordinate File
* /data/01/01_walking_cut1_coordinate.csv
* /data/01/01_walking_cut2_coordinate.csv

In [68]:
def get_walking_period_coordinate_data(WalkingSubtaskROOT, pidList, walk_dict, keypoints_root=None):
    """Cut the two walking segments out of each subject's keypoint CSV.

    For every pid that has an entry in ``walk_dict`` the file
    ``<keypoints_root><pid>/_yolo/<pid>_keypoints1.csv`` is read (no header)
    and the rows ``[start1, end1)`` and ``[start2, end2)`` are written to
    ``<WalkingSubtaskROOT><pid>/<pid>_walking_cut1_coordinate.csv`` and
    ``..._walking_cut2_coordinate.csv`` (no header, no index).

    Parameters
    ----------
    WalkingSubtaskROOT : str
        Output root, including the trailing slash. The per-pid folder must
        already exist.
    pidList : list[str]
        Subject IDs as strings; each must be convertible with ``int()``.
    walk_dict : dict
        ``{int pid: {'start1', 'end1', 'start2', 'end2', ...}}``.
    keypoints_root : str, optional
        Root of the keypoint files, including the trailing slash. Defaults to
        the notebook-level ``KeypointsROOT``.

    Notes
    -----
    Membership is tested against the ``walk_dict`` argument (the original
    looked at the global ``walking_frameIdx_dict``, which could disagree with
    the dictionary actually used for the frame indices).
    """
    if keypoints_root is None:
        keypoints_root = KeypointsROOT

    for pid in pidList:
        if int(pid) not in walk_dict:
            continue

        all_coord_path = keypoints_root + pid + '/_yolo/' + pid + '_keypoints1.csv'
        cut1_coord_path = WalkingSubtaskROOT + pid + '/' + pid + '_walking_cut1_coordinate.csv'
        cut2_coord_path = WalkingSubtaskROOT + pid + '/' + pid + '_walking_cut2_coordinate.csv'

        if not os.path.exists(all_coord_path):
            print(f'[Notice] NO pid {pid} coordinate csv file.')
            continue

        df = pd.read_csv(all_coord_path, header=None)
        print(f'[pid {pid}] {cut1_coord_path}, {cut2_coord_path}')

        segments = walk_dict[int(pid)]
        for start_key, end_key, out_path in (
            ('start1', 'end1', cut1_coord_path),
            ('start2', 'end2', cut2_coord_path),
        ):
            # The index is not written, so the slice needs no reset_index.
            cut_df = df.iloc[segments[start_key]:segments[end_key], :]
            cut_df.to_csv(out_path, header=False, index=False)
                
    
get_walking_period_coordinate_data(WalkingSubtaskROOT, allID_list, walking_frameIdx_dict)

[pid 01] data/01/01_walking_cut1_coordinate.csv, data/01/01_walking_cut2_coordinate.csv
[pid 02] data/02/02_walking_cut1_coordinate.csv, data/02/02_walking_cut2_coordinate.csv
[pid 03] data/03/03_walking_cut1_coordinate.csv, data/03/03_walking_cut2_coordinate.csv
[pid 05] data/05/05_walking_cut1_coordinate.csv, data/05/05_walking_cut2_coordinate.csv
[pid 06] data/06/06_walking_cut1_coordinate.csv, data/06/06_walking_cut2_coordinate.csv
[pid 10] data/10/10_walking_cut1_coordinate.csv, data/10/10_walking_cut2_coordinate.csv
[pid 100] data/100/100_walking_cut1_coordinate.csv, data/100/100_walking_cut2_coordinate.csv
[pid 11] data/11/11_walking_cut1_coordinate.csv, data/11/11_walking_cut2_coordinate.csv
[pid 12] data/12/12_walking_cut1_coordinate.csv, data/12/12_walking_cut2_coordinate.csv
[pid 13] data/13/13_walking_cut1_coordinate.csv, data/13/13_walking_cut2_coordinate.csv
[pid 16] data/16/16_walking_cut1_coordinate.csv, data/16/16_walking_cut2_coordinate.csv
[pid 17] data/17/17_walking

# (X) Change y-coordinate
* 1080 - yCoord

In [74]:
def _flip_y_and_round(df, keypointIdx, frame_height):
    """Return a copy of ``df`` with the listed columns replaced by
    ``frame_height - value`` and every value rounded to 2 decimals."""
    flipped = df.copy()
    for kid in keypointIdx:
        column = flipped.columns[kid]  # positional index -> column label
        flipped[column] = frame_height - flipped[column]
    return flipped.round(2)


def change_y_Coordinate(WalkingSubtaskROOT, pidList, keypointIdx, walk_dict=None, frame_height=1080):
    """Flip the y columns of both walking-cut coordinate files of each subject.

    For every pid with a walking frame index the files
    ``<pid>_walking_cut1_coordinate.csv`` and ``<pid>_walking_cut2_coordinate.csv``
    are read (no header); the columns at the positions in ``keypointIdx`` are
    replaced by ``frame_height - value``, all values are rounded to two
    decimals, and the results are written next to the inputs with the suffix
    ``_2.csv`` (no header, no index).

    Parameters
    ----------
    WalkingSubtaskROOT : str
        Root folder, including the trailing slash.
    pidList : list[str]
        Subject IDs as strings; each must be convertible with ``int()``.
    keypointIdx : list[int]
        Positional indices of the columns to flip (expected to be unique).
    walk_dict : dict, optional
        Walking frame-index dictionary keyed by integer pid. Defaults to the
        notebook-level ``walking_frameIdx_dict``.
    frame_height : int or float, optional
        Value the y coordinates are subtracted from. Defaults to 1080.

    Notes
    -----
    The transformation is applied column-wise instead of cell by cell, which
    gives the same numbers without a Python loop over every cell.
    """
    if walk_dict is None:
        walk_dict = walking_frameIdx_dict

    for pid in pidList:
        if int(pid) not in walk_dict:
            continue

        prefix = WalkingSubtaskROOT + pid + '/' + pid
        cut1_path = prefix + '_walking_cut1_coordinate.csv'
        cut1_output = prefix + '_walking_cut1_coordinate_2.csv'
        cut2_path = prefix + '_walking_cut2_coordinate.csv'
        cut2_output = prefix + '_walking_cut2_coordinate_2.csv'

        if not (os.path.exists(cut1_path) and os.path.exists(cut2_path)):
            print(f'[Notice] pid {pid} walking coordinate file does not exist.')
            continue

        cut1_df = _flip_y_and_round(pd.read_csv(cut1_path, header=None), keypointIdx, frame_height)
        cut2_df = _flip_y_and_round(pd.read_csv(cut2_path, header=None), keypointIdx, frame_height)

        print(f'[pid {pid}] {cut1_output}, {cut2_output}')
        cut1_df.to_csv(cut1_output, index=False, header=False)
        cut2_df.to_csv(cut2_output, index=False, header=False)


# Positional column indices handed to change_y_Coordinate, which replaces the
# value v in each of these columns by (1080 - v). The list is every third column
# starting at 1 (17 entries).
# NOTE(review): presumably the y value of (x, y, score) keypoint triplets - confirm
# against the layout of the keypoint CSV.
y_keypointIdx = [1, 4, 7, 10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49]
change_y_Coordinate(WalkingSubtaskROOT, allID_list, y_keypointIdx)

[pid 01] data/01/01_walking_cut1_coordinate_2.csv, data/01/01_walking_cut2_coordinate_2.csv
[pid 02] data/02/02_walking_cut1_coordinate_2.csv, data/02/02_walking_cut2_coordinate_2.csv
[pid 03] data/03/03_walking_cut1_coordinate_2.csv, data/03/03_walking_cut2_coordinate_2.csv
[pid 05] data/05/05_walking_cut1_coordinate_2.csv, data/05/05_walking_cut2_coordinate_2.csv
[pid 06] data/06/06_walking_cut1_coordinate_2.csv, data/06/06_walking_cut2_coordinate_2.csv
[pid 10] data/10/10_walking_cut1_coordinate_2.csv, data/10/10_walking_cut2_coordinate_2.csv
[pid 100] data/100/100_walking_cut1_coordinate_2.csv, data/100/100_walking_cut2_coordinate_2.csv
[pid 11] data/11/11_walking_cut1_coordinate_2.csv, data/11/11_walking_cut2_coordinate_2.csv
[pid 12] data/12/12_walking_cut1_coordinate_2.csv, data/12/12_walking_cut2_coordinate_2.csv
[pid 13] data/13/13_walking_cut1_coordinate_2.csv, data/13/13_walking_cut2_coordinate_2.csv
[pid 16] data/16/16_walking_cut1_coordinate_2.csv, data/16/16_walking_cut2_

# (X) Generate Time Series data

In [80]:
def generate_time_series_data(WalkingSubtaskROOT, pidList, legs_keypointIdx, flag, walk_dict=None):
    """Build the left/right sum and absolute-difference time series of the legs.

    For every pid with a walking frame index the file
    ``<pid>_walking_<flag>_coordinate_2.csv`` is read (no header) and reduced
    to the columns in ``legs_keypointIdx``. Those twelve columns are
    interpreted positionally as ``x, y`` of left hip, right hip, left knee,
    right knee, left ankle, right ankle. Per frame and joint, the sum
    (``*_add``) and the absolute difference (``*_minus``) of the left and
    right value are computed, rounded to two decimals, and written to
    ``<pid>_walking_<flag>_timeSeries.csv`` (no header, no index) in the
    column order::

        xHip_add, xKnee_add, xAnkle_add, xHip_minus, xKnee_minus, xAnkle_minus,
        yHip_add, yKnee_add, yAnkle_add, yHip_minus, yKnee_minus, yAnkle_minus

    Parameters
    ----------
    WalkingSubtaskROOT : str
        Root folder, including the trailing slash.
    pidList : list[str]
        Subject IDs as strings; each must be convertible with ``int()``.
    legs_keypointIdx : list[int]
        Twelve positional column indices in the order described above.
    flag : str
        ``"cut1"`` or ``"cut2"``. Any other value prints a notice and the
        function returns without processing anything (the original printed
        the notice and then failed on an undefined or stale path).
    walk_dict : dict, optional
        Walking frame-index dictionary keyed by integer pid. Defaults to the
        notebook-level ``walking_frameIdx_dict``.
    """
    if flag not in ("cut1", "cut2"):
        print('[Notice] something wrong with flag.')
        return

    if walk_dict is None:
        walk_dict = walking_frameIdx_dict

    column_order = ['xHip_add', 'xKnee_add', 'xAnkle_add',
                    'xHip_minus', 'xKnee_minus', 'xAnkle_minus',
                    'yHip_add', 'yKnee_add', 'yAnkle_add',
                    'yHip_minus', 'yKnee_minus', 'yAnkle_minus']

    for pid in pidList:
        if int(pid) not in walk_dict:
            continue

        prefix = WalkingSubtaskROOT + pid + '/' + pid + '_walking_' + flag
        cut_path = prefix + '_coordinate_2.csv'
        cut_output = prefix + '_timeSeries.csv'

        if not os.path.exists(cut_path):
            print(f'[Notice] pid {pid} walking coordinate file does not exist.')
            continue

        legs = pd.read_csv(cut_path, header=None).iloc[:, legs_keypointIdx]

        features = {}
        # Each joint occupies four consecutive selected columns:
        # x_left, y_left, x_right, y_right.
        for k, joint in enumerate(('Hip', 'Knee', 'Ankle')):
            x_left = legs.iloc[:, 4 * k].to_numpy()
            y_left = legs.iloc[:, 4 * k + 1].to_numpy()
            x_right = legs.iloc[:, 4 * k + 2].to_numpy()
            y_right = legs.iloc[:, 4 * k + 3].to_numpy()

            features['x' + joint + '_add'] = np.round(x_left + x_right, 2)
            features['y' + joint + '_add'] = np.round(y_left + y_right, 2)
            features['x' + joint + '_minus'] = np.round(np.abs(x_left - x_right), 2)
            features['y' + joint + '_minus'] = np.round(np.abs(y_left - y_right), 2)

        df_new = pd.DataFrame({name: features[name] for name in column_order})

        print(f'[pid {pid}] {cut_output}')
        df_new.to_csv(cut_output, index=False, header=False)
                
                
                
# Twelve positional column indices selected by generate_time_series_data, which
# reads them in this order: x, y of left hip, right hip, left knee, right knee,
# left ankle, right ankle.
# NOTE(review): which body keypoint each index pair refers to depends on the
# keypoint CSV layout - confirm against the pose-estimation output.
legs_keypointIdx = [33, 34, 36, 37, 39, 40, 42, 43, 45, 46, 48, 49]
# Build the time series for both walking segments.
generate_time_series_data(WalkingSubtaskROOT, allID_list, legs_keypointIdx, "cut1")
generate_time_series_data(WalkingSubtaskROOT, allID_list, legs_keypointIdx, "cut2")

[pid 01] data/01/01_walking_cut1_timeSeries.csv
[pid 02] data/02/02_walking_cut1_timeSeries.csv
[pid 03] data/03/03_walking_cut1_timeSeries.csv
[pid 05] data/05/05_walking_cut1_timeSeries.csv
[pid 06] data/06/06_walking_cut1_timeSeries.csv
[pid 10] data/10/10_walking_cut1_timeSeries.csv
[pid 100] data/100/100_walking_cut1_timeSeries.csv
[pid 11] data/11/11_walking_cut1_timeSeries.csv
[pid 12] data/12/12_walking_cut1_timeSeries.csv
[pid 13] data/13/13_walking_cut1_timeSeries.csv
[pid 16] data/16/16_walking_cut1_timeSeries.csv
[pid 17] data/17/17_walking_cut1_timeSeries.csv
[pid 18] data/18/18_walking_cut1_timeSeries.csv
[pid 19] data/19/19_walking_cut1_timeSeries.csv
[pid 20] data/20/20_walking_cut1_timeSeries.csv
[pid 22] data/22/22_walking_cut1_timeSeries.csv
[pid 24] data/24/24_walking_cut1_timeSeries.csv
[pid 25] data/25/25_walking_cut1_timeSeries.csv
[pid 26] data/26/26_walking_cut1_timeSeries.csv
[pid 27] data/27/27_walking_cut1_timeSeries.csv
[pid 28] data/28/28_walking_cut1_time

# (X) Normalize time series data

In [84]:
from sklearn.preprocessing import MinMaxScaler

def normalize_time_series_data(WalkingSubtaskROOT, pidList, flag, walk_dict=None):
    """Min-max normalise the time-series features across all subjects.

    The ``<pid>_walking_<flag>_timeSeries.csv`` files of all pids that have a
    walking frame index are stacked, every feature column is scaled to
    ``[0, 1]`` with ``MinMaxScaler`` using the minimum and maximum over all
    subjects, and the scaled rows of each subject are written back to
    ``<pid>_walking_<flag>_timeSeries_normalized.csv`` (no header, no index).

    Parameters
    ----------
    WalkingSubtaskROOT : str
        Root folder, including the trailing slash.
    pidList : list[str]
        Subject IDs as strings; each must be convertible with ``int()``.
    flag : str
        ``"cut1"`` or ``"cut2"``. Any other value prints a notice and returns.
    walk_dict : dict, optional
        Walking frame-index dictionary keyed by integer pid. Defaults to the
        notebook-level ``walking_frameIdx_dict``.

    Notes
    -----
    * ``MinMaxScaler`` is the name imported from ``sklearn.preprocessing`` in
      the line above this function.
    * A pid whose time-series file is missing is reported and skipped in both
      the load and the write step (the original raised ``KeyError`` when
      writing).
    * ``MinMaxScaler`` already scales each column independently, so a single
      ``fit_transform`` on the stacked array replaces the per-column loop, and
      all feature columns are written instead of a hard-coded first twelve.
    """
    if flag not in ("cut1", "cut2"):
        print('[Notice] something wrong with flag.')
        return

    if walk_dict is None:
        walk_dict = walking_frameIdx_dict

    # (pid, frame) pairs in processing order; the order defines the row blocks.
    loaded = []
    for pid in pidList:
        if int(pid) not in walk_dict:
            continue
        cut_path = WalkingSubtaskROOT + pid + '/' + pid + '_walking_' + flag + '_timeSeries.csv'
        if not os.path.exists(cut_path):
            print(f'[Notice] pid {pid} walking coordinate file does not exist.')
            continue
        loaded.append((pid, pd.read_csv(cut_path, header=None)))

    if not loaded:
        print(f'[Notice] no time series data found for {flag}.')
        return

    # Concatenate once instead of growing a DataFrame inside the loop.
    stacked = pd.concat([frame for _, frame in loaded]).values
    scaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(stacked)

    start = 0
    for pid, frame in loaded:
        end = start + frame.shape[0]
        cut_output = (WalkingSubtaskROOT + pid + '/' + pid
                      + '_walking_' + flag + '_timeSeries_normalized.csv')

        print(f'[pid {pid}] {cut_output}')
        pd.DataFrame(scaled[start:end, :]).to_csv(cut_output, index=False, header=False)

        start = end


normalize_time_series_data(WalkingSubtaskROOT, allID_list, "cut1")
normalize_time_series_data(WalkingSubtaskROOT, allID_list, "cut2")

[pid 01] data/01/01_walking_cut1_timeSeries_normalized.csv
[pid 02] data/02/02_walking_cut1_timeSeries_normalized.csv
[pid 03] data/03/03_walking_cut1_timeSeries_normalized.csv
[pid 05] data/05/05_walking_cut1_timeSeries_normalized.csv
[pid 06] data/06/06_walking_cut1_timeSeries_normalized.csv
[pid 10] data/10/10_walking_cut1_timeSeries_normalized.csv
[pid 100] data/100/100_walking_cut1_timeSeries_normalized.csv
[pid 11] data/11/11_walking_cut1_timeSeries_normalized.csv
[pid 12] data/12/12_walking_cut1_timeSeries_normalized.csv
[pid 13] data/13/13_walking_cut1_timeSeries_normalized.csv
[pid 16] data/16/16_walking_cut1_timeSeries_normalized.csv
[pid 17] data/17/17_walking_cut1_timeSeries_normalized.csv
[pid 18] data/18/18_walking_cut1_timeSeries_normalized.csv
[pid 19] data/19/19_walking_cut1_timeSeries_normalized.csv
[pid 20] data/20/20_walking_cut1_timeSeries_normalized.csv
[pid 22] data/22/22_walking_cut1_timeSeries_normalized.csv
[pid 24] data/24/24_walking_cut1_timeSeries_normalize