In [27]:
import pandas as pd
import numpy as np
from scipy.spatial.transform import Rotation as R
from sklearn.preprocessing import MinMaxScaler
from pickle import dump

In [28]:
file_list = ["20240216_202836897-Tracking.csv",
             "20240216_203857568-Tracking.csv",
             "20240216_203503978-Tracking.csv",
             "20240216_204214403-Tracking.csv",
             "20240401_155833662-TCPTracking.csv",
             "20240224_152045199-TCPTracking.csv",
             "20240401_160406757-TCPTracking.csv",
             "20240224_150531415-TCPTracking.csv",
             "20240224_150859831-TCPTracking.csv",
             "20240224_151713583-TCPTracking.csv",
             ]

for idx, name in enumerate(file_list):
    file_list[idx] = "./dataset/labeled/{}".format(name)
file_list

['./dataset/labeled/20240216_202836897-Tracking.csv',
 './dataset/labeled/20240216_203857568-Tracking.csv',
 './dataset/labeled/20240216_203503978-Tracking.csv',
 './dataset/labeled/20240216_204214403-Tracking.csv',
 './dataset/labeled/20240401_155833662-TCPTracking.csv',
 './dataset/labeled/20240224_152045199-TCPTracking.csv',
 './dataset/labeled/20240401_160406757-TCPTracking.csv',
 './dataset/labeled/20240224_150531415-TCPTracking.csv',
 './dataset/labeled/20240224_150859831-TCPTracking.csv',
 './dataset/labeled/20240224_151713583-TCPTracking.csv']

In [29]:
df = None
for idx, file in enumerate(file_list):
    df_idx = pd.read_csv(file, low_memory=False)
    #df_idx.drop(labels='Unnamed:0', axis=1, inplace=True)
    if df is None:
        df = df_idx
        print('Has data samples: {}'.format(len(df)))
    else:
        df = pd.concat([df, df_idx], ignore_index=True)
        print('Has data samples: {}'.format(len(df)))

Has data samples: 2709
Has data samples: 6525
Has data samples: 9138
Has data samples: 12747
Has data samples: 26149
Has data samples: 38583
Has data samples: 60041
Has data samples: 66892
Has data samples: 73367
Has data samples: 84406


In [30]:
df.shape

(84406, 55)

In [31]:
df.head()

Unnamed: 0,Label,Time,Counter,IndexDistalJoint,IndexKnuckle,IndexMetacarpal,IndexMiddleJoint,IndexTip,MiddleDistalJoint,MiddleKnuckle,...,RingDistalJoint.1,RingKnuckle.1,RingMetacarpal.1,RingMiddleJoint.1,RingTip.1,ThumbDistalJoint.1,ThumbMetacarpalJoint.1,ThumbProximalJoint.1,ThumbTip.1,Wrist.1
0,,2024-02-19 20:28:37.156,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,,2024-02-19 20:28:37.523,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,,2024-02-19 20:28:37.845,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,,2024-02-19 20:28:37.879,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,,2024-02-19 20:28:37.907,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [32]:
df['Label'].value_counts()

Label
9.0    35583
8.0    29933
4.0     2837
5.0     2583
1.0     2242
7.0     2234
6.0     2200
0.0     1895
2.0     1889
3.0     1717
Name: count, dtype: int64

In [33]:
def remove_invalid_rows(raw_df):
    new_df = raw_df.copy()
    invalid_flags = np.zeros(new_df.shape[0])
    ## Check Labels
    for i, row in new_df.iterrows():
        label = row.iloc[0]
        right = row.iloc[3]
        left  = row.iloc[29]
        # if left hand label
        if (label in [0.0,1.0,2.0,3.0]):
            if left == '0':
                invalid_flags[i] = 1
                #print("Invalid Label: {} found in row {}".format(label, i))
        # if right hand label
        elif (label in [4.0,5.0,6.0,7.0]):
            if right == '0':
                invalid_flags[i] = 1
                #print("Invalid Label: {} found in row {}".format(label, i))
        elif (label == 8.0):
            if left == '0':
                invalid_flags[i] = 1
                #print("Invalid Label: {} found in row {}".format(label, i))
        elif (label == 9.0):
            if right == '0':
                invalid_flags[i] = 1
                #print("Invalid Label: {} found in row {}".format(label, i))
        else:
            invalid_flags[i] = 1
            #print("Invalid Label: {} found in row {}".format(label, i))
        
    invalid_indice =  np.where(invalid_flags==1)[0]
    print("found {} Invalid Rows".format(len(invalid_indice)))
    valid_df = new_df.drop(invalid_indice, axis=0)
    return valid_df

In [34]:
valid_df = remove_invalid_rows(df)

found 3316 Invalid Rows


In [35]:
print("Orginal Rows:{} |  Valid Rows:{}  |  Total Rows Removed:{}".format(len(df), len(valid_df), 
                                                                          len(df)-len(valid_df)))

Orginal Rows:84406 |  Valid Rows:81090  |  Total Rows Removed:3316


In [36]:
valid_df.head()

Unnamed: 0,Label,Time,Counter,IndexDistalJoint,IndexKnuckle,IndexMetacarpal,IndexMiddleJoint,IndexTip,MiddleDistalJoint,MiddleKnuckle,...,RingDistalJoint.1,RingKnuckle.1,RingMetacarpal.1,RingMiddleJoint.1,RingTip.1,ThumbDistalJoint.1,ThumbMetacarpalJoint.1,ThumbProximalJoint.1,ThumbTip.1,Wrist.1
86,8.0,2024-02-19 20:28:41.171,86,0,0,0,0,0,0,0,...,(-0.152/ -0.288/ 0.309),(-0.172/ -0.290/ 0.253),(-0.187/ -0.315/ 0.204),(-0.161/ -0.281/ 0.290),(-0.145/ -0.297/ 0.321),(-0.108/ -0.313/ 0.242),(-0.162/ -0.317/ 0.194),(-0.127/ -0.314/ 0.217),(-0.096/ -0.313/ 0.255),(-0.186/ -0.319/ 0.184)
87,8.0,2024-02-19 20:28:41.220,87,0,0,0,0,0,0,0,...,(-0.130/ -0.288/ 0.304),(-0.154/ -0.289/ 0.250),(-0.170/ -0.313/ 0.200),(-0.140/ -0.281/ 0.286),(-0.122/ -0.297/ 0.316),(-0.090/ -0.308/ 0.234),(-0.146/ -0.313/ 0.188),(-0.110/ -0.309/ 0.210),(-0.076/ -0.308/ 0.245),(-0.170/ -0.315/ 0.180)
88,8.0,2024-02-19 20:28:41.265,88,0,0,0,0,0,0,0,...,(-0.101/ -0.303/ 0.293),(-0.134/ -0.294/ 0.245),(-0.152/ -0.312/ 0.195),(-0.115/ -0.293/ 0.280),(-0.089/ -0.312/ 0.300),(-0.071/ -0.293/ 0.222),(-0.130/ -0.304/ 0.181),(-0.093/ -0.295/ 0.200),(-0.056/ -0.291/ 0.231),(-0.154/ -0.311/ 0.174)
89,8.0,2024-02-19 20:28:41.311,89,0,0,0,0,0,0,0,...,(-0.087/ -0.333/ 0.244),(-0.125/ -0.300/ 0.239),(-0.145/ -0.309/ 0.192),(-0.099/ -0.321/ 0.257),(-0.081/ -0.340/ 0.229),(-0.065/ -0.286/ 0.219),(-0.124/ -0.297/ 0.178),(-0.087/ -0.287/ 0.197),(-0.049/ -0.283/ 0.227),(-0.148/ -0.305/ 0.172)
90,8.0,2024-02-19 20:28:41.355,90,0,0,0,0,0,0,0,...,(-0.080/ -0.331/ 0.233),(-0.116/ -0.301/ 0.236),(-0.136/ -0.309/ 0.190),(-0.089/ -0.321/ 0.251),(-0.077/ -0.335/ 0.217),(-0.058/ -0.282/ 0.216),(-0.117/ -0.296/ 0.176),(-0.080/ -0.284/ 0.194),(-0.043/ -0.279/ 0.225),(-0.141/ -0.305/ 0.170)


In [37]:
def str_process(string):
    number_list = string.replace("(", "").replace(")", "").replace(" ", "").split("/")
    if len(number_list) > 1:
        number_list = [float(x) for x in number_list]
    else:
        number_list = [0., 0., 0.]
    return number_list

In [38]:
def split_n_convert(valid_df):
    # The first two columns 
    new_df = valid_df[['Label', 'Time', 'Counter']].reset_index().drop(['index'], axis=1)
    feature_name_dict = {}
    # for each column except the first two (Time and Counter)
    for i, col in enumerate(valid_df.columns[3:]):
        row_col = valid_df[col]
        new_feature = []
        print("Loading Feature: "+col)
        for j, str_feature in enumerate(row_col):
            floats_list = str_process(str_feature)
            assert(len(floats_list) == 3 or len(floats_list) == 4)
            new_feature.append(floats_list)
        new_feature = np.array(new_feature, dtype=float)

        width = new_feature.shape[1]
        new_feature_names = [col+'_'+str(idx) for idx in range(width)]
        feature_name_dict[col] = new_feature_names
        #print(new_feature_name)
        new_feature_df = pd.DataFrame(new_feature, columns=new_feature_names)
        assert(len(new_feature_df)==len(new_df))
        new_df = pd.concat([new_df, new_feature_df], axis=1)
    return new_df, feature_name_dict

In [39]:
#new_df,feature_name_dict = split_n_convert(valid_df)
new_df,feature_name_dict = split_n_convert(valid_df)

Loading Feature: IndexDistalJoint
Loading Feature: IndexKnuckle
Loading Feature: IndexMetacarpal
Loading Feature: IndexMiddleJoint
Loading Feature: IndexTip
Loading Feature: MiddleDistalJoint
Loading Feature: MiddleKnuckle
Loading Feature: MiddleMetacarpal
Loading Feature: MiddleMiddleJoint
Loading Feature: MiddleTip
Loading Feature: Palm
Loading Feature: PinkyDistalJoint
Loading Feature: PinkyKnuckle
Loading Feature: PinkyMetacarpal
Loading Feature: PinkyMiddleJoint
Loading Feature: PinkyTip
Loading Feature: RingDistalJoint
Loading Feature: RingKnuckle
Loading Feature: RingMetacarpal
Loading Feature: RingMiddleJoint
Loading Feature: RingTip
Loading Feature: ThumbDistalJoint
Loading Feature: ThumbMetacarpalJoint
Loading Feature: ThumbProximalJoint
Loading Feature: ThumbTip
Loading Feature: Wrist
Loading Feature: IndexDistalJoint.1
Loading Feature: IndexKnuckle.1
Loading Feature: IndexMetacarpal.1
Loading Feature: IndexMiddleJoint.1
Loading Feature: IndexTip.1
Loading Feature: MiddleDis

In [40]:
new_df.head()

Unnamed: 0,Label,Time,Counter,IndexDistalJoint_0,IndexDistalJoint_1,IndexDistalJoint_2,IndexKnuckle_0,IndexKnuckle_1,IndexKnuckle_2,IndexMetacarpal_0,...,ThumbMetacarpalJoint.1_2,ThumbProximalJoint.1_0,ThumbProximalJoint.1_1,ThumbProximalJoint.1_2,ThumbTip.1_0,ThumbTip.1_1,ThumbTip.1_2,Wrist.1_0,Wrist.1_1,Wrist.1_2
0,8.0,2024-02-19 20:28:41.171,86,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.194,-0.127,-0.314,0.217,-0.096,-0.313,0.255,-0.186,-0.319,0.184
1,8.0,2024-02-19 20:28:41.220,87,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.188,-0.11,-0.309,0.21,-0.076,-0.308,0.245,-0.17,-0.315,0.18
2,8.0,2024-02-19 20:28:41.265,88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.181,-0.093,-0.295,0.2,-0.056,-0.291,0.231,-0.154,-0.311,0.174
3,8.0,2024-02-19 20:28:41.311,89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.178,-0.087,-0.287,0.197,-0.049,-0.283,0.227,-0.148,-0.305,0.172
4,8.0,2024-02-19 20:28:41.355,90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.176,-0.08,-0.284,0.194,-0.043,-0.279,0.225,-0.141,-0.305,0.17


In [41]:
new_df['Label'].value_counts()

Label
9.0    34933
8.0    28562
4.0     2837
5.0     2583
1.0     2240
7.0     2234
6.0     2200
0.0     1895
2.0     1889
3.0     1717
Name: count, dtype: int64

In [42]:
feature_name_dict

{'IndexDistalJoint': ['IndexDistalJoint_0',
  'IndexDistalJoint_1',
  'IndexDistalJoint_2'],
 'IndexKnuckle': ['IndexKnuckle_0', 'IndexKnuckle_1', 'IndexKnuckle_2'],
 'IndexMetacarpal': ['IndexMetacarpal_0',
  'IndexMetacarpal_1',
  'IndexMetacarpal_2'],
 'IndexMiddleJoint': ['IndexMiddleJoint_0',
  'IndexMiddleJoint_1',
  'IndexMiddleJoint_2'],
 'IndexTip': ['IndexTip_0', 'IndexTip_1', 'IndexTip_2'],
 'MiddleDistalJoint': ['MiddleDistalJoint_0',
  'MiddleDistalJoint_1',
  'MiddleDistalJoint_2'],
 'MiddleKnuckle': ['MiddleKnuckle_0', 'MiddleKnuckle_1', 'MiddleKnuckle_2'],
 'MiddleMetacarpal': ['MiddleMetacarpal_0',
  'MiddleMetacarpal_1',
  'MiddleMetacarpal_2'],
 'MiddleMiddleJoint': ['MiddleMiddleJoint_0',
  'MiddleMiddleJoint_1',
  'MiddleMiddleJoint_2'],
 'MiddleTip': ['MiddleTip_0', 'MiddleTip_1', 'MiddleTip_2'],
 'Palm': ['Palm_0', 'Palm_1', 'Palm_2'],
 'PinkyDistalJoint': ['PinkyDistalJoint_0',
  'PinkyDistalJoint_1',
  'PinkyDistalJoint_2'],
 'PinkyKnuckle': ['PinkyKnuckle_0', 

In [43]:
def get_relative_position(df, feature_name_dict):
    relative_df = df.copy(deep=True)
    for col in list(feature_name_dict.keys()):
        # for hand positions
        handSkipList = ['Wrist', 'Wrist.1']
        if col not in handSkipList:
            if(col[-2:] == '.1'):  # if it is left hand
                relative_df[feature_name_dict[col]] = relative_df[feature_name_dict[col]] - \
                                         relative_df[feature_name_dict['Wrist.1']].values
            else: # if it is right hand
                relative_df[feature_name_dict[col]] = relative_df[feature_name_dict[col]] - \
                                         relative_df[feature_name_dict['Wrist']].values

    return relative_df

In [44]:
relative_df = get_relative_position(new_df, feature_name_dict)
len(relative_df)

81090

In [45]:
relative_df

Unnamed: 0,Label,Time,Counter,IndexDistalJoint_0,IndexDistalJoint_1,IndexDistalJoint_2,IndexKnuckle_0,IndexKnuckle_1,IndexKnuckle_2,IndexMetacarpal_0,...,ThumbMetacarpalJoint.1_2,ThumbProximalJoint.1_0,ThumbProximalJoint.1_1,ThumbProximalJoint.1_2,ThumbTip.1_0,ThumbTip.1_1,ThumbTip.1_2,Wrist.1_0,Wrist.1_1,Wrist.1_2
0,8.0,2024-02-19 20:28:41.171,86,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.010,0.059,0.005,0.033,0.090,0.006,0.071,-0.186,-0.319,0.184
1,8.0,2024-02-19 20:28:41.220,87,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.008,0.060,0.006,0.030,0.094,0.007,0.065,-0.170,-0.315,0.180
2,8.0,2024-02-19 20:28:41.265,88,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.007,0.061,0.016,0.026,0.098,0.020,0.057,-0.154,-0.311,0.174
3,8.0,2024-02-19 20:28:41.311,89,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.006,0.061,0.018,0.025,0.099,0.022,0.055,-0.148,-0.305,0.172
4,8.0,2024-02-19 20:28:41.355,90,0.000,0.000,0.000,0.000,0.000,0.000,0.000,...,0.006,0.061,0.021,0.024,0.098,0.026,0.055,-0.141,-0.305,0.170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81085,8.0,152018,11034,-0.033,0.057,0.133,-0.025,0.036,0.069,-0.008,...,0.026,-0.032,0.023,0.058,-0.044,0.030,0.104,-0.095,-0.277,0.176
81086,8.0,152018,11035,-0.031,0.051,0.136,-0.023,0.032,0.072,-0.007,...,0.026,-0.032,0.023,0.058,-0.044,0.030,0.104,-0.095,-0.276,0.176
81087,8.0,152018,11036,-0.029,0.047,0.137,-0.022,0.030,0.073,-0.007,...,0.025,-0.032,0.023,0.057,-0.044,0.030,0.104,-0.095,-0.275,0.176
81088,8.0,152018,11037,-0.026,0.045,0.140,-0.020,0.030,0.074,-0.007,...,0.026,-0.032,0.023,0.058,-0.044,0.030,0.104,-0.095,-0.275,0.175


In [46]:
def get_scaled_features(relative_df):
    header_df = relative_df[['Label','Time','Counter']].copy()
    feature_df = relative_df.drop(['Label','Time','Counter'], axis=1).copy()
    scaler = MinMaxScaler(feature_range=(0,1))
    feature_names = feature_df.columns
    scaled_df = pd.DataFrame(scaler.fit_transform(feature_df), columns=feature_names)
    scaled_df = feature_df
    scaled_df = pd.concat([header_df, scaled_df], axis=1)
    return scaled_df, scaler

In [47]:
scaled_df, scaler = get_scaled_features(relative_df)

In [48]:
scaled_df.head()

Unnamed: 0,Label,Time,Counter,IndexDistalJoint_0,IndexDistalJoint_1,IndexDistalJoint_2,IndexKnuckle_0,IndexKnuckle_1,IndexKnuckle_2,IndexMetacarpal_0,...,ThumbMetacarpalJoint.1_2,ThumbProximalJoint.1_0,ThumbProximalJoint.1_1,ThumbProximalJoint.1_2,ThumbTip.1_0,ThumbTip.1_1,ThumbTip.1_2,Wrist.1_0,Wrist.1_1,Wrist.1_2
0,8.0,2024-02-19 20:28:41.171,86,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.01,0.059,0.005,0.033,0.09,0.006,0.071,-0.186,-0.319,0.184
1,8.0,2024-02-19 20:28:41.220,87,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.008,0.06,0.006,0.03,0.094,0.007,0.065,-0.17,-0.315,0.18
2,8.0,2024-02-19 20:28:41.265,88,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.007,0.061,0.016,0.026,0.098,0.02,0.057,-0.154,-0.311,0.174
3,8.0,2024-02-19 20:28:41.311,89,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.006,0.061,0.018,0.025,0.099,0.022,0.055,-0.148,-0.305,0.172
4,8.0,2024-02-19 20:28:41.355,90,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.006,0.061,0.021,0.024,0.098,0.026,0.055,-0.141,-0.305,0.17


In [49]:
#labelled_data.to_csv('./dataset/labelled_holding.csv', index=False)
scaled_df.to_csv('./dataset/data_combined.csv', index=False)

In [None]:
relative_df.to_csv('./dataset/data_combined_wo_scaled.csv', index=False)

In [None]:
dump(scaler, open('./dataset/scaler.pkl', 'wb'))

In [None]:
# To load the scaler
from pickle import load
scaler = load(open('./dataset/scaler.pkl', 'rb'))