In [1]:
from pickle import dump, load

import numpy as np
import pandas as pd
from scipy.spatial.transform import Rotation as R
from sklearn.preprocessing import MinMaxScaler

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
# Labeled tracking recordings to merge, resolved relative to the notebook.
file_list = [
    "./dataset/labeled/{}".format(csv_name)
    for csv_name in (
        "20240216_202836897-Tracking.csv",
        "20240216_203857568-Tracking.csv",
        "20240216_203503978-Tracking.csv",
        "20240216_204214403-Tracking.csv",
    )
]
file_list

['./dataset/labeled/20240216_202836897-Tracking.csv',
 './dataset/labeled/20240216_203857568-Tracking.csv',
 './dataset/labeled/20240216_203503978-Tracking.csv',
 './dataset/labeled/20240216_204214403-Tracking.csv']

In [3]:
# Read every recording, then concatenate once at the end: calling pd.concat
# inside the loop re-copies all previously loaded rows on each iteration.
frames = []
total_rows = 0
for file in file_list:
    df_idx = pd.read_csv(file)
    frames.append(df_idx)
    # Running total, so the progress output matches the cumulative row count.
    total_rows += len(df_idx)
    print('Has data samples: {}'.format(total_rows))

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
df = pd.concat(frames, ignore_index=True)

Has data samples: 2709
Has data samples: 6525
Has data samples: 9138
Has data samples: 12747


In [4]:
# Sanity-check the (rows, columns) of the combined frame.
df.shape

(12747, 55)

In [5]:
# Preview the first rows of the combined raw data.
df.head()

Unnamed: 0,Label,Time,Counter,IndexDistalJoint,IndexKnuckle,IndexMetacarpal,IndexMiddleJoint,IndexTip,MiddleDistalJoint,MiddleKnuckle,...,RingDistalJoint.1,RingKnuckle.1,RingMetacarpal.1,RingMiddleJoint.1,RingTip.1,ThumbDistalJoint.1,ThumbMetacarpalJoint.1,ThumbProximalJoint.1,ThumbTip.1,Wrist.1
0,,2024-02-19 20:28:37.156,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,,2024-02-19 20:28:37.523,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,,2024-02-19 20:28:37.845,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,,2024-02-19 20:28:37.879,3,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,,2024-02-19 20:28:37.907,4,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Samples per label in the raw data (value_counts omits NaN labels by default).
df['Label'].value_counts()

Label
8.0    4867
9.0    4476
0.0     575
5.0     452
1.0     381
4.0     371
6.0     305
7.0     289
3.0     275
2.0     235
Name: count, dtype: int64

In [7]:
def remove_invalid_rows(raw_df, label_pos=0, right_pos=3, left_pos=29):
    """Return a copy of ``raw_df`` without rows whose label and hand data disagree.

    A row is dropped when any of the following holds:

    * its label is one of 0-3 or 8 and the left-hand probe column is '0';
    * its label is one of 4-7 or 9 and the right-hand probe column is '0';
    * its label is anything else (including NaN).

    Columns are addressed by position, as in the original implementation.

    Parameters
    ----------
    raw_df : pandas.DataFrame
        Frame to filter. It is not modified.
    label_pos : int, default 0
        Position of the label column.
    right_pos : int, default 3
        Position of the column probed for right-hand data.
    left_pos : int, default 29
        Position of the column probed for left-hand data.

    Returns
    -------
    pandas.DataFrame
        The surviving rows, keeping their original index labels.
    """
    labels = raw_df.iloc[:, label_pos]

    # A missing hand is written as the single token '0'. Comparing on the
    # string form also catches a numeric 0 from a column parsed as integers.
    right_missing = raw_df.iloc[:, right_pos].astype(str) == '0'
    left_missing = raw_df.iloc[:, left_pos].astype(str) == '0'

    needs_left = labels.isin([0.0, 1.0, 2.0, 3.0, 8.0])
    needs_right = labels.isin([4.0, 5.0, 6.0, 7.0, 9.0])

    invalid = (
        (needs_left & left_missing)
        | (needs_right & right_missing)
        | ~(needs_left | needs_right)  # unknown or NaN label
    )

    print("found {} Invalid Rows".format(int(invalid.sum())))
    # Positional boolean mask: correct for any index, not only a 0..n-1 RangeIndex.
    return raw_df.loc[~invalid.to_numpy()].copy()

In [8]:
# Drop rows with a missing/unknown label or with no tracking data for the hand the label requires.
valid_df = remove_invalid_rows(df)

found 1463 Invalid Rows


In [9]:
# Report how many rows the validity filter removed.
n_total, n_valid = len(df), len(valid_df)
print("Orginal Rows:{} |  Valid Rows:{}  |  Total Rows Removed:{}".format(n_total, n_valid,
                                                                          n_total - n_valid))

Orginal Rows:12747 |  Valid Rows:11284  |  Total Rows Removed:1463


In [10]:
# Preview the rows that survived filtering (they keep their original index labels).
valid_df.head()

Unnamed: 0,Label,Time,Counter,IndexDistalJoint,IndexKnuckle,IndexMetacarpal,IndexMiddleJoint,IndexTip,MiddleDistalJoint,MiddleKnuckle,...,RingDistalJoint.1,RingKnuckle.1,RingMetacarpal.1,RingMiddleJoint.1,RingTip.1,ThumbDistalJoint.1,ThumbMetacarpalJoint.1,ThumbProximalJoint.1,ThumbTip.1,Wrist.1
86,0.0,2024-02-19 20:28:41.171,86,0,0,0,0,0,0,0,...,(-0.152/ -0.288/ 0.309),(-0.172/ -0.290/ 0.253),(-0.187/ -0.315/ 0.204),(-0.161/ -0.281/ 0.290),(-0.145/ -0.297/ 0.321),(-0.108/ -0.313/ 0.242),(-0.162/ -0.317/ 0.194),(-0.127/ -0.314/ 0.217),(-0.096/ -0.313/ 0.255),(-0.186/ -0.319/ 0.184)
87,0.0,2024-02-19 20:28:41.220,87,0,0,0,0,0,0,0,...,(-0.130/ -0.288/ 0.304),(-0.154/ -0.289/ 0.250),(-0.170/ -0.313/ 0.200),(-0.140/ -0.281/ 0.286),(-0.122/ -0.297/ 0.316),(-0.090/ -0.308/ 0.234),(-0.146/ -0.313/ 0.188),(-0.110/ -0.309/ 0.210),(-0.076/ -0.308/ 0.245),(-0.170/ -0.315/ 0.180)
88,0.0,2024-02-19 20:28:41.265,88,0,0,0,0,0,0,0,...,(-0.101/ -0.303/ 0.293),(-0.134/ -0.294/ 0.245),(-0.152/ -0.312/ 0.195),(-0.115/ -0.293/ 0.280),(-0.089/ -0.312/ 0.300),(-0.071/ -0.293/ 0.222),(-0.130/ -0.304/ 0.181),(-0.093/ -0.295/ 0.200),(-0.056/ -0.291/ 0.231),(-0.154/ -0.311/ 0.174)
89,0.0,2024-02-19 20:28:41.311,89,0,0,0,0,0,0,0,...,(-0.087/ -0.333/ 0.244),(-0.125/ -0.300/ 0.239),(-0.145/ -0.309/ 0.192),(-0.099/ -0.321/ 0.257),(-0.081/ -0.340/ 0.229),(-0.065/ -0.286/ 0.219),(-0.124/ -0.297/ 0.178),(-0.087/ -0.287/ 0.197),(-0.049/ -0.283/ 0.227),(-0.148/ -0.305/ 0.172)
90,0.0,2024-02-19 20:28:41.355,90,0,0,0,0,0,0,0,...,(-0.080/ -0.331/ 0.233),(-0.116/ -0.301/ 0.236),(-0.136/ -0.309/ 0.190),(-0.089/ -0.321/ 0.251),(-0.077/ -0.335/ 0.217),(-0.058/ -0.282/ 0.216),(-0.117/ -0.296/ 0.176),(-0.080/ -0.284/ 0.194),(-0.043/ -0.279/ 0.225),(-0.141/ -0.305/ 0.170)


In [None]:
def str_process(string):
    """Parse one tracking cell such as ``"(-0.152/ -0.288/ 0.309)"`` into floats.

    Parentheses and spaces are removed and the remainder is split on ``/``.

    Parameters
    ----------
    string : object
        The raw cell value. Non-string values (for example a numeric ``0`` or
        NaN from a column pandas parsed as numeric) are converted with
        ``str()`` first instead of raising ``AttributeError``.

    Returns
    -------
    list of float
        One float per ``/``-separated component. A value with no ``/`` at all
        (such as the ``"0"`` placeholder) yields ``[0., 0., 0.]``.

    Raises
    ------
    ValueError
        If a ``/``-separated component is not a valid float literal.
    """
    tokens = str(string).translate(str.maketrans("", "", "() ")).split("/")
    if len(tokens) > 1:
        return [float(token) for token in tokens]
    # Single token: placeholder for "no data", represented as a zero vector.
    return [0., 0., 0.]

In [None]:
def split_n_convert(valid_df):
    """Expand every string-encoded vector column into separate float columns.

    The 'Label', 'Time' and 'Counter' columns are carried over unchanged.
    Every column from position 3 onward is parsed cell by cell with
    ``str_process`` and replaced by columns named ``<col>_0``, ``<col>_1``, ...

    Parameters
    ----------
    valid_df : pandas.DataFrame
        Frame whose first three columns are the header columns and whose
        remaining columns hold values that ``str_process`` can parse.

    Returns
    -------
    new_df : pandas.DataFrame
        Header columns followed by the expanded float columns, on a fresh
        0..n-1 index.
    feature_name_dict : dict
        Maps each original feature column name to the list of column names
        generated for it.
    """
    # drop=True discards the old index directly; reset_index().drop(['index'])
    # raises KeyError when the index carries a name.
    header_df = valid_df[['Label', 'Time', 'Counter']].reset_index(drop=True)
    feature_name_dict = {}
    feature_frames = []

    # Every column after the three header columns (Label, Time, Counter).
    for col in valid_df.columns[3:]:
        print("Loading Feature: "+col)
        parsed = [str_process(raw_value) for raw_value in valid_df[col]]
        assert all(len(item) == 3 or len(item) == 4 for item in parsed)
        values = np.array(parsed, dtype=float)

        new_feature_names = [col+'_'+str(idx) for idx in range(values.shape[1])]
        feature_name_dict[col] = new_feature_names
        new_feature_df = pd.DataFrame(values, columns=new_feature_names)
        assert(len(new_feature_df) == len(header_df))
        feature_frames.append(new_feature_df)

    # One concat at the end instead of one per feature inside the loop.
    new_df = pd.concat([header_df] + feature_frames, axis=1)
    return new_df, feature_name_dict

In [None]:
# Expand each string-encoded vector column into float columns; feature_name_dict
# maps every original column name to the names of the columns generated for it.
new_df,feature_name_dict = split_n_convert(valid_df)

In [None]:
# Preview the expanded numeric frame.
new_df.head()

In [None]:
# Samples per label after filtering and expansion.
new_df['Label'].value_counts()

In [None]:
# Inspect the mapping from original column name to generated column names.
feature_name_dict

In [None]:
def get_relative_position(df, feature_name_dict):
    """Return a copy of ``df`` with each joint expressed relative to its wrist.

    For every key in ``feature_name_dict`` other than 'Wrist' and 'Wrist.1',
    the key's columns have the matching wrist columns subtracted: keys ending
    in '.1' (left hand) use 'Wrist.1', all other keys (right hand) use 'Wrist'.
    The wrist columns themselves are left untouched, and ``df`` is not modified.
    """
    wrist_keys = ('Wrist', 'Wrist.1')
    relative_df = df.copy(deep=True)

    for joint, joint_cols in feature_name_dict.items():
        if joint in wrist_keys:
            continue
        # The '.1' suffix marks the left hand; anything else is the right hand.
        anchor_key = 'Wrist.1' if joint[-2:] == '.1' else 'Wrist'
        # .values strips the column labels so the subtraction is positional.
        anchor = relative_df[feature_name_dict[anchor_key]].values
        relative_df[joint_cols] = relative_df[joint_cols] - anchor

    return relative_df

In [None]:
# Re-express every non-wrist joint relative to its own hand's wrist, then show the row count.
relative_df = get_relative_position(new_df, feature_name_dict)
len(relative_df)

In [None]:
# Display the wrist-relative frame.
relative_df

In [None]:
def get_scaled_features(relative_df):
    """Min-max scale every feature column to [0, 1], leaving header columns as-is.

    Parameters
    ----------
    relative_df : pandas.DataFrame
        Frame containing the 'Label', 'Time' and 'Counter' header columns plus
        numeric feature columns. It is not modified.

    Returns
    -------
    scaled_df : pandas.DataFrame
        The header columns followed by the scaled feature columns, on the same
        index as ``relative_df``.
    scaler : MinMaxScaler
        The scaler fitted on the feature columns, for reuse on new data.
    """
    header_cols = ['Label', 'Time', 'Counter']
    header_df = relative_df[header_cols].copy()
    feature_df = relative_df.drop(header_cols, axis=1)

    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_values = scaler.fit_transform(feature_df)
    # Reuse the input index so the axis=1 concat below lines rows up correctly
    # even when relative_df does not have a default RangeIndex.
    scaled_features = pd.DataFrame(scaled_values,
                                   columns=feature_df.columns,
                                   index=feature_df.index)
    # The scaled frame is what gets returned; the previous version overwrote it
    # with the unscaled feature_df just before concatenating.
    scaled_df = pd.concat([header_df, scaled_features], axis=1)
    return scaled_df, scaler

In [None]:
# Run the scaling step; keeps both the output frame and the fitted MinMaxScaler.
scaled_df, scaler = get_scaled_features(relative_df)

In [None]:
# Preview the output of the scaling step.
scaled_df.head()

In [None]:
# Write the output of the scaling step to CSV, omitting the index column.
scaled_df.to_csv('./dataset/data_combined.csv', index=False)

In [None]:
# Write the wrist-relative (pre-scaling) features to CSV, omitting the index column.
relative_df.to_csv('./dataset/data_combined_wo_scaled.csv', index=False)

In [None]:
# Persist the fitted scaler. The context manager closes (and flushes) the file
# handle, which a bare open() passed straight to dump() never did explicitly.
with open('./dataset/scaler.pkl', 'wb') as scaler_file:
    dump(scaler, scaler_file)

In [None]:
# To load the scaler.
# NOTE: unpickling can execute arbitrary code, so only load scaler files that
# this notebook (or another trusted source) produced.
with open('./dataset/scaler.pkl', 'rb') as scaler_file:
    scaler = load(scaler_file)