In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Folder with the EMG training CSV files (machine-specific absolute path).
file_dir = "C:/Users/Asus/Desktop/Master Thesis/bbdc_2020_public_data/bbdc_2020/train/emg"
# os.listdir() returns names in arbitrary order. Sort them so the load order is
# deterministic: later cells pair subjects[i] with the i-th label frame purely
# by position.
# NOTE(review): the pairing still assumes the sorted file order matches the
# order in which subjects first appear in the label file - confirm.
entries = sorted(os.listdir(file_dir))
# Filled by load_files() with one DataFrame per listed file.
subjects = []

# Reads every listed file from the local folder so all subjects are fetched in one go.
def load_files(files,sub_data,dir_st):
    """Read each CSV named in ``files`` from ``dir_st`` and append it to ``sub_data``.

    Parameters
    ----------
    files : iterable of str
        File names relative to ``dir_st``.
    sub_data : list
        List that receives one DataFrame per file; it is extended in place.
    dir_st : str
        Directory containing the files (joined to each name with "/").

    Returns
    -------
    list
        The same ``sub_data`` object that was passed in.
    """
    sub_data.extend(pd.read_csv(dir_st + "/" + file_name) for file_name in files)
    return sub_data

# Load every listed file; load_files() fills and returns the list it was given.
subjects = load_files(files=entries, sub_data=subjects, dir_st=file_dir)

In [3]:
# Read the training label table from its CSV file (machine-specific absolute path).
label_file = "C:/Users/Asus/Desktop/Master Thesis/bbdc_2020_public_data/bbdc_2020/train/labels.train.csv"
label_df = pd.read_csv(filepath_or_buffer=label_file)

In [4]:
# Split the distinct 'Subject Hand' values into a left and a right group.
# Left  = every value that does NOT end in 'ra'.
# Right = every value that does NOT end in 'la'.
subjects_hand_unique = label_df['Subject Hand'].unique().tolist()
subjects_hand_left = list(filter(lambda hand: not hand.endswith('ra'), subjects_hand_unique))
subjects_hand_right = list(filter(lambda hand: not hand.endswith('la'), subjects_hand_unique))

In [5]:
def _label_frames_for(labels, hand_keys):
    """Return one label frame per entry of ``hand_keys``, in the same order.

    Each frame holds the rows of ``labels`` whose 'Subject Hand' equals the key,
    re-indexed from 0 so that rows can be addressed by their position.
    """
    return [
        labels[labels['Subject Hand'] == hand_key].reset_index(drop=True)
        for hand_key in hand_keys
    ]


# One label frame per 'Subject Hand' value, kept in the order of the key lists.
label_subjects_left = _label_frames_for(label_df, subjects_hand_left)
label_subjects_right = _label_frames_for(label_df, subjects_hand_right)

In [6]:
# Hand-activity name -> integer class id, one mapping per hand.
# Ids follow the order in which the names are listed (0..5).
label_mapping_left = dict(zip(
    ('la-nothing', 'la-object-orient', 'la-object-switch-hands', 'la-object-place', 'la-object-carry', 'la-object-pick'),
    range(6),
))
label_mapping_right = dict(zip(
    ('ra-nothing', 'ra-object-orient', 'ra-object-switch-hands', 'ra-object-place', 'ra-object-carry', 'ra-object-pick'),
    range(6),
))

In [7]:
#Feature Extraction
from IPython.core.debugger import set_trace
# Collected feature frames (one entry per subject that yields features), per hand.
subject_features_left, subject_features_right = [], []

def feature_extraction(subject_data, label_subject, lab_map, window_size=50):
    """Build one feature row per fixed-size window of every labelled interval.

    For each row of ``label_subject`` the samples of ``subject_data`` whose
    'ts' lies in [Start Time, End Time] (both inclusive) are selected. The 'ts'
    column is dropped, rows containing NaN or any zero value are discarded, and
    the remainder is cut into consecutive windows of ``window_size`` rows
    (trailing rows that do not fill a whole window are dropped). Each window is
    summarised by ``features`` and tagged with the interval's class id.

    Parameters
    ----------
    subject_data : pandas.DataFrame
        Signal table; must contain a 'ts' column.
    label_subject : pandas.DataFrame
        Label table with 'Start Time', 'End Time' and 'Hand Activity' columns.
    lab_map : dict
        Maps 'Hand Activity' values to class ids.
    window_size : int, default 50
        Number of rows per window.

    Returns
    -------
    pandas.DataFrame
        All window feature rows stacked, with index and columns renumbered
        from 0; an empty DataFrame when no full window was found.

    Raises
    ------
    ValueError
        If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")
    if len(label_subject) == 0:
        return pd.DataFrame()

    # Rows are collected in a list and concatenated once at the end:
    # DataFrame.append no longer exists in pandas >= 2.0 and growing a frame
    # row by row is quadratic.
    window_rows = []

    # Iterate positionally so the label frame does not need a 0..n-1 index.
    intervals = zip(label_subject['Start Time'],
                    label_subject['End Time'],
                    label_subject['Hand Activity'])
    for start_time, end_time, hand_activity in intervals:
        in_interval = (subject_data['ts'] >= start_time) & (subject_data['ts'] <= end_time)
        segment = subject_data.loc[in_interval].drop(columns=['ts']).dropna()
        # Keep only rows in which no column is exactly zero.
        segment = segment[(segment != 0).all(axis=1)]

        # Drop the trailing rows that would not fill a complete window.
        usable_rows = len(segment) - len(segment) % window_size
        if usable_rows == 0:
            continue
        segment = index_assign(segment.iloc[:usable_rows])

        for first_row in range(0, usable_rows, window_size):
            window = segment.iloc[first_row:first_row + window_size, :]
            window_rows.append(index_assign(features(window, hand_activity, lab_map)))

    if not window_rows:
        return pd.DataFrame()
    return index_assign(pd.concat(window_rows, ignore_index=True))

def features(feature_data, hand_data, map_data):
    """Summarise one window of samples as a single feature row.

    For every column of ``feature_data`` nine statistics are computed and laid
    out side by side in this order: mean, median, min, max, variance, mean
    absolute deviation, root mean square, skewness, kurtosis. A final 'Label'
    column holds ``map_data[hand_data]``.

    Parameters
    ----------
    feature_data : pandas.DataFrame
        Numeric samples of one window (rows = samples).
    hand_data : hashable
        Key looked up in ``map_data`` to obtain the class id.
    map_data : dict
        Maps activity keys to class ids.

    Returns
    -------
    pandas.DataFrame
        One row with ``9 * n_columns + 1`` columns. The statistic blocks reuse
        the input column labels, so labels repeat across blocks.

    Raises
    ------
    KeyError
        If ``hand_data`` is not a key of ``map_data``.
    """
    # DataFrame.mad() was removed in pandas 2.0; this is the same quantity
    # (mean absolute deviation around the column mean) written out explicitly.
    # NOTE(review): the original variable was named "mean_absolute_value", but
    # the value computed was always the mean absolute *deviation*; the
    # computation is kept as it was.
    mean_abs_deviation = (feature_data - feature_data.mean()).abs().mean()

    per_column_stats = [
        feature_data.mean(),
        feature_data.median(),
        feature_data.min(),
        feature_data.max(),
        feature_data.var(),
        mean_abs_deviation,
        (feature_data.pow(2)).mean() ** (1/2),  # root mean square
        feature_data.skew(),
        feature_data.kurt(),
    ]
    # Each statistic is a Series indexed by column; turn it into a 1-row frame
    # and place the blocks next to each other.
    result_df = pd.concat(
        [stat.to_frame().transpose() for stat in per_column_stats],
        axis= 1, join= 'inner')
    result_df['Label'] = map_data[hand_data]
    return result_df

def index_assign(feature_data):
    """Renumber the rows and columns of ``feature_data`` as 0, 1, 2, ...

    The frame is modified in place (both its index and its column labels are
    replaced) and the same object is returned.
    """
    feature_data.index = list(range(feature_data.shape[0]))
    feature_data.columns = list(range(feature_data.shape[1]))
    return feature_data


# Run the extraction for every subject, first with the left-hand labels and
# then with the right-hand labels. Subjects and label frames are paired by
# position; subjects that yield no features are skipped.
for hand_labels, hand_mapping, hand_features in (
    (label_subjects_left, label_mapping_left, subject_features_left),
    (label_subjects_right, label_mapping_right, subject_features_right),
):
    for subject_position, subject_frame in enumerate(subjects):
        extracted = feature_extraction(subject_frame, hand_labels[subject_position], hand_mapping)
        if not extracted.empty:
            hand_features.append(extracted)

In [8]:
# Print the shape of every left-hand feature frame on a single line.
for left_features in subject_features_left:
    print(left_features.shape, end= " ")

# Two separator lines after the shapes.
print("################################", "################################", sep="\n")


(1395, 73) (1868, 73) (1264, 73) (1174, 73) (1295, 73) (710, 73) (1504, 73) (1644, 73) (2137, 73) (1752, 73) (1468, 73) (4112, 73) (2107, 73) ################################
################################


In [9]:
# Stack the per-subject left-hand feature frames into one table.
train_left = pd.concat(objs=subject_features_left)

In [10]:
# Renumber rows and columns of the stacked table from 0.
train_left = index_assign(feature_data=train_left)

In [11]:
# Stack the per-subject right-hand feature frames into one table.
train_right = pd.concat(objs=subject_features_right)

In [12]:
# Renumber rows and columns of the stacked table from 0.
train_right = index_assign(feature_data=train_right)

In [13]:
# Display the (rows, columns) shape of train_left as the cell output.
train_left.shape

(22430, 73)

In [14]:
# Write both feature tables to CSV without the index column
# (machine-specific absolute paths).
for export_frame, export_path in (
    (train_left, r'C:/Users/Asus/Desktop/Master Thesis/bbdc_2020_public_data/bbdc_2020/preprocessed/train_left.csv'),
    (train_right, r'C:/Users/Asus/Desktop/Master Thesis/bbdc_2020_public_data/bbdc_2020/preprocessed/train_right.csv'),
):
    export_frame.to_csv(export_path, index = False)