## 0. Import libraries

In [1]:
import sys
sys.path.append("D:\\WETrak")

import numpy as np
import os
import configparser
import os.path as path
import pandas as pd
import json
import csv
from glob import glob
from scipy import signal

import import_ipynb
import tracker_network.data_sources.data_utils as data_utils

importing Jupyter notebook from D:\WETrak\tracker_network\data_sources\data_utils.ipynb


## 1. Transform ground-truth (GT) angles and remove outliers

In [2]:
def remove_main(p_root,user_name,number_list):
    """Convert the raw Leap angles to degrees and drop the frames flagged as outliers.

    For each entry of ``number_list`` the recording folder
    ``<p_root>/<user_name>/data/<number>`` is processed: ``leap_data.json`` is
    read, the angles are converted and filtered, and the result is written to
    ``leap_data_processed.json`` in the same folder (overwriting it if present).

    Args:
        p_root: Root directory of the dataset.
        user_name: Name of the user sub-directory under ``p_root``.
        number_list: Iterable of recording folder names (strings, since they
            are joined into a path).

    Returns:
        None. The output is the JSON file written for each recording.
    """
    print("---------------remove_main--------------")
    finger_type_list = ["Thumb","Index","Middle","Ring","Pinky"]

    for number in number_list:
        p_path = path.join(p_root,user_name,"data",number)

        # Read the raw Leap recording; the context manager closes the handle
        # even if parsing fails.
        with open(path.join(p_path,"leap_data.json"),mode='r', encoding='utf-8') as leap_file:
            leap_data = json.load(leap_file)

        # Angles of all fingers in radians. The original notes give the shape
        # as [N, 5, 4]; axis 1 is indexed per finger below.
        rad_angles = data_utils.parse_leap_all_finger(leap_data,finger_type_list)

        # rad -> degree, then remap: positive angles become 180 - angle, the
        # others -180 - angle, so that (per the original note) a flat hand reads 0.
        degree_angles = np.degrees(rad_angles)
        trans_degree_angles = np.where(degree_angles > 0, 180 - degree_angles, -180 - degree_angles)

        # Constrain the angle range per finger and collect the frame indices
        # that fall outside it. A frame rejected for any finger is removed for
        # all fingers, hence the set union.
        finger_angle = []
        removed_idx = set()
        for i,finger_type in enumerate(finger_type_list):
            per_finger_angle, per_finger_removed_idx = data_utils.constraints_angle_by_finger(
                trans_degree_angles[:,i,:],finger_type)
            finger_angle.append(per_finger_angle)
            removed_idx.update(per_finger_removed_idx)

        # Re-assemble the per-finger arrays along the finger axis and drop the
        # rejected frames.
        positive_finger_angle = np.stack(finger_angle, axis=1)
        new_finger_angle = np.delete(positive_finger_angle,sorted(removed_idx),axis=0)

        # Write the cleaned angles to "leap_data_processed.json".
        result = data_utils.compose_leap_all_finger(new_finger_angle,finger_type_list)
        with open(path.join(p_path,"leap_data_processed.json"), 'w') as leap_file:
            json.dump(result, leap_file, sort_keys=False, indent=4, separators=(',', ':'))

## 2. Interpolation

In [3]:
def interpolate_main(p_root,user_name,number_list):
    """Resample the processed Leap angles to the length of the EMG recording.

    For each recording folder ``<p_root>/<user_name>/data/<number>`` the row
    count of ``emg_data.csv`` is taken as the target length and
    ``leap_data_processed.json`` is interpolated to it via
    ``data_utils.interpolate_leap_all_finger``.

    Note: the result is written back to ``leap_data_processed.json``, i.e. the
    input file is overwritten in place.

    Args:
        p_root: Root directory of the dataset.
        user_name: Name of the user sub-directory under ``p_root``.
        number_list: Iterable of recording folder names (strings).

    Returns:
        None. The output is the JSON file rewritten for each recording.
    """
    print("---------------interpolate_main--------------")
    finger_type_list = ["Thumb","Index","Middle","Ring","Pinky"]

    for number in number_list:
        p_path = path.join(p_root,user_name,"data",number)
        leap_path = path.join(p_path,"leap_data_processed.json")

        # Only the number of EMG rows is needed: it is the target length.
        emg_data = pd.read_csv(path.join(p_path,"emg_data.csv"),header=None)
        emg_length = len(emg_data)

        # Read the angles of all fingers at once; the handle is closed before
        # the same file is reopened for writing below.
        with open(leap_path,mode='r', encoding='utf-8') as leap_file:
            leap_data = json.load(leap_file)
        leap_data = data_utils.parse_leap_all_finger(leap_data,finger_type_list)
        leap_length = len(leap_data)

        # Interpolate the angles of all fingers at once.
        new_leap_data = data_utils.interpolate_leap_all_finger(leap_data,leap_length,emg_length)

        # Write the interpolated angles back to "leap_data_processed.json".
        result = data_utils.compose_leap_all_finger(new_leap_data,finger_type_list)
        with open(leap_path, 'w') as leap_file:
            json.dump(result, leap_file, sort_keys=False, indent=4, separators=(',', ':'))

#         print("Data interpolation complete!")

## 3. Split train and test data sets

In [4]:
def split_data_main(p_root,user_name,number_list,split_length,train_ratio,val_ratio,test_ratio):
    """Cut every recording into segments and write them to ``train``/``test`` folders.

    For each recording folder ``<p_root>/<user_name>/data/<number>`` the raw
    ``emg_data.csv`` and ``leap_data_processed.json`` are loaded and handed to
    ``data_utils.split_data``. The train and test segments of all recordings
    are accumulated and then written to
    ``<p_root>/<user_name>/train/<00001..>`` and
    ``<p_root>/<user_name>/test/<00001..>`` (one numbered folder per segment,
    each holding ``emg_data.csv`` and ``leap_data_processed.json``).

    Args:
        p_root: Root directory of the dataset.
        user_name: Name of the user sub-directory under ``p_root``.
        number_list: Iterable of recording folder names (strings).
        split_length: Passed through to ``data_utils.split_data``
            (presumably the segment length in samples — confirm there).
        train_ratio: Share of the data used for training.
        val_ratio: Share of the data used for validation. It is forwarded to
            ``data_utils.split_data``, but no validation set is written here.
        test_ratio: Share of the data used for testing.

    Raises:
        AssertionError: If the three ratios do not sum to 1.
    """
    print("---------------split_data_main--------------")
    finger_type_list = ["Thumb","Index","Middle","Ring","Pinky"]

    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 are not exactly
    # 1.0 in binary floating point and would fail an `== 1` check.
    assert np.isclose(train_ratio + val_ratio + test_ratio, 1.0), "The sum of the ratios must be equal to 1."

    emg_train_list = []
    leap_train_list = []

    emg_test_list = []
    leap_test_list = []

    for number in number_list:
        p_path = path.join(p_root,user_name,"data",number)

        emg_data = np.array(pd.read_csv(path.join(p_path,"emg_data.csv"),header=None))
        with open(path.join(p_path,"leap_data_processed.json"),mode='r', encoding='utf-8') as leap_file:
            leap_data = json.load(leap_file)
        leap_data = data_utils.parse_leap_all_finger(leap_data,finger_type_list)

        emg_train,leap_train,emg_test,leap_test = data_utils.split_data(
            emg_data,leap_data,split_length,train_ratio,val_ratio,test_ratio)

        emg_train_list.extend(emg_train)
        leap_train_list.extend(leap_train)

        emg_test_list.extend(emg_test)
        leap_test_list.extend(leap_test)

    _write_split_segments(path.join(p_root,user_name,"train"),emg_train_list,leap_train_list,finger_type_list)
    _write_split_segments(path.join(p_root,user_name,"test"),emg_test_list,leap_test_list,finger_type_list)


def _write_split_segments(p_split,emg_segments,leap_segments,finger_type_list):
    """Write paired EMG/Leap segments into 1-based, zero-padded sub-folders of ``p_split``."""
    for i, emg_segment in enumerate(emg_segments):
        p_dest = path.join(p_split,"{:05}".format(i+1))
        data_utils.make_dir(p_dest)

        # EMG segment as CSV with six decimals.
        np.savetxt(path.join(p_dest,"emg_data.csv"),emg_segment,delimiter=',',fmt='%.6f')

        # Matching Leap segment, serialised in the same JSON layout as the input.
        result = data_utils.compose_leap_all_finger(leap_segments[i],finger_type_list)
        with open(path.join(p_dest,"leap_data_processed.json"), 'w') as leap_file:
            json.dump(result, leap_file, sort_keys=False, indent=4, separators=(',', ':'))

## 4. EMG Processing 

In [5]:
def emg_pre_main(p_root,user_name):
    """Filter the EMG signal of every train/test segment and save it next to the raw data.

    Every sub-folder of ``<p_root>/<user_name>/train`` and
    ``<p_root>/<user_name>/test`` is expected to contain ``emg_data.csv``.
    Each column (channel) is processed independently:

    1. NaNs are replaced by 0 and the channel mean is subtracted (DC removal).
    2. A zero-phase Butterworth band-pass (20-150 Hz at the hard-coded 500 Hz
       sampling rate) is applied with ``signal.filtfilt``.
    3. ``data_utils.Implement_Notch_Filter`` is applied (presumably a 50 Hz
       mains notch — confirm against its definition).

    The filtered channels are written to ``emg_filtered.csv`` (no header, no
    index) in the same folder.

    Args:
        p_root: Root directory of the dataset.
        user_name: Name of the user sub-directory under ``p_root``.

    Returns:
        None. The output is the CSV file written for each segment folder.
    """
    print("---------------emg_pre_main--------------")
    # sampling rate
    sr = 500.0
    # sampling interval
    ts = 1.0/sr

    # Band-pass corner frequencies, normalised to the Nyquist frequency (sr/2).
    low_Hz = 20
    high_Hz = 150
    w1 = 2*low_Hz/sr
    w2 = 2*high_Hz/sr

    # The band-pass design depends only on the constants above, so it is
    # computed once instead of once per channel per file.
    b, a = signal.butter(2, [w1,w2], 'bandpass')

    for split in ["train","test"]:
        p_split = path.join(p_root,user_name,split)
        files = glob(path.join(p_split,'*'))

        for file_path in files:
            emg_data_raw = pd.read_csv(path.join(file_path,"emg_data.csv"),header=None).to_numpy()
            emg_data_raw = np.nan_to_num(emg_data_raw, nan=0)

            filted_emg_data_list = []

            for i in range(emg_data_raw.shape[1]):
                emg_data = emg_data_raw[:,i]

                # remove DC (direct current) offset
                emg_data = emg_data - np.mean(emg_data)

                # zero-phase band-pass, then the project's notch filter
                emg_data = signal.filtfilt(b, a, emg_data)
                emg_data = data_utils.Implement_Notch_Filter(ts,5,50,4,'butter',emg_data)

                filted_emg_data_list.append(emg_data)

            # Channels were collected row-wise; transpose back to (samples, channels).
            emg_filtered = pd.DataFrame(np.array(filted_emg_data_list).T)
            emg_filtered.to_csv(path.join(file_path,"emg_filtered.csv"),header=None, index=None)

## 5. Trigger

In [6]:
def preprocess_data(p_root,user_name):
    """Run preprocessing steps 1-4 in order for one user.

    The steps are: outlier removal, interpolation, train/test splitting and
    EMG filtering. Recordings "1" through "22" are processed with a split
    length of 1000 and an 80 % / 0 % / 20 % train/val/test split.

    Args:
        p_root: Root directory of the dataset.
        user_name: Name of the user sub-directory under ``p_root``.
    """
    # Recording folders are named "1" .. "22".
    recordings = list(map(str, range(1, 23)))

    # Split length and (train, val, test) proportions handed to the splitter.
    segment_length = 1000
    ratios = (0.8, 0.0, 0.2)

    # Steps 1-4; each step reads the files the previous one wrote.
    remove_main(p_root, user_name, recordings)
    interpolate_main(p_root, user_name, recordings)
    split_data_main(p_root, user_name, recordings, segment_length, *ratios)
    emg_pre_main(p_root, user_name)

    print("Done")

In [None]:
# if __name__ == "__main__":
#     # Read configuration
#     config = configparser.ConfigParser()
#     config.read('D:\\WETrak\\config.ini')
    
#     p_root = config['main']['data_path']
#     user_name = "user1"
    
#     preprocess_data(p_root,user_name)