In [2]:
import warnings
warnings.filterwarnings("ignore")
from tecohelper.hfilehelper import H5FileHelper
from tecohelper.anvilhelper import AnvilHelper
from tecohelper.config import RTLS,LABELS
import pandas as pd
import numpy as np
from tqdm import tqdm
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import os

In [3]:
# Columns selected from every token dataframe.
# The first 30 names follow the pattern <sensor>_<kind>_<axis>: for each of the
# sensors left / hip / right there are acc (x, y, z), gyr (x, y, z) and
# quat (w, x, y, z) columns, in that order. The RTLS columns are appended last.
input_columns = [
    "_".join((sensor, kind, axis))
    for sensor in ("left", "hip", "right")
    for kind, axes in (("acc", "xyz"), ("gyr", "xyz"), ("quat", "wxyz"))
    for axis in axes
] + [
    "rtls_accuracy",
    "rtls_accuracy_radius",
    "rtls_mapped_position",
    "rtls_state",
    "rtls_x_filtered",
    "rtls_x_unfiltered",
    "rtls_y_filtered",
    "rtls_y_unfiltered",
    "rtls_z_filtered",
    "rtls_z_unfiltered",
]

In [None]:
# Export one labelled CSV per recording.
#
# For every recording in the five annotated group files:
#   1. collect the rows of all annotated tokens, tagging each row with the
#      integer position of its label in LABELS,
#   2. resolve timestamps that are claimed by more than one row,
#   3. re-index onto a regular 20 ms grid and forward-fill the gaps,
#   4. collapse every label except codes 1 and 2 into code 0 ("other"),
#   5. write the result to Csv_data/<recording>.csv.

# DataFrame.to_csv does not create missing directories.
os.makedirs("Csv_data", exist_ok=True)

for grupe_index in range(1, 6):
    file = "data/Gruppe{}_data_recording_annotated.h5".format(grupe_index)
    print(file)
    helper = H5FileHelper(file)
    for seg_key in helper.recordings:
        print(seg_key)
        rec = AnvilHelper(file,
                          seg_key,
                          "data/{}.txt".format(seg_key))

        # Step 1: gather every token (segment) of every label. The frames are
        # collected in a list and concatenated once, instead of growing a
        # DataFrame inside the loop (which copies all previous rows each time).
        token_frames = []
        for labelindex, label in enumerate(LABELS):
            for index in tqdm(range(len(rec.tokens[label]))):
                token_rows = rec.get_token_dataframe(label, index)[input_columns]
                token_frames.append(token_rows.assign(label=labelindex))

        if not token_frames:
            # Without any rows there is no first/last timestamp to span the
            # grid with, so there is nothing to export for this recording.
            print("no annotated tokens in {}; skipping".format(seg_key))
            continue

        # df_all contains all segments of all labels of one recording.
        df_all = pd.concat(token_frames).sort_index()

        # Regular 20 ms time grid from the first to the last annotated row.
        df_NAN = pd.DataFrame(index=pd.date_range(start=df_all.index[0],
                                                  end=df_all.index[-1],
                                                  freq='20ms'))

        # Step 2: resolve duplicated timestamps.
        # Rows with label codes 0..4 take precedence
        # (NOTE(review): presumably the take/drop-style labels, judging by the
        # original variable names -- confirm against the order of LABELS).
        # Among them the first row per timestamp is kept.
        has_priority = (df_all["label"] >= 0) & (df_all["label"] <= 4)
        df_take_drop = df_all[has_priority]
        # A remaining row survives only if its timestamp is claimed neither by
        # a priority row nor by any other row at all. These masks select the
        # same rows as the former "append the subset, then drop every
        # duplicated index" construction.
        contested = (df_all.index.isin(df_take_drop.index)
                     | df_all.index.duplicated(keep=False))
        df_walk_stand = df_all[~contested]
        df_take_drop = df_take_drop[~df_take_drop.index.duplicated()]
        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        df_all = pd.concat([df_walk_stand, df_take_drop]).sort_index()

        # Step 3: align to the grid; timestamps without data inherit the
        # values (label included) of the closest earlier row.
        df_merged = df_NAN.merge(df_all, how='outer',
                                 left_index=True, right_index=True).ffill()

        # Step 4: change the remaining labels to "other: 0"; only label codes
        # 1 and 2 are kept. df_merged has a unique index at this point (both
        # merge inputs have unique indexes), so splitting by label value is
        # equivalent to the former index-based de-duplication.
        keeps_label = (df_merged["label"] >= 1) & (df_merged["label"] <= 2)
        df_other = df_merged[~keeps_label].copy()
        df_other["label"] = 0

        # Step 5: recombine in time order and export.
        df_all = pd.concat([df_other, df_merged[keeps_label]]).sort_index()
        df_all.to_csv("Csv_data/" + seg_key + ".csv", index=True)
        

In [4]:
# Combine all per-recording CSV files into Csv_data/all.csv.
# Each recording gets an integer "id" column equal to its position in the
# sorted list of file names.
#
# - all.csv itself is excluded: it lives in the same directory, so re-running
#   this cell would otherwise read the previous output back in and duplicate
#   every row.
# - Entries that are not .csv files are ignored.
# - The names are sorted because os.listdir returns them in arbitrary order,
#   which would make the id assignment irreproducible.
files = sorted(
    name for name in os.listdir("Csv_data/")
    if name.endswith(".csv") and name != "all.csv"
)
if not files:
    raise ValueError("no per-recording CSV files found in Csv_data/")

frames = []
for index, file in enumerate(files):
    df_all = pd.read_csv("Csv_data/" + file, index_col=[0])
    df_all["id"] = index
    frames.append(df_all)

df = pd.concat(frames)
df.to_csv("Csv_data/all.csv", index=True)