In [2]:
import pandas as pd 
import numpy as np

In [3]:
raw_file = './data/hasc-111018-165936-acc.csv'
raw_label = './data/hasc-111018-165936.label'

raw_df = pd.read_csv(raw_file, header=None)
raw_df.columns = ['time', 'x', 'y', 'z']
raw_label_df = pd.read_csv(raw_label, skiprows=2, header=None)
raw_label_df.columns = ['start', 'end', 'label']
print(f'raw shape: {raw_df.shape}, label shape: {raw_label_df.shape}')
print('Raw activity data')
print(raw_df.head())
print('\nraw label data')
print(raw_label_df.head())

raw shape: (39397, 4), label shape: (40, 3)
Raw activity data
          time         x         y         z
0  5015.672119  0.115128 -0.988739 -0.090057
1  5015.687371  0.110138 -0.986694 -0.086288
2  5015.704061  0.116180 -0.991669 -0.091003
3  5015.715389  0.112259 -0.989670 -0.091095
4  5015.726564  0.114349 -0.989746 -0.097855

raw label data
      start       end                    label
0  5057.661  5091.260  walk;floor;B2F;steps;44
1  5071.934       NaN                walk;turn
2  5098.502  5126.499         escalatorUp;stay
3  5125.117       NaN           move;floor;B1F
4  5127.665  5143.411  walk;floor;B1F;steps;22


In [9]:
# add index column
raw_df = raw_df.reset_index()
print(raw_df.head())

   level_0  index         time         x         y         z   l2_norm
0        0      0  5015.672119  0.115128 -0.988739 -0.090057  0.999485
1        1      1  5015.687371  0.110138 -0.986694 -0.086288  0.996565
2        2      2  5015.704061  0.116180 -0.991669 -0.091003  1.002590
3        3      3  5015.715389  0.112259 -0.989670 -0.091095  1.000174
4        4      4  5015.726564  0.114349 -0.989746 -0.097855  1.001124


In [7]:
raw_df['l2_norm'] = np.sqrt(np.square(raw_df[['x', 'y', 'z']]).sum(axis=1))
raw_df['l2_norm'].head()

0    0.999485
1    0.996565
2    1.002590
3    1.000174
4    1.001124
Name: l2_norm, dtype: float64

In [10]:
raw_df[['l2_norm']].to_csv('./data/preprocess/hasc_l2_norm.csv')

## create breakpoints index label

In [12]:
raw_time = raw_df['time'].to_numpy()
raw_label_start = raw_label_df['start'].to_numpy()

In [15]:
breakpoints_index = []
start_index = 0
for i in range(len(raw_time)):
    if start_index < len(raw_label_start) and raw_time[i] > raw_label_start[start_index]:
        breakpoints_index.append(i)
        start_index += 1
print(f'len: {len(breakpoints_index)}, value: {breakpoints_index}')

len: 40, value: [3959, 5305, 7812, 10325, 10565, 13084, 14136, 14184, 14411, 14931, 18428, 19901, 21086, 21538, 23834, 24236, 24469, 24818, 24884, 25214, 25597, 25710, 25780, 26510, 27408, 27431, 28116, 28396, 29188, 30016, 30072, 30407, 30769, 30858, 33567, 34033, 34265, 35529, 37511, 37787]


In [16]:
# save
np.savetxt('./data/preprocess/hasc_label_index.txt', breakpoints_index, fmt='%i')