# Определение активности субъекта

## Чтение данных с одного субъекта

In [82]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
# from skimage import transform

In [83]:
col_names = ['timestamp', 'activityID', 'heart rate', 'temperature hand',\
             '3Da_x scale_16 hand', '3Da_y scale_16 hand', '3Da_z scale_16 hand', \
             '3Da_x scale_6 hand', '3Da_y scale_6 hand', '3Da_z scale_6 hand', \
             '3Dg_x hand', '3Dg_y hand', '3Dg_z hand', '3Dm_x hand', '3Dm_y hand', '3Dm_z hand', \
             'orientation_0 hand', 'orientation_1 hand', 'orientation_2 hand', 'orientation_3 hand', 
             'temperature chest', '3Da_x scale_16 chest', '3Da_y scale_16 chest', '3Da_z scale_16 chest', \
             '3Da_x scale_6 chest', '3Da_y scale_6 chest', '3Da_z scale_6 chest', \
             '3Dg_x chest', '3Dg_y chest', '3Dg_z chest', '3Dm_x chest', '3Dm_y chest', '3Dm_z chest', \
             'orientation_0 chest', 'orientation_1 chest', 'orientation_2 chest', 'orientation_3 chest',
             'temperature ankle', '3Da_x scale_16 ankle', '3Da_y scale_16 ankle', '3Da_z scale_16 ankle', \
             '3Da_x scale_6 ankle', '3Da_y scale_6 ankle', '3Da_z scale_6 ankle', \
             '3Dg_x ankle', '3Dg_y ankle', '3Dg_z ankle', '3Dm_x ankle', '3Dm_y ankle', '3Dm_z ankle', \
             'orientation_0 ankle', 'orientation_1 ankle', 'orientation_2 ankle', 'orientation_3 ankle']

In [84]:
good_cols = ['activityID', 'heart rate', 'temperature hand',\
             '3Da_x scale_16 hand', '3Da_y scale_16 hand', '3Da_z scale_16 hand', \
             '3Dg_x hand', '3Dg_y hand', '3Dg_z hand', '3Dm_x hand', '3Dm_y hand', '3Dm_z hand', \
             'temperature chest', '3Da_x scale_16 chest', '3Da_y scale_16 chest', '3Da_z scale_16 chest', \
             '3Dg_x chest', '3Dg_y chest', '3Dg_z chest', '3Dm_x chest', '3Dm_y chest', '3Dm_z chest', \
             'temperature ankle', '3Da_x scale_16 ankle', '3Da_y scale_16 ankle', '3Da_z scale_16 ankle', \
             '3Dg_x ankle', '3Dg_y ankle', '3Dg_z ankle', '3Dm_x ankle', '3Dm_y ankle', '3Dm_z ankle']

In [85]:
def get_good_data(fname, delete_zero_activity = True):
    data = pd.read_csv(fname, names = col_names, sep = ' ')
    data_gc = data[good_cols]
    if delete_zero_activity:
        data_gc = data_gc[(data_gc.activityID != 0)]
        
    return data_gc

In [86]:
subj_fname = 'PAMAP2_Dataset/Protocol/subject101.dat'
df = get_good_data(subj_fname)

In [87]:
df.sample()

Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
97836,17,,33.3125,-5.79307,1.23154,8.10849,-1.61399,0.776333,1.31861,4.0073,...,34.0,9.47381,-2.36683,-1.21638,-0.040777,0.072751,-0.01869,-57.0793,-1.63985,46.3186


### Выделяем активности

In [88]:
def data_fill_na(data):
    data_nonans = data.interpolate(axis = 0, method='linear')
    data_nonans = data_nonans.fillna(axis = 0, method='bfill')
    data_nonans = data_nonans.fillna(axis = 0, method='ffill')
    return data_nonans

In [89]:
def get_activity(data, activityID, with_fill = True):
    data_act = data[(data.activityID == activityID)]
    data_act.pop('activityID')
    if with_fill:
        data_act = data_fill_na(data_act)
    return np.array(data_act).T

In [90]:
len(get_activity(df, 5)[0])

21265

### Собираем один DataFrame

In [91]:
def get_df():
    adf = pd.DataFrame(columns=good_cols, dtype = object)

    for i in tqdm(range(1,10)):
        subj_fname = f'PAMAP2_Dataset/Protocol/subject10{i}.dat'
        subj_df = get_good_data(subj_fname)
        df_arr = []
        uniq_act = df['activityID'].unique().tolist()
        for act in uniq_act:
            arr = []
            arr.append(act)
            arr += list(get_activity(subj_df, act))
            if len(arr[1]):
                df_arr.append(arr)

        add = pd.DataFrame(data = np.array(df_arr, dtype = object), dtype = object, columns=good_cols)
        adf = pd.concat([adf,add])
    
    adf.index = pd.Int64Index(list(range(len(adf))))
    return adf

In [92]:
all_df = get_df()
all_df.sample()

100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:18<00:00,  2.08s/it]


Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
75,7,"[79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79.0, 79....","[33.125, 33.125, 33.125, 33.125, 33.125, 33.12...","[2.09004, 2.11981, 2.00216, 2.04824, 2.03354, ...","[9.31792, 9.31562, 9.27755, 9.31721, 8.9348, 9...","[2.84954, 2.61907, 2.50198, 2.73346, 2.38814, ...","[0.237487, 0.223714, 0.21202, 0.207086, 0.1519...","[-0.148331, -0.128281, -0.139093, -0.18215, -0...","[0.0963161, 0.104304, 0.113829, 0.102348, 0.09...","[6.21408, 5.46588, 5.7135, 6.08314, 5.71167, 6...",...,"[32.3125, 32.3125, 32.3125, 32.3125, 32.3125, ...","[9.42539, 9.4627, 9.38506, 9.35133, 9.45861, 9...","[-0.451371, -0.261444, -0.374233, -0.299023, -...","[-3.27401, -3.3134, -3.39067, -3.27572, -3.428...","[0.0524805, 0.017474, 0.00471726, -0.0142941, ...","[0.00381463, -0.0221129, -0.0223519, -0.034231...","[0.000602943, -0.0312419, -0.0210371, 0.017400...","[-50.0208, -50.2626, -50.5415, -50.0157, -50.5...","[18.5976, 18.4839, 19.0094, 18.2103, 19.1403, ...","[11.7063, 12.6984, 11.5622, 11.4231, 12.413, 1..."


### Смотрим на длину активностей

In [93]:
def sep_by_len(df, min_act_len, ret_min_len = False) -> (pd.DataFrame, np.ndarray):
    uniq_act = df['activityID'].unique().tolist()
    min_len = np.zeros((len(uniq_act)), dtype = np.int64) - 1
    chosen = []

    for i in range(len(df)):
        p = df.iloc[i]
        if len(p['heart rate']) >= min_act_len:
            chosen.append(i)
            if min_len[uniq_act.index(p['activityID'])] == -1:
                min_len[uniq_act.index(p['activityID'])] = len(p['heart rate'])
            else:
                min_len[uniq_act.index(p['activityID'])] = min(min_len[uniq_act.index(p['activityID'])], len(p['heart rate']))
    
    if ret_min_len:
        return min_len
    else:
        return df.iloc[(chosen)] 

Минимальные длины

In [94]:
lens = sep_by_len(all_df, 0, ret_min_len=True)
lens

array([22044, 12282, 20533, 23573, 20037, 10389,  9655, 22253, 20265,
       20486,     1,   256], dtype=int64)

Выбираем активности с 20000+ длиной

In [14]:
activities = sep_by_len(all_df, 20000)
activities.describe()

Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
count,64,64,64,64,64,64,64,64,64,64,...,64,64,64,64,64,64,64,64,64,64
unique,9,64,64,64,64,64,64,64,64,64,...,64,64,64,64,64,64,64,64,64,64
top,1,"[100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100...","[30.375, 30.375, 30.375, 30.375, 30.375, 30.37...","[2.2153, 2.29196, 2.2909, 2.218, 2.30106, 2.07...","[8.27915, 7.67288, 7.1424, 7.14365, 7.25857, 7...","[5.58753, 5.74467, 5.82342, 5.8993, 6.09259, 6...","[-0.00475004, -0.17171, -0.238241, -0.192912, ...","[0.0375788, 0.0254788, 0.0112136, 0.0190534, -...","[-0.011145, -0.00953821, 0.000830722, 0.013374...","[8.932, 9.583, 9.05516, 9.92698, 9.15626, 8.60...",...,"[30.75, 30.75, 30.75, 30.75, 30.75, 30.75, 30....","[9.73855, 9.69762, 9.69633, 9.6637, 9.77578, 9...","[-1.84761, -1.88438, -1.92203, -1.84714, -1.88...","[0.0951561, -0.0208042, -0.0591734, 0.0943855,...","[0.00290826, 0.020882, -0.0353922, -0.0325136,...","[-0.0277138, 0.000944724, -0.0524217, -0.01884...","[0.00175228, 0.00600704, -0.00488214, 0.026949...","[-61.1081, -60.8916, -60.3407, -60.7646, -60.2...","[-36.8636, -36.3197, -35.7842, -37.1028, -37.1...","[-58.3696, -58.3656, -58.6119, -57.8799, -57.8..."
freq,8,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [15]:
def cut_act(df, cut_len, count=-1, random_start = False) -> pd.DataFrame:
    '''
    Consideres that cut_len is le to the lenght of all activities
    count is maximum number of cutted signals from one activity (-1 is default for maximum number)
    random start works with due regard for count
    '''
    tdf = pd.DataFrame(columns=good_cols, dtype = object)
    for i in range(len(df)):
        l = len(df.iloc[i]['heart rate'])
        start = np.random.randint(0, l-count*(l//count)+1) if random_start else 0
        el = np.array(df.iloc[i], dtype = object)
#         print(el[1], '\n')
        for j in range(start, l-cut_len, cut_len):
            if j//cut_len == count:
                break
            new_el = [el[0]]
            for k in range(1, len(el)):
                new_el.append(el[k][j:j+cut_len])
            tdf = tdf.append(pd.DataFrame([new_el], columns=good_cols))
#             pd.DataFrame.append()
#             print(tdf, '\n')
    
    tdf.index = pd.Int64Index(list(range(len(tdf))))
    return tdf

In [16]:
cut_df = cut_act(activities, 20000)
len(cut_df.iloc[0][1])

20000

### Сохранение

In [96]:
all_df.to_hdf("PAMAP2_Dataset/protocol_all_subj.csv", mode='w')
pd.DataFrame.to_hdf()

TypeError: to_hdf() missing 1 required positional argument: 'key'

In [96]:
read_df = pd.read_csv("PAMAP2_Dataset/protocol_all_subj.csv", index= False)
read_df