# Определение активности субъекта

## Чтение данных с одного субъекта

In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
# from skimage import transform

In [3]:
# Names for the 54 columns of one raw protocol row: three general fields,
# then the same 17 per-sensor fields repeated for hand, chest and ankle.
col_names = [
    # general
    'timestamp', 'activityID', 'heart rate',
    # hand
    'temperature hand',
    '3Da_x scale_16 hand', '3Da_y scale_16 hand', '3Da_z scale_16 hand',
    '3Da_x scale_6 hand', '3Da_y scale_6 hand', '3Da_z scale_6 hand',
    '3Dg_x hand', '3Dg_y hand', '3Dg_z hand',
    '3Dm_x hand', '3Dm_y hand', '3Dm_z hand',
    'orientation_0 hand', 'orientation_1 hand', 'orientation_2 hand', 'orientation_3 hand',
    # chest
    'temperature chest',
    '3Da_x scale_16 chest', '3Da_y scale_16 chest', '3Da_z scale_16 chest',
    '3Da_x scale_6 chest', '3Da_y scale_6 chest', '3Da_z scale_6 chest',
    '3Dg_x chest', '3Dg_y chest', '3Dg_z chest',
    '3Dm_x chest', '3Dm_y chest', '3Dm_z chest',
    'orientation_0 chest', 'orientation_1 chest', 'orientation_2 chest', 'orientation_3 chest',
    # ankle
    'temperature ankle',
    '3Da_x scale_16 ankle', '3Da_y scale_16 ankle', '3Da_z scale_16 ankle',
    '3Da_x scale_6 ankle', '3Da_y scale_6 ankle', '3Da_z scale_6 ankle',
    '3Dg_x ankle', '3Dg_y ankle', '3Dg_z ankle',
    '3Dm_x ankle', '3Dm_y ankle', '3Dm_z ankle',
    'orientation_0 ankle', 'orientation_1 ankle', 'orientation_2 ankle', 'orientation_3 ankle',
]

In [4]:
# Columns kept for analysis: the activity label, heart rate and, per sensor
# placement, temperature plus the scale_16 accelerometer, gyroscope ("3Dg")
# and magnetometer ("3Dm") axes. Compared with col_names this leaves out the
# timestamp, the scale_6 accelerometer axes and the orientation fields.
good_cols = [
    'activityID', 'heart rate',
    # hand
    'temperature hand',
    '3Da_x scale_16 hand', '3Da_y scale_16 hand', '3Da_z scale_16 hand',
    '3Dg_x hand', '3Dg_y hand', '3Dg_z hand',
    '3Dm_x hand', '3Dm_y hand', '3Dm_z hand',
    # chest
    'temperature chest',
    '3Da_x scale_16 chest', '3Da_y scale_16 chest', '3Da_z scale_16 chest',
    '3Dg_x chest', '3Dg_y chest', '3Dg_z chest',
    '3Dm_x chest', '3Dm_y chest', '3Dm_z chest',
    # ankle
    'temperature ankle',
    '3Da_x scale_16 ankle', '3Da_y scale_16 ankle', '3Da_z scale_16 ankle',
    '3Dg_x ankle', '3Dg_y ankle', '3Dg_z ankle',
    '3Dm_x ankle', '3Dm_y ankle', '3Dm_z ankle',
]

In [5]:
def get_good_data(fname, delete_zero_activity = True):
    """Read one space-separated subject file and keep only the analysis columns.

    Parameters
    ----------
    fname : path of the file to read; it is parsed without a header row and
        its columns are named after ``col_names``.
    delete_zero_activity : when true (default), rows whose ``activityID`` is 0
        are dropped (presumably the "no labelled activity" rows - confirm
        against the dataset description).

    Returns
    -------
    pandas.DataFrame restricted to the ``good_cols`` columns.
    """
    raw = pd.read_csv(fname, names = col_names, sep = ' ')
    selected = raw[good_cols]

    if not delete_zero_activity:
        return selected

    return selected[selected.activityID != 0]

In [7]:
# Load the filtered table for a single subject (subject 101) to explore it.
subj_fname = 'PAMAP2_Dataset/Protocol/subject101.dat'
df = get_good_data(subj_fname)

In [8]:
# Display one randomly chosen row as a quick sanity check of the loaded table.
df.sample()

Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
320447,5,,30.8125,7.91731,2.91115,0.110378,0.050064,-1.69267,-1.18515,46.2438,...,33.25,14.1371,2.14373,-3.85923,1.53655,-2.38077,7.49638,-42.1469,-32.0606,-4.50593


### Выделяем активности

In [9]:
def data_fill_na(data):
    """Return a copy of ``data`` with missing values filled along the rows.

    Gaps are first filled by linear interpolation down each column. Whatever
    is still missing afterwards (e.g. leading values, which interpolation
    cannot reach) is back-filled from the next valid value and finally
    forward-filled from the previous one. A column that contains no valid
    value at all stays NaN.

    ``bfill()`` / ``ffill()`` are used instead of ``fillna(method=...)``,
    which is deprecated in recent pandas releases.

    Parameters
    ----------
    data : pandas.DataFrame (or Series) of numeric values.

    Returns
    -------
    A new object of the same shape; ``data`` itself is not modified.
    """
    data_nonans = data.interpolate(axis = 0, method='linear')
    data_nonans = data_nonans.bfill(axis = 0)
    data_nonans = data_nonans.ffill(axis = 0)
    return data_nonans

In [17]:
def get_activity(data, activityID, with_fill = True):
    """Extract the signals recorded for one activity as a 2-D array.

    Parameters
    ----------
    data : DataFrame with an ``activityID`` column plus signal columns.
    activityID : label of the activity to select.
    with_fill : when true (default), missing values in the selected rows are
        filled with ``data_fill_na`` before conversion.

    Returns
    -------
    numpy.ndarray with one row per signal column (the ``activityID`` column is
    removed) and one column per selected sample, i.e. the selected table
    transposed. ``data`` itself is left untouched.
    """
    matching_rows = data[data.activityID == activityID]
    signals = matching_rows.drop(columns='activityID')
    if with_fill:
        signals = data_fill_na(signals)
    return np.array(signals).T

In [75]:
# Number of samples the loaded subject has for activity 5
# (length of the first signal row returned by get_activity).
len(get_activity(df, 5)[0])

21265

### Собираем один DataFrame

In [102]:
def get_df(subject_ids=range(1, 10), data_dir='PAMAP2_Dataset/Protocol'):
    """Build one table with a row per (subject, activity) recording.

    For every subject file ``<data_dir>/subject10<i>.dat`` the filtered data is
    loaded with ``get_good_data`` and split by activity with ``get_activity``.
    Each resulting row holds the activity label in ``activityID`` and, in every
    other ``good_cols`` column, the full 1-D signal array for that recording.

    Parameters
    ----------
    subject_ids : iterable of subject numbers ``i``; defaults to 1..9.
    data_dir : directory containing the subject files.

    Returns
    -------
    pandas.DataFrame with columns ``good_cols``, object dtype and a fresh
    0..n-1 index. It is empty when no recording was found.
    """
    rows = []

    for i in tqdm(subject_ids):
        subj_fname = f'{data_dir}/subject10{i}.dat'
        subj_df = get_good_data(subj_fname)

        # Take the activity list from the subject being processed, not from
        # some other table: subjects do not all perform the same activities.
        for act in subj_df['activityID'].unique().tolist():
            signals = list(get_activity(subj_df, act))
            # Skip recordings without any samples.
            if signals and len(signals[0]):
                rows.append([act] + signals)

    # Building the frame once from all rows avoids repeated concatenation and
    # yields the default 0..n-1 index directly.
    return pd.DataFrame(rows, columns=good_cols, dtype = object)

In [103]:
# Build the combined per-recording table for all subjects and show one random row.
all_df = get_df()
all_df.sample()

100%|████████████████████████████████████████████████████████████████████████████████████| 9/9 [00:18<00:00,  2.07s/it]


Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
30,13,"[93.0, 93.0, 93.0, 93.0, 93.0, 93.0, 93.0, 93....","[32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32.0, 32....","[-8.9842, -9.02441, -9.06273, -8.94535, -8.843...","[2.42466, 2.65158, 2.576, 2.57613, 2.68605, 2....","[2.97633, 2.8595, 2.85918, 2.9764, 2.63117, 2....","[-0.0314765, 0.00116463, -0.00155612, -0.00924...","[0.0315177, 0.0178532, 0.018084, 0.00157553, -...","[-0.00166257, -0.00251298, 0.00748611, -0.0098...","[21.9491, 21.5198, 21.7414, 22.2889, 22.2997, ...",...,"[33.9375, 33.9375, 33.9375, 33.9375, 33.9375, ...","[9.69769, 9.67076, 9.73952, 9.74412, 9.81287, ...","[0.674057, 0.785309, 0.672535, 0.709219, 0.596...","[-1.8118, -1.46546, -1.657, -1.50283, -1.69437...","[0.0509772, 0.0450844, 0.0105264, 0.0131189, 0...","[-0.0432215, -0.0131225, -0.000626505, -0.0049...","[0.00409657, -0.0113914, -0.0216058, 0.0374412...","[-51.0636, -51.0611, -51.7373, -51.0688, -50.9...","[21.6909, 21.5788, 21.7162, 22.1307, 21.7956, ...","[-15.8662, -15.7436, -15.9842, -15.7377, -15.7..."


Сохранение не работает...

In [95]:
# The combined table is bound to `all_df`; `adf` exists only inside get_df().
# NOTE(review): CSV stores every array-valued cell as its (abbreviated) string
# representation, so the signals cannot be restored when the file is read
# back - consider DataFrame.to_pickle / pd.read_pickle for a lossless round trip.
all_df.to_csv("PAMAP2_Dataset/protocol_all_subj.csv", index_label='Index', )

In [96]:
# Reload the combined table from CSV, using the saved 'Index' column as row index.
# NOTE(review): cells that held arrays when saved presumably come back as plain
# strings - confirm before using them as signals.
all_df = pd.read_csv("PAMAP2_Dataset/protocol_all_subj.csv", index_col='Index')

### Смотрим на длину активностей

In [131]:
def sep_by_len(df, min_act_len, ret_min_len = False) -> (pd.DataFrame, np.ndarray):
    """Select recordings that are at least ``min_act_len`` samples long.

    The length of a recording is taken from its ``'heart rate'`` cell.

    Parameters
    ----------
    df : table with an ``activityID`` column and a ``'heart rate'`` column
        whose cells are sequences.
    min_act_len : minimum number of samples a recording needs to be kept.
    ret_min_len : selects what is returned (see below).

    Returns
    -------
    Despite the tuple-style annotation, exactly one object is returned:

    * ``ret_min_len`` false (default): the kept rows of ``df``;
    * ``ret_min_len`` true: an int64 array holding, for every distinct
      activity (in order of first appearance in ``df``), the shortest length
      among its kept recordings, or -1 if none was kept.
    """
    activity_ids = df['activityID'].unique().tolist()
    shortest = np.full(len(activity_ids), -1, dtype = np.int64)
    kept_positions = []

    for position in range(len(df)):
        row = df.iloc[position]
        n_samples = len(row['heart rate'])
        if n_samples < min_act_len:
            continue

        kept_positions.append(position)
        slot = activity_ids.index(row['activityID'])
        # -1 marks "nothing recorded yet" for this activity.
        if shortest[slot] == -1 or n_samples < shortest[slot]:
            shortest[slot] = n_samples

    return shortest if ret_min_len else df.iloc[kept_positions]

Минимальные длины

In [132]:
# With a threshold of 0 every recording is kept, so this yields the shortest
# recording length for each activity (ordered by first appearance in all_df).
lens = sep_by_len(all_df, 0, ret_min_len=True)
lens

array([22044, 12282, 20533, 23573, 20037, 10389,  9655, 22253, 20265,
       20486,     1,   256], dtype=int64)

Выбираем активности с 20000+ длиной

In [133]:
# Keep only the recordings that contain at least 20000 samples and summarise them.
activities = sep_by_len(all_df, 20000)
activities.describe()

Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
count,64,64,64,64,64,64,64,64,64,64,...,64,64,64,64,64,64,64,64,64,64
unique,9,64,64,64,64,64,64,64,64,64,...,64,64,64,64,64,64,64,64,64,64
top,1,"[100.0, 100.0, 100.0, 100.0, 100.0, 100.0, 100...","[30.375, 30.375, 30.375, 30.375, 30.375, 30.37...","[2.2153, 2.29196, 2.2909, 2.218, 2.30106, 2.07...","[8.27915, 7.67288, 7.1424, 7.14365, 7.25857, 7...","[5.58753, 5.74467, 5.82342, 5.8993, 6.09259, 6...","[-0.00475004, -0.17171, -0.238241, -0.192912, ...","[0.0375788, 0.0254788, 0.0112136, 0.0190534, -...","[-0.011145, -0.00953821, 0.000830722, 0.013374...","[8.932, 9.583, 9.05516, 9.92698, 9.15626, 8.60...",...,"[30.75, 30.75, 30.75, 30.75, 30.75, 30.75, 30....","[9.73855, 9.69762, 9.69633, 9.6637, 9.77578, 9...","[-1.84761, -1.88438, -1.92203, -1.84714, -1.88...","[0.0951561, -0.0208042, -0.0591734, 0.0943855,...","[0.00290826, 0.020882, -0.0353922, -0.0325136,...","[-0.0277138, 0.000944724, -0.0524217, -0.01884...","[0.00175228, 0.00600704, -0.00488214, 0.026949...","[-61.1081, -60.8916, -60.3407, -60.7646, -60.2...","[-36.8636, -36.3197, -35.7842, -37.1028, -37.1...","[-58.3696, -58.3656, -58.6119, -57.8799, -57.8..."
freq,8,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1


In [181]:
def cut_act(df, cut_len, count=-1, random_start = False) -> pd.DataFrame:
    '''
    Cut every recording into consecutive, non-overlapping segments of cut_len samples.

    The first column of df is treated as the label and copied to every segment;
    every other column must hold a sliceable 1-D signal. The length of a
    recording is read from its 'heart rate' cell. Recordings shorter than
    cut_len produce no segment; a recording of exactly cut_len samples
    produces one.

    Parameters:
        df: table with one recording per row.
        cut_len: segment length in samples (positive integer).
        count: maximum number of segments taken from one recording;
            a negative value (default -1) means "as many as fit".
        random_start: if true, the first segment starts at a random offset
            chosen so that all segments taken from the recording (with due
            regard for count) still fit inside it; otherwise it starts at 0.

    Returns:
        DataFrame with the columns of df, object dtype, one row per segment
        and a fresh 0..n-1 index.

    Raises:
        ValueError: if cut_len is smaller than 1.
    '''
    if cut_len < 1:
        raise ValueError('cut_len must be a positive integer')

    segments = []
    for i in range(len(df)):
        record = df.iloc[i]
        total = len(record['heart rate'])

        n_fit = total // cut_len
        n_take = n_fit if count < 0 else min(count, n_fit)
        if n_take == 0:
            continue

        # Samples left over once the requested segments are reserved; the
        # random offset may use any of them without running past the end.
        slack = total - n_take * cut_len
        start = np.random.randint(0, slack + 1) if random_start else 0

        label, *signals = list(record)
        for seg in range(n_take):
            lo = start + seg * cut_len
            segments.append([label] + [sig[lo:lo + cut_len] for sig in signals])

    # One construction at the end instead of growing a frame row by row.
    return pd.DataFrame(segments, columns=list(df.columns), dtype = object)

In [186]:
# Cut every selected recording into segments of 20000 samples.
cut_df = cut_act(activities, 20000)
# Length of the first signal of the first segment. Purely positional
# .iloc[row, col] is used because integer keys passed to [] on a
# string-labelled Series are deprecated as positional lookups.
len(cut_df.iloc[0, 1])

20000

### Нормализация

In [198]:
def normalize_array(arr) -> np.ndarray:
    """Min-max scale a numeric sequence to the range [0, 1].

    Parameters
    ----------
    arr : array-like of numbers (ndarray, list, ...); converted to a float array.

    Returns
    -------
    numpy.ndarray of floats with the same shape as ``arr``:
    ``(arr - min) / (max - min)``. A constant input maps to all zeros and an
    empty input yields an empty array. If the input contains NaN, min and max
    are NaN and the whole result is NaN.
    """
    values = np.asarray(arr, dtype=float)
    if values.size == 0:
        # min()/max() are undefined for empty input; nothing to scale.
        return values.copy()

    # Vectorised reductions instead of Python-level min()/max() iteration.
    x_min = values.min()
    x_max = values.max()
    if x_min != x_max:
        return (values - x_min) / (x_max - x_min)

    # Constant signal: avoid dividing by zero.
    return np.zeros_like(values)
    

def normalize_df(df) -> pd.DataFrame: # TODO: finish
    """Return a table in which every signal is scaled with ``normalize_array``.

    For each row of ``df`` the ``activityID`` value is copied unchanged and
    every cell after the first column is passed through ``normalize_array``.

    Parameters
    ----------
    df : table whose first column is the label and whose remaining cells are
        numeric sequences.

    Returns
    -------
    pandas.DataFrame with columns ``good_cols``, object dtype and a fresh
    default index.
    """
    normalized_rows = []
    for position in range(len(df)):
        row = df.iloc[position]
        scaled_signals = [normalize_array(signal) for signal in row[1:]]
        normalized_rows.append([row['activityID']] + scaled_signals)

    return pd.DataFrame(normalized_rows, columns=good_cols, dtype = object)

In [203]:
# Normalise every signal of every segment and show one random row of the result.
n_df = normalize_df(cut_df)
n_df.sample()

Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
10,2,"[0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.95, 0.9...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.4451911808635308, 0.44526040350191864, 0.44...","[0.3075810595362546, 0.3121682653069226, 0.315...","[0.42179452579415566, 0.4237484610382433, 0.41...","[0.5736236365842091, 0.5720285965318599, 0.572...","[0.4554039277314264, 0.4579876068899164, 0.450...","[0.5328578487377673, 0.5359796249633884, 0.533...","[0.9052753491379464, 0.9071174048508059, 0.905...",...,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.3639895192877763, 0.3623385386246288, 0.364...","[0.45398629916830724, 0.44940797139298483, 0.4...","[0.5492608775718615, 0.5513336430448831, 0.559...","[0.4635702887032733, 0.46310919620297253, 0.46...","[0.5251245033356226, 0.5303276635926617, 0.513...","[0.3763297270508589, 0.37097525473071324, 0.37...","[0.7784902622007298, 0.7465833487942238, 0.773...","[0.45354579099261594, 0.4417920547410489, 0.44...","[0.844224345074137, 0.8172727539119734, 0.8259..."
