# Определение активности субъекта

## Чтение данных с одного субъекта

In [23]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
# from skimage import transform

In [102]:
# Raw protocol recording of a single subject.
subj_fname = 'PAMAP2_Dataset/Protocol/subject101.dat'

# Channels recorded for every sensor placement, in file order.
_imu_channels = [
    'temperature',
    '3Da_x scale_16', '3Da_y scale_16', '3Da_z scale_16',
    '3Da_x scale_6', '3Da_y scale_6', '3Da_z scale_6',
    '3Dg_x', '3Dg_y', '3Dg_z',
    '3Dm_x', '3Dm_y', '3Dm_z',
    'orientation_0', 'orientation_1', 'orientation_2', 'orientation_3',
]

# Full column list: three leading columns, then the same channel group
# repeated for the hand, chest and ankle placements (54 names in total).
col_names = ['timestamp', 'activityID', 'heart rate'] + [
    channel + ' ' + placement
    for placement in ('hand', 'chest', 'ankle')
    for channel in _imu_channels
]

In [103]:
# Parse the space-separated recording; the file supplies no header row,
# so the column names are passed in explicitly.
data = pd.read_csv(
    subj_fname,
    sep=' ',
    names=col_names,
)

In [104]:
# Show one randomly chosen row as a quick sanity check of the parsed columns.
data.sample()

Unnamed: 0,timestamp,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Da_x scale_6 hand,3Da_y scale_6 hand,3Da_z scale_6 hand,...,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle,orientation_0 ankle,orientation_1 ankle,orientation_2 ankle,orientation_3 ankle
140660,1414.98,16,,33.6875,-10.0406,3.8822,3.80231,-8.64246,5.32388,4.50608,...,0.014146,0.242432,-0.001987,13.526,-13.9562,-3.65415,1.0,0.0,0.0,0.0


### Удаление ненужных записей 
Удаляем записи с activityID = 0 и столбцы с невалидными измерениями (указано в readme к датасету)

In [107]:
# Channels kept for every sensor placement: temperature, the scale_16
# accelerometer, the gyroscope and the magnetometer.
_kept_channels = [
    'temperature',
    '3Da_x scale_16', '3Da_y scale_16', '3Da_z scale_16',
    '3Dg_x', '3Dg_y', '3Dg_z',
    '3Dm_x', '3Dm_y', '3Dm_z',
]

# Columns retained for modelling: the label, the heart rate, and the kept
# channels for the hand, chest and ankle placements (32 names in total).
good_cols = ['activityID', 'heart rate'] + [
    channel + ' ' + placement
    for placement in ('hand', 'chest', 'ankle')
    for channel in _kept_channels
]



# Keep only the retained columns, in the order given by good_cols.
data_gc = data.loc[:, good_cols]

In [108]:
# Show one random row to confirm that only the retained columns remain.
data_gc.sample()

Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
324888,5,171.0,30.625,-7.94836,-4.40732,-0.602922,-0.040025,-0.902947,-3.69771,-33.3304,...,33.125,17.7803,1.23639,-7.13494,1.53863,0.884771,-4.27804,-60.487,-21.3581,19.5238


In [109]:
# Drop every record whose activityID is 0.
has_activity = data_gc['activityID'] != 0
data_gcu = data_gc.loc[has_activity]

In [110]:
# Show one random row of the filtered frame.
data_gcu.sample()

Unnamed: 0,activityID,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
39041,2,,32.25,0.971407,6.7087,7.0752,-0.062435,-0.004289,-0.006565,-29.4794,...,32.625,-1.12648,0.923164,-10.1053,0.020048,-0.023577,-0.009192,-11.5209,32.9223,72.9271


### Предобработка данных
Здесь и далее activityID считаем меткой класса.

In [31]:
# from read_func import *

Выделение меток классов

In [111]:
# Split off the class labels. pop() removes the column from data_gcu in
# place, so from here on data_gcu holds feature columns only.
# NOTE: re-running this cell raises KeyError because the column is gone.
Y_act = data_gcu.pop("activityID")

In [112]:
# Display the extracted label series.
Y_act

2928       1
2929       1
2930       1
2931       1
2932       1
          ..
361795    24
361796    24
361797    24
361798    24
361799    24
Name: activityID, Length: 249957, dtype: int64

Заполнение пропусков (NaN)

In [113]:
# Fill missing readings column by column:
#   1. linear interpolation between neighbouring valid samples;
#   2. backward fill for gaps at the start of a column, which forward
#      interpolation cannot reach;
#   3. forward fill as a final safety net for anything still missing.
# DataFrame.bfill()/ffill() replace fillna(method=...), which is deprecated
# in recent pandas releases.
X_nonans = (
    data_gcu.interpolate(axis=0, method='linear')
    .bfill(axis=0)
    .ffill(axis=0)
)

In [116]:
# Pick a random row position and show five consecutive rows of the
# unfilled frame, for comparison with the gap-filled version.
n = np.random.randint(len(X_nonans))
data_gcu.iloc[n:n + 5]

Unnamed: 0,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,3Dm_y hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
197924,,33.6875,-11.565,4.71528,6.50927,-2.0572,0.540675,-0.64423,80.9785,-45.2454,...,34.875,2.02489,-7.40414,-1.686,-0.958421,-0.636759,3.05781,-55.821,-51.105,3.81973
197925,137.0,33.6875,-11.235,4.02841,6.36258,-2.03032,0.528419,-0.35098,81.405,-45.5846,...,34.875,2.54544,-5.74399,-0.8034,-0.630879,-0.752215,3.2947,-57.7556,-48.2954,5.10331
197926,,33.6875,-10.7992,3.22516,6.02543,-1.8003,0.537589,-0.0477,81.1576,-46.864,...,34.875,2.76052,-4.60186,-1.34854,-0.299538,-0.935935,3.54278,-60.0479,-44.7919,4.91187
197927,,33.6875,-9.98329,2.53316,5.73209,-1.35079,0.468032,0.232508,81.3685,-48.0686,...,34.875,3.31407,-3.9947,-1.73252,-0.093316,-1.09211,3.81835,-63.5629,-41.1545,6.2172
197928,,33.6875,-9.16342,2.22087,5.47601,-0.744798,0.44796,0.508837,80.1413,-49.5253,...,34.875,4.47922,-3.05284,-1.72642,-0.053989,-1.1739,4.17847,-66.0907,-36.9776,5.78732


In [117]:
# Same five row positions after gap filling.
X_nonans.iloc[n:n+5]

Unnamed: 0,heart rate,temperature hand,3Da_x scale_16 hand,3Da_y scale_16 hand,3Da_z scale_16 hand,3Dg_x hand,3Dg_y hand,3Dg_z hand,3Dm_x hand,3Dm_y hand,...,temperature ankle,3Da_x scale_16 ankle,3Da_y scale_16 ankle,3Da_z scale_16 ankle,3Dg_x ankle,3Dg_y ankle,3Dg_z ankle,3Dm_x ankle,3Dm_y ankle,3Dm_z ankle
197924,137.090909,33.6875,-11.565,4.71528,6.50927,-2.0572,0.540675,-0.64423,80.9785,-45.2454,...,34.875,2.02489,-7.40414,-1.686,-0.958421,-0.636759,3.05781,-55.821,-51.105,3.81973
197925,137.0,33.6875,-11.235,4.02841,6.36258,-2.03032,0.528419,-0.35098,81.405,-45.5846,...,34.875,2.54544,-5.74399,-0.8034,-0.630879,-0.752215,3.2947,-57.7556,-48.2954,5.10331
197926,137.0,33.6875,-10.7992,3.22516,6.02543,-1.8003,0.537589,-0.0477,81.1576,-46.864,...,34.875,2.76052,-4.60186,-1.34854,-0.299538,-0.935935,3.54278,-60.0479,-44.7919,4.91187
197927,137.0,33.6875,-9.98329,2.53316,5.73209,-1.35079,0.468032,0.232508,81.3685,-48.0686,...,34.875,3.31407,-3.9947,-1.73252,-0.093316,-1.09211,3.81835,-63.5629,-41.1545,6.2172
197928,137.0,33.6875,-9.16342,2.22087,5.47601,-0.744798,0.44796,0.508837,80.1413,-49.5253,...,34.875,4.47922,-3.05284,-1.72642,-0.053989,-1.1739,4.17847,-66.0907,-36.9776,5.78732


In [123]:
# Un-normalised feature matrix as a plain NumPy array, copied out of the
# gap-filled frame.
X_nonorm = np.array(X_nonans, copy=True)

In [124]:
# Inspect one randomly chosen row of the un-normalised feature matrix.
n = np.random.randint(len(X_nonans))
X_nonorm[n]

array([ 1.82000e+02,  3.02500e+01, -2.47525e+01,  1.26679e+01,
        1.01721e+01,  1.58742e-01,  5.88818e-01,  4.60008e-01,
        6.28277e+01, -3.26482e+01,  5.59506e+00,  3.38750e+01,
        2.48052e+00,  4.76403e+01,  1.01160e+01,  4.51433e-01,
        3.28687e-01, -6.28875e-01, -3.48628e+01, -5.53654e+01,
        2.10520e+01,  3.31875e+01,  1.10400e+01,  1.39156e+01,
       -1.10259e+01,  2.41323e+00, -1.31875e+00, -2.02614e+00,
       -4.17983e+01, -3.68085e+01,  4.53234e+01])

Нормализация

In [120]:
# Min-max normalisation: rescale every feature column to [0, 1].
# This is done with vectorised NumPy reductions; Python's built-in
# min()/max() would iterate element by element over each column and give
# order-dependent results if a NaN were present.
feature_values = np.asarray(X_nonans, dtype=float)
feature_min = feature_values.min(axis=0)
feature_span = feature_values.max(axis=0) - feature_min

# A constant column has zero span. Dividing by 1 instead maps it to all
# zeros, which matches the usual convention and avoids division by zero.
safe_span = np.where(feature_span != 0, feature_span, 1.0)

# Result has one row per sample and one column per feature.
X_norm = (feature_values - feature_min) / safe_span

In [122]:
# Inspect one randomly chosen row of the normalised feature matrix.
n = np.random.randint(len(X_nonans))
X_norm[n]

array([0.47619048, 0.98305085, 0.56432584, 0.33756192, 0.28887681,
       0.42621113, 0.47285684, 0.33028589, 0.68053764, 0.49811192,
       0.59016517, 0.77777778, 0.5058106 , 0.25901807, 0.70439901,
       0.32492238, 0.48646405, 0.60959034, 0.32011086, 0.33796038,
       0.62492149, 0.97058824, 0.30966263, 0.52287052, 0.60312471,
       0.46573677, 0.62213362, 0.38766418, 0.40518787, 0.40089759,
       0.56102861])