In [1]:
import numpy as np
import matplotlib.pyplot as plt

# You may change the mhealth_activity module but your algorithm must support the original version
from mhealth_activity import Recording, Trace, Activity, WatchLocation, Path

# For interactive plots, uncomment the following line
# %matplotlib widget
import os
import pandas as pd
import pickle
import tqdm
from scipy.fft import fft, fftfreq
import scipy.stats as stats
from sklearn.model_selection import train_test_split



In [2]:
#read data
create_data_pickle = False
if create_data_pickle:
    files = os.listdir('data/train')
    list_of_dicts = []
    types_to_include = ['ax', 'ay', 'az', 'phone_ax', 'phone_ay', 'phone_az', 'speed', 'longitude', 'latitude', 'altitude', 'phone_steps']

    for file in tqdm(files):
        Dict = {}
        d = Recording(os.path.join('data/train',file))

        Dict['labels'] = d.labels
        for data_type in types_to_include:
            if data_type in d.data.keys():
                Dict[data_type] = d.data[data_type]
        list_of_dicts.append(Dict)

    data = pd.DataFrame(list_of_dicts)
    data.to_pickle(path='data/pickled_and_sorted_training_data.pkl.zst', compression={'method': 'zstd'})
else:
    data = pd.read_pickle('data/pickled_and_sorted_training_data.pkl.zst')


In [35]:
def features_extraction(df): 
    
    FEATURES = ['MIN','MAX','MEAN','RMS','VAR','STD','POWER','PEAK','P2P','CREST FACTOR','SKEW','KURTOSIS',
            'MAX_f','SUM_f','MEAN_f','VAR_f','PEAK_f','SKEW_f','KURTOSIS_f']
    
    Min=[];Max=[];Mean=[];Rms=[];Var=[];Std=[];Power=[];Peak=[];Skew=[];Kurtosis=[];P2p=[];CrestFactor=[];
    FormFactor=[]; PulseIndicator=[];
    Max_f=[];Sum_f=[];Mean_f=[];Var_f=[];Peak_f=[];Skew_f=[];Kurtosis_f=[]
    
    X = df.values
    ## TIME DOMAIN ##
    #list of lists of lists, ugly as fuck but it works 
    for recording in X:
        Min.append(np.min(recording[0]))
        Max.append(np.max(recording[0]))
        Mean.append(np.mean(recording[0]))
        Rms.append(np.sqrt(np.mean(recording[0]**2)))
        Var.append(np.var(recording[0]))
        Std.append(np.std(recording[0]))
        Power.append(np.mean(recording[0]**2))
        Peak.append(np.max(np.abs(recording[0])))
        P2p.append(np.ptp(recording[0]))
        CrestFactor.append(np.max(np.abs(recording[0]))/np.sqrt(np.mean(recording[0]**2)))
        Skew.append(stats.skew(recording[0]))
        Kurtosis.append(stats.kurtosis(recording[0]))
        FormFactor.append(np.sqrt(np.mean(recording[0]**2))/np.mean(recording[0]))
        PulseIndicator.append(np.max(np.abs(recording[0]))/np.mean(recording[0]))
        ## FREQ DOMAIN ##
        ft = fft(recording[0])
        S = np.abs(ft**2)/len(df)
        Max_f.append(np.max(S))
        Sum_f.append(np.sum(S))
        Mean_f.append(np.mean(S))
        Var_f.append(np.var(S))
        
        Peak_f.append(np.max(np.abs(S)))
        Skew_f.append(stats.skew(recording[0]))
        Kurtosis_f.append(stats.kurtosis(recording[0]))
    #Create dataframe from features
    df_features = pd.DataFrame(index = [FEATURES], 
                               data = [Min,Max,Mean,Rms,Var,Std,Power,Peak,P2p,CrestFactor,Skew,Kurtosis,
                                       Max_f,Sum_f,Mean_f,Var_f,Peak_f,Skew_f,Kurtosis_f])
    return df_features

In [6]:
#load pickled training 3d norm accelerometer data
file = open('data/accel_mag_train.pkl', 'rb')
pickled = pickle.load(file)
accel_mag_train =  pd.DataFrame(((x,) for x in pickled), columns=['lists'])
print(accel_mag_train.shape)
file.close()

labels = []
for label in data["labels"]:
    labels.extend([[label["path_idx"],label["activities"],label["step_count"],label["watch_loc"]]])
labels = pd.DataFrame(labels, columns =['path_idx', 'activities', "step_count", "smartwatch_location"])




(396, 1)


In [36]:
print(accel_mag_train.values[2][0])
train_features = pd.DataFrame.transpose(features_extraction(accel_mag_train))
train_features = train_features.reset_index(level=1)
train_features


[1.0076106  1.00275902 1.00581143 ... 1.1451823  1.15308679 1.1590346 ]


IndexError: Too many levels: Index has only 1 level, not 2

In [31]:
print(train_features.columns)
print(labels.columns)

random_forest_data = pd.concat([train_features,labels],axis=0)


MultiIndex([(         'MIN',),
            (         'MAX',),
            (        'MEAN',),
            (         'RMS',),
            (         'VAR',),
            (         'STD',),
            (       'POWER',),
            (        'PEAK',),
            (         'P2P',),
            ('CREST FACTOR',),
            (        'SKEW',),
            (    'KURTOSIS',),
            (       'MAX_f',),
            (       'SUM_f',),
            (      'MEAN_f',),
            (       'VAR_f',),
            (      'PEAK_f',),
            (      'SKEW_f',),
            (  'KURTOSIS_f',)],
           )
Index(['path_idx', 'activities', 'step_count', 'smartwatch_location'], dtype='object')


ValueError: no types given

In [None]:
#load pickled test 3d norm accelerometer data
file = open('data/accel_mag_test.pkl', 'rb')
pickled = pickle.load(file)
accel_mag =  pd.DataFrame(((x,) for x in pickled), columns=['lists'])

features_signal = features_extraction(accel_mag)

features_test_accel_mag = pd.DataFrame.transpose(features_signal)

for sample in acceleration[1:,1]:
    new = []
    for x, t in list(zip(sample.timestamps, sample.values)):
        new.append([x,t])
    new = pd.DataFrame(new)
    features_inside = features_extraction(new)
    features_test_y = pd.concat([features_test_x,pd.DataFrame.transpose(features_inside)])