In [1]:
import numpy as np
import matplotlib.pyplot as plt

# You may change the mhealth_activity module but your algorithm must support the original version
from mhealth_activity import Recording, Trace, Activity, WatchLocation, Path

# For interactive plots, uncomment the following line
# %matplotlib widget
import os
import pandas as pd
import pickle
from tqdm import tqdm
from scipy.fft import fft, fftfreq
import scipy.stats as stats
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from scipy.signal import find_peaks
from scipy.signal import peak_prominences
from sklearn.metrics import mean_absolute_error,accuracy_score,precision_score,recall_score,confusion_matrix,classification_report,f1_score
from multiprocessing import Pool

Load feature extractor for accelerometer, gyroscope and magnetometer

In [15]:
def features_extraction_common(inp): 
    FEATURES = ['Median', 'Numneg', 'Numpos', 'Numabovmed', 'Mean', 'STD', 'MAD', 'Var', 'Min', 'Max', 'SMA', 'Energy', 'IQR', 'Entropy', 'Npeaks', 'avgprom', 'avgpeakdist', 'Sum_f', 'Max_f', 'NPeak_f', 'Avgprom_f','avgpeakdist_f', 'Mean_f', 'Skew_f', 'Kurtosis_f']

    
    print(FEATURES)
    Median=[];Numneg=[];Numpos=[];Numabovmed=[];
    Min=[];Max=[];Mean=[];Mad=[];Sma=[];Eng=[];Iqr=[];Entr=[];Std=[];Var=[];Kurt=[];Skew=[];Npeaks=[];Avgprom=[]
    Min_d=[];Max_d=[];Mean_d=[];Mad_d=[];Sma_d=[];Eng_d=[];Iqr_d=[];Entr_d=[];Std_d=[];Var_d=[];
    Max_f=[];NPeak_f=[];Avgprom_f=[];Mean_f=[];Skew_f=[];Kurtosis_f=[];Sum_f=[]; Avgpeakdist=[]; Avgpeakdist_f=[];
    
    #X = df.values
    ## TIME DOMAIN ##
    #list of lists of lists, ugly as fuck but it works 
    Median.append(np.median(inp))
    Numneg.append(np.sum(np.array(inp) < 0, axis=0))
    Numpos.append(np.sum(np.array(inp) > 0, axis=0))
    Numabovmed.append(np.sum(np.array(inp) > np.median(inp), axis=0))

    Mean.append(np.mean(inp))
    Std.append(np.std(inp))
    #median absolute deviation
    Mad.append(stats.median_abs_deviation(inp, scale=1))
    Var.append(np.var(inp))
    Min.append(np.min(inp))
    Max.append(np.max(inp))
    #Signal Magnitude Area
    Sma.append(np.sum(inp))
    #energy measure
    Eng.append(np.sum(inp**2)/len(inp))
    Iqr.append(stats.iqr(inp))
    Entr.append(stats.entropy(inp))

    npeaks, _ = find_peaks(inp, distance=5)
    Npeaks.append(len(npeaks))
    prom = peak_prominences(inp, npeaks)
    Avgprom.append(np.mean(prom))

    Apeakdist = 0
    for i in range(len(npeaks)-1):
        Apeakdist += abs(npeaks[i] - npeaks[i+1])
    Avgpeakdist.append(Apeakdist/(len(npeaks)-1))

    ## FREQ DOMAIN ##
    ft = np.abs(fft(inp))
    Sum_f.append(np.sum(ft))
    Max_f.append(np.max(ft))

    npeaks, _ = find_peaks(ft, distance=5)
    NPeak_f.append(len(npeaks))
    prom = peak_prominences(ft, npeaks)
    Avgprom_f.append(np.mean(prom))

    Apeakdist = 0
    for i in range(len(npeaks)-1):
        Apeakdist += abs(npeaks[i] - npeaks[i+1])
    Avgpeakdist_f.append(Apeakdist/(len(npeaks)-1))

    Mean_f.append(np.mean(ft))
    Skew_f.append(stats.skew(ft))
    Kurtosis_f.append(stats.kurtosis(ft))

    #derivative
    # f = np.gradient(inp)
    # Mean_d.append(np.mean(inp))
    # Std_d.append(np.std(recording[0]))
    # #median absolute deviation
    # Mad_d.append(stats.median_abs_deviation(recording[0], scale=1))
    # Var_d.append(np.var(recording[0]))
    # Min_d.append(np.min(recording[0]))
    # Max_d.append(np.max(recording[0]))
    # #Signal Magnitude Area
    # Sma_d.append(np.sum(recording[0]))
    # #energy measure
    # Eng_d.append(np.sum(recording[0]**2)/len(recording[0]))
    # Iqr_d.append(stats.iqr(recording[0]))
    # Entr_d.append(stats.entropy(recording[0]))

    #Create dataframe from features
    return np.array([Median, Numneg, Numpos, Numabovmed, Mean,Std, Mad, Var, Min, Max, Sma, Eng, Iqr, Entr, Npeaks, Avgprom, Avgpeakdist, Sum_f, Max_f, NPeak_f, Avgprom_f, Avgpeakdist_f, Mean_f, Skew_f, Kurtosis_f]).reshape(-1) 



In [3]:
#load pickled training 3d norm accelerometer data
file = open('doruks_data/accel_mag_train.pkl', 'rb')
pickled = pickle.load(file)
accel_mag_train =  pd.DataFrame(((x,) for x in pickled), columns=['accel'])
print(accel_mag_train.shape)
file.close()

file = open('doruks_data/magneto_mag_train.pkl', 'rb')
pickled = pickle.load(file)
magneto_mag_train =  pd.DataFrame(((x,) for x in pickled), columns=['magneto'])
print(magneto_mag_train.shape)
file.close()

file = open('doruks_data/gyro_mag_train.pkl', 'rb')
pickled = pickle.load(file)
gyro_mag_train =  pd.DataFrame(((x,) for x in pickled), columns=['gyro'])
print(gyro_mag_train.shape)
file.close()
data = pd.read_pickle('doruks_data/pickled_and_sorted_training_data.pkl.zst')

pos_labels  = []
path_labels = []
for label in data["labels"]:
    path_labels.extend([label["path_idx"]])
    pos_labels.extend([label["watch_loc"]])




(396, 1)
(396, 1)
(396, 1)


In [9]:
unique_one = pd.read_pickle('unique_activity_ONE.pkl')
unique_two = pd.read_pickle('unique_activity_TWO.pkl')
unique_three = pd.read_pickle('unique_activity_THREE.pkl')

unique_all = pd.concat([unique_one, unique_two, unique_three])

In [10]:
unique_indices = unique_all.index
gyro_mag_train_unique = gyro_mag_train.loc[unique_indices]
magneto_mag_train_unique = magneto_mag_train.loc[unique_indices]
accel_mag_train_unique = accel_mag_train.loc[unique_indices]

In [11]:
unique_all = pd.concat([unique_all, gyro_mag_train_unique, magneto_mag_train_unique, accel_mag_train_unique], axis = 1)

In [12]:
unique_all

Unnamed: 0,path_idx,activities,longitude,latitude,speed,altitude,step_count,phone_steps,gyro,magneto,accel
0,2,[1],"Trace(title='longitude', total_time=584.87, sa...","Trace(title='latitude', total_time=584.87, sam...","Trace(title='speed', total_time=584.87, sample...","Trace(title='altitude', total_time=584.87, sam...",,"Trace(title='phone_steps', total_time=584.87, ...","[22.61732432467329, 32.41574505226869, 38.7220...","[88.27317439083511, 88.27317439083511, 88.2731...","[1.0192006185235551, 1.02640566041453, 1.02624..."
3,2,[1],"Trace(title='longitude', total_time=519.63, sa...","Trace(title='latitude', total_time=519.63, sam...","Trace(title='speed', total_time=519.63, sample...","Trace(title='altitude', total_time=519.63, sam...",,,"[2.1308285669884643, 2.207045512870681, 2.2404...","[0.0, 0.0, 0.0, 0.0, 121.69095770042424, 121.0...","[0.9986844349945474, 0.9949037212487427, 0.989..."
4,1,[1],"Trace(title='longitude', total_time=625.44, sa...","Trace(title='latitude', total_time=625.44, sam...","Trace(title='speed', total_time=625.44, sample...","Trace(title='altitude', total_time=625.44, sam...",,,"[69.36546489446876, 62.43667710215915, 57.4007...","[184.54677309477978, 185.84568806912017, 187.2...","[1.0257705146726634, 1.024516975727773, 1.0211..."
5,1,[1],"Trace(title='longitude', total_time=499.81, sa...","Trace(title='latitude', total_time=499.81, sam...","Trace(title='speed', total_time=499.81, sample...","Trace(title='altitude', total_time=499.81, sam...",,"Trace(title='phone_steps', total_time=499.81, ...","[2.6878866068861704, 2.681642556853665, 2.6323...","[107.14079832888719, 111.3795149529107, 109.57...","[1.0148285016682914, 1.0127172366558348, 1.022..."
6,3,[1],"Trace(title='longitude', total_time=473.97, sa...","Trace(title='latitude', total_time=473.97, sam...","Trace(title='speed', total_time=473.97, sample...","Trace(title='altitude', total_time=473.97, sam...",,"Trace(title='phone_steps', total_time=473.97, ...","[118.73803850997427, 115.8545955156364, 113.75...","[63.43675893352908, 63.43675893352908, 63.4367...","[1.0107218127569748, 1.0075784160018777, 1.007..."
...,...,...,...,...,...,...,...,...,...,...,...
189,4,[2],"Trace(title='longitude', total_time=237.11, sa...","Trace(title='latitude', total_time=237.11, sam...","Trace(title='speed', total_time=237.11, sample...","Trace(title='altitude', total_time=237.11, sam...",,"Trace(title='phone_steps', total_time=237.11, ...","[73.68375232923967, 84.23326174867726, 92.9884...","[112.46421388525157, 112.46421388525157, 113.8...","[1.014934158715569, 1.0144429913506725, 1.0152..."
320,1,[2],"Trace(title='longitude', total_time=321.26, sa...","Trace(title='latitude', total_time=321.26, sam...","Trace(title='speed', total_time=321.26, sample...","Trace(title='altitude', total_time=321.26, sam...",,"Trace(title='phone_steps', total_time=321.26, ...","[4.767608582907622, 4.6445157283409255, 4.5606...","[46.52941726371657, 46.52941726371657, 46.5294...","[1.0685734322740992, 1.0782110466251702, 1.073..."
379,2,[2],"Trace(title='longitude', total_time=278.21, sa...","Trace(title='latitude', total_time=278.21, sam...","Trace(title='speed', total_time=278.21, sample...","Trace(title='altitude', total_time=278.21, sam...",,"Trace(title='phone_steps', total_time=278.21, ...","[41.921978298121125, 35.40290955099068, 25.321...","[56.600713750892204, 54.68148470433248, 53.032...","[1.2550239691402134, 1.2608202401132897, 1.229..."
150,3,"[2, 3]","Trace(title='longitude', total_time=464.79, sa...","Trace(title='latitude', total_time=464.79, sam...","Trace(title='speed', total_time=464.79, sample...","Trace(title='altitude', total_time=464.79, sam...",,"Trace(title='phone_steps', total_time=464.79, ...","[4.9048166337065275, 4.497539384005777, 4.2386...","[78.57144557812747, 78.57144557812747, 77.4620...","[1.0190093067350645, 1.0224698274912738, 1.022..."


In [16]:
index = unique_indices

features_of_interest = np.array(["gyro", "magneto", "accel"])



extracted_data_dictionary = {}

for sample_index, sample in unique_all.iterrows():

    print(sample_index)
    feature_dictionary = {}

    for feature_name in features_of_interest:

        sample = unique_all.loc[sample_index][feature_name]


        print("Feature Name:", feature_name)
        print(" Time           Value")

        # Initialize a dictionary to store steps for each second
        values_per_second = {}


        if sample is not None:

            # Iterate over the timestamps and values
            for t, x in zip(sample.timestamps, sample.values):
                # Get the second part of the timestamp as the key
                #if int(t) % 10 == 0 AND #no other 10er in dict:  
                 #   print("10er:")

                second = int(t) #put in if statement for other intervals of pooling
                #print("second:", second, "real: ", t)
                # If the second is not in the dictionary, initialize it with an empty list
                if second not in values_per_second:
                    values_per_second[second] = np.array([])
                # Add the steps to the list for the current second
                values_per_second[second] = np.append(values_per_second[second], x)

            seconds_array = np.array([])
            averages_array = np.array([])

            # Calculate the average steps for each second

            
            for second, values in values_per_second.items():
                average_values = features_extraction_common(values) #np.average(values) #sum(values) / len(values)
                #print(f"{second}s \t{average_values:.5f} steps")

                seconds_array = np.append(seconds_array, second)
                averages_array = np.append(averages_array, average_values)
                

            #das pooled alles in eine sekunde jeweils, eine überlegung wert
            
            feature_dictionary["seconds"] = seconds_array
            feature_dictionary[feature_name]  = averages_array

        else:
            feature_dictionary[feature_name]  = np.array([])            

    #print(feature_dictionary)

    extracted_data_dictionary[sample_index] = feature_dictionary

0
Feature Name: gyro
 Time           Value


AttributeError: 'numpy.ndarray' object has no attribute 'timestamps'

Without Pooling but with 10er Segments

In [None]:
index = unique_indices

features_of_interest = np.array(["gyro", "magneto", "accel"])



extracted_data_dictionary = {}

for sample_index, sample in unique_all.iterrows():

    print(sample_index)
    feature_dictionary = {}

    for feature_name in features_of_interest:

        sample = unique_all.loc[sample_index][feature_name]


        print("Feature Name:", feature_name)
        print(" Time           Value")

        # Initialize a dictionary to store steps for each second
        values_per_interval = {}


        if sample is not None:

            # Iterate over the timestamps and values
            for t, x in zip(sample.timestamps, sample.values):
                # Get the second part of the timestamp as the key

                second = int(t) #put in if statement for other intervals of pooling

                if second % 10 == 0 AND second not in values_per_interval:
                    values_per_interval[second] = np.array([])
                    


                #print("second:", second, "real: ", t)
                # If the second is not in the dictionary, initialize it with an empty list
                if second not in values_per_second:
                    values_per_second[second] = np.array([])
                # Add the steps to the list for the current second
                values_per_second[second] = np.append(values_per_second[second], x)

            seconds_array = np.array([])
            averages_array = np.array([])

            # Calculate the average steps for each second

            
            for second, values in values_per_second.items():
                average_values = features_extraction_common(values) #np.average(values) #sum(values) / len(values)
                #print(f"{second}s \t{average_values:.5f} steps")

                seconds_array = np.append(seconds_array, second)
                averages_array = np.append(averages_array, average_values)
                

            #das pooled alles in eine sekunde jeweils, eine überlegung wert
            
            feature_dictionary["seconds"] = seconds_array
            feature_dictionary[feature_name]  = averages_array

        else:
            feature_dictionary[feature_name]  = np.array([])            

    #print(feature_dictionary)

    extracted_data_dictionary[sample_index] = feature_dictionary