In [1]:
import glob
import os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd

In [2]:
def calc_enmo(some_data):
    """Return the vector magnitude and ENMO for each row of accelerometer data.

    Parameters
    ----------
    some_data : pandas.DataFrame
        One column per axis (the callers in this notebook pass X, Y and Z).
        Every column is treated as one component of the acceleration vector.

    Returns
    -------
    tuple of pandas.Series
        ``(mag, enmo)`` where ``mag`` is the Euclidean norm of each row and
        ``enmo`` is ``mag - 1`` (gravity removed) with negative values set to 0.
        Both share the index of ``some_data``; the input is not modified.
    """
    # Square every component, sum across the axes and take the square root.
    # Whole-frame arithmetic replaces the element-wise ``applymap`` lambdas
    # (``DataFrame.applymap`` is deprecated in recent pandas releases).
    mag = np.sqrt((some_data ** 2).sum(axis=1))
    # To calculate ENMO subtract 1 (gravity) and floor negative values at zero.
    enmo = (mag - 1).clip(lower=0)

    return mag, enmo

In [31]:
def calc_mad(some_data, device, agg_len=5):
    """Compute the MAD of a device's vector magnitude over consecutive windows.

    For every window the MAD is the mean absolute deviation of the
    ``"<device> Magnitude"`` readings from their own window mean.

    Parameters
    ----------
    some_data : pandas.DataFrame
        Must contain ``"<device> Time"`` (datetime values) and
        ``"<device> Magnitude"``. Rows containing any missing value are
        dropped before processing. The first and last rows are taken as the
        start and end of the trial, so rows are expected in time order.
    device : str
        Column prefix, e.g. ``"Actigraph"`` or ``"Proxy"``.
    agg_len : int, optional
        Window length in seconds (default 5, the value previously hard-coded).

    Returns
    -------
    pandas.Series
        One MAD value per window, indexed by the window's end timestamp.
        Empty when there is no data or the trial is shorter than one window.

    Raises
    ------
    ValueError
        If ``agg_len`` is smaller than 1.
    """
    if agg_len < 1:
        raise ValueError("agg_len must be at least 1 second")

    some_data = some_data.dropna()
    time_name = device + " Time"
    mag_name = device + " Magnitude"

    # Nothing left after dropping incomplete rows: return an empty result
    # (still indexed by timestamps) instead of failing on the first-row lookup.
    if some_data.empty:
        return pd.Series(dtype=float, index=pd.DatetimeIndex([]))

    times = some_data[time_name]
    # Grab the first timestamp from the data (positional, so a non-unique
    # index cannot turn this into a multi-row lookup).
    start = times.iloc[0]
    # Total length of the trial in seconds.
    trial_length = (times.iloc[-1] - start).total_seconds()
    # NOTE(review): windows cover [start, start + agg_len - 1 s] inclusive and
    # the next one begins a second later, which presumes whole-second
    # timestamps. Under that assumption a trial spans trial_length + 1 seconds,
    # so this count may drop a complete final window - confirm before changing.
    n_windows = int(trial_length // agg_len)
    window_span = timedelta(seconds=agg_len - 1)
    # The original sanity check used 150 readings for a 5 s window, presumably
    # 30 Actigraph readings per second; scaled here with agg_len - confirm.
    max_actigraph_readings = 30 * agg_len

    all_mad = {}
    for _ in range(n_windows):
        end_time = start + window_span
        # Magnitudes recorded inside the current window (both ends inclusive).
        in_window = (times >= start) & (times <= end_time)
        magnitudes = some_data.loc[in_window, mag_name]
        n_readings = magnitudes.shape[0]

        if n_readings:
            # Mean absolute deviation from the window mean. The magnitude
            # column is addressed by label rather than by position in the
            # aggregated row, so the result does not depend on column order.
            mad = (magnitudes - magnitudes.mean()).abs().sum() / n_readings
        else:
            # A gap in the recording leaves the window empty.
            mad = np.nan

        if device == "Actigraph" and n_readings > max_actigraph_readings:
            print(f"ERROR {n_readings} ACTIGRAPH READINGS")

        # Store each MAD under the end time of its window.
        all_mad[end_time] = mad
        start = end_time + timedelta(seconds=1)

    if not all_mad:
        return pd.Series(dtype=float, index=pd.DatetimeIndex([]))
    return pd.Series(all_mad, dtype=float)

In [4]:
# Define the data directory and collect every aligned trial file.
# NOTE(review): this is an absolute, machine-specific location; anyone else
# running the notebook has to edit it.
path = "C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\"
# glob returns matches in arbitrary order, so sort them to process the trials
# in a deterministic order.
aligned_files = sorted(glob.glob(path + "Trial*//*_aligned.csv"))
aligned_files

['C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 01\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 02\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 03\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 04\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 05\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 06\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 07\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 08\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 09\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 10\\garmin_aligned.csv']

In [5]:
# Build a dictionary holding the data of each trial, keyed by trial number.
# Each value is a list with one DataFrame per Actigraph ID.
print("Reading in data from :")
data = {}
for file in aligned_files:
    # Take the trial number from the parent folder name ("Trial 01" -> "01")
    # instead of fixed character offsets, which only line up for one
    # particular absolute path.
    trial_num = os.path.basename(os.path.dirname(file)).split()[-1]
    print(f"Trial Number {trial_num}")
    temp = pd.read_csv(file)
    for time_col in ("Actigraph Time", "Proxy Time"):
        temp[time_col] = pd.to_datetime(temp[time_col])
    # Scale the proxy axes by 1/1000 (presumably milli-g to g, matching the
    # Actigraph units - TODO confirm).
    for axis_col in ("Proxy X", "Proxy Y", "Proxy Z"):
        temp[axis_col] = temp[axis_col] / 1000
    # Split by Actigraph ID without assuming a fixed number of devices.
    data[trial_num] = [group for _, group in temp.groupby("Actigraph ID")]
print("Finished")

Reading in data from :
Trial Number 01
Trial Number 02
Trial Number 03
Trial Number 04
Trial Number 05
Trial Number 06
Trial Number 07
Trial Number 08
Trial Number 09
Trial Number 10
Finished


In [6]:
# Calculate Magnitude and ENMO for each device in each trial, add them as
# columns to the frames stored in `data`, and save one combined CSV per trial.
for trial, device_pairs in data.items():  # each trial holds one frame per Actigraph
    if not device_pairs:
        # Nothing to combine or write for this trial.
        continue
    for dev_pair in device_pairs:
        # (device prefix, column position of its Magnitude column). ENMO goes
        # directly after it. Actigraph is handled first so the Proxy positions
        # already account for the two Actigraph columns, as in the original
        # insert order (10, 11, 18, 19).
        for device, mag_position in (("Actigraph", 10), ("Proxy", 18)):
            axes = [device + " X", device + " Y", device + " Z"]
            magnitude, enmo = calc_enmo(dev_pair.loc[:, axes])
            new_columns = ((device + " Magnitude", magnitude), (device + " ENMO", enmo))
            for offset, (column, values) in enumerate(new_columns):
                if column in dev_pair.columns:
                    # The cell was run before: refresh the values in place
                    # instead of failing on a duplicate insert.
                    dev_pair[column] = values
                else:
                    dev_pair.insert(mag_position + offset, column, values)
    # Combine all device pairs once and write the trial file, however many
    # pairs the trial has.
    trial_data = pd.concat(device_pairs)
    trial_data.to_csv(path + "Trial " + trial + "//garmin_aligned_v2.csv", index=False)
            


In [17]:
def aggregate_data(data, device):
    """Aggregate one device's readings per timestamp and attach MAD and meta data.

    Parameters
    ----------
    data : pandas.DataFrame
        Readings containing ``"Include"``, ``"Order"``, ``"Trial Number"``,
        ``"Round Number"``, ``"Speed"`` and, for the given device prefix, the
        ``Time``, ``ID``, ``X``, ``Y``, ``Z``, ``Magnitude`` and ``ENMO`` columns.
    device : str
        Column prefix, ``"Actigraph"`` or ``"Proxy"``.

    Returns
    -------
    pandas.DataFrame
        Trial meta data joined (inner, on ``"Order"``) with one row per
        ``"<device> Time"`` value where ``Include == 1``: the smallest
        ``Order``, the RMS of X/Y/Z, the maximum magnitude and ENMO, and the
        MAD of the window ending at that time (NaN for other timestamps).
    """
    time_col = device + " Time"
    axis_cols = [device + " X", device + " Y", device + " Z"]
    mag_col = device + " Magnitude"
    enmo_col = device + " ENMO"

    # Calculate the MAD for the device
    mad_by_time = calc_mad(data.loc[:, [time_col] + axis_cols + [mag_col]], device).rename(device + " MAD")

    def rms(values):
        # Root mean square of one group of readings.
        return np.sqrt(np.mean(values**2))

    # Aggregation applied to each column. String names are used for the
    # built-in reductions instead of passing numpy functions.
    device_aggs = {"Order": "min", axis_cols[0]: rms, axis_cols[1]: rms, axis_cols[2]: rms,
                   mag_col: "max", enmo_col: "max"}
    # Proxy rows with missing values are excluded from the aggregation.
    readings = data.dropna() if device == "Proxy" else data
    # Group the readings by timestamp and aggregate. reset_index() turns the
    # two group keys back into columns, giving the eight columns named below.
    # The original line assigned an undefined name `s` here.
    device_rms = readings.groupby(["Include", time_col]).agg(device_aggs).reset_index()
    # Rename the aggregated columns
    device_rms.columns = ["Include", time_col, "Order", device + " RMS X", device + " RMS Y", device + " RMS Z",
                          device + " MAX MAGNITUDE", device + " MAX ENMO"]
    # Keep only the rows flagged for inclusion
    device_rms = device_rms.loc[device_rms["Include"] == 1].drop(columns=["Include"])
    # Create a data frame that holds the trial meta data
    acti_meta = data.loc[:, ["Trial Number", "Round Number", "Speed", device + " ID", "Order"]]
    # Merge the MAD values (indexed by window end time) with the aggregated
    # x, y, z, magnitude and ENMO
    with_mad = device_rms.merge(mad_by_time, how="left", left_on=time_col, right_on=mad_by_time.index)
    # Merge aggregated data with trial meta data
    device_sec = acti_meta.merge(with_mad, how='inner', on="Order")
    return device_sec

In [32]:
# Calculate MAD for each trial, and also aggregate data to second level.
for trial, device_pairs in data.items():
    print(f"Processing Trial {trial}")
    # Aggregated frames for this trial, one per (actigraph, proxy) combination
    merged_frames = []
    # Iterate through each device pair (5 actigraphs and 2 proxies per trial)
    for acti_num, acti_pair in enumerate(device_pairs, start=1):
        print(f"Actigraph Number : {acti_num}")

        # Split the data by proxy ID; each item is a (proxy id, rows) tuple.
        # Exactly two proxies are expected here.
        proxy_1, proxy_2 = acti_pair.groupby("Proxy ID")
        if acti_num == 1:
            print(f"Proxy 1: {proxy_1[0]} \nProxy 2: {proxy_2[0]}")

        # Aggregate data and calculate MAD for the actigraph.
        # NOTE(review): only the first half of the rows is used, presumably
        # because the actigraph readings are repeated once per proxy - confirm.
        acti_data = aggregate_data(acti_pair.iloc[:acti_pair.shape[0]//2], "Actigraph")

        for _, proxy_rows in (proxy_1, proxy_2):
            # Aggregate data and calculate MAD for this proxy, then drop the
            # columns that the actigraph frame already provides.
            proxy_data = aggregate_data(proxy_rows, "Proxy").drop(
                columns=["Trial Number", 'Round Number', 'Speed', 'Order'])
            # Combine actigraph and proxy rows that share an index label
            merged_frames.append(acti_data.merge(proxy_data, how='inner', left_index=True, right_index=True))

    # Stack everything once and write the trial file after the last pair,
    # instead of only when a pair counter reaches a fixed value.
    if merged_frames:
        trial_final = pd.concat(merged_frames)
        trial_final.to_csv(path + "Trial " + trial + "//garmin_rms_v2.csv", index=False)

    

Processing Trial 01
Actigraph Number : 1
Proxy 1: 2428.0 
Proxy 2: 2458.0
Actigraph Number : 2
Actigraph Number : 3
Actigraph Number : 4
Actigraph Number : 5
Processing Trial 02
Actigraph Number : 1
Proxy 1: 2428.0 
Proxy 2: 2458.0
Actigraph Number : 2
Actigraph Number : 3
Actigraph Number : 4
Actigraph Number : 5
Processing Trial 03
Actigraph Number : 1
Proxy 1: 2342.0 
Proxy 2: 2374.0
Actigraph Number : 2
Actigraph Number : 3
Actigraph Number : 4
Actigraph Number : 5
Processing Trial 04
Actigraph Number : 1
Proxy 1: 2342.0 
Proxy 2: 2374.0
Actigraph Number : 2
Actigraph Number : 3
Actigraph Number : 4
Actigraph Number : 5
Processing Trial 05
Actigraph Number : 1
Proxy 1: 2390.0 
Proxy 2: 2454.0
Actigraph Number : 2
Actigraph Number : 3
Actigraph Number : 4
Actigraph Number : 5
Processing Trial 06
Actigraph Number : 1
Proxy 1: 2390.0 
Proxy 2: 2454.0
Actigraph Number : 2
Actigraph Number : 3
Actigraph Number : 4
Actigraph Number : 5
Processing Trial 07
Actigraph Number : 1
Proxy 1: 23

In [67]:
# Display the current contents of `aligned_files`.
aligned_files

['C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 01\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 02\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 03\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 04\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 05\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 06\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 07\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 08\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 09\\garmin_aligned.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Garmin\\Trial 10\\garmin_aligned.csv']

In [34]:
# Stack the per-trial aligned files (with Magnitude/ENMO columns) into one table.
# NOTE(review): this rebinds `aligned_files`, which earlier cells use for the
# original aligned inputs; re-running those cells afterwards would read the v2
# files instead.
# The pattern uses the lower-case "v2" that the files are written with, so it
# also matches on case-sensitive file systems.
aligned_files = sorted(glob.glob(path + "//Trial *//*aligned_v2.csv"))
if not aligned_files:
    raise FileNotFoundError("No *aligned_v2.csv files found under " + path)
# Read every file and concatenate once.
stacked_align = pd.concat([pd.read_csv(file) for file in aligned_files])

# Where the proxy reading is missing, blank out the values derived from it.
stacked_align.loc[stacked_align["Proxy X"].isna(), ["Proxy Magnitude", "Proxy ENMO"]] = np.nan
stacked_align.to_csv(path+"//garmin_aligned_enmo_mad.csv")
stacked_align

KeyboardInterrupt: 

In [None]:
# Blank out the derived proxy columns wherever the proxy reading itself is
# missing, then save the stacked table and display it.
proxy_missing = stacked_align["Proxy X"].isna()
derived_proxy_cols = ["Proxy Magnitude", "Proxy ENMO"]
stacked_align.loc[proxy_missing, derived_proxy_cols] = np.nan
stacked_align.to_csv(path + "//garmin_aligned_enmo_mad.csv")
stacked_align

In [35]:
# Stack the per-trial aggregated (RMS / ENMO / MAD) files into one table.
# The pattern uses the lower-case "rms_v2" that the files are written with, so
# it also matches on case-sensitive file systems.
aggregated_files = sorted(glob.glob(path + "//Trial *//*rms_v2.csv"))
if not aggregated_files:
    raise FileNotFoundError("No *rms_v2.csv files found under " + path)
# Read every file and concatenate once.
stacked_agg = pd.concat([pd.read_csv(file) for file in aggregated_files])

stacked_agg.to_csv(path+"//garmin_aggregated_enmo_mad.csv")
stacked_agg

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph ID,Order,Actigraph Time,Actigraph RMS X,Actigraph RMS Y,Actigraph RMS Z,Actigraph MAX MAGNITUDE,Actigraph MAX ENMO,Actigraph MAD,Proxy ID,Proxy Time,Proxy RMS X,Proxy RMS Y,Proxy RMS Z,Proxy MAX MAGNITUDE,Proxy MAX ENMO,Proxy MAD
0,1,1,3.2 Hz,36,152101,2022-03-09 11:29:45,0.312557,0.360333,1.005505,1.260623,0.260623,,2428.0,2022-03-09 11:29:45,0.352394,0.316085,0.973872,1.201872,0.201872,
1,1,1,3.2 Hz,36,152131,2022-03-09 11:29:46,0.323552,0.355961,1.000845,1.247093,0.247093,,2428.0,2022-03-09 11:29:46,0.349128,0.330421,0.966676,1.219416,0.219416,
2,1,1,3.2 Hz,36,152161,2022-03-09 11:29:47,0.332514,0.357078,1.003839,1.260624,0.260624,,2428.0,2022-03-09 11:29:47,0.348489,0.324075,0.954952,1.151667,0.151667,
3,1,1,3.2 Hz,36,152191,2022-03-09 11:29:48,0.316704,0.339678,1.006520,1.233777,0.233777,,2428.0,2022-03-09 11:29:48,0.351758,0.320894,0.965795,1.220865,0.220865,
4,1,1,3.2 Hz,36,152221,2022-03-09 11:29:49,0.332499,0.363242,1.004429,1.277456,0.277456,3.526356e-02,2428.0,2022-03-09 11:29:49,0.340952,0.328592,0.961616,1.179112,0.179112,0.039245
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4195,10,5,0.6 Hz,210,124951,2022-03-18 14:14:10,0.023000,0.023000,0.887000,0.887596,0.000000,,2425.0,2022-03-18 14:14:10,0.037163,0.018881,0.955703,0.965534,0.000000,
4196,10,5,0.6 Hz,210,124981,2022-03-18 14:14:11,0.023000,0.023000,0.887000,0.887596,0.000000,,2425.0,2022-03-18 14:14:11,0.028610,0.012900,0.956986,0.980326,0.000000,
4197,10,5,0.6 Hz,210,125011,2022-03-18 14:14:12,0.023000,0.023000,0.887000,0.887596,0.000000,,2425.0,2022-03-18 14:14:12,0.037685,0.021806,0.953794,0.964879,0.000000,
4198,10,5,0.6 Hz,210,125041,2022-03-18 14:14:13,0.023000,0.023000,0.887000,0.887596,0.000000,,2425.0,2022-03-18 14:14:13,0.029481,0.014511,0.956192,0.968826,0.000000,
