In [1]:
import glob
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import math

In [2]:
def calc_enmo(some_data):
    """Compute the vector magnitude and ENMO for every row of axis readings.

    The magnitude is the row-wise Euclidean norm of all columns in
    ``some_data``. ENMO is that magnitude minus 1 (gravity), with negative
    results clamped to 0.

    Parameters
    ----------
    some_data : pandas.DataFrame
        Numeric axis columns (the callers pass the X, Y and Z columns of one
        device). Every column takes part in the norm.

    Returns
    -------
    mag : pandas.Series
        Row-wise Euclidean norm, indexed like ``some_data``.
    enmo : pandas.Series
        ``mag - 1`` with values below 0 replaced by 0, indexed like ``some_data``.

    Notes
    -----
    ``DataFrame.sum`` skips NaN by default, so a row whose axes are all missing
    yields a magnitude of 0 (and an ENMO of 0) rather than NaN. Callers that
    need NaN for such rows have to mask them afterwards.
    """
    # Vectorised square -> row sum -> square root. This replaces the
    # element-wise applymap/transform lambdas (applymap is deprecated in
    # favour of DataFrame.map) and produces the same values.
    mag = np.sqrt((some_data ** 2).sum(axis=1))
    # Subtract gravity and clamp negatives to zero; NaN values stay NaN.
    enmo = (mag - 1).clip(lower=0)

    return mag, enmo

In [11]:
def calc_mad(some_data, device):
    """Compute MAD of a device's magnitude over consecutive 5-second windows.

    MAD is computed here as the mean absolute deviation of the magnitude
    readings in a window from that window's mean magnitude.

    Parameters
    ----------
    some_data : pandas.DataFrame
        Must contain the columns ``"<device> Time"`` (datetimes) and
        ``"<device> Magnitude"``. Rows with a missing value in any column are
        dropped before windowing. The caller's frame is not modified.
    device : str
        Column prefix of the device, e.g. ``"Actigraph"`` or ``"Proxy"``.

    Returns
    -------
    pandas.Series
        One float per window, indexed by the timestamp of the last second of
        that window. Empty (with a DatetimeIndex) when there are no usable
        rows or the data spans less than one window.

    Notes
    -----
    Timestamps are truncated to whole seconds, and windows are inclusive on
    both ends: ``[start, start + 4 s]``. The number of windows is
    ``floor(span / 5)``, where ``span`` is the difference between the last and
    first truncated timestamp, so readings after the last counted window are
    ignored.
    NOTE(review): data covering seconds 0..9 therefore gives one window, not
    two - confirm that this is intended.
    """
    time_name = device + " Time"
    mag_name = device + " Magnitude"
    # Specify the amount of time (seconds) to aggregate over
    agg_len = 5

    def _empty_result():
        # Datetime-indexed so callers can still join it against time columns.
        return pd.Series(dtype="float64", index=pd.DatetimeIndex([]))

    readings = some_data.dropna()
    if readings.empty:
        return _empty_result()

    # Truncate to whole seconds on a separate Series: nothing is written back
    # into a slice (no SettingWithCopyWarning) and the input stays untouched.
    times = pd.to_datetime(readings[time_name]).dt.floor("s")
    mags = readings[mag_name]

    # First timestamp and the last second of the first window
    start = times.iloc[0]
    end_time = start + timedelta(seconds=agg_len - 1)
    # Total length of the trial in seconds
    trial_length = (times.iloc[-1] - start).total_seconds()

    all_mad = {}
    # Non-overlapping windows: window length == step == agg_len
    for _ in range(int(trial_length // agg_len)):
        # Magnitudes of all readings whose (truncated) time falls in the window
        window = mags[(times >= start) & (times <= end_time)]

        if device == "Actigraph":
            # Presumably 5 s at 100 Hz - TODO confirm the expected sample rate.
            if window.shape[0] != 500 :
                print(f"Error {window.shape[0]} readings \nStart Time {start} \n End Time {end_time}")

        # Mean absolute deviation from the window mean. The magnitude column is
        # selected by label (not by position), and an empty window gives NaN
        # instead of a division by zero.
        all_mad[end_time] = (window - window.mean()).abs().mean()

        # Advance to the next window
        start = end_time + timedelta(seconds=1)
        end_time = start + timedelta(seconds=agg_len - 1)

    if not all_mad:
        return _empty_result()
    return pd.Series(all_mad, dtype="float64")

In [23]:
# Root data folder; each trial lives in its own "Trial <id>" sub-folder.
# NOTE(review): this is an absolute, machine-specific Windows path.
path = "C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\"
# Ask which trial to process; the answer is used verbatim in the folder name.
trial = input("Which Trial is being processed? ")
# Aligned readings file for the chosen trial.
input_path = f"{path}Trial {trial}\\apple_aligned.csv"

Which Trial is being processed? 09e


In [24]:
# Load the aligned readings of the selected trial and split them into a list
# holding one DataFrame per Actigraph device.
temp = pd.read_csv(input_path)
temp["Actigraph Time"] = pd.to_datetime(temp["Actigraph Time"])
temp["Proxy Time"] = pd.to_datetime(temp["Proxy Time"])
# Build the list straight from the groupby so any number of devices works
# (unpacking into exactly five names raised ValueError for other counts).
# Groups come out in sorted "Actigraph ID" order. Each group is copied so the
# later cells can add columns without touching `temp` or triggering
# SettingWithCopyWarning.
data = [group.copy() for _, group in temp.groupby("Actigraph ID")]
print("Finished") 

Finished


In [25]:
# Calculate Magnitude and ENMO for both devices of every pair in `data`,
# then save the combined per-reading table for the trial.
derived_columns = ["Actigraph Magnitude", "Actigraph ENMO", "Proxy Magnitude", "Proxy ENMO"]
for pair_num, dev_pair in enumerate(data):
    # Start from a frame without the derived columns, so re-running this cell
    # does not fail in insert() with "already exists". drop() returns a new
    # frame, which is stored back into `data` below.
    dev_pair = dev_pair.drop(columns=derived_columns, errors="ignore")

    # Magnitude and ENMO of the actigraph. The fixed positions 10/11 (and 18/19
    # below) assume the column layout of apple_aligned.csv - they put the new
    # columns right after each device's Z column in the layout displayed below.
    acti_mag, acti_enmo = calc_enmo(dev_pair.loc[:, ["Actigraph X", "Actigraph Y", "Actigraph Z"]])
    dev_pair.insert(10, "Actigraph Magnitude", acti_mag)
    dev_pair.insert(11, "Actigraph ENMO", acti_enmo)

    # Magnitude and ENMO of the proxy
    proxy_mag, proxy_enmo = calc_enmo(dev_pair.loc[:, ["Proxy X", "Proxy Y", "Proxy Z"]])
    dev_pair.insert(18, "Proxy Magnitude", proxy_mag)
    dev_pair.insert(19, "Proxy ENMO", proxy_enmo)
    # Rows without a proxy reading must stay empty instead of carrying the
    # values calc_enmo produces for all-NaN axes.
    dev_pair.loc[(dev_pair["Proxy X"].isna()), ["Proxy Magnitude", "Proxy ENMO"]] = np.nan

    # Later cells read the derived columns from the frames in `data`.
    data[pair_num] = dev_pair

# Combine once and write once, for any number of device pairs. Previously the
# file was only written when there were at least four pairs (and twice with five).
trial_data = pd.concat(data)
trial_data.to_csv(path + "Trial " + trial + "//apple_aligned_v2.csv", index=False)

In [26]:
# Preview the combined per-reading table (all device pairs of the trial,
# including the Magnitude and ENMO columns added above).
trial_data

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph ID,Actigraph Time,Order,Include,Actigraph X,Actigraph Y,Actigraph Z,Actigraph Magnitude,Actigraph ENMO,Proxy ID,Proxy Time,Reading #,Proxy X,Proxy Y,Proxy Z,Proxy Magnitude,Proxy ENMO
252000,9e,,3.2 Hz,36,2022-05-31 14:48:10,252001,0,-0.207,-0.285,-1.023,1.081944,0.081944,,NaT,,,,,,
252001,9e,,3.2 Hz,36,2022-05-31 14:48:10,252002,0,-0.238,-0.242,-1.023,1.077839,0.077839,D09YVJVXDD,2022-05-31 14:48:10.004,,0.008057,-0.041016,-0.987305,0.988189,0.000000
252002,9e,,3.2 Hz,36,2022-05-31 14:48:10,252003,0,-0.246,-0.211,-0.996,1.047403,0.047403,,NaT,,,,,,
252003,9e,,3.2 Hz,36,2022-05-31 14:48:10,252004,0,-0.250,-0.164,-0.973,1.017902,0.017902,D09YVJVXDD,2022-05-31 14:48:10.024,,0.012695,-0.015381,-1.006836,1.007033,0.007033
252004,9e,,3.2 Hz,36,2022-05-31 14:48:10,252005,0,-0.277,-0.098,-0.965,1.008741,0.008741,,NaT,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419995,9e,,0.6 Hz,113,2022-05-31 15:02:09,419996,0,0.023,-0.008,-1.039,1.039285,0.039285,D09YVJVXDD,2022-05-31 15:02:09.950,,0.044189,-0.058105,-0.996094,0.998765,0.000000
419996,9e,,0.6 Hz,113,2022-05-31 15:02:09,419997,0,0.023,-0.008,-1.039,1.039285,0.039285,,NaT,,,,,,
419997,9e,,0.6 Hz,113,2022-05-31 15:02:09,419998,0,0.023,-0.008,-1.039,1.039285,0.039285,D09YVJVXDD,2022-05-31 15:02:09.970,,0.024414,-0.005371,-1.007080,1.007390,0.007390
419998,9e,,0.6 Hz,113,2022-05-31 15:02:09,419999,0,0.023,-0.008,-1.039,1.039285,0.039285,,NaT,,,,,,


In [27]:
def aggregate_data(data, device):
    """Aggregate one device's readings to one row per second and attach MAD.

    Parameters
    ----------
    data : pandas.DataFrame
        Readings of one device pair. Must contain "Trial Number",
        "Round Number", "Speed", "Order", "Include" and, for the given device
        prefix, the columns "<device> ID", "<device> Time", "<device> X/Y/Z",
        "<device> Magnitude" and "<device> ENMO". For ``device == "Proxy"`` a
        "Reading #" column is also required (it is dropped before the NaN
        filter). The frame is not modified.
    device : str
        Column prefix, "Actigraph" or "Proxy".

    Returns
    -------
    pandas.DataFrame
        One row per second with ``Include == 1``: the trial meta columns,
        "Order" (smallest Order within the second), "<device> Time",
        "<device> RMS X/Y/Z", "<device> MAX MAGNITUDE", "<device> MAX ENMO"
        and "<device> MAD" (NaN for seconds that do not end a MAD window).
    """
    time_col = device + " Time"
    axis_cols = [device + " X", device + " Y", device + " Z"]
    mag_col = device + " Magnitude"
    enmo_col = device + " ENMO"
    mad_col = device + " MAD"

    # Calculate the MAD for device (Series indexed by window end time)
    device_mad = calc_mad(data.loc[:, [time_col] + axis_cols + [mag_col]], device).rename(mad_col)

    def rms(x):
        # Root mean square of one column within one group
        return np.sqrt(np.mean(x**2))

    # Aggregations per column. Keep this order: the result columns are renamed
    # by position below. String names are used instead of np.min/np.max, which
    # pandas maps to the same group-wise reductions.
    device_aggs = {"Order" : ["min"], axis_cols[0]: [rms], axis_cols[1]: [rms], axis_cols[2]: [rms],
                   mag_col: ["max"], enmo_col: ["max"]}

    if device == "Proxy":
        # Proxy rows are sparse: keep only rows without missing values, then
        # truncate the proxy timestamps to whole seconds. This works on a new
        # frame, so the caller's "Proxy Time" column is no longer overwritten.
        # NOTE(review): dropna() looks at every remaining column, so a NaN in
        # an unrelated column (e.g. "Round Number") also removes the row.
        per_reading = data.drop(columns=["Reading #"]).dropna()
        per_reading = per_reading.assign(
            **{time_col: pd.to_datetime(per_reading[time_col]).dt.floor("s")}
        )
    else:
        per_reading = data

    # Group the readings by second and perform aggregations
    device_rms = per_reading.groupby(["Include", time_col]).agg(device_aggs).reset_index()
    # Rename aggregate columns
    device_rms.columns = ["Include", time_col, "Order", device + " RMS X", device + " RMS Y", device + " RMS Z",
                          device + " MAX MAGNITUDE", device + " MAX ENMO"]
    # Select only the seconds flagged for inclusion
    device_rms = device_rms.loc[device_rms["Include"] == 1].drop(columns=["Include"])

    # Attach the MAD value whose window ends at each second (NaN elsewhere).
    # Same result as a left join on the time column, without merging against a
    # raw index array.
    device_rms = device_rms.assign(**{mad_col: device_rms[time_col].map(device_mad)})

    # Create a data frame that holds the meta trial data
    acti_meta = data.loc[:, ["Trial Number", "Round Number", "Speed", device + " ID", "Order"]]
    # Merge aggregated data with trial meta data via the first reading of each second
    device_sec = acti_meta.merge(device_rms, how='inner', on="Order")
    return device_sec

In [28]:
# Calculate MAD for every device pair of the trial and aggregate the readings
# to one row per second, then save the combined per-second table.
pair_frames = []
# Iterate through each device pair (one Actigraph with its proxy readings)
for acti_num, acti_pair in enumerate(data, start=1):
    print(f"Actigraph Number : {acti_num}")
    acti_pair["Round Number"] = 5 # Used for extra rounds

    # Aggregate data and calculate MAD for the actigraph
    acti_data = aggregate_data(acti_pair, "Actigraph")

    # Aggregate data and calculate MAD for the proxy
    proxy1_data = aggregate_data(acti_pair, "Proxy")
    # Get rid of the meta columns that the actigraph frame already carries
    proxy1_data = proxy1_data.drop(columns=["Trial Number", 'Round Number', 'Speed', 'Order'])

    # Pair the two devices second by second
    pair_frames.append(
        acti_data.merge(proxy1_data, how='inner', left_on="Actigraph Time", right_on="Proxy Time")
    )

# Combine once and write once after the loop. Previously the file was written
# only when pair_num == 3, so a fifth device never reached the CSV and trials
# with fewer than four devices were not saved at all.
trial_final = pd.concat(pair_frames) if pair_frames else None
if trial_final is not None:
    trial_final.to_csv(path + "Trial " + trial + "//apple_rms_v2.csv", index=False)
trial_final

Actigraph Number : 1


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  some_data[time_name] = some_data[time_name].apply(lambda x: x.replace(microsecond=0))


Actigraph Number : 2
Actigraph Number : 3
Actigraph Number : 4
Actigraph Number : 5


Unnamed: 0,Trial Number,Round Number,Speed,Actigraph ID,Order,Actigraph Time,Actigraph RMS X,Actigraph RMS Y,Actigraph RMS Z,Actigraph MAX MAGNITUDE,Actigraph MAX ENMO,Actigraph MAD,Proxy ID,Proxy Time,Proxy RMS X,Proxy RMS Y,Proxy RMS Z,Proxy MAX MAGNITUDE,Proxy MAX ENMO,Proxy MAD
0,9e,5,3.2 Hz,36,255001,2022-05-31 14:48:40,0.322432,0.352531,0.991440,1.274027,0.274027,,D09YVJVXDD,2022-05-31 14:48:40,0.365575,0.340848,0.998733,1.421136,0.421136,
1,9e,5,3.2 Hz,36,255101,2022-05-31 14:48:41,0.322328,0.352270,0.993433,1.277962,0.277962,,D09YVJVXDD,2022-05-31 14:48:41,0.361186,0.317475,0.995373,1.325885,0.325885,
2,9e,5,3.2 Hz,36,255201,2022-05-31 14:48:42,0.332335,0.363132,0.991934,1.271222,0.271222,,D09YVJVXDD,2022-05-31 14:48:42,0.384043,0.327071,1.004935,1.416948,0.416948,
3,9e,5,3.2 Hz,36,255301,2022-05-31 14:48:43,0.314547,0.357089,0.993327,1.264142,0.264142,,D09YVJVXDD,2022-05-31 14:48:43,0.366822,0.337243,0.994300,1.426143,0.426143,
4,9e,5,3.2 Hz,36,255401,2022-05-31 14:48:44,0.315859,0.360343,0.992132,1.284969,0.284969,3.315026e-02,D09YVJVXDD,2022-05-31 14:48:44,0.366022,0.328313,1.002507,1.370485,0.370485,0.056777
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,9e,5,0.6 Hz,113,416501,2022-05-31 15:01:35,0.023000,0.008000,1.039000,1.039285,0.039285,,D09YVJVXDD,2022-05-31 15:01:35,0.018752,0.015932,0.997769,1.005138,0.005138,
416,9e,5,0.6 Hz,113,416601,2022-05-31 15:01:36,0.023000,0.008000,1.039000,1.039285,0.039285,,D09YVJVXDD,2022-05-31 15:01:36,0.019306,0.027515,0.997479,1.007770,0.007770,
417,9e,5,0.6 Hz,113,416701,2022-05-31 15:01:37,0.023000,0.008000,1.039000,1.039285,0.039285,,D09YVJVXDD,2022-05-31 15:01:37,0.019891,0.016676,0.997690,1.006218,0.006218,
418,9e,5,0.6 Hz,113,416801,2022-05-31 15:01:38,0.023000,0.008000,1.039000,1.039285,0.039285,,D09YVJVXDD,2022-05-31 15:01:38,0.018854,0.027707,0.997585,1.008820,0.008820,


In [15]:
# Inspect rows 350-369 (by position) of the per-second table.
# NOTE(review): this cell's execution count (15) is lower than that of the cell
# that builds trial_final (28), so the saved output may belong to an earlier
# run - re-run after rebuilding trial_final.
trial_final.iloc[350:370]

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph ID,Order,Actigraph Time,Actigraph RMS X,Actigraph RMS Y,Actigraph RMS Z,Actigraph MAX MAGNITUDE,Actigraph MAX ENMO,Actigraph MAD,Proxy ID,Proxy Time,Proxy RMS X,Proxy RMS Y,Proxy RMS Z,Proxy MAX MAGNITUDE,Proxy MAX ENMO,Proxy MAD
350,7,4,1.0 Hz,20,572001,2022-04-01 14:22:50,0.030672,0.023384,1.001811,1.013079,0.013079,,GKGGN79P34,2022-04-01 14:22:50,0.02172,0.026013,0.993514,1.007797,0.007797,
351,7,4,1.0 Hz,20,572101,2022-04-01 14:22:51,0.030174,0.02194,1.001328,1.009852,0.009852,,GKGGN79P34,2022-04-01 14:22:51,0.024533,0.025323,0.996096,1.013926,0.013926,
352,7,4,1.0 Hz,20,572201,2022-04-01 14:22:52,0.028262,0.022459,1.000129,1.012206,0.012206,,GKGGN79P34,2022-04-01 14:22:52,0.023623,0.025855,0.995348,1.013573,0.013573,
353,7,4,1.0 Hz,20,572301,2022-04-01 14:22:53,0.024556,0.023713,1.000012,1.009293,0.009293,,GKGGN79P34,2022-04-01 14:22:53,0.020894,0.025727,0.994052,1.010052,0.010052,
354,7,4,1.0 Hz,20,572401,2022-04-01 14:22:54,0.025045,0.02405,1.000212,1.009508,0.009508,0.003818033,GKGGN79P34,2022-04-01 14:22:54,0.022876,0.024426,0.994337,1.015954,0.015954,0.0053
355,7,4,1.0 Hz,20,572501,2022-04-01 14:22:55,0.029233,0.023128,0.99989,1.012601,0.012601,,GKGGN79P34,2022-04-01 14:22:55,0.023868,0.021152,0.994515,1.009042,0.009042,
356,7,4,1.0 Hz,20,572601,2022-04-01 14:22:56,0.029979,0.022895,0.99981,1.009456,0.009456,,GKGGN79P34,2022-04-01 14:22:56,0.023037,0.023416,0.995152,1.008233,0.008233,
357,7,4,1.0 Hz,20,572701,2022-04-01 14:22:57,0.030741,0.024288,1.001651,1.013789,0.013789,,GKGGN79P34,2022-04-01 14:22:57,0.022931,0.026165,0.996216,1.013344,0.013344,
358,7,4,1.0 Hz,20,572801,2022-04-01 14:22:58,0.03111,0.023317,0.999889,1.008952,0.008952,,GKGGN79P34,2022-04-01 14:22:58,0.022663,0.026282,0.995328,1.019113,0.019113,
359,7,4,1.0 Hz,20,572901,2022-04-01 14:22:59,0.030094,0.021894,1.001051,1.009571,0.009571,0.003677994,GKGGN79P34,2022-04-01 14:22:59,0.024331,0.025525,0.995789,1.009013,0.009013,0.005618


In [183]:
# Combine the per-trial aligned files into one table.
# The per-trial files are written as "apple_aligned_v2.csv" (lower-case "v"),
# so the pattern uses that exact case; the previous "aligned_V2" only matched
# on case-insensitive file systems. sorted() makes the row order reproducible.
aligned_files = sorted(glob.glob(path + "Trial *//*aligned_v2.csv"))
if not aligned_files:
    # Fail with a clear message instead of calling .to_csv on None.
    raise FileNotFoundError("No per-trial '*aligned_v2.csv' files found under " + path)
# Read everything first and concatenate once instead of growing the frame
# file by file.
aligned_stack = pd.concat([pd.read_csv(file) for file in aligned_files])
aligned_stack.to_csv(path + "//apple_aligned_mad_enmo.csv", index=False)

In [190]:
# Combine the per-trial per-second files into one table.
# sorted() makes the row order reproducible across file systems.
aggregated_files = sorted(glob.glob(path + "Trial *//*rms_v2.csv"))
if not aggregated_files:
    # Fail with a clear message instead of calling .to_csv on None.
    raise FileNotFoundError("No per-trial '*rms_v2.csv' files found under " + path)
# Read everything first and concatenate once instead of growing the frame
# file by file.
aggregated_stack = pd.concat([pd.read_csv(file) for file in aggregated_files])
# NOTE(review): unlike the other exports in this notebook this call omits
# index=False, so the row index (which restarts for every input file) is
# written as an unnamed first column - confirm whether downstream readers
# expect that column before changing it.
aggregated_stack.to_csv(path + "//apple_aggregated_mad_enmo.csv")

In [189]:
# List the per-trial per-second files matched by the glob pattern, to check
# which trials will be combined.
aggregated_files = glob.glob(path + "Trial *//*rms_v2.csv")
aggregated_files

['C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 01\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 02\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 03\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 04\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 05\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 06\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 06e\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 07\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 08\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 09\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial