In [1]:
import glob
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import math

In [2]:
def calc_enmo(some_data):
    """Compute the vector magnitude and ENMO for each row of accelerometer data.

    Parameters
    ----------
    some_data : pandas.DataFrame
        One numeric column per axis. Every column is squared and the squares
        are summed row-wise, so pass only the axis columns (the callers in
        this notebook pass the X, Y and Z columns of one device).

    Returns
    -------
    mag : pandas.Series
        Square root of the row-wise sum of squares (Euclidean norm).
    enmo : pandas.Series
        ``mag - 1`` (1 being gravity), with negative results raised to 0.

    Notes
    -----
    A row with a missing axis value yields NaN in both outputs. The default
    NaN-skipping sum would instead report such a row as magnitude 0 / ENMO 0,
    which is indistinguishable from a real reading.
    """
    # skipna=False keeps "no reading" as NaN instead of summing it to 0.
    mag = np.sqrt((some_data ** 2).sum(axis=1, skipna=False))
    # Subtract gravity, then clamp negatives to 0 (NaN passes through clip).
    enmo = (mag - 1).clip(lower=0)

    return mag, enmo

In [3]:
def calc_mad(some_data, device, agg_len=5):
    """Compute the mean amplitude deviation (MAD) over consecutive time windows.

    For each window of ``agg_len`` whole seconds the MAD is the mean of
    ``abs(magnitude - window_mean_magnitude)`` over the readings in the window.

    Parameters
    ----------
    some_data : pandas.DataFrame
        Must contain ``"<device> Time"`` (datetime-like) and
        ``"<device> Magnitude"``. Rows with any missing value are dropped
        first. The caller's frame is not modified.
    device : str
        Column-name prefix, e.g. ``"Actigraph"`` or ``"Proxy"``.
    agg_len : int, optional
        Window length in seconds (default 5, the value that was hard-coded).

    Returns
    -------
    pandas.Series
        MAD values indexed by the timestamp of the last second of each window.
        Empty if there is no usable data or the data spans less than one window.

    Raises
    ------
    ValueError
        If ``agg_len`` is not positive.
    """
    if agg_len <= 0:
        raise ValueError("agg_len must be a positive number of seconds")

    time_name = device + " Time"
    mag_name = device + " Magnitude"

    # Copy so the timestamp rewrite below never writes into (or warns about)
    # a slice of the caller's frame.
    some_data = some_data.dropna().copy()
    if some_data.empty:
        return pd.Series([], index=pd.DatetimeIndex([]), dtype="float64")

    # Truncate timestamps to whole seconds so windows can be matched by second.
    some_data[time_name] = pd.to_datetime(some_data[time_name]).dt.floor("s")

    # First timestamp of the data and the last second of the first window.
    start = some_data[time_name].iloc[0]
    end_time = start + timedelta(seconds=agg_len - 1)
    # Total span of the data in seconds.
    trial_length = (some_data[time_name].iloc[-1] - start).total_seconds()

    # Original check was a fixed 500 readings per 5 s window for the Actigraph,
    # i.e. 100 readings per second.
    expected_readings = 100 * agg_len

    all_mad = {}
    # NOTE(review): trial_length is (last - first), so a final window ending
    # exactly on the last second is not emitted -- confirm whether intended.
    for _ in range(int(trial_length // agg_len)):
        # Magnitudes of all readings inside [start, end_time] (both inclusive).
        in_window = some_data[time_name].between(start, end_time)
        window_mag = some_data.loc[in_window, mag_name]

        if device == "Actigraph":
            if window_mag.shape[0] != expected_readings:
                print(f"Error {window_mag.shape[0]} readings")

        # Mean absolute deviation from the window's mean magnitude.
        # An empty window yields NaN.
        all_mad[end_time] = (window_mag - window_mag.mean()).abs().mean()

        # Slide to the next, non-overlapping window.
        start = end_time + timedelta(seconds=1)
        end_time = start + timedelta(seconds=agg_len - 1)

    # Build the index explicitly so it is datetime-typed even with no windows.
    mad_df = pd.Series(
        list(all_mad.values()),
        index=pd.DatetimeIndex(list(all_mad.keys())),
        dtype="float64",
    )
    return mad_df

In [4]:
# Base folder that contains one "Trial <id>" sub-folder per trial.
path = "C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\"
# Ask which trial to process; the answer selects the sub-folder to read from.
trial = input("Which Trial is being processed? ")
# Full path of that trial's aligned CSV.
input_path = f"{path}Trial {trial}\\apple_aligned.csv"

Which Trial is being processed? 11e


In [5]:
# Load the selected trial's aligned CSV and parse both device timestamp
# columns into datetimes.
temp = pd.read_csv(input_path)
for time_col in ("Actigraph Time", "Proxy Time"):
    temp[time_col] = pd.to_datetime(temp[time_col])
# `data` is the working name used by the following cells (same object as `temp`).
data = temp
print("Finished")

Finished


In [6]:
# trial 11
# Calculate magnitude and ENMO of the actigraph and of the proxy
acti_mag, acti_enmo = calc_enmo(data.loc[:, ["Actigraph X", "Actigraph Y", "Actigraph Z"]])
proxy_mag, proxy_enmo = calc_enmo(data.loc[:, ["Proxy X", "Proxy Y", "Proxy Z"]])

# Place each derived column directly after its anchor column instead of at a
# hard-coded position, so the layout does not depend on the CSV's exact column
# count. If the column already exists (cell re-run), overwrite it in place
# rather than failing on a duplicate insert.
derived_columns = [
    ("Actigraph Z", "Actigraph Magnitude", acti_mag),
    ("Actigraph Magnitude", "Actigraph ENMO", acti_enmo),
    ("Proxy Z", "Proxy Magnitude", proxy_mag),
    ("Proxy Magnitude", "Proxy ENMO", proxy_enmo),
]
for anchor, label, values in derived_columns:
    if label in data.columns:
        data[label] = values
    else:
        data.insert(data.columns.get_loc(anchor) + 1, label, values)

# Rows without a proxy reading must not carry a proxy magnitude/ENMO:
# force them to NaN regardless of what calc_enmo produced for missing values.
data.loc[(data["Proxy X"].isna()), ["Proxy Magnitude", "Proxy ENMO"]] = np.nan

trial_data = data
trial_data.to_csv(path + "Trial " + trial + "//apple_aligned_v2.csv", index=False)

In [7]:
# Display the trial frame, now including the magnitude and ENMO columns.
trial_data

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph ID,Actigraph Time,Order,Include,Actigraph X,Actigraph Y,Actigraph Z,Actigraph Magnitude,Actigraph ENMO,Proxy ID,Proxy Time,Reading #,Proxy X,Proxy Y,Proxy Z,Proxy Magnitude,Proxy ENMO
0,11e,6,3.2 Hz,210,2022-05-31 15:48:15,1,0,-1.207,-0.703,-1.008,1.722534,0.722534,,NaT,,,,,,
1,11e,6,3.2 Hz,210,2022-05-31 15:48:15,2,0,-0.801,-0.609,-0.906,1.354001,0.354001,NQ7MGWQP54,2022-05-31 15:48:15.010,,0.004395,-0.062744,-0.992432,0.994423,0.000000
2,11e,6,3.2 Hz,210,2022-05-31 15:48:15,3,0,-0.680,-0.504,-0.801,1.165340,0.165340,,NaT,,,,,,
3,11e,6,3.2 Hz,210,2022-05-31 15:48:15,4,0,-0.773,-0.297,-1.043,1.331761,0.331761,NQ7MGWQP54,2022-05-31 15:48:15.030,,0.006348,-0.060303,-1.000000,1.001837,0.001837
4,11e,6,3.2 Hz,210,2022-05-31 15:48:15,5,0,-0.684,-0.141,-0.945,1.175058,0.175058,,NaT,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
335995,11e,6,0.6 Hz,210,2022-05-31 16:02:14,335996,0,-0.059,-0.020,-0.879,0.881205,0.000000,RJ757R62DR,2022-05-31 16:02:14.943,,0.042969,-0.010498,-0.996826,0.997807,0.000000
335996,11e,6,0.6 Hz,210,2022-05-31 16:02:14,335997,0,-0.059,-0.020,-0.879,0.881205,0.000000,,NaT,,,,,,
335997,11e,6,0.6 Hz,210,2022-05-31 16:02:14,335998,0,-0.059,-0.020,-0.879,0.881205,0.000000,RJ757R62DR,2022-05-31 16:02:14.963,,0.060303,0.003418,-1.009277,1.011083,0.011083
335998,11e,6,0.6 Hz,210,2022-05-31 16:02:14,335999,0,-0.059,-0.020,-0.879,0.881205,0.000000,,NaT,,,,,,


In [9]:
def aggregate_data(data, device):
    """Aggregate one device's readings to one row per second and attach MAD.

    Parameters
    ----------
    data : pandas.DataFrame
        Aligned readings. Must contain "Include", "Order", "Trial Number",
        "Round Number", "Speed" and, for ``device``, the " ID", " Time",
        " X", " Y", " Z", " Magnitude" and " ENMO" columns. For
        ``device == "Proxy"`` a "Reading #" column is also expected (it is
        dropped before rows with missing values are removed).
        The caller's frame is not modified.
    device : str
        Column-name prefix, ``"Actigraph"`` or ``"Proxy"``.

    Returns
    -------
    pandas.DataFrame
        One row per second with ``Include == 1``: the trial meta data of the
        first reading in that second (lowest "Order"), the RMS of each axis,
        the maximum magnitude and ENMO, and "<device> MAD". MAD is joined on
        the timestamp returned by ``calc_mad`` and is NaN for every second
        that has no entry there.
    """
    time_col = device + " Time"
    axis_cols = [device + " X", device + " Y", device + " Z"]
    mag_col = device + " Magnitude"
    enmo_col = device + " ENMO"

    # Calculate the MAD for the device (Series indexed by timestamp).
    device_mad = calc_mad(data.loc[:, [time_col] + axis_cols + [mag_col]], device).rename(device + " MAD")

    def rms(values):
        # Root mean square of one group's values.
        return np.sqrt(np.mean(values ** 2))

    # Aggregations per column; dict order defines the output column order.
    # String names are used for min/max because passing the numpy callables
    # to agg is deprecated in recent pandas.
    device_aggs = {"Order": ["min"]}
    device_aggs.update({axis: [rms] for axis in axis_cols})
    device_aggs[mag_col] = ["max"]
    device_aggs[enmo_col] = ["max"]

    if device == "Proxy":
        # Proxy timestamps carry sub-second precision: truncate to whole
        # seconds on a derived frame (not on the caller's), then drop rows
        # with missing values.
        per_reading = (
            data.drop(columns=["Reading #"])
            .assign(**{time_col: pd.to_datetime(data[time_col]).dt.floor("s")})
            .dropna()
        )
    else:
        per_reading = data

    # Group by second and aggregate, then flatten the column names.
    device_rms = per_reading.groupby(["Include", time_col]).agg(device_aggs).reset_index()
    device_rms.columns = ["Include", time_col, "Order", device + " RMS X", device + " RMS Y", device + " RMS Z",
                          device + " MAX MAGNITUDE", device + " MAX ENMO"]
    # Keep only the seconds flagged for inclusion.
    # NOTE(review): presumably the middle minutes of each speed -- confirm.
    device_rms = device_rms.loc[device_rms["Include"] == 1].drop(columns=["Include"])

    # Per-reading trial meta data, joined back via the lowest "Order" per second.
    acti_meta = data.loc[:, ["Trial Number", "Round Number", "Speed", device + " ID", "Order"]]

    # Attach MAD by timestamp; the left join keeps seconds that have no MAD.
    mad_frame = device_mad.rename_axis(time_col).reset_index()
    device_with_mad = device_rms.merge(mad_frame, how="left", on=time_col)
    # Merge aggregated data with the trial meta data.
    device_sec = acti_meta.merge(device_with_mad, how="inner", on="Order")
    return device_sec

In [21]:
# For Trial 11
trial_data["Round Number"] = 6 # Used for extra rounds

# One group of rows per proxy device.
n_proxies = trial_data["Proxy ID"].nunique()
if n_proxies == 0:
    raise ValueError("trial_data contains no Proxy ID values to aggregate")

# Aggregate data and calculate MAD for the actigraph.
# NOTE(review): the original used the first quarter of the rows with four
# proxies; this assumes the aligned file stacks one full copy of the
# Actigraph trace per proxy -- confirm.
acti_data = aggregate_data(trial_data.iloc[:trial_data.shape[0] // n_proxies, :], "Actigraph")

# Columns already supplied by the actigraph side of the merge.
duplicate_cols = ["Trial Number", "Round Number", "Speed", "Order"]

# Aggregate each proxy, then pair it with the actigraph rows by second.
paired_frames = []
for proxy_id, proxy_rows in trial_data.groupby("Proxy ID"):
    proxy_data = aggregate_data(proxy_rows, "Proxy").drop(columns=duplicate_cols)
    paired_frames.append(
        acti_data.merge(proxy_data, how="left", left_on="Actigraph Time", right_on="Proxy Time")
    )

# Combine into one dataframe (proxies stacked in Proxy ID order).
trial_final = pd.concat(paired_frames)

trial_final.to_csv(path + "Trial " + trial + "//apple_rms_v2.csv", index=False)
trial_final

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph ID,Order,Actigraph Time,Actigraph RMS X,Actigraph RMS Y,Actigraph RMS Z,Actigraph MAX MAGNITUDE,Actigraph MAX ENMO,Actigraph MAD,Proxy ID,Proxy Time,Proxy RMS X,Proxy RMS Y,Proxy RMS Z,Proxy MAX MAGNITUDE,Proxy MAX ENMO,Proxy MAD
0,11e,6,3.2 Hz,210,3001,2022-05-31 15:48:45,0.334916,0.362375,0.878789,1.185160,0.185160,,GKGGN79P34,2022-05-31 15:48:45,0.374231,0.337439,0.996519,1.363113,0.363113,
1,11e,6,3.2 Hz,210,3101,2022-05-31 15:48:46,0.345786,0.365065,0.875186,1.194081,0.194081,,GKGGN79P34,2022-05-31 15:48:46,0.391727,0.349550,0.994668,1.393028,0.393028,
2,11e,6,3.2 Hz,210,3201,2022-05-31 15:48:47,0.335600,0.362712,0.878773,1.213064,0.213064,,GKGGN79P34,2022-05-31 15:48:47,0.385829,0.335826,0.994069,1.392324,0.392324,
3,11e,6,3.2 Hz,210,3301,2022-05-31 15:48:48,0.344763,0.360388,0.875721,1.201480,0.201480,,GKGGN79P34,2022-05-31 15:48:48,0.388796,0.343966,0.995874,1.402385,0.402385,
4,11e,6,3.2 Hz,210,3401,2022-05-31 15:48:49,0.334890,0.370501,0.878255,1.207071,0.207071,4.111501e-02,GKGGN79P34,2022-05-31 15:48:49,0.388774,0.337538,0.994606,1.400423,0.400423,0.052066
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,11e,6,0.6 Hz,210,80501,2022-05-31 16:01:40,0.059000,0.020000,0.879000,0.881205,0.000000,,V3KQT9D9NR,2022-05-31 16:01:40,0.007594,0.056864,0.994035,1.001985,0.001985,
416,11e,6,0.6 Hz,210,80601,2022-05-31 16:01:41,0.059000,0.020000,0.879000,0.881205,0.000000,,V3KQT9D9NR,2022-05-31 16:01:41,0.015042,0.057455,0.994068,1.003475,0.003475,
417,11e,6,0.6 Hz,210,80701,2022-05-31 16:01:42,0.059000,0.020000,0.879000,0.881205,0.000000,,V3KQT9D9NR,2022-05-31 16:01:42,0.009257,0.059582,0.993814,1.002308,0.002308,
418,11e,6,0.6 Hz,210,80801,2022-05-31 16:01:43,0.059000,0.020000,0.879000,0.881205,0.000000,,V3KQT9D9NR,2022-05-31 16:01:43,0.014582,0.054563,0.994129,1.002781,0.002781,


In [13]:
# Sanity check: (rows, columns) of the trial frame.
trial_data.shape

(336000, 20)

In [22]:
# Combine the aligned CSV of every trial into one file.
# The pattern uses the same lower-case "v2" the files are written with, so it
# also matches on case-sensitive filesystems; sorted() fixes the stacking order.
aligned_files = sorted(glob.glob(path + "Trial *//*aligned_v2.csv"))
if not aligned_files:
    raise FileNotFoundError("No '*aligned_v2.csv' files found under " + path)
# Read everything first and concatenate once instead of growing the frame
# inside the loop.
aligned_stack = pd.concat([pd.read_csv(file) for file in aligned_files])
aligned_stack.to_csv(path + "//apple_aligned_mad_enmo.csv", index=False)

  exec(code_obj, self.user_global_ns, self.user_ns)


In [23]:
# Combine the per-second aggregate CSV of every trial into one file.
aggregated_files = sorted(glob.glob(path + "Trial *//*rms_v2.csv"))
if not aggregated_files:
    raise FileNotFoundError("No '*rms_v2.csv' files found under " + path)
# Read everything first and concatenate once instead of growing the frame
# inside the loop.
aggregated_stack = pd.concat([pd.read_csv(file) for file in aggregated_files])
# index=False, as for every other CSV written by this notebook: the stacked
# index is just each file's repeated row numbers and would otherwise be
# written as an extra unnamed column.
aggregated_stack.to_csv(path + "//apple_aggregated_mad_enmo.csv", index=False)

In [189]:
# List the per-trial aggregate files matched by the glob pattern.
aggregated_files = glob.glob(path + "Trial *//*rms_v2.csv")
aggregated_files

['C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 01\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 02\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 03\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 04\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 05\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 06\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 06e\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 07\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 08\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial 09\\apple_rms_v2.csv',
 'C:\\Users\\Nick\\Watch_Extraction\\Shaker_Table\\Data\\Apple\\Trial