In [69]:
import glob
import pandas as pd
import numpy as np
from datetime import timedelta

In [5]:
path = "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Apple\\Trial 01\\"
# path = "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Garmin\\Trial 01\\"
trials = glob.glob(path + "*aligned_v2.csv")
# agg_trials = glob.glob(path + "*rms_.csv")
rms = lambda x : np.sqrt(np.mean(x ** 2))

In [8]:
trials

['V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Apple\\Trial 01\\apple_aligned_v2.csv']

In [2]:
def calc_enmo(some_data):
    # Calculate the magnitue by first squaring all of the x, y, and z value, then summing them, and taking the square root.
    mag = ((some_data.applymap(lambda x : x**2)).sum(axis=1)).transform(lambda x : np.sqrt(x))
    # To calculate ENMO we subtract 1 (Gravity) from the vector magnitudes
    enmo = mag.transform(lambda x : x - 1)
    # Finally if we have any ENMO values less than 0 we round them up.
    enmo.loc[enmo.loc[:] < 0] = 0
    
    return mag, enmo

In [109]:
def calc_mad(some_data, device):
    some_data = some_data.dropna()
    all_mad = {}
    time_name = device + " Time"
        
    # Grab the first timestamp from data
    # print(some_data.shape)
    start = some_data.loc[some_data.index[0], time_name]
    # Specify the amount of time to aggregate over
    agg_len = 5
    # Grab end of aggregation period
    end_time = start + timedelta(seconds=agg_len - 1)
    # print(f"Start Time {start} \nEnd Time {end_time}")
    # Calculate the total length of the trial in seconds
    trial_length = (some_data.loc[some_data.index[-1], time_name] - start).total_seconds()
    # Runs the total length of trial divided by the length of time we aggregate over
    # essentialy creates a window of agg_len, and interval of agg_len
    for i in range(int(trial_length//agg_len)):
        # print(end_time)
        # Get agg_len seconds worth of accelerometer readings
        group_s = some_data.loc[(some_data[time_name] >= start) & (some_data[time_name] <= end_time), :]
        if group_s.shape[0] > 1 :
            # print(group_s)
            # Get the mean X, Y, and Z of those readings
            agg_s = group_s.aggregate(lambda x : np.mean(x))
            # print(agg_s)
            mag_s = agg_s[4]

            # print(f"{mag_s}")
            # Subtract the mean magnitude from each accelerometer magnitude from each vector magnitude and then take abs
            dif_mean = group_s[device + ' Magnitude'].apply(lambda x : abs(x - mag_s))
            # Caclulate the sum of all the vector mags - mean mags. Then divide by the number of vectors
            # print(dif_mean.sum())
            # print(f"Shape of division {dif_mean.shape[0]}")
            # print(dif_mean.shape[0])
            mad = (dif_mean.sum()) / dif_mean.shape[0]
            # if device == "Actigraph":
                # if dif_mean.shape[0] > 150 :
                   #  print(f"ERROR {dif_mean.shape[0]} ACTIGRAPH READINGS")
            # print(mad)

            # Add each Mad and the corresponding time to a list :
            all_mad[end_time] = mad
            # 
        start = end_time + timedelta(seconds=1)
        end_time = start + timedelta(seconds=agg_len - 1)
        

    mad_df = pd.Series(data=all_mad)
    return mad_df

In [158]:

for trial_path in trials:
    # Read in aligned trial
    data = pd.read_csv(trial_path, parse_dates=['Actigraph Time', 'Proxy Time'], infer_datetime_format=True)
    # Drop old calculations
    data.drop(columns=['Actigraph Magnitude', 'Actigraph ENMO', 'Proxy Magnitude', 'Proxy ENMO'], inplace=True)
    # Round accelerometer data
    data[['Proxy X', 'Proxy Y', 'Proxy Z']] = data[['Proxy X', 'Proxy Y', 'Proxy Z']].round(3)
    # Calculate Magnitude and ENMO for Actigraph
    acti_mag, acti_enmo = calc_enmo( data.loc[:, ["Actigraph X", "Actigraph Y", "Actigraph Z"]] )
    data.insert(10, "Actigraph Magnitude", acti_mag)
    data.insert(11, "Actigraph ENMO", acti_enmo)
    # Calculate Magnitude and ENMO for Garmin
    proxy_mag, proxy_enmo = calc_enmo( data.loc[:, ["Proxy X", "Proxy Y", "Proxy Z"]] )
    data.insert(18, "Proxy Magnitude", proxy_mag)
    data.insert(19, "Proxy ENMO", proxy_enmo)
    # If there isn't an accelration value, set the magnitude and enmo to nan
    data.loc[(data['Proxy X'].isna()), ['Proxy Magnitude', 'Proxy ENMO']] = np.nan
    data.to_csv(path + "aligned_round_v3.csv", index=None)

    data = data.loc[(data['Include'] > 0), :]
    data["Actigraph Time"] = data["Actigraph Time"].apply(lambda x: x.replace(microsecond=0))
    data["Proxy Time"] = data["Proxy Time"].apply(lambda x: x.replace(microsecond=0))
    # Split data into actigraph and proxy
    # ActiGraph
    acti_data = data.iloc[:,3:12].drop_duplicates()
    # Proxy
    proxy_data = data.iloc[:,12:].dropna(how='all').drop(columns=['Reading #']).drop_duplicates()
    # Calculate MEAN x,y,z, magnitude, and enmo
    # Actigraph Calc:
    acti_agg = acti_data.groupby(['Actigraph Time', 'Actigraph ID']).agg({"Actigraph X": [rms, np.mean], "Actigraph Y": [rms, np.mean], 
                                                                      "Actigraph Z": [rms, np.mean], "Actigraph Magnitude": [np.max, np.mean, rms], 
                                                                      "Actigraph ENMO": [np.max, np.mean, rms]}).reset_index()
    # Proxy Calc:
    proxy_agg = proxy_data.groupby(['Proxy Time', 'Proxy ID']).agg({"Proxy X": [rms, np.mean], "Proxy Y": [rms,np.mean], 
                                                                "Proxy Z": [rms, np.mean], "Proxy Magnitude": [np.max, np.mean, rms], 
                                                                "Proxy ENMO": [np.max, np.mean, rms]}).reset_index()
    
    # Grab META
    meta_data = data.iloc[:, [0,1,2, 4]].drop_duplicates()
    # Rename Columns
    # Actigraph
    acti_agg.columns= ['Actigraph Time', 'Actigraph ID', 'Actigraph RMS X', 'Actigraph Mean X', 'Actigraph RMS Y', 
                       'Actigraph Mean Y', 'Actigraph RMS Z', 'Actigraph Mean Z', 'Actigraph MAX Magnitude', 
                       'Actigraph Mean Magnitude', 'Actigraph RMS Magnitude','Actigraph MAX ENMO', 'Actigraph Mean ENMO', 'Actigraph RMS ENMO']
    # Proxy
    proxy_agg.columns= ['Proxy Time', 'Proxy ID', 'Proxy RMS X', 'Proxy Mean X', 'Proxy RMS Y', 'Proxy Mean Y', 'Proxy RMS Z', 
                        'Proxy Mean Z', 'Proxy MAX MAGNITUDE', 'Proxy Mean MAGNITUDE', 'Proxy RMS MAGNITUDE', 'Proxy MAX ENMO',
                        'Proxy Mean ENMO', 'Proxy RMS ENMO']
    
    # Combine actigraph with proxy
    data_agg = acti_agg.merge(proxy_agg, how='left', left_on='Actigraph Time', right_on='Proxy Time')
    
    # Merge aggregated data with Meta data
    new_agg = meta_data.merge(data_agg, on=['Actigraph Time'])
    
    new_agg.sort_values(['Proxy ID', 'Actigraph ID'], inplace=True)
    
    # Calculate mad for Actigraph
    acti_mad = None
    for acti_name in data['Actigraph ID'].unique():
        raw_acti = data.loc[(data['Actigraph ID'] == acti_name), 
                            ["Actigraph Time", "Actigraph X", "Actigraph Y", "Actigraph Z", "Actigraph Magnitude"]].drop_duplicates()
        # print(f'Actigraph {acti_name} shape {raw_acti.shape}.')
        temp_mad = calc_mad(raw_acti, 'Actigraph')
        temp_mad = temp_mad.to_frame(name='Actigraph MAD').reset_index()
        temp_mad = temp_mad.rename(columns={"index": "Actigraph Time"})
        temp_mad.insert(1, "Actigraph ID", acti_name)
        if acti_mad is None :
            acti_mad = temp_mad
        else :
            acti_mad = pd.concat([acti_mad, temp_mad])
    new_agg = new_agg.merge(acti_mad, how='left', on=['Actigraph Time', 'Actigraph ID'])
    
    # Calculate mad for proxy
    proxy_mad = None
    for proxy_name in data['Proxy ID'].unique():
        if str(proxy_name) != 'nan':
            raw_proxy = data.loc[(data['Proxy ID'] == proxy_name), 
                                 ["Proxy Time", "Proxy X", "Proxy Y", "Proxy Z", "Proxy Magnitude"]].drop_duplicates()
            # print(f'Proxy {proxy_name} shape {raw_proxy.shape}.')
            temp_mad = calc_mad(raw_proxy, 'Proxy')
            temp_mad = temp_mad.to_frame(name='Proxy MAD').reset_index()
            temp_mad = temp_mad.rename(columns={"index": "Proxy Time"})
            temp_mad.insert(1, "Proxy ID", proxy_name)
            # print(temp_mad.shape)
            if proxy_mad is None :
                proxy_mad = temp_mad
            else :
                proxy_mad = pd.concat([proxy_mad, temp_mad])
                
    new_agg = new_agg.merge(proxy_mad, how='left', on=['Proxy Time', 'Proxy ID'])
    new_agg = new_agg.drop(columns=['Proxy Time'])
    
    # Write to file
    new_agg.to_csv(path + "agg_round_v3.csv", index=False)
    break


    

In [150]:
new_agg

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph Time,Actigraph ID,Actigraph RMS X,Actigraph Mean X,Actigraph RMS Y,Actigraph Mean Y,Actigraph RMS Z,...,Proxy RMS Z,Proxy Mean Z,Proxy MAX MAGNITUDE,Proxy Mean MAGNITUDE,Proxy RMS MAGNITUDE,Proxy MAX ENMO,Proxy Mean ENMO,Proxy RMS ENMO,Actigraph MAD,Proxy MAD
0,1,1,3.2 Hz,2022-03-23 09:23:45,36,0.349331,0.07211,0.387830,0.01500,0.999455,...,0.995675,-0.99296,1.463342,1.126737,1.130600,0.463342,0.127975,0.157327,,
1,1,1,3.2 Hz,2022-03-23 09:23:46,36,0.335167,-0.00541,0.368634,-0.03080,1.000866,...,0.997342,-0.99498,1.433152,1.121914,1.125214,0.433152,0.122222,0.149247,,
2,1,1,3.2 Hz,2022-03-23 09:23:47,36,0.347623,0.06973,0.382919,0.01801,0.999498,...,0.998085,-0.99584,1.425113,1.129321,1.133607,0.425113,0.129359,0.162549,,
3,1,1,3.2 Hz,2022-03-23 09:23:48,36,0.340423,-0.00387,0.374855,-0.03311,1.000184,...,0.997807,-0.99538,1.351484,1.121761,1.125085,0.351484,0.122505,0.149259,,
4,1,1,3.2 Hz,2022-03-23 09:23:49,36,0.348017,0.07151,0.384733,0.01550,0.999511,...,0.996435,-0.99378,1.464071,1.128810,1.132626,0.464071,0.129808,0.158742,0.039019,0.069626
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4195,1,1,0.6 Hz,2022-03-23 09:36:40,113,0.031000,0.03100,0.020000,-0.02000,1.039000,...,0.995209,-0.99520,1.004626,0.995881,0.995890,0.004626,0.000288,0.000874,,
4196,1,1,0.6 Hz,2022-03-23 09:36:41,113,0.031000,0.03100,0.020000,-0.02000,1.039000,...,0.995521,-0.99548,1.034409,0.995974,0.996014,0.034409,0.001397,0.005313,,
4197,1,1,0.6 Hz,2022-03-23 09:36:42,113,0.031000,0.03100,0.020000,-0.02000,1.039000,...,0.995085,-0.99508,1.000942,0.995732,0.995737,0.000942,0.000068,0.000226,,
4198,1,1,0.6 Hz,2022-03-23 09:36:43,113,0.031000,0.03100,0.020000,-0.02000,1.039000,...,0.995254,-0.99524,1.008084,0.995741,0.995755,0.008084,0.000586,0.001647,,


In [151]:
new_agg.groupby(['Actigraph ID', 'Proxy ID']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Trial Number,Round Number,Speed,Actigraph Time,Actigraph RMS X,Actigraph Mean X,Actigraph RMS Y,Actigraph Mean Y,Actigraph RMS Z,Actigraph Mean Z,...,Proxy RMS Z,Proxy Mean Z,Proxy MAX MAGNITUDE,Proxy Mean MAGNITUDE,Proxy RMS MAGNITUDE,Proxy MAX ENMO,Proxy Mean ENMO,Proxy RMS ENMO,Actigraph MAD,Proxy MAD
Actigraph ID,Proxy ID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
36,FXWLJ49CT6,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
36,JKJ23D9D7J,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
37,FXWLJ49CT6,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
37,JKJ23D9D7J,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
44,FXWLJ49CT6,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
44,JKJ23D9D7J,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
97,FXWLJ49CT6,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
97,JKJ23D9D7J,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
113,FXWLJ49CT6,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83
113,JKJ23D9D7J,420,420,420,420,420,420,420,420,420,420,...,420,420,420,420,420,420,420,420,83,83


In [157]:
test = new_agg.duplicated()
test.loc[test == True]

Series([], dtype: bool)

In [5]:
path_out = "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Apple\\Apple_Aggregated.csv"
# path_out = "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Garmin\\Garmin_Aggregated.csv"

In [6]:
out_df = None
for trial in agg_trials :
    if out_df is None :
        out_df = pd.read_csv(trial)
    else :
        temp = pd.read_csv(trial)
        out_df = pd.concat([out_df, temp])

In [7]:
out_df.to_csv(path_out, index=False)

In [28]:
out_df

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph ID,Order,Actigraph Time,Actigraph RMS X,Actigraph RMS Y,Actigraph RMS Z,Actigraph MAX MAGNITUDE,...,Actigraph RMS Magnitude,Actigraph MEAN ENMO,Actigraph RMS ENMO,Proxy MEAN X,Proxy MEAN Y,Proxy MEAN Z,Proxy MEAN MAGNITUDE,Proxy RMS MAGNITUDE,Proxy MEAN ENMO,Proxy RMS ENMO
0,1,1,3.2 Hz,36,152551,2022-03-09 11:30:00,0.320847,0.360875,1.001898,1.266082,...,1.112193,0.111224,0.120518,0.014880,-0.026400,-0.957760,1.073964,1.075223,0.073964,0.090419
1,1,1,3.2 Hz,36,152581,2022-03-09 11:30:00,0.322190,0.368068,1.008318,1.271222,...,1.112193,0.111224,0.120518,0.014880,-0.026400,-0.957760,1.073964,1.075223,0.073964,0.090419
2,1,1,3.2 Hz,36,152611,2022-03-09 11:30:00,0.329058,0.353459,1.004326,1.253176,...,1.112193,0.111224,0.120518,0.014880,-0.026400,-0.957760,1.073964,1.075223,0.073964,0.090419
3,1,1,3.2 Hz,36,152641,2022-03-09 11:30:00,0.329464,0.362217,1.003608,1.263793,...,1.112193,0.111224,0.120518,0.014880,-0.026400,-0.957760,1.073964,1.075223,0.073964,0.090419
4,1,1,3.2 Hz,36,152671,2022-03-09 11:30:00,0.338332,0.354878,1.005469,1.266158,...,1.112193,0.111224,0.120518,0.014880,-0.026400,-0.957760,1.073964,1.075223,0.073964,0.090419
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4195,10,5,0.6 Hz,210,124951,2022-03-18 14:14:10,0.023000,0.023000,0.887000,0.887596,...,0.887596,0.000000,0.000000,0.036800,-0.014560,-0.955680,0.956588,0.956611,0.000000,0.000000
4196,10,5,0.6 Hz,210,124981,2022-03-18 14:14:11,0.023000,0.023000,0.887000,0.887596,...,0.887596,0.000000,0.000000,0.028333,-0.010167,-0.957167,0.957691,0.957717,0.000000,0.000000
4197,10,5,0.6 Hz,210,125011,2022-03-18 14:14:12,0.023000,0.023000,0.887000,0.887596,...,0.887596,0.000000,0.000000,0.036960,-0.019360,-0.953760,0.954754,0.954788,0.000000,0.000000
4198,10,5,0.6 Hz,210,125041,2022-03-18 14:14:13,0.023000,0.023000,0.887000,0.887596,...,0.887596,0.000000,0.000000,0.028160,-0.010720,-0.956160,0.956725,0.956756,0.000000,0.000000
