In [3]:
import glob
import os

import numpy as np
import pandas as pd

In [15]:
# Glob pattern for the per-trial folders of the device being processed.
# To process the Apple data instead, swap the two `path` assignments below.
# path = "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Apple\\Trial *\\"
path = "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Garmin\\Trial *\\"

# Per-trial CSVs whose names end in "aligned_v2.csv" / "rms_v2.csv".
# NOTE: glob.glob returns matches in arbitrary order, so position i in one list
# is not guaranteed to belong to the same trial as position i in the other.
trials = glob.glob(path + "*aligned_v2.csv")
agg_trials = glob.glob(path + "*rms_v2.csv")


def rms(x):
    """Return the root mean square of ``x``, i.e. ``sqrt(mean(x ** 2))``.

    ``x`` must support elementwise ``x ** 2`` (for example a NumPy array or a
    pandas Series). The expression is intentionally left in this exact form so
    that the result for a given input type does not change.
    """
    return np.sqrt(np.mean(x ** 2))

In [16]:

# Only the first N columns of each "*rms_v2.csv" are kept before merging; the
# merged result is written back to the same file, so this presumably strips the
# columns appended by an earlier run of this cell -- TODO confirm the count.
N_ORIGINAL_AGG_COLUMNS = 20

# glob.glob gives no ordering guarantee, so pairing the two lists by position is
# only safe after sorting them. Stop early if the lists cannot be paired one to
# one: a silent mis-pairing would overwrite one trial's aggregate file with
# statistics computed from a different trial.
if len(trials) != len(agg_trials):
    raise ValueError(
        f"Found {len(trials)} aligned files but {len(agg_trials)} aggregate files; "
        "expected exactly one of each per trial folder."
    )

for trial_path, agg_path in zip(sorted(trials), sorted(agg_trials)):
    if os.path.dirname(trial_path) != os.path.dirname(agg_path):
        raise ValueError(f"Aligned/aggregate files are from different folders: {trial_path!r} vs {agg_path!r}")

    # Read in aligned trial and keep only the rows flagged with Include > 0.
    # (infer_datetime_format is deprecated and has no effect on pandas >= 2.0;
    # it is kept so behaviour on older pandas is unchanged.)
    data = pd.read_csv(trial_path, parse_dates=['Actigraph Time', 'Proxy Time'], infer_datetime_format=True)
    data = data.loc[data['Include'] > 0, :]

    # Split data into actigraph and proxy column groups (selected by position).
    # ActiGraph: copy the slice so the assignment below writes to an independent
    # frame instead of a possible view of `data` (avoids SettingWithCopyWarning).
    acti_data = data.iloc[:, 3:12].copy()
    # Truncate timestamps to whole seconds so samples can be grouped per second.
    acti_data["Actigraph Time"] = acti_data["Actigraph Time"].dt.floor("s")
    # Proxy: drop rows with no proxy values at all and repeated readings.
    proxy_data = data.iloc[:, 12:].dropna(how='all').drop(columns=['Reading #']).drop_duplicates()
    proxy_data["Proxy Time"] = proxy_data["Proxy Time"].dt.floor("s")

    # Calculate per-second MEAN of x, y, z and MEAN + RMS of magnitude and ENMO.
    # The string "mean" selects the same groupby mean that np.mean was mapped to,
    # without the FutureWarning newer pandas emits for the NumPy callable.
    # Actigraph Calc:
    acti_agg = acti_data.groupby(['Actigraph Time', 'Actigraph ID']).agg({
        "Actigraph X": ["mean"],
        "Actigraph Y": ["mean"],
        "Actigraph Z": ["mean"],
        "Actigraph Magnitude": ["mean", rms],
        "Actigraph ENMO": ["mean", rms],
    }).reset_index()
    # Proxy Calc:
    proxy_agg = proxy_data.groupby(['Proxy Time', 'Proxy ID']).agg({
        "Proxy X": ["mean"],
        "Proxy Y": ["mean"],
        "Proxy Z": ["mean"],
        "Proxy Magnitude": ["mean", rms],
        "Proxy ENMO": ["mean", rms],
    }).reset_index()

    # Rename Columns: flatten the two-level (column, statistic) headers produced
    # by agg into single names, in the same order as the agg spec above.
    # Actigraph
    acti_agg.columns = ['Actigraph Time', 'Actigraph ID', 'Actigraph MEAN X', 'Actigraph MEAN Y', 'Actigraph MEAN Z',
                        'Actigraph MEAN Magnitude', 'Actigraph RMS Magnitude', 'Actigraph MEAN ENMO', 'Actigraph RMS ENMO']
    # Proxy
    proxy_agg.columns = ['Proxy Time', 'Proxy ID', 'Proxy MEAN X', 'Proxy MEAN Y', 'Proxy MEAN Z', 'Proxy MEAN MAGNITUDE',
                         'Proxy RMS MAGNITUDE', 'Proxy MEAN ENMO', 'Proxy RMS ENMO']

    # Combine actigraph with proxy on the (second-resolution) timestamp.
    data_agg = acti_agg.merge(
        proxy_agg, how='left', left_on='Actigraph Time', right_on='Proxy Time'
    ).drop(columns=['Proxy Time'])

    # Read in aggregated trial, keeping only its leading columns.
    old_agg = pd.read_csv(
        agg_path, parse_dates=['Actigraph Time'], infer_datetime_format=True
    ).iloc[:, :N_ORIGINAL_AGG_COLUMNS]

    # Merge old aggregated data with newly calculated values
    new_agg = old_agg.merge(data_agg, how='left', on=['Actigraph Time', 'Actigraph ID', 'Proxy ID'])

    # Write to file (overwrites the aggregate CSV that was just read).
    new_agg.to_csv(agg_path, index=False)


    
    

In [17]:
# Destination CSV for the combined aggregate of the device being processed.
# For the Apple data, use this assignment instead of the one below:
# path_out = "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data\\Apple\\Apple_Aggregated.csv"
path_out = (
    "V:\\R01 - W4K\\2_Shaker project\\Devices Evaluated\\Trial Data\\Processed data"
    "\\Garmin\\Garmin_Aggregated.csv"
)

In [20]:
# Load every per-trial aggregate file and stack them with a single concat call.
# Concatenating inside the loop re-copies all previously loaded rows on every
# iteration; collecting the frames first and concatenating once gives the same
# rows, in the same order, with the same (repeating) per-file index.
agg_frames = [pd.read_csv(trial) for trial in agg_trials]

# With no files, out_df stays None (pd.concat would raise on an empty list).
out_df = pd.concat(agg_frames) if agg_frames else None

In [21]:
# Drop the 'Order' and 'Proxy Time' columns, then write the combined frame.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second execution no longer raises KeyError because the
# columns were already removed by the first one.
out_df = out_df.drop(columns=['Order', 'Proxy Time'], errors='ignore')
out_df.to_csv(path_out, index=False)

In [22]:
# Display the combined frame; the notebook renders a truncated view of it.
out_df

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph Time,Actigraph ID,Actigraph RMS X,Actigraph RMS Y,Actigraph RMS Z,Actigraph MAX MAGNITUDE,Actigraph MAX ENMO,...,Actigraph RMS Magnitude,Actigraph MEAN ENMO,Actigraph RMS ENMO,Proxy MEAN X,Proxy MEAN Y,Proxy MEAN Z,Proxy MEAN MAGNITUDE,Proxy RMS MAGNITUDE,Proxy MEAN ENMO,Proxy RMS ENMO
0,1,1,3.2 Hz,3/9/2022 11:29,36,0.312557,0.360333,1.005505,1.260623,0.260623,...,1.112911,0.111665,0.123460,-0.051520,-0.039840,-0.973440,1.081635,1.082829,0.081635,0.096168
1,1,1,3.2 Hz,3/9/2022 11:29,36,0.323552,0.355961,1.000845,1.247093,0.247093,...,1.110444,0.109465,0.118972,-0.014080,0.008160,-0.966080,1.078353,1.079598,0.078812,0.093917
2,1,1,3.2 Hz,3/9/2022 11:29,36,0.332514,0.357078,1.003839,1.260624,0.260624,...,1.116137,0.115010,0.125471,-0.045600,-0.028000,-0.954400,1.066017,1.066959,0.067842,0.079273
3,1,1,3.2 Hz,3/9/2022 11:29,36,0.316704,0.339678,1.006520,1.233777,0.233777,...,1.108497,0.107451,0.117737,0.012480,-0.042720,-0.965120,1.075420,1.076785,0.077743,0.092586
4,1,1,3.2 Hz,3/9/2022 11:29,36,0.332499,0.363242,1.004429,1.277456,0.277456,...,1.118650,0.117671,0.126629,-0.011200,0.019680,-0.960480,1.070860,1.071879,0.072338,0.084562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4195,10,5,0.6 Hz,2022-03-18 14:14:10,210,0.023000,0.023000,0.887000,0.887596,0.000000,...,0.887596,0.000000,0.000000,0.036800,-0.014560,-0.955680,0.956588,0.956611,0.000000,0.000000
4196,10,5,0.6 Hz,2022-03-18 14:14:11,210,0.023000,0.023000,0.887000,0.887596,0.000000,...,0.887596,0.000000,0.000000,0.028333,-0.010167,-0.957167,0.957691,0.957717,0.000000,0.000000
4197,10,5,0.6 Hz,2022-03-18 14:14:12,210,0.023000,0.023000,0.887000,0.887596,0.000000,...,0.887596,0.000000,0.000000,0.036960,-0.019360,-0.953760,0.954754,0.954788,0.000000,0.000000
4198,10,5,0.6 Hz,2022-03-18 14:14:13,210,0.023000,0.023000,0.887000,0.887596,0.000000,...,0.887596,0.000000,0.000000,0.028160,-0.010720,-0.956160,0.956725,0.956756,0.000000,0.000000


In [30]:
# Read back the first aggregate file listed in agg_trials and display it for inspection.
test = pd.read_csv(agg_trials[0])
test

Unnamed: 0,Trial Number,Round Number,Speed,Actigraph Time,Actigraph ID,Actigraph RMS X,Actigraph RMS Y,Actigraph RMS Z,Actigraph MAX Magnitude,Actigraph MAX ENMO,...,Actigraph RMS Magnitude,Actigraph MEAN ENMO,Actigraph RMS ENMO,Proxy MEAN X,Proxy MEAN Y,Proxy MEAN Z,Proxy MEAN MAGNITUDE,Proxy RMS MAGNITUDE,Proxy MEAN ENMO,Proxy RMS ENMO
0,1,1,3.2 Hz,2022-03-09 11:29:45,36,0.312557,0.360333,1.005505,1.260623,0.260623,...,1.112911,0.111665,0.123460,-0.051520,-0.039840,-0.973440,1.081635,1.082829,0.081635,0.096168
1,1,1,3.2 Hz,2022-03-09 11:29:46,36,0.323552,0.355961,1.000845,1.247093,0.247093,...,1.110444,0.109465,0.118972,-0.014080,0.008160,-0.966080,1.078353,1.079598,0.078812,0.093917
2,1,1,3.2 Hz,2022-03-09 11:29:47,36,0.332514,0.357078,1.003839,1.260624,0.260624,...,1.116137,0.115010,0.125471,-0.045600,-0.028000,-0.954400,1.066017,1.066959,0.067842,0.079273
3,1,1,3.2 Hz,2022-03-09 11:29:48,36,0.316704,0.339678,1.006520,1.233777,0.233777,...,1.108497,0.107451,0.117737,0.012480,-0.042720,-0.965120,1.075420,1.076785,0.077743,0.092586
4,1,1,3.2 Hz,2022-03-09 11:29:49,36,0.332499,0.363242,1.004429,1.277456,0.277456,...,1.118650,0.117671,0.126629,-0.011200,0.019680,-0.960480,1.070860,1.071879,0.072338,0.084562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4195,1,1,0.6 Hz,2022-03-09 11:42:40,113,0.020000,0.023000,1.047000,1.047444,0.047444,...,1.047444,0.047444,0.047444,-0.015273,-0.048727,-1.034000,1.035307,1.035343,0.035307,0.036338
4196,1,1,0.6 Hz,2022-03-09 11:42:41,113,0.020000,0.023000,1.047000,1.047444,0.047444,...,1.047444,0.047444,0.047444,-0.015167,-0.059333,-1.033667,1.035542,1.035565,0.035542,0.036213
4197,1,1,0.6 Hz,2022-03-09 11:42:42,113,0.020000,0.023000,1.047000,1.047444,0.047444,...,1.047444,0.047444,0.047444,-0.011091,-0.050364,-1.029273,1.030643,1.030665,0.030643,0.031400
4198,1,1,0.6 Hz,2022-03-09 11:42:43,113,0.020000,0.023000,1.047000,1.047444,0.047444,...,1.047444,0.047444,0.047444,-0.021636,-0.058000,-1.030909,1.032819,1.032831,0.032819,0.033187


In [31]:
# One (group key, sub-frame) pair per distinct (Proxy ID, Actigraph ID)
# combination. Building the list straight from the groupby works for any number
# of combinations, whereas unpacking into ten fixed names raised ValueError
# whenever the file did not contain exactly ten of them.
tests = list(test.groupby(["Proxy ID", "Actigraph ID"]))

In [32]:
# For each (Proxy ID, Actigraph ID) group, print how many rows repeat an
# 'Actigraph Time' already seen in that group; a shape of (0,) means none.
# The loop variables are deliberately not called `test`, so the `test`
# DataFrame is not overwritten and the grouping cell can be re-run afterwards.
for group_number, (group_key, group_frame) in enumerate(tests, start=1):
    dupes = group_frame.duplicated(subset='Actigraph Time')
    print(f"T{group_number}")
    # Boolean-mask the Series with itself to keep only the flagged rows.
    print(dupes.loc[dupes].shape)

T1
(0,)
T2
(0,)
T3
(0,)
T4
(0,)
T5
(0,)
T6
(0,)
T7
(0,)
T8
(0,)
T9
(0,)
T10
(0,)
