In [1]:
from glob import glob
from datetime import datetime, timedelta
import pandas as pd
import numpy as np
import math
import os

In [2]:
def read_apple(path):
    """Load the Apple heart-rate series from one participant CSV.

    Parameters
    ----------
    path : str
        CSV file. Its header line is searched for the text
        ``Apple Mean Heart Rate``; when present, the ``Time`` and
        ``Apple Mean Heart Rate`` columns are read.

    Returns
    -------
    apple_data : numpy.ndarray or None
        Two columns, ``[whole seconds since the first kept row, heart rate]``,
        after rows with a missing value in either column are dropped.
        ``None`` when the header has no Apple column or no row survives.
    start : float or None
        ``Timestamp.timestamp()`` of the first kept row (pandas treats a naive
        timestamp as UTC here). ``None`` whenever ``apple_data`` is ``None``.
    """
    hr_column = 'Apple Mean Heart Rate'

    # Only the header line is needed to decide whether the device is present.
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as file:
        header = file.readline()
    if hr_column not in header:
        return None, None

    # `usecols` does not control column order, so re-select explicitly: the
    # code below relies on Time being column 0. `infer_datetime_format` is
    # deprecated in pandas 2 and is therefore not passed.
    apple_data = pd.read_csv(path, usecols=['Time', hr_column], parse_dates=['Time'])
    apple_data = apple_data[['Time', hr_column]].dropna()
    if apple_data.shape[0] == 0:
        # Column exists but holds no usable samples (same convention as read_fitbit).
        return None, None

    apple_data['Time'] = apple_data['Time'].apply(lambda x: x.timestamp())
    start = apple_data.iloc[0, 0]
    # Truncate to whole seconds elapsed since the first sample.
    apple_data['Time'] = apple_data['Time'].apply(lambda x: int(x - start))
    return apple_data.values, start

In [3]:
def read_garmin(path):
    """Load the Garmin heart-rate series from one participant CSV.

    Parameters
    ----------
    path : str
        CSV file. Its header line is searched for the text
        ``Garmin Mean Heart Rate``; when present, the ``Time`` and
        ``Garmin Mean Heart Rate`` columns are read.

    Returns
    -------
    garmin_data : numpy.ndarray or None
        Two columns, ``[whole seconds since the first kept row, heart rate]``,
        after rows with a missing value in either column are dropped.
        ``None`` when the header has no Garmin column or no row survives.
    start : float or None
        ``Timestamp.timestamp()`` of the first kept row (pandas treats a naive
        timestamp as UTC here). ``None`` whenever ``garmin_data`` is ``None``.
    """
    hr_column = 'Garmin Mean Heart Rate'

    # Only the header line is needed to decide whether the device is present.
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as file:
        header = file.readline()
    if hr_column not in header:
        return None, None

    # `usecols` does not control column order, so re-select explicitly: the
    # code below relies on Time being column 0. `infer_datetime_format` is
    # deprecated in pandas 2 and is therefore not passed.
    garmin_data = pd.read_csv(path, usecols=['Time', hr_column], parse_dates=['Time'])
    garmin_data = garmin_data[['Time', hr_column]].dropna()
    if garmin_data.shape[0] == 0:
        # Column exists but holds no usable samples (same convention as read_fitbit).
        return None, None

    garmin_data['Time'] = garmin_data['Time'].apply(lambda x: x.timestamp())
    start = garmin_data.iloc[0, 0]
    # Truncate to whole seconds elapsed since the first sample.
    garmin_data['Time'] = garmin_data['Time'].apply(lambda x: int(x - start))
    return garmin_data.values, start

In [4]:
def read_fitbit(path):
    """Load the Fitbit heart-rate series from one participant CSV.

    Parameters
    ----------
    path : str
        CSV file. Its header line is searched for the text
        ``Fitbit Mean Heart Rate``; when present, the ``Time`` and
        ``Fitbit Mean Heart Rate`` columns are read.

    Returns
    -------
    fitbit_data : numpy.ndarray or None
        Two columns, ``[whole seconds since the first kept row, heart rate]``,
        after rows with a missing value in either column are dropped.
        ``None`` when the header has no Fitbit column or no row survives.
    start : float or None
        ``Timestamp.timestamp()`` of the first kept row (pandas treats a naive
        timestamp as UTC here). ``None`` whenever ``fitbit_data`` is ``None``.
    """
    hr_column = 'Fitbit Mean Heart Rate'

    # Only the header line is needed to decide whether the device is present.
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as file:
        header = file.readline()
    if hr_column not in header:
        return None, None

    # `usecols` does not control column order, so re-select explicitly: the
    # code below relies on Time being column 0. `infer_datetime_format` is
    # deprecated in pandas 2 and is therefore not passed.
    fitbit_data = pd.read_csv(path, usecols=['Time', hr_column], parse_dates=['Time'])
    fitbit_data = fitbit_data[['Time', hr_column]].dropna()
    if fitbit_data.shape[0] == 0:
        # Column exists but holds no usable samples.
        return None, None

    fitbit_data['Time'] = fitbit_data['Time'].apply(lambda x: x.timestamp())
    start = fitbit_data.iloc[0, 0]
    # Truncate to whole seconds elapsed since the first sample.
    fitbit_data['Time'] = fitbit_data['Time'].apply(lambda x: int(x - start))
    return fitbit_data.values, start

In [5]:
def read_actiheart(path):
    """Load the Actiheart heart-rate series from one participant CSV.

    Parameters
    ----------
    path : str
        CSV file. Its header line is searched for the text
        ``Actiheart Mean Heart Rate``; when present, the ``Time`` and
        ``Actiheart Mean Heart Rate`` columns are read.

    Returns
    -------
    actiheart_data : numpy.ndarray or pandas.DataFrame
        On success, a two-column array
        ``[whole seconds since the first kept row, heart rate]`` built from
        the rows that have no missing value. When the header has no Actiheart
        column, or no row survives, an *empty DataFrame* with the two column
        names is returned instead (not ``None``), so ``.shape[0]`` is 0.
    start : float or None
        ``Timestamp.timestamp()`` of the first kept row (pandas treats a naive
        timestamp as UTC here); ``None`` when there is no data.
    """
    hr_column = 'Actiheart Mean Heart Rate'

    # Only the header line is needed to decide whether the device is present.
    # The context manager closes the handle even if reading fails.
    with open(path, 'r') as file:
        header = file.readline()
    if hr_column not in header:
        # Return an empty DataFrame when there is no Actiheart data
        return pd.DataFrame(columns=['Time', hr_column]), None

    # `usecols` does not control column order, so re-select explicitly: the
    # code below relies on Time being column 0. `infer_datetime_format` is
    # deprecated in pandas 2 and is therefore not passed.
    actiheart_data = pd.read_csv(path, usecols=['Time', hr_column], parse_dates=['Time'])
    actiheart_data = actiheart_data[['Time', hr_column]].dropna()
    if actiheart_data.shape[0] == 0:
        # Column exists but holds no usable samples: same "empty" result as above
        # instead of failing on iloc[0, 0].
        return pd.DataFrame(columns=['Time', hr_column]), None

    actiheart_data['Time'] = actiheart_data['Time'].apply(lambda x: x.timestamp())
    start = actiheart_data.iloc[0, 0]
    # Truncate to whole seconds elapsed since the first sample.
    actiheart_data['Time'] = actiheart_data['Time'].apply(lambda x: int(x - start))
    return actiheart_data.values, start

In [6]:
def hr_metrics(some_data, start_time, ds_begin_2022, ds_end_2022,ds_begin_2023,ds_end_2023,secagg):
    """Compute centred rolling-window heart-rate statistics.

    Parameters
    ----------
    some_data : array-like with ``.shape`` and 2-D indexing
        Column 0 is elapsed seconds since ``start_time``; column 1 is heart
        rate. The window scan assumes column 0 is in ascending order.
    start_time : float
        Epoch seconds of elapsed-time zero, as produced by pandas
        ``Timestamp.timestamp()`` on the original (naive) timestamp.
    ds_begin_2022, ds_end_2022, ds_begin_2023, ds_end_2023 : datetime
        Accepted for backward compatibility and no longer used. The earlier
        implementation converted with ``datetime.fromtimestamp`` (machine-local
        time) and then added 4 or 5 hours depending on these windows, which
        only reproduced the original timestamps on a US-Eastern machine for
        dates in these two years.
    secagg : number
        Total window length in seconds; each window spans
        ``(t - secagg/2, t + secagg/2]`` around the sample at time ``t``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Time``, ``Heart Rate Mean``, ``Heart Rate STD``,
        ``Heart Rate Coefficient of Variation``, ``Heart Rate Min``,
        ``Heart Rate Max``; one row per sample that has a full window on both
        sides. Empty (same columns) when no sample qualifies.
    """
    metric_columns = ['Heart Rate Mean', 'Heart Rate STD', 'Heart Rate Coefficient of Variation',
                      'Heart Rate Min', 'Heart Rate Max']
    half_window = secagg / 2
    n_rows = some_data.shape[0]
    output = {}

    for i in range(n_rows):
        centre = some_data[i, 0]
        # A sample qualifies once half a window (+1 s) has elapsed and while at
        # least half a window of data remains after it.
        if centre >= half_window + 1 and centre <= some_data[-1, 0] - half_window:
            # Walk back to the last row at or before the window's open left edge.
            j = i
            while j >= 0 and some_data[j, 0] > centre - half_window:
                j -= 1
            # Walk forward to the first row past the window's closed right edge.
            k = i
            while k < n_rows and some_data[k, 0] <= centre + half_window:
                k += 1
            window = some_data[j+1:k, 1]

            win_mean = np.mean(window)
            win_std = np.std(window)
            # Coefficient of variation is undefined for a zero mean; report NaN
            # explicitly rather than dividing by zero.
            win_cov = win_std / win_mean if win_mean != 0 else np.nan
            win_min = np.min(window)
            win_max = np.max(window)
            # Keyed by elapsed time, so a repeated timestamp keeps its last window.
            output[centre] = [win_mean, win_std, win_cov, win_min, win_max]

    if not output:
        # Nothing qualified (too little data for this window length): return a
        # typed, empty frame so callers can still merge on 'Time'.
        empty = {'Time': pd.Series(dtype='datetime64[ns]')}
        for name in metric_columns:
            empty[name] = pd.Series(dtype='float64')
        return pd.DataFrame(empty)

    output_df = pd.DataFrame.from_dict(output, orient='index', columns=metric_columns)
    output_df = output_df.rename_axis('Time').reset_index()

    # start_time is a UTC-based epoch of a naive timestamp, so converting back
    # in UTC restores the original wall-clock time on any machine and in any
    # season. timedelta rounds to microseconds, like fromtimestamp did.
    epoch = datetime(1970, 1, 1)
    output_df['Time'] = output_df['Time'].apply(
        lambda x: epoch + timedelta(seconds=float(x + start_time)))

    return output_df


## Run code below to calculate Apple HR Metrics

In [7]:
# Apple: for every "<4-digit id>.csv" in pa_dir, compute rolling heart-rate
# metrics and write them, left-joined onto that same table, into out_dir.
pa_dir = "C:/Users/sambit/Desktop/Apple/New_Agg_Metrics/"
out_dir = "C:/Users/sambit/Desktop/Apple/Combined_Apple_Metrics_With_Agg_60_seconds/"


# Date windows forwarded to hr_metrics.
ds_begin_2022 = datetime(year=2022, month=3, day=13)
ds_end_2022 = datetime(year=2022, month=11, day=6)
ds_begin_2023 = datetime(year=2023, month=3, day=12)
ds_end_2023 = datetime(year=2023, month=11, day=5)

participant_paths = glob(pa_dir + "[0-9][0-9][0-9][0-9].csv")
total_participant = len(participant_paths)
curr_participant = 1
timewindow = 60  # rolling-window length in seconds passed to hr_metrics
noagglist=[]     # participants for whom no Apple heart-rate data was found
for participant in participant_paths:
    # The id is the four characters that follow the directory prefix.
    participant_id = participant[len(pa_dir):len(pa_dir)+4]
    print(f"Processing Participant {participant_id} {curr_participant}/{total_participant}")
    try:
        # Read in data
        apple_hr, start = read_apple(participant)

        # read_apple returns None when the file has no Apple column, so test
        # for None before touching .shape; otherwise the missing-data case is
        # misreported as an error and never reaches noagglist.
        has_hr = apple_hr is not None and apple_hr.shape[0] > 0

        if has_hr and os.path.isfile(pa_dir + participant_id + ".csv"):
            # Calculate Metrics
            hr_df = hr_metrics(apple_hr, start, ds_begin_2022, ds_end_2022,ds_begin_2023,ds_end_2023, timewindow)

            # Define Agg path
            agg_path = pa_dir + participant_id + ".csv"

            # Read in data (infer_datetime_format is deprecated in pandas 2 and not needed)
            agg_df = pd.read_csv(agg_path, parse_dates=['Time'])

            # Merge the dataframes and write to file
            out_path = out_dir + participant_id + "_combined_metrics.csv"
            merged_df = pd.merge(agg_df, hr_df, on='Time', how='left')
            merged_df.to_csv(out_path, index=False)

        else:
            print(f"No Apple Data for {participant_id}")
            noagglist.append(participant_id)
    except Exception as e:
        # Keep going with the remaining participants, but say what failed.
        print(f"An error occurred for Participant {participant_id}: {str(e)}")


    curr_participant += 1
print(noagglist)

Processing Participant 0118 1/143
Processing Participant 0231 2/143
Processing Participant 0444 3/143
Processing Participant 0601 4/143
Processing Participant 0618 5/143
Processing Participant 0632 6/143
Processing Participant 0695 7/143
Processing Participant 0967 8/143
Processing Participant 1779 9/143
Processing Participant 1854 10/143
Processing Participant 1974 11/143
Processing Participant 1991 12/143
Processing Participant 2007 13/143
Processing Participant 2069 14/143
Processing Participant 2113 15/143
Processing Participant 2186 16/143
Processing Participant 2187 17/143
Processing Participant 2271 18/143
Processing Participant 2272 19/143
Processing Participant 2321 20/143
Processing Participant 2355 21/143
Processing Participant 2433 22/143
Processing Participant 2453 23/143
Processing Participant 2454 24/143
Processing Participant 2455 25/143
Processing Participant 2456 26/143
Processing Participant 2457 27/143
Processing Participant 2458 28/143
Processing Participant 2470 2

## Run code below to calculate Garmin HR Metrics

In [9]:
# Garmin: for every "<4-digit id>.csv" in pa_dir, compute rolling heart-rate
# metrics and write them, left-joined onto that same table, into out_dir.
pa_dir = "C:/Users/sambit/Desktop/Garmin/New_Agg_Metrics/"
out_dir = "C:/Users/sambit/Desktop/Garmin/Combined_Garmin_Metrics_With_Agg_20_seconds/"


# Date windows forwarded to hr_metrics.
ds_begin_2022 = datetime(year=2022, month=3, day=13)
ds_end_2022 = datetime(year=2022, month=11, day=6)
ds_begin_2023 = datetime(year=2023, month=3, day=12)
ds_end_2023 = datetime(year=2023, month=11, day=5)

participant_paths = glob(pa_dir + "[0-9][0-9][0-9][0-9].csv")
total_participant = len(participant_paths)
curr_participant = 1
timewindow = 20  # rolling-window length in seconds passed to hr_metrics
noagglist=[]     # participants for whom no Garmin heart-rate data was found
for participant in participant_paths:
    # The id is the four characters that follow the directory prefix.
    participant_id = participant[len(pa_dir):len(pa_dir)+4]
    print(f"Processing Participant {participant_id} {curr_participant}/{total_participant}")
    try:
        # Read in data
        garmin_hr, start = read_garmin(participant)

        # read_garmin returns None when the file has no Garmin column, so test
        # for None before touching .shape; otherwise the missing-data case is
        # misreported as an error and never reaches noagglist.
        has_hr = garmin_hr is not None and garmin_hr.shape[0] > 0

        if has_hr and os.path.isfile(pa_dir + participant_id + ".csv"):
            # Calculate Metrics
            hr_df = hr_metrics(garmin_hr, start, ds_begin_2022, ds_end_2022,ds_begin_2023,ds_end_2023, timewindow)

            # Define Agg path
            agg_path = pa_dir + participant_id + ".csv"

            # Read in data (infer_datetime_format is deprecated in pandas 2 and not needed)
            agg_df = pd.read_csv(agg_path, parse_dates=['Time'])

            # Merge the dataframes and write to file
            out_path = out_dir + participant_id + "_combined_metrics.csv"
            merged_df = pd.merge(agg_df, hr_df, on='Time', how='left')
            merged_df.to_csv(out_path, index=False)

        else:
            print(f"No Garmin Data for {participant_id}")
            noagglist.append(participant_id)
    except Exception as e:
        # Keep going with the remaining participants, but say what failed.
        print(f"An error occurred for Participant {participant_id}: {str(e)}")


    curr_participant += 1
print(noagglist)

Processing Participant 0118 1/131
Processing Participant 0444 2/131
Processing Participant 0601 3/131
Processing Participant 0819 4/131
Processing Participant 0967 5/131
Processing Participant 1333 6/131
Processing Participant 1760 7/131
Processing Participant 1779 8/131
Processing Participant 1799 9/131
Processing Participant 1854 10/131
Processing Participant 1974 11/131
Processing Participant 1991 12/131
Processing Participant 2007 13/131
Processing Participant 2031 14/131
Processing Participant 2069 15/131
Processing Participant 2113 16/131
Processing Participant 2186 17/131
Processing Participant 2187 18/131
Processing Participant 2271 19/131
Processing Participant 2272 20/131
Processing Participant 2453 21/131
Processing Participant 2454 22/131
Processing Participant 2455 23/131
Processing Participant 2456 24/131
Processing Participant 2457 25/131
Processing Participant 2458 26/131
Processing Participant 2470 27/131
Processing Participant 2471 28/131
Processing Participant 2473 2

## Run code below to calculate Fitbit HR Metrics

In [8]:
# Fitbit: for every "<4-digit id>.csv" in pa_dir, compute rolling heart-rate
# metrics and write them, left-joined onto that same table, into out_dir.
pa_dir = "C:/Users/sambit/Desktop/Fitbit/New_Agg_Metrics/"
out_dir = "C:/Users/sambit/Desktop/Fitbit/Combined_Fitbit_Metrics_With_Agg_60_seconds/"


# Date windows forwarded to hr_metrics.
ds_begin_2022 = datetime(year=2022, month=3, day=13)
ds_end_2022 = datetime(year=2022, month=11, day=6)
ds_begin_2023 = datetime(year=2023, month=3, day=12)
ds_end_2023 = datetime(year=2023, month=11, day=5)

participant_paths = glob(pa_dir + "[0-9][0-9][0-9][0-9].csv")
total_participant = len(participant_paths)
curr_participant = 1
timewindow = 60  # rolling-window length in seconds passed to hr_metrics
noagglist=[]     # participants for whom no Fitbit heart-rate data was found
for participant in participant_paths:
    # The id is the four characters that follow the directory prefix.
    participant_id = participant[len(pa_dir):len(pa_dir)+4]
    print(f"Processing Participant {participant_id} {curr_participant}/{total_participant}")

    # Same error handling as the Apple and Garmin cells: one bad file must not
    # abort the remaining participants.
    try:
        # Read in data
        fitbit_hr, start = read_fitbit(participant)

        # read_fitbit returns None when there is no usable Fitbit data, so test
        # for None before touching .shape (previously this raised
        # AttributeError and stopped the whole loop).
        has_hr = fitbit_hr is not None and fitbit_hr.shape[0] > 0

        if has_hr and os.path.isfile(pa_dir + participant_id + ".csv"):
            # Calculate Metrics
            hr_df = hr_metrics(fitbit_hr, start, ds_begin_2022, ds_end_2022,ds_begin_2023,ds_end_2023, timewindow)

            # Define Agg path
            agg_path = pa_dir + participant_id + ".csv"

            # Read in data (infer_datetime_format is deprecated in pandas 2 and not needed)
            agg_df = pd.read_csv(agg_path, parse_dates=['Time'])

            # Merge the dataframes and write to file
            out_path = out_dir + participant_id + "_combined_metrics.csv"
            merged_df = pd.merge(agg_df, hr_df, on='Time', how='left')
            merged_df.to_csv(out_path, index=False)

        else:
            print(f"No Fitbit Data for {participant_id}")
            noagglist.append(participant_id)
    except Exception as e:
        print(f"An error occurred for Participant {participant_id}: {str(e)}")

    curr_participant += 1
print(noagglist)

Processing Participant 0231 1/91
Processing Participant 0618 2/91
Processing Participant 0632 3/91
Processing Participant 0695 4/91
Processing Participant 1333 5/91
Processing Participant 1760 6/91
Processing Participant 1772 7/91
Processing Participant 1799 8/91
Processing Participant 2031 9/91
Processing Participant 2321 10/91
Processing Participant 2433 11/91
Processing Participant 2532 12/91
Processing Participant 2596 13/91
Processing Participant 2635 14/91
Processing Participant 2637 15/91
Processing Participant 2653 16/91
Processing Participant 2657 17/91
Processing Participant 2661 18/91
Processing Participant 2662 19/91
Processing Participant 2663 20/91
Processing Participant 2664 21/91
Processing Participant 2665 22/91
Processing Participant 2666 23/91
Processing Participant 2667 24/91
Processing Participant 2668 25/91
Processing Participant 2669 26/91
Processing Participant 2670 27/91
Processing Participant 2671 28/91
Processing Participant 2672 29/91
Processing Participant 

## Run code below to calculate Actigraph HR Metrics

In [8]:
# For every "<id>/*_agg.csv" under pa_dir, compute rolling heart-rate metrics
# from the Actiheart column and left-join them onto the participant's
# accelerometer metrics file.
pa_dir = "V:/ACOI/R01 - W4K/3_PA protocol/1_Participants/"
out_dir = "C:/Users/sambit/Desktop/Actigraph/"

# hr_metrics takes two date windows and a window length; this cell used to
# call it with a single (ds_begin, ds_end) pair, which raises TypeError
# against the current signature. The windows below match the other cells.
ds_begin_2022 = datetime(year=2022, month=3, day=13)
ds_end_2022 = datetime(year=2022, month=11, day=6)
ds_begin_2023 = datetime(year=2023, month=3, day=12)
ds_end_2023 = datetime(year=2023, month=11, day=5)
# NOTE(review): 30 s is inferred from the "15 seconds before/after" comments in
# hr_metrics; the Actiheart cell elsewhere in this notebook uses 20. Confirm
# the intended window before relying on this output.
timewindow = 30

participant_paths = glob(pa_dir + "[0-9][0-9][0-9][0-9]/*_agg.csv")
total_participant = len(participant_paths)
curr_participant = 1

for participant in participant_paths:
    # The id is the four characters that follow the directory prefix.
    participant_id = participant[len(pa_dir):len(pa_dir)+4]
    print(f"Processing Participant {participant_id} {curr_participant}/{total_participant}")
    # Read in data
    actiheart_hr, start = read_actiheart(participant)
    # Define Accel path
    accel_path = out_dir +"Accel_Metrics/" + participant_id + "_accel_metrics.csv"
    # read_actiheart signals "no data" with an empty table rather than None,
    # so the row count has to be checked as well.
    has_hr = actiheart_hr is not None and actiheart_hr.shape[0] > 0
    if has_hr and os.path.isfile(accel_path):
        # Calculate Metrics
        hr_df = hr_metrics(actiheart_hr, start, ds_begin_2022, ds_end_2022, ds_begin_2023, ds_end_2023, timewindow)
        # Read in data (infer_datetime_format is deprecated in pandas 2 and not needed)
        accel_df = pd.read_csv(accel_path, parse_dates=['Time'])
        # Merge to data sets then write to file
        out_path = out_dir + "Combined_Metrics/"+ participant_id + "_combined_metrics.csv"
        accel_df.merge(hr_df, on='Time', how='left').to_csv(out_path, index=False)

    else:
        print(f"No Actiheart Data for {participant_id}")

    curr_participant += 1

Processing Participant 2113 1/1
No Actiheart Data for 2113


## Run code below to calculate Actiheart HR Metrics

In [28]:
# Actiheart: compute rolling heart-rate metrics for the selected participant
# file(s) in pa_dir and write them, left-joined onto that same table, into
# out_dir.
pa_dir = "C:/Users/sambit/Desktop/Actiheart/New_Agg_Metrics/"
out_dir = "C:/Users/sambit/Desktop/Actiheart/Combined_Actiheart_Metrics_With_Agg_20_seconds/"


# Date windows forwarded to hr_metrics.
ds_begin_2022 = datetime(year=2022, month=3, day=13)
ds_end_2022 = datetime(year=2022, month=11, day=6)
ds_begin_2023 = datetime(year=2023, month=3, day=12)
ds_end_2023 = datetime(year=2023, month=11, day=5)

# NOTE(review): the pattern selects only participant 2501, unlike the other
# device cells which use "[0-9][0-9][0-9][0-9].csv" -- confirm this is intended.
participant_paths = glob(pa_dir + "2501.csv")
total_participant = len(participant_paths)
curr_participant = 1
timewindow = 20  # rolling-window length in seconds passed to hr_metrics
noagglist=[]     # participants for whom no Actiheart heart-rate data was found
for participant in participant_paths:
    # The id is the four characters that follow the directory prefix.
    participant_id = participant[len(pa_dir):len(pa_dir)+4]
    print(f"Processing Participant {participant_id} {curr_participant}/{total_participant}")

    try:
        # Read in data
        actiheart_hr, start = read_actiheart(participant)

        has_hr = actiheart_hr is not None and actiheart_hr.shape[0] > 0

        if has_hr and os.path.isfile(pa_dir + participant_id + ".csv"):
            # Calculate Metrics
            hr_df = hr_metrics(actiheart_hr, start, ds_begin_2022, ds_end_2022,ds_begin_2023,ds_end_2023, timewindow)

            # Define Agg path
            agg_path = pa_dir + participant_id + ".csv"

            # Read in data (infer_datetime_format is deprecated in pandas 2 and not needed)
            agg_df = pd.read_csv(agg_path, parse_dates=['Time'])

            # Merge the dataframes and write to file
            out_path = out_dir + participant_id + "_combined_metrics.csv"
            merged_df = pd.merge(agg_df, hr_df, on='Time', how='left')
            merged_df.to_csv(out_path, index=False)

        else:
            print(f"No Actiheart Data for {participant_id}")
            noagglist.append(participant_id)
    except Exception as e:
        # Previously a bare `except: pass`, which hid every failure (the run
        # printed an empty list and wrote nothing without saying why).
        print(f"An error occurred for Participant {participant_id}: {str(e)}")
    curr_participant += 1
print(noagglist)

Processing Participant 2501 1/1
[]


## Run code below to Merge Agg with Combined

In [49]:
# For every "<id>_combined_metrics.csv", copy its metric columns onto the rows
# of the participant's agg file that carry the same Time value, and write the
# widened agg table to out_dir.
combined_dir="C:/Users/sambit/Desktop/Actiheart/Combined_Metrics/"
out_dir = "C:/Users/sambit/Desktop/Actiheart/Combined_Metrics_With_Agg/"
ds_begin = datetime(year=2022, month=3, day=13)
ds_end = datetime(year=2023, month=7, day=6)
participant_paths = glob(combined_dir + "[0-9][0-9][0-9][0-9]_combined_metrics.csv")
total_participant = len(participant_paths)
curr_participant = 1
column_values=[]

# Columns added to the agg table. They are filled, in this order, from the
# combined file's columns 1..25 (column 0 of the combined file is its time).
metric_columns = ["Dominant Frequency","Power","Mag Mean","Mag STD","Mag CoV","Mag Min","Mag Max","Mag 10th Percentile","Mag 25th Percentile","Mag 50th Percentile","Mag 75th Percentile","Mag 90th Percentile",
                  "Mag 3rd Moment","Mag 4th Moment","Mag Skewness","Mag Kurtosis","Mag 1sec Autocorrelation","XY Correlation","XZ Correlation","YZ Correlation",
                  "Heart Rate Mean","Heart Rate STD","Heart Rate Coefficient of Variation","Heart Rate Min","Heart Rate Max"]
agg_time_col = 2  # position of the time column in the agg file

for participant in participant_paths:
    combined_file=pd.read_csv(participant)
    combined_file=combined_file.to_numpy()
    # File name is "<id>_combined_metrics.csv": the id sits 25..21 characters from the end.
    participant_id=participant[-25:-21]
    agg_file=pd.read_csv(f"V:/ACOI/R01 - W4K/3_PA protocol/1_Participants/{participant_id}/{participant_id}_agg.csv")
    agg_file[metric_columns]=np.nan
    column_values.clear()
    column_values.extend(agg_file.columns)
    # Locate the metric columns by name: agg files do not all have the same
    # number of columns, so a fixed 67:92 slice only fits some participants.
    target_positions = [agg_file.columns.get_loc(name) for name in metric_columns]
    print(f"Combining Participant {participant_id} {curr_participant}/{total_participant}")
    agg_file=agg_file.to_numpy()

    if combined_file.shape[1] < 1 + len(metric_columns):
        # Previously swallowed by a bare except; report it instead.
        print(f"Combined file for {participant_id} has {combined_file.shape[1]} columns, "
              f"expected at least {1 + len(metric_columns)}; metrics left empty")
    else:
        # First combined row for each time value (the old inner loop stopped
        # at the first match). One dictionary lookup per agg row replaces the
        # row-by-row scan.
        first_row_for_time = {}
        for row_number, stamp in enumerate(combined_file[:, 0]):
            if not pd.isna(stamp) and stamp not in first_row_for_time:
                first_row_for_time[stamp] = row_number

        for idx in range(len(agg_file)):
            match = first_row_for_time.get(agg_file[idx, agg_time_col])
            if match is not None:
                # Copy from the MATCHED combined row; the earlier code indexed
                # the combined file with the agg row number, pairing unrelated rows.
                agg_file[idx, target_positions] = combined_file[match, 1:1 + len(metric_columns)]

    agg_file=pd.DataFrame(agg_file,columns=column_values)
    agg_file.to_csv(out_dir+f"{participant_id}_agg_combined.csv")
    curr_participant =curr_participant+ 1



Combining Participant 0118 1/93
Combining Participant 0444 2/93
Combining Participant 0601 3/93
Combining Participant 0967 4/93
Combining Participant 1779 5/93
Combining Participant 1854 6/93
Combining Participant 1974 7/93
Combining Participant 1991 8/93
Combining Participant 2007 9/93
Combining Participant 2069 10/93
Combining Participant 2113 11/93
Combining Participant 2186 12/93
Combining Participant 2187 13/93
Combining Participant 2271 14/93
Combining Participant 2272 15/93
Combining Participant 2453 16/93
Combining Participant 2454 17/93
Combining Participant 2455 18/93
Combining Participant 2456 19/93
Combining Participant 2457 20/93
Combining Participant 2458 21/93
Combining Participant 2470 22/93
Combining Participant 2471 23/93
Combining Participant 2473 24/93
Combining Participant 2474 25/93
Combining Participant 2475 26/93
Combining Participant 2476 27/93
Combining Participant 2479 28/93
Combining Participant 2480 29/93
Combining Participant 2495 30/93
Combining Participa

In [10]:
# For every participant folder, copy the Apple / K5 columns that exist in
# "<id>_agg.csv" into "<id>_agg_metrics.csv" under out_dir, provided enough of
# them are present.
agg_dir="V:/ACOI/R01 - W4K/3_PA protocol/1_Participants/"
out_dir = "C:/Users/sambit/Desktop/Apple/Agg_Metrics/"


participant_paths = glob(agg_dir + "[0-9][0-9][0-9][0-9]/")
total_participant = len(participant_paths)
curr_participant = 1
column_values=[]
agg_file_path=""
# Wanted columns, in output order (the same for every participant).
columns= ["Activity","Flags","Time","Apple Mean Heart Rate","Apple HR Low","Apple HR High","Apple HR Change","Apple RMS X","Apple Mean X","Apple RMS Y","Apple Mean Y","Apple RMS Z","Apple Mean Z","Apple Max Magnitude","Apple Mean Magnitude","Apple RMS Magnitude","Apple Max ENMO","Apple Mean ENMO","Apple RMS ENMO","Apple MAD","K5 Rf","K5 VT","K5 VE","K5 IV","K5 VO2","K5 VCO2","K5 O2exp","K5 CO2exp","K5 VE/VO2","K5 VE/VCO2","K5 VO2/Kg","K5 METS","K5 RQ"]
min_columns = 28  # a file is written only when at least this many wanted columns exist
for participant in participant_paths:
    # glob returns the folder with a trailing separator; the id is the four characters before it.
    p_id = participant[-5:-1]
    print(f"Agg file {p_id} {curr_participant}/{total_participant}")

    agg_file_path = participant + f"{p_id}_agg.csv"
    out_path = out_dir + f"{p_id}_agg_metrics.csv"
    if os.path.exists(agg_file_path):
        agg_df=pd.read_csv(agg_file_path)
        # Compare against the parsed column names. A substring search in the
        # raw header line treats "K5 VE" as present whenever "K5 VE/VO2" is,
        # and then fails on the column lookup.
        present = [column for column in columns if column in agg_df.columns]
        df = agg_df[present]
        col=df.columns
        print(len(col))
        if len(col)>=min_columns:
            df.to_csv(out_path, index=False)

    else:
        print("No File")

    curr_participant += 1


Agg file 0118 1/192
33
Agg file 0231 2/192
32
Agg file 0444 3/192
32
Agg file 0601 4/192
33
Agg file 0618 5/192
33
Agg file 0632 6/192
33
Agg file 0695 7/192
33
Agg file 0819 8/192
16
Agg file 0967 9/192
33
Agg file 1333 10/192
16
Agg file 1760 11/192
16
Agg file 1772 12/192
15
Agg file 1779 13/192
33
Agg file 1799 14/192
15
Agg file 1854 15/192
33
Agg file 1974 16/192
32
Agg file 1991 17/192
33
Agg file 2007 18/192
32
Agg file 2031 19/192
16
Agg file 2069 20/192
33
Agg file 2113 21/192
33
Agg file 2186 22/192
30
Agg file 2187 23/192
31
Agg file 2271 24/192
33
Agg file 2272 25/192
33
Agg file 2321 26/192
32
Agg file 2355 27/192
32
Agg file 2433 28/192
33
Agg file 2453 29/192
32
Agg file 2454 30/192
33
Agg file 2455 31/192
32
Agg file 2456 32/192
33
Agg file 2457 33/192
31
Agg file 2458 34/192
33
Agg file 2470 35/192
32
Agg file 2471 36/192
32
Agg file 2473 37/192
33
Agg file 2474 38/192
33
Agg file 2475 39/192
28
Agg file 2476 40/192
32
Agg file 2479 41/192
32
Agg file 2480 42/192
33
A

  agg_df=pd.read_csv(agg_file_path)


33
Agg file 2678 123/192
No File
Agg file 2679 124/192
33
Agg file 2680 125/192
16
Agg file 2681 126/192
16
Agg file 2683 127/192
15
Agg file 2684 128/192
31
Agg file 2685 129/192
33
Agg file 2686 130/192


  agg_df=pd.read_csv(agg_file_path)


15
Agg file 2687 131/192
14
Agg file 2688 132/192
32
Agg file 2689 133/192
16
Agg file 2690 134/192
16
Agg file 2691 135/192
16
Agg file 2692 136/192
32
Agg file 2706 137/192
33
Agg file 2709 138/192
16
Agg file 2710 139/192
33
Agg file 2711 140/192
32
Agg file 2712 141/192
16
Agg file 2753 142/192
32
Agg file 2774 143/192
33
Agg file 2775 144/192
33
Agg file 2776 145/192
33
Agg file 2778 146/192
32
Agg file 2779 147/192
32
Agg file 2780 148/192
16
Agg file 2781 149/192
16
Agg file 2782 150/192
33
Agg file 2783 151/192
33
Agg file 2788 152/192
15
Agg file 2789 153/192
16
Agg file 2790 154/192
33
Agg file 2792 155/192
16
Agg file 2793 156/192
15
Agg file 2794 157/192
33
Agg file 2795 158/192
32
Agg file 2796 159/192
33
Agg file 2797 160/192
32
Agg file 2798 161/192
33
Agg file 2801 162/192
33
Agg file 2812 163/192
31
Agg file 2813 164/192
32
Agg file 2814 165/192
32


  agg_df=pd.read_csv(agg_file_path)


Agg file 2815 166/192
33
Agg file 2816 167/192
15
Agg file 2817 168/192
16
Agg file 2818 169/192
15
Agg file 2819 170/192
32
Agg file 2820 171/192
32
Agg file 2821 172/192
32
Agg file 2822 173/192
33
Agg file 2823 174/192
33
Agg file 2824 175/192


  agg_df=pd.read_csv(agg_file_path)


16
Agg file 2987 176/192
16
Agg file 2988 177/192
33
Agg file 2989 178/192
16
Agg file 2990 179/192
33
Agg file 2991 180/192
16
Agg file 2993 181/192
33
Agg file 2995 182/192
33
Agg file 2996 183/192
16
Agg file 2999 184/192
33
Agg file 3057 185/192
33
Agg file 3058 186/192
33
Agg file 3059 187/192
33
Agg file 3060 188/192
33


  agg_df=pd.read_csv(agg_file_path)


Agg file 3063 189/192
16
Agg file 3160 190/192
33
Agg file 3200 191/192
16
Agg file 3201 192/192
16


In [3]:
# For every participant folder, copy the Garmin / K5 columns that exist in
# "<id>_agg.csv" into "<id>_agg_metrics.csv" under out_dir, provided enough of
# them are present.
agg_dir="V:/ACOI/R01 - W4K/3_PA protocol/1_Participants/"
out_dir = "C:/Users/sambit/Desktop/Garmin/Agg_Metrics/"


participant_paths = glob(agg_dir + "[0-9][0-9][0-9][0-9]/")
total_participant = len(participant_paths)
curr_participant = 1
column_values=[]
agg_file_path=""
# Wanted columns, in output order (the same for every participant).
columns= ["Activity","Flags","Time","Garmin Mean Heart Rate","Garmin HR Low","Garmin HR High","Garmin HR Change","Garmin RMS X","Garmin Mean X",
          "Garmin RMS Y","Garmin Mean Y","Garmin RMS Z","Garmin Mean Z","Garmin Max Magnitude","Garmin Mean Magnitude","Garmin RMS Magnitude",
          "Garmin Max ENMO","Garmin Mean ENMO","Garmin RMS ENMO","Garmin MAD","K5 Rf","K5 VT","K5 VE","K5 IV","K5 VO2","K5 VCO2","K5 O2exp","K5 CO2exp","K5 VE/VO2","K5 VE/VCO2","K5 VO2/Kg","K5 METS","K5 RQ"]
min_columns = 28  # a file is written only when at least this many wanted columns exist
for participant in participant_paths:
    # glob returns the folder with a trailing separator; the id is the four characters before it.
    p_id = participant[-5:-1]
    print(f"Agg file {p_id} {curr_participant}/{total_participant}")

    agg_file_path = participant + f"{p_id}_agg.csv"
    out_path = out_dir + f"{p_id}_agg_metrics.csv"
    if os.path.exists(agg_file_path):
        agg_df=pd.read_csv(agg_file_path)
        # Compare against the parsed column names. A substring search in the
        # raw header line treats "K5 VE" as present whenever "K5 VE/VO2" is,
        # and then fails on the column lookup.
        present = [column for column in columns if column in agg_df.columns]
        df = agg_df[present]
        col=df.columns
        print(len(col))
        if len(col)>=min_columns:
            df.to_csv(out_path, index=False)

    else:
        print("No File")

    curr_participant += 1


Agg file 0118 1/200
33
Agg file 0231 2/200
15
Agg file 0444 3/200
32
Agg file 0601 4/200
33
Agg file 0618 5/200
16
Agg file 0632 6/200
16
Agg file 0695 7/200
16
Agg file 0819 8/200
33
Agg file 0967 9/200
33
Agg file 1333 10/200
33
Agg file 1760 11/200
33
Agg file 1772 12/200
15
Agg file 1779 13/200
33
Agg file 1799 14/200
32
Agg file 1854 15/200
33
Agg file 1974 16/200
32
Agg file 1991 17/200
33
Agg file 2007 18/200
32
Agg file 2031 19/200
33
Agg file 2069 20/200
33
Agg file 2113 21/200
33
Agg file 2186 22/200
30
Agg file 2187 23/200
31
Agg file 2271 24/200
33
Agg file 2272 25/200
33
Agg file 2321 26/200
15
Agg file 2355 27/200
15
Agg file 2433 28/200
16
Agg file 2453 29/200
32
Agg file 2454 30/200
33
Agg file 2455 31/200
32
Agg file 2456 32/200
33
Agg file 2457 33/200
31
Agg file 2458 34/200
33
Agg file 2470 35/200
32
Agg file 2471 36/200
32
Agg file 2473 37/200
33
Agg file 2474 38/200
33
Agg file 2475 39/200
15
Agg file 2476 40/200
32
Agg file 2479 41/200
32
Agg file 2480 42/200
33
A

  agg_df=pd.read_csv(agg_file_path)


16
Agg file 2678 123/200
No File
Agg file 2679 124/200
16
Agg file 2680 125/200
33
Agg file 2681 126/200
33
Agg file 2683 127/200
32
Agg file 2684 128/200
31
Agg file 2685 129/200
16
Agg file 2686 130/200


  agg_df=pd.read_csv(agg_file_path)


32
Agg file 2687 131/200
31
Agg file 2688 132/200
15
Agg file 2689 133/200
33
Agg file 2690 134/200
33
Agg file 2691 135/200
33
Agg file 2692 136/200
15
Agg file 2706 137/200
16
Agg file 2709 138/200
33
Agg file 2710 139/200
16
Agg file 2711 140/200
15
Agg file 2712 141/200
33
Agg file 2753 142/200
15
Agg file 2774 143/200
16
Agg file 2775 144/200
16
Agg file 2776 145/200
16
Agg file 2778 146/200
15
Agg file 2779 147/200
15
Agg file 2780 148/200
33
Agg file 2781 149/200
33
Agg file 2782 150/200
16
Agg file 2783 151/200
16
Agg file 2788 152/200
32
Agg file 2789 153/200
33
Agg file 2790 154/200
16
Agg file 2792 155/200
33
Agg file 2793 156/200
32
Agg file 2794 157/200
16
Agg file 2795 158/200
15
Agg file 2796 159/200
16
Agg file 2797 160/200
15
Agg file 2798 161/200
16
Agg file 2801 162/200
16
Agg file 2812 163/200
14
Agg file 2813 164/200
15
Agg file 2814 165/200


  agg_df=pd.read_csv(agg_file_path)


15
Agg file 2815 166/200
16
Agg file 2816 167/200
15
Agg file 2817 168/200
33
Agg file 2818 169/200
32
Agg file 2819 170/200
16
Agg file 2820 171/200
15
Agg file 2821 172/200
15
Agg file 2822 173/200
16
Agg file 2823 174/200
16
Agg file 2824 175/200
33
Agg file 2987 176/200
33
Agg file 2988 177/200
16
Agg file 2989 178/200
16
Agg file 2990 179/200
16
Agg file 2991 180/200
33
Agg file 2993 181/200
16
Agg file 2995 182/200
16
Agg file 2996 183/200
16
Agg file 2999 184/200
16
Agg file 3057 185/200
16
Agg file 3058 186/200
16
Agg file 3059 187/200
16
Agg file 3060 188/200


  agg_df=pd.read_csv(agg_file_path)


16
Agg file 3063 189/200
33
Agg file 3160 190/200
16
Agg file 3200 191/200
16
Agg file 3201 192/200
33
Agg file 3202 193/200
16
Agg file 3298 194/200
33
Agg file 3299 195/200
33
Agg file 9900 196/200
16
Agg file 9901 197/200
16
Agg file 9967 198/200
16
Agg file 9998 199/200
16
Agg file 9999 200/200
33


In [None]:
# Reassigns the notebook-wide pa_dir / out_dir names. The cell that follows in
# this notebook works from its own agg_dir and does not read either of them.
pa_dir = "V:/ACOI/R01 - W4K/3_PA protocol/1_Participants/"
out_dir = "C:/Users/sambit/Desktop/Actiheart/"

## Streamline the column names

In [8]:
# Rename the legacy "Apple Heart Rate" column to "Apple Mean Heart Rate" in
# each participant's "<id>_agg.csv". The file is modified IN PLACE.
agg_dir="V:/ACOI/R01 - W4K/3_PA protocol/1_Participants/"
participant_paths = glob(agg_dir + "[0-9][0-9][0-9][0-9]/")
total_participant = len(participant_paths)
curr_participant = 1

old_name = "Apple Heart Rate"
new_name = 'Apple Mean Heart Rate'

for participant in participant_paths:
    # glob returns the folder with a trailing separator; the id is the four characters before it.
    p_id = participant[-5:-1]
    print(f"Agg file {p_id} {curr_participant}/{total_participant}")
    agg_file_path = participant + f"{p_id}_agg.csv"
    if os.path.exists(agg_file_path):
        # Parse only the header to decide whether anything needs doing.
        header = pd.read_csv(agg_file_path, nrows=0).columns

        if old_name in header and new_name in header:
            # Renaming would produce two columns with the same name.
            print(f"Both '{old_name}' and '{new_name}' present for {p_id}; left unchanged")
        elif old_name in header:
            agg_df = pd.read_csv(agg_file_path)
            agg_df = agg_df.rename(columns={old_name: new_name})
            # Rewrite only files that actually changed. The earlier version
            # re-saved every source file through pandas, and did so while the
            # file was still open for reading.
            agg_df.to_csv(agg_file_path, index=False)

    else:
        print("No Agg File")

    curr_participant += 1

Agg file 0118 1/192
Agg file 0231 2/192
Agg file 0444 3/192
Agg file 0601 4/192
Agg file 0618 5/192
Agg file 0632 6/192
Agg file 0695 7/192
Agg file 0819 8/192
Agg file 0967 9/192
Agg file 1333 10/192
Agg file 1760 11/192
Agg file 1772 12/192
Agg file 1779 13/192
Agg file 1799 14/192
Agg file 1854 15/192
Agg file 1974 16/192
Agg file 1991 17/192
Agg file 2007 18/192
Agg file 2031 19/192
Agg file 2069 20/192
Agg file 2113 21/192
Agg file 2186 22/192
Agg file 2187 23/192
Agg file 2271 24/192
Agg file 2272 25/192
Agg file 2321 26/192
Agg file 2355 27/192
Agg file 2433 28/192
Agg file 2453 29/192
Agg file 2454 30/192
Agg file 2455 31/192
Agg file 2456 32/192
Agg file 2457 33/192
Agg file 2458 34/192
Agg file 2470 35/192
Agg file 2471 36/192
Agg file 2473 37/192
Agg file 2474 38/192
Agg file 2475 39/192
Agg file 2476 40/192
Agg file 2479 41/192
Agg file 2480 42/192
Agg file 2495 43/192
Agg file 2496 44/192
Agg file 2497 45/192
Agg file 2498 46/192
Agg file 2499 47/192
Agg file 2500 48/192
A

  agg_df = pd.read_csv(agg_file_path)


Agg file 2678 123/192
No Agg File
Agg file 2679 124/192
Agg file 2680 125/192
Agg file 2681 126/192
Agg file 2683 127/192
Agg file 2684 128/192
Agg file 2685 129/192
Agg file 2686 130/192


  agg_df = pd.read_csv(agg_file_path)


Agg file 2687 131/192
Agg file 2688 132/192
Agg file 2689 133/192
Agg file 2690 134/192
Agg file 2691 135/192
Agg file 2692 136/192
Agg file 2706 137/192
Agg file 2709 138/192
Agg file 2710 139/192
Agg file 2711 140/192
Agg file 2712 141/192
Agg file 2753 142/192
Agg file 2774 143/192
Agg file 2775 144/192
Agg file 2776 145/192
Agg file 2778 146/192
Agg file 2779 147/192
Agg file 2780 148/192
Agg file 2781 149/192
Agg file 2782 150/192
Agg file 2783 151/192
Agg file 2788 152/192
Agg file 2789 153/192
Agg file 2790 154/192
Agg file 2792 155/192
Agg file 2793 156/192
Agg file 2794 157/192
Agg file 2795 158/192
Agg file 2796 159/192
Agg file 2797 160/192
Agg file 2798 161/192
Agg file 2801 162/192
Agg file 2812 163/192
Agg file 2813 164/192
Agg file 2814 165/192


  agg_df = pd.read_csv(agg_file_path)


Agg file 2815 166/192
Agg file 2816 167/192
Agg file 2817 168/192
Agg file 2818 169/192
Agg file 2819 170/192
Agg file 2820 171/192
Agg file 2821 172/192
Agg file 2822 173/192
Agg file 2823 174/192
Agg file 2824 175/192


  agg_df = pd.read_csv(agg_file_path)


Agg file 2987 176/192
Agg file 2988 177/192
Agg file 2989 178/192
Agg file 2990 179/192
Agg file 2991 180/192
Agg file 2993 181/192
Agg file 2995 182/192
Agg file 2996 183/192
Agg file 2999 184/192
Agg file 3057 185/192
Agg file 3058 186/192
Agg file 3059 187/192
Agg file 3060 188/192


  agg_df = pd.read_csv(agg_file_path)


Agg file 3063 189/192
Agg file 3160 190/192
Agg file 3200 191/192
Agg file 3201 192/192
