**[Log 31.10.23]**  
Starting from the beginning. Here we propose different ways to evaluate, for each day, whether a Fitbit watch was worn or not.
- import minute data from Fitbit
- (check and clean the data: 0<MET<18, no duplicate or incorrect values in dates, etc.). **This step is not included in this code. Also, keep in mind that the names of the columns may change.**
- apply different evaluating techniques

In [1]:
import numpy as np
import pandas as pd
import os
import re
import glob
import shutil
from datetime import date

In [2]:
#1- Import MINUTE data

# Folder where the raw Fitbit minute data (Steps, MET, Calories, Intensity) are stored
path_min = '/Users/mgg/Documents/GitHub/drePAnon/data-processing/Fitbit-wearing/sample'

# Maps the first three characters of each CSV file name to its minute-level DataFrame
dict_min = {}

csv_names = [name for name in os.listdir(path_min) if name.endswith(".csv")]

for csv_name in csv_names:
    minute_df = pd.read_csv(os.path.join(path_min, csv_name)).set_index('ActivityMinute')
    minute_df.index = pd.to_datetime(minute_df.index)

    # Lowest per-minute Calories value of each calendar day, repeated on every minute of that day
    daily_floor = minute_df.resample('D')['Calories'].transform('min')
    minute_df['DailyCalMin'] = daily_floor

    # A minute counts as active when its Calories value is above that day's minimum
    minute_df['ActiveMin'] = minute_df['Calories'].gt(minute_df['DailyCalMin'])

    # Replace the file's own Index/ID columns by an ID taken from the file name
    minute_df = minute_df.drop(columns=['Index', 'ID'])
    participant = csv_name[:3]
    minute_df['ID'] = participant
    dict_min[participant] = minute_df

# Daily summary per participant: total Steps and Calories, mean METs and Intensity
daily_aggregations = {
    'Steps': 'sum',
    'Calories': 'sum',
    'METs': 'mean',
    'Intensity': 'mean',
}

dict_min_d = {
    participant: minute_df.resample('D').agg(daily_aggregations)
    for participant, minute_df in dict_min.items()
}

In [3]:
# Show the daily table stored under one ID as an example
example_id = '001'
dict_min_d[example_id]

Unnamed: 0_level_0,Steps,Calories,METs,Intensity
ActivityMinute,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-23,0,1353.887959,0.900000,0.000000
2020-09-24,0,1353.887959,0.900000,0.000000
2020-09-25,0,1353.887959,0.900000,0.000000
2020-09-26,0,1353.887959,0.900000,0.000000
2020-09-27,0,1353.887959,0.900000,0.000000
...,...,...,...,...
2021-02-27,10739,2083.630571,1.427014,0.209722
2021-02-28,8144,1883.249052,1.283403,0.186111
2021-03-01,3563,1587.876007,1.071250,0.059028
2021-03-02,5665,1786.196571,1.215347,0.139583


In [4]:
#2- Estimate each day as worn or not according to different methods

# Thresholds used by the three methods below
duration_of_PA_min = 600 #minutes: Method 1, minimum number of active minutes in a day
duration_of_PA_hour = 10 #hours: Method 2, minimum number of active hours in a day
number_active_minutes = 1 #minutes: Method 2, active minutes needed for an hour to count as active
number_of_steps = 3000 #steps: Method 3, minimum number of steps in a day

# Maps each ID to a daily DataFrame with the boolean columns WornDay1, WornDay2, WornDay3
result_dict = {}

for key, df in dict_min.items():

    # Per-minute activity flag, kept as a local Series so the frames stored in
    # dict_min are not modified by this cell.
    active_min = df['Calories'] > df['DailyCalMin']

    # Method 1: Number of active minutes per day
    n_active_min = active_min.resample('D').sum()
    worn_day1 = (n_active_min >= duration_of_PA_min).rename('WornDay1')

    # Method 2: Number of hours per days containing at least X active minutes
    # The hourly series stays on its own hourly index instead of being stored in the
    # minute-indexed frame, where index alignment would only keep the value found at
    # each exact HH:00 timestamp. Lowercase 'h' is used because the uppercase 'H'
    # alias is deprecated in recent pandas versions.
    active_hour = active_min.resample('h').sum() >= number_active_minutes
    n_active_hours = active_hour.resample('D').sum()
    worn_day2 = (n_active_hours >= duration_of_PA_hour).rename('WornDay2')

    # Method 3: Number of steps per day
    # Only the Steps column is summed; the other columns are not needed here.
    daily_steps = df['Steps'].resample('D').sum()
    worn_day3 = (daily_steps >= number_of_steps).rename('WornDay3')

    # Concat all methods, and add them to the results
    result_dict[key] = pd.concat([worn_day1, worn_day2, worn_day3], axis=1).rename_axis('Date')

# Append the worn/not-worn flags to the daily summaries.
# Flag columns left by a previous run of this cell are dropped first, so running
# the cell again does not create duplicated WornDay columns.
for key, daily_df in list(dict_min_d.items()):
    if key in result_dict:
        result_df = result_dict[key]
        daily_base = daily_df.drop(columns=result_df.columns, errors='ignore')
        dict_min_d[key] = pd.concat([daily_base, result_df], axis=1)

In [5]:
# Show the daily table stored under one ID as an example
example_id = '001'
dict_min_d[example_id]

Unnamed: 0,Steps,Calories,METs,Intensity,WornDay1,WornDay2,WornDay3
2020-09-23,0,1353.887959,0.900000,0.000000,False,False,False
2020-09-24,0,1353.887959,0.900000,0.000000,False,False,False
2020-09-25,0,1353.887959,0.900000,0.000000,False,False,False
2020-09-26,0,1353.887959,0.900000,0.000000,False,False,False
2020-09-27,0,1353.887959,0.900000,0.000000,False,False,False
...,...,...,...,...,...,...,...
2021-02-27,10739,2083.630571,1.427014,0.209722,True,True,True
2021-02-28,8144,1883.249052,1.283403,0.186111,False,True,True
2021-03-01,3563,1587.876007,1.071250,0.059028,False,False,True
2021-03-02,5665,1786.196571,1.215347,0.139583,False,True,True


In [6]:
#Saving results
# Folder that receives one CSV file per ID
save_path = '/Users/mgg/Documents/GitHub/drePAnon/data-processing/Fitbit-wearing/results'

# exist_ok=True creates the folder only when it is missing, without a separate
# exists() check that could race with another process creating the folder.
os.makedirs(save_path, exist_ok=True)

for key, df in dict_min_d.items():
    # One file per ID, named "<ID>.csv"; the date index is written as the first column
    file_path = os.path.join(save_path, f"{key}.csv")
    df.to_csv(file_path, index=True)