**[Log 01.11.23]**  
With a 5-day memory capacity, the Fitbit smartwatch can retain minute-resolution data for 5 days between two synchronisations. After this period, the watch stops recording minute data and only records on a daily basis.  
As we have access to both minute and daily Fitbit data, we just have to compare the minute data resampled by day to the daily data. If we observe a difference on day n, we can tell that there was no synchronisation 5 days after (n + 4).

In [6]:
import numpy as np
import pandas as pd
import os
import re
import glob
import shutil
from datetime import date

In [11]:
#1- Import MINUTE data

# Folder holding the raw Fitbit minute data (Steps, MET, Calories, Intensity);
# adjust this path to wherever the files are stored.
path_min = os.path.join(os.getcwd(),'sample/min')

# Maps a subject key to the minute-level DataFrame read from its CSV file.
dict_min = {}

for csv_name in os.listdir(path_min):
    # Anything in the folder that is not a CSV file is skipped.
    if not csv_name.endswith(".csv"):
        continue
    # The key is the first 15 characters of the file name (the length of
    # 'Fitbit sample 1'); files sharing that prefix overwrite one another.
    subject = csv_name[:15]
    minute_df = pd.read_csv(os.path.join(path_min, csv_name)).set_index('ActivityMinute')
    # Turn the index into timestamps so the data can be resampled by day.
    minute_df.index = pd.to_datetime(minute_df.index)
    minute_df['ID'] = subject
    dict_min[subject] = minute_df

#2- Resample the minute data as daily

# Per-day aggregation: steps and calories are summed over the day,
# METs and intensity are averaged over the day.
daily_aggregations = {'Steps': 'sum',
                      'Calories': 'sum',
                      'METs': 'mean',
                      'Intensity': 'mean'}

# Same keys as dict_min; each value is the minute data collapsed to one row per day.
dict_min_d = {
    subject: minute_df.resample('D').agg(daily_aggregations)
    for subject, minute_df in dict_min.items()
}

#3- Import DAILY data

# Folder holding the raw Fitbit daily data (Steps, MET, Calories, Intensity);
# adjust this path to wherever the files are stored.
path_day = os.path.join(os.getcwd(),'sample/daily')

# Maps a subject key to the day-level DataFrame read from its CSV file.
dict_day = {}

for csv_name in os.listdir(path_day):
    # Anything in the folder that is not a CSV file is skipped.
    if not csv_name.endswith(".csv"):
        continue
    # The key is the first 15 characters of the file name (the length of
    # 'Fitbit sample 1'); files sharing that prefix overwrite one another.
    subject = csv_name[:15]
    daily_df = pd.read_csv(os.path.join(path_day, csv_name)).set_index('ActivityDay')
    # Turn the index into timestamps so it can be aligned with other date-indexed data.
    daily_df.index = pd.to_datetime(daily_df.index)
    daily_df['ID'] = subject
    dict_day[subject] = daily_df

In [12]:
#4- Estimate each day as synched or not, based on the difference between the minute and the daily data

merged_dict = {}

X = 0.1 #threshold = 10% of the subject's mean daily value
# Row offset handed to shift(): with -4, the SYNCH value of a row is read from
# the row located 4 positions later.
# NOTE(review): this equals "4 days later" only if the merged index holds one
# row per consecutive calendar day - confirm against the input files.
memory_capacity = -4

# Only subjects present in both the minute and the daily datasets are processed.
for key in dict_min_d.keys() & dict_day.keys():
    df_min_d = dict_min_d[key]
    df_day = dict_day[key]

    #here we merge the minute data resampled by day and the daily data;
    #the suffixes tell the two sources apart (_m = minute, _d = daily)
    merged_df = pd.concat([df_min_d.add_suffix('_m'),
                           df_day.add_suffix('_d')], axis=1)

    #then we calculate the absolute difference between them
    merged_df['diff_step'] = (merged_df['StepTotal_d'] - merged_df['Steps_m']).abs()
    merged_df['diff_cal'] = (merged_df['Calories_d'] - merged_df['Calories_m']).abs()

    #a day is "ok" when its difference is strictly below X times the mean daily value of the subject
    mean_step_total_d = merged_df['StepTotal_d'].mean()
    threshold_step = X * mean_step_total_d
    merged_df['diff_step_ok'] = merged_df['diff_step'] < threshold_step

    mean_calories_d = merged_df['Calories_d'].mean()
    threshold_cal = X * mean_calories_d
    merged_df['diff_cal_ok'] = merged_df['diff_cal'] < threshold_cal

    #data is considered lost for a day as soon as steps OR calories disagree between
    #the minute and the daily data, i.e. whenever the two checks are not both ok
    both_ok = merged_df['diff_step_ok'] & merged_df['diff_cal_ok']
    merged_df['DATA_LOST'] = ~both_ok
    #SYNCH is the "both ok" flag moved by memory_capacity rows; the rows left
    #without a source value after the shift are missing (NaN), not False
    merged_df['SYNCH'] = both_ok.shift(memory_capacity)

    #store the dates as 'YYYY-MM-DD' strings, both as index and as a regular column
    merged_df.index = pd.to_datetime(merged_df.index).strftime('%Y-%m-%d')
    merged_df['Date'] = merged_df.index

#   merged_df = merged_df.filter(columns_to_keep)
    merged_dict[key] = merged_df

In [13]:
#for instance, we can display the date where our subject forgot to synch his/her watch 
false_rows = merged_dict['Fitbit sample 1'].loc[merged_dict['Fitbit sample 1']['SYNCH'] == False]
false_rows

Unnamed: 0,Steps_m,Calories_m,METs_m,Intensity_m,Index_d,Calories_d,StepTotal_d,SedentaryMinutes_d,LightlyActiveMinutes_d,FairlyActiveMinutes_d,...,ModeratelyActiveDistance_d,VeryActiveDistance_d,ID_d,diff_step,diff_cal,diff_step_ok,diff_cal_ok,DATA_LOST,SYNCH,Date
2020-10-14,0,1353.887959,0.9,0.0,21,1353,0,1440,0,0,...,0.0,0.0,Fitbit sample 1,0,0.887959,True,True,False,False,2020-10-14
2020-12-13,10847,2054.901091,1.400486,0.220833,81,2054,10869,1208,174,30,...,1.46,1.97,Fitbit sample 1,22,0.901091,True,True,False,False,2020-12-13
2021-02-22,9022,2029.296173,1.390208,0.211806,152,2032,9034,1158,268,5,...,0.25,0.68,Fitbit sample 1,12,2.703827,True,True,False,False,2021-02-22


In [14]:
#Saving results
save_path = os.path.join(os.getcwd(),'results')

# Create the output folder when it is missing; exist_ok=True avoids the
# check-then-create race of testing os.path.exists() first.
os.makedirs(save_path, exist_ok=True)

# One CSV per subject, named after its key; the date index is written as the first column.
for key, df in merged_dict.items():
    file_path = os.path.join(save_path, f"{key}.csv")
    df.to_csv(file_path, index=True)
