# Scoring Fitbit as GPAQ

In [1]:
import numpy as np
import pandas as pd
import os

## Import files

In [5]:
path_min = ''  # insert the path of the folder you filled with all .csv files

# One DataFrame per input file, keyed by the file name (extension included)
data_dict = {}

# Keep only the .csv entries so that temporary files in the folder are skipped
csv_names = [entry for entry in os.listdir(path_min) if entry.endswith('.csv')]

for csv_name in csv_names:
    minutes = pd.read_csv(os.path.join(path_min, csv_name))
    # Parse the timestamps and use them as index: the per-day resampling done
    # in the scoring cells relies on a datetime index
    minutes['ActivityMinute'] = pd.to_datetime(minutes['ActivityMinute'])
    data_dict[csv_name] = minutes.set_index('ActivityMinute')

### Method 1 : using Fitbit PA intensity scale rating
- Intensity = 0 : sed
- Intensity = 2 : MVPA
- Intensity = 3 : VPA
- Intensity = 2 or 3 : PAtot (MVPA + VPA) **WARNING: this is highly debatable! Light PA (LVPA, Intensity = 1) is not counted in the PA total.**

In [7]:
################ VPA & MVPA based on FITBIT Intensity
# Fitbit intensity code -> activity label. Code 1 (light activity) is left
# unlabelled on purpose: it counts neither as sedentary nor towards the PA total.
FITBIT_INTENSITY_LABELS = {0: 'sed', 2: 'MVPA', 3: 'VPA'}

# MET weight applied to each minute when converting minutes to MET-minutes
MVPA_MET_WEIGHT = 4
VPA_MET_WEIGHT = 8

# Results of Method 1: one per-day summary DataFrame per input file
resampled_IntensityFITBIT = {}

for key, df in data_dict.items():
    # Daily resampling needs a DatetimeIndex (no-op when the index already is one)
    df.index = pd.to_datetime(df.index)

    # 1. Label every minute from its Fitbit intensity code (unmapped codes -> NaN)
    #    and keep the label on the source frame as a categorical column
    labels = df['Intensity'].map(FITBIT_INTENSITY_LABELS)
    df['IntensityFITBIT'] = labels.astype('category')

    # 2. Count the number of minutes per day (sed, MVPA, VPA).
    #    Each count is the daily sum of a boolean mask built on the FULL minute
    #    series, so the three columns share one index covering every recorded day.
    #    Resampling pre-filtered subsets instead would give each column its own
    #    date range, and days missing from the first column would be dropped.
    resampled_df = pd.DataFrame({
        'sedMinutes_IntensityFITBIT': (labels == 'sed').resample('D').sum(),
        'MVPAMinutes_IntensityFITBIT': (labels == 'MVPA').resample('D').sum(),
        'VPAMinutes_IntensityFITBIT': (labels == 'VPA').resample('D').sum(),
    })

    # 3. Convert the minutes to MET-minutes per day
    resampled_df['MVPA_IntensityFITBIT'] = resampled_df['MVPAMinutes_IntensityFITBIT'] * MVPA_MET_WEIGHT
    resampled_df['VPA_IntensityFITBIT'] = resampled_df['VPAMinutes_IntensityFITBIT'] * VPA_MET_WEIGHT
    resampled_df['PAtot_IntensityFITBIT'] = resampled_df['MVPA_IntensityFITBIT'] + resampled_df['VPA_IntensityFITBIT']

    # A day is flagged usable only when it contains some MVPA or VPA
    resampled_df['USABLE_IntensityFITBIT'] = resampled_df['PAtot_IntensityFITBIT'] > 0

    resampled_IntensityFITBIT[key] = resampled_df

### Method 2 : using GPAQ criteria on Fitbit data
- 1.5 MET or under : sed
- between 4 and 7.9 MET : MVPA
- over 8 : VPA
- 4 MET or over : PAtot (MVPA + VPA) **WARNING: this is highly debatable! Light PA (LVPA, between 1.5 and 4 MET) is not counted in the PA total.**

In [6]:
################ VPA & MVPA based on GPAQ
# MET thresholds used to classify each minute
SED_MAX_METS = 1.5   # sedentary: METs <= 1.5
MVPA_MIN_METS = 4    # moderate:  4 <= METs < 8
VPA_MIN_METS = 8     # vigorous:  METs >= 8

# MET weight applied to each minute when converting minutes to MET-minutes
MVPA_MET_WEIGHT = 4
VPA_MET_WEIGHT = 8

# Results of Method 2: one per-day summary DataFrame per input file
resampled_GPAQscoring = {}

for key, df in data_dict.items():
    # Daily resampling needs a DatetimeIndex (no-op when the index already is one)
    df.index = pd.to_datetime(df.index)

    # NOTE(review): some Fitbit/Fitabase minute exports store METs multiplied
    # by 10 - confirm the 'METs' column is in real MET units before relying on
    # these thresholds.
    mets = df['METs']

    # 1. Classify every minute. The MVPA and VPA bands are half-open and
    #    contiguous, so no value between 7.9 and 8 (8 included) is left without
    #    a label. Minutes between 1.5 and 4 MET (light activity) stay unlabelled.
    is_sed = mets <= SED_MAX_METS
    is_mvpa = (mets >= MVPA_MIN_METS) & (mets < VPA_MIN_METS)
    is_vpa = mets >= VPA_MIN_METS

    # Keep the label on the source frame as a categorical column
    df['IntensityGPAQ'] = None
    df.loc[is_sed, 'IntensityGPAQ'] = 'sed'
    df.loc[is_mvpa, 'IntensityGPAQ'] = 'MVPA'
    df.loc[is_vpa, 'IntensityGPAQ'] = 'VPA'
    df['IntensityGPAQ'] = df['IntensityGPAQ'].astype('category')

    # 2. Count the number of minutes per day (sed, MVPA, VPA).
    #    Each count is the daily sum of a boolean mask built on the FULL minute
    #    series, so the three columns share one index covering every recorded day.
    resampled_df = pd.DataFrame({
        'sedMinutes_GPAQscoring': is_sed.resample('D').sum(),
        'MVPAMinutes_GPAQscoring': is_mvpa.resample('D').sum(),
        'VPAMinutes_GPAQscoring': is_vpa.resample('D').sum(),
    })

    # 3. Convert the minutes to MET-minutes per day
    resampled_df['MVPA_GPAQscoring'] = resampled_df['MVPAMinutes_GPAQscoring'] * MVPA_MET_WEIGHT
    resampled_df['VPA_GPAQscoring'] = resampled_df['VPAMinutes_GPAQscoring'] * VPA_MET_WEIGHT
    resampled_df['PAtot_GPAQscoring'] = resampled_df['MVPA_GPAQscoring'] + resampled_df['VPA_GPAQscoring']

    # A day is flagged usable only when it contains some MVPA or VPA
    resampled_df['USABLE_GPAQscoring'] = resampled_df['PAtot_GPAQscoring'] > 0

    resampled_GPAQscoring[key] = resampled_df

### Gather all information, and calculate the average over 7 days

In [8]:
# Gather Method 1 and Method 2: per file, one row per day with both sets of columns
concat = {}

for participant, fitbit_days in resampled_IntensityFITBIT.items():
    gpaq_days = resampled_GPAQscoring[participant]
    # Left join on the daily index: Method 1 days are the reference
    both_methods = fitbit_days.join(gpaq_days)
    # Keep only the days flagged usable by BOTH methods
    # (a day missing from Method 2 has a NaN flag, which is not equal to True)
    usable_fitbit = both_methods['USABLE_IntensityFITBIT'] == True
    usable_gpaq = both_methods['USABLE_GPAQscoring'] == True
    concat[participant] = both_methods.loc[usable_fitbit & usable_gpaq]

# Calculate the average over 7 days: mean over the usable days, scaled to a week
results = []

for participant, usable_days in concat.items():
    n_days = len(usable_days.index)
    # Every column is summed, the boolean USABLE_* flags included
    weekly = usable_days.sum() / n_days * 7
    weekly['ID'] = participant
    results.append(weekly)

# One row per input file, indexed and sorted by ID, rounded to 2 decimals
result = pd.DataFrame(results)
result = result.set_index('ID').sort_index().round(2)

# Saving files

In [None]:
# Write the summary table (one row per input file) to a single CSV file
saving_path = ""  # set this to your destination folder
concatenated_filename = os.path.join(saving_path, "concatenated_data.csv")
# index=True keeps the ID index as the first column of the file
result.to_csv(path_or_buf=concatenated_filename, index=True)