# Scoring Fitbit as GPAQ

In [1]:
import numpy as np
import pandas as pd
import os

## Import files

In [2]:
path_min = os.path.join(os.getcwd(),'sample')

data_dict = {}

for file_name in os.listdir(path_min):
    if file_name.endswith('.csv'):
        file_path = os.path.join(path_min, file_name)
        key = file_name[:3]
        df = pd.read_csv(file_path)
        df['ActivityMinute'] = pd.to_datetime(df['ActivityMinute'])
        df = df.set_index('ActivityMinute')
        data_dict[key] = df

In [4]:
def filter_dataframe(df):
    df.index = pd.to_datetime(df.index)
    df['time_diff'] = df.index.to_series().diff()
    group_series = (df['time_diff'] > pd.Timedelta(minutes=10)).cumsum()
    filtered_df = df.groupby(group_series).filter(lambda group: group.index.size >= 10)
    filtered_df = filtered_df.drop(columns=['time_diff'])
    return filtered_df

def resample_and_sum(df):
    df.index = pd.to_datetime(df.index)
    resampled_df = df['MET.min'].resample('D').sum()
    return resampled_df

resampled_dict2 = {}

for key, df in data_dict.items():
    df['MPA'] = (df['METs'] >= 4) & (df['METs'] <= 7.9)
    df['VPA'] = df['METs'] >= 8
    df['PAtot'] = df['METs'] >= 4

    df['MET.min'] = 0
    df.loc[(df['METs'] >= 4) & (df['METs'] <= 7.9), 'MET.min'] = 4
    df.loc[df['METs'] >= 8, 'MET.min'] = 8

    df_MPA = df[df['MPA'] == True]
    df_VPA = df[df['VPA'] == True]
    df_PAtot = df[df['PAtot'] == True]

    filtered_df_PAtot = filter_dataframe(df_PAtot)
    filtered_df_MPA = filter_dataframe(df_MPA)
    filtered_df_VPA = filter_dataframe(df_VPA)

    if filtered_df_PAtot.index.size >= 10 or filtered_df_MPA.index.size >= 10 or filtered_df_VPA.index.size >= 10:
        resampled_PAtot = resample_and_sum(filtered_df_PAtot)
        resampled_MPA = resample_and_sum(filtered_df_MPA)
        resampled_VPA = resample_and_sum(filtered_df_VPA)

        full_index = pd.date_range(start=min(resampled_PAtot.index.min(), resampled_MPA.index.min(), resampled_VPA.index.min()),
                                   end=max(resampled_PAtot.index.max(), resampled_MPA.index.max(), resampled_VPA.index.max()),
                                   freq='D')
        result_df = pd.DataFrame(index=full_index)
        result_df['MVPA'] = resampled_PAtot
        result_df['MPA'] = resampled_MPA
        result_df['VPA'] = resampled_VPA
        result_df = result_df.fillna(0)
        result_df['USABLE'] = result_df['MVPA'].apply(lambda x: x > 0)
    
        resampled_dict2[key] = result_df
        
result = pd.concat(resampled_dict2.values())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['time_diff'] = df.index.to_series().diff()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['time_diff'] = df.index.to_series().diff()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['time_diff'] = df.index.to_series().diff()


In [5]:
result

Unnamed: 0,MVPA,MPA,VPA,USABLE
2022-03-14,760,760,0.0,True
2022-03-15,280,280,0.0,True
2022-03-16,108,108,0.0,True
2022-03-17,368,368,0.0,True
2022-03-18,432,432,0.0,True
2022-03-19,664,664,0.0,True
2022-03-20,0,0,0.0,False
2022-03-21,212,212,0.0,True
2022-03-22,596,596,0.0,True
2022-03-23,300,300,0.0,True


# Saving files

In [6]:
#concatenated files (one unique dataframe)
saving_path = os.path.join(os.getcwd(),'results')
concatenated_filename = os.path.join(saving_path, "concatenated_data.csv")
result.to_csv(concatenated_filename, index=True)