# Retrieving Data from Fitbit Web API

This notebook demonstrates, with explanations, the code used to retrieve raw data from the Fitbit Web API. It is mostly the same as the code used for prediction, except that some cells retrieve data over a period of time, which is used as raw data for training the model. Note that some of the features used in the model require access to Intraday data (daily detail-level responses). Intraday access is limited to the developer's own account; otherwise a third-party application request is required.

Instructions:
1. Do not run all the code at the same time
2. Replace the access token with an up-to-date one (you can generate one using the Fitbit OAuth 2.0 Tutorial)
3. When retrieving data for a period of time, do not set a long period (no more than 10 days is suggested)
4. For date-range input, Fitbit has a limit of 100 days per request. You may need to merge the results yourself

In [None]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os
import numpy as np

# Access token (generate one with the Fitbit OAuth 2.0 Tutorial)
access_token = "your_generate_access_token"
headers = {"Authorization": f"Bearer {access_token}"}

# First and last day of the period to download
start_date = datetime.datetime(year=2023, month=11, day=1)
end_date = datetime.datetime(year=2024, month=2, day=15)

# The same two bounds formatted as YYYY-MM-DD strings
start_date_str, end_date_str = (
    bound.strftime("%Y-%m-%d") for bound in (start_date, end_date)
)

## Retrieve Sleep Log (Period)

In [None]:
# The sleep-log date-range endpoint accepts at most 100 days per request, so the
# period is fetched in windows of at most 100 days and the records are combined.
max_range_days = 100
sleep_records = []

window_start = start_date
while window_start <= end_date:
    window_end = min(window_start + datetime.timedelta(days=max_range_days - 1), end_date)
    url = (
        "https://api.fitbit.com/1.2/user/-/sleep/date/"
        f"{window_start.strftime('%Y-%m-%d')}/{window_end.strftime('%Y-%m-%d')}.json"
    )
    response = requests.get(url, headers=headers, timeout=30)
    # Fail here on an HTTP error (e.g. an expired token) rather than with a KeyError below
    response.raise_for_status()
    sleep_records.extend(response.json()['sleep'])
    window_start = window_end + datetime.timedelta(days=1)

if not sleep_records:
    raise ValueError(f"No sleep logs returned between {start_date_str} and {end_date_str}")

# The following cells read data['sleep'], so keep the combined records under that key
data = {'sleep': sleep_records}
sleep_log_df = pd.DataFrame(sleep_records)

# Keep only main-sleep logs whose infoCode is 0 (sufficient data to generate log)
is_usable = (sleep_log_df['infoCode'] == 0) & (sleep_log_df['isMainSleep'] == True)
sleep_log_df = sleep_log_df[is_usable]
sleep_log_df = sleep_log_df.drop(columns=['logId', 'logType', 'infoCode', 'type', 'isMainSleep'])

# Flatten each log's levels['summary'] (stage -> {metric: value}) into one row with
# <stage>_<metric> columns, leaving out the 'thirtyDayAvgMinutes' metric
summary_rows = []
for levels, date_of_sleep in zip(sleep_log_df['levels'], sleep_log_df['dateOfSleep']):
    summary_row = {
        f"{stage}_{metric}": value
        for stage, metrics in levels['summary'].items()
        for metric, value in metrics.items()
        if metric != 'thirtyDayAvgMinutes'
    }
    summary_row['dateOfSleep'] = date_of_sleep
    summary_rows.append(summary_row)

summary_df = pd.DataFrame(summary_rows)

# The nested 'levels' column is no longer needed once it has been flattened
sleep_log_df = sleep_log_df.drop(columns='levels')

merged_df = pd.merge(summary_df, sleep_log_df, on='dateOfSleep', how='left')

# Convert duration from milliseconds to minutes so it is comparable with the *_minutes columns
merged_df['duration'] = merged_df['duration'] / (1000 * 60)

# Share of the total duration spent in each sleep stage
for stage in ('rem', 'deep', 'light', 'wake'):
    merged_df[f'{stage}_proportion'] = merged_df[f'{stage}_minutes'] / merged_df['duration']

In [None]:
# Retrieve sleep stage count
# Reads data['sleep'], the sleep-log records loaded by the sleep log cell.
sleep_log_df = pd.DataFrame(data['sleep'])

# Keep only main-sleep logs whose infoCode is 0 (sufficient data to generate log)
is_usable = (sleep_log_df['infoCode'] == 0) & (sleep_log_df['isMainSleep'] == True)
sleep_log_df = sleep_log_df[is_usable]
sleep_log_df = sleep_log_df.drop(columns=['logId', 'logType', 'infoCode', 'type', 'isMainSleep'])

# One frame per sleep log built from levels['data'], tagged with the log's date.
# The frames are collected first and concatenated once, instead of growing a
# DataFrame inside the loop.
stage_frames = [
    pd.DataFrame(levels['data']).assign(dateOfSleep=date_of_sleep)
    for levels, date_of_sleep in zip(sleep_log_df['levels'], sleep_log_df['dateOfSleep'])
]
new = pd.concat(stage_frames, axis=0)

# Create a pivot table to count the occurrences of each level for each day
new_df = pd.pivot_table(new, index='dateOfSleep', columns='level', aggfunc='size', fill_value=0)

new_df.to_csv('sleep_stage_count.csv')

In [None]:
# Sleep pattern detail
# Reads data['sleep'], the sleep-log records loaded by the sleep log cell.
sleep_log_df = pd.DataFrame(data['sleep'])

# Keep only main-sleep logs whose infoCode is 0 (sufficient data to generate log)
is_usable = (sleep_log_df['infoCode'] == 0) & (sleep_log_df['isMainSleep'] == True)
sleep_log_df = sleep_log_df[is_usable]
sleep_log_df = sleep_log_df.drop(columns=['logId', 'logType', 'infoCode', 'type', 'isMainSleep'])

# One frame per sleep log built from levels['data'], tagged with the log's date.
# The frames are collected first and concatenated once, instead of growing a
# DataFrame inside the loop.
pattern_frames = [
    pd.DataFrame(levels['data']).assign(dateOfSleep=date_of_sleep)
    for levels, date_of_sleep in zip(sleep_log_df['levels'], sleep_log_df['dateOfSleep'])
]
new = pd.concat(pattern_frames, axis=0)

new.to_csv('pattern2.csv', index=False)

## Retrieve Breathing Rate (Period)

In [None]:
breathing_rate_data = []

# The request takes a single date, so ask for one day at a time (both bounds included)
date = start_date
while date <= end_date:
    date_str = date.strftime("%Y-%m-%d")
    url = f"https://api.fitbit.com/1/user/-/br/date/{date_str}/all.json"
    response = requests.get(url, headers=headers, timeout=30)
    # Fail here on an HTTP error (e.g. an expired token) rather than with a KeyError below
    response.raise_for_status()
    data = response.json()

    breathing_rate_data.extend(data['br'])

    date += datetime.timedelta(days=1)

breathing_rate_df = pd.DataFrame(breathing_rate_data)

# Create new columns for each item in the 'value' column
expanded_data = breathing_rate_df['value'].apply(pd.Series)

# Concatenate the expanded columns with the original data
breathing_data = pd.concat([breathing_rate_df['dateTime'], expanded_data], axis=1)

# Each summary cell holds a dict with a 'breathingRate' entry; replace the dict by
# that number. Every column is read from ITSELF, so fullSleep and lightSleep get
# their own values rather than a copy of the deep-sleep ones.
summary_columns = {
    'deepSleepSummary': 'deepSleep',
    'remSleepSummary': 'remSleep',
    'fullSleepSummary': 'fullSleep',
    'lightSleepSummary': 'lightSleep',
}
for column in summary_columns:
    breathing_data[column] = breathing_data[column].apply(
        lambda x: x['breathingRate'] if isinstance(x, dict) else x
    )

breathing_data = breathing_data.rename(columns=summary_columns)

breathing_data.to_csv('breathing.csv', index=False)

## Resting Heart Rate (Period)

In [None]:
# NOTE(review): Fitbit documents the heart-rate time series under API version 1;
# the 1.2 prefix is kept unchanged here - confirm the endpoint accepts it.
url = f"https://api.fitbit.com/1.2/user/-/activities/heart/date/{start_date_str}/{end_date_str}.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail here on an HTTP error (e.g. an expired token) rather than with a KeyError below
response.raise_for_status()
data = response.json()

# One row per day; days whose 'value' has no 'restingHeartRate' entry get NaN
resting_rows = [
    {
        'restingHeartRate': day['value'].get('restingHeartRate', np.nan),
        'dateTime': day['dateTime'],
    }
    for day in data['activities-heart']
]

# Explicit columns keep the CSV header present even when no days were returned
new = pd.DataFrame(resting_rows, columns=['restingHeartRate', 'dateTime'])

new.to_csv('resting_hr.csv', index=False)

## Heart Rate Variability (By date)

In [None]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token (generate one with the Fitbit OAuth 2.0 Tutorial).
# Paste the token here at run time; do not commit a real token to the notebook.
access_token = "your_generate_access_token"
headers = {"Authorization": f"Bearer {access_token}"}

date = datetime.datetime(2024, 2, 16).strftime("%Y-%m-%d")
url = f"https://api.fitbit.com/1/user/-/hrv/date/{date}/all.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail here on an HTTP error (e.g. an expired token) rather than with a KeyError below
response.raise_for_status()
data = response.json()

# data['hrv'] is a list; its first entry carries the 'minutes' records for the date.
# An empty list would make data['hrv'][0] raise IndexError, so handle it explicitly.
hrv_days = data['hrv']
dataset = hrv_days[0]['minutes'] if hrv_days else []

if dataset:
    hrv_df = pd.DataFrame(dataset)
    # Extract values from the "value" column to new columns
    hrv_df = pd.concat([hrv_df.drop(['value'], axis=1), hrv_df['value'].apply(pd.Series)], axis=1)
else:
    # Nothing was returned for this date
    hrv_df = pd.DataFrame()

print(hrv_df)

In [None]:
# Alternative approach: combine every saved HRV CSV file into a single DataFrame
file_pattern = '../input/HRV/202402/*.csv'

# Paths of the CSV files that match the pattern
csv_files = glob.glob(file_pattern)

# Load each matching file into its own DataFrame
dfs = [pd.read_csv(csv_path) for csv_path in csv_files]

# Stack the frames and renumber the rows from zero in one step
concatenated_df = pd.concat(dfs, ignore_index=True)

## Retrieve Blood Oxygen SpO2 (Period)

In [None]:
# Alternative approach when the SpO2 data is already stored as JSON files
folder_path = '../input/spo2/'

for json_file in os.listdir(folder_path):
    # Only files ending in .json are converted
    if not json_file.endswith('.json'):
        continue

    # The part of the file name before the first dot is used as the date
    date = json_file.split('.')[0]

    # Load the file as a Series and turn its 'minutes' entry into a table,
    # with the 'value' column renamed to 'spo2'
    data = pd.read_json(os.path.join(folder_path, json_file), typ='series')
    df = pd.DataFrame(data['minutes']).rename(columns={'value': 'spo2'})

    # Write the table to spo2_<date>.csv in the working directory
    df.to_csv(f'spo2_{date}.csv', index=False)

## Retrieve Blood Oxygen SpO2 (By date)

In [None]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token (generate one with the Fitbit OAuth 2.0 Tutorial).
# Paste the token here at run time; do not commit a real token to the notebook.
access_token = "your_generate_access_token"
headers = {"Authorization": f"Bearer {access_token}"}

date = datetime.datetime(2024, 2, 16).strftime("%Y-%m-%d")
url = f"https://api.fitbit.com/1/user/-/spo2/date/{date}/all.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail here on an HTTP error (e.g. an expired token) rather than with a KeyError below
response.raise_for_status()
data = response.json()

# Save the records under 'minutes' as one CSV for the requested date
dataset = data['minutes']
spo2_df = pd.DataFrame(dataset)
spo2_df.to_csv(f'spo2_detail_{date}.csv', index=False)

## Retrieve Steps (By date)

In [None]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token (generate one with the Fitbit OAuth 2.0 Tutorial).
# Paste the token here at run time; do not commit a real token to the notebook.
access_token = "your_generate_access_token"
headers = {"Authorization": f"Bearer {access_token}"}

date = datetime.datetime(2024, 2, 16).strftime("%Y-%m-%d")
url = f"https://api.fitbit.com/1/user/-/activities/date/{date}.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail here on an HTTP error (e.g. an expired token) rather than with a KeyError below
response.raise_for_status()
data = response.json()

# Single-row table holding the date and the 'steps' value from the response summary
steps = data['summary']['steps']
df = pd.DataFrame({'date': date, 'steps': steps}, index=[0])
df.to_csv(f'steps_{date}.csv', index=False)

## Merging CSV into Single File

In [None]:
# Alternative approach to merge all the data into single dataframe using the CSV files
file_pattern = './*.csv'  # Update with your file path pattern
output_file = 'attribute_name.csv'

# The output file itself matches the pattern above, so it is excluded from the
# inputs; otherwise re-running the cell would merge the previous result into the
# new one. Sorting makes the row order reproducible (glob order is arbitrary).
output_path = os.path.abspath(output_file)
csv_files = sorted(
    path for path in glob.glob(file_pattern)
    if os.path.abspath(path) != output_path
)

if not csv_files:
    raise FileNotFoundError(f"No CSV files match {file_pattern!r}")

# Read each CSV file into a DataFrame
dfs = [pd.read_csv(path) for path in csv_files]

# Concatenate the DataFrames into a single DataFrame
concatenated_df = pd.concat(dfs)

concatenated_df.to_csv(output_file, index=False)