# Retrieving Data from Fitbit Web API

This notebook demonstrates, with explanations, the code used to retrieve raw data from the Fitbit Web API. It is mostly the same as the code used for prediction, except that some cells retrieve a whole period of data, which is used as the raw data for training the model. Note that some of the features used in the model require access to Intraday data (daily detail-level responses). That access is limited to the developer's own account; otherwise a third-party application request is required.

Instructions:
1. Do not run all the code at the same time
2. Replace the access token with the updated one (you can generate using Fitbit OAuth 2.0 Tutorial)
3. When retrieving data for a period of time, do not set a long period (no more than 10 days is suggested)

In [None]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token: generate one with the Fitbit OAuth 2.0 Tutorial and expose it
# through the FITBIT_ACCESS_TOKEN environment variable so it never has to be
# written into the notebook. The placeholder is used only when the variable is
# not set, in which case it must be replaced by hand before sending requests.
access_token = os.environ.get("FITBIT_ACCESS_TOKEN", "your_generate_access_token")
headers = {"Authorization": f"Bearer {access_token}"}

# Date range (both ends included) used by the "(Period)" cells.
# NOTE(review): this range is much longer than the ~10 days suggested in the
# instructions at the top of the notebook; shorten it before running the
# per-day loops, and confirm it against the range limits of each endpoint.
start_date = datetime.datetime(2023, 11, 1)
end_date = datetime.datetime(2024, 2, 15)

# The request URLs embed the dates as yyyy-MM-dd strings.
start_date_str = start_date.strftime("%Y-%m-%d")
end_date_str = end_date.strftime("%Y-%m-%d")

## Retrieve Sleep Log (Period)

In [None]:
# Fetch every sleep log between start_date_str and end_date_str in one request.
url = f"https://api.fitbit.com/1.2/user/-/sleep/date/{start_date_str}/{end_date_str}.json"
response = requests.get(url, headers=headers, timeout=30)
# Surface an expired token or a rejected date range as an HTTP error here,
# instead of as a confusing KeyError on data['sleep'] below.
response.raise_for_status()
data = response.json()
sleep_log_raw_df = pd.DataFrame(data['sleep'])

# Keep only main-sleep logs whose infoCode is 0 (sufficient data to generate log).
is_usable = (sleep_log_raw_df['infoCode'] == 0) & sleep_log_raw_df['isMainSleep'].eq(True)

# Build a new frame rather than dropping in place on a filtered slice: this
# avoids SettingWithCopyWarning and makes the cell safe to re-run.
sleep_log_df = sleep_log_raw_df[is_usable].drop(
    columns=['logId', 'logType', 'infoCode', 'type', 'isMainSleep']
)

In [None]:
# Sleep Summary
# Derive the summary as a NEW frame. Assigning `sleep_summary_df = sleep_log_df`
# and dropping in place would also strip 'levels' from sleep_log_df, which the
# "Sleep Detail Log" cell still needs.
sleep_summary_df = (
    sleep_log_df
    # 'levels' holds the per-stage detail; start/end timestamps are not part of the summary.
    .drop(columns=['levels', 'startTime', 'endTime'])
    # Divide by 60 000 to turn the millisecond duration into minutes.
    .assign(duration=lambda frame: frame['duration'] / (1000 * 60))
    .rename(columns={'timeInBed': 'minutesInBed'})
)

sleep_summary_df.to_csv('sleep_summary.csv', index=False)

In [None]:
# Sleep Detail Log
# Expand each sleep log into one row per entry of levels['data'], keeping the
# log's remaining fields next to the entry's 'level' and 'seconds'.
if 'levels' not in sleep_log_df.columns:
    raise KeyError(
        "sleep_log_df has no 'levels' column; re-run the sleep log retrieval "
        "cell before building the detail log"
    )

new_rows = []

for _, row in sleep_log_df.iterrows():
    levels = row['levels']
    # 'levels' is already a dict when the frame comes straight from the API
    # response; it is a string only after a round trip through CSV.
    if isinstance(levels, str):
        levels = ast.literal_eval(levels)

    shared_fields = row.drop(labels='levels').to_dict()

    for entry in levels['data']:
        # The entry's own 'dateTime' is intentionally not carried over: it is
        # not part of the exported detail file.
        new_rows.append({
            **shared_fields,
            'level': entry['level'],
            'seconds': entry['seconds'],
        })

detail_sleep_df = pd.DataFrame(new_rows)

columns_to_drop = ['duration', 'efficiency', 'endTime', 'minutesAfterWakeup', 'minutesAsleep', 'minutesAwake', 'minutesToFallAsleep', 'startTime', 'timeInBed', 'type']

# errors='ignore': some of these (e.g. 'type') may already have been removed
# from sleep_log_df by the retrieval cell.
detail_sleep_df = detail_sleep_df.drop(columns=columns_to_drop, errors='ignore')

detail_sleep_df.to_csv('sleep_detail.csv', index=False)

## Retrieve Sleep Log (By date)

In [35]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token: generate one with the Fitbit OAuth 2.0 Tutorial and expose it
# through the FITBIT_ACCESS_TOKEN environment variable. Never paste a real
# token into the notebook: anything saved here is shared with the file.
access_token = os.environ.get("FITBIT_ACCESS_TOKEN", "your_generate_access_token")
headers = {"Authorization": f"Bearer {access_token}"}

# Single day to retrieve.
date = datetime.datetime(2024, 2, 16).strftime("%Y-%m-%d")
url = f"https://api.fitbit.com/1.2/user/-/sleep/date/{date}.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail on an invalid/expired token here rather than with a KeyError below.
response.raise_for_status()
data = response.json()
sleep_log_raw_df = pd.DataFrame(data['sleep'])

# Keep only main-sleep logs whose infoCode is 0 (sufficient data to generate log).
is_usable = (sleep_log_raw_df['infoCode'] == 0) & sleep_log_raw_df['isMainSleep'].eq(True)
sleep_log_df = sleep_log_raw_df[is_usable].drop(
    columns=['logId', 'logType', 'infoCode', 'type', 'isMainSleep']
)

# Summary frame built without mutating sleep_log_df, so re-running the cell
# (or using sleep_log_df afterwards) gives the same result.
sleep_summary_df = (
    sleep_log_df
    .drop(columns=['levels', 'startTime', 'endTime'])
    # Divide by 60 000 to turn the millisecond duration into minutes.
    .assign(duration=lambda frame: frame['duration'] / (1000 * 60))
    .rename(columns={'timeInBed': 'minutesInBed'})
)
sleep_summary_df.head()

Unnamed: 0,dateOfSleep,duration,efficiency,minutesAfterWakeup,minutesAsleep,minutesAwake,minutesToFallAsleep,minutesInBed
0,2024-02-16,461.0,88,9,390,71,0,461


## Retrieve Breathing Rate (Period)

In [None]:
breathing_rate_data = []

# One request per day, from start_date to end_date (both included).
date = start_date
while date <= end_date:
    date_str = date.strftime("%Y-%m-%d")
    url = f"https://api.fitbit.com/1/user/-/br/date/{date_str}/all.json"
    response = requests.get(url, headers=headers, timeout=30)
    # Stop on an HTTP error instead of failing later with KeyError: 'br'.
    response.raise_for_status()
    data = response.json()

    breathing_rate_data.extend(data['br'])

    date += datetime.timedelta(days=1)

breathing_rate_df = pd.DataFrame(breathing_rate_data)

# Create new columns for each item in the 'value' column
expanded_data = breathing_rate_df['value'].apply(pd.Series)

# Concatenate the expanded columns with the original data
breathing_data = pd.concat([breathing_rate_df['dateTime'], expanded_data], axis=1)

# Source column -> exported column name. Each summary is reduced from its OWN
# column; deriving 'fullSleepSummary' and 'lightSleepSummary' from
# 'deepSleepSummary' would silently export the deep-sleep rate three times.
summary_columns = {
    'deepSleepSummary': 'deepSleep',
    'remSleepSummary': 'remSleep',
    'fullSleepSummary': 'fullSleep',
    'lightSleepSummary': 'lightSleep',
}


def _breathing_rate(summary):
    """Return summary['breathingRate'] for a dict, or the value unchanged (e.g. NaN)."""
    return summary['breathingRate'] if isinstance(summary, dict) else summary


for summary_column in summary_columns:
    breathing_data[summary_column] = breathing_data[summary_column].apply(_breathing_rate)

breathing_data = breathing_data.rename(columns=summary_columns)

breathing_data.to_csv('breathing.csv', index=False)

## Heart Rate (Period)

In [None]:
dataframes = []

# Generate a list of dates within the range
dates = pd.date_range(start_date, end_date, freq='D')

for date in dates:
    date_str = date.strftime('%Y-%m-%d')

    # Request THIS iteration's day: the URL must use date_str, not
    # start_date_str, otherwise every file contains the first day's data.
    url = f"https://api.fitbit.com/1/user/-/activities/heart/date/{date_str}/1d/1min.json"
    response = requests.get(url, headers=headers, timeout=30)
    # Stop on an HTTP error (expired token, rate limit) instead of writing bad files.
    response.raise_for_status()
    data = response.json()

    # The per-minute readings are read from data['activities-heart-intraday']['dataset'].
    # Passing the whole response to pd.DataFrame fails because it mixes a list
    # with a dict. The key is absent without Intraday access, hence .get().
    intraday = data.get('activities-heart-intraday', {})
    # Tag every reading with its day so rows stay identifiable once combined.
    df = pd.DataFrame(
        [{'date': date_str, **reading} for reading in intraday.get('dataset', [])]
    )

    # Generate each date as a new csv file for referencing
    filename = f'heart_rate_{date_str}.csv'
    df.to_csv(filename, index=False)

    dataframes.append(df)

# Concatenate all the dataframes into a single dataframe
combined_df = pd.concat(dataframes, ignore_index=True)

In [None]:
# Alternative approach: rebuild the combined heart-rate frame from CSV files on disk
file_pattern = '../input/HR/*.csv'  # Update with your file path pattern

# Paths of all CSV files that match the pattern
csv_files = glob.glob(file_pattern)

# One DataFrame per file, in the order glob returned the paths
dfs = [pd.read_csv(csv_path) for csv_path in csv_files]

# Stack the per-file frames into a single DataFrame (original row indexes kept)
concatenated_df = pd.concat(dfs)

## Heart Rate Variability (By date)

In [None]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token: generate one with the Fitbit OAuth 2.0 Tutorial and expose it
# through the FITBIT_ACCESS_TOKEN environment variable. Never paste a real
# token into the notebook: anything saved here is shared with the file.
access_token = os.environ.get("FITBIT_ACCESS_TOKEN", "your_generate_access_token")
headers = {"Authorization": f"Bearer {access_token}"}

date = datetime.datetime(2024, 2, 16).strftime("%Y-%m-%d")
url = f"https://api.fitbit.com/1/user/-/hrv/date/{date}/all.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail on an invalid/expired token here rather than with a KeyError below.
response.raise_for_status()
data = response.json()

# Guard against an empty 'hrv' list (e.g. nothing recorded for that day),
# which would otherwise raise IndexError on [0].
hrv_days = data['hrv']
dataset = hrv_days[0]['minutes'] if hrv_days else []
hrv_df = pd.DataFrame(dataset)

# Extract values from the "value" column to new columns
if not hrv_df.empty:
    hrv_df = pd.concat([hrv_df.drop(['value'], axis=1), hrv_df['value'].apply(pd.Series)], axis=1)

print(hrv_df)

In [None]:
# Alternative approach: rebuild the combined HRV frame from CSV files on disk
file_pattern = '../input/HRV/202402/*.csv'

# Paths of all CSV files that match the pattern
csv_files = glob.glob(file_pattern)

# One DataFrame per file, in the order glob returned the paths
dfs = [pd.read_csv(csv_path) for csv_path in csv_files]

# Stack the per-file frames, then renumber the rows 0..n-1
concatenated_df = pd.concat(dfs).reset_index(drop=True)

## Retrieve Blood Oxygen SpO2 (Period)

In [None]:
# Alternative approach when using json file: convert each JSON file in the
# folder into a CSV file written to the working directory
folder_path = '../input/spo2/'

# Only file names ending in '.json' are converted
json_files = [name for name in os.listdir(folder_path) if name.endswith('.json')]

for json_file in json_files:
    # The part of the file name before the first '.' labels the output file
    date = json_file.partition('.')[0]

    # Load the JSON file as a Series
    data = pd.read_json(os.path.join(folder_path, json_file), typ='series')

    # One row per item of the 'minutes' entry, with 'value' renamed to 'spo2'
    df = pd.DataFrame(data['minutes']).rename(columns={'value': 'spo2'})

    # Save the DataFrame as spo2_<date>.csv
    df.to_csv(f'spo2_{date}.csv', index=False)

## Retrieve Blood Oxygen SpO2 (By date)

In [48]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token: generate one with the Fitbit OAuth 2.0 Tutorial and expose it
# through the FITBIT_ACCESS_TOKEN environment variable. Never paste a real
# token into the notebook: anything saved here is shared with the file.
access_token = os.environ.get("FITBIT_ACCESS_TOKEN", "your_generate_access_token")
headers = {"Authorization": f"Bearer {access_token}"}

date = datetime.datetime(2024, 2, 16).strftime("%Y-%m-%d")
url = f"https://api.fitbit.com/1/user/-/spo2/date/{date}/all.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail on an invalid/expired token here rather than with a KeyError below.
response.raise_for_status()
data = response.json()

# .get(): tolerate a response without a 'minutes' entry for the requested day.
dataset = data.get('minutes', [])
spo2_df = pd.DataFrame(dataset)

# Mean of the day's readings; NaN when there is no 'value' column to average.
spo2_mean = spo2_df['value'].mean() if 'value' in spo2_df.columns else float('nan')

# One-row summary frame. Kept under a separate name so the response dict in
# `data` is no longer overwritten by a float.
spo2_summary_df = pd.DataFrame({'spo2': [spo2_mean]})
# Legacy name (digit zero, not the letter "o") kept for backward compatibility.
sp02_df = spo2_summary_df

spo2_summary_df

Unnamed: 0,spo2
0,97.342258


## Retrieve Steps (By date)

In [29]:
import requests
import datetime
import pandas as pd
import ast
import glob
import json
import os

# Access token: generate one with the Fitbit OAuth 2.0 Tutorial and expose it
# through the FITBIT_ACCESS_TOKEN environment variable. Never paste a real
# token into the notebook: anything saved here is shared with the file.
access_token = os.environ.get("FITBIT_ACCESS_TOKEN", "your_generate_access_token")
headers = {"Authorization": f"Bearer {access_token}"}

date = datetime.datetime(2024, 2, 16).strftime("%Y-%m-%d")
url = f"https://api.fitbit.com/1/user/-/activities/date/{date}.json"
response = requests.get(url, headers=headers, timeout=30)
# Fail on an invalid/expired token here rather than with KeyError: 'summary'.
response.raise_for_status()
data = response.json()

# Step count read from the 'summary' section of the response.
steps = data['summary']['steps']
# index=[0] is required to build a one-row frame from a scalar value.
df = pd.DataFrame({'steps': steps}, index=[0])
print(df)

   steps
0  10680
