# Workflow Examples

The purpose of this notebook is to organize the logic of how data is loaded, processed, and explored using both vanilla Python and the dbdpy package. Additionally, this notebook should be used to test the functionalities of the package during development. 

# Data Loading

This is the only section where each watch data will have their custom method for loading in the data. Once the data is formatted into the centralized data structure -- the rest of the workflow should be applied to the standardized format. 

### Apple Watch

In [1]:
import pandas as pd
import xml.etree.ElementTree as etree

In [2]:
# Read in export.xml file
tree = etree.parse("data/apple/export.xml")
root = tree.getroot()
record_list = [x.attrib for x in root.iter("Record")]
record_df = pd.DataFrame(record_list)
record_df.head()

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,device
0,HKQuantityTypeIdentifierHeight,ShunのiPhone,15.6.1,ft,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,5.83333,
1,HKQuantityTypeIdentifierBodyMass,ShunのiPhone,15.6.1,lb,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,178.0,
2,HKQuantityTypeIdentifierBodyMass,Health,15.6.1,lb,2022-11-13 21:38:37 -0400,2022-11-13 21:38:00 -0400,2022-11-13 21:38:00 -0400,180.0,
3,HKQuantityTypeIdentifierBodyMass,Health,15.6.1,lb,2022-11-13 21:38:57 -0400,2022-11-13 21:42:00 -0400,2022-11-13 21:42:00 -0400,180.0,
4,HKQuantityTypeIdentifierBodyMass,Health,15.6.1,lb,2022-11-13 21:39:44 -0400,2022-11-13 21:39:00 -0400,2022-11-13 21:39:00 -0400,180.0,


In [3]:
### Parse record
# Set datetime to ISO 8601 format
datetime_cols = ["creationDate", "startDate", "endDate"]
record_df[datetime_cols] = record_df[datetime_cols].apply(lambda x: pd.to_datetime(x).dt.strftime("%Y-%m-%dT%H:%M:%S"))

# Convert values to numeric type
record_df["value"] = pd.to_numeric(record_df["value"], errors="coerce")

# Shorten observation names
record_df["type"] = record_df["type"].str.replace("HKQuantityTypeIdentifier", "")
record_df["type"] = record_df["type"].str.replace("HKCategoryTypeIdentifier", "")


energy = record_df[record_df["type"] == "BasalEnergyBurned"]
steps = record_df[record_df["type"] == "StepCount"]
distance = record_df[record_df["type"] == "DistanceWalkingRunning"]
oxygen = record_df[record_df["type"] == "OxygenSaturation"]
resting_heart_rate = record_df[record_df["type"] == "RestingHeartRate"]
heart_rate = record_df[record_df["type"] == "HeartRate"]
respiration_rate = record_df[record_df["type"] == "RespiratoryRate"]
sleep = record_df[record_df["type"] == "SleepAnalysis"]

### Garmin

In [65]:
from pathlib import Path
import pandas as pd
import datetime
from garmin_fit_sdk import Decoder, Stream

In [66]:
directory_path = Path("./data/garmin/")

monitoring_data = pd.DataFrame()
oxygen_data = pd.DataFrame()
for file in directory_path.rglob("*WELLNESS.fit"):
    stream = Stream.from_file(file)
    decoder = Decoder(stream)
    messages, _ = decoder.read(
        apply_scale_and_offset=True,
        convert_datetimes_to_dates=False,
        convert_types_to_strings=True,
        enable_crc_check=True,
        expand_sub_fields=True,
        expand_components=True,
        merge_heart_rates=False,
        mesg_listener=None,
    )
    
    monitoring_df = pd.DataFrame(messages["monitoring_mesgs"])[["timestamp", "timestamp_16", "distance", "heart_rate", "active_calories"]]
    try:
        oxygen_df = pd.DataFrame(messages["spo2_data_mesgs"])[["timestamp", "reading_spo2"]]
    except KeyError:
        continue

    monitoring_data = pd.concat([monitoring_data, monitoring_df])
    oxygen_data = pd.concat([oxygen_data, oxygen_df])

In [67]:
monitoring_data

Unnamed: 0,timestamp,timestamp_16,distance,heart_rate,active_calories
0,1.064462e+09,,,,
1,1.064462e+09,,,,
2,1.064462e+09,,,,
3,,26748.0,,74.0,
4,,26808.0,,71.0,
...,...,...,...,...,...
550,,2796.0,533.84,,59.0
551,1.064242e+09,,0.00,,1.0
552,1.064242e+09,,591.75,,63.0
553,1.064242e+09,,,,


In [68]:
monitoring_data.iloc[:, 0] = monitoring_data.iloc[:, 0].fillna(method="ffill")
monitoring_data

  monitoring_data.iloc[:, 0] = monitoring_data.iloc[:, 0].fillna(method="ffill")


Unnamed: 0,timestamp,timestamp_16,distance,heart_rate,active_calories
0,1.064462e+09,,,,
1,1.064462e+09,,,,
2,1.064462e+09,,,,
3,1.064462e+09,26748.0,,74.0,
4,1.064462e+09,26808.0,,71.0,
...,...,...,...,...,...
550,1.064241e+09,2796.0,533.84,,59.0
551,1.064242e+09,,0.00,,1.0
552,1.064242e+09,,591.75,,63.0
553,1.064242e+09,,,,


In [72]:
def adjust_timestamp(row):
    try:
        mesgTimestamp = int(row["timestamp"])
        timestamp_16 = int(row["timestamp_16"])
        duration = (timestamp_16 - (mesgTimestamp & 0xFFFF)) & 0xFFFF
        adjusted_timestamp = mesgTimestamp + duration
    except ValueError:
        adjusted_timestamp = row["timestamp"]
    
    converted_time = datetime.datetime.utcfromtimestamp(631065600 + adjusted_timestamp)\
        - datetime.timedelta(hours=4)
    
    return converted_time

monitoring_data.iloc[:, 0] = monitoring_data.apply(adjust_timestamp, axis=1)

In [80]:
monitoring_data.sort_values(by="timestamp").head(40)

Unnamed: 0,timestamp,timestamp_16,distance,heart_rate,active_calories
0,2023-09-21 00:00:00,,,,
1,2023-09-21 00:00:00,,,,
2,2023-09-21 00:04:00,29872.0,,0.0,
3,2023-09-21 00:13:00,,,,
4,2023-09-21 00:15:00,30532.0,0.0,,
5,2023-09-21 00:20:00,30832.0,,0.0,
6,2023-09-21 00:36:00,31792.0,,0.0,
7,2023-09-21 00:52:00,32752.0,,0.0,
8,2023-09-21 01:08:00,33712.0,,0.0,
9,2023-09-21 01:15:00,,,,


## Using dbdpy

In [50]:
import importlib
from pathlib import Path
import dbdpy
from dbdpy import AppleWatch
importlib.reload(dbdpy)

<module 'dbdpy' from '/Users/billchen/Desktop/dbdpy/dbdpy/__init__.py'>

In [51]:
filepath = Path("./data/apple/export.xml")
watch_data = AppleWatch.read_file(filepath)

In [52]:
watch_data.sleep

Unnamed: 0,startDate,endDate,value
525930,2022-12-28T11:58:04,2022-12-28T12:22:34,
525932,2022-12-28T12:22:34,2022-12-28T12:36:34,
525933,2022-12-28T12:36:34,2022-12-28T12:56:04,
525935,2022-12-28T12:56:04,2022-12-28T13:12:04,
525936,2022-12-28T13:12:04,2022-12-28T13:29:04,
...,...,...,...
532623,2023-09-30T09:15:31,2023-09-30T09:19:31,
532624,2023-09-30T09:15:31,2023-09-30T09:19:31,
532625,2023-09-30T09:19:31,2023-09-30T09:21:31,
532626,2023-09-30T09:21:31,2023-09-30T09:23:31,
