This notebook demonstrates how to use dbdpy to analyze data exported from an Apple Watch. The same export is processed twice: first with basic Python (pandas and the standard library), then with the dbdpy package.

## Using Basic Python

In [1]:
import pandas as pd
import xml.etree.ElementTree as etree

In [2]:
# Parse the export.xml file and turn every <Record> element into one
# DataFrame row, using the element's XML attributes as the columns.
tree = etree.parse("data/apple/export.xml")
root = tree.getroot()
record_list = [record.attrib for record in root.iter("Record")]
record_df = pd.DataFrame(data=record_list)

# Preview the first rows.
record_df.head()

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,device
0,HKQuantityTypeIdentifierHeight,ShunのiPhone,15.6.1,ft,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,5.83333,
1,HKQuantityTypeIdentifierBodyMass,ShunのiPhone,15.6.1,lb,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,2022-09-15 09:58:48 -0400,178.0,
2,HKQuantityTypeIdentifierBodyMass,Health,15.6.1,lb,2022-11-13 21:38:37 -0400,2022-11-13 21:38:00 -0400,2022-11-13 21:38:00 -0400,180.0,
3,HKQuantityTypeIdentifierBodyMass,Health,15.6.1,lb,2022-11-13 21:38:57 -0400,2022-11-13 21:42:00 -0400,2022-11-13 21:42:00 -0400,180.0,
4,HKQuantityTypeIdentifierBodyMass,Health,15.6.1,lb,2022-11-13 21:39:44 -0400,2022-11-13 21:39:00 -0400,2022-11-13 21:39:00 -0400,180.0,


In [3]:
### Parse record
# Format the timestamp columns as ISO 8601 strings ("YYYY-MM-DDTHH:MM:SS").
# The format string has no %z, so the UTC offset present in the raw export
# (e.g. "-0400") is not carried into the formatted value.
datetime_cols = ["creationDate", "startDate", "endDate"]
record_df[datetime_cols] = record_df[datetime_cols].apply(
    lambda col: pd.to_datetime(col).dt.strftime("%Y-%m-%dT%H:%M:%S")
)

# Convert values to numeric type. With errors="coerce" every non-numeric
# value becomes NaN, so keep the original text of those values in a separate
# column before overwriting "value".
# NOTE(review): presumably this is what SleepAnalysis records contain (their
# numeric value comes out all-NaN) -- confirm against the export.
# The guard keeps the cell re-runnable: on a second run "value" is already
# numeric and the original text can no longer be recovered from it.
numeric_value = pd.to_numeric(record_df["value"], errors="coerce")
if "categoryValue" not in record_df.columns:
    record_df["categoryValue"] = record_df["value"].where(numeric_value.isna())
record_df["value"] = numeric_value

# Shorten observation names by dropping the HealthKit identifier prefixes.
# regex=False states the literal-substring match explicitly, because the
# default of `regex` is not the same across pandas versions.
record_df["type"] = (
    record_df["type"]
    .str.replace("HKQuantityTypeIdentifier", "", regex=False)
    .str.replace("HKCategoryTypeIdentifier", "", regex=False)
)

# TODO: De-identify source name?

# TODO: Remove unnecessary columns?


def records_of_type(records, type_name):
    """Return the rows of `records` whose "type" column equals `type_name`."""
    return records[records["type"] == type_name]


# One frame per measurement type (names are the shortened type strings).
energy = records_of_type(record_df, "BasalEnergyBurned")
steps = records_of_type(record_df, "StepCount")
distance = records_of_type(record_df, "DistanceWalkingRunning")
oxygen = records_of_type(record_df, "OxygenSaturation")
resting_heart_rate = records_of_type(record_df, "RestingHeartRate")
heart_rate = records_of_type(record_df, "HeartRate")
respiration_rate = records_of_type(record_df, "RespiratoryRate")
sleep = records_of_type(record_df, "SleepAnalysis")

## Using dbdpy

In [46]:
import importlib
from pathlib import Path
import dbdpy
# Re-execute the already-imported dbdpy package module in the running kernel.
# NOTE(review): presumably here so that local edits to dbdpy are picked up
# without restarting the kernel -- confirm whether it is still needed.
importlib.reload(dbdpy)

<module 'dbdpy' from '/Users/billchen/Desktop/dbdpy/dbdpy/__init__.py'>

In [47]:
# Same export.xml file as in the basic-Python section, given as a path
# relative to the current working directory.
filepath = Path("./data/apple/export.xml")
# Load the export through dbdpy; the returned object is queried via
# attributes such as `.sleep` in the following cell.
watch_data = dbdpy.AppleWatch.read_file(filepath)

In [48]:
# List the distinct values recorded for sleep.
# NOTE(review): the saved output is array([nan]), i.e. no non-missing sleep
# value -- worth checking how dbdpy parses the sleep records' values.
watch_data.sleep.value.unique()

array([nan])