# Apple Health Data
This notebook explores my health data exported from Apple Health. It parses the raw XML export and breaks it up into smaller, analysis-ready tables.

In [16]:
from pathlib import Path
from xml.etree import ElementTree as et

import pandas as pd

In [4]:
# Location of the raw Apple Health XML export, relative to the working directory.
file: Path = Path('data/apple_health_export/export.xml')

In [12]:
# Read the whole export into an in-memory element tree.
tree = et.parse(source=file)

In [None]:
# Build one row per <Record> element; the columns are the element's XML attributes.
root = tree.getroot()
records = pd.DataFrame([rec.attrib for rec in root.iter('Record')])

# The attributes arrive as strings, so parse the three timestamp columns.
for dt in ['creationDate', 'startDate', 'endDate']:
    records[dt] = pd.to_datetime(records[dt])

# Clean-up is done as a single chain so every step returns a fresh frame.
# Assigning columns on a boolean-filtered frame (as the previous version did)
# triggers pandas' SettingWithCopyWarning; `.assign` avoids that.
records = (
    records
    # Source/device metadata is discarded.
    .drop(columns=['sourceName', 'sourceVersion', 'device'])
    # Keep only records created on or after 2024-12-01.
    .loc[lambda df: df['creationDate'] >= "2024-12-01"]
    .assign(
        # Remove the "HKQuantityTypeIdentifier" / "HKCategoryTypeIdentifier"
        # text from the type names.
        type=lambda df: df['type'].str.replace(
            r'HK(Quantity|Category)TypeIdentifier', '', regex=True
        ),
        # Values that cannot be parsed as numbers become NaN.
        value=lambda df: pd.to_numeric(df['value'], errors='coerce'),
    )
)



In [50]:
# Persist the cleaned records table to disk as a pickle.
records.to_pickle(path='data/apple_health.pkl')

In [51]:
# Number of records per type, most frequent first.
records['type'].value_counts()

type
ActiveEnergyBurned                181505
HeartRate                         171056
BasalEnergyBurned                 166434
PhysicalEffort                     70280
DistanceWalkingRunning             40670
StepCount                          24720
HeadphoneAudioExposure             24195
AppleStandTime                     13694
EnvironmentalAudioExposure         12071
RespiratoryRate                    11909
AppleExerciseTime                  11264
WalkingStepLength                   8262
WalkingSpeed                        8262
SleepAnalysis                       7973
WalkingDoubleSupportPercentage      7314
AppleStandHour                      6180
OxygenSaturation                    5776
HeartRateVariabilitySDNN            3100
WalkingAsymmetryPercentage          2759
FlightsClimbed                      2096
TimeInDaylight                      1941
StairDescentSpeed                   1130
HandwashingEvent                    1001
StairAscentSpeed                     995
Environment

In [63]:
# Daily exercise totals: sum the AppleExerciseTime values per calendar day of
# `startDate` and store them as integers in an `exercise_time` column.
# "D" is the documented calendar-day alias (the lowercase spelling is
# non-canonical and, as far as I recall, deprecated in newer pandas).
info_df = (
    records.loc[records['type'] == "AppleExerciseTime"]
    .resample("D", on="startDate")['value']
    .sum()
    .astype("int")
    .to_frame()
    .rename(columns={'value': 'exercise_time'})
)

In [64]:
# Daily ActiveEnergyBurned totals, summed per calendar day of `startDate` and
# cast to integers. "D" is the documented calendar-day alias (the lowercase
# spelling is non-canonical and, as far as I recall, deprecated in newer pandas).
# The assignment aligns on info_df's existing daily index: days present in
# info_df but missing from this series end up as NaN.
info_df['active_calories'] = (
    records.loc[records['type'] == "ActiveEnergyBurned"]
    .resample("D", on="startDate")['value']
    .sum()
    .astype("int")
)


In [66]:
# Persist the per-day activity summary to disk as a pickle.
info_df.to_pickle(path='data/activity_data.pkl')