https://www.python-engineer.com/posts/apple-health-data-python/

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import plotly.express as px

In [2]:
# Use the "fivethirtyeight" matplotlib style for any matplotlib figures.
plt.style.use("fivethirtyeight")

# Parse the export file into an element tree and keep a handle on its root.
tree = ET.parse('data/Export.xml')
root = tree.getroot()

# Collect the XML attribute mapping of every <Record> element in the tree.
record_list = [record.attrib for record in root.iter('Record')]

In [3]:
# One row per record, one column per XML attribute.
record_data = pd.DataFrame(record_list)

record_data = record_data.assign(
    # Parse the three timestamp columns into datetimes.
    **{
        name: pd.to_datetime(record_data[name])
        for name in ('creationDate', 'startDate', 'endDate')
    },
    # Coerce 'value' to numbers; anything unparseable becomes NaN first.
    # Some records do not measure anything and only count occurrences, so the
    # NaNs are filled with 1.0 (= one time) to make aggregation easier.
    value=pd.to_numeric(record_data['value'], errors='coerce').fillna(1.0),
    # Shorter observation names: drop the HealthKit identifier prefixes.
    type=(
        record_data['type']
        .str.replace('HKQuantityTypeIdentifier', '')
        .str.replace('HKCategoryTypeIdentifier', '')
    ),
)
record_data.tail()

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,device
2584192,HeartRateVariabilitySDNN,Carter’s Apple Watch,11.1,ms,2024-10-14 23:41:23-07:00,2024-10-14 23:40:22-07:00,2024-10-14 23:41:22-07:00,43.1274,"<<HKDevice: 0x302fcbc50>, name:Apple Watch, ma..."
2584193,HeartRateVariabilitySDNN,Carter’s Apple Watch,11.1,ms,2024-10-15 03:20:06-07:00,2024-10-15 03:19:06-07:00,2024-10-15 03:20:05-07:00,17.9547,"<<HKDevice: 0x302fcbc50>, name:Apple Watch, ma..."
2584194,HeartRateVariabilitySDNN,Carter’s Apple Watch,11.1,ms,2024-10-15 07:20:09-07:00,2024-10-15 07:19:08-07:00,2024-10-15 07:20:07-07:00,29.6792,"<<HKDevice: 0x302fcbc50>, name:Apple Watch, ma..."
2584195,HeartRateVariabilitySDNN,Carter’s Apple Watch,11.1,ms,2024-10-15 11:23:25-07:00,2024-10-15 11:22:24-07:00,2024-10-15 11:23:24-07:00,49.3019,"<<HKDevice: 0x302fcbc50>, name:Apple Watch, ma..."
2584196,HeartRateVariabilitySDNN,Carter’s Apple Watch,11.1,ms,2024-10-15 15:24:30-07:00,2024-10-15 15:23:29-07:00,2024-10-15 15:24:29-07:00,33.6185,"<<HKDevice: 0x302fcbc50>, name:Apple Watch, ma..."


In [4]:
# Distinct record types present in the export, in order of first appearance.
pd.unique(record_data['type'])

array(['Height', 'BodyMass', 'HeartRate', 'OxygenSaturation',
       'RespiratoryRate', 'StepCount', 'DistanceWalkingRunning',
       'BasalEnergyBurned', 'ActiveEnergyBurned', 'FlightsClimbed',
       'DietaryFatTotal', 'DietaryFatPolyunsaturated',
       'DietaryFatMonounsaturated', 'DietaryFatSaturated',
       'DietaryCholesterol', 'DietarySodium', 'DietaryCarbohydrates',
       'DietaryFiber', 'DietarySugar', 'DietaryEnergyConsumed',
       'DietaryProtein', 'DietaryVitaminC', 'DietaryCalcium',
       'DietaryIron', 'DietaryPotassium', 'AppleExerciseTime',
       'DietaryCaffeine', 'DistanceCycling', 'RestingHeartRate', 'VO2Max',
       'WalkingHeartRateAverage', 'EnvironmentalAudioExposure',
       'HeadphoneAudioExposure', 'WalkingDoubleSupportPercentage',
       'SixMinuteWalkTestDistance', 'AppleStandTime', 'WalkingSpeed',
       'WalkingStepLength', 'WalkingAsymmetryPercentage',
       'StairAscentSpeed', 'StairDescentSpeed',
       'HKDataTypeSleepDurationGoal', 'AppleWalkin

In [5]:
# Keep only the rows whose record type is 'ActiveEnergyBurned'.
energy_burned = record_data.loc[record_data['type'].eq('ActiveEnergyBurned')]
energy_burned.head()

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,device
1154868,ActiveEnergyBurned,Carter’s Apple Watch,4.3,Cal,2018-08-15 01:43:19-07:00,2018-08-15 00:41:01-07:00,2018-08-15 01:41:02-07:00,0.177,"<<HKDevice: 0x302f47340>, name:Apple Watch, ma..."
1154869,ActiveEnergyBurned,Carter’s Apple Watch,4.3,Cal,2018-08-15 06:03:09-07:00,2018-08-15 06:01:32-07:00,2018-08-15 06:02:35-07:00,0.086,"<<HKDevice: 0x302f47340>, name:Apple Watch, ma..."
1154870,ActiveEnergyBurned,Carter’s Apple Watch,4.3,Cal,2018-08-15 06:04:19-07:00,2018-08-15 06:02:35-07:00,2018-08-15 06:03:36-07:00,0.317,"<<HKDevice: 0x302f47340>, name:Apple Watch, ma..."
1154871,ActiveEnergyBurned,Carter’s Apple Watch,4.3,Cal,2018-08-15 06:05:35-07:00,2018-08-15 06:03:36-07:00,2018-08-15 06:04:39-07:00,0.17,"<<HKDevice: 0x302f47340>, name:Apple Watch, ma..."
1154872,ActiveEnergyBurned,Carter’s Apple Watch,4.3,Cal,2018-08-15 06:06:41-07:00,2018-08-15 06:04:39-07:00,2018-08-15 06:05:41-07:00,0.158,"<<HKDevice: 0x302f47340>, name:Apple Watch, ma..."


In [90]:
# Daily totals of active energy for September through December 2023:
# keep records whose start date falls in 2023 with month >= 9, index them by
# start date, and sum 'value' into one row per calendar day.
df = (
    energy_burned
    .loc[
        lambda rec: rec['startDate'].dt.year.eq(2023) & rec['startDate'].dt.month.ge(9),
        ['startDate', 'value'],
    ]
    .set_index('startDate')
    .resample('1D')
    .sum()
)
df

Unnamed: 0_level_0,value
startDate,Unnamed: 1_level_1
2023-09-01 00:00:00-07:00,307.555
2023-09-02 00:00:00-07:00,286.974
2023-09-03 00:00:00-07:00,293.904
2023-09-04 00:00:00-07:00,139.895
2023-09-05 00:00:00-07:00,254.960
...,...
2023-12-27 00:00:00-07:00,336.140
2023-12-28 00:00:00-07:00,329.503
2023-12-29 00:00:00-07:00,374.458
2023-12-30 00:00:00-07:00,358.903


In [91]:
# Line chart of the per-day totals held in `df` (index on x, 'value' on y).
fig = px.line(
    df,
    x=df.index,
    y='value',
    title='Line Plot of Calories burned through Exercise per Day',
)

# Axis labels; the legend is hidden because there is a single series.
fig.update_layout(
    xaxis_title='Date',
    yaxis_title='Calories burned through Exercise',
    showlegend=False,
)

# Render the figure.
fig.show()

In [92]:
# Summary statistics (count, mean, std, quartiles, min/max) of the 'value' column.
df.loc[:, 'value'].describe()

count     122.000000
mean      440.543639
std       240.887977
min         0.000000
25%       271.934500
50%       411.576000
75%       587.592250
max      1513.845000
Name: value, dtype: float64

In [93]:
# Keep only the rows whose record type is 'TimeInDaylight'.
dl = record_data.loc[record_data['type'].eq('TimeInDaylight')]
dl

Unnamed: 0,type,sourceName,sourceVersion,unit,creationDate,startDate,endDate,value,device
2412858,TimeInDaylight,Carter’s Apple Watch,10.2,min,2023-12-21 16:32:48-07:00,2023-12-21 16:09:17-07:00,2023-12-21 16:14:17-07:00,4.0,"<<HKDevice: 0x302f04f00>, name:Apple Watch, ma..."
2412859,TimeInDaylight,Carter’s Apple Watch,10.2,min,2023-12-21 17:15:21-07:00,2023-12-21 17:01:31-07:00,2023-12-21 17:06:31-07:00,5.0,"<<HKDevice: 0x302f04f00>, name:Apple Watch, ma..."
2412860,TimeInDaylight,Carter’s Apple Watch,10.2,min,2023-12-21 17:15:21-07:00,2023-12-21 17:06:31-07:00,2023-12-21 17:11:31-07:00,5.0,"<<HKDevice: 0x302f04f00>, name:Apple Watch, ma..."
2412861,TimeInDaylight,Carter’s Apple Watch,10.2,min,2023-12-21 17:21:31-07:00,2023-12-21 17:11:31-07:00,2023-12-21 17:16:31-07:00,5.0,"<<HKDevice: 0x302f04f00>, name:Apple Watch, ma..."
2412862,TimeInDaylight,Carter’s Apple Watch,10.2,min,2023-12-21 17:27:38-07:00,2023-12-21 17:16:31-07:00,2023-12-21 17:21:31-07:00,5.0,"<<HKDevice: 0x302f04f00>, name:Apple Watch, ma..."
...,...,...,...,...,...,...,...,...,...
2417201,TimeInDaylight,Carter’s Apple Watch,11.1,min,2024-10-15 15:03:55-07:00,2024-10-15 14:50:09-07:00,2024-10-15 14:55:09-07:00,3.0,"<<HKDevice: 0x302f05ef0>, name:Apple Watch, ma..."
2417202,TimeInDaylight,Carter’s Apple Watch,11.1,min,2024-10-15 16:09:13-07:00,2024-10-15 15:58:57-07:00,2024-10-15 16:03:57-07:00,5.0,"<<HKDevice: 0x302f05ef0>, name:Apple Watch, ma..."
2417203,TimeInDaylight,Carter’s Apple Watch,11.1,min,2024-10-15 16:13:58-07:00,2024-10-15 16:03:57-07:00,2024-10-15 16:08:57-07:00,5.0,"<<HKDevice: 0x302f05ef0>, name:Apple Watch, ma..."
2417204,TimeInDaylight,Carter’s Apple Watch,11.1,min,2024-10-15 16:44:07-07:00,2024-10-15 16:27:56-07:00,2024-10-15 16:32:56-07:00,5.0,"<<HKDevice: 0x302f05ef0>, name:Apple Watch, ma..."


In [98]:
# TimeInDaylight for 2024: sum 'value' per calendar day, then average those
# daily totals over consecutive 3-day windows.
# (The previous extra condition `month < 13` was always true and has been
# dropped; the selected rows are unchanged.)
df = (
    dl
    .loc[dl['startDate'].dt.year == 2024, ['startDate', 'value']]
    .set_index('startDate')
    .resample('1D')   # one row per day ...
    .sum()            # ... holding that day's total
    .resample('3D')   # group the daily totals into 3-day bins ...
    .mean()           # ... and take the mean daily total per bin
)
df.head()

Unnamed: 0_level_0,value
startDate,Unnamed: 1_level_1
2024-01-01 00:00:00-07:00,107.0
2024-01-04 00:00:00-07:00,185.333333
2024-01-07 00:00:00-07:00,29.666667
2024-01-10 00:00:00-07:00,48.666667
2024-01-13 00:00:00-07:00,18.666667


In [135]:
# TimeInDaylight for 2024, summed per calendar day. Both series below are
# derived from these daily totals, so the shared filter/index/sum steps are
# computed once instead of being repeated for each series.
daylight_daily_2024 = (
    dl
    .loc[dl['startDate'].dt.year == 2024, ['startDate', 'value']]
    .set_index('startDate')
    .resample('1D')
    .sum()
)

# Mean daily total over consecutive 2-day windows.
df = daylight_daily_2024.resample('2D').mean()

# Mean daily total per calendar month, labelled at month end.
# NOTE: the 'ME' (month-end) alias is only understood by recent pandas
# releases; older versions spell it 'M'.
df1 = daylight_daily_2024.resample('ME').mean()

In [136]:
import plotly.graph_objs as go
import numpy as np

# Trace 1: the values in `df`, drawn as a blue line.
# NOTE(review): if `df` is the 2-day-mean frame built in the preceding cell,
# this trace is a smoothed series rather than raw per-day totals - confirm the
# 'Daily Sunlight' label is what is intended.
line1 = go.Scatter(
    x=df.index,
    y=df['value'],
    mode='lines',
    name='Daily Sunlight',
    line={'color': 'blue'},
)

# Trace 2: the values in `df1`, drawn as a red line.
line2 = go.Scatter(
    x=df1.index,
    y=df1['value'],
    mode='lines',
    name='Monthly Average',
    line={'color': 'red'},
)

# Put both traces on one figure.
fig = go.Figure(data=[line1, line2])

# Title, axis labels and legend heading.
fig.update_layout(
    title='Daily Time Spent in Sunlight',
    xaxis_title='Date',
    yaxis_title='Time Spent (Minutes)',
    legend_title='Functions',
)

# Render the figure.
fig.show()
