In [20]:
import xml.etree.ElementTree as ET
from datetime import datetime
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd

# Path to the Apple Health export.xml file, relative to the working directory;
# it is passed to parse_apple_health_export() further down.
xml_file = 'apple_health_export/export.xml'


### Understanding the XML Structure 📄

The raw XML data exported from Apple Health includes **workout details** with nested tags that provide additional metrics. Here's a breakdown of the structure:

- **Workout Attributes**:
   - `workoutActivityType`: Specifies the type of activity (e.g., Running).
   - `duration`: Duration of the workout.
   - `startDate` and `endDate`: Workout start and end times.

- **Metadata Entries** (`MetadataEntry`):
   - Contains additional details like `HKAverageMETs` (average METs) and `HKIndoorWorkout` (indoor/outdoor flag).

- **Workout Events** (`WorkoutEvent`):
   - Tracks workout segments, pauses, and resumes with:
     - `type`: Type of event (e.g., Segment, Pause, Motion Resumed).
     - `date`: Timestamp of the event.
     - `duration`: Duration of the segment, if applicable.

- **Workout Statistics** (`WorkoutStatistics`):
   - Includes cumulative metrics such as:
     - `HKQuantityTypeIdentifierActiveEnergyBurned`: Calories burned.
     - `HKQuantityTypeIdentifierDistanceWalkingRunning`: Distance covered.

- **Workout Route**:
   - Provides a path to the GPX route file for GPS data.

Below is the Python code to **parse** this structure and extract the relevant data into a clean, analyzable table with one row per workout.


### Structure of the Tree Data (Example Workout)
```xml
<Workout workoutActivityType="HKWorkoutActivityTypeRunning" duration="17.12425581614177" durationUnit="min" sourceName="Jason’s Apple Watch" sourceVersion="6.1.1" device="&lt;&lt;HKDevice: 0x3023054f0&gt;, name:Apple Watch, manufacturer:Apple Inc., model:Watch, hardware:Watch2,4, software:6.1.1, creation date:2019-12-25 05:23:41 +0000&gt;" creationDate="2020-01-07 12:10:31 -0600" startDate="2020-01-07 11:52:25 -0600" endDate="2020-01-07 12:10:21 -0600">
  <MetadataEntry key="HKAverageMETs" value="11.1312 kcal/hr·kg"/>
  <MetadataEntry key="HKTimeZone" value="America/Chicago"/>
  <MetadataEntry key="HKIndoorWorkout" value="0"/>
  <WorkoutEvent type="HKWorkoutEventTypeSegment" date="2020-01-07 11:52:25 -0600" duration="6.331976975997289" durationUnit="min"/>
  <WorkoutEvent type="HKWorkoutEventTypeSegment" date="2020-01-07 11:52:25 -0600" duration="9.412362335125605" durationUnit="min"/>
  <WorkoutEvent type="HKWorkoutEventTypeMotionPaused" date="2020-01-07 11:52:27 -0600"/>
  <WorkoutEvent type="HKWorkoutEventTypeMotionResumed" date="2020-01-07 11:52:28 -0600"/>
  <WorkoutEvent type="HKWorkoutEventTypePause" date="2020-01-07 11:53:03 -0600"/>
  <WorkoutEvent type="HKWorkoutEventTypeResume" date="2020-01-07 11:53:50 -0600"/>
  <WorkoutEvent type="HKWorkoutEventTypeMotionPaused" date="2020-01-07 11:53:51 -0600"/>
  <WorkoutEvent type="HKWorkoutEventTypeMotionResumed" date="2020-01-07 11:53:54 -0600"/>
  <WorkoutEvent type="HKWorkoutEventTypeSegment" date="2020-01-07 11:58:45 -0600" duration="5.411429115136465" durationUnit="min"/>
  <WorkoutEvent type="HKWorkoutEventTypeSegment" date="2020-01-07 12:01:50 -0600" duration="8.241872878869374" durationUnit="min"/>
  <WorkoutEvent type="HKWorkoutEventTypeSegment" date="2020-01-07 12:04:10 -0600" duration="5.078322347005209" durationUnit="min"/>
  <WorkoutEvent type="HKWorkoutEventTypeSegment" date="2020-01-07 12:10:05 -0600" duration="0.2081167499224345" durationUnit="min"/>
  <WorkoutStatistics type="HKQuantityTypeIdentifierActiveEnergyBurned" startDate="2020-01-07 11:52:25 -0600" endDate="2020-01-07 12:10:21 -0600" sum="230.42" unit="Cal"/>
  <WorkoutStatistics type="HKQuantityTypeIdentifierDistanceWalkingRunning" startDate="2020-01-07 11:52:25 -0600" endDate="2020-01-07 12:10:21 -0600" sum="2.01899" unit="mi"/>
  <WorkoutStatistics type="HKQuantityTypeIdentifierBasalEnergyBurned" startDate="2020-01-07 11:52:25 -0600" endDate="2020-01-07 12:10:21 -0600" sum="28.9791" unit="Cal"/>
  <WorkoutRoute sourceName="Jason’s Apple Watch" sourceVersion="13.3" creationDate="2020-01-07 12:34:38 -0600" startDate="2020-01-07 11:52:30 -0600" endDate="2020-01-07 12:10:19 -0600">
   <MetadataEntry key="HKMetadataKeySyncVersion" value="2"/>
   <MetadataEntry key="HKMetadataKeySyncIdentifier" value="C03A13B7-C4C9-431F-8CD3-A19D8561CEEE"/>
   <FileReference path="/workout-routes/route_2020-01-07_12.10pm.gpx"/>
  </WorkoutRoute>
</Workout>

```

In [63]:
def _minutes_between(start, end):
    """Return the signed gap in minutes between two export timestamps.

    Both arguments are strings in the ``YYYY-MM-DD HH:MM:SS +HHMM`` layout used
    by the export (e.g. ``2020-01-07 11:52:25 -0600``).
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S %z"
    delta = datetime.strptime(end, timestamp_format) - datetime.strptime(start, timestamp_format)
    return delta.total_seconds() / 60


def _parse_average_mets(raw_value):
    """Return the leading number of an ``HKAverageMETs`` value, or None.

    The value looks like ``"11.1312 kcal/hr·kg"``. Only the first
    whitespace-separated token is read, so the exact unit spelling does not
    matter; an empty or non-numeric value yields None instead of raising.
    """
    tokens = raw_value.split()
    if not tokens:
        return None
    try:
        return float(tokens[0])
    except ValueError:
        return None


def _paused_minutes(workout):
    """Sum the minutes between paired pause/resume events of one workout.

    ``MotionPaused``/``MotionResumed`` and ``Pause``/``Resume`` pairs are tracked
    independently and their gaps are added together. A resume without a
    preceding pause is ignored, and a pause that is never resumed contributes
    nothing.
    """
    total = 0.0
    motion_paused_at = None
    paused_at = None

    for event in workout.findall('WorkoutEvent'):
        event_type = event.get('type', 'N/A')
        event_date = event.get('date')
        if not event_date:
            # Without a timestamp the event cannot open or close an interval.
            continue

        if event_type == "HKWorkoutEventTypeMotionPaused":
            motion_paused_at = event_date
        elif event_type == "HKWorkoutEventTypeMotionResumed" and motion_paused_at:
            total += _minutes_between(motion_paused_at, event_date)
            motion_paused_at = None
        elif event_type == "HKWorkoutEventTypePause":
            paused_at = event_date
        elif event_type == "HKWorkoutEventTypeResume" and paused_at:
            total += _minutes_between(paused_at, event_date)
            paused_at = None

    return total


def parse_apple_health_export(xml_file):
    """
    Parses Apple Health's export.xml file to extract workout details.

    The whole document is loaded with ``ElementTree.parse`` and every
    ``Workout`` element directly under the root becomes one row.

    Parameters:
    - xml_file (str): Path to the export.xml file.

    Returns:
    - df_workouts (DataFrame): One row per workout with:
        - the top-level attributes (``ActivityType``, ``Duration (min)``,
          ``Source``, ``Source_version``, ``StartDate``, ``EndDate``);
        - ``AverageMETs`` and ``IndoorWorkout`` when the matching
          ``MetadataEntry`` is present;
        - one ``<type>_<attr>_<unit>`` column for every ``sum``/``average``/
          ``minimum``/``maximum`` attribute found on a ``WorkoutStatistics``
          element;
        - ``PausedDuration_mins``, the paused time rounded to two decimals.
      Values missing for a given workout appear as NaN.

    Raises:
    - Whatever ``ElementTree.parse`` raises for a missing or malformed file.
    - ValueError if a numeric attribute or an event timestamp is malformed.
    """
    root = ET.parse(xml_file).getroot()

    workouts = []
    for workout in root.findall('Workout'):
        # Top-level workout attributes
        workout_data = {
            'ActivityType': workout.get('workoutActivityType', 'N/A').replace("HKWorkoutActivityType", ""),
            'Duration (min)': float(workout.get('duration', 0)),
            'Source': workout.get('sourceName', 'N/A'),
            'Source_version': workout.get('sourceVersion', 'N/A'),
            'StartDate': workout.get('startDate', 'N/A'),
            'EndDate': workout.get('endDate', 'N/A')
        }

        # Metadata entries: only the two keys used downstream are kept
        for metadata in workout.findall('MetadataEntry'):
            key = metadata.get('key', '')
            value = metadata.get('value', '')
            if key == "HKAverageMETs":
                average_mets = _parse_average_mets(value)
                if average_mets is not None:
                    workout_data['AverageMETs'] = average_mets
            elif key == "HKIndoorWorkout":
                workout_data['IndoorWorkout'] = value

        # WorkoutStatistics: one column per aggregate attribute that is present
        for stat in workout.findall('WorkoutStatistics'):
            stat_type = stat.get('type', '').replace("HKQuantityTypeIdentifier", "")
            unit = stat.get('unit', '')
            for attr in ['sum', 'average', 'minimum', 'maximum']:
                value = stat.get(attr)
                if value:  # Only add if the attribute exists
                    column_name = f"{stat_type}_{attr}_{unit}".strip()
                    workout_data[column_name] = float(value)

        # Paused time derived from the pause/resume WorkoutEvent pairs
        workout_data['PausedDuration_mins'] = round(_paused_minutes(workout), 2)
        workouts.append(workout_data)

    df_workouts = pd.DataFrame(workouts)

    return df_workouts

In [64]:
# Parse the export at xml_file into the workout summary DataFrame
my_workouts = parse_apple_health_export(xml_file)

In [65]:
# Preview the first rows of the parsed workouts
my_workouts.head()

Unnamed: 0,ActivityType,Duration (min),Source,Source_version,StartDate,EndDate,IndoorWorkout,ActiveEnergyBurned_sum_Cal,BasalEnergyBurned_sum_Cal,PausedDuration_mins,...,RunningVerticalOscillation_maximum_cm,RunningSpeed_average_mi/hr,RunningSpeed_minimum_mi/hr,RunningSpeed_maximum_mi/hr,RunningStrideLength_average_m,RunningStrideLength_minimum_m,RunningStrideLength_maximum_m,HeartRate_average_count/min,HeartRate_minimum_count/min,HeartRate_maximum_count/min
0,HighIntensityIntervalTraining,54.242769,Jason’s Apple Watch,5.3.3,2019-12-12 12:20:45 -0600,2019-12-12 13:15:00 -0600,0,461.313,95.11,0.0,...,,,,,,,,,,
1,HighIntensityIntervalTraining,52.620327,Jason’s Apple Watch,5.3.3,2019-12-16 11:49:35 -0600,2019-12-16 12:42:12 -0600,0,628.034,92.232,0.0,...,,,,,,,,,,
2,TraditionalStrengthTraining,50.564955,Jason’s Apple Watch,5.3.3,2019-12-17 11:52:57 -0600,2019-12-17 12:45:38 -0600,0,363.55,88.3758,0.0,...,,,,,,,,,,
3,Cycling,46.048452,Jason’s Apple Watch,5.3.3,2019-12-19 11:57:07 -0600,2019-12-19 12:43:10 -0600,1,569.988,80.7117,0.0,...,,,,,,,,,,
4,Cycling,10.352728,Jason’s Apple Watch,6.1.1,2019-12-26 17:08:01 -0600,2019-12-26 17:18:23 -0600,0,0.041244,0.172624,0.0,...,,,,,,,,,,


In [66]:
# Inspect the last rows of the parsed workouts
my_workouts.tail()

Unnamed: 0,ActivityType,Duration (min),Source,Source_version,StartDate,EndDate,IndoorWorkout,ActiveEnergyBurned_sum_Cal,BasalEnergyBurned_sum_Cal,PausedDuration_mins,...,RunningVerticalOscillation_maximum_cm,RunningSpeed_average_mi/hr,RunningSpeed_minimum_mi/hr,RunningSpeed_maximum_mi/hr,RunningStrideLength_average_m,RunningStrideLength_minimum_m,RunningStrideLength_maximum_m,HeartRate_average_count/min,HeartRate_minimum_count/min,HeartRate_maximum_count/min
694,Walking,5.090908,Jason’s Apple Watch,11.1,2024-11-16 18:59:56 -0600,2024-11-16 19:05:02 -0600,0,7.38105,8.78876,0.0,...,,,,,,,,90.3207,76.0,102.0
695,Walking,22.325235,Jason’s Apple Watch,11.1,2024-11-19 12:09:33 -0600,2024-11-19 12:31:53 -0600,0,75.1935,38.5557,0.0,...,,,,,,,,101.744,98.0,105.0
696,Running,7.030009,Jason’s Apple Watch,11.1,2024-11-20 16:18:28 -0600,2024-11-20 16:40:45 -0600,0,79.364,12.5771,15.3,...,10.5,5.98898,3.24817,7.14057,0.986667,0.8,1.06,139.0,134.0,143.0
697,TraditionalStrengthTraining,43.71325,Jason’s Apple Watch,11.1,2024-12-11 15:37:08 -0600,2024-12-11 16:20:51 -0600,0,288.942,73.2925,0.0,...,,,,,,,,120.554,83.0,153.0
698,Walking,19.507575,Jason’s Apple Watch,11.2,2024-12-14 11:04:50 -0600,2024-12-14 11:24:20 -0600,0,59.3613,33.1407,0.0,...,,,,,,,,96.1179,90.0,104.0


In [67]:
# Summary statistics for the numeric columns
my_workouts.describe()

Unnamed: 0,Duration (min),ActiveEnergyBurned_sum_Cal,BasalEnergyBurned_sum_Cal,PausedDuration_mins,AverageMETs,DistanceCycling_sum_mi,DistanceWalkingRunning_sum_mi,DistanceSwimming_sum_yd,SwimmingStrokeCount_sum_count,StepCount_sum_count,...,RunningVerticalOscillation_maximum_cm,RunningSpeed_average_mi/hr,RunningSpeed_minimum_mi/hr,RunningSpeed_maximum_mi/hr,RunningStrideLength_average_m,RunningStrideLength_minimum_m,RunningStrideLength_maximum_m,HeartRate_average_count/min,HeartRate_minimum_count/min,HeartRate_maximum_count/min
count,699.0,697.0,665.0,699.0,657.0,4.0,311.0,6.0,6.0,21.0,...,21.0,21.0,21.0,21.0,21.0,21.0,21.0,83.0,83.0,83.0
mean,33.868744,295.72356,54.52525,0.548941,7.982494,0.1029251,2.40059,833.333333,326.166667,3983.209524,...,10.82381,6.400204,4.5341,7.50013,1.031756,0.868571,1.166667,125.533888,96.096386,149.831325
std,54.603344,138.356434,23.07681,2.650898,3.171258,0.1131527,0.841849,258.19889,96.63626,1165.268094,...,0.254764,0.264544,1.020154,0.488318,0.037542,0.06452,0.076376,30.371957,30.098057,28.515216
min,0.0,0.041244,0.129883,0.0,1.3,2.79294e-10,0.012913,500.0,189.0,1011.0,...,10.3,5.98898,2.37821,6.76247,0.979882,0.75,1.06,78.2016,58.0,89.0
25%,23.463122,220.405,39.7456,0.0,4.87238,0.0425451,2.02216,625.0,257.5,3620.0,...,10.6,6.21733,3.73263,7.14057,0.999251,0.83,1.12,101.872,76.0,129.0
50%,29.875018,308.143,49.8284,0.0,9.64053,0.0744396,2.63678,1000.0,385.0,4589.8,...,10.8,6.41435,4.68994,7.43313,1.03739,0.87,1.16,115.301,84.0,154.0
75%,42.039918,351.186,71.951,0.02,10.7624,0.1348196,3.01045,1000.0,385.75,4733.0,...,11.0,6.55316,5.3005,7.67137,1.0557,0.91,1.2,158.315,106.0,176.0
max,1432.133333,651.749,208.195,53.48,12.8434,0.262821,4.40626,1000.0,397.0,4972.0,...,11.4,7.1055,6.04167,8.94708,1.12846,0.98,1.33,184.194,176.0,195.0


In [69]:
# Keep only the rows whose ActivityType is Running and show the last five
my_workouts.loc[my_workouts['ActivityType'].eq('Running')].tail(5)

Unnamed: 0,ActivityType,Duration (min),Source,Source_version,StartDate,EndDate,IndoorWorkout,ActiveEnergyBurned_sum_Cal,BasalEnergyBurned_sum_Cal,PausedDuration_mins,...,RunningVerticalOscillation_maximum_cm,RunningSpeed_average_mi/hr,RunningSpeed_minimum_mi/hr,RunningSpeed_maximum_mi/hr,RunningStrideLength_average_m,RunningStrideLength_minimum_m,RunningStrideLength_maximum_m,HeartRate_average_count/min,HeartRate_minimum_count/min,HeartRate_maximum_count/min
672,Running,28.27599,Jason’s Apple Watch,10.6.1,2024-10-02 10:24:16 -0600,2024-10-02 10:52:53 -0600,0,351.969,49.7976,0.33,...,10.8,6.43732,4.17089,7.14054,1.04156,0.8,1.14,174.235,157.0,186.0
675,Running,27.999573,Jason’s Apple Watch,10.6.1,2024-10-08 10:01:00 -0600,2024-10-08 10:29:00 -0600,0,350.316,48.692,0.0,...,11.0,6.51519,2.37821,7.44314,1.0563,0.95,1.2,168.33,131.0,181.0
687,Running,29.980431,Jason’s Apple Watch,11.1,2024-10-31 13:21:04 -0600,2024-10-31 13:51:12 -0600,0,358.556,52.2307,0.15,...,11.4,6.15706,2.99211,7.4307,0.988563,0.87,1.08,180.906,165.0,187.0
692,Running,29.49573,Jason’s Apple Watch,11.1,2024-11-14 13:44:47 -0600,2024-11-14 14:14:27 -0600,0,350.271,51.2176,0.17,...,11.0,6.21733,3.66544,6.76247,0.999251,0.75,1.07,177.271,169.0,186.0
696,Running,7.030009,Jason’s Apple Watch,11.1,2024-11-20 16:18:28 -0600,2024-11-20 16:40:45 -0600,0,79.364,12.5771,15.3,...,10.5,5.98898,3.24817,7.14057,0.986667,0.8,1.06,139.0,134.0,143.0


In [70]:
# Save the outputs
# to_csv does not create missing parent directories, so make sure the target
# directory exists before writing (keeps the cell working on a fresh checkout).
Path("output").mkdir(parents=True, exist_ok=True)
my_workouts.to_csv("output/workout_summary.csv", index=False)

print("Main Workout Summary:")
print(my_workouts.head())


Main Workout Summary:
                    ActivityType  Duration (min)               Source  \
0  HighIntensityIntervalTraining       54.242769  Jason’s Apple Watch   
1  HighIntensityIntervalTraining       52.620327  Jason’s Apple Watch   
2    TraditionalStrengthTraining       50.564955  Jason’s Apple Watch   
3                        Cycling       46.048452  Jason’s Apple Watch   
4                        Cycling       10.352728  Jason’s Apple Watch   

  Source_version                  StartDate                    EndDate  \
0          5.3.3  2019-12-12 12:20:45 -0600  2019-12-12 13:15:00 -0600   
1          5.3.3  2019-12-16 11:49:35 -0600  2019-12-16 12:42:12 -0600   
2          5.3.3  2019-12-17 11:52:57 -0600  2019-12-17 12:45:38 -0600   
3          5.3.3  2019-12-19 11:57:07 -0600  2019-12-19 12:43:10 -0600   
4          6.1.1  2019-12-26 17:08:01 -0600  2019-12-26 17:18:23 -0600   

  IndoorWorkout  ActiveEnergyBurned_sum_Cal  BasalEnergyBurned_sum_Cal  \
0             0     