# Apple Health Intelligence System


## 1. Data Extraction

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd


# Maps Apple Health record type identifiers to the short metric names stored
# in the "type" column of the extracted DataFrame.
_HEALTH_METRICS = {
    # Activity
    "HKQuantityTypeIdentifierStepCount": "steps",
    "HKQuantityTypeIdentifierDistanceWalkingRunning": "distance",
    "HKQuantityTypeIdentifierFlightsClimbed": "flights",
    "HKQuantityTypeIdentifierActiveEnergyBurned": "active_cal",
    "HKQuantityTypeIdentifierBasalEnergyBurned": "basal_cal",
    "HKQuantityTypeIdentifierAppleExerciseTime": "exercise_min",
    "HKQuantityTypeIdentifierVO2Max": "vo2_max",
    # Vitals
    "HKQuantityTypeIdentifierHeartRate": "heart_rate",
    "HKQuantityTypeIdentifierRestingHeartRate": "resting_hr",
    "HKQuantityTypeIdentifierWalkingHeartRateAverage": "walking_hr",
    "HKQuantityTypeIdentifierBloodPressureSystolic": "bp_systolic",
    "HKQuantityTypeIdentifierBloodPressureDiastolic": "bp_diastolic",
    "HKQuantityTypeIdentifierBloodOxygenSaturation": "spo2",
    "HKQuantityTypeIdentifierBodyTemperature": "body_temp",
    "HKQuantityTypeIdentifierRespiratoryRate": "resp_rate",
    "HKQuantityTypeIdentifierHeartRateVariabilitySDNN": "hrv",
    # Body
    "HKQuantityTypeIdentifierBodyMass": "weight",
    "HKQuantityTypeIdentifierBodyFatPercentage": "body_fat",
    "HKQuantityTypeIdentifierLeanBodyMass": "lean_mass",
    "HKQuantityTypeIdentifierBodyMassIndex": "bmi",
    "HKQuantityTypeIdentifierHeight": "height",
    # Sleep
    "HKCategoryTypeIdentifierSleepAnalysis": "sleep",
    # Nutrition
    "HKQuantityTypeIdentifierDietaryEnergyConsumed": "calories_consumed",
    "HKQuantityTypeIdentifierDietaryCarbohydrates": "carbs",
    "HKQuantityTypeIdentifierDietaryProtein": "protein",
    "HKQuantityTypeIdentifierDietaryFatTotal": "fat",
    # Mindfulness
    "HKCategoryTypeIdentifierMindfulSession": "mindfulness",
    # Reproductive
    "HKCategoryTypeIdentifierOvulationTestResult": "ovulation_test",
    "HKCategoryTypeIdentifierMenstrualFlow": "menstrual_flow",
}

# Record types whose stored value is the elapsed time between startDate and
# endDate rather than the "value" attribute. Each entry gives the number of
# seconds in one output unit.
_DURATION_UNIT_SECONDS = {
    "HKCategoryTypeIdentifierSleepAnalysis": 3600,  # hours
    "HKCategoryTypeIdentifierMindfulSession": 60,  # minutes
}

# Column layout of the returned DataFrame; also applied when nothing matched.
_RECORD_COLUMNS = ["type", "date", "value", "unit", "source"]


def _iter_top_level_records(xml_path):
    """Stream the <Record> elements that are direct children of the XML root."""
    depth = 0
    root = None
    for event, elem in ET.iterparse(xml_path, events=("start", "end")):
        if event == "start":
            if root is None:
                root = elem
            depth += 1
            continue

        depth -= 1
        if depth != 1:
            # Either an element nested deeper than the root's children, or the
            # root's own end event. Only direct children are of interest.
            continue
        if elem.tag == "Record":
            yield elem
        # The child has been consumed; discard it so memory use does not grow
        # with the size of the export.
        root.clear()


def extract_health_data(xml_path="apple_health_export.xml"):
    """Parse Apple Health data directly from an XML export.

    Only <Record> elements that are direct children of the document root and
    whose ``type`` is a known metric are kept. The file is streamed, so memory
    use does not depend on the size of the export.

    Args:
        xml_path: Path to the export file, or a file object opened for
            reading (anything ``xml.etree.ElementTree.iterparse`` accepts).

    Returns:
        A DataFrame with one row per kept record and the columns ``type``
        (short metric name), ``date`` (``startDate`` truncated to midnight),
        ``value`` (float), ``unit`` and ``source``. Sleep rows hold the
        start-to-end duration in hours and mindfulness rows hold it in
        minutes. The columns are present even when no record matched.

    Raises:
        FileNotFoundError: If ``xml_path`` does not exist.
        xml.etree.ElementTree.ParseError: If the file is not well-formed XML.
    """
    print(
        f"⏳ Parsing XML file: {xml_path} (this may take several minutes for large files)"
    )

    records = []
    non_numeric = 0
    for i, record in enumerate(_iter_top_level_records(xml_path)):
        if i % 10000 == 0:  # Print progress periodically
            print(f"📊 Processed {i} records...")

        record_type = record.get("type")
        metric = _HEALTH_METRICS.get(record_type)
        if metric is None:
            continue

        raw_value = record.get("value")
        if not raw_value:
            continue

        start = pd.to_datetime(record.get("startDate"))

        unit_seconds = _DURATION_UNIT_SECONDS.get(record_type)
        if unit_seconds is not None:
            # Duration-based metric: the "value" attribute is a category
            # label, so the elapsed time is stored instead.
            end = pd.to_datetime(record.get("endDate"))
            value = (end - start).total_seconds() / unit_seconds
        else:
            try:
                value = float(raw_value)
            except ValueError:
                # Category records carry symbolic values that have no float
                # form; skip them instead of aborting the whole extraction.
                non_numeric += 1
                continue

        records.append(
            {
                "type": metric,
                "date": start.normalize(),
                "value": value,
                "unit": record.get("unit", ""),
                "source": record.get("sourceName", ""),
            }
        )

    print(f"✅ Parsing complete! Total records: {len(records)}")
    if non_numeric:
        print(f"⚠️ Skipped {non_numeric} records with non-numeric values")
    return pd.DataFrame(records, columns=_RECORD_COLUMNS)

In [2]:
# [Cell 2] Run the extraction against the default export path, then preview
# the first five rows of the resulting DataFrame.
health_df = extract_health_data()
health_df.head(n=5)

⏳ Parsing XML file: apple_health_export.xml (this may take several minutes for large files)


FileNotFoundError: [Errno 2] No such file or directory: 'apple_health_export.xml'