# Apple Health Intelligence System


## 1. Data Extraction

In [None]:
# [Cell 1] Unzip and parse function
import xml.etree.ElementTree as ET
import zipfile
from pathlib import Path

import pandas as pd


def extract_health_data(zip_path="apple_health_export.zip"):
    """Extract an Apple Health export archive and parse it into a DataFrame.

    The archive is unpacked into the ``apple_health_export`` directory
    (relative to the current working directory) and the ``export.xml`` file
    found there is parsed. Only top-level ``Record`` elements whose ``type``
    is listed in ``HEALTH_METRICS`` and that carry a ``value`` attribute are
    kept.

    Args:
        zip_path: Path to the Apple Health export ``.zip`` file.

    Returns:
        A ``pandas.DataFrame`` with one row per kept record and the columns
        ``type`` (short metric name), ``date`` (``startDate`` normalized to
        midnight), ``value`` (float) and ``unit`` (empty string when the
        record has no unit). The columns are present even when no record
        matched.

    Raises:
        FileNotFoundError: If no ``export.xml`` exists after extraction.
    """
    extract_dir = Path("apple_health_export")

    # Unzip health data
    with zipfile.ZipFile(zip_path, "r") as zip_ref:
        zip_ref.extractall(extract_dir)

    # Define comprehensive health metrics
    HEALTH_METRICS = {
        # Activity
        "HKQuantityTypeIdentifierStepCount": "steps",
        "HKQuantityTypeIdentifierDistanceWalkingRunning": "distance",
        "HKQuantityTypeIdentifierFlightsClimbed": "flights",
        "HKQuantityTypeIdentifierActiveEnergyBurned": "active_cal",
        "HKQuantityTypeIdentifierBasalEnergyBurned": "basal_cal",
        "HKQuantityTypeIdentifierAppleExerciseTime": "exercise_min",
        "HKQuantityTypeIdentifierVO2Max": "vo2_max",
        # Vitals
        "HKQuantityTypeIdentifierHeartRate": "heart_rate",
        "HKQuantityTypeIdentifierRestingHeartRate": "resting_hr",
        "HKQuantityTypeIdentifierWalkingHeartRateAverage": "walking_hr",
        "HKQuantityTypeIdentifierBloodPressureSystolic": "bp_systolic",
        "HKQuantityTypeIdentifierBloodPressureDiastolic": "bp_diastolic",
        "HKQuantityTypeIdentifierBloodOxygenSaturation": "spo2",
        "HKQuantityTypeIdentifierBodyTemperature": "body_temp",
        "HKQuantityTypeIdentifierRespiratoryRate": "resp_rate",
        "HKQuantityTypeIdentifierHeartRateVariabilitySDNN": "hrv",
        # Body
        "HKQuantityTypeIdentifierBodyMass": "weight",
        "HKQuantityTypeIdentifierBodyFatPercentage": "body_fat",
        "HKQuantityTypeIdentifierLeanBodyMass": "lean_mass",
        "HKQuantityTypeIdentifierBodyMassIndex": "bmi",
        "HKQuantityTypeIdentifierHeight": "height",
        # Sleep
        "HKCategoryTypeIdentifierSleepAnalysis": "sleep",
        # Nutrition
        "HKQuantityTypeIdentifierDietaryEnergyConsumed": "calories_consumed",
        "HKQuantityTypeIdentifierDietaryCarbohydrates": "carbs",
        "HKQuantityTypeIdentifierDietaryProtein": "protein",
        "HKQuantityTypeIdentifierDietaryFatTotal": "fat",
        # Mindfulness
        "HKCategoryTypeIdentifierMindfulSession": "mindfulness",
        # Reproductive
        "HKCategoryTypeIdentifierOvulationTestResult": "ovulation_test",
        "HKCategoryTypeIdentifierMenstrualFlow": "menstrual_flow",
    }

    # Record types whose value is the elapsed time between startDate and
    # endDate, mapped to the number of seconds per reported unit.
    DURATION_DIVISORS = {
        "HKCategoryTypeIdentifierSleepAnalysis": 3600,  # hours
        "HKCategoryTypeIdentifierMindfulSession": 60,  # minutes
    }

    # Locate export.xml. The flat layout is tried first; if the archive
    # wrapped its contents in a folder, fall back to a recursive search.
    xml_path = extract_dir / "export.xml"
    if not xml_path.is_file():
        candidates = sorted(extract_dir.rglob("export.xml"))
        if not candidates:
            raise FileNotFoundError(
                f"export.xml not found under '{extract_dir}' after extracting '{zip_path}'"
            )
        xml_path = candidates[0]

    # Parse XML data
    root = ET.parse(xml_path).getroot()

    records = []
    for record in root.findall("Record"):
        record_type = record.get("type")
        if record_type not in HEALTH_METRICS:
            continue

        raw_value = record.get("value")
        if raw_value is None:
            continue

        # Parsed once; used for both the duration and the date column.
        start = pd.to_datetime(record.get("startDate"))

        if record_type in DURATION_DIVISORS:
            end = pd.to_datetime(record.get("endDate"))
            value = (end - start).total_seconds() / DURATION_DIVISORS[record_type]
        else:
            try:
                value = float(raw_value)
            except ValueError:
                # Category records carry a symbolic string rather than a
                # number; keep the row (so its date survives) with NaN
                # instead of aborting the whole extraction.
                value = float("nan")

        records.append(
            {
                "type": HEALTH_METRICS[record_type],
                "date": start.normalize(),
                "value": value,
                "unit": record.get("unit", ""),
            }
        )

    # Explicit columns keep the schema stable when nothing matched.
    return pd.DataFrame(records, columns=["type", "date", "value", "unit"])