<a href="https://colab.research.google.com/github/Mattlee10/zone2/blob/main/25_05.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 데이터 수집

In [5]:
import xml.etree.ElementTree as ET
import pandas as pd

def parse_health_export(xml_path, record_types=None):
    """
    Extract selected Record entries from an Apple Health export XML file.

    Args:
        xml_path: Path of the export XML (anything ``ET.parse`` accepts).
        record_types: Optional collection of Record ``type`` strings to keep.
            ``None`` or an empty collection keeps every type. A single string
            is treated as one type.

    Returns:
        A DataFrame with the columns ``type``, ``value``, ``unit``, ``start``
        and ``end``: one row per ``Record`` element that is a direct child of
        the XML root and carries a numeric ``value`` attribute. The columns
        are present even when no record matches, so callers can always
        filter on ``df['type']``.
    """
    columns = ['type', 'value', 'unit', 'start', 'end']

    # Build the lookup once so the per-record membership test is O(1).
    if isinstance(record_types, str):
        wanted = {record_types}
    elif record_types:
        wanted = set(record_types)
    else:
        wanted = None

    root = ET.parse(xml_path).getroot()
    records = []

    for rec in root.findall('Record'):
        rtype = rec.get('type')
        if wanted is not None and rtype not in wanted:
            continue

        raw_value = rec.get('value')
        if raw_value is None:
            continue
        try:
            value = float(raw_value)
        except ValueError:
            # A value that is not a number cannot go into the numeric
            # 'value' column; skip the record instead of aborting the parse.
            continue

        records.append({
            'type': rtype,
            'value': value,
            'unit': rec.get('unit'),
            'start': pd.to_datetime(rec.get('startDate')),
            'end': pd.to_datetime(rec.get('endDate')),
        })

    return pd.DataFrame(records, columns=columns)

# Path to the export XML (a mounted Google Drive path or a Colab upload path)
xml_path = '/content/drive/MyDrive/export_L.xml'

# Record types to extract from the export
record_types = [
    'HKQuantityTypeIdentifierHeartRate',
    'HKQuantityTypeIdentifierRestingHeartRate',
    'HKQuantityTypeIdentifierHeartRateVariabilitySDNN'
]

# Run the parser
df = parse_health_export(xml_path, record_types=record_types)

# Preview the first rows of the result
print(df.head())

                                type    value       unit  \
0  HKQuantityTypeIdentifierHeartRate  80.0000  count/min   
1  HKQuantityTypeIdentifierHeartRate  82.0000  count/min   
2  HKQuantityTypeIdentifierHeartRate  88.0000  count/min   
3  HKQuantityTypeIdentifierHeartRate  69.0000  count/min   
4  HKQuantityTypeIdentifierHeartRate  81.6192  count/min   

                      start                       end  
0 2024-10-10 13:09:52+09:00 2024-10-10 13:09:52+09:00  
1 2024-10-10 13:11:37+09:00 2024-10-10 13:11:37+09:00  
2 2024-10-10 13:17:00+09:00 2024-10-10 13:17:00+09:00  
3 2024-10-10 13:22:03+09:00 2024-10-10 13:22:03+09:00  
4 2024-10-10 13:27:40+09:00 2024-10-10 13:27:40+09:00  


# 데이터 전처리

In [7]:
import pandas as pd

# ----------------------------
# 1. 회복/스트레스 분석용 전처리
# ----------------------------
def preprocess_recovery(df, start_date="2025-04-01", end_date="2025-04-30"):
    """HRV + RHR 데이터를 날짜별 평균 및 기준선 포함 형식으로 정리"""

    # 필터링
    df_hrv = df[df['type'] == 'HKQuantityTypeIdentifierHeartRateVariabilitySDNN'].copy()
    df_rhr = df[df['type'] == 'HKQuantityTypeIdentifierRestingHeartRate'].copy()

    # 날짜 파싱
    df_hrv['date'] = df_hrv['start'].dt.date
    df_rhr['date'] = df_rhr['start'].dt.date

    # 기간 필터
    start = pd.to_datetime(start_date).date()
    end = pd.to_datetime(end_date).date()

    df_hrv = df_hrv[(df_hrv['date'] >= start) & (df_hrv['date'] <= end)]
    df_rhr = df_rhr[(df_rhr['date'] >= start) & (df_rhr['date'] <= end)]

    # 일별 평균
    df_hrv_daily = df_hrv.groupby('date')['value'].mean().reset_index().rename(columns={'value': 'hrv'})
    df_rhr_daily = df_rhr.groupby('date')['value'].mean().reset_index().rename(columns={'value': 'rhr'})

    # 병합
    df_daily = pd.merge(df_hrv_daily, df_rhr_daily, on='date', how='inner')

    return df_daily

In [8]:
# ----------------------------
# 2. Zone 2 감지용 전처리
# ----------------------------
def preprocess_zone2(df, start_date="2025-04-01", end_date="2025-04-30", resample_interval='1min'):
    """Sort heart-rate samples by time and resample them to a fixed interval.

    Args:
        df: DataFrame with ``type``, ``value`` and a datetime ``start`` column.
        start_date: Lower bound of the period (inclusive).
        end_date: Upper bound of the period (inclusive). A bound that falls
            exactly on midnight (e.g. a date-only string) covers that whole
            calendar day, matching ``preprocess_recovery``; a bound with a
            time of day is used as an exact inclusive cut-off.
        resample_interval: pandas offset alias for the resampling bins.

    Returns:
        DataFrame with the columns ``timestamp`` (bin start) and ``hr``
        (mean heart rate in the bin; NaN for bins without samples).
    """
    df_hr = df[df['type'] == 'HKQuantityTypeIdentifierHeartRate'].copy()
    df_hr['timestamp'] = df_hr['start']

    # Timezone of the timestamp column (None for naive datetimes).
    tz = getattr(df_hr['timestamp'].dtype, 'tz', None)

    def _as_bound(value):
        # Naive bounds are interpreted in the column's timezone so they can
        # be compared with timezone-aware samples.
        bound = pd.Timestamp(value)
        if tz is not None and bound.tzinfo is None:
            bound = bound.tz_localize(tz)
        return bound

    lower = _as_bound(start_date)
    upper = _as_bound(end_date)

    in_range = df_hr['timestamp'] >= lower
    if upper == upper.normalize():
        # Midnight bound: include every sample recorded on that day rather
        # than cutting off at 00:00.
        in_range &= df_hr['timestamp'] < upper + pd.DateOffset(days=1)
    else:
        in_range &= df_hr['timestamp'] <= upper
    df_hr = df_hr[in_range]

    # Resampling needs a sorted datetime index.
    df_hr = df_hr.set_index('timestamp').sort_index()

    # Mean heart rate per bin (e.g. per minute).
    df_resampled = df_hr['value'].resample(resample_interval).mean().reset_index()
    df_resampled.columns = ['timestamp', 'hr']

    return df_resampled