In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("../data_csv/health_timeseries_core_state.csv")

# 정렬 (user_id, date 기준)
df = (
    df
    .sort_values(["user_id", "date"])
    .reset_index(drop=True)
)

df.head()

Unnamed: 0,user_id,date,mean_hr,hr_std,steps,calories,sleep_minutes,health_state_level,health_state_speed,health_state_index
0,1503960366,2016-04-12,,,13162,1985,327.0,-0.002802,0.0,-0.002802
1,1503960366,2016-04-13,,,10735,1797,384.0,0.054147,0.056949,0.45279
2,1503960366,2016-04-14,,,10460,1776,,,,
3,1503960366,2016-04-15,,,9762,1745,412.0,0.064168,,
4,1503960366,2016-04-16,,,12669,1863,340.0,0.005229,,


μHSM 기본 상태 변수 (HSI, HDR)

In [2]:
# μHSM 기본 구성
df["HSI"] = df["health_state_index"]        # 상태 수준
df["HDR"] = df["health_state_speed"]        # 상태 열화 속도

Recovery Margin (RM) 계산

정의
	•	최근 14일 중
	•	상태가 “개선된 날”의 비율

In [3]:
WINDOW = 14

def compute_recovery_margin(group):
    g = group.copy()
    delta = g["HSI"].diff()

    improving = (delta > 0).astype(int)

    rm = (
        improving
        .rolling(WINDOW, min_periods=5)
        .mean()
    )

    return rm

df["recovery_margin"] = (
    df
    .groupby("user_id", group_keys=False)
    .apply(compute_recovery_margin)
) # 회복 “맥락”만 봄

  .apply(compute_recovery_margin)


Observability Score (OBS)

구성
	•	결측률
	•	관측 간격 불규칙성
	•	스파이크 비율

In [4]:
def compute_observability(group):
    g = group.copy()

    # 1) 결측률
    missing_rate = g["HSI"].isna().mean()

    # 2) 관측 간격 불규칙성
    time_gap = (
        pd.to_datetime(g["date"])
        .diff()
        .dt.days
        .dropna()
    )
    gap_cv = time_gap.std() / (time_gap.mean() + 1e-6) if len(time_gap) > 3 else 0

    # 3) 극단 스파이크 비율
    spike_ratio = (np.abs(g["HDR"]) > 0.3).mean()

    # 가중합 (단순)
    obs = 1 - (0.4 * missing_rate + 0.4 * gap_cv + 0.2 * spike_ratio)

    return pd.Series([obs] * len(g), index=g.index)

df["observability_score"] = (
    df
    .groupby("user_id", group_keys=False)
    .apply(compute_observability)
)

  .apply(compute_observability)


μHSM 테이블 완성

In [6]:
muHSM_df = df[[
    "user_id",
    "date",
    "HSI",
    "HDR",
    "recovery_margin",
    "observability_score"
]].copy()

muHSM_df.head()

Unnamed: 0,user_id,date,HSI,HDR,recovery_margin,observability_score
0,1503960366,2016-04-12,-0.002802,0.0,,0.63871
1,1503960366,2016-04-13,0.45279,0.056949,,0.63871
2,1503960366,2016-04-14,,,,0.63871
3,1503960366,2016-04-15,,,,0.63871
4,1503960366,2016-04-16,,,0.2,0.63871


CSV 저장 (Core 6 산출물)

In [7]:
muHSM_df.to_csv(
    "../data_csv/muHSM_state_monitor.csv",
    index=False
)

print("✅ Core 6: muHSM_state_monitor.csv 생성 완료")

✅ Core 6: muHSM_state_monitor.csv 생성 완료
