In [2]:
import pandas as pd
import numpy as np

health_df = pd.read_csv("../data_csv/health_timeseries_core_state.csv")

# 컬럼 정규화
health_df = (
    health_df
    .rename(columns={
        "user_id": "asset_id",
        "health_state_index": "state_value"
    })
    .sort_values(["asset_id", "date"])
    .reset_index(drop=True)
)

# 날짜 기반 t_index 생성
health_df["t_index"] = (
    health_df
    .groupby("asset_id")
    .cumcount()
)

health_df.head()

Unnamed: 0,asset_id,date,mean_hr,hr_std,steps,calories,sleep_minutes,health_state_level,health_state_speed,state_value,t_index
0,1503960366,2016-04-12,,,13162,1985,327.0,-0.002802,0.0,-0.002802,0
1,1503960366,2016-04-13,,,10735,1797,384.0,0.054147,0.056949,0.45279,1
2,1503960366,2016-04-14,,,10460,1776,,,,,2
3,1503960366,2016-04-15,,,9762,1745,412.0,0.064168,,,3
4,1503960366,2016-04-16,,,12669,1863,340.0,0.005229,,,4


In [3]:
health_df["delta_state"] = (
    health_df
    .groupby("asset_id")["state_value"]
    .diff()
)

health_df["degradation_rate"] = (
    health_df
    .groupby("asset_id")["delta_state"]
    .rolling(window=7, min_periods=3)
    .mean()
    .reset_index(level=0, drop=True)
)

health_df[["asset_id", "t_index", "state_value", "degradation_rate"]].head(10)

Unnamed: 0,asset_id,t_index,state_value,degradation_rate
0,1503960366,0,-0.002802,
1,1503960366,1,0.45279,
2,1503960366,2,,
3,1503960366,3,,
4,1503960366,4,,
5,1503960366,5,,
6,1503960366,6,,
7,1503960366,7,,
8,1503960366,8,,
9,1503960366,9,,


In [4]:
diabetes_df = pd.read_csv("../data_csv/diabetes_dataset.csv")

risk_features = [
    "Glucose", "BMI", "Age", "BloodPressure"
]

diabetes_df["risk_score"] = diabetes_df[risk_features].mean(axis=1)

diabetes_df["risk_group"] = pd.qcut(
    diabetes_df["risk_score"],
    q=3,
    labels=["low", "mid", "high"]
)

diabetes_df[["risk_score", "risk_group"]].head()

Unnamed: 0,risk_score,risk_group
0,75.9,high
1,52.15,low
2,75.575,high
3,51.025,low
4,63.275,mid


In [6]:
asset_ids = health_df["asset_id"].unique()

risk_map = (
    diabetes_df[["risk_group"]]
    .sample(n=len(asset_ids), random_state=42)
    .reset_index(drop=True)
)

risk_lookup = dict(zip(asset_ids, risk_map["risk_group"]))

health_df["risk_group"] = health_df["asset_id"].map(risk_lookup)

health_df[["asset_id", "risk_group"]].drop_duplicates().head()

Unnamed: 0,asset_id,risk_group
0,1503960366,mid
31,1624580081,mid
62,1644430081,low
92,1844505072,mid
123,1927972279,high


In [7]:
asset_ids = health_df["asset_id"].unique()

risk_map = (
    diabetes_df[["risk_group"]]
    .sample(n=len(asset_ids), random_state=42)
    .reset_index(drop=True)
)

risk_lookup = dict(zip(asset_ids, risk_map["risk_group"]))

health_df["risk_group"] = health_df["asset_id"].map(risk_lookup)

health_df[["asset_id", "risk_group"]].drop_duplicates().head()

Unnamed: 0,asset_id,risk_group
0,1503960366,mid
31,1624580081,mid
62,1644430081,low
92,1844505072,mid
123,1927972279,high


In [8]:
def intervention_rule(row):
    if row["risk_group"] == "high" and row["degradation_rate"] < -0.05:
        return 1
    if row["risk_group"] == "mid" and row["degradation_rate"] < -0.10:
        return 1
    return 0

health_df["intervention_flag"] = health_df.apply(
    intervention_rule, axis=1
)

health_df[["asset_id", "degradation_rate", "risk_group", "intervention_flag"]].head()

Unnamed: 0,asset_id,degradation_rate,risk_group,intervention_flag
0,1503960366,,mid,0
1,1503960366,,mid,0
2,1503960366,,mid,0
3,1503960366,,mid,0
4,1503960366,,mid,0


In [9]:
def compute_stabilization(df, window=7):
    df = df.copy()

    df["post_state"] = (
        df
        .groupby("asset_id")["state_value"]
        .shift(-window)
    )

    df["stabilized"] = (
        df["post_state"] - df["state_value"]
    ) > 0

    return df

health_df = compute_stabilization(health_df)

In [10]:
health_df.groupby("intervention_flag")["stabilized"].mean()

intervention_flag
0    0.119298
1    0.590909
Name: stabilized, dtype: float64

개입 강도별 비교

In [None]:
def intervention_rule(row):
    if row["risk_group"] == "high" and row["degradation_rate"] < -0.1:
        return "strong"
    if row["risk_group"] in ["high", "mid"] and row["degradation_rate"] < -0.05:
        return "weak"
    return "none"

In [11]:
health_df.groupby("intervention_flag")["stabilized"].mean()

intervention_flag
0    0.119298
1    0.590909
Name: stabilized, dtype: float64

시간 지연 효과 확인 

In [12]:
for w in [3, 7, 14]:
    tmp = compute_stabilization(health_df, window=w)
    print(w, tmp.groupby("intervention_flag")["stabilized"].mean())

3 intervention_flag
0    0.176608
1    0.659091
Name: stabilized, dtype: float64
7 intervention_flag
0    0.119298
1    0.590909
Name: stabilized, dtype: float64
14 intervention_flag
0    0.083041
1    0.397727
Name: stabilized, dtype: float64


개입 효율(Efficiency) 지표 추가

In [13]:
summary = (
    health_df
    .groupby("intervention_flag")
    .agg(
        stabilize_rate=("stabilized", "mean"),
        count=("stabilized", "size")
    )
)

summary["efficiency"] = summary["stabilize_rate"] / summary["count"]
summary

Unnamed: 0_level_0,stabilize_rate,count,efficiency
intervention_flag,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,0.119298,855,0.00014
1,0.590909,88,0.006715


False Intervention 분석 (개입 실패 사례)

In [14]:
false_intervention = health_df[
    (health_df["intervention_flag"] == 1) &
    (health_df["stabilized"] == False)
]

false_intervention[
    ["risk_group", "degradation_rate"]
].describe()

Unnamed: 0,degradation_rate
count,36.0
mean,-0.164263
std,0.093285
min,-0.403313
25%,-0.221346
50%,-0.147671
75%,-0.088552
max,-0.051396
