In [7]:
import sys
from pathlib import Path
import pandas as pd
# Project layout is derived from the current working directory: the data
# manifest module is expected one level above it.
NOTEBOOK_DIR = Path.cwd()
FIRST_WEEK_DIR = NOTEBOOK_DIR.parent

# Put the parent directory at the front of the import path so `manifest`
# resolves; the membership test keeps repeated runs of this cell from
# inserting the same entry again.
_first_week_path = str(FIRST_WEEK_DIR)
if _first_week_path not in sys.path:
    sys.path.insert(0, _first_week_path)

# Must come after the sys.path tweak above.
from manifest import DATA_MANIFEST

DATA_MANIFEST.keys()

dict_keys(['battery_main', 'battery_short', 'ehr_main'])

In [2]:
# Print every manifest entry's path and whether that path exists on disk.
for dataset_name, dataset_entry in DATA_MANIFEST.items():
    dataset_path = dataset_entry["path"]
    print(dataset_name, "->", dataset_path)
    print("exists:", dataset_path.exists())

battery_main -> /Users/mac/Desktop/project/Decision-Stability-Agent/first_week/data_csv/Lithium-Ion Battery Cycle Life.csv
exists: True
battery_short -> /Users/mac/Desktop/project/Decision-Stability-Agent/first_week/data_csv/100_Cycle_Lithium-Ion Battery Cycle Life.csv
exists: True
ehr_main -> /Users/mac/Desktop/project/Decision-Stability-Agent/first_week/data_csv/ehr_disease_progression.csv
exists: True


In [3]:
def compute_trend(series, window=3):
    """Return the smoothed step-to-step change of ``series``.

    The first difference of ``series`` is averaged over a trailing window of
    ``window`` points. ``min_periods=1`` means a value is produced as soon as
    one difference is available, so only positions without any usable
    difference (such as the very first one) come out as NaN.

    Args:
        series: Object providing ``.diff()`` (e.g. a pandas Series).
        window: Size of the trailing averaging window.

    Returns:
        The rolling mean of the first difference, aligned with ``series``.
    """
    step_change = series.diff()
    trailing = step_change.rolling(window=window, min_periods=1)
    return trailing.mean()

def compute_event_flag(state, low_th=None, high_th=None):
    """Flag positions where ``state`` leaves the allowed range.

    A position is flagged (1) when the value is strictly below ``low_th`` or
    strictly above ``high_th``. Either bound may be omitted. Missing values
    never compare as a breach, so they are flagged 0.

    Previously ``high_th`` was ignored whenever ``low_th`` was also given, and
    a bare scalar ``0`` was returned when neither bound was set; the result is
    now always array-like and aligned with ``state``.

    Args:
        state: Array-like supporting element-wise comparison and ``.astype``
            (e.g. a pandas Series).
        low_th: Optional lower bound; values below it are flagged.
        high_th: Optional upper bound; values above it are flagged.

    Returns:
        Integer 0/1 flags with the same shape (and index, for a Series) as
        ``state``.
    """
    # `x < x` is False everywhere (NaN included), which gives an all-False
    # mask that keeps the shape/index of `state` without extra imports.
    flagged = state < state
    if low_th is not None:
        flagged = flagged | (state < low_th)
    if high_th is not None:
        flagged = flagged | (state > high_th)
    return flagged.astype(int)

In [4]:
def extract_battery_state(df, capacity_col="capacity", cycle_col="cycle",
                          entity_col="cell_id", low_th=0.8):
    """Convert battery cycling records into the shared state table.

    For every entity (cell) the capacity is normalised by that entity's own
    first non-missing capacity reading in cycle order, giving a
    state-of-health proxy. The trend and the event flag are derived from that
    proxy.

    Normalisation and trend are computed per entity, so rows from different
    cells no longer share one reference capacity and the trend no longer runs
    across cell boundaries. Rows are ordered by entity, then cycle.

    Args:
        df: Raw battery DataFrame.
        capacity_col: Name of the capacity column (default ``"capacity"``).
        cycle_col: Name of the cycle-index column (default ``"cycle"``).
        entity_col: Name of the cell-identifier column. When the column is
            absent every row is assigned entity ``0``.
        low_th: State value below which ``event_flag`` is set to 1.

    Returns:
        DataFrame with columns ``entity_id``, ``t``, ``obs_value``,
        ``state_value``, ``trend_value``, ``event_flag``.

    Raises:
        KeyError: If ``capacity_col`` or ``cycle_col`` is not a column of
            ``df``. The message lists the columns that are available.
    """
    missing = [col for col in (cycle_col, capacity_col) if col not in df.columns]
    if missing:
        raise KeyError(
            f"extract_battery_state: missing column(s) {missing}; "
            f"available columns: {list(df.columns)}"
        )

    out = df.copy()
    out["entity_id"] = out[entity_col] if entity_col in out.columns else 0
    out = out.sort_values(["entity_id", cycle_col])

    # Representative observation: capacity, scaled by each entity's first
    # non-missing reading (SOH proxy).
    init_capacity = (
        out.groupby("entity_id", sort=False)[capacity_col].transform("first")
    )
    out["state_value"] = out[capacity_col] / init_capacity

    # Trend is evaluated inside each entity so the first row of one cell is
    # never differenced against the last row of another.
    out["trend_value"] = (
        out.groupby("entity_id", sort=False)["state_value"]
           .transform(compute_trend)
    )
    out["event_flag"] = compute_event_flag(out["state_value"], low_th=low_th)

    out["obs_value"] = out[capacity_col]

    return out[[
        "entity_id", cycle_col,
        "obs_value", "state_value",
        "trend_value", "event_flag"
    ]].rename(columns={cycle_col: "t"})

In [None]:
 def extract_ehr_state(df, baseline_days=5, trend_window=3, event_th=0.3):
    """Convert per-patient heart-rate records into the shared state table.

    Rows are ordered by patient and day. Each patient's baseline is the mean
    heart rate over that patient's first ``baseline_days`` rows; the state is
    the absolute deviation from the baseline as a fraction of the baseline.

    Args:
        df: DataFrame with ``patient_id``, ``day`` and ``heart_rate`` columns.
        baseline_days: Number of leading rows per patient used for the
            baseline (default 5).
        trend_window: Trailing window for smoothing the state's first
            difference (default 3).
        event_th: Deviation ratio above which ``event_flag`` is 1
            (default 0.3).

    Returns:
        DataFrame with columns ``entity_id``, ``t``, ``obs_value``,
        ``state_value``, ``trend_value``, ``event_flag``.

    Raises:
        KeyError: If any required column is missing. The message lists the
            columns that are available.
    """
    required = ("patient_id", "day", "heart_rate")
    missing = [col for col in required if col not in df.columns]
    if missing:
        raise KeyError(
            f"extract_ehr_state: missing column(s) {missing}; "
            f"available columns: {list(df.columns)}"
        )

    df = df.sort_values(["patient_id", "day"]).copy()

    # Baseline: per-patient mean heart rate over the first `baseline_days` rows.
    df["baseline_hr"] = (
        df.groupby("patient_id")["heart_rate"]
          .transform(lambda hr: hr.iloc[:baseline_days].mean())
    )

    # State: deviation from the baseline, relative to the baseline.
    df["state_value"] = (
        (df["heart_rate"] - df["baseline_hr"]).abs()
        / df["baseline_hr"]
    )

    # Rate of change, computed inside each patient so it never spans patients.
    df["trend_value"] = (
        df.groupby("patient_id")["state_value"]
          .transform(
              lambda s: s.diff().rolling(trend_window, min_periods=1).mean()
          )
    )

    # Event: deviation ratio strictly above the threshold.
    df["event_flag"] = (df["state_value"] > event_th).astype(int)

    df["obs_value"] = df["heart_rate"]
    df["entity_id"] = df["patient_id"]
    df["t"] = df["day"]

    return df[[
        "entity_id",
        "t",
        "obs_value",
        "state_value",
        "trend_value",
        "event_flag"
    ]]

In [8]:
# Look up the CSV locations registered in DATA_MANIFEST.
battery_path = DATA_MANIFEST["battery_main"]["path"]
ehr_path = DATA_MANIFEST["ehr_main"]["path"]

# Load each CSV into its own raw DataFrame.
battery_df = pd.read_csv(battery_path)
ehr_df = pd.read_csv(ehr_path)

# Run both datasets through the extractors defined in the earlier cells.
# NOTE(review): the output recorded for this cell is `KeyError: 'capacity'`.
# extract_battery_state is the only code in this notebook that reads a
# "capacity" column, so the battery CSV presumably names that column
# differently — inspect battery_df.columns and confirm before re-running.
battery_state = extract_battery_state(battery_df)
ehr_state = extract_ehr_state(ehr_df)

# Shown as a 2-tuple holding the first rows of each state table.
battery_state.head(), ehr_state.head()

KeyError: 'capacity'