In [2]:
import pandas as pd


In [30]:
# =========================================================
# Daily → Weekly dataset 변환 (0h, 72h, 168h obs 유지)
# =========================================================
def convert_to_weekly_dataset(df_all: pd.DataFrame) -> pd.DataFrame:
    """
    Convert daily dosing dataset into weekly dosing dataset.
    Rule:
      - 7일치 누적 dose를 week_start(0h)에 몰아서 투여
      - 관측(Observation)은 각 주기에서 0h, 72h, 168h만 유지
    
    Args:
        df_all: 원본 EstData DataFrame (daily dosing)

    Returns:
        df_weekly: Weekly dosing 변환된 DataFrame
    """
    df = df_all.copy()
    df_weekly = []

    for sid, g in df.groupby("ID"):
        g = g.sort_values("TIME").reset_index(drop=True)

        # dose / obs 분리
        g_dose = g[g["EVID"] == 1].copy()
        g_obs = g[g["EVID"] == 0].copy()

        # week index 계산
        g_dose["WEEK"] = (g_dose["TIME"] // 168).astype(int)
        g_obs["WEEK"] = (g_obs["TIME"] // 168).astype(int)

        weekly_doses = []
        for w, grp in g_dose.groupby("WEEK"):
            week_start = w * 168
            total_amt = grp["AMT"].sum()

            # 첫날(week_start)에 누적 dose 기록
            first_row = grp.iloc[0].copy()
            first_row["TIME"] = week_start
            first_row["AMT"] = total_amt
            weekly_doses.append(first_row)

        g_dose_weekly = pd.DataFrame(weekly_doses)

        # obs 중 0h, 72h, 168h만 유지
        weekly_obs = []
        for w, grp in g_obs.groupby("WEEK"):
            week_start = w * 168
            # target times
            target_times = [week_start, week_start + 72, week_start + 168]
            for t in target_times:
                obs_nearest = grp.iloc[(grp["TIME"] - t).abs().argsort()[:1]].copy()
                obs_nearest["TIME"] = t  # 정확히 맞춰줌
                weekly_obs.append(obs_nearest)

        g_obs_weekly = pd.concat(weekly_obs, ignore_index=True)

        # dose + obs 합치기
        g_weekly = pd.concat([g_dose_weekly, g_obs_weekly], ignore_index=True)
        g_weekly = g_weekly.sort_values("TIME").reset_index(drop=True)

        # Weekly 플래그 추가
        g_weekly["WEEKLY"] = 1
        df_weekly.append(g_weekly)

    df_weekly = pd.concat(df_weekly, ignore_index=True)
    return df_weekly


In [31]:
from loaders import load_estdata

In [32]:
df_all, df_obs, df_dose = load_estdata("EstData.csv")

# Weekly 변환
df_weekly = convert_to_weekly_dataset(df_all)

print("Original:", df_all.shape, "→ Weekly:", df_weekly.shape)

# 저장
df_weekly.to_csv("./EstData_weekly.csv", index=False)


Original: (2820, 11) → Weekly: (1260, 13)


In [33]:
df_weekly

Unnamed: 0,ID,BW,COMED,DOSE,TIME,DV,EVID,MDV,AMT,CMT,DVID,WEEK,WEEKLY
0,1.0,58.0,0.0,0.0,0.0,18.617400,0.0,0.0,0.0,3.0,2.0,0.0,1
1,1.0,58.0,0.0,0.0,72.0,18.127500,0.0,0.0,0.0,3.0,2.0,0.0,1
2,1.0,58.0,0.0,0.0,168.0,18.127500,0.0,0.0,0.0,3.0,2.0,0.0,1
3,1.0,58.0,0.0,0.0,168.0,16.946500,0.0,0.0,0.0,3.0,2.0,1.0,1
4,1.0,58.0,0.0,0.0,240.0,16.946500,0.0,0.0,0.0,3.0,2.0,1.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1255,48.0,61.0,0.0,10.0,1080.0,1.548160,0.0,0.0,0.0,2.0,1.0,6.0,1
1256,48.0,61.0,0.0,10.0,1176.0,1.548160,0.0,0.0,0.0,2.0,1.0,6.0,1
1257,48.0,61.0,0.0,10.0,1176.0,0.689746,0.0,0.0,0.0,2.0,1.0,7.0,1
1258,48.0,61.0,0.0,10.0,1248.0,0.689746,0.0,0.0,0.0,2.0,1.0,7.0,1


In [34]:
# =========================================================
# Daily + Weekly Combined Dataset 생성
# =========================================================
def build_combined_dataset(daily_path: str = "EstData.csv",
                           weekly_out: str = "EstData_weekly.csv",
                           combined_out: str = "EstData_combined.csv") -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Daily EstData.csv -> Weekly 변환 -> Combined dataset 생성
    - daily_path: 원본 EstData.csv 경로
    - weekly_out: 변환된 Weekly dataset 저장 경로
    - combined_out: Daily + Weekly 합친 dataset 저장 경로
    """
    # Load daily dataset
    df_all, df_obs, df_dose = load_estdata(daily_path)

    # Weekly 변환
    df_weekly = convert_to_weekly_dataset(df_all)
    df_weekly.to_csv(weekly_out, index=False)
    print(f"✅ Weekly dataset saved: {weekly_out} ({df_weekly.shape})")

    # Daily/Weekly에 플래그 추가
    df_all = df_all.copy()
    df_all["WEEKLY"] = 0  # Daily flag
    df_weekly["WEEKLY"] = 1  # Weekly flag

    # Combined dataset
    df_combined = pd.concat([df_all, df_weekly], ignore_index=True)
    df_combined = df_combined.sort_values(["ID", "TIME"]).reset_index(drop=True)
    df_combined.to_csv(combined_out, index=False)
    print(f"✅ Combined dataset saved: {combined_out} ({df_combined.shape})")

    return df_all, df_weekly, df_combined


In [35]:
# Daily + Weekly + Combined 데이터셋 생성
df_daily, df_weekly, df_combined = build_combined_dataset(
    daily_path="./EstData.csv",
    weekly_out="./EstData_weekly.csv",
    combined_out="./EstData_combined.csv"
)


✅ Weekly dataset saved: ./EstData_weekly.csv ((1260, 13))
✅ Combined dataset saved: ./EstData_combined.csv ((4080, 13))


In [36]:
df_combined['DOSE'].value_counts()

DOSE
1.0     1164
3.0     1164
10.0    1164
0.0      588
Name: count, dtype: int64