In [3]:
import pandas as pd
from loaders import load_estdata

In [4]:
# =========================================================
# Daily -> Weekly dataset (observations kept at 0h, 72h, 168h)
# =========================================================
def convert_to_weekly_dataset(
    df_all: pd.DataFrame,
    week_hours: int = 168,
    obs_offsets: tuple[int, ...] = (0, 72, 168),
) -> pd.DataFrame:
    """
    Convert daily dosing dataset into weekly dosing dataset.

    Rule (applied per ID, with week = TIME // week_hours):
      - All dose records (EVID == 1) of a week are collapsed into a single
        record placed at week_start; its AMT is the sum of the week's AMT and
        every other column is copied from the first dose record of that week.
      - For every week that contains observation records (EVID == 0), one
        record is kept per entry of ``obs_offsets``: the observation of that
        week whose TIME is closest to week_start + offset, with TIME
        overwritten by that target time.

    Notes:
      - Because week membership is TIME // week_hours, an observation at
        exactly week_start + week_hours belongs to the *next* week. The
        "end of week" target is therefore served by the latest observation
        inside the current week, and the same nominal time can also appear as
        the next week's 0h record.
      - Records whose EVID is neither 0 nor 1 are dropped.
      - Subjects without observation records (or without dose records) are
        converted from whatever records they do have instead of raising.
      - NOTE(review): the nearest-observation pick returns exactly one record
        per target time. If the data holds several observation types at the
        same TIME, only one of them survives -- confirm this is intended.

    Args:
        df_all: original EstData DataFrame (daily dosing). Must contain the
            columns ID, TIME, EVID and AMT. It is not modified.
        week_hours: length of one dosing week in TIME units (default 168).
        obs_offsets: offsets from week_start at which an observation is kept
            (default 0, 72 and 168).

    Returns:
        df_weekly: Weekly dosing converted DataFrame. It carries the input
        columns plus WEEK (week index) and WEEKLY (always 1). An input
        without convertible records yields an empty frame with those columns.
    """
    subject_frames = []

    for _, g in df_all.groupby("ID"):
        # mergesort is stable: records sharing a TIME keep their input order,
        # which makes tie-breaking below reproducible.
        g = g.sort_values("TIME", kind="mergesort").reset_index(drop=True)

        g_dose = g[g["EVID"] == 1].copy()
        g_obs = g[g["EVID"] == 0].copy()

        g_dose["WEEK"] = (g_dose["TIME"] // week_hours).astype(int)
        g_obs["WEEK"] = (g_obs["TIME"] // week_hours).astype(int)

        # One cumulative dose record per week, anchored at the week start.
        weekly_doses = []
        for w, grp in g_dose.groupby("WEEK"):
            first_row = grp.iloc[0].copy()
            first_row["TIME"] = w * week_hours
            first_row["AMT"] = grp["AMT"].sum()
            weekly_doses.append(first_row)

        # Per week: nearest observation to each target time, relabeled to it.
        weekly_obs = []
        for w, grp in g_obs.groupby("WEEK"):
            week_start = w * week_hours
            targets = [week_start + offset for offset in obs_offsets]
            # argmin returns the first minimum, i.e. the earliest record on ties.
            nearest = [
                int((grp["TIME"] - t).abs().to_numpy().argmin()) for t in targets
            ]
            picked = grp.iloc[nearest].copy()
            picked["TIME"] = targets
            weekly_obs.append(picked)

        # pd.concat raises on an empty list, so only combine what exists.
        parts = []
        if weekly_doses:
            parts.append(pd.DataFrame(weekly_doses))
        if weekly_obs:
            parts.append(pd.concat(weekly_obs, ignore_index=True))
        if not parts:
            continue

        g_weekly = pd.concat(parts, ignore_index=True)
        # Stable sort: at equal TIME the dose record stays ahead of observations.
        g_weekly = g_weekly.sort_values("TIME", kind="mergesort").reset_index(drop=True)

        g_weekly["WEEKLY"] = 1
        subject_frames.append(g_weekly)

    if not subject_frames:
        # Nothing to convert: return an empty frame with the output schema.
        empty = df_all.iloc[0:0].copy()
        empty["WEEK"] = pd.Series(dtype="int64")
        empty["WEEKLY"] = pd.Series(dtype="int64")
        return empty

    return pd.concat(subject_frames, ignore_index=True)

In [5]:
# Read the daily-dosing source file; load_estdata returns three objects,
# unpacked here as df_all, df_obs and df_dose.
df_all, df_obs, df_dose = load_estdata("EstData.csv")

# Collapse the daily records into the weekly layout and report how the
# table size changed.
df_weekly = convert_to_weekly_dataset(df_all)
print("Original:", df_all.shape, "→ Weekly:", df_weekly.shape)

# Persist the converted table next to the notebook (row index not written).
df_weekly.to_csv(path_or_buf="./EstData_weekly.csv", index=False)


Original: (2820, 11) → Weekly: (1260, 13)


In [6]:
# =========================================================
# Daily + Weekly Combined Dataset 
# =========================================================
def build_combined_dataset(daily_path: str = "EstData.csv",
                           weekly_out: str = "EstData_weekly.csv",
                           combined_out: str = "EstData_combined.csv") -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """
    Derive the weekly dataset from the daily one and stack both into one table.

    Steps: load the daily data, convert it with convert_to_weekly_dataset,
    write the weekly table to ``weekly_out``, flag each table with a WEEKLY
    column (0 = daily, 1 = weekly), concatenate them, sort by ID then TIME,
    and write the result to ``combined_out``. A one-line summary is printed
    after each file is written.

    Args:
        daily_path: original EstData.csv path
        weekly_out: path the converted weekly dataset is written to
        combined_out: path the daily + weekly combined dataset is written to

    Returns:
        (daily, weekly, combined): the flagged copy of the daily frame, the
        weekly frame, and the sorted concatenation of the two.

    NOTE(review): ID values are not altered here, so daily and weekly records
    that share an ID are interleaved by the (ID, TIME) sort and can only be
    told apart through WEEKLY -- confirm downstream consumers expect that.
    """
    # load_estdata yields three objects; only the full table is needed here.
    daily_raw, _obs_unused, _dose_unused = load_estdata(daily_path)

    # Build and persist the weekly version before anything else is touched.
    df_weekly = convert_to_weekly_dataset(daily_raw)
    df_weekly.to_csv(weekly_out, index=False)
    print(f" Weekly dataset saved: {weekly_out} ({df_weekly.shape})")

    # Tag each regimen; the daily frame is copied so the loaded one stays intact.
    daily_flagged = daily_raw.copy()
    daily_flagged["WEEKLY"] = 0
    df_weekly["WEEKLY"] = 1

    # Stack both regimens and order the rows by subject, then time.
    df_combined = (
        pd.concat([daily_flagged, df_weekly], ignore_index=True)
        .sort_values(["ID", "TIME"])
        .reset_index(drop=True)
    )
    df_combined.to_csv(combined_out, index=False)
    print(f" Combined dataset saved: {combined_out} ({df_combined.shape})")

    return daily_flagged, df_weekly, df_combined


In [7]:
df_daily, df_weekly, df_combined = build_combined_dataset(
    daily_path="./EstData.csv",
    weekly_out="./EstData_weekly.csv",
    combined_out="./EstData_combined.csv"
)

 Weekly dataset saved: ./EstData_weekly.csv ((1260, 13))
 Combined dataset saved: ./EstData_combined.csv ((4080, 13))
