In [23]:
import pandas as pd
from pathlib import Path

# --- Load data ---
# Read the LABKA workbook into a DataFrame; openpyxl is requested explicitly
# as the Excel engine.
LABKA_PATH = Path(r"C:\Users\kfq6\Documents\Data\LABKA.xlsx")
df = pd.read_excel(io=LABKA_PATH, engine="openpyxl")

# --- Keep only relevant columns ---
# Every column listed here is used by the steps that follow, so a missing one
# is reported immediately and by name, instead of being skipped silently and
# surfacing later as a less obvious KeyError.
cols = [
    "DW_EK_Borger",
    "Dato_Proevetagningstid",
    "Klok_Proevetagningstid",
    "Alder_Proevetagningstid",
    "Analysenavn",
    "Svar",
    "Enhed",
    "Over70"
]
missing_cols = [c for c in cols if c not in df.columns]
if missing_cols:
    raise KeyError(f"Input data is missing required column(s): {missing_cols}")
# .copy() so the assignments below operate on an independent frame.
df = df[cols].copy()

# --- Parse and clean ---
# Unparseable dates become NaT (errors="coerce"); Testdato is the calendar day
# of the sample timestamp.
df["Dato_Proevetagningstid"] = pd.to_datetime(df["Dato_Proevetagningstid"], errors="coerce")
df["Testdato"] = df["Dato_Proevetagningstid"].dt.date
# Analysenavn stays plain text because its values become column labels in the pivot.
df["Analysenavn"] = df["Analysenavn"].astype(str).str.strip()

# Strip Svar/Enhed but keep missing values missing. A plain astype(str) turns
# NaN into the literal text "nan", which would otherwise end up in Resultat
# (e.g. "nan" or "nan mmol/L"). Blank strings are treated as missing as well.
for text_col in ("Svar", "Enhed"):
    was_missing = df[text_col].isna()
    stripped = df[text_col].astype(str).str.strip()
    df[text_col] = stripped.mask(was_missing | stripped.eq(""))

# --- Merge Svar + Enhed into one string ---
# Vectorised instead of a row-wise apply: "<Svar> <Enhed>" when a unit exists,
# otherwise Svar alone. A missing Svar gives a missing Resultat, because
# string concatenation with NaN yields NaN.
svar_with_unit = df["Svar"] + " " + df["Enhed"]
df["Resultat"] = svar_with_unit.where(df["Enhed"].notna(), df["Svar"])

# --- Keep only last entry per test on that date ---
# Order rows so that, inside each (DW_EK_Borger, Testdato, Analysenavn) group,
# the row sorted last by Dato_Proevetagningstid is at the end, then keep only
# that final row of every group.
# NOTE(review): Klok_Proevetagningstid is not part of the ordering; if
# Dato_Proevetagningstid carries no time-of-day, "last" is just input order
# within the day - confirm against the data.
test_keys = ["DW_EK_Borger", "Testdato", "Analysenavn"]
df = df.sort_values(by=[*test_keys, "Dato_Proevetagningstid"])
df_last = df.groupby(by=test_keys, as_index=False).tail(1)

# --- Pivot wide: one column per analysis name ---
# Rows are (DW_EK_Borger, Testdato) pairs; each distinct Analysenavn becomes a
# column holding that pair's Resultat string.
wide = df_last.pivot(
    index=["DW_EK_Borger", "Testdato"],
    columns="Analysenavn",
    values="Resultat",
)

# --- Add key metadata back in ---
# For every (DW_EK_Borger, Testdato) pair, take the first non-null value of
# each metadata column in Dato_Proevetagningstid order, then left-join the
# pivoted results onto those rows.
meta_cols = ["Alder_Proevetagningstid", "Dato_Proevetagningstid", "Klok_Proevetagningstid", "Over70"]
day_keys = ["DW_EK_Borger", "Testdato"]
df_by_time = df.sort_values(day_keys + ["Dato_Proevetagningstid"])
meta = df_by_time.groupby(day_keys, as_index=False)[meta_cols].first()
wide = meta.merge(wide.reset_index(), on=day_keys, how="left")

# --- Optional: clean up weird column names for Excel safety ---
# Newlines, slashes and spaces in column names all end up as underscores.
_to_underscore = str.maketrans({"\n": "_", "/": "_", " ": "_"})
wide.columns = [name.translate(_to_underscore) for name in wide.columns]

# --- Save output ---
# Write the wide table to a new workbook (row index omitted), then report the
# destination and the table's dimensions.
out_path = Path(r"C:\Users\kfq6\Documents\Data\LABKA_clean_wide.xlsx")
wide.to_excel(excel_writer=out_path, index=False)

print("Clean dataset saved to:", out_path)
print("Shape:", wide.shape)


  warn("Workbook contains no default style, apply openpyxl's default")


Clean dataset saved to: C:\Users\kfq6\Documents\Data\LABKA_clean_wide.xlsx
Shape: (57480, 22)
