In [2]:
# %%
import pandas as pd
from pathlib import Path

# --- Read the Bookplan export from Excel ---
BOOK_PATH = Path(r"C:\Users\kfq6\Documents\Data\Bookinger.xlsx")  # update path if needed
book = pd.read_excel(BOOK_PATH, engine="openpyxl")

# --- Tidy up: trim header whitespace; lower-case and trim the subject text ---
book.columns = book.columns.str.strip()
subject_text = book["Angaaende"].astype(str)  # missing cells become the string "nan"
book["Angaaende"] = subject_text.str.lower().str.strip()

# --- Keep every booking whose subject mentions 'screening' anywhere ---
mentions_screening = book["Angaaende"].str.contains("screening", na=False)
screening = book[mentions_screening]

# --- Distinct patients (DW_EK_Borger) among the kept bookings ---
n_patients = screening["DW_EK_Borger"].nunique()
print(f"Number of unique patients with a 'sammedagsscreening': {n_patients}")

# --- Optional summary: bookings per patient, largest count first ---
bookings_per_patient = screening.groupby("DW_EK_Borger").size()
screening_counts = bookings_per_patient.reset_index(name="n_screenings").sort_values(
    "n_screenings", ascending=False
)

print("\nTop 10 patients by number of sammedagsscreeninger:")
print(screening_counts.head(10))

# --- Write the filtered bookings to a new workbook ---
out_path = Path(r"C:\Users\kfq6\Documents\Data\Bookplan_sammedagsscreening.xlsx")
screening.to_excel(out_path, index=False)
print("\nFiltered screening data saved to:", out_path)


  warn("Workbook contains no default style, apply openpyxl's default")


Number of unique patients with a 'sammedagsscreening': 2342

Top 10 patients by number of sammedagsscreeninger:
      DW_EK_Borger  n_screenings
901         536408            55
1690        974335            45
77           55343            43
1693        975238            42
1745        997028            36
1406        808613            36
16           11946            36
538         325019            36
283         175923            36
354         212648            36

Filtered screening data saved to: C:\Users\kfq6\Documents\Data\Bookplan_sammedagsscreening.xlsx


In [4]:
# --- (optional) keep only rows whose status is "Afviklet" or "Booket" ---
# Skipped entirely when the export has no BookingStatusTekst column.
if "BookingStatusTekst" in screening.columns:
    status_ok = screening["BookingStatusTekst"].isin(["Afviklet", "Booket"])
    screening = screening.loc[status_ok]

# --- robust meeting datetime + date ---
def _parse_meeting_dt(row, dcol="Dato_MoedeDato", tcol="Klok_MoedeTid"):
    d = pd.to_datetime(row.get(dcol), errors="coerce", dayfirst=True)
    if pd.isna(d):
        return pd.NaT
    t_raw = row.get(tcol)
    if pd.isna(t_raw):
        return d
    t_str = str(t_raw).strip()
    for fmt in ("%H:%M:%S", "%H:%M", "%H%M%S", "%H%M"):
        try:
            return pd.to_datetime(f"{d.date()} {pd.to_datetime(t_str, format=fmt).time()}")
        except Exception:
            continue
    return d

# Take an explicit copy so the derived columns are added to an independent frame.
screening = screening.copy()
screening["meeting_dt"] = screening.apply(_parse_meeting_dt, axis=1)
screening["meeting_date"] = screening["meeting_dt"].dt.normalize()

# --- collapse to ONE "real screening" per patient per day ---
# dropna=False keeps rows with a missing patient id or meeting date as their
# own groups, so they still contribute rows to screening_days.
screening_days = (
    screening
    .groupby(["DW_EK_Borger", "meeting_date"], dropna=False)
    .agg(
        n_bookings=("DW_EK_Borger", "size"),
        first_time=("meeting_dt", "min"),
        last_time=("meeting_dt", "max"),
    )
    .reset_index()
)

# --- per-patient counts overall and in rolling windows (1y, 5y) ---
# Calendar offsets replace fixed 365-day multiples: 5 * 365 days ignores leap
# days and therefore ends 1-2 days short of a true five-year window.
END_DATE = pd.Timestamp.today().normalize()
cut1 = END_DATE - pd.DateOffset(years=1)
cut5 = END_DATE - pd.DateOffset(years=5)

# NOTE(review): the windows only have a lower bound, so meeting dates after
# END_DATE are counted in both the 1y and 5y columns - confirm this is intended.
per_patient = (
    screening_days
    .assign(
        in_1y=lambda d: d["meeting_date"] >= cut1,
        in_5y=lambda d: d["meeting_date"] >= cut5,
    )
    .groupby("DW_EK_Borger", as_index=False)
    .agg(
        n_screenings_total=("meeting_date", "nunique"),
        n_screenings_1y=("in_1y", "sum"),
        n_screenings_5y=("in_5y", "sum"),
        first_screening=("meeting_date", "min"),
        last_screening=("meeting_date", "max"),
    )
    .sort_values("n_screenings_total", ascending=False)
)

print("Real screenings (unique patient-days):", screening_days.shape[0])
print("Unique patients with ≥1 screening:", per_patient.shape[0])
print(per_patient.head(10))




Real screenings (unique patient-days): 4901
Unique patients with ≥1 screening: 2090
      DW_EK_Borger  n_screenings_total  n_screenings_1y  n_screenings_5y  \
1693       1080018                   6                1                6   
47           34391                   6                1                6   
132         100865                   5                0                5   
1356        866144                   5                1                5   
462         309158                   5                1                5   
750         506569                   5                3                5   
1857       1168177                   5                2                5   
1878       1181071                   5                1                5   
1900       1192261                   5                1                5   
1912       1196616                   5                2                5   

     first_screening last_screening  
1693      2022-05-31     2025-06-18  
47 

In [5]:
# %%
import pandas as pd
from pathlib import Path

# --- Read the Bookplan export from Excel ---
BOOK_PATH = Path(r"C:\Users\kfq6\Documents\Data\Bookinger.xlsx")
book = pd.read_excel(BOOK_PATH, engine="openpyxl")

# --- Tidy up: trim header whitespace; lower-case and trim the subject text ---
book.columns = book.columns.str.strip()
subject_text = book["Angaaende"].astype(str)  # missing cells become the string "nan"
book["Angaaende"] = subject_text.str.lower().str.strip()

# --- Keep only bookings whose subject contains 'sammedagsscreening' ---
is_samedag = book["Angaaende"].str.contains("sammedagsscreening", na=False)
samedag = book[is_samedag]

# --- Restrict to rows whose status is "Afviklet" or "Booket" (if the column exists) ---
if "BookingStatusTekst" in samedag.columns:
    status_ok = samedag["BookingStatusTekst"].isin(["Afviklet", "Booket"])
    samedag = samedag.loc[status_ok]

# --- Parse meeting datetime ---
def _parse_meeting_dt(row, dcol="Dato_MoedeDato", tcol="Klok_MoedeTid"):
    d = pd.to_datetime(row.get(dcol), errors="coerce", dayfirst=True)
    if pd.isna(d):
        return pd.NaT
    t_raw = row.get(tcol)
    if pd.isna(t_raw):
        return d
    t_str = str(t_raw).strip()
    for fmt in ("%H:%M:%S", "%H:%M", "%H%M%S", "%H%M"):
        try:
            return pd.to_datetime(f"{d.date()} {pd.to_datetime(t_str, format=fmt).time()}")
        except Exception:
            continue
    return d

# Take an explicit copy so the derived columns are added to an independent frame.
samedag = samedag.copy()
samedag["meeting_dt"] = samedag.apply(_parse_meeting_dt, axis=1)
samedag["meeting_date"] = samedag["meeting_dt"].dt.normalize()

# --- Collapse multiple same-day bookings into one real Sammedagsscreening ---
# dropna=False keeps rows with a missing patient id or meeting date as their
# own groups, so they still contribute rows to samedag_days.
samedag_days = (
    samedag
    .groupby(["DW_EK_Borger", "meeting_date"], dropna=False)
    .agg(
        n_bookings=("DW_EK_Borger", "size"),
        first_time=("meeting_dt", "min"),
        last_time=("meeting_dt", "max"),
    )
    .reset_index()
)

# --- Per-patient summary with 1-year and 5-year windows ---
# Calendar offsets replace fixed 365-day multiples: 5 * 365 days ignores leap
# days and therefore ends 1-2 days short of a true five-year window.
END_DATE = pd.Timestamp.today().normalize()
cut1 = END_DATE - pd.DateOffset(years=1)
cut5 = END_DATE - pd.DateOffset(years=5)

# NOTE(review): the windows only have a lower bound, so meeting dates after
# END_DATE are counted in both the 1y and 5y columns - confirm this is intended.
per_patient = (
    samedag_days
    .assign(
        in_1y=lambda d: d["meeting_date"] >= cut1,
        in_5y=lambda d: d["meeting_date"] >= cut5,
    )
    .groupby("DW_EK_Borger", as_index=False)
    .agg(
        n_sammedag_total=("meeting_date", "nunique"),
        n_sammedag_1y=("in_1y", "sum"),
        n_sammedag_5y=("in_5y", "sum"),
        first_sammedag=("meeting_date", "min"),
        last_sammedag=("meeting_date", "max"),
    )
    .sort_values("n_sammedag_total", ascending=False)
)

print("Total unique Sammedagsscreeninger (patient-day):", samedag_days.shape[0])
print("Unique patients with ≥1 Sammedagsscreening:", per_patient.shape[0])
print(per_patient.head(10))

# --- Save optional outputs ---
samedag_days.to_excel(r"C:\Users\kfq6\Documents\Data\Bookplan_sammedag_days.xlsx", index=False)
per_patient.to_excel(r"C:\Users\kfq6\Documents\Data\Bookplan_sammedag_per_patient.xlsx", index=False)


  warn("Workbook contains no default style, apply openpyxl's default")


Total unique Sammedagsscreeninger (patient-day): 4596
Unique patients with ≥1 Sammedagsscreening: 2036
      DW_EK_Borger  n_sammedag_total  n_sammedag_1y  n_sammedag_5y  \
1761       1143452                 4              1              4   
1813       1171734                 4              1              4   
1140        753640                 4              2              4   
483         331981                 4              1              4   
1506        988911                 4              1              4   
313         215215                 4              1              4   
804         548785                 4              1              4   
1133        746643                 4              1              4   
1714       1117925                 4              1              4   
468         320724                 4              1              4   

     first_sammedag last_sammedag  
1761     2022-04-01    2025-06-27  
1813     2022-06-10    2025-09-18  
1140     2022-10-3