In [1]:
# %%
import pandas as pd
from pathlib import Path

# Indstil mappen hvor filerne ligger
DATA_DIR = Path(r"C:\Users\kfq6\Documents\Data")

FILES = {
    "Bookinger":  "Bookinger.xlsx",
    "LABKA":      "LABKA.xlsx",
    "Population": "Population.xlsx",
    "WHO-5 (PRO)": "WHO-5 (PRO).xlsx",
}


In [2]:
# %%
dfs = {}
for name, fname in FILES.items():
    path = DATA_DIR / fname
    df = pd.read_excel(path, sheet_name=0, engine="openpyxl")
    dfs[name] = df
    print(f"{name}: {df.shape[0]} rækker, {df.shape[1]} kolonner")


  warn("Workbook contains no default style, apply openpyxl's default")


Bookinger: 207222 rækker, 24 kolonner


  warn("Workbook contains no default style, apply openpyxl's default")


LABKA: 419010 rækker, 22 kolonner
Population: 2837 rækker, 32 kolonner


  warn("Workbook contains no default style, apply openpyxl's default")


WHO-5 (PRO): 4330 rækker, 7 kolonner


In [3]:
# %%
summary_rows = []
for name, df in dfs.items():
    rows = len(df)
    cols = df.shape[1]
    unique_cpr = df["CPRNummer"].nunique(dropna=True) if "CPRNummer" in df.columns else None
    missing_cpr = df["CPRNummer"].isna().sum() if "CPRNummer" in df.columns else None
    summary_rows.append({
        "file": name,
        "rows": rows,
        "cols": cols,
        "unique_CPRNummer": unique_cpr,
        "missing_CPRNummer": missing_cpr
    })

summary = pd.DataFrame(summary_rows).sort_values("file").reset_index(drop=True)
summary


Unnamed: 0,file,rows,cols,unique_CPRNummer,missing_CPRNummer
0,Bookinger,207222,24,2823,0
1,LABKA,419010,22,2829,0
2,Population,2837,32,2837,0
3,WHO-5 (PRO),4330,7,1872,0


In [8]:
df = dfs["LABKA"]

# Now you can safely do:
print(df.columns.tolist())  # check what the column names actually are

unique_values = df["Analysenavn"].unique()
print("Unique text values in 'Analysenavn':")
print(unique_values)

# Count how many rows for each unique analysis name
analysis_counts = df["Analysenavn"].value_counts()

print(analysis_counts)

['CPRNummer', 'DW_EK_Borger', 'DW_SK_Borger1', 'DW_SK_RekvSvar', 'ORDINV_ID', 'REQUISITION_NR', 'Alder_Proevetagningstid', 'Dato_Svartid', 'Klok_Svartid', 'Dato_Proevetagningstid', 'Klok_Proevetagningstid', 'Analysenummer', 'INVER_ID', 'Analysekode', 'Analysenavn', 'NPUKode', 'Svartype_Tekst', 'Internt_Svar_Numerisk', 'Svar', 'Enhed', 'NyesteResultatAnalyse', 'Over70']
Unique text values in 'Analysenavn':
['B-Hæmoglobin'
 'eGFR_x0020__x002F__x0020_1_x002C_73m_x00B2__x0028_CKD-EPI_x0029_'
 'P-Natrium' 'P-Kalium' 'P-Kreatinin' 'P-Triglycerid'
 'U-Albumin_x0020__x002F__x0020_Kreatinin-ratio' 'P-Kolesterol'
 'P-Kolesterol_x0020_HDL' 'P-Kolesterol_x0020_LDL' 'P-Calcium'
 'P-Calcium_x0020__x0028_albuminkorrigeret_x0029_' 'P-Albumin'
 'P-Vitamin_x0020_B12'
 'Hb_x0028_B_x0029_-Hæmoglobin_x0020_A1c_x0020__x0028_IFCC_x0029_'
 'P-25-Hydroxy-Vitamin_x0020_D_x0028_D3_x002B_D2_x0029_']
Analysenavn
P-Kreatinin                                                         49476
eGFR_x0020__x002F__x0020_1_x0

## Typer af kontakt, antal patienter under hvert kontakt

In [7]:
df = dfs["Population"]



unique_values = df["KontaktType"].unique()
print("Unique text values in 'Analysenavn':")
print(unique_values)

# Count how many rows for each unique analysis name
analysis_counts = df["KontaktType"].value_counts()

print(analysis_counts)

Unique text values in 'Analysenavn':
['Virtuel kontakt' 'Ambulant' 'Indlæggelse']
KontaktType
Ambulant           1564
Virtuel kontakt    1261
Indlæggelse          12
Name: count, dtype: int64


## Hvilken center har ansvar for patienter

In [11]:
df = dfs["Population"]



unique_values = df["ShakAfdKodeTekst_KontaktAnsvar"].unique()
print("Unique text values in 'Analysenavn':")
print(unique_values)

# Count how many rows for each unique analysis name
analysis_counts = df["ShakAfdKodeTekst_KontaktAnsvar"].value_counts()

print(analysis_counts)

Unique text values in 'Analysenavn':
['800109 - Alb Endokrinologisk Område']
ShakAfdKodeTekst_KontaktAnsvar
800109 - Alb Endokrinologisk Område    2837
Name: count, dtype: int64


## type af diagnose

In [12]:
df = dfs["Population"]



unique_values = df["Aktionsdiagnosegruppe"].unique()
print("Unique text values in 'Analysenavn':")
print(unique_values)

# Count how many rows for each unique analysis name
analysis_counts = df["Aktionsdiagnosegruppe"].value_counts()

print(analysis_counts)

Unique text values in 'Analysenavn':
['Type 2-diabetes' 'Type 1-diabetes' 'Andre former for diabetes'
 'Ikke specificeret diabetes']
Aktionsdiagnosegruppe
Type 1-diabetes               1923
Type 2-diabetes                793
Andre former for diabetes       89
Ikke specificeret diabetes      32
Name: count, dtype: int64
