In [None]:
import pandas as pd

# jeg indlæser filerne her
appointments_raw = pd.read_csv("Appointment.csv", header=None)
encounters_raw = pd.read_csv("Encounter.csv", header=None)
observations_raw = pd.read_csv("Observation.csv", header=None)
patients_raw = pd.read_csv("Patient.csv", header=None)
practitioners_raw = pd.read_csv("Practitioner.csv", header=None)


print("Appointments:", len(appointments_raw))
print("Encounters:", len(encounters_raw))
print("Observations:", len(observations_raw))
print("Patients:", len(patients_raw))
print("Practitioners:", len(practitioners_raw))



        

Appointments: 12000
Encounters: 10800
Observations: 13200
Patients: 1200
Practitioners: 70


In [177]:
def extract_from_appointment(json_str):
    try:
        data = json.loads(json_str)
        patient_id = None
        practitioner_id = None

        for participant in data.get("participant", []):
            actor = participant.get("actor", {})
            ref = actor.get("reference", "")
            if ref.startswith("Practitioner/"):
                practitioner_id = ref.split("/")[-1]
            elif not ref.startswith("Location/") and not ref.startswith("Practitioner/"):
                patient_id = ref  

        return pd.Series([patient_id, practitioner_id])
    except Exception as e:
        print("Fejl i parsing:", e)
        return pd.Series([None, None])





In [178]:

appointments_df = appointments_raw[[3]].copy()
appointments_df[["PatientID", "PractitionerID"]] = appointments_df[3].apply(extract_from_appointment)


appointments_df = appointments_df.dropna(subset=["PatientID", "PractitionerID"])


appointments_counts = (
    appointments_df
    .groupby(["PatientID", "PractitionerID"])
    .size()
    .reset_index(name="Appointments")
)

print(appointments_counts.head())


                              PatientID PractitionerID  Appointments
0  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000014             1
1  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000022             1
2  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000024             1
3  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000025             1
4  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000027             1


In [179]:
def extract_from_encounter(json_str):
    try:
        data = json.loads(json_str)
        patient_id = data.get("subject", {}).get("reference", "").replace("Patient/", "")
        practitioner_id = None

        for participant in data.get("participant", []):
            actor = participant.get("individual", {})
            ref = actor.get("reference", "")
            if ref.startswith("Practitioner/"):
                practitioner_id = ref.split("/")[-1]

        return pd.Series([patient_id, practitioner_id])
    except Exception as e:
        print("Fejl i parsing encounter:", e)
        return pd.Series([None, None])


encounters_df = encounters_raw[[3]].copy()
encounters_df[["PatientID", "PractitionerID"]] = encounters_df[3].apply(extract_from_encounter)


encounters_df = encounters_df.dropna(subset=["PatientID", "PractitionerID"])


encounters_counts = (
    encounters_df
    .groupby(["PatientID", "PractitionerID"])
    .size()
    .reset_index(name="Encounters")
)


print(encounters_counts.head())


  PatientID PractitionerID  Encounters
0              user0000001         139
1              user0000002         153
2              user0000003         164
3              user0000004         152
4              user0000005         171


In [None]:

merged = appointments_counts \
    .merge(encounters_counts, on=["PatientID", "PractitionerID"], how="outer") \
    .merge(observations_counts, on=["PatientID", "PractitionerID"], how="outer") \
    .fillna(0)


merged[["Appointments", "Encounters", "Observations"]] = merged[["Appointments", "Encounters", "Observations"]].astype(int)


merged["TotalInteractions"] = merged[["Appointments", "Encounters", "Observations"]].sum(axis=1)


merged["Class"] = merged["TotalInteractions"].apply(lambda x: "anomalous" if x == 1 else "normal")


merged["User_name"] = merged["PractitionerID"]
merged["SpecialAction"] = 0


final_df = merged[["User_name", "PatientID", "Appointments", "Observations", "Encounters", "SpecialAction", "Class"]]


final_df.to_csv("conf-training-dataset.csv", index=False)


print(final_df.head())


     User_name PatientID  Appointments  Observations  Encounters  \
0  user0000001                       0             0         139   
1  user0000002                       0             0         153   
2  user0000003                       0             0         164   
3  user0000004                       0             0         152   
4  user0000005                       0             0         171   

   SpecialAction   Class  
0              0  normal  
1              0  normal  
2              0  normal  
3              0  normal  
4              0  normal  


In [198]:
def extract_encounter_patient_practitioner(js):
    try:
        data = json.loads(js)
        patient_ref = data.get("patient", {}).get("reference", "")
        patient_id = patient_ref.split("/")[-1] if "Patient/" in patient_ref else patient_ref

        practitioner_id = None
        for participant in data.get("participant", []):
            actor = participant.get("individual", {})
            ref = actor.get("reference", "")
            if ref.startswith("Practitioner/"):
                practitioner_id = ref.split("/")[-1]

        return pd.Series([patient_id, practitioner_id])
    except Exception as e:
        print("Fejl:", e)
        return pd.Series([None, None])



In [None]:
test_encounters = encounters_raw[[3]].copy()
test_encounters[["PatientID", "PractitionerID"]] = test_encounters[3].apply(extract_encounter_patient_practitioner)


print(test_encounters[["PatientID", "PractitionerID"]].head())



                              PatientID PractitionerID
0  5da8452a-5082-4582-a029-212f5ef781aa    user0000014
1  5da8452a-5082-4582-a029-212f5ef781aa    user0000016
2  5da8452a-5082-4582-a029-212f5ef781aa    user0000009
3  5da8452a-5082-4582-a029-212f5ef781aa    user0000006
4  5da8452a-5082-4582-a029-212f5ef781aa    user0000014


In [200]:
encounters_counts = (
    test_encounters
    .groupby(["PatientID", "PractitionerID"])
    .size()
    .reset_index(name="Encounters")
)

print(encounters_counts.head())


                              PatientID PractitionerID  Encounters
0  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000002           1
1  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000009           1
2  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000028           1
3  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000029           1
4  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000055           1


In [201]:
def extract_appointment_patient_practitioner(js):
    try:
        data = json.loads(js)
        patient_id = None
        practitioner_id = None

        for p in data.get("participant", []):
            actor = p.get("actor", {})
            ref = actor.get("reference", "")
            if ref.startswith("Practitioner/"):
                practitioner_id = ref.split("/")[-1]
            elif not ref.startswith("Location/") and not ref.startswith("Practitioner/"):
                patient_id = ref.split("/")[-1]  

        return pd.Series([patient_id, practitioner_id])
    except Exception as e:
        print("Fejl:", e)
        return pd.Series([None, None])


In [202]:
test_appointments = appointments_raw[[3]].copy()
test_appointments[["PatientID", "PractitionerID"]] = test_appointments[3].apply(extract_appointment_patient_practitioner)


print(test_appointments[["PatientID", "PractitionerID"]].head())


                              PatientID PractitionerID
0  5da8452a-5082-4582-a029-212f5ef781aa    user0000056
1  5da8452a-5082-4582-a029-212f5ef781aa    user0000049
2  5da8452a-5082-4582-a029-212f5ef781aa    user0000043
3  5da8452a-5082-4582-a029-212f5ef781aa    user0000064
4  5da8452a-5082-4582-a029-212f5ef781aa    user0000030


In [203]:
appointments_counts = (
    test_appointments
    .groupby(["PatientID", "PractitionerID"])
    .size()
    .reset_index(name="Appointments")
)

print(appointments_counts.head())


                              PatientID PractitionerID  Appointments
0  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000014             1
1  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000022             1
2  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000024             1
3  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000025             1
4  00127b47-8a14-4eb3-94ac-5d22c0d944b3    user0000027             1


In [204]:
def extract_observation_patient_practitioner(js):
    try:
        data = json.loads(js)
        patient_ref = data.get("subject", {}).get("reference", "")
        patient_id = patient_ref.split("/")[-1] if "Patient/" in patient_ref else patient_ref

        practitioner_id = None
        for p in data.get("performer", []):
            ref = p.get("reference", "")
            if ref.startswith("Practitioner/"):
                practitioner_id = ref.split("/")[-1]

        return pd.Series([patient_id, practitioner_id])
    except Exception as e:
        print("Fejl:", e)
        return pd.Series([None, None])


In [205]:
test_observations = observations_raw[[3]].copy()
test_observations[["PatientID", "PractitionerID"]] = test_observations[3].apply(extract_observation_patient_practitioner)


print(test_observations[["PatientID", "PractitionerID"]].head())


                              PatientID PractitionerID
0  5da8452a-5082-4582-a029-212f5ef781aa           None
1  5da8452a-5082-4582-a029-212f5ef781aa           None
2  5da8452a-5082-4582-a029-212f5ef781aa           None
3  5da8452a-5082-4582-a029-212f5ef781aa           None
4  5da8452a-5082-4582-a029-212f5ef781aa           None


In [206]:
observations_counts = (
    test_observations
    .dropna(subset=["PatientID", "PractitionerID"])
    .groupby(["PatientID", "PractitionerID"])
    .size()
    .reset_index(name="Observations")
)

print(observations_counts.head())


Empty DataFrame
Columns: [PatientID, PractitionerID, Observations]
Index: []


In [207]:

merged = appointments_counts \
    .merge(encounters_counts, on=["PatientID", "PractitionerID"], how="outer") \
    .merge(observations_counts, on=["PatientID", "PractitionerID"], how="outer") \
    .fillna(0)


merged[["Appointments", "Encounters", "Observations"]] = merged[["Appointments", "Encounters", "Observations"]].astype(int)


merged["Total"] = merged[["Appointments", "Encounters", "Observations"]].sum(axis=1)
merged["Class"] = merged["Total"].apply(lambda x: "anomalous" if x == 1 else "normal")
merged["User_name"] = merged["PractitionerID"]
merged["SpecialAction"] = 0


final_df = merged[["User_name", "PatientID", "Appointments", "Observations", "Encounters", "SpecialAction", "Class"]]


final_df.to_csv("conf-training-dataset.csv", index=False)
print(final_df.head())


     User_name                             PatientID  Appointments  \
0  user0000002  00127b47-8a14-4eb3-94ac-5d22c0d944b3             0   
1  user0000009  00127b47-8a14-4eb3-94ac-5d22c0d944b3             0   
2  user0000014  00127b47-8a14-4eb3-94ac-5d22c0d944b3             1   
3  user0000022  00127b47-8a14-4eb3-94ac-5d22c0d944b3             1   
4  user0000024  00127b47-8a14-4eb3-94ac-5d22c0d944b3             1   

   Observations  Encounters  SpecialAction      Class  
0             0           1              0  anomalous  
1             0           1              0  anomalous  
2             0           0              0  anomalous  
3             0           0              0  anomalous  
4             0           0              0  anomalous  
