In [82]:
import json
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report

with open("events.json", "r", encoding="utf-8") as f:
    data = json.load(f)

appointments = []
observations = []
encounters = []
audits = []

for event in data:
    evt_type = event.get("type")
    d = event.get("data", {})
    pid = d.get("patient_id")
    uid = d.get("practitioner_id")
    ts = event.get("start") or event.get("timestamp")
    
    if not pid or not uid or not ts:
        continue

    if evt_type == "Appointment":
        appointments.append({"patient_id": pid, "practitioner_id": uid})
    elif evt_type == "Observation":
        observations.append({"patient_id": pid, "practitioner_id": uid})
    elif evt_type == "Encounter":
        encounters.append({"patient_id": pid, "practitioner_id": uid})
    elif evt_type == "AuditEvent":
        audits.append({
            "patient_id": pid,
            "practitioner_id": uid,
            "timestamp": ts,
            "break_glass": d.get("break_glass", False)
        })

rows = []
for a in audits:
    pid = a["patient_id"]
    uid = a["practitioner_id"]
    ts = a["timestamp"]
    break_glass = a["break_glass"]

    app_count = sum(1 for x in appointments if x["patient_id"] == pid and x["practitioner_id"] == uid)
    obs_count = sum(1 for x in observations if x["patient_id"] == pid and x["practitioner_id"] == uid)
    enc_count = sum(1 for x in encounters if x["patient_id"] == pid and x["practitioner_id"] == uid)

    total = app_count + obs_count + enc_count

    if total == 0 and not break_glass:
        label = "Anomalous"
    elif total == 1 and random.random() < 0.5:
        label = "Anomalous"
    else:
        label = "Normal"

    rows.append({
        "patient_id": pid,
        "practitioner_id": uid,
        "time": ts,
        "#appointments": app_count,
        "#encounters": enc_count,
        "#observations": obs_count,
        "break_glass": int(break_glass),
        "label": label
    })

df = pd.DataFrame(rows)
print(df.head())

X = df[["#appointments", "#observations", "#encounters", "break_glass"]]
y = df["label"].map({"Normal": 0, "Anomalous": 1})

print("\nLabel-fordeling:")
print(y.value_counts())

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

svm = SVC(kernel='linear', class_weight='balanced')
svm.fit(X_train, y_train)
print("\n--- SVM ---")
print(classification_report(y_test, svm.predict(X_test)))

knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
print("\n--- KNN ---")
print(classification_report(y_test, knn.predict(X_test)))

mnb = MultinomialNB()
mnb.fit(X_train, y_train)
print("\n--- Multinomial Naive Bayes ---")
print(classification_report(y_test, mnb.predict(X_test)))


                             patient_id                       practitioner_id  \
0  bdd640fb-0667-4ad1-9c80-317fa3b1799d  2319594f-630e-4896-a3e6-17a2c230a890   
1  47294739-614f-43d7-99db-3ad0ddd1dfb2  5c27af1c-a03d-4f69-821c-facb26736d23   
2  f9e8a369-2999-4735-9d56-cc943c9ad14c  ff77cf14-a19e-4e81-aae0-97c79830e432   
3  e9a1fa6f-81f7-4d1c-adbc-2134c30ff46e  1e788e01-b80f-4ca9-b03c-264a1670aa16   
4  5c323961-fb2f-497d-91e5-5363e3182010  d6d4f7ef-284b-43dd-a0e3-fe8d5e9ca5c3   

                  time  #appointments  #encounters  #observations  \
0  2023-11-06T14:00:00              9            9             15   
1  2023-11-06T15:22:00              6            4              6   
2  2023-11-13T10:00:00              2            3              8   
3  2023-11-13T15:57:00              4            3              2   
4  2024-01-25T20:58:00              0            0              0   

   break_glass      label  
0            0     Normal  
1            0     Normal  
2            0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
