In [48]:
import pandas as pd
from datetime import datetime
import numpy as np
import json

# Load the data
file_path = '../../dataset.json'  # Ensure the correct path to the dataset
data = pd.read_json(file_path)

# Flatten the nested 'visit' dictionaries into top-level columns.
# Each entry is (target column, location key inside 'visit', coordinate key).
# The dataset spells the coordinate key 'longtitude', so that spelling is
# required for the lookup even though the resulting column is 'longitude'.
visits = data['visit']
coordinate_columns = [
    ('nurse_latitude', 'nurseLocation', 'latitude'),
    ('nurse_longitude', 'nurseLocation', 'longtitude'),
    ('patient_latitude', 'patientLocation', 'latitude'),
    ('patient_longitude', 'patientLocation', 'longtitude'),
]
for column, location_key, coordinate_key in coordinate_columns:
    # The lambda is consumed immediately by apply(), so capturing the loop
    # variables here is safe.
    data[column] = visits.apply(lambda visit: visit[location_key][coordinate_key])
data['service'] = visits.apply(lambda visit: visit['service'])

def fraud_risk(row, group, same_day_threshold=2, service_ratio_threshold=0.75):
    """Assess the fraud risk of a single visit relative to a group of visits.

    Args:
        row: The visit being rated; must expose 'visit_day' and 'service'.
        group: DataFrame of visits to compare against; must have
            'visit_day' and 'service' columns. If ``row`` is itself part of
            ``group`` it is included in the counts below.
        same_day_threshold: A visit is "High Risk" when the number of group
            visits sharing its day and service is strictly greater than this.
        service_ratio_threshold: A visit is "Medium Risk" when the fraction
            of group visits with its service is strictly greater than this.

    Returns:
        "High Risk", "Medium Risk" or "Low Risk".
    """
    has_same_service = group['service'] == row['service']
    is_same_day = group['visit_day'] == row['visit_day']

    # Number of visits in the group with the same service on the same day.
    repeats_on_day = int((is_same_day & has_same_service).sum())
    if repeats_on_day > same_day_threshold:
        return "High Risk"

    # Share of the whole group taken up by this visit's service.
    if has_same_service.mean() > service_ratio_threshold:
        return "Medium Risk"

    return "Low Risk"

# Prepare the data for the risk calculation: parse the timestamps and derive
# the calendar date of each visit (taken from the parsed timestamp as-is).
data['visit_timestamp'] = pd.to_datetime(data['visit_timestamp'])
data['visit_day'] = data['visit_timestamp'].dt.date

# Rate every visit against all visits made by the same nurse.
grouped = data.groupby('rijksregisterNurse')
data['Fraud Risk'] = data.apply(lambda row: fraud_risk(row, grouped.get_group(row['rijksregisterNurse'])), axis=1)

# Filter out only high and medium risk entries.
# Take an explicit copy: the columns are overwritten below, and assigning
# into a filtered selection of `data` triggers pandas' SettingWithCopyWarning.
filtered_data = data[data['Fraud Risk'].isin(['High Risk', 'Medium Risk'])].copy()

# Convert visit_timestamp and visit_day to string for JSON serialization
filtered_data['visit_timestamp'] = filtered_data['visit_timestamp'].astype(str)
filtered_data['visit_day'] = filtered_data['visit_day'].astype(str)

# Convert filtered data to list of dictionaries
result = filtered_data.to_dict(orient='records')

# Print the results
print(json.dumps(result, indent=4))


[
    {
        "rijksregisterPatient": 48485354849,
        "rijksregisterNurse": 23763617294,
        "visit": {
            "id": 4395,
            "visitAmounts": 2,
            "duration": "SIX_MONTHS",
            "service": "DENTAL_CARE",
            "nurseLocation": {
                "latitude": 51.243491,
                "longtitude": 3.841391
            },
            "patientLocation": {
                "latitude": 50.540657,
                "longtitude": 4.778784
            }
        },
        "visit_timestamp": "2024-06-21 23:59:24+00:00",
        "driveTime": 8,
        "distance": 1.005,
        "nurse_latitude": 51.243491,
        "nurse_longitude": 3.841391,
        "patient_latitude": 50.540657,
        "patient_longitude": 4.778784,
        "service": "DENTAL_CARE",
        "visit_day": "2024-06-21",
        "Fraud Risk": "Medium Risk"
    },
    {
        "rijksregisterPatient": 62554883857,
        "rijksregisterNurse": 33429776280,
        "visit": {
          