Load and parse the Dataset (Run this first!)

Update the directory path in the cell below so that it points to the folder containing all .xlsx files.

In [None]:
import os

from ParseData import parse_dataset
from utils.PropertyNames import ColumnNames as Cols

# Directory that contains all of the .xlsx input files.
# Set the DEBRUIJN_DATA_DIR environment variable to point at your own copy;
# the fallback is the original author's local path and will not exist on
# other machines.
# NOTE(review): the trailing slash is kept from the original call in case
# parse_dataset builds file paths by string concatenation — confirm.
DATA_DIR = os.environ.get("DEBRUIJN_DATA_DIR", "/home/lumpus/Documents/deBruijnData/Diabetes/")

# Load the dataset from DATA_DIR; silent=False is forwarded to parse_dataset unchanged.
patient_data = parse_dataset(DATA_DIR, silent=False)

# Distinct values of the patient column; the plotting cells loop over these.
patients = patient_data[Cols.patient].unique()


Filter prune benchmark Test

In [None]:
from utils.PropertyNames import MethodOptions as Opts
from Benchmark import benchmark

# Benchmark configuration with pruning switched on and the "filter" prune method.
params = dict(
    k=5,
    risky_chars=None,
    risk_threshold=0.2,
    prune=True,
    prune_method=Opts.filter,
    prune_threshold=3,
    max_steps=3,
    naive_threshold=30,
)

# Every entry of params is forwarded to benchmark() as a keyword argument.
benchmark(
    patient_data,
    start_time_range_hours=0,
    end_time_range_hours=1,
    **params,
)


Adaptive prune benchmark Test

In [None]:
from utils.PropertyNames import MethodOptions as Opts
from Benchmark import benchmark

# Benchmark configuration with pruning switched on and the "adaptive" prune method.
# weight_thresholds and value_ranges are only supplied for this method in this notebook.
params = dict(
    k=5,
    risky_chars=None,
    risk_threshold=0.3,
    prune=True,
    prune_method=Opts.adaptive,
    prune_threshold=1,
    weight_thresholds=[1, 2, 5],
    value_ranges=[(0, 1), (1, 2), (2, float("inf"))],
    max_steps=3,
    naive_threshold=30,
)

# Every entry of params is forwarded to benchmark() as a keyword argument.
benchmark(
    patient_data,
    start_time_range_hours=0,
    end_time_range_hours=1,
    **params,
)

Plot Probability Distribution

In [None]:
from utils.VisualizationUtils import draw_histogram
from deBruijn.ProbabilityGraph import ProbabilityGraph
from utils.PropertyNames import MethodOptions as Opts
from utils.PropertyNames import ColumnNames as Cols

# k is passed to ProbabilityGraph below.
k = 4

# This used to be the bare annotation `risky_chars: None`, which binds no name
# at all; it is now a real assignment.
# NOTE(review): risky_chars is not consumed anywhere in this cell — confirm
# whether it should be forwarded to the graph/model or removed.
risky_chars = None

# Keyword arguments forwarded to get_probability_model(); pruning is disabled here.
params = {
    "prune": False,
    "prune_method": Opts.filter,
    "prune_threshold": 3,
    "max_steps": 3,
}

# One entry per patient: that patient's rows ordered by date (ascending),
# reduced to the Cols.char column.
sequences = [
    patient_data[patient_data[Cols.patient] == patient]
    .sort_values(Cols.date, ascending=True)[Cols.char]
    for patient in patients
]

probability_graph = ProbabilityGraph(sequences=sequences, k=k)

print(f"Resulting graph: {probability_graph}")

probability_model = probability_graph.get_probability_model(**params)

# Histogram over the values stored in the model's probability_dict.
draw_histogram(
    list(probability_model.probability_dict.values()),
    "Node Probability Distribution",
    "Probability",
    "Count",
    bins=20,
)


Plot a timeline of the target (ideal model) at every data point, one plot per patient. Alerted data points are marked in red.

In [None]:
from utils.VisualizationUtils import draw_timeline
from utils.PropertyNames import ColumnNames as Cols
# Opts is referenced in params below. Import it here so this cell does not
# depend on another cell that imports Opts having been run beforehand.
from utils.PropertyNames import MethodOptions as Opts

from Benchmark import add_target_column

# NOTE(review): naive_threshold and params are defined here but are not passed
# to add_target_column() in this cell — confirm whether they are still needed.
naive_threshold = 20
params = {
    "k": 4,
    "risky_chars": None,
    "risk_threshold": 0.5,
    "prune": True,
    "prune_method": Opts.filter,
    "prune_threshold": 3,
    "max_steps": 3,
}

# Pick an alert model here
alert_to_plot = Cols.target

patient_data_with_alerts = add_target_column(patient_data)

# One timeline per patient, with that patient's rows ordered by date (ascending).
for p in patients:
    patient_rows = patient_data_with_alerts[patient_data_with_alerts[Cols.patient] == p]
    patient_rows = patient_rows.sort_values(Cols.date, ascending=True)
    draw_timeline(patient_rows, p, alert_to_plot)

Draw the timeline of one of our models

In [None]:
from utils.VisualizationUtils import draw_timeline
from utils.PropertyNames import ColumnNames as Cols
# Opts is referenced in params below. Import it here so this cell does not
# depend on another cell that imports Opts having been run beforehand.
from utils.PropertyNames import MethodOptions as Opts

from Benchmark import add_alerts

# Passed positionally to add_alerts() below.
naive_threshold = 20

# Keyword arguments forwarded to add_alerts(); uses the "adaptive" prune method.
params = {
    "k": 5,
    "risky_chars": None,
    "risk_threshold": 0.3,
    "prune": True,
    "prune_method": Opts.adaptive,
    "prune_threshold": 1,
    "weight_thresholds": [1, 2, 5],
    "value_ranges": [(0, 1), (1, 2), (2, float('inf'))],
    "max_steps": 3,
}

# Pick an alert model here
alert_to_plot = Cols.prob_alert

patient_data_with_alerts = add_alerts(patient_data, naive_threshold, **params)

# One timeline per patient, with that patient's rows ordered by date (ascending).
for p in patients:
    patient_rows = patient_data_with_alerts[patient_data_with_alerts[Cols.patient] == p]
    patient_rows = patient_rows.sort_values(Cols.date, ascending=True)
    draw_timeline(patient_rows, p, alert_to_plot)

