In [1]:
import importlib
import sys
import torch
import numpy as np

# Put the parent and grandparent directories (in that order) at the front of
# the module search path so packages living above this folder can be imported.
sys.path[:0] = ['..', '../..']

In [3]:
# Import the loader module, then force a reload so that edits made to
# new_event_log_loader.py while the kernel is running are picked up whenever
# this cell is re-executed.
import event_log_loader.new_event_log_loader
importlib.reload(event_log_loader.new_event_log_loader)
# Bind the class names only after the reload so they come from the reloaded module.
from event_log_loader.new_event_log_loader import EventLogLoader, EventLogDataset

import warnings
# Hide FutureWarning messages emitted from this point on.
warnings.filterwarnings("ignore", category=FutureWarning)

# Seed NumPy's global random number generator so repeated runs draw the same values.
np.random.seed(17)

# Location of the raw event log (CSV) and the name stem used for result files.
event_log_location = '../../../data/Sepsis.csv'
result_name = 'Sepsis_all'

# Settings passed to EventLogLoader. Key order is kept exactly as before.
event_log_properties = {
    # Column-name settings.
    'case_name': 'case:concept:name',
    'concept_name': 'concept:name',
    'timestamp_name': 'time:timestamp',
    'time_since_case_start_column': 'case_elapsed_time',
    'time_since_last_event_column': 'event_elapsed_time',
    'day_in_week_column': 'day_in_week',
    'seconds_in_day_column': 'seconds_in_day',
    # Size / split settings (their exact semantics are defined by the loader).
    'min_suffix_size': 5,
    'train_validation_size': 0.15,
    'test_validation_size': 0.2,
    'window_size': 'auto',
    # Columns listed as categorical features.
    'categorical_columns': [
        'concept:name',
        'InfectionSuspected',
        'org:group',
        'DiagnosticBlood',
        'DisfuncOrg',
        'SIRSCritTachypnea',
        'Hypotensie',
        'SIRSCritHeartRate',
        'Infusion',
        'DiagnosticArtAstrup',
        'DiagnosticIC',
        'DiagnosticSputum',
        'DiagnosticLiquor',
        'DiagnosticOther',
        'SIRSCriteria2OrMore',
        'DiagnosticXthorax',
        'SIRSCritTemperature',
        'DiagnosticUrinaryCulture',
        'SIRSCritLeucos',
        'Oligurie',
        'DiagnosticLacticAcid',
        'lifecycle:transition',
        'Diagnose',
        'Hypoxie',
        'DiagnosticUrinarySediment',
        'DiagnosticECG',
    ],
    # Columns listed as continuous features.
    'continuous_columns': [
        'case_elapsed_time',
        'event_elapsed_time',
        'day_in_week',
        'seconds_in_day',
        'Age',
        'Leucocytes',
        'CRP',
        'LacticAcid',
    ],
    # No columns are listed as continuous-positive for this log.
    'continuous_positive_columns': [],
}


# According to the original notes, constructing the loader:
# 1) loads the event log
# 2) adds EOS to cases
# 3) normalizes numerical features
# 4) imputes features (replaces n.a. categories with a new class for
#    categorical features, and uses average values for numerical features)
# NOTE(review): this assignment rebinds the name `event_log_loader`, which until
# here referred to the imported package, to the loader instance.
event_log_loader = EventLogLoader(event_log_location, event_log_properties)

In [4]:
# 'window_size' was configured as 'auto'; print the value stored on the loader's encoder_decoder.
print(event_log_loader.encoder_decoder.window_size)

55


In [5]:
# Build the 'train' dataset, save it under encoded_data/, then print its
# all_categories attribute.
train_dataset = event_log_loader.get_dataset('train')
torch.save(
    train_dataset,
    f'../../../encoded_data/{result_name}_{event_log_loader.encoder_decoder.min_suffix_size!s}_train.pkl',
)
print(train_dataset.all_categories)

categorical tensors:   0%|          | 0/26 [00:00<?, ?it/s]

concept:name:   0%|          | 0/683 [00:00<?, ?it/s]

InfectionSuspected:   0%|          | 0/683 [00:00<?, ?it/s]

org:group:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticBlood:   0%|          | 0/683 [00:00<?, ?it/s]

DisfuncOrg:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritTachypnea:   0%|          | 0/683 [00:00<?, ?it/s]

Hypotensie:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritHeartRate:   0%|          | 0/683 [00:00<?, ?it/s]

Infusion:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticArtAstrup:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticIC:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticSputum:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticLiquor:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticOther:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCriteria2OrMore:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticXthorax:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritTemperature:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticUrinaryCulture:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritLeucos:   0%|          | 0/683 [00:00<?, ?it/s]

Oligurie:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticLacticAcid:   0%|          | 0/683 [00:00<?, ?it/s]

lifecycle:transition:   0%|          | 0/683 [00:00<?, ?it/s]

Diagnose:   0%|          | 0/683 [00:00<?, ?it/s]

Hypoxie:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticUrinarySediment:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticECG:   0%|          | 0/683 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/8 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/683 [00:00<?, ?it/s]

event_elapsed_time:   0%|          | 0/683 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/683 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/683 [00:00<?, ?it/s]

Age:   0%|          | 0/683 [00:00<?, ?it/s]

Leucocytes:   0%|          | 0/683 [00:00<?, ?it/s]

CRP:   0%|          | 0/683 [00:00<?, ?it/s]

LacticAcid:   0%|          | 0/683 [00:00<?, ?it/s]

([('concept:name', 18, {'Admission IC': 1, 'Admission NC': 2, 'CRP': 3, 'EOS': 4, 'ER Registration': 5, 'ER Sepsis Triage': 6, 'ER Triage': 7, 'IV Antibiotics': 8, 'IV Liquid': 9, 'LacticAcid': 10, 'Leucocytes': 11, 'Release A': 12, 'Release B': 13, 'Release C': 14, 'Release D': 15, 'Release E': 16, 'Return ER': 17}), ('InfectionSuspected', 5, {'EOS': 1, 'False': 2, 'True': 3, nan: 4}), ('org:group', 27, {'?': 1, 'A': 2, 'B': 3, 'C': 4, 'D': 5, 'E': 6, 'EOS': 7, 'F': 8, 'G': 9, 'H': 10, 'I': 11, 'J': 12, 'K': 13, 'L': 14, 'M': 15, 'N': 16, 'O': 17, 'P': 18, 'Q': 19, 'R': 20, 'S': 21, 'T': 22, 'U': 23, 'V': 24, 'W': 25, 'Y': 26}), ('DiagnosticBlood', 5, {'EOS': 1, 'False': 2, 'True': 3, nan: 4}), ('DisfuncOrg', 5, {'EOS': 1, 'False': 2, 'True': 3, nan: 4}), ('SIRSCritTachypnea', 5, {'EOS': 1, 'False': 2, 'True': 3, nan: 4}), ('Hypotensie', 5, {'EOS': 1, 'False': 2, 'True': 3, nan: 4}), ('SIRSCritHeartRate', 5, {'EOS': 1, 'False': 2, 'True': 3, nan: 4}), ('Infusion', 5, {'EOS': 1, 'False

In [6]:
# Build the 'test' dataset and save it under encoded_data/.
test_dataset = event_log_loader.get_dataset('test')
torch.save(
    test_dataset,
    f'../../../encoded_data/{result_name}_{event_log_loader.encoder_decoder.min_suffix_size!s}_test.pkl',
)

categorical tensors:   0%|          | 0/26 [00:00<?, ?it/s]

concept:name:   0%|          | 0/209 [00:00<?, ?it/s]

InfectionSuspected:   0%|          | 0/209 [00:00<?, ?it/s]

org:group:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticBlood:   0%|          | 0/209 [00:00<?, ?it/s]

DisfuncOrg:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritTachypnea:   0%|          | 0/209 [00:00<?, ?it/s]

Hypotensie:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritHeartRate:   0%|          | 0/209 [00:00<?, ?it/s]

Infusion:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticArtAstrup:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticIC:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticSputum:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticLiquor:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticOther:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCriteria2OrMore:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticXthorax:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritTemperature:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticUrinaryCulture:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritLeucos:   0%|          | 0/209 [00:00<?, ?it/s]

Oligurie:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticLacticAcid:   0%|          | 0/209 [00:00<?, ?it/s]

lifecycle:transition:   0%|          | 0/209 [00:00<?, ?it/s]

Diagnose:   0%|          | 0/209 [00:00<?, ?it/s]

Hypoxie:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticUrinarySediment:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticECG:   0%|          | 0/209 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/8 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/209 [00:00<?, ?it/s]

event_elapsed_time:   0%|          | 0/209 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/209 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/209 [00:00<?, ?it/s]

Age:   0%|          | 0/209 [00:00<?, ?it/s]

Leucocytes:   0%|          | 0/209 [00:00<?, ?it/s]

CRP:   0%|          | 0/209 [00:00<?, ?it/s]

LacticAcid:   0%|          | 0/209 [00:00<?, ?it/s]

In [None]:
# Build the 'val' dataset and save it under encoded_data/.
val_dataset = event_log_loader.get_dataset('val')
torch.save(
    val_dataset,
    f'../../../encoded_data/{result_name}_{event_log_loader.encoder_decoder.min_suffix_size!s}_val.pkl',
)

In [9]:



# Load a previously saved dataset back from disk.
# The path is hard-coded; presumably it is the file written by the test-split
# cell (result_name 'Sepsis_all', min_suffix_size 5) — verify if those settings change.
# SECURITY: weights_only=False lets torch.load unpickle arbitrary Python objects,
# which can execute code. Only load files you created yourself.
# NOTE(review): the variable is named bpic_17_* although the file is the Sepsis test set.
file_path_data_set = '../../../encoded_data/Sepsis_all_5_test.pkl'
bpic_17_test_dataset = torch.load(file_path_data_set, weights_only=False)

In [10]:
# Bare last expression: displays the repr of the object returned by torch.load.
bpic_17_test_dataset


<event_log_loader.new_event_log_loader.EventLogDataset at 0x119165e50>