In [1]:
import importlib
import sys
import torch
import numpy as np

# Put the three nearest ancestor directories at the front of the module search
# path. Each entry is inserted at position 0 in turn, so the last one listed
# ('../../..') ends up with the highest import priority.
for ancestor_dir in ('..', '../..', '../../..'):
    sys.path.insert(0, ancestor_dir)

In [2]:
# Import the loader module and reload it right away, so that edits made to the
# module on disk are picked up without restarting the kernel.
import event_log_loader.new_event_log_loader
importlib.reload(event_log_loader.new_event_log_loader)
from event_log_loader.new_event_log_loader import EventLogLoader

import warnings
warnings.filterwarnings(action="ignore", category=FutureWarning)

# Fixed seed for NumPy's global random number generator.
np.random.seed(seed=17)

# Input event log and the base name used for the encoded output files.
event_log_location = '../../../../../../evaluation_results/data/Sepsis.csv'
result_name = 'Sepsis_all_lognormal'

event_log_properties = {
    # Columns of the raw log (presumably case id, activity and timestamp -- verify against EventLogLoader).
    'case_name': 'case:concept:name',
    'concept_name': 'concept:name',
    'timestamp_name': 'time:timestamp',

    # Names of the time-related feature columns.
    'time_since_case_start_column': 'case_elapsed_time',
    'time_since_last_event_column': 'event_elapsed_time',
    'day_in_week_column': 'day_in_week',
    'seconds_in_day_column': 'seconds_in_day',

    # Suffix length, split sizes and window size handed to the loader.
    'min_suffix_size': 5,
    'train_validation_size': 0.15,
    'test_validation_size': 0.2,
    'window_size': 'auto',

    # Categorical features (26 columns).
    # Decoder attributes: concept:name, org:group, lifecycle:transition
    'categorical_columns': [
        'concept:name',
        'InfectionSuspected',
        'org:group',
        'DiagnosticBlood',
        'DisfuncOrg',
        'SIRSCritTachypnea',
        'Hypotensie',
        'SIRSCritHeartRate',
        'Infusion',
        'DiagnosticArtAstrup',
        'DiagnosticIC',
        'DiagnosticSputum',
        'DiagnosticLiquor',
        'DiagnosticOther',
        'SIRSCriteria2OrMore',
        'DiagnosticXthorax',
        'SIRSCritTemperature',
        'DiagnosticUrinaryCulture',
        'SIRSCritLeucos',
        'Oligurie',
        'DiagnosticLacticAcid',
        'lifecycle:transition',
        'Diagnose',
        'Hypoxie',
        'DiagnosticUrinarySediment',
        'DiagnosticECG',
    ],

    # Continuous features, with the strictly non-negative elapsed-time columns listed separately.
    'continuous_columns': [
        'day_in_week',
        'seconds_in_day',
        'Age',
        'Leucocytes',
        'CRP',
        'LacticAcid',
    ],
    'continuous_positive_columns': [
        'case_elapsed_time',
        'event_elapsed_time',
    ],
}

# Constructing the loader:
#   1) loads the event log
#   2) adds EOS to cases
#   3) normalizes numerical features
#   4) imputes features (n.a. categories become a new class for categorical
#      features; numerical features get the average value)
# NOTE(review): this assignment rebinds `event_log_loader` from the imported
# package to the loader instance; later cells rely on the instance under this name.
event_log_loader = EventLogLoader(event_log_location, event_log_properties)

Positive Standardization
min,25%,50%,75%,max: [ 0.          7.57198858 12.00054169 12.8894558  16.28230095]
Mean:  [10.486143]
Std:  [3.1214643]
Positive Standardization
min,25%,50%,75%,max: [ 1.79175949  6.7765069  10.98566532 12.27770901 16.15659904]
Mean:  [9.554063]
Std:  [3.2598135]




In [3]:
# 'window_size' was configured as 'auto'; show the value the encoder/decoder holds.
resolved_window_size = event_log_loader.encoder_decoder.window_size
print(resolved_window_size)

51


In [4]:
# Build the training split, save it under a file name derived from the result
# name and the minimum suffix size, then print its `all_categories` attribute.
train_dataset = event_log_loader.get_dataset('train')
train_path = (
    '../../../../encoded_data/log_stand/'
    f'{result_name}_{event_log_loader.encoder_decoder.min_suffix_size!s}_train.pkl'
)
torch.save(train_dataset, train_path)
print(train_dataset.all_categories)

categorical tensors:   0%|          | 0/26 [00:00<?, ?it/s]

concept:name:   0%|          | 0/683 [00:00<?, ?it/s]

InfectionSuspected:   0%|          | 0/683 [00:00<?, ?it/s]

org:group:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticBlood:   0%|          | 0/683 [00:00<?, ?it/s]

DisfuncOrg:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritTachypnea:   0%|          | 0/683 [00:00<?, ?it/s]

Hypotensie:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritHeartRate:   0%|          | 0/683 [00:00<?, ?it/s]

Infusion:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticArtAstrup:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticIC:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticSputum:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticLiquor:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticOther:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCriteria2OrMore:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticXthorax:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritTemperature:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticUrinaryCulture:   0%|          | 0/683 [00:00<?, ?it/s]

SIRSCritLeucos:   0%|          | 0/683 [00:00<?, ?it/s]

Oligurie:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticLacticAcid:   0%|          | 0/683 [00:00<?, ?it/s]

lifecycle:transition:   0%|          | 0/683 [00:00<?, ?it/s]

Diagnose:   0%|          | 0/683 [00:00<?, ?it/s]

Hypoxie:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticUrinarySediment:   0%|          | 0/683 [00:00<?, ?it/s]

DiagnosticECG:   0%|          | 0/683 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/8 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/683 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/683 [00:00<?, ?it/s]

Age:   0%|          | 0/683 [00:00<?, ?it/s]

Leucocytes:   0%|          | 0/683 [00:00<?, ?it/s]

CRP:   0%|          | 0/683 [00:00<?, ?it/s]

LacticAcid:   0%|          | 0/683 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/683 [00:00<?, ?it/s]

Positive Standardization
min,25%,50%,75%,max: [ 0.          9.21900272 12.8894558  13.28232431 13.77963448]
Mean:  [10.988216]
Std:  [3.362029]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.32449007  9.8665123  12.8894558  12.8894558 ]
Mean:  [8.757147]
Std:  [4.104207]
Positive Standardization
min,25%,50%,75%,max: [ 0.          6.08221912  9.55958748 12.8894558  12.8894558 ]
Mean:  [8.538356]
Std:  [4.1034307]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.95752764 12.0091877  12.8894558  13.19180965]
Mean:  [10.362001]
Std:  [3.2314234]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.30518818 11.05474091 12.8894558  15.60274029]
Mean:  [10.066228]
Std:  [3.6419058]
Positive Standardization
min,25%,50%,75%,max: [ 0.          6.80350542  6.81892395 12.8894558  12.8894558 ]
Mean:  [8.535772]
Std:  [3.885224]
Positive Standardization
min,25%,50%,75%,max: [ 0.          9.93838453 12.01565647 12.8894558  16.28230095]
Mean:  [10.994267]
Std:  [3.

event_elapsed_time:   0%|          | 0/683 [00:00<?, ?it/s]

Positive Standardization
min,25%,50%,75%,max: [ 3.17805386  9.10364532 12.03884315 12.05989552 12.27770901]
Mean:  [10.37722]
Std:  [2.6263993]
Positive Standardization
min,25%,50%,75%,max: [ 1.60943794  7.29029274  9.78459167 12.27770901 12.27770901]
Mean:  [8.697441]
Std:  [3.9081314]
Positive Standardization
min,25%,50%,75%,max: [ 1.60943794  5.80814266  9.52828503 12.27770901 12.27770901]
Mean:  [8.419866]
Std:  [3.972902]
Positive Standardization
min,25%,50%,75%,max: [ 2.94443893  7.5877763  11.36675453 12.22325563 12.27770901]
Mean:  [9.944317]
Std:  [2.830376]
Positive Standardization
min,25%,50%,75%,max: [ 5.04342508  7.12773871 12.27770901 12.27770901 15.51468658]
Mean:  [10.13607]
Std:  [2.9964716]
Positive Standardization
min,25%,50%,75%,max: [ 2.70805025  6.39692974  7.51914978 12.27770901 12.27770901]
Mean:  [8.582573]
Std:  [3.6534095]
Positive Standardization
min,25%,50%,75%,max: [ 3.17805386  8.33926201 11.36675453 12.27770901 16.15659904]
Mean:  [10.3684025]
Std:  [2.6

In [5]:
# Build the test split and save it under a file name derived from the result
# name and the minimum suffix size.
test_dataset = event_log_loader.get_dataset('test')
test_path = (
    '../../../../encoded_data/log_stand/'
    f'{result_name}_{event_log_loader.encoder_decoder.min_suffix_size!s}_test.pkl'
)
torch.save(test_dataset, test_path)

categorical tensors:   0%|          | 0/26 [00:00<?, ?it/s]

concept:name:   0%|          | 0/209 [00:00<?, ?it/s]

InfectionSuspected:   0%|          | 0/209 [00:00<?, ?it/s]

org:group:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticBlood:   0%|          | 0/209 [00:00<?, ?it/s]

DisfuncOrg:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritTachypnea:   0%|          | 0/209 [00:00<?, ?it/s]

Hypotensie:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritHeartRate:   0%|          | 0/209 [00:00<?, ?it/s]

Infusion:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticArtAstrup:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticIC:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticSputum:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticLiquor:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticOther:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCriteria2OrMore:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticXthorax:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritTemperature:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticUrinaryCulture:   0%|          | 0/209 [00:00<?, ?it/s]

SIRSCritLeucos:   0%|          | 0/209 [00:00<?, ?it/s]

Oligurie:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticLacticAcid:   0%|          | 0/209 [00:00<?, ?it/s]

lifecycle:transition:   0%|          | 0/209 [00:00<?, ?it/s]

Diagnose:   0%|          | 0/209 [00:00<?, ?it/s]

Hypoxie:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticUrinarySediment:   0%|          | 0/209 [00:00<?, ?it/s]

DiagnosticECG:   0%|          | 0/209 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/8 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/209 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/209 [00:00<?, ?it/s]

Age:   0%|          | 0/209 [00:00<?, ?it/s]

Leucocytes:   0%|          | 0/209 [00:00<?, ?it/s]

CRP:   0%|          | 0/209 [00:00<?, ?it/s]

LacticAcid:   0%|          | 0/209 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/209 [00:00<?, ?it/s]

Positive Standardization
min,25%,50%,75%,max: [ 0.          6.85538602  9.08963633 12.8894558  13.62233448]
Mean:  [9.148615]
Std:  [3.8975668]
Positive Standardization
min,25%,50%,75%,max: [ 0.          9.58525157 12.442523   12.94427228 14.14290524]
Mean:  [11.022233]
Std:  [3.1924014]
Positive Standardization
min,25%,50%,75%,max: [ 0.          5.95531309  9.99143028 12.8894558  12.8894558 ]
Mean:  [8.971943]
Std:  [4.333707]
Positive Standardization
min,25%,50%,75%,max: [ 0.          8.64707947 10.20942688 11.13805771 12.8894558 ]
Mean:  [9.714985]
Std:  [2.7695374]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.21524    11.40199375 12.8894558  12.99974346]
Mean:  [9.966876]
Std:  [3.4380863]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.72964931 10.60750198 12.8894558  12.8894558 ]
Mean:  [9.611264]
Std:  [3.9783304]
Positive Standardization
min,25%,50%,75%,max: [ 0.         10.78399539 11.41642523 12.80513191 12.8894558 ]
Mean:  [10.950611]
Std:  [3.

event_elapsed_time:   0%|          | 0/209 [00:00<?, ?it/s]

Positive Standardization
min,25%,50%,75%,max: [ 3.04452252  6.68576252 10.58240461 12.27770901 13.19098949]
Mean:  [9.090357]
Std:  [3.638003]
Positive Standardization
min,25%,50%,75%,max: [ 0.69314718  9.54752707 11.33978176 12.27770901 12.78381157]
Mean:  [10.049489]
Std:  [3.1197908]
Positive Standardization
min,25%,50%,75%,max: [ 2.8903718   6.84694338 12.27770901 12.27770901 12.27770901]
Mean:  [9.575991]
Std:  [3.463031]
Positive Standardization
min,25%,50%,75%,max: [ 2.6390574   7.82826185  9.28266144  9.91743946 12.27770901]
Mean:  [9.096376]
Std:  [2.464838]
Positive Standardization
min,25%,50%,75%,max: [ 2.6390574   6.68835449 11.2443924  12.27770901 12.27770901]
Mean:  [9.2484455]
Std:  [3.446224]
Positive Standardization
min,25%,50%,75%,max: [ 4.47733688  7.72488832 12.27770901 12.27770901 12.27770901]
Mean:  [10.107661]
Std:  [2.7963898]
Positive Standardization
min,25%,50%,75%,max: [ 2.6390574   7.03472733  9.7357235  12.20503855 12.27770901]
Mean:  [9.033001]
Std:  [3.18

In [6]:
# Build the validation split and save it under a file name derived from the
# result name and the minimum suffix size.
val_dataset = event_log_loader.get_dataset('val')
val_path = (
    '../../../../encoded_data/log_stand/'
    f'{result_name}_{event_log_loader.encoder_decoder.min_suffix_size!s}_val.pkl'
)
torch.save(val_dataset, val_path)

categorical tensors:   0%|          | 0/26 [00:00<?, ?it/s]

concept:name:   0%|          | 0/157 [00:00<?, ?it/s]

InfectionSuspected:   0%|          | 0/157 [00:00<?, ?it/s]

org:group:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticBlood:   0%|          | 0/157 [00:00<?, ?it/s]

DisfuncOrg:   0%|          | 0/157 [00:00<?, ?it/s]

SIRSCritTachypnea:   0%|          | 0/157 [00:00<?, ?it/s]

Hypotensie:   0%|          | 0/157 [00:00<?, ?it/s]

SIRSCritHeartRate:   0%|          | 0/157 [00:00<?, ?it/s]

Infusion:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticArtAstrup:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticIC:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticSputum:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticLiquor:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticOther:   0%|          | 0/157 [00:00<?, ?it/s]

SIRSCriteria2OrMore:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticXthorax:   0%|          | 0/157 [00:00<?, ?it/s]

SIRSCritTemperature:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticUrinaryCulture:   0%|          | 0/157 [00:00<?, ?it/s]

SIRSCritLeucos:   0%|          | 0/157 [00:00<?, ?it/s]

Oligurie:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticLacticAcid:   0%|          | 0/157 [00:00<?, ?it/s]

lifecycle:transition:   0%|          | 0/157 [00:00<?, ?it/s]

Diagnose:   0%|          | 0/157 [00:00<?, ?it/s]

Hypoxie:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticUrinarySediment:   0%|          | 0/157 [00:00<?, ?it/s]

DiagnosticECG:   0%|          | 0/157 [00:00<?, ?it/s]

continouous tensors:   0%|          | 0/8 [00:00<?, ?it/s]

day_in_week:   0%|          | 0/157 [00:00<?, ?it/s]

seconds_in_day:   0%|          | 0/157 [00:00<?, ?it/s]

Age:   0%|          | 0/157 [00:00<?, ?it/s]

Leucocytes:   0%|          | 0/157 [00:00<?, ?it/s]

CRP:   0%|          | 0/157 [00:00<?, ?it/s]

LacticAcid:   0%|          | 0/157 [00:00<?, ?it/s]

case_elapsed_time:   0%|          | 0/157 [00:00<?, ?it/s]

Positive Standardization
min,25%,50%,75%,max: [ 0.          7.25981951 10.88971519 12.53285575 12.8894558 ]
Mean:  [9.723665]
Std:  [3.2380226]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.70930815 10.77581167 12.8894558  12.8894558 ]
Mean:  [9.7339735]
Std:  [3.5853643]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.49180675 10.73550034 12.84376597 14.07053757]
Mean:  [9.965049]
Std:  [3.4384613]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.50439167 10.88361645 12.8894558  12.8894558 ]
Mean:  [9.75218]
Std:  [3.5314078]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.54802895  8.98281193 12.8894558  13.03230095]
Mean:  [9.807142]
Std:  [3.4479995]
Positive Standardization
min,25%,50%,75%,max: [ 0.          7.79296207  8.07713699 12.8894558  12.8894558 ]
Mean:  [9.312496]
Std:  [3.9069467]
Positive Standardization
min,25%,50%,75%,max: [ 0.          8.43185329  9.35953617 12.8894558  12.8894558 ]
Mean:  [9.892733]
Std:  [3.2

event_elapsed_time:   0%|          | 0/157 [00:00<?, ?it/s]

Positive Standardization
min,25%,50%,75%,max: [ 3.80666256  6.55613208 10.60281563 12.27770901 12.27770901]
Mean:  [9.489874]
Std:  [2.8058236]
Positive Standardization
min,25%,50%,75%,max: [ 4.20469284  7.40671062 11.86225271 12.27770901 12.42201042]
Mean:  [9.923759]
Std:  [2.7416258]
Positive Standardization
min,25%,50%,75%,max: [ 0.69314718  5.66050637 10.12433004 12.27770901 13.77525806]
Mean:  [9.150847]
Std:  [3.4109428]
Positive Standardization
min,25%,50%,75%,max: [ 2.48490667  7.72317421 10.361166   12.27770901 12.27770901]
Mean:  [9.797906]
Std:  [2.9272168]
Positive Standardization
min,25%,50%,75%,max: [ 2.07944155  7.02286816  9.57505322 12.27770901 12.38911438]
Mean:  [9.425709]
Std:  [3.0220447]
Positive Standardization
min,25%,50%,75%,max: [ 6.2499752   7.2420826  12.27770901 12.27770901 12.27770901]
Mean:  [9.892542]
Std:  [2.626086]
Positive Standardization
min,25%,50%,75%,max: [ 5.52942896  7.23849678 11.44679642 12.27770901 12.27770901]
Mean:  [9.746901]
Std:  [2.63