In [2]:
import os
import pandas as pd
import numpy as np
import time
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning import Trainer, seed_everything
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.feature_selection import mutual_info_classif, chi2
from sklearn.linear_model import LassoCV
import matplotlib.pyplot as plt
from pytorch_lightning.loggers import TensorBoardLogger
import seaborn as sns
import os
import sys
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from torch_explain.models.explainer import Explainer
from torch_explain.logic.metrics import formula_consistency
# from load_datasets import load_mimic
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import SMOTEN
from imblearn.combine import SMOTEENN
from torch.nn.functional import one_hot
from func_timeout import func_set_timeout, func_timeout, FunctionTimedOut
from datetime import date

In [3]:
# Directory the dataset CSV files are read from.
DATA_DIR = "./categorisedData/"

# os.listdir returns entries in arbitrary order and also lists non-CSV entries
# (e.g. a ".ipynb_checkpoints" folder), which pd.read_csv cannot parse.
# Keep only CSV files and sort them so every run processes the same list
# in the same order.
files = sorted(name for name in os.listdir(DATA_DIR) if name.lower().endswith(".csv"))

# Map file name -> DataFrame; the training cell looks datasets up by file name.
datasets = {file: pd.read_csv(os.path.join(DATA_DIR, file)) for file in files}


print(files)

# Per-file results; filled in by the training cell (keyed by file name).
results_dict = {}

['breastCancer.csv', 'clusteredData.csv', 'expertLabelledData.csv', 'metricExtractedData.csv', 'staticData.csv']


In [4]:
@func_set_timeout(90)
def explain_with_timeout(model, val_data, train_data, test_data, topk_expl, concepts,
                         max_minterm_complexity=2):
    """Run ``model.explain_class`` under the ``func_set_timeout(90)`` decorator.

    Args:
        model: Object exposing ``explain_class`` (the trained explainer model).
        val_data: Passed as ``val_dataloaders``.
        train_data: Passed as ``train_dataloaders``.
        test_data: Passed as ``test_dataloaders``.
        topk_expl: Passed as ``topk_explanations``.
        concepts: Passed as ``concept_names``.
        max_minterm_complexity: Passed through unchanged; defaults to 2, the
            value that was previously hard-coded.

    Returns:
        Whatever ``model.explain_class`` returns (the training loop unpacks it
        into a ``(results, explanations)`` pair).

    Raises:
        FunctionTimedOut: Raised by the ``func_timeout`` decorator when the
            call exceeds the 90-second limit; the training loop catches it.
    """
    return model.explain_class(
        val_dataloaders=val_data,
        train_dataloaders=train_data,
        test_dataloaders=test_data,
        topk_explanations=topk_expl,
        concept_names=concepts,
        max_minterm_complexity=max_minterm_complexity,
    )

In [6]:

np.set_printoptions(threshold=sys.maxsize)


def _macro_scores(y_true, y_pred):
    """Return ``[macro-F1, macro-recall, macro-precision]`` for two label tensors."""
    y_true_np, y_pred_np = y_true.numpy(), y_pred.numpy()
    return [f1_score(y_true_np, y_pred_np, average='macro'),
            recall_score(y_true_np, y_pred_np, average='macro'),
            precision_score(y_true_np, y_pred_np, average='macro')]


def _predict_labels(model, x):
    """Return the argmax class index per row of ``x`` as a CPU tensor.

    Runs without gradient tracking, moves ``x`` to the model's device and
    restores the model's train/eval flag afterwards so the call has no
    lasting effect on the model's mode.
    """
    device = next(model.parameters()).device
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            logits = model(x.to(device))
    finally:
        model.train(was_training)
    return torch.argmax(logits, dim=1).cpu()


for file in files:

    print(f"Training {file}\n")

    data = datasets[file]

    # Identifier column is dropped before building the feature tensor.
    if "PatientID" in data.columns:
        data = data.drop(columns=["PatientID"])

    targetName = "Mortality14Days"

    targetSeries = data[targetName]
    print(data[targetName].value_counts())
    data = data.drop(columns=[targetName])

    dataTensor = torch.FloatTensor(data.to_numpy())
    # Targets are one-hot encoded float tensors.
    targetTensor = one_hot(torch.tensor(targetSeries.values).to(torch.long)).to(torch.float)

    dataset = TensorDataset(dataTensor, targetTensor)

    total_loader = DataLoader(dataset)

    # 50/25/25 random split. NOTE(review): these loaders are overwritten inside
    # the fold loop; only train_size / val_size / test_size (reused there as
    # batch sizes) and n_concepts survive from this section.
    train_size = int(len(dataset) * 0.5)
    val_size = (len(dataset) - train_size) // 2
    test_size = len(dataset) - train_size - val_size
    train_data, val_data, test_data = random_split(dataset, [train_size, val_size, test_size])
    train_loader = DataLoader(train_data, batch_size=train_size, shuffle=True)
    val_loader = DataLoader(val_data, batch_size=val_size)
    test_loader = DataLoader(test_data, batch_size=test_size)
    n_concepts = next(iter(train_loader))[0].shape[1]
    n_classes = 2
    print("features:", n_concepts)

    # 5-fold cross-validation with explainer network

    seed_everything(42)

    base_dir = f'./runs'
    # os.makedirs(base_dir, exist_ok=True)

    n_splits = 5
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    results_list = []
    feature_selection = []
    explanations = {i: [] for i in range(n_classes)}

    explanations_list = []
    splitResults_list = []
    scores_list = []

    x = dataTensor
    y = targetTensor

    for split, (trainval_index, test_index) in enumerate(skf.split(x.cpu().detach().numpy(),
                                                                y.argmax(dim=1).cpu().detach().numpy())):
        print(f'Split [{split + 1}/{n_splits}]')

        x_trainval, x_test = torch.FloatTensor(x[trainval_index]), torch.FloatTensor(x[test_index])
        y_trainval, y_test = torch.FloatTensor(y[trainval_index]), torch.FloatTensor(y[test_index])
        x_train, x_val, y_train, y_val = train_test_split(x_trainval, y_trainval, test_size=0.2, random_state=42)
        print(f'{len(y_train)}/{len(y_val)}/{len(y_test)}')

        # Class counts of the training fold before resampling.
        print(pd.Series(np.argmax(y_train.numpy(), axis=1)).value_counts().values)

        # Resample the training fold only; validation and test folds are untouched.
        clf = SMOTEN(random_state=0)

        x_train, y_train = clf.fit_resample(x_train.numpy(), np.argmax(y_train.numpy(), axis=1))

        x_train = torch.FloatTensor(x_train)
        y_train = one_hot(torch.tensor(y_train).to(torch.long)).to(torch.float)

        # Class counts of the training fold after resampling.
        print(pd.Series(np.argmax(y_train.numpy(), axis=1)).value_counts().values)

        train_data = TensorDataset(x_train, y_train)
        val_data = TensorDataset(x_val, y_val)
        test_data = TensorDataset(x_test, y_test)
        # NOTE(review): batch sizes come from the 50/25/25 split above, not from
        # the fold sizes, so they may not cover a whole fold in a single batch.
        train_loader = DataLoader(train_data, batch_size=train_size, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=val_size)
        test_loader = DataLoader(test_data, batch_size=test_size)

        # Keep only the checkpoint with the highest validation F1.
        checkpoint_callback = ModelCheckpoint(dirpath=base_dir, monitor='f1_val', mode='max', save_top_k=1)

        logger = TensorBoardLogger("./runs/splits/", name=file)

        trainer = Trainer(max_epochs=400, gpus=1, auto_lr_find=True, deterministic=True,
                        check_val_every_n_epoch=1, default_root_dir=base_dir,
                        weights_save_path=base_dir, callbacks=[checkpoint_callback],
                        logger=logger, enable_progress_bar=False, gradient_clip_val=0.5)

        model = Explainer(n_concepts=n_concepts, n_classes=n_classes, l1=1e-3, lr=0.01,
                        explainer_hidden=[20], temperature=0.7)

        # NOTE: the timer starts before training, so the "Explaining time" /
        # 'extraction_time' reported below covers fit + test + explanation.
        start = time.time()
        trainer.fit(model, train_loader, val_loader)
        # print(f"Gamma: {model.model[0].concept_mask}")
        model.freeze()

        y_test_argmax = torch.argmax(y_test, dim=1)

        # Precision, Recall, F1 of the last-epoch model.
        y_pred = _predict_labels(model, x_test)
        scores = _macro_scores(y_test_argmax, y_pred)

        print(f"Before loading best: {scores}")

        # Reload the best-validation checkpoint and predict again with it.
        # Previously the last-epoch predictions were reused here, so the stored
        # scores did not describe the model that is tested and explained below.
        model = Explainer.load_from_checkpoint(checkpoint_callback.best_model_path)
        y_pred = _predict_labels(model, x_test)

        # Precision, Recall, F1 of the best checkpoint.
        scores = _macro_scores(y_test_argmax, y_pred)

        print(f"{file} split {split+1} scores: {scores}")

        print("\nTesting...\n")
        model_results = trainer.test(model, dataloaders=test_loader)

        scores_list.append(scores)

        print("\nExplaining\n")

        try:

            results, f = explain_with_timeout(model, val_data=val_loader, train_data=train_loader, test_data=test_loader,
                                        topk_expl=10,
                                        concepts=data.columns)

        except FunctionTimedOut:
            # A timed-out split contributes scores to scores_list but nothing to
            # results_list / splitResults_list.
            print("Explanation timed out, skipping...")
            continue

        end = time.time()

        print(f"Explaining time: {end - start}")
        results['model_accuracy'] = model_results[0]['test_acc_epoch']
        results['extraction_time'] = end - start

        results_list.append(results)
        extracted_concepts = []
        # Union / intersection of the per-class concept masks (threshold 0.5).
        all_concepts = model.model[0].concept_mask[0] > 0.5
        common_concepts = model.model[0].concept_mask[0] > 0.5
        for j in range(n_classes):
            n_used_concepts = sum(model.model[0].concept_mask[j] > 0.5)
            print(f"Number of features that impact on target {j}: {n_used_concepts}")
            print(f"Explanation for target {j}: {f[j]['explanation']}")
            print(f"Explanation accuracy: {f[j]['explanation_accuracy']}")
            explanations[j].append(f[j]['explanation'])
            extracted_concepts.append(n_used_concepts)
            all_concepts += model.model[0].concept_mask[j] > 0.5
            common_concepts *= model.model[0].concept_mask[j] > 0.5

        # NOTE(review): this appends the same dict object on every split, so all
        # entries of explanations_list alias the accumulated `explanations`.
        explanations_list.append(explanations)

        results['extracted_concepts'] = np.mean(extracted_concepts)
        results['common_concepts_ratio'] = sum(common_concepts) / sum(all_concepts)

        # Compare against standard feature selection.
        # NOTE(review): the MI / chi2 / Lasso scores are computed but not added
        # to `df` (their rows are commented out below).
        i_mutual_info = mutual_info_classif(x_trainval, y_trainval[:, 1])
        i_chi2 = chi2(x_trainval, y_trainval[:, 1])[0]
        i_chi2[np.isnan(i_chi2)] = 0
        lasso = LassoCV(cv=5, random_state=0).fit(x_trainval, y_trainval[:, 1])
        i_lasso = np.abs(lasso.coef_)
        i_mu = model.model[0].concept_mask[1]
        df = pd.DataFrame(np.hstack([
            i_mu.numpy(),
            # i_mutual_info / np.max(i_mutual_info),
            # i_chi2 / np.max(i_chi2),
            # i_lasso / np.max(i_lasso),
        ]).T, columns=['feature importance'])
        df['method'] = 'explainer'
        # df.iloc[90:, 1] = 'MI'
        # df.iloc[180:, 1] = 'CHI2'
        # df.iloc[270:, 1] = 'Lasso'
        df['feature'] = np.hstack([np.arange(0, n_concepts)])
        feature_selection.append(df)

        # Per-split record: [accuracy, time, F1, recall, precision, explanations].
        splitResults = [results['model_accuracy'], results['extraction_time'], *scores, f]

        splitResults_list.append(splitResults)

    results_dict[file] = splitResults_list


Global seed set to 42


Training breastCancer.csv

0    458
1    241
Name: Mortality14Days, dtype: int64
features: 89
Split [1/5]
447/112/140
[298 149]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 3.6 K 
-------------------------------------------
3.6 K     Trainable params
0         Non-trainable params
3.6 K     Total params
0.014     Total estimated model params size (MB)


[298 298]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.943019943019943, 0.9320652173913043, 0.9574442579717094]
breastCancer.csv split 1 scores: [0.983985358041638, 0.9791666666666667, 0.9893617021276595]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.9839853644371033
     test_acc_epoch         0.9857142567634583
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 19.386082649230957
Number of features that impact on target 0: 17
Explanation for target 0: ~Clump_Thickness_7 & ~Bland_Chromatin_10
Explanation accuracy: 0.5812562313060817
Number of features that impact on target 1: 17
Explanation for target 1: ~Clump_Thickness_1 & ~Bland_Chromatin_1
Explanation accuracy: 0.5736774193548386
Split [2/5]
447/112/140
[292 155]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 3.6 K 
-------------------------------------------
3.6 K     Trainable params
0         Non-trainable params
3.6 K     Total params
0.014     Total estimated model params size (MB)


[292 292]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.960565601937919, 0.9628623188405797, 0.9583987441130298]
breastCancer.csv split 2 scores: [0.9609353200513421, 0.9678442028985508, 0.9551663361974003]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.9609352946281433
     test_acc_epoch         0.9642857313156128
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining

Explaining time: 25.213157176971436
Number of features that impact on target 0: 82
Explanation for target 0: None
Explanation accuracy: 0
Number of features that impact on target 1: 80
Explanation for target 1: None
Explanation accuracy: 0


  rank_zero_warn(


Split [3/5]
447/112/140
[302 145]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 3.6 K 
-------------------------------------------
3.6 K     Trainable params
0         Non-trainable params
3.6 K     Total params
0.014     Total estimated model params size (MB)


[302 302]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.960565601937919, 0.9628623188405797, 0.9583987441130298]
breastCancer.csv split 3 scores: [0.9609353200513421, 0.9678442028985508, 0.9551663361974003]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.9609352946281433
     test_acc_epoch         0.9642857313156128
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining

Explaining time: 20.49801254272461
Number of features that impact on target 0: 85
Explanation for target 0: None
Explanation accuracy: 0
Number of features that impact on target 1: 86
Explanation for target 1: None
Explanation accuracy: 0


  rank_zero_warn(


Split [4/5]
447/112/140
[293 154]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 3.6 K 
-------------------------------------------
3.6 K     Trainable params
0         Non-trainable params
3.6 K     Total params
0.014     Total estimated model params size (MB)


[293 293]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.9529042386185244, 0.9529042386185244, 0.9529042386185244]
breastCancer.csv split 4 scores: [0.9765611920308053, 0.9788069073783359, 0.9744444444444444]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.9765611886978149
     test_acc_epoch         0.9785714149475098
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining

Explaining time: 21.208857536315918
Number of features that impact on target 0: 85
Explanation for target 0: None
Explanation accuracy: 0
Number of features that impact on target 1: 86
Explanation for target 1: None
Explanation accuracy: 0


  rank_zero_warn(


Split [5/5]
448/112/139
[296 152]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 3.6 K 
-------------------------------------------
3.6 K     Trainable params
0         Non-trainable params
3.6 K     Total params
0.014     Total estimated model params size (MB)


[296 296]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.9357076780758558, 0.9314331501831502, 0.9405095839177187]
breastCancer.csv split 5 scores: [0.9687640449438202, 0.9780219780219781, 0.9615384615384616]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch          0.968764066696167
     test_acc_epoch          0.971222996711731
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 20.565938711166382
Number of features that impact on target 0: 66
Explanation for target 0: Bland_Chromatin_1 | ~Cell_Size_Uniformity_5
Explanation accuracy: 0.4795779441797141
Number of features that impact on target 1: 63
Explanation for target 1: Cell_Size_Uniformity_6 | Marginal_Adhesion_7
Explanation accuracy: 0.5642633228840126


Global seed set to 42


Training clusteredData.csv

0    924
1     35
Name: Mortality14Days, dtype: int64
features: 50
Split [1/5]
613/154/192
[587  26]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: ./runs/splits/clusteredData.csv
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.1 K 
-------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.008     Total estimated model params size (MB)


[587 587]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5552123552123552, 0.5552123552123552, 0.5552123552123552]
clusteredData.csv split 1 scores: [0.488, 0.4945945945945946, 0.48157894736842105]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.4880000054836273
     test_acc_epoch              0.953125
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 29.431500673294067
Number of features that impact on target 0: 9
Explanation for target 0: Hemoglobin_Mean_high | ~Hemoglobin_Mean_high
Explanation accuracy: 0.4907161803713528
Number of features that impact on target 1: 9
Explanation for target 1: CVP_Mean_low & Platelets_Mean_high
Explanation accuracy: 0.47967479674796754
Split [2/5]
613/154/192
[591  22]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.1 K 
-------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.008     Total estimated model params size (MB)


[591 591]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.47540983606557374, 0.4702702702702703, 0.48066298342541436]
clusteredData.csv split 2 scores: [0.5606407322654462, 0.605019305019305, 0.5482954545454546]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5606407523155212
     test_acc_epoch         0.9010416865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 25.820793867111206
Number of features that impact on target 0: 16
Explanation for target 0: Arterial BP Mean_StdDev_high | ~Arterial BP Mean_StdDev_high | ~SVR_Mean_low
Explanation accuracy: 0.4907161803713528
Number of features that impact on target 1: 19
Explanation for target 1: ~CVP_Mean_low & ~SVR_StdDev_high
Explanation accuracy: 0.46114561766735684
Split [3/5]
613/154/192
[592  21]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.1 K 
-------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.008     Total estimated model params size (MB)


[592 592]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.48663101604278075, 0.4918918918918919, 0.48148148148148145]
clusteredData.csv split 3 scores: [0.473972602739726, 0.46756756756756757, 0.48055555555555557]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.4739726483821869
     test_acc_epoch         0.9010416865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 24.676698446273804
Number of features that impact on target 0: 15
Explanation for target 0: Arterial pH_StdDev_low | ~Arterial pH_StdDev_low
Explanation accuracy: 0.4907161803713528
Number of features that impact on target 1: 13
Explanation for target 1: None
Explanation accuracy: 0
Split [4/5]
613/154/192
[589  24]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.1 K 
-------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.008     Total estimated model params size (MB)


[589 589]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.488, 0.4945945945945946, 0.48157894736842105]
clusteredData.csv split 4 scores: [0.5293439077144917, 0.5416988416988417, 0.525]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5293439030647278
     test_acc_epoch         0.9114583134651184
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 24.87185263633728
Number of features that impact on target 0: 6
Explanation for target 0: Arterial BP Mean_Mean_low | Arterial PaCO2_Mean_high
Explanation accuracy: 0.5524475524475525
Number of features that impact on target 1: 3
Explanation for target 1: ~Arterial BP [Diastolic]_Mean_high
Explanation accuracy: 0.2827324478178368
Split [5/5]
614/154/191
[589  25]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.1 K 
-------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.008     Total estimated model params size (MB)


[589 589]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.48793565683646106, 0.4945652173913043, 0.48148148148148145]
clusteredData.csv split 5 scores: [0.48097826086956524, 0.48097826086956524, 0.48097826086956524]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.48097825050354004
     test_acc_epoch          0.926701545715332
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 27.248028993606567
Number of features that impact on target 0: 10
Explanation for target 0: Arterial PaO2_Mean_high | Hemoglobin_Mean_high | ~Hemoglobin_Mean_high
Explanation accuracy: 0.49066666666666664
Number of features that impact on target 1: 4
Explanation for target 1: ~Platelets_StdDev_low & ~Platelets_StdDev_medium
Explanation accuracy: 0.47527472527472525


Global seed set to 42


Training expertLabelledData.csv

0    1077
1      49
Name: Mortality14Days, dtype: int64
features: 153
Split [1/5]
720/180/226
[691  29]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: ./runs/splits/expertLabelledData.csv
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 6.2 K 
-------------------------------------------
6.2 K     Trainable params
0         Non-trainable params
6.2 K     Total params
0.025     Total estimated model params size (MB)


[691 691]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5330010939589158, 0.5314814814814814, 0.5348182283666154]
expertLabelledData.csv split 1 scores: [0.5511014806789455, 0.5675925925925926, 0.543452380952381]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5511015057563782
     test_acc_epoch         0.9026548862457275
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 25.081281661987305
Number of features that impact on target 0: 147
Explanation for target 0: CVP_Min_medium | ~CVP_Min_medium
Explanation accuracy: 0.48868778280542985
Number of features that impact on target 1: 149
Explanation for target 1: None
Explanation accuracy: 0
Split [2/5]
720/181/225
[688  32]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 6.2 K 
-------------------------------------------
6.2 K     Trainable params
0         Non-trainable params
6.2 K     Total params
0.025     Total estimated model params size (MB)


[688 688]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.48394495412844035, 0.48842592592592593, 0.47954545454545455]
expertLabelledData.csv split 2 scores: [0.4827586206896552, 0.4861111111111111, 0.4794520547945205]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.4827585816383362
     test_acc_epoch         0.9333333373069763
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 26.344837427139282
Number of features that impact on target 0: 12
Explanation for target 0: ~AST_medium | ~INR_medium
Explanation accuracy: 0.4874715261958998
Number of features that impact on target 1: 20
Explanation for target 1: CaO2_low & ~Temperature_C_Max_high
Explanation accuracy: 0.29463970328505823
Split [3/5]
720/181/225
[690  30]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 6.2 K 
-------------------------------------------
6.2 K     Trainable params
0         Non-trainable params
6.2 K     Total params
0.025     Total estimated model params size (MB)


[690 690]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5468873128447597, 0.5651162790697675, 0.5395927601809954]
expertLabelledData.csv split 3 scores: [0.5598591549295775, 0.5720930232558139, 0.5524712254570074]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5598591566085815
     test_acc_epoch         0.9111111164093018
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 26.064717769622803
Number of features that impact on target 0: 12
Explanation for target 0: Hamoglobin_high | Hamoglobin_medium
Explanation accuracy: 0.4899611398963731
Number of features that impact on target 1: 8
Explanation for target 1: CVP_Min_high & ~Arterial_PaCO2_medium
Explanation accuracy: 0.5717644424540976
Split [4/5]
720/181/225
[690  30]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 6.2 K 
-------------------------------------------
6.2 K     Trainable params
0         Non-trainable params
6.2 K     Total params
0.025     Total estimated model params size (MB)


[690 690]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5415025132454829, 0.536046511627907, 0.5507863695937091]
expertLabelledData.csv split 4 scores: [0.5290697674418605, 0.5290697674418605, 0.5290697674418605]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5290697813034058
     test_acc_epoch         0.9200000166893005
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 24.244171142578125
Number of features that impact on target 0: 25
Explanation for target 0: Arterial_BP_Diastolic_low | Arterial_pH_Max_low
Explanation accuracy: 0.5157124408092983
Number of features that impact on target 1: 9
Explanation for target 1: None
Explanation accuracy: 0
Split [5/5]
720/181/225
[690  30]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 6.2 K 
-------------------------------------------
6.2 K     Trainable params
0         Non-trainable params
6.2 K     Total params
0.025     Total estimated model params size (MB)


[690 690]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5220900594732371, 0.5244186046511629, 0.5205399061032864]
expertLabelledData.csv split 5 scores: [0.48630136986301364, 0.49534883720930234, 0.47757847533632286]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.48630136251449585
     test_acc_epoch         0.9466667175292969
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 25.463199377059937
Number of features that impact on target 0: 13
Explanation for target 0: SVI_high | ~SVI_high
Explanation accuracy: 0.48863636363636365
Number of features that impact on target 1: 12
Explanation for target 1: ~Hamoglobin_high
Explanation accuracy: 0.3937552831783601


Global seed set to 42


Training metricExtractedData.csv

0    924
1     35
Name: Mortality14Days, dtype: int64
features: 70
Split [1/5]
613/154/192
[587  26]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: ./runs/splits/metricExtractedData.csv
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.9 K 
-------------------------------------------
2.9 K     Trainable params
0         Non-trainable params
2.9 K     Total params
0.011     Total estimated model params size (MB)


[587 587]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5698924731182795, 0.5606177606177606, 0.5839572192513369]
metricExtractedData.csv split 1 scores: [0.5434782608695652, 0.5498069498069498, 0.5391621129326047]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch          0.54347825050354
     test_acc_epoch         0.9270833134651184
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 26.66673469543457
Number of features that impact on target 0: 10
Explanation for target 0: CVP__root_mean_square_high | ~CVP__root_mean_square_high
Explanation accuracy: 0.4907161803713528
Number of features that impact on target 1: 8
Explanation for target 1: CVP__quantile__q_0.1_very_low
Explanation accuracy: 0.48369747899159665
Split [2/5]
613/154/192
[591  22]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.9 K 
-------------------------------------------
2.9 K     Trainable params
0         Non-trainable params
2.9 K     Total params
0.011     Total estimated model params size (MB)


[591 591]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.48525469168900803, 0.4891891891891892, 0.48138297872340424]
metricExtractedData.csv split 2 scores: [0.5919854280510017, 0.6185328185328185, 0.5770969362129583]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5919854640960693
     test_acc_epoch         0.9270833134651184
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 25.25193977355957
Number of features that impact on target 0: 7
Explanation for target 0: CVP__quantile__q_0.2_medium | CVP__quantile__q_0.7_very_low | ~CVP__quantile__q_0.7_very_low
Explanation accuracy: 0.4907161803713528
Number of features that impact on target 1: 12
Explanation for target 1: ~CVP__quantile__q_0.2_high & ~CVP__variation_coefficient_low
Explanation accuracy: 0.2952576714138893
Split [3/5]
613/154/192
[592  21]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.9 K 
-------------------------------------------
2.9 K     Trainable params
0         Non-trainable params
2.9 K     Total params
0.011     Total estimated model params size (MB)


[592 592]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.4810810810810811, 0.4810810810810811, 0.4810810810810811]
metricExtractedData.csv split 3 scores: [0.48525469168900803, 0.4891891891891892, 0.48138297872340424]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.4852546751499176
     test_acc_epoch         0.9427083134651184
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 25.822234630584717
Number of features that impact on target 0: 10
Explanation for target 0: CVP__minimum_low | ~CVP__c3__lag_1_low
Explanation accuracy: 0.5060457936712117
Number of features that impact on target 1: 10
Explanation for target 1: CVP__c3__lag_1_low & ~CVP__minimum_very_low
Explanation accuracy: 0.4498567335243553
Split [4/5]
613/154/192
[589  24]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.9 K 
-------------------------------------------
2.9 K     Trainable params
0         Non-trainable params
2.9 K     Total params
0.011     Total estimated model params size (MB)


[589 589]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.4696132596685083, 0.4594594594594595, 0.480225988700565]
metricExtractedData.csv split 4 scores: [0.4696132596685083, 0.4594594594594595, 0.480225988700565]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.4696132242679596
     test_acc_epoch         0.8854166865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 25.723774194717407
Number of features that impact on target 0: 15
Explanation for target 0: CVP__quantile__q_0.2_high | CVP__minimum_high | ~CVP__quantile__q_0.2_high
Explanation accuracy: 0.4907161803713528
Number of features that impact on target 1: 9
Explanation for target 1: ~CVP__quantile__q_0.3_high & ~CVP__quantile__q_0.7_very_low
Explanation accuracy: 0.3530608840700584
Split [5/5]
614/154/191
[589  25]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.9 K 
-------------------------------------------
2.9 K     Trainable params
0         Non-trainable params
2.9 K     Total params
0.011     Total estimated model params size (MB)


[589 589]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.4738292011019284, 0.4673913043478261, 0.48044692737430167]
metricExtractedData.csv split 5 scores: [0.618346957311535, 0.6265527950310559, 0.6113387978142076]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.6183469891548157
     test_acc_epoch         0.9424083828926086
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 24.375815629959106
Number of features that impact on target 0: 16
Explanation for target 0: ~CVP__minimum_medium
Explanation accuracy: 0.5151951704740344
Number of features that impact on target 1: 11
Explanation for target 1: ~CVP__quantile__q_0.1_very_low & ~CVP__quantile__q_0.6_low
Explanation accuracy: 0.18942694134509994


Global seed set to 42


Training staticData.csv

0    4584
1     678
Name: Mortality14Days, dtype: int64
features: 31
Split [1/5]
3367/842/1053
[2928  439]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: ./runs/splits/staticData.csv
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 1.3 K 
-------------------------------------------
1.3 K     Trainable params
0         Non-trainable params
1.3 K     Total params
0.005     Total estimated model params size (MB)


[2928 2928]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5608028930550484, 0.5912542497915196, 0.5578189300411522]
staticData.csv split 1 scores: [0.5896532084434671, 0.6039515042658284, 0.5821379694865427]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5896531939506531
     test_acc_epoch          0.790123462677002
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 52.41857981681824
Number of features that impact on target 0: 8
Explanation for target 0: cns_low | cns_medium
Explanation accuracy: 0.527226826786394
Number of features that impact on target 1: 12
Explanation for target 1: sofa_high
Explanation accuracy: 0.5636740331491712
Split [2/5]
3367/842/1053
[2946  421]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 1.3 K 
-------------------------------------------
1.3 K     Trainable params
0         Non-trainable params
1.3 K     Total params
0.005     Total estimated model params size (MB)


[2946 2946]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5524891138048724, 0.5701616524472385, 0.549076795369394]
staticData.csv split 2 scores: [0.5563984903768487, 0.5674514080441337, 0.5521635599211221]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5563985109329224
     test_acc_epoch         0.7701804041862488
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 48.99111080169678
Number of features that impact on target 0: 9
Explanation for target 0: cns_low | cns_medium
Explanation accuracy: 0.5299050857546529
Number of features that impact on target 1: 7
Explanation for target 1: age_high
Explanation accuracy: 0.4692635861249618
Split [3/5]
3368/842/1052
[2935  433]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 1.3 K 
-------------------------------------------
1.3 K     Trainable params
0         Non-trainable params
1.3 K     Total params
0.005     Total estimated model params size (MB)


[2935 2935]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.4979155525654516, 0.5842118017690536, 0.5389777910715621]
staticData.csv split 3 scores: [0.5668642951251647, 0.5656286602851488, 0.5682346913922414]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5668643116950989
     test_acc_epoch         0.8098859190940857
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 48.55330443382263
Number of features that impact on target 0: 10
Explanation for target 0: ~sofa_high
Explanation accuracy: 0.5871578876146013
Number of features that impact on target 1: 6
Explanation for target 1: ~coagulation_medium & ~gender_female
Explanation accuracy: 0.44951725113276
Split [4/5]
3368/842/1052
[2933  435]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 1.3 K 
-------------------------------------------
1.3 K     Trainable params
0         Non-trainable params
1.3 K     Total params
0.005     Total estimated model params size (MB)


[2933 2933]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5678403755868544, 0.6104446867805646, 0.5655582460262281]
staticData.csv split 4 scores: [0.5946248463236957, 0.5937356112928631, 0.5955533596837945]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5946248173713684
     test_acc_epoch         0.8203421831130981
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 50.615866899490356
Number of features that impact on target 0: 23
Explanation for target 0: None
Explanation accuracy: 0
Number of features that impact on target 1: 29
Explanation for target 1: None
Explanation accuracy: 0
Split [5/5]
3368/842/1052
[2931  437]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 1.3 K 
-------------------------------------------
1.3 K     Trainable params
0         Non-trainable params
1.3 K     Total params
0.005     Total estimated model params size (MB)


[2931 2931]


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Before loading best: [0.5389053186557233, 0.5576515540714102, 0.5383500200240289]
staticData.csv split 5 scores: [0.5234960532911386, 0.5234234523503725, 0.5235712266246617]

Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      f1_test_epoch         0.5234960317611694
     test_acc_epoch          0.786121666431427
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


Explaining time: 46.974565505981445
Number of features that impact on target 0: 13
Explanation for target 0: gender_male | ~coagulation_medium
Explanation accuracy: 0.48539133738601825
Number of features that impact on target 1: 13
Explanation for target 1: None
Explanation accuracy: 0


In [7]:
# Macro-averaged F1 of `model` on the training split.
# NOTE(review): `model`, `x_train` and `y_train` are not defined in this cell;
# presumably they are left over from the last split of the training loop
# above — confirm before trusting this number after a partial re-run.

# Evaluation only: no gradients are needed, so skip autograd bookkeeping
# for the forward pass.
with torch.no_grad():
    y_pred = torch.argmax(model(x_train), dim=1)

# The targets are reduced with the same argmax as the predictions
# (assumes y_train holds one column per class — TODO confirm).
y = torch.argmax(y_train, dim=1)

print("train f1:" , f1_score(y, y_pred, average='macro'))

train f1: 0.8449716428842396


In [8]:
# Macro-averaged F1 of `model` on the held-out test split.
# NOTE(review): `model`, `x_test` and `y_test` are not defined in this cell;
# presumably they are left over from the last split of the training loop
# above — confirm before trusting this number after a partial re-run.

# Evaluation only: no gradients are needed, so skip autograd bookkeeping
# for the forward pass.
with torch.no_grad():
    y_pred = torch.argmax(model(x_test), dim=1)

# The targets are reduced with the same argmax as the predictions
# (assumes y_test holds one column per class — TODO confirm).
y = torch.argmax(y_test, dim=1)

print("test f1:", f1_score(y, y_pred, average='macro'))

test f1: 0.5234960532911386


In [24]:
# Aggregate the per-split results stored in `results_dict` into one row of
# k-fold means per dataset file, and remember, for every file, the split whose
# explanation for target class 1 was the most accurate.
#
# Layout of every entry of results_dict[file], as consumed below:
#   split[:5] -> model_accuracy, extraction_time, f1, recall, precision
#   split[5]  -> one explanation dict per target class; the first key of each
#                dict is skipped when building the table columns.
# NOTE(review): `results_dict` is filled by an earlier cell that is not part of
# this one — the field order above is taken from how the values are used here.

kFoldMeans = []

# Per file: [f1 of the best split, explanation dicts of the best split].
# Stays [0, 0] when no split reaches a class-1 explanation accuracy above 0.
bestExplanationsDict = {f: [0, 0] for f in results_dict.keys()}

# Best class-1 explanation accuracy seen so far, per file. It is tracked
# separately because the first slot of bestExplanationsDict holds the split's
# f1 score; comparing a new accuracy against that f1 would select the wrong
# split once a first candidate has been stored.
bestMinorityAcc = {f: 0 for f in results_dict.keys()}

# Numeric column names of the most recently summarised file (empty when
# results_dict is empty, so the final table can still be built).
combinedCols = []

for file_name, splits in results_dict.items():

    cols = ['file', 'model_accuracy', 'extraction_time', 'f1', 'recall', 'precision']

    # One group of explanation columns per target class, suffixed with the
    # position of that class in the explanation list.
    for idx, d in enumerate(splits[0][5]):
        cols.extend([f"{key}_{idx}" for key in list(d)[1:]])

    rows = []

    for split in splits:

        # Index 1 is the second explanation dict (reported as the minority
        # class below); assumes at least two target classes.
        minority_acc = split[5][1]['explanation_accuracy']

        if minority_acc > bestMinorityAcc[file_name]:
            bestMinorityAcc[file_name] = minority_acc
            bestExplanationsDict[file_name] = [split[2], split[5]]

        row = [file_name]
        row.extend(split[:5])

        for d in split[5]:
            row.extend(list(d.values())[1:])

        rows.append(row)

    df = pd.DataFrame(columns=cols, data=rows)
    df = df.set_index('file')

    # describe() is computed once per file; on this mixed-dtype frame it keeps
    # only the numeric columns, so the explanation strings drop out of the means.
    summary = df.describe()

    combinedCols = list(summary.columns)

    row = [file_name]
    row.extend(np.round(summary.loc['mean'].values, 2))

    kFoldMeans.append(row)

# Column names of the last summarised file, without the leading "file" column.
kFoldMeansCols = list(combinedCols)

combinedCols.insert(0, "file")

totalMeans = pd.DataFrame(columns=combinedCols, data=kFoldMeans)

totalMeans = totalMeans.set_index('file')

# Shorten the headers so the table fits on screen.
cols = totalMeans.columns

cols = [c.replace("explanation", "expl").replace("accuracy", "acc").replace("complexity", "comp") for c in cols]

totalMeans.columns = cols

display(totalMeans)

print("Best explanations on minority class:\n")
for i in bestExplanationsDict:
    print(f"{i}:\n")
    print(bestExplanationsDict[i][1])

# Make sure the cache directory exists so the export cannot fail on a fresh checkout.
os.makedirs("./processingCache", exist_ok=True)

totalMeans.to_csv(f"./processingCache/totalMeans{date.today()}.csv")

Unnamed: 0_level_0,model_acc,extraction_time,f1,recall,precision,expl_acc_0,expl_fidelity_0,expl_comp_0,expl_acc_1,expl_fidelity_1,expl_comp_1
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
breastCancer.csv,0.97,21.37,0.97,0.97,0.97,0.21,0.28,0.8,0.23,0.25,0.8
clusteredData.csv,0.92,26.41,0.51,0.52,0.5,0.5,0.93,2.4,0.34,0.58,1.4
expertLabelledData.csv,0.92,25.44,0.52,0.53,0.52,0.49,0.89,2.0,0.25,0.36,1.0
metricExtractedData.csv,0.92,25.57,0.54,0.55,0.54,0.5,0.89,2.2,0.35,0.56,1.8
staticData.csv,0.8,49.51,0.57,0.57,0.56,0.43,0.63,1.4,0.3,0.41,0.8


Best explanations on minority class:

breastCancer.csv:

[{'target_class': 0, 'explanation': '~Clump_Thickness_7 & ~Bland_Chromatin_10', 'explanation_accuracy': 0.5812562313060817, 'explanation_fidelity': 0.7357142857142858, 'explanation_complexity': 2}, {'target_class': 1, 'explanation': '~Clump_Thickness_1 & ~Bland_Chromatin_1', 'explanation_accuracy': 0.5736774193548386, 'explanation_fidelity': 0.5642857142857143, 'explanation_complexity': 2}]
clusteredData.csv:

[{'target_class': 0, 'explanation': 'Hemoglobin_Mean_high | ~Hemoglobin_Mean_high', 'explanation_accuracy': 0.4907161803713528, 'explanation_fidelity': 0.9895833333333334, 'explanation_complexity': 2}, {'target_class': 1, 'explanation': 'CVP_Mean_low & Platelets_Mean_high', 'explanation_accuracy': 0.47967479674796754, 'explanation_fidelity': 0.9479166666666666, 'explanation_complexity': 2}]
expertLabelledData.csv:

[{'target_class': 0, 'explanation': 'Hamoglobin_high | Hamoglobin_medium', 'explanation_accuracy': 0.489961139