In [1]:
""" The model code from this file is adapted from the following:
https://github.com/pietrobarbiero/pytorch_explain/blob/master/experiments/elens/mnist.py

Credit to Pietro Barbiero for the original code."""

' The model code from this file is adapted from the following:\nhttps://github.com/pietrobarbiero/pytorch_explain/blob/master/experiments/elens/mnist.py\n\nCredit to Pietro Barbiero for the original code.'

In [1]:
import pandas as pd
import numpy as np
import torch.multiprocessing as mp
from sklearn.preprocessing import KBinsDiscretizer, OneHotEncoder, MinMaxScaler
from matplotlib import pyplot as plt
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import silhouette_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.cluster import KMeans
from sklearn.preprocessing import PolynomialFeatures
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor, as_completed
from scipy.interpolate import interp1d
from Categorization import Categorizer
import torch
from torch.utils.data import DataLoader, TensorDataset, random_split
from torch.utils.data.sampler import WeightedRandomSampler
import copy
from torch.nn.functional import one_hot
import imblearn
from pytorch_lightning import Trainer, seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint, StochasticWeightAveraging
from pytorch_lightning.callbacks.early_stopping import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from collections import Counter
from tslearn.clustering import TimeSeriesKMeans, silhouette_score
from tslearn.utils import to_time_series_dataset
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from dask.dataframe import from_pandas
from tsfresh.utilities.distribution import MultiprocessingDistributor
from sklearn.model_selection import StratifiedKFold, train_test_split
import hashlib 
from sklearn.metrics import precision_recall_fscore_support
from importlib import reload
from temporalHelper import TemporalHelper as TH
from concurrent.futures import ProcessPoolExecutor
import os
from torch_explain.models.explainer import Explainer
import time
from torchmetrics.functional import precision_recall
from sklearn.feature_selection import mutual_info_classif, chi2
from sklearn.linear_model import LassoCV
from torch_explain.logic.metrics import formula_consistency
import seaborn as sns
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold
from func_timeout import func_set_timeout, func_timeout, FunctionTimedOut
from datetime import date
from pytorch_lightning.callbacks import ModelPruning



In [2]:
files = os.listdir("./categorisedData/")


datasets = {file : pd.read_csv("./categorisedData/" + file).set_index('PatientID') for file in files}


print(files)


['categorisedData_featureSelected.csv', 'clusteredData.csv', 'expertLabelledData.csv', 'expertLabelledDataSimple.csv', 'expertLabelledDataSimple_featureSelected.csv', 'metricExtractedData.csv', 'staticData.csv', 'staticDataSimple.csv', 'staticDataSimple_featureSelected.csv', 'staticData_featureSelected.csv']


In [5]:

print(files[3:4])

print(len(datasets))

results_dict = {}


['expertLabelledDataSimple.csv']
10


In [3]:
@func_set_timeout(300)
def explain_with_timeout(model, val_data, train_data, test_data, topk_expl, concepts):

    return model.explain_class(val_dataloaders=val_data, train_dataloaders=train_data, test_dataloaders=test_data, topk_explanations=topk_expl, concept_names=concepts)

In [6]:
for file in files[:1]:

    print(f"Training {file}\n")

    data = datasets[file]

    if file in ["staticData.csv", "staticDataSimple.csv", "staticData_featureSelected.csv", "staticDataSimple_featureSelected.csv"]:
        targetName = "deathperiod"
    else:
        targetName = "Mortality14Days"

    targetSeries = data[targetName]
    print(data[targetName].value_counts())
    data = data.drop(columns=[targetName])

    dataTensor = torch.FloatTensor(data.to_numpy())
    targetTensor = one_hot(torch.tensor(targetSeries.values).to(torch.long)).to(torch.float)


    dataset = TensorDataset(dataTensor, targetTensor)
    train_size = int(0.8 * len(dataset))

    val_size = (len(dataset) - train_size) // 2
    test_size = len(dataset) - train_size - val_size

    train_data, val_data, test_data = random_split(dataset, [train_size, val_size, test_size])

    # class_count = targetSeries.value_counts().values
    # weights = 1 / torch.Tensor(class_count)

    # sampler = WeightedRandomSampler(weights, train_size)

    train_loader = DataLoader(train_data, batch_size=train_size)
    val_loader = DataLoader(val_data, batch_size=val_size)
    test_loader = DataLoader(test_data, batch_size=test_size)



    n_concepts = next(iter(train_loader))[0].shape[1]
    # self.n_concepts = n_concepts


    n_classes = targetTensor.shape[1]
    # self.n_classes = n_classes

    print("Training on {} classes".format(n_classes))

    print("Num concepts: {}".format(n_concepts))
    print("Num classes: {}".format(n_classes))

    base_dir = f'./results/mimicLEN/explainer'
    os.makedirs(base_dir, exist_ok=True)

    seed_everything(40)

    n_splits = 5

    # self.n_splits = n_splits

    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # self.skf = skf

    results_list = []
    feature_selection = []
    explanations = {i: [] for i in range(n_classes)}

    explanations_list = []
    splitResults_list = []
    scores_list = []

    x = dataTensor
    y = targetTensor


    for split, (trainval_index, test_index) in enumerate(skf.split(x.cpu().detach().numpy(),
                                                            y.argmax(dim=1).cpu().detach().numpy())):
        

        print(f'Split [{split + 1}/{n_splits}]')
        x_trainval, x_test = torch.FloatTensor(x[trainval_index]), torch.FloatTensor(x[test_index])
        y_trainval, y_test = torch.FloatTensor(y[trainval_index]), torch.FloatTensor(y[test_index])
        x_train, x_val, y_train, y_val = train_test_split(x_trainval, y_trainval, test_size=0.2, random_state=42, shuffle=True)

        # Rebalancing training set
        obj = imblearn.over_sampling.SMOTEN(random_state=0, n_jobs=4)

        print(sorted(Counter(torch.argmax(y_train, axis=1).numpy()).items()))

        x_train, y_train = obj.fit_resample(x_train.numpy(), torch.argmax(y_train, axis=1).numpy())

        print(sorted(Counter(y_train).items()))

        class_count = pd.Series(targetSeries).value_counts()
        print(class_count)
        weights = 1. / torch.FloatTensor(class_count.values)

        print(weights)
        # print(y_train.numpy().astype(np.int64))

        # print(weights)

        # print(y_train_unbalanced.numpy().astype(np.int64))

        # train_weights = np.array([weights[t] for t in torch.argmax(y_train, axis=1).numpy()]).astype(np.float64)

        # test_weights = np.array([weights[t] for t in np.argmax(y_test.numpy(), axis=1).astype(np.int64)]).astype(np.float64)
        
        # val_weights = np.array([weights[t] for t in np.argmax(y_val.numpy(), axis=1).astype(np.int64)]).astype(np.float64)

        # print(train_weights)

        # print(train_weights)
        # print(len(train_weights), len(y_train.numpy()))

        # replacement=False

        # train_sampler = WeightedRandomSampler(train_weights, len(y_train), replacement=True)

        # test_sampler = WeightedRandomSampler(test_weights, len(y_test), replacement=True)

        # val_sampler = WeightedRandomSampler(val_weights, len(y_val), replacement=True)


        targetTensor = one_hot(torch.tensor(y_train).to(torch.long)).to(torch.float)
        # targetTensor = y_train
        x_train = torch.FloatTensor(x_train)

        # print(targetTensor)

        # y_val = one_hot(y_val.to(torch.long)).to(torch.float)

        # y_test = one_hot(y_test.to(torch.long)).to(torch.float)



        train_data = TensorDataset(x_train, targetTensor)
        val_data = TensorDataset(x_val, y_val)
        test_data = TensorDataset(x_test, y_test)

        train_loader = DataLoader(train_data, batch_size=train_size)
        val_loader = DataLoader(val_data, batch_size=val_size)
        test_loader = DataLoader(test_data, batch_size=len(y_test.numpy()))

        # y_ = pd.Series(np.argmax(next(iter(train_loader))[1].numpy(), axis=1)).value_counts()
        # print(y_)

        print("Sizes: ", (train_size, test_size, val_size))

        checkpoint_callback = ModelCheckpoint(dirpath=base_dir, monitor='val_loss', mode='min', save_top_k=1)

        # Constructs the way that the model will be trained
        logger = TensorBoardLogger("./runs/splits/", name=file)

        # Constructs the way that the model will be trained
        trainer = Trainer(max_epochs=800, gpus=1, auto_lr_find=True, deterministic=True,
                    check_val_every_n_epoch=1, default_root_dir=base_dir,
                    weights_save_path=base_dir, callbacks=[checkpoint_callback],
                                                            # StochasticWeightAveraging(swa_lrs=1e-2)],
                    enable_progress_bar=False, logger=logger)

        # gradient_clip_val=0.25

        # This is the model itself, which is extended from pytorch_lightning
        model = Explainer(n_concepts=n_concepts, n_classes=n_classes, l1=1e-3, lr=0.001,
                        explainer_hidden=[20, 40, 20], temperature=0.7)


        start = time.time()
        trainer.fit(model, train_loader, val_loader)
        # print(f"Gamma: {model.model[0].concept_mask}")
        model.freeze()

        # Precision, Recall, F1
        y_pred = torch.argmax(model(x_test), axis=1)
        y_test_argmax = torch.argmax(y_test, axis=1)

        scores = [f1_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), 
                recall_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), 
                precision_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro')]

        print(f"Before loading best: {scores}")

        scores_list.append(scores)
    
        model = model.load_from_checkpoint(checkpoint_callback.best_model_path)

        

        # Precision, Recall, F1
        # print(model(x_test))
        y_pred = torch.argmax(model(x_test), axis=1)
        # print(y_pred)
        y_test_argmax = torch.argmax(y_test, axis=1)

        scores = [f1_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), 
                recall_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), 
                precision_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro')]

        print(f"{file} split {split+1} scores: {scores}")

        print("\nTesting...\n")
        model_results = trainer.test(model, dataloaders=test_loader)

        scores_list.append(scores)


        print("\nExplaining\n")

        try:

            results, f = explain_with_timeout(model, val_data=val_loader, train_data=train_loader, test_data=test_loader,
                                        topk_expl=10,
                                        concepts=data.columns)

        except FunctionTimedOut:
            print("Explanation timed out, skipping...")
            # explanations_list.append(None)
            # results_list.append(None)
            continue

        end = time.time()
        # explanations_list.append(f)

        print(f"Explaining time: {end - start}")
        results['model_accuracy'] = model_results[0]['test_acc_epoch']
        results['extraction_time'] = end - start

        results_list.append(results)
        extracted_concepts = []
        all_concepts = model.model[0].concept_mask[0] > 0.5
        common_concepts = model.model[0].concept_mask[0] > 0.5
        for j in range(n_classes):
            # print(f[j]['explanation'])
            n_used_concepts = sum(model.model[0].concept_mask[j] > 0.5)
            print(f"Number of features that impact on target {j}: {n_used_concepts}")
            print(f"Explanation for target {j}: {f[j]['explanation']}")
            print(f"Explanation accuracy: {f[j]['explanation_accuracy']}")
            explanations[j].append(f[j]['explanation'])
            extracted_concepts.append(n_used_concepts)
            all_concepts += model.model[0].concept_mask[j] > 0.5
            common_concepts *= model.model[0].concept_mask[j] > 0.5

        explanations_list.append(explanations)

        results['extracted_concepts'] = np.mean(extracted_concepts)
        results['common_concepts_ratio'] = sum(common_concepts) / sum(all_concepts)

        

        prec_rec = precision_recall(y_pred, y_test_argmax, num_classes = n_classes)

        print(prec_rec)

        # compare against standard feature selection
        i_mutual_info = mutual_info_classif(x_trainval, y_trainval[:, 1])
        i_chi2 = chi2(x_trainval, y_trainval[:, 1])[0]
        i_chi2[np.isnan(i_chi2)] = 0
        lasso = LassoCV(cv=5, random_state=0).fit(x_trainval, y_trainval[:, 1])
        i_lasso = np.abs(lasso.coef_)
        i_mu = model.model[0].concept_mask[1]
        # print(model.model[0].concept_mask)
        df = pd.DataFrame(np.hstack([
            i_mu.numpy(),
            # i_mutual_info / np.max(i_mutual_info),
            # i_chi2 / np.max(i_chi2),
            # i_lasso / np.max(i_lasso),
        ]).T, columns=['feature importance'])
        df['method'] = 'explainer'
        # df.iloc[90:, 1] = 'MI'
        # df.iloc[180:, 1] = 'CHI2'
        # df.iloc[270:, 1] = 'Lasso'
        df['feature'] = np.hstack([np.arange(0, n_concepts)])
        feature_selection.append(df)

        splitResults = [results['model_accuracy'], results['extraction_time'], *scores, f]

        splitResults_list.append(splitResults)

        break


    results_dict[file] = splitResults_list

# results_dict[file] = [results['model_accuracy'], results['extraction_time'], *scores, f]
# self.feature_selection = feature_selection
# # print(self.feature_selection)

# self.df = df
# self.explanations = explanations
# self.results_list = results_list
# print(results_dict)

Global seed set to 40


Training categorisedData_featureSelected.csv

0    924
1     35
Name: Mortality14Days, dtype: int64
Training on 2 classes
Num concepts: 14
Num classes: 2
Split [1/5]
[(0, 587), (1, 26)]


  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.3 K 
-------------------------------------------
2.3 K     Trainable params
0         Non-trainable params
2.3 K     Total params
0.009     Total estimated model params size (MB)


[(0, 587), (1, 587)]
0    924
1     35
Name: Mortality14Days, dtype: int64
tensor([0.0011, 0.0286])
Sizes:  (767, 96, 96)


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


In [15]:
results_dict.keys()

dict_keys(['staticDataSimple.csv'])

In [16]:
kFoldMeans = []

bestExplanationsDict = {f:[0,0] for f in results_dict.keys()}

# print(bestExplanationsDict)

for x in results_dict:

    cols = ['file']

    cols.extend(['model_accuracy', 'extraction_time', 'f1', 'recall', 'precision'])

    for idx, d in enumerate(results_dict[x][0][5]):
        cols.extend([str(x) + "_" + str(idx) for x in d])

    # print(cols)

    rows = []

    for split in results_dict[x]:


        if split[5][1]['explanation_accuracy'] > bestExplanationsDict[x][0]:
            bestExplanationsDict[x] = [split[2], split[5]]

        row = [x]

        row.extend(split[:5])


        for d in split[5]:

            row.extend(d.values())

        rows.append(row)


    df = pd.DataFrame(columns=cols, data=rows)

    df = df.set_index('file')

    combinedCols = list(df.describe().columns)

    # print(combinedCols)

    row = [x]
    row.extend(df.describe().loc['mean'].values)

    kFoldMeans.append(row)

# print(kFoldMeans)



kFoldMeansCols = list(df.describe().columns)

combinedCols.insert(0, "file")


# print(kFoldMeansCols)

totalMeans = pd.DataFrame(columns=combinedCols, data=kFoldMeans)

totalMeans = totalMeans.set_index('file')

display(totalMeans)

print("Best explanations on minority class:\n")
for i in bestExplanationsDict:
    print(f"{i}:\n")
    print(bestExplanationsDict[i][1])

totalMeans.to_csv(f"./processingCache/totalMeans{date.today()}.csv")

Unnamed: 0_level_0,model_accuracy,extraction_time,f1,recall,precision,target_class_0,explanation_accuracy_0,explanation_fidelity_0,explanation_complexity_0,target_class_1,explanation_accuracy_1,explanation_fidelity_1,explanation_complexity_1
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
staticDataSimple.csv,0.73289,270.052255,0.579786,0.635167,0.577061,0.0,0.439353,0.668251,7.0,1.0,0.510636,0.748099,29.0


Best explanations on minority class:

staticDataSimple.csv:

[{'target_class': 0, 'explanation': '~los_low | (comorbidity_low & sofa_low) | (comorbidity_low & ~sofa_high) | (respiration_high & ~sofa_high)', 'explanation_accuracy': 0.4393528594065987, 'explanation_fidelity': 0.6682509505703422, 'explanation_complexity': 7}, {'target_class': 1, 'explanation': '(comorbidity_high & liver_medium & ~coagulation_medium & ~cns_high & ~gender_female) | (age_high & sofa_high & ~coagulation_medium & ~cns_high & ~cns_low & ~cns_medium & ~gender_female) | (los_low & age_high & sofa_high & ~respiration_low & ~coagulation_high & ~coagulation_medium & ~cns_high & ~gender_female) | (los_low & age_high & sofa_high & ~respiration_low & ~coagulation_medium & ~renal_low & ~cns_high & ~cns_medium & ~gender_female)', 'explanation_accuracy': 0.5106361579079709, 'explanation_fidelity': 0.7480988593155894, 'explanation_complexity': 29}]


: 

In [None]:

for file in results_dict:
    print(file)

    feature_selection = results_dict[file][2]
    explanations = results_dict[file][3][0]

    base_dir = f'./results/mimicLEN/explainer'

    consistencies = []
    # print(explanations)
    for j in range(n_classes):
        if explanations[j][0] is None:
            continue
        consistencies.append(formula_consistency(explanations[j]))
    explanation_consistency = np.mean(consistencies)

    concat_feature_selection = pd.concat(feature_selection, axis=0)

    # print("Feature selection: ", feature_selection)

    f1 = concat_feature_selection[concat_feature_selection['feature'] <= n_concepts//3]
    f2 = concat_feature_selection[(concat_feature_selection['feature'] > n_concepts//3) & (concat_feature_selection['feature'] <= (n_concepts*2)//3)]
    f3 = concat_feature_selection[concat_feature_selection['feature'] > (n_concepts*2)//3]

    sets = [f1, f2, f3]

    fig = plt.figure(figsize=[30, 10])
    fig.suptitle("Feature Importance")

    # print(sets)

    for i in range(len(sets)):
        f = sets[i]

        plt.subplot(1, 3, i+1)
        # print(f.iloc[:, 0][f.iloc[:, 0] != False])

        # sums = [featureDF['feature importance'].sum() for _, featureDF in f.groupby('feature')]
        # print(len(sums))
        # print(len(f['feature']))

        ax = sns.barplot(y=data.columns[f['feature']], x=f.iloc[:,0], orient='h', hue=f['method'], errwidth=0.5, ci=None)
                        # hue=f['method'],  , errcolor='k')
        ax.set(xlim=(0,0.5))
        ax.set_title("")
        ax.get_legend().remove()

    plt.xlabel('')
    plt.tight_layout()
    plt.savefig(os.path.join(base_dir, 'barplot_mimic.png'))
    plt.savefig(os.path.join(base_dir, 'barplot_mimic.pdf'))
    plt.show()


    results_list = [x for x in results_list if x is not None]

    results_df = pd.DataFrame(results_list)
    results_df['explanation_consistency'] = explanation_consistency
    results_df.to_csv(os.path.join(base_dir, 'results_aware_mimic.csv'))
    results_df


results_df.mean()

results_df.sem()

x = dataTensor
y = targetTensor

dt_scores, rf_scores = [], []
for split, (trainval_index, test_index) in enumerate(
        skf.split(x.cpu().detach().numpy(), y.argmax(dim=1).cpu().detach().numpy())):
    print(f'Split [{split + 1}/{n_splits}]')
    x_trainval, x_test = x[trainval_index], x[test_index]
    y_trainval, y_test = y[trainval_index].argmax(dim=1), y[test_index].argmax(dim=1)

    dt_model = DecisionTreeClassifier(max_depth=5, random_state=split)
    dt_model.fit(x_trainval, y_trainval)
    dt_scores.append(dt_model.score(x_test, y_test))

    rf_model = RandomForestClassifier(random_state=split)
    rf_model.fit(x_trainval, y_trainval)
    rf_scores.append(rf_model.score(x_test, y_test))

print(f'Random forest scores: {np.mean(rf_scores)} (+/- {np.std(rf_scores)})')
print(f'Decision tree scores: {np.mean(dt_scores)} (+/- {np.std(dt_scores)})')
print(f'Mu net scores (model): {results_df["model_accuracy"].mean()} (+/- {results_df["model_accuracy"].std()})')
print(f'Mu net scores (exp): {results_df["explanation_accuracy"].mean()} (+/- {results_df["explanation_accuracy"].std()})')


NameError: name 'results_df' is not defined

In [11]:
results_dict = {}


for file in files[:1]:

    
    print(f"Training {file}\n")

    data = datasets[file]

    if file in ["staticData.csv", "staticDataSimple.csv"]:
        targetName = "deathperiod"
    else:
        targetName = "Mortality14Days"

    targetSeries = data[targetName]
    data = data.drop(columns=[targetName])


    dataTensor = torch.FloatTensor(data.to_numpy())
    targetTensor = one_hot(torch.tensor(targetSeries.values).to(torch.long)).to(torch.float)


    dataset = TensorDataset(dataTensor, targetTensor)
    train_size = int(0.8 * len(dataset))

    val_size = (len(dataset) - train_size) // 2
    test_size = len(dataset) - train_size - val_size

    train_data, val_data, test_data = random_split(dataset, [train_size, val_size, test_size])

    class_count = targetSeries.value_counts().values
    weights = 1 / torch.Tensor(class_count)

    print(weights)

    sampler = WeightedRandomSampler(weights, train_size)

    train_loader = DataLoader(train_data, batch_size=train_size, sampler=sampler)
    val_loader = DataLoader(val_data, batch_size=val_size)
    test_loader = DataLoader(test_data, batch_size=test_size)



    n_concepts = dataTensor.shape[1]
    # self.n_concepts = n_concepts


    n_classes = targetTensor.shape[1]
    # self.n_classes = n_classes

    print("Training on {} classes".format(n_classes))

    print("Num concepts: {}".format(n_concepts))
    print("Num classes: {}".format(n_classes))

    base_dir = f'./results/mimicLEN/explainer'
    os.makedirs(base_dir, exist_ok=True)

    seed_everything(40)

    # n_splits = 10

    # self.n_splits = n_splits

    # skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # self.skf = skf

    results_list = []
    feature_selection = []
    explanations = {i: [] for i in range(n_classes)}

    scores_list = []

    


    # x = dataTensor
    # y = targetTensor
        

    # x_trainval, x_test = torch.FloatTensor(dataTensor[trainval_index]), torch.FloatTensor(dataTensor[test_index])
    # y_trainval, y_test = torch.FloatTensor(y[trainval_index]), torch.FloatTensor(y[test_index])
    # x_train_unbalanced, x_val, y_train_unbalanced, y_val = train_test_split(x_trainval, y_trainval, test_size=0.2, random_state=42)
    # print(f'{len(y_train_unbalanced)}/{len(y_val)}/{len(y_test)}')


    # # Rebalancing training set
    # # obj = imblearn.over_sampling.SMOTEN(random_state=0, n_jobs=4)

    # # print(sorted(Counter(torch.argmax(y_train_unbalanced, axis=1).numpy()).items()))

    # x_train, y_train = x_train_unbalanced.numpy(), torch.argmax(y_train_unbalanced, axis=1).numpy()

    # print(sorted(Counter(y_train).items()))

    # class_count = pd.Series(np.argmax(y_train_unbalanced.numpy(), axis=1)).value_counts().values
    # weights = 1 / torch.FloatTensor(class_count)

    # print("Weights: {}".format(weights))

    # sampler = WeightedRandomSampler(weights, train_size)

    # y_train = one_hot(torch.tensor(y_train).to(torch.long)).to(torch.float)
    # x_train = torch.FloatTensor(x_train)

    

    # train_data = TensorDataset(x_train, y_train)
    # val_data = TensorDataset(x_val, y_val)
    # test_data = TensorDataset(x_test, y_test)
    # train_loader = DataLoader(train_data, batch_size=train_size, sampler=sampler)
    # val_loader = DataLoader(val_data, batch_size=val_size)
    # test_loader = DataLoader(test_data, batch_size=test_size)

    print("Sizes: ", (train_size, test_size, val_size))

    checkpoint_callback = ModelCheckpoint(dirpath=base_dir, monitor='val_acc', save_top_k=3)

    # Constructs the way that the model will be trained
    logger = TensorBoardLogger("./runs/splits/", name=file)

    # Constructs the way that the model will be trained
    trainer = Trainer(max_epochs=400, gpus=1, auto_lr_find=True, deterministic=True,
                    check_val_every_n_epoch=1, default_root_dir=base_dir,
                    weights_save_path=base_dir, callbacks=[checkpoint_callback, StochasticWeightAveraging(swa_lrs=1e-2)], 
                    enable_progress_bar=False, log_every_n_steps=1, logger=logger,
                    gradient_clip_val=0.5)

    # This is the model itself, which is extended from pytorch_lightning
    model = Explainer(n_concepts=n_concepts, n_classes=n_classes, l1=1e-3, lr=0.001,
                    explainer_hidden=[30, 40, 30, 20, 10], temperature=0.7)


    start = time.time()
    trainer.fit(model, train_loader, val_loader)
    # print(f"Gamma: {model.model[0].concept_mask}")
    model.freeze()

    model = model.load_from_checkpoint(checkpoint_callback.best_model_path)

    print("\nTesting...\n")
    model_results = trainer.test(model, dataloaders=test_loader)
    
    # Precision, Recall, F1
    x_test, y_test = next(iter(test_loader))
    y_pred = torch.argmax(model(x_test), axis=1)
    y_test_argmax = torch.argmax(y_test, axis=1)

    scores = [f1_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), recall_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), precision_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro')]

    print(f"{file} Scores: {scores}")

    print("\nExplaining\n")
    
    continue

    try:

        results, f = explain_with_timeout(model, val_data=test_loader, train_data=train_loader, test_data=test_loader,
                                    topk_expl=10,
                                    concepts=data.columns)
    except FunctionTimedOut:
        print("Explanation timed out, skipping...")
        continue


    end = time.time()

    print(f"Explaining time: {end - start}")
    results['model_accuracy'] = model_results[0]['test_acc_epoch']
    results['extraction_time'] = end - start

    # results_list.append(results)
    extracted_concepts = []
    all_concepts = model.model[0].concept_mask[0] > 0.5
    common_concepts = model.model[0].concept_mask[0] > 0.5
    for j in range(n_classes):
        # print(f[j]['explanation'])
        n_used_concepts = sum(model.model[0].concept_mask[j] > 0.5)
        print(f"Number of features that impact on target {j}: {n_used_concepts}")
        print(f"Explanation for target {j}: {f[j]['explanation']}")
        print(f"Explanation accuracy: {f[j]['explanation_accuracy']}")
        explanations[j].append(f[j]['explanation'])
        extracted_concepts.append(n_used_concepts)
        all_concepts += model.model[0].concept_mask[j] > 0.5
        common_concepts *= model.model[0].concept_mask[j] > 0.5

    # explanations_list.append(explanations)

    results['extracted_concepts'] = np.mean(extracted_concepts)
    results['common_concepts_ratio'] = sum(common_concepts) / sum(all_concepts)

    

    # compare against standard feature selection
    i_mutual_info = mutual_info_classif(x_test, y_test[:, 1])
    i_chi2 = chi2(x_test, y_test[:, 1])[0]
    i_chi2[np.isnan(i_chi2)] = 0
    lasso = LassoCV(cv=5, random_state=0).fit(x_test, y_test[:, 1])
    i_lasso = np.abs(lasso.coef_)
    i_mu = model.model[0].concept_mask[1]
    # print(model.model[0].concept_mask)
    df = pd.DataFrame(np.hstack([
        i_mu.numpy(),
        # i_mutual_info / np.max(i_mutual_info),
        # i_chi2 / np.max(i_chi2),
        # i_lasso / np.max(i_lasso),
    ]).T, columns=['feature importance'])
    df['method'] = 'explainer'
    # df.iloc[90:, 1] = 'MI'
    # df.iloc[180:, 1] = 'CHI2'
    # df.iloc[270:, 1] = 'Lasso'
    df['feature'] = np.hstack([np.arange(0, n_concepts)])
    feature_selection.append(df)


    results_dict[file] = [results['model_accuracy'], results['extraction_time'], *scores, f]

Global seed set to 40
  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
Missing logger folder: ./runs/splits/clusteredData.csv
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 10.6 K
-------------------------------------------
10.6 K    Trainable params
0         Non-trainable params
10.6 K    Total params
0.042     Total estimated model params size (MB)


Training clusteredData.csv

tensor([0.0011, 0.0286])
Training on 2 classes
Num concepts: 120
Num classes: 2
Sizes:  (767, 96, 96)


  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Testing...

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_acc_epoch         0.8541666865348816
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
clusteredData.csv Scores: [0.35098591549295777, 0.4193548387096774, 0.4902301346070343]

Explaining



  rank_zero_warn(


In [77]:
for file in files[:1]:

    
    print(f"Training {file}\n")

    data = datasets[file]

    if file in ["staticData.csv", "staticDataSimple.csv"]:
        targetName = "deathperiod"
    else:
        targetName = "Mortality14Days"

    targetSeries = data[targetName]
    data = data.drop(columns=[targetName])


    dataTensor = torch.FloatTensor(data.to_numpy())
    targetTensor = (torch.tensor(targetSeries.values).to(torch.long)).to(torch.float)


    dataset = TensorDataset(dataTensor, targetTensor)
    train_size = int(0.8 * len(dataset))

    val_size = (len(dataset) - train_size) // 2
    test_size = len(dataset) - train_size - val_size

    x_train_unbalanced, x_temp, y_train_unbalanced, y_temp = train_test_split(dataTensor, targetTensor, test_size=0.2, random_state=42, shuffle=True)

    x_test, x_val, y_test, y_val = train_test_split(x_temp, y_temp, test_size=0.2, random_state=42, shuffle=True)


    class_count = pd.Series(y_train_unbalanced.numpy()).value_counts()
    print(class_count)
    t_weights = 1 / torch.Tensor(class_count.values)

    print(t_weights)

    # print(y_train_unbalanced.numpy().astype(np.int64))

    train_weights = np.array([weights[t] for t in y_train_unbalanced.numpy().astype(np.int64)])

    # print(samples_weight)

    train_sampler = WeightedRandomSampler(train_weights, len(y_train_unbalanced), replacement=True)



    class_count = pd.Series(y_val.numpy()).value_counts()
    # print(class_count)
    v_weights = 1 / torch.Tensor(class_count.values)
    val_weights = np.array([v_weights[t] for t in y_val.numpy().astype(np.int64)])

    print(v_weights)

    val_sampler = WeightedRandomSampler(val_weights, len(y_val.numpy()), replacement=True)

    # Rebalancing training set
    # obj = imblearn.over_sampling.SMOTEN(random_state=0, n_jobs=4)

    # print(sorted(Counter(y_train_unbalanced.numpy()).items()))

    # x_train, y_train = obj.fit_resample(x_train_unbalanced.numpy(), y_train_unbalanced.numpy())

    # print(sorted(Counter(y_train).items()))

    targetTensor = one_hot(torch.Tensor(y_train_unbalanced).to(torch.long)).to(torch.float)


    print(targetTensor)

    y_val = one_hot(torch.Tensor(y_val).to(torch.long)).to(torch.float)

    y_test = one_hot(torch.Tensor(y_test).to(torch.long)).to(torch.float)

    x_train = torch.FloatTensor(x_train_unbalanced)
    x_val = torch.FloatTensor(x_val)

    x_test = torch.FloatTensor(x_test)


    train_data = TensorDataset(x_train, targetTensor)
    val_data = TensorDataset(x_val, y_val)
    test_data = TensorDataset(x_test, y_test)

    train_loader = DataLoader(train_data, batch_size=train_size, sampler=train_sampler)
    val_loader = DataLoader(val_data, batch_size=val_size)
    test_loader = DataLoader(test_data, batch_size=test_size)

    n_concepts = dataTensor.shape[1]
    # self.n_concepts = n_concepts


    n_classes = targetTensor.shape[1]
    # self.n_classes = n_classes

    print("Training on {} classes".format(n_classes))

    print("Num concepts: {}".format(n_concepts))
    print("Num classes: {}".format(n_classes))

    base_dir = f'./results/mimicLEN/explainer'
    os.makedirs(base_dir, exist_ok=True)

    seed_everything(40)

    n_splits = 10

    # self.n_splits = n_splits

    # skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

    # self.skf = skf

    results_list = []
    feature_selection = []
    explanations = {i: [] for i in range(n_classes)}

    scores_list = []

    x = dataTensor
    y = targetTensor


        

    # x_trainval, x_test = torch.FloatTensor(x[trainval_index]), torch.FloatTensor(x[test_index])
    # y_trainval, y_test = torch.FloatTensor(y[trainval_index]), torch.FloatTensor(y[test_index])
    # x_train_unbalanced, x_val, y_train_unbalanced, y_val = train_test_split(dataTensor, targetTensor, test_size=0.2, random_state=42)
    # print(f'{len(y_train_unbalanced)}/{len(y_val)}/{len(y_test)}')


    

    # y_train = one_hot(torch.tensor(y_train).to(torch.long)).to(torch.float)
    # x_train = torch.FloatTensor(x_train)

    # train_data = TensorDataset(x_train, y_train)
    # val_data = TensorDataset(x_val, y_val)
    # test_data = TensorDataset(x_test, y_test)
    # train_loader = DataLoader(train_data, batch_size=train_size)
    # val_loader = DataLoader(val_data, batch_size=val_size)
    # test_loader = DataLoader(test_data, batch_size=test_size)

    checkpoint_callback = ModelCheckpoint(dirpath=base_dir, monitor='val_loss', mode='min', save_top_k=1)

    logger = TensorBoardLogger("./runs/splits/", name=file)

    # Constructs the way that the model will be trained
    trainer = Trainer(max_epochs=1600, gpus=1, auto_lr_find=True, deterministic=True,
                    check_val_every_n_epoch=1, default_root_dir=base_dir,
                    weights_save_path=base_dir, callbacks=[checkpoint_callback],
                                                            # StochasticWeightAveraging(swa_lrs=1e-2)],
                    enable_progress_bar=False, logger=logger, gradient_clip_val=0.25)

    # This is the model itself, which is extended from pytorch_lightning
    model = Explainer(n_concepts=n_concepts, n_classes=n_classes, l1=1e-3, lr=0.0001,
                    explainer_hidden=[20], temperature=0.7)

    start = time.time()
    trainer.fit(model, train_loader, val_loader)
    # print(f"Gamma: {model.model[0].concept_mask}")
    model.freeze()

    x_test, y_test = next(iter(test_loader))
    y_pred = torch.argmax(model(x_test), axis=1)
    # print("Predictions:", y_pred)
    y_test_argmax = torch.argmax(y_test, axis=1)
    # print("Actual:", y_test_argmax)

    scores = [f1_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), recall_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), precision_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro')]

    print(scores)

    model = model.load_from_checkpoint(checkpoint_callback.best_model_path)

    print("\nTesting...\n")

    x_test, y_test = next(iter(test_loader))
    y_pred = torch.argmax(model(x_test), axis=1)
    # print("Predictions:", y_pred)
    y_test_argmax = torch.argmax(y_test, axis=1)
    # print("Actual:", y_test_argmax)

    scores = [f1_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), recall_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro'), precision_score(y_test_argmax.numpy(), y_pred.numpy(), average='macro')]

    print(scores)

    model_results = trainer.test(model, dataloaders=test_loader)
    
    print("\nExplaining\n")
    
    break
    
    try:

        results, f = explain_with_timeout(model, val_data=test_loader, train_data=train_loader, test_data=test_loader,
                                    topk_expl=10,
                                    concepts=data.columns)
    except FunctionTimedOut:
        print("Explanation timed out, skipping...")
        results_list.append(None)
        continue

    end = time.time()

    print(f"Explaining time: {end - start}")
    results['model_accuracy'] = model_results[0]['test_acc_epoch']
    results['extraction_time'] = end - start

    # results_list.append(results)
    extracted_concepts = []
    all_concepts = model.model[0].concept_mask[0] > 0.5
    common_concepts = model.model[0].concept_mask[0] > 0.5
    for j in range(n_classes):
        # print(f[j]['explanation'])
        n_used_concepts = sum(model.model[0].concept_mask[j] > 0.5)
        print(f"Number of features that impact on target {j}: {n_used_concepts}")
        print(f"Explanation for target {j}: {f[j]['explanation']}")
        print(f"Explanation accuracy: {f[j]['explanation_accuracy']}")
        explanations[j].append(f[j]['explanation'])
        extracted_concepts.append(n_used_concepts)
        all_concepts += model.model[0].concept_mask[j] > 0.5
        common_concepts *= model.model[0].concept_mask[j] > 0.5

    # explanations_list.append(explanations)

    # results['extracted_concepts'] = np.mean(extracted_concepts)
    # results['common_concepts_ratio'] = sum(common_concepts) / sum(all_concepts)

    # # Precision, Recall, F1
    # # print(x_test)
    
    # # print(f"{file} split {split+1} scores: {scores}")

    # # compare against standard feature selection
    # i_mutual_info = mutual_info_classif(x_test, y_test[:, 1])
    # i_chi2 = chi2(x_test, y_test[:, 1])[0]
    # i_chi2[np.isnan(i_chi2)] = 0
    # lasso = LassoCV(cv=5, random_state=0).fit(x_test, y_test[:, 1])
    # i_lasso = np.abs(lasso.coef_)
    # i_mu = model.model[0].concept_mask[1]
    # # print(model.model[0].concept_mask)
    # df = pd.DataFrame(np.hstack([
    #     i_mu.numpy(),
    #     # i_mutual_info / np.max(i_mutual_info),
    #     # i_chi2 / np.max(i_chi2),
    #     # i_lasso / np.max(i_lasso),
    # ]).T, columns=['feature importance'])
    # df['method'] = 'explainer'
    # # df.iloc[90:, 1] = 'MI'
    # # df.iloc[180:, 1] = 'CHI2'
    # # df.iloc[270:, 1] = 'Lasso'
    # df['feature'] = np.hstack([np.arange(0, n_concepts)])
    # feature_selection.append(df)


    results_dict[file] = [results['model_accuracy'], results['extraction_time'], *scores, f]

Global seed set to 40
  rank_zero_deprecation(
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type             | Params
-------------------------------------------
0 | loss  | CrossEntropyLoss | 0     
1 | model | Sequential       | 2.1 K 
-------------------------------------------
2.1 K     Trainable params
0         Non-trainable params
2.1 K     Total params
0.008     Total estimated model params size (MB)


Training clusteredData.csv

0.0    744
1.0     23
dtype: int64
tensor([0.0013, 0.0435])
tensor([0.0278, 0.3333])
tensor([[1., 0.],
        [1., 0.],
        [1., 0.],
        ...,
        [1., 0.],
        [1., 0.],
        [1., 0.]])
Training on 2 classes
Num concepts: 50
Num classes: 2


  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


[0.5353685778108711, 0.7, 0.5567226890756303]

Testing...

[0.5425957690108634, 0.7055555555555555, 0.5595813204508857]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     test_acc_epoch         0.6797385811805725
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

Explaining



  rank_zero_warn(


In [39]:
# If it doesn't learn on clusterData, the code is probably wrong. Check rebalancing is hooked up.

x, y = next(iter(test_loader))

y_argmax = np.argmax(y.numpy(), axis=1)

y_hat = model(x)

y_hat = np.argmax(y_hat.detach().numpy(), axis=1)

y = np.argmax(y.numpy(), axis=1)

print(f"Predicted: {y_hat}\n Actual: {y}")

minCorrect = [y_hat[i] == y[i] if y_hat[i] == 1 else False for i in range(len(y_hat))]

print("minority class correct %:", 100*np.sum(minCorrect) / np.sum(y))

Predicted: [0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 1 0 0 0 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0]
 Actual: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0
 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0]
minority class correct %: 10.0


In [78]:
scores = [f1_score(y_argmax, y_hat, average='macro'), recall_score(y_argmax, y_hat, average='macro'), precision_score(y_argmax, y_hat, average='macro')]
scores

[0.5425957690108634, 0.7055555555555555, 0.5595813204508857]

: 

In [None]:
cols = ['file']

cols.append('model_accuracy')
cols.append('extraction_time')

for idx, d in enumerate(results_dict['clusteredData.csv'][5]):
    cols.extend([str(x) + "_" + str(idx) for x in d])

print(cols)

rows = []

for x in files[:1]:

    row = [x]

    row.extend(results_dict[x][:2])

    for d in results_dict[x][5]:
        row.extend(d.values())

    rows.append(row)

totalDF = pd.DataFrame(columns=cols, data=rows)

totalDF = totalDF.set_index('file')

display(totalDF)

['file', 'model_accuracy', 'extraction_time', 'target_class_0', 'explanation_0', 'explanation_accuracy_0', 'explanation_fidelity_0', 'explanation_complexity_0', 'target_class_1', 'explanation_1', 'explanation_accuracy_1', 'explanation_fidelity_1', 'explanation_complexity_1']


Unnamed: 0_level_0,model_accuracy,extraction_time,target_class_0,explanation_0,explanation_accuracy_0,explanation_fidelity_0,explanation_complexity_0,target_class_1,explanation_1,explanation_accuracy_1,explanation_fidelity_1,explanation_complexity_1
file,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
clusteredData.csv,0.9375,50.876622,0,SVRI_Mean_medium | ~CVP_StdDev_high,0.631545,0.911458,2,1,~Arterial BP [Diastolic]_Mean_very_low & ~Arte...,0.428231,0.59375,5


In [None]:
totalDF.to_csv("./processingCache/explainer_results.csv")