In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append('..')

import numpy as np
import pandas as pd
import torch
import random
import csv
import matplotlib.pyplot as plt
import torch
from torch.utils.data import TensorDataset, DataLoader
from torch.autograd import Variable
from torchmetrics.classification import AUROC, Accuracy, ConfusionMatrix, F1Score
import os, subprocess, gc, time, datetime
from itertools import product

import models.models_original as models_original
import models.models_3d_atomics as models_3d_atomics
import models.models_3d as models_3d
from models.data import *
from models.helper import *
from models.param_initializations import *
from models.optimization_strategy import *

# Pick the torch device through the project helper; it is used below when
# creating the metric objects and when constructing the model.
device = get_free_gpu()

current device cuda:8


In [2]:
# Build the MIMIC dataloaders plus dataset metadata for a binary target,
# using a fixed split seed so this cell gives the same loaders on every run.
train_loader, val_loader, test_loader, class_weights, num_classes, changing_dim, static_dim, seq_len = get_MIMIC_dataloader(output_dim = 2, batch_size = 512, random_state = 1)

print(class_weights, num_classes, seq_len)

# Sanity check: shape and device of every tensor in the first training batch.
# An explicit loop is used instead of a list comprehension so that no
# throwaway list of `None` values is built just for the printing side effect.
for batch in train_loader:
    for batch_tensor in batch:
        print(batch_tensor.shape, batch_tensor.device)
    break

# Number of batches per training epoch (displayed as the cell result).
len(train_loader)

tensor([0.5797, 3.6376], dtype=torch.float64) 2 6
torch.Size([512, 6, 27]) cpu
torch.Size([512, 6, 27]) cpu
torch.Size([512, 8]) cpu
torch.Size([512, 2]) cpu


35

In [3]:
# Binary-task torchmetrics objects, moved to the selected device so they can
# be updated with tensors that live on that device.
auroc_metric = AUROC(task="binary").to(device)
accuracy_metric = Accuracy(task="binary").to(device)
f1_metric = F1Score(task="binary").to(device)
conf_matrix = ConfusionMatrix(task="binary").to(device)

In [4]:
print(changing_dim, static_dim, seq_len)

# Default seed for the interactive cells; the multi-seed loops use `random_seeds`.
random_seed = 1
set_seed(random_seed)

# Where model checkpoints and greedy-selection CSVs are stored.
# NOTE(review): hard-coded absolute path; consider deriving it from a
# configurable base directory so the notebook runs on other machines.
experiment_folder = "/workdir/optimal-summaries-public/_models/vasopressor/original/"
# Filename template; `{seed}` is filled in with `.format(seed=...)` per run.
top_k_file = experiment_folder + "top-k/bottleneck_topkinds_seed_{seed}.csv"
# NOTE(review): called with the unformatted template path; presumably the
# helper creates the parent directory of the file -- confirm in models.helper.
makedir(top_k_file)

# Seeds 1, 2 and 3 are used for the repeated experiments.
random_seeds = range(1,4)


27 8 6


## Optimization

In [5]:
def get_model(random_seed):
    """Return the 4-concept CBM for `random_seed` and print its test metrics.

    The function seeds the RNGs with `set_seed`, derives the checkpoint path
    inside the notebook-level `experiment_folder`, rebuilds the MIMIC
    dataloaders with `random_state=random_seed`, and calls
    `try_load_else_fit` on the model (judging by its name and the recorded
    "Loaded model from ..." output, this loads an existing checkpoint or
    trains one). Finally the model is evaluated on the test loader.

    Args:
        random_seed: Seed used for `set_seed`, the data split and the
            checkpoint filename.

    Returns:
        The `models_original.CBM` instance.
    """
    set_seed(random_seed)

    config = dict(n_concepts=4)

    # Resolve the checkpoint path for this configuration and seed.
    makedir(experiment_folder)
    path_template = get_filename_from_dict(experiment_folder, config)
    checkpoint_path = path_template.format(seed=random_seed, **config)

    # Only some of the returned values are needed here; the rest are ignored.
    loader_bundle = get_MIMIC_dataloader(random_state=random_seed)
    train_dl, val_dl, test_dl, cls_weights, _n_classes, n_changing, n_static, n_steps = loader_bundle

    cbm = models_original.CBM(
        static_dim=n_static,
        changing_dim=n_changing,
        seq_len=n_steps,
        output_dim=2,
        device=device,
        **config,
    )
    cbm.try_load_else_fit(
        train_dl,
        val_dl,
        p_weight=cls_weights.to(device),
        save_model_path=checkpoint_path,
        max_epochs=10000,
    )

    evaluate_classification(cbm, test_dl)
    return cbm

# Load (or train) and evaluate the seed-1 model for interactive inspection.
model = get_model(1)



Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/n_concepts_4_seed_1.pt
AUC macro 0.915
ACC macro 0.837
 F1 macro 0.845


In [None]:
# Plot the bottleneck layer's weights with the project helper.
visualize_top100_weights_per_channel(model.bottleneck)


In [6]:
# Metrics reported alongside the optimized score during greedy selection.
track_metrics={"acc": accuracy_metric,
               "f1": f1_metric,
               "auc": auroc_metric,
               }

# Run greedy forward selection once per seed and keep every result.
# NOTE: the recorded output shows this takes roughly 4-5.5 hours per seed.
# NOTE(review): the loop rebinds the notebook-level `random_seed`, `model`
# and the dataloader variables, so cells below see the last seed's values
# unless they reassign them.
results = []
for random_seed in random_seeds:
    print("random_seed", random_seed)
    model = get_model(random_seed)
    # Rebuild the loaders for this seed; get_model does not return them.
    train_loader, val_loader, test_loader, class_weights, num_classes, changing_dim, static_dim, seq_len = get_MIMIC_dataloader(random_state = random_seed)
    # One set of top feature indices per regularized layer.
    top_k_inds = [get_top_features_per_concept(layer) for layer in model.regularized_layers]
    save_path = top_k_file.format(seed=random_seed)
    
    # Selection is scored with AUROC on the validation loader; results are
    # also written to `save_path`.
    greedy_results = greedy_forward_selection(model=model, layers_to_prune=model.regularized_layers, top_k_inds=top_k_inds, val_loader=val_loader, optimize_metric=auroc_metric, track_metrics=track_metrics, save_path=save_path)
    results.append(greedy_results)
    

random_seed 1




Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/n_concepts_4_seed_1.pt
AUC macro 0.915
ACC macro 0.837
 F1 macro 0.845
Found 4 Concepts
90th percentile per concept [8.141751  0.8794138 4.321822  7.642881 ]
['Concept 0 len: 79', 'Concept 1 len: 272', 'Concept 2 len: 77', 'Concept 3 len: 71']


100%|██████████| 40/40 [4:08:22<00:00, 372.57s/it, Score=0.91963, acc=0.858, f1=0.863, auc=0.92]   


random_seed 2




Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/n_concepts_4_seed_2.pt
AUC macro 0.921
ACC macro 0.844
 F1 macro 0.852
Found 4 Concepts
90th percentile per concept [ 0.28210557 10.270896    6.4929314   2.3336902 ]
['Concept 0 len: 299', 'Concept 1 len: 95', 'Concept 2 len: 105', 'Concept 3 len: 150']


100%|██████████| 40/40 [5:18:27<00:00, 477.68s/it, Score=0.92956, acc=0.842, f1=0.855, auc=0.93]   


random_seed 3




Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/n_concepts_4_seed_3.pt
AUC macro 0.914
ACC macro 0.837
 F1 macro 0.845
Found 4 Concepts
90th percentile per concept [1.3721063  9.55452    0.41133872 7.4182534 ]
['Concept 0 len: 239', 'Concept 1 len: 81', 'Concept 2 len: 295', 'Concept 3 len: 72']


100%|██████████| 40/40 [5:24:46<00:00, 487.17s/it, Score=0.92909, acc=0.858, f1=0.865, auc=0.929]  


In [None]:
# Inspect the stored greedy-selection results for a single seed.
random_seed = 1
model = get_model(random_seed)
top_k_inds = get_top_features_per_concept(model.bottleneck)
greedy_results = read_df_from_csv(top_k_file.format(seed=random_seed))

# Allow up to 100 rows so the results frame is not truncated when displayed.
pd.set_option('display.max_rows', 100)

print(len(top_k_inds))
# Print each entry on its own line. An explicit loop is used instead of a
# list comprehension so no throwaway list is built just for the side effect.
for concept_inds in top_k_inds:
    print(concept_inds)
greedy_results


In [None]:
# Plot the bottleneck weights together with the greedy-selection results
# loaded in the previous cell (requires `top_k_inds` and `greedy_results`).
plot_selected_weights(model.bottleneck.weight, top_k_inds, greedy_results, 100)

In [8]:
# Evaluate the greedy selection for 3 experiments using the stored
# selection files referenced by `top_k_file`.
result_df = evaluate_greedy_selection(get_model, get_MIMIC_dataloader, top_k_file, n_experiments=3)
# Mean metrics per (Split, Mask, Finetuned) group.
# NOTE(review): the mean also averages the `Seed` column (shown as 2.0 in the
# recorded output), which carries no meaning; consider dropping it first.
result_df.groupby(["Split", "Mask", "Finetuned"]).mean()



Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/n_concepts_4_seed_1.pt
AUC macro 0.915
ACC macro 0.837
 F1 macro 0.845
AUC macro 0.914
ACC macro 0.839
 F1 macro 0.846
AUC macro 0.915
ACC macro 0.837
 F1 macro 0.845
AUC macro 0.920
ACC macro 0.858
 F1 macro 0.863
AUC macro 0.916
ACC macro 0.858
 F1 macro 0.863
Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/finetuned/n_concepts_4_seed_1.pt
AUC macro 0.909
ACC macro 0.837
 F1 macro 0.844
AUC macro 0.909
ACC macro 0.842
 F1 macro 0.848




Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/n_concepts_4_seed_2.pt
AUC macro 0.921
ACC macro 0.844
 F1 macro 0.852
AUC macro 0.923
ACC macro 0.848
 F1 macro 0.856
AUC macro 0.921
ACC macro 0.844
 F1 macro 0.852
AUC macro 0.930
ACC macro 0.842
 F1 macro 0.855
AUC macro 0.926
ACC macro 0.836
 F1 macro 0.850
Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/finetuned/n_concepts_4_seed_2.pt
AUC macro 0.922
ACC macro 0.852
 F1 macro 0.859
AUC macro 0.917
ACC macro 0.848
 F1 macro 0.855




Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/n_concepts_4_seed_3.pt
AUC macro 0.914
ACC macro 0.837
 F1 macro 0.845
AUC macro 0.920
ACC macro 0.843
 F1 macro 0.850
AUC macro 0.914
ACC macro 0.837
 F1 macro 0.845
AUC macro 0.929
ACC macro 0.858
 F1 macro 0.865
AUC macro 0.923
ACC macro 0.855
 F1 macro 0.862
Loaded model from /workdir/optimal-summaries-public/_models/vasopressor/original/finetuned/n_concepts_4_seed_3.pt
AUC macro 0.924
ACC macro 0.852
 F1 macro 0.859
AUC macro 0.915
ACC macro 0.846
 F1 macro 0.853


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Seed,AUC,ACC,F1
Split,Mask,Finetuned,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
test,Empty,False,2.0,0.916475,0.839654,0.84736
test,Greedy,False,2.0,0.921804,0.849551,0.858044
test,Greedy,True,2.0,0.913676,0.845146,0.852178
val,Empty,False,2.0,0.919034,0.843257,0.850629
val,Greedy,False,2.0,0.926095,0.852808,0.860713
val,Greedy,True,2.0,0.918211,0.847032,0.853861


In [None]:
def aggregate_greedy_results(top_k_file, seeds=range(1, 4)):
    """Average the greedy-selection metric tables over several seeds.

    For every seed the CSV at `top_k_file.format(seed=seed)` is read with
    `read_df_from_csv`, reduced to the metric columns, and the resulting
    frames are averaged element-wise (rows are aligned on the index).

    Args:
        top_k_file: Path template containing a `{seed}` placeholder.
        seeds: Iterable of seeds to average over. Defaults to 1, 2 and 3.

    Returns:
        A DataFrame with the columns "AUC", "ACC" and "F1" holding the
        per-row mean over all seeds.

    Raises:
        ValueError: If `seeds` is empty.
    """
    # NOTE(review): assumes the CSVs contain "AUC"/"ACC"/"F1" columns; the
    # greedy-selection progress bar reports lowercase "acc"/"f1"/"auc", so
    # confirm the stored column names.
    metrics = ["AUC", "ACC", "F1"]

    seeds = list(seeds)
    if not seeds:
        raise ValueError("seeds must contain at least one seed")

    # Select columns with the flat label list (not a nested list) and read
    # every seed exactly once so no seed is counted twice.
    per_seed_frames = [
        read_df_from_csv(top_k_file.format(seed=seed))[metrics]
        for seed in seeds
    ]

    # Copy so the accumulation never writes into a frame owned by the reader.
    aggregated_metrics_df = per_seed_frames[0].copy()
    for frame in per_seed_frames[1:]:
        aggregated_metrics_df = aggregated_metrics_df + frame

    return aggregated_metrics_df / len(per_seed_frames)

# Display the seed-averaged metric table as the cell result.
aggregate_greedy_results(top_k_file)