In [1]:
# Enable IPython's autoreload extension so edits to imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

import sys
# Add the parent directory to the import path; presumably this is what makes
# the `models` package imported below resolvable -- confirm repo layout.
sys.path.append('..')

import numpy as np
import pandas as pd
import torch
import random
import csv
import matplotlib.pyplot as plt
import torch
from torchmetrics.classification import AUROC, Accuracy, ConfusionMatrix, F1Score
import os, subprocess, gc, time, datetime
from itertools import product
from einops import rearrange

import models.models_original as models_original
import models.models_3d as models_3d
import models.models_3d_atomics as models_3d_atomics
from models.data import *
from models.helper import *
from models.param_initializations import *
from models.optimization_strategy import *

# Device used for every metric and model in this notebook. `get_free_gpu` comes
# from one of the star imports above; the recorded output shows it reporting the
# chosen CUDA device. NOTE(review): selection criteria are not visible here.
device = get_free_gpu()


current device cuda:15


In [2]:
# Build the TISELAC loaders plus dataset metadata (class weights, number of
# classes, feature dimensions, sequence length) for a fixed split seed.
(
    train_loader, val_loader, test_loader,
    class_weights, num_classes,
    changing_dim, static_dim, seq_len,
) = get_tiselac_dataloader(batch_size=512, random_state=1)

print(class_weights, num_classes)

# Sanity check: print the shape of every tensor in the first training batch.
# A plain loop is used instead of a list comprehension so that no throwaway
# list of `None` values is built just for the printing side effect.
for batch in train_loader:
    for batch_item in batch:
        print(batch_item.shape)
    break

# Number of batches per training epoch (displayed as the cell output).
len(train_loader)

tensor([0.5538, 2.8525, 0.5538, 0.5710, 0.7132, 1.6248, 1.2057, 6.3149, 3.5524]) 9
torch.Size([512, 23, 10])
torch.Size([512, 23, 10])
torch.Size([512])


117

In [3]:
# Multiclass evaluation metrics; each one is sized by the dataset's class count
# and moved to the device selected in the first cell.
metric_kwargs = dict(task="multiclass", num_classes=num_classes)
auroc_metric = AUROC(**metric_kwargs).to(device)
accuracy_metric = Accuracy(**metric_kwargs).to(device)
f1_metric = F1Score(**metric_kwargs).to(device)
conf_matrix = ConfusionMatrix(**metric_kwargs).to(device)

print(changing_dim, static_dim, seq_len)

# Default seed for ad-hoc cells, and the seeds (1, 2, 3) swept by the experiments.
random_seed = 1
set_seed(random_seed)
random_seeds = range(1, 4)

# Checkpoint folder and per-seed CSV templates for the greedy-selection results;
# the `{seed}` placeholder is filled in later with `str.format(seed=...)`.
experiment_folder = "/workdir/optimal-summaries-public/_models/tiselac/atomics/"
top_k_file_sum2atom = experiment_folder + "top-k/bottleneck_topkinds_sum2atom_seed_{seed}.csv"
top_k_file_sum2con = experiment_folder + "top-k/bottleneck_topkinds_sum2con_seed_{seed}.csv"

10 0 23


## Optimization: load or train the CBMs and run greedy feature selection

In [4]:
def _load_or_train_cbm(random_seed, use_summaries_for_atomics, n_concepts=4, n_atomics=10, max_epochs=10000):
    """Seed, build, load-or-fit and evaluate one atomics CBM on TISELAC.

    Shared implementation behind ``get_model_sum2atom`` and
    ``get_model_sum2con``, which differ only in ``use_summaries_for_atomics``.
    Reads the module-level ``experiment_folder`` and ``device``.

    Args:
        random_seed: Passed to ``set_seed``, used as ``random_state`` for
            ``get_tiselac_dataloader`` and substituted for ``seed`` in the
            checkpoint path.
        use_summaries_for_atomics: Forwarded to ``models_3d_atomics.CBM`` and
            encoded in the checkpoint filename.
        n_concepts: Forwarded to the CBM constructor.
        n_atomics: Forwarded to the CBM constructor.
        max_epochs: Forwarded to ``model.try_load_else_fit``.

    Returns:
        The ``models_3d_atomics.CBM`` instance after ``try_load_else_fit`` and
        after ``evaluate_classification`` has been run on the test loader.
    """
    set_seed(random_seed)

    # Key order is kept stable: the checkpoint filename appears to be derived
    # from this dict in insertion order (see the "Loaded model from ..." output).
    config = {
        "n_concepts": n_concepts,
        "n_atomics": n_atomics,
        "use_summaries_for_atomics": use_summaries_for_atomics,
    }

    makedir(experiment_folder)
    model_path = get_filename_from_dict(experiment_folder, config)
    model_path = model_path.format(**config, seed=random_seed)

    (
        train_loader, val_loader, test_loader,
        class_weights, num_classes,
        changing_dim, static_dim, seq_len,
    ) = get_tiselac_dataloader(random_state=random_seed)

    model = models_3d_atomics.CBM(
        **config,
        static_dim=static_dim,
        changing_dim=changing_dim,
        seq_len=seq_len,
        output_dim=num_classes,
        device=device,
    )
    # The recorded outputs show an existing checkpoint being loaded from
    # `model_path`; per the method name it fits the model otherwise.
    model.try_load_else_fit(
        train_loader,
        val_loader,
        p_weight=class_weights.to(device),
        save_model_path=model_path,
        max_epochs=max_epochs,
    )

    evaluate_classification(model, test_loader)
    return model


def get_model_sum2atom(random_seed):
    """Return the evaluated CBM with ``use_summaries_for_atomics=True`` for ``random_seed``."""
    return _load_or_train_cbm(random_seed, use_summaries_for_atomics=True)


get_model_sum2atom(1)


def get_model_sum2con(random_seed):
    """Return the evaluated CBM with ``use_summaries_for_atomics=False`` for ``random_seed``.

    Uses the same module-level ``experiment_folder`` as ``get_model_sum2atom``
    rather than a separate local copy of the path.
    """
    return _load_or_train_cbm(random_seed, use_summaries_for_atomics=False)


get_model_sum2con(1)



Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_True_seed_1.pt
AUC macro 0.961
ACC macro 0.732
 F1 macro 0.683




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_False_seed_1.pt
AUC macro 0.957
ACC macro 0.729
 F1 macro 0.682


CBM(
  (sigmoid_layer): Sigmoid()
  (output_af): Softmax(dim=1)
  (activation_func): Sigmoid()
  (layer_time_to_atomics): LazyLinearWithMask(in_features=0, out_features=10, bias=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (layer_to_concepts): LazyLinearWithMask(in_features=0, out_features=4, bias=True)
  (layer_output): Linear(in_features=4, out_features=9, bias=True)
)

In [5]:
# Load (or train) and evaluate both model variants for every seed in
# `random_seeds`. The sum2atom models are discarded; the list of sum2con
# models is the cell's displayed value.
list(map(get_model_sum2atom, random_seeds))
list(map(get_model_sum2con, random_seeds))



Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_True_seed_1.pt
AUC macro 0.961
ACC macro 0.732
 F1 macro 0.683




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_True_seed_2.pt
AUC macro 0.966
ACC macro 0.752
 F1 macro 0.700




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_True_seed_3.pt
AUC macro 0.965
ACC macro 0.752
 F1 macro 0.696




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_False_seed_1.pt
AUC macro 0.957
ACC macro 0.729
 F1 macro 0.682




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_False_seed_2.pt
AUC macro 0.960
ACC macro 0.733
 F1 macro 0.685




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_False_seed_3.pt
AUC macro 0.960
ACC macro 0.727
 F1 macro 0.681


[CBM(
   (sigmoid_layer): Sigmoid()
   (output_af): Softmax(dim=1)
   (activation_func): Sigmoid()
   (layer_time_to_atomics): LazyLinearWithMask(in_features=0, out_features=10, bias=True)
   (flatten): Flatten(start_dim=1, end_dim=-1)
   (layer_to_concepts): LazyLinearWithMask(in_features=0, out_features=4, bias=True)
   (layer_output): Linear(in_features=4, out_features=9, bias=True)
 ),
 CBM(
   (sigmoid_layer): Sigmoid()
   (output_af): Softmax(dim=1)
   (activation_func): Sigmoid()
   (layer_time_to_atomics): LazyLinearWithMask(in_features=0, out_features=10, bias=True)
   (flatten): Flatten(start_dim=1, end_dim=-1)
   (layer_to_concepts): LazyLinearWithMask(in_features=0, out_features=4, bias=True)
   (layer_output): Linear(in_features=4, out_features=9, bias=True)
 ),
 CBM(
   (sigmoid_layer): Sigmoid()
   (output_af): Softmax(dim=1)
   (activation_func): Sigmoid()
   (layer_time_to_atomics): LazyLinearWithMask(in_features=0, out_features=10, bias=True)
   (flatten): Flatten(sta

In [None]:
# NOTE(review): this cell has no execution count and relies on hidden state.
# No earlier cell assigns `model` (the get_model_* calls above discard their
# return values), so unless a star import provides it this raises NameError on
# a fresh kernel. The CBM repr printed above also lists no `bottleneck`
# submodule -- confirm the attribute exists before relying on this cell.
visualize_top100_weights_per_channel(model.bottleneck)


In [11]:
# Metrics reported next to the optimisation score while the greedy search runs.
track_metrics = dict(acc=accuracy_metric, f1=f1_metric, auc=auroc_metric)

# Greedy forward selection for the sum2atom variant, one run per seed.
# The recorded run took roughly 5.5-6 hours per seed.
results = []
for random_seed in random_seeds:
    print("seed", random_seed)
    set_seed(random_seed)

    model = get_model_sum2atom(random_seed)
    (
        train_loader, val_loader, test_loader,
        class_weights, num_classes,
        changing_dim, static_dim, seq_len,
    ) = get_tiselac_dataloader(random_state=random_seed)

    # One entry of candidate indices per regularized layer of the model.
    top_k_inds = [get_top_features_per_concept(reg_layer) for reg_layer in model.regularized_layers]
    save_path = top_k_file_sum2atom.format(seed=random_seed)

    greedy_results = greedy_forward_selection(
        model=model,
        layers_to_prune=model.regularized_layers,
        top_k_inds=top_k_inds,
        val_loader=val_loader,
        optimize_metric=auroc_metric,
        track_metrics=track_metrics,
        save_path=save_path,
    )
    results.append(greedy_results)
    

seed 1




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_True_seed_1.pt
AUC macro 0.961
ACC macro 0.732
 F1 macro 0.683
Found 10 Concepts
90th percentile per concept [ 0.12366915  4.3872643  13.994587    5.6910405   5.27104     0.12729435
  2.866164    0.17723843  0.11380112  4.1549497 ]
['Concept 0 len: 10', 'Concept 1 len: 10', 'Concept 2 len: 10', 'Concept 3 len: 10', 'Concept 4 len: 10', 'Concept 5 len: 10', 'Concept 6 len: 11', 'Concept 7 len: 10', 'Concept 8 len: 10', 'Concept 9 len: 10']
Found 4 Concepts
90th percentile per concept [10.559936 17.957523 15.62205  13.409517]
['Concept 0 len: 16', 'Concept 1 len: 20', 'Concept 2 len: 23', 'Concept 3 len: 20']


100%|██████████| 140/140 [5:36:39<00:00, 144.28s/it, Score=0.91356, acc=0.382, f1=0.382, auc=0.913]  


seed 2




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_True_seed_2.pt
AUC macro 0.966
ACC macro 0.752
 F1 macro 0.700
Found 10 Concepts
90th percentile per concept [ 0.12272644  2.801038    3.10565     3.4920776   4.466231    6.6447144
  0.12156378 10.8165655   3.7118132   0.11965739]
['Concept 0 len: 10', 'Concept 1 len: 10', 'Concept 2 len: 10', 'Concept 3 len: 10', 'Concept 4 len: 10', 'Concept 5 len: 10', 'Concept 6 len: 10', 'Concept 7 len: 10', 'Concept 8 len: 10', 'Concept 9 len: 10']
Found 4 Concepts
90th percentile per concept [15.473067 12.9715   11.362193 17.264057]
['Concept 0 len: 21', 'Concept 1 len: 25', 'Concept 2 len: 19', 'Concept 3 len: 24']


100%|██████████| 140/140 [5:54:33<00:00, 151.95s/it, Score=0.77775, acc=0.0389, f1=0.0389, auc=0.788]  


seed 3




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_True_seed_3.pt
AUC macro 0.965
ACC macro 0.752
 F1 macro 0.696
Found 10 Concepts
90th percentile per concept [ 2.5723462  10.70607     2.8294053   3.8433394   0.12618801  0.13571216
  5.281903    0.1168546   3.0253272   6.0665402 ]
['Concept 0 len: 10', 'Concept 1 len: 10', 'Concept 2 len: 10', 'Concept 3 len: 10', 'Concept 4 len: 10', 'Concept 5 len: 10', 'Concept 6 len: 10', 'Concept 7 len: 10', 'Concept 8 len: 10', 'Concept 9 len: 10']
Found 4 Concepts
90th percentile per concept [14.179349 17.946478 14.615713  9.905482]
['Concept 0 len: 20', 'Concept 1 len: 23', 'Concept 2 len: 20', 'Concept 3 len: 21']


100%|██████████| 140/140 [5:46:45<00:00, 148.61s/it, Score=0.89507, acc=0.442, f1=0.442, auc=0.891]  


In [None]:
# Reload everything for seed 1 together so the plotting cell below receives a
# matching (model, top_k_inds, greedy_results) triple. Without this, `model`
# and `top_k_inds` are whatever the last iteration of the greedy-selection loop
# left behind (a different seed), while `greedy_results` is for seed 1.
model = get_model_sum2atom(1)
top_k_inds = [get_top_features_per_concept(layer) for layer in model.regularized_layers]
greedy_results = read_df_from_csv(top_k_file_sum2atom.format(seed=1))

# Allow up to 100 rows to be rendered when the frame is displayed.
pd.set_option('display.max_rows', 100)
greedy_results


In [None]:
# Plot the selected weights of the first and second regularized layers.
# Depends on `model`, `top_k_inds` and `greedy_results` from earlier cells.
# NOTE(review): all three must belong to the same seed for the plot to be
# meaningful -- verify which seed the kernel state currently holds.
plot_selected_weights(model.regularized_layers[0].weight, top_k_inds, greedy_results)
plot_selected_weights(model.regularized_layers[1].weight, top_k_inds, greedy_results)

In [6]:
# Metrics reported next to the optimisation score while the greedy search runs.
track_metrics = dict(acc=accuracy_metric, f1=f1_metric, auc=auroc_metric)

# Greedy forward selection for the sum2con variant, one run per seed.
# The recorded output suggests previously saved results at `save_path` are
# reused ("Successfully loaded greedy search results!").
results = []
for random_seed in random_seeds:
    print("random_seed", random_seed)
    set_seed(random_seed)

    model = get_model_sum2con(random_seed)
    (
        train_loader, val_loader, test_loader,
        class_weights, num_classes,
        changing_dim, static_dim, seq_len,
    ) = get_tiselac_dataloader(random_state=random_seed)

    # One entry of candidate indices per regularized layer of the model.
    top_k_inds = [get_top_features_per_concept(reg_layer) for reg_layer in model.regularized_layers]
    save_path = top_k_file_sum2con.format(seed=random_seed)

    greedy_results = greedy_forward_selection(
        model=model,
        layers_to_prune=model.regularized_layers,
        top_k_inds=top_k_inds,
        val_loader=val_loader,
        optimize_metric=auroc_metric,
        track_metrics=track_metrics,
        save_path=save_path,
    )
    results.append(greedy_results)
    

random_seed 1




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_False_seed_1.pt
AUC macro 0.957
ACC macro 0.729
 F1 macro 0.682


Found 10 Concepts
90th percentile per concept [0.09481631 0.10449596 0.09370365 0.10036281 0.10516899 0.11018913
 0.09929327 4.7098823  0.10785296 0.09374589]
['Concept 0 len: 10', 'Concept 1 len: 10', 'Concept 2 len: 10', 'Concept 3 len: 10', 'Concept 4 len: 10', 'Concept 5 len: 10', 'Concept 6 len: 10', 'Concept 7 len: 10', 'Concept 8 len: 10', 'Concept 9 len: 10']
Found 4 Concepts
90th percentile per concept [10.564029  11.2774105 14.226827  13.393058 ]
['Concept 0 len: 23', 'Concept 1 len: 20', 'Concept 2 len: 19', 'Concept 3 len: 20']
Successfully loaded greedy search results!
random_seed 2




Loaded model from /workdir/optimal-summaries-public/_models/tiselac/atomics/n_concepts_4_n_atomics_10_use_summaries_for_atomics_False_seed_2.pt
AUC macro 0.960
ACC macro 0.733
 F1 macro 0.685
Found 10 Concepts
90th percentile per concept [0.11170515 0.10352117 0.10605661 3.478114   0.11777916 0.09690154
 0.11849654 0.11505327 0.10375417 0.10070904]
['Concept 0 len: 10', 'Concept 1 len: 10', 'Concept 2 len: 10', 'Concept 3 len: 10', 'Concept 4 len: 10', 'Concept 5 len: 10', 'Concept 6 len: 10', 'Concept 7 len: 10', 'Concept 8 len: 10', 'Concept 9 len: 10']
Found 4 Concepts
90th percentile per concept [10.390122 12.834478 12.110575 19.563532]
['Concept 0 len: 21', 'Concept 1 len: 19', 'Concept 2 len: 16', 'Concept 3 len: 18']


  4%|▎         | 5/140 [15:51<7:06:04, 189.37s/it, Score=0.95740, acc=0.0176, f1=0.0176, auc=0.5]

In [None]:
# Reload everything for seed 1 together so the plotting cell below receives a
# matching (model, top_k_inds, greedy_results) triple. `top_k_inds` is
# recomputed from the reloaded model; otherwise it would still hold the indices
# from the last iteration of the greedy-selection loop (a different seed).
model = get_model_sum2con(1)
top_k_inds = [get_top_features_per_concept(layer) for layer in model.regularized_layers]
greedy_results = read_df_from_csv(top_k_file_sum2con.format(seed=1))

# Allow up to 100 rows to be rendered when the frame is displayed.
pd.set_option('display.max_rows', 100)
greedy_results


In [None]:

# Plot the selected weights of the first and second regularized layers.
# Depends on `model`, `top_k_inds` and `greedy_results` from earlier cells.
# NOTE(review): all three must belong to the same seed for the plot to be
# meaningful -- verify which seed the kernel state currently holds.
plot_selected_weights(model.regularized_layers[0].weight, top_k_inds, greedy_results)
plot_selected_weights(model.regularized_layers[1].weight, top_k_inds, greedy_results)

In [None]:
# Evaluate the saved greedy-selection results for the sum2atom variant, passing
# the model factory, the dataloader factory and the per-seed CSV template.
# NOTE(review): presumably `n_experiments=3` corresponds to seeds 1-3 -- confirm
# against `evaluate_greedy_selection` in the project helpers.
result_df = evaluate_greedy_selection(get_model_sum2atom, get_tiselac_dataloader, top_k_file_sum2atom, n_experiments=3)
result_df


In [None]:
# Evaluate the saved greedy-selection results for the sum2con variant, passing
# the model factory, the dataloader factory and the per-seed CSV template.
# This rebinds `result_df`, replacing the sum2atom frame from the previous cell.
# NOTE(review): presumably `n_experiments=3` corresponds to seeds 1-3 -- confirm
# against `evaluate_greedy_selection` in the project helpers.
result_df = evaluate_greedy_selection(get_model_sum2con, get_tiselac_dataloader, top_k_file_sum2con, n_experiments=3)
result_df
