## Measuring disentanglement

Specify path to an experiment in a format `outputs/YYYY-MM-DD/HH-MM-SS`.

In [25]:
EXPERIMENT_PATH = "outputs/2023-06-04/15-33-19"

In [26]:
import os 
import math
import torch

import numpy as np
import torch.nn as nn 
from tqdm import tqdm
import matplotlib.pyplot as plt

from helpers import load_experiment
from extract import prepare_data_dci, fit_linear_model, compute_completeness, compute_disentanglement, compute_informativeness

In [27]:
PATH_PREFIX = "/home/danis/Projects/AlphaCaption/AutoConceptBottleneck/autoconcept"
dm, model = load_experiment(os.path.join(PATH_PREFIX, EXPERIMENT_PATH))
train_loader = dm.train_dataloader()
test_loader = dm.test_dataloader()
train_set = train_loader.dataset

Global seed set to 42


Fetching configuration...
Loading datamodule...


100%|██████████| 2700/2700 [00:00<00:00, 9526.45it/s]


Len of vocab:  53
Max len of caption:  12
Index for <pad>: [0]
Loading model


  rank_zero_warn(
  rank_zero_warn(


In [28]:
PATH_PREFIX = "/home/danis/Projects/AlphaCaption/AutoConceptBottleneck/autoconcept"
EXPERIMENT_PATH = "outputs/2023-06-21/12-37-03"
dm, _ = load_experiment(os.path.join(PATH_PREFIX, EXPERIMENT_PATH))
train_loader = dm.train_dataloader()
test_loader = dm.test_dataloader()
train_set = train_loader.dataset

Global seed set to 42


Fetching configuration...
Loading datamodule...


100%|██████████| 2700/2700 [00:00<00:00, 9475.26it/s]


Len of vocab:  53
Max len of caption:  12
Index for <pad>: [0]
Loading model


  rank_zero_warn(
  rank_zero_warn(


## DCI

In [29]:
targets = list()
preds = list()

is_framework = hasattr(model.main, "concept_extractor")
for batch in tqdm(test_loader):
    images, target = batch["image"].cuda(), batch["target"]
    N = images.shape[0]

    batch_features = model.main.inference(
                images)[0].cpu().detach()

    pred = torch.argmax(batch_features, dim=1).numpy()
    print(pred.shape)

    
    preds += list(pred)
    targets += list(target)
    



 11%|█         | 1/9 [00:04<00:39,  4.97s/it]

(64,)


 22%|██▏       | 2/9 [00:05<00:15,  2.18s/it]

(64,)


 33%|███▎      | 3/9 [00:05<00:07,  1.29s/it]

(64,)


 44%|████▍     | 4/9 [00:05<00:04,  1.14it/s]

(64,)


 56%|█████▌    | 5/9 [00:05<00:02,  1.55it/s]

(64,)


 67%|██████▋   | 6/9 [00:06<00:01,  1.99it/s]

(64,)


 78%|███████▊  | 7/9 [00:06<00:00,  2.41it/s]

(64,)


 89%|████████▉ | 8/9 [00:06<00:00,  2.78it/s]

(64,)
(19,)


100%|██████████| 9/9 [00:07<00:00,  1.15it/s]


In [30]:
preds[:10], targets[:10]

([4, 2, 5, 4, 2, 3, 4, 4, 3, 2],
 [tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0),
  tensor(0)])

In [31]:
from sklearn.metrics import accuracy_score
score = accuracy_score(targets, preds)
score

0.21092278719397364

In [22]:
preds == targets

False

In [32]:
X_train, y_train = prepare_data_dci(train_loader, model)
X_test, y_test = prepare_data_dci(test_loader, model)

100%|██████████| 29/29 [00:14<00:00,  2.07it/s]
100%|██████████| 9/9 [00:08<00:00,  1.00it/s]


In [33]:
R, errors = fit_linear_model(X_train, y_train, X_test, y_test)

100%|██████████| 6/6 [00:03<00:00,  1.91it/s]


In [37]:
disentanglement = compute_disentanglement(R)
print(f"Disentanglement: {disentanglement:.3f}")

Disentanglement: 0.547


In [38]:
completeness = compute_completeness(R)
print(f"Completeness: {completeness:.3f}")

Completeness: 0.488


In [39]:
informativeness = compute_informativeness(errors)
print(f"Informativeness (NRMSE): {informativeness:.3f}")

Informativeness (NRMSE): 0.244


## Purity

In [None]:
def compute_purity(loader, model):
    is_framework = hasattr(model.main, "concept_extractor")
    n_features = model.main.feature_extractor.main.fc.out_features
    
    features_to_attributes = list()
    attribute_values = None
    
    for batch in tqdm(loader):
        images, attributes_all = batch["image"].cuda(), batch["attributes"]
        N = images.shape[0]
        n_attributes = np.array(attributes_all).shape[1]

        if attribute_values is None:
            attribute_values = [[[0, 0] for _ in range(n_attributes)] for f in range(n_features)]
        
        if is_framework:
            batch_features = model.main.inference(images)[1].cpu().detach().numpy()
        else:
            batch_features = model(images)["concept_probs"].cpu().detach().numpy()
        
        for sample_id in range(N):
            attributes = np.array(attributes_all[sample_id])
            features = batch_features[sample_id]

            for feature_id in range(n_features):

                feature = features[feature_id]

                for attribute_id, attribute in enumerate(attributes):
                    value_on = attribute * feature + (1 - attribute) * (1 - feature)
                    attribute_values[feature_id][attribute_id][0] += value_on

                    value_off = attribute * (1 - feature) + (1 - attribute) * feature
                    attribute_values[feature_id][attribute_id][1] += value_off
    
    for a in attribute_values:
        a_ = [max(p) / len(train_set) for p in a]
        features_to_attributes.append(a_)
    
    return features_to_attributes

f2a = compute_purity(train_loader, model)

100%|██████████| 29/29 [00:15<00:00,  1.84it/s]


In [None]:
def find_best_alignment(features_to_attributes, iter_converge=20.0):
    n_features, n_attributes = np.array(features_to_attributes).shape

    features_to_attributes_ = list()
    for feature_to_attributes in features_to_attributes:
        feature_to_attributes_ = sorted([(idx, fa) for idx, fa in enumerate(feature_to_attributes)], key=lambda x: x[1], reverse=True)
        features_to_attributes_.append(feature_to_attributes_)
    
    attributes_to_features = list(list() for _ in range(n_attributes))

    for idx_feat, feature_to_attributes in enumerate(features_to_attributes_):
        for idx_attr, score in feature_to_attributes:
            attributes_to_features[idx_attr].append((idx_feat, score))
    
    attributes_to_features_ = list()
    for attr2feature in attributes_to_features:
        attributes_to_features_.append(sorted(attr2feature, key=lambda x: x[1], reverse=True))
    
    best_idx = list(None for _ in range(n_features))
    best_scores = list(None for _ in range(n_features))

    patience_left = iter_converge

    while None in best_idx and patience_left > 0:
        prev_best = [_ for _ in best_idx]

        for feat_idx, f2a in enumerate(features_to_attributes_):
            
            if best_idx[feat_idx] is None:

                for att_idx, score in f2a:

                    if att_idx not in best_idx:
                        best_idx[feat_idx] = att_idx
                        best_scores[feat_idx] = score
                        break

                    else:
                        idx_other = best_idx.index(att_idx)
                        score_other = best_scores[idx_other]

                        if score > score_other:
                            best_idx[feat_idx] = att_idx
                            best_scores[feat_idx] = score

                            best_idx[idx_other] = None
                            best_scores[idx_other] = None
                            break
        
        if best_idx == prev_best:
            patience_left -= 1
        else:
            patience_left = iter_converge
        
    return list(zip(best_idx, best_scores))

    
result = find_best_alignment(f2a)

scores = [b for _, b in result if b is not None and b != 0]
print("Purity: ", np.array(scores).mean())

Purity:  0.7787873725699388


## Results

### 1. Shapes dataset

| model | activation | norm_fn | slot_norm | reg_dist | f1-score | purity | disentanglement | completeness | cluster | align | directory |
|:-----------|:----:|:----:|:----:|:----:|:----:|:-------:|:-------:|:-------:|:-------:|:-------:|:-----------|
| Baseline | `sigmoid` |   `-`   |  `-`   |   `-`   | `0.830247` | `0.767724` | `0.465324 `| `0.481560` |  `A`  | `-`  | `outputs/2023-05-22/08-37-36` |
| Baseline | `gumbel` |   `-`   |   `-`   |  `-`   | `0.404321`  | `0.828083` | `0.316362` | `0.276083` | `A` | `-` |  `outputs/2023-05-22/08-49-23`  |
| Framework | `sigmoid` | `softmax`   |  `false`   |     `false`   | `0.969136`  | `0.636225`  | `0.437534` | `0.408124` | `B` | `D` | `outputs/2023-05-22/08-18-17` |  
| Framework | `gumbel` |  `softmax`   |  `false`   |    `false`  |   `0.848765`  |  `0.763983`    | `0.651922` | `0.611969` |   `B` |  `C`   |  `outputs/2023-05-22/08-04-48`  |  
| Framework | `gumbel` |  `entmax`   |  `false`   |    `false`  |   `0.842593`  |  `0.748309`     | `0.742202` | `0.736384` |  `A`  |  `B`  |  `outputs/2023-05-22/09-13-40`  | 
| Framework | `gumbel` |  `softmax`   |  `true`   |    `false`  |   `0.731482`  |  `0.707190`     | `0.670618` | `0.637436` |   `A` |  `B`  |  `outputs/2023-05-22/09-38-41`  | 
| Framework | `gumbel` |  `entmax`   |  `true`   |    `false`  |   `0.586420`  |  `0.691018`     | `0.582086` | `0.564636` |  `A`  |  `B`  |  `outputs/2023-05-22/11-03-11`  | 
| Framework | `gumbel` |  `softmax`   |  `false`   |    `true`  |   `0.814815`  |  `0.726690`    | `0.708535` | `0.673539` |  `A` | `D` |  `outputs/2023-05-22/09-53-54`  | 

### 2. CUB-200 

| model | activation | norm_fn | slot_norm | reg_dist | f1-score | purity | disentanglement | completeness | directory |
|:-----------|:----:|:----:|:----:|:----:|:----:|:-------:|:-------:|:-------:|:-----------|
| Baseline | `sigmoid` |   `-`   |  `-`   |   `-`   | `0.805452` | `0.573071` | `0.189394`| `0.196021` | `outputs/2023-05-26/07-31-18` |
| Baseline | `gumbel (0.01)` |   `-`   |   `-`   |  `-`   | `0.765`  | `0.578` | `0.193078` | `0.201281` | `outputs/2023-05-28/09-21-34`  |
| Framework | `gumbel (0.5)` |  `entmax`   |  `false`   |    `false`  |   `0.773003`  |  `0.593836`    | `0.229795` | `0.255646` |   `outputs/2023-05-27/10-34-06`  | 
| Framework | `gumbel (0.01)` |  `entmax`   |  `false`   |    `false`  |   `0.726`  |  `0.657`    | `X` | `X` |   `outputs/2023-05-27/19-41-06`  | 

### 3. MIMIC-CXR

| model | activation | norm_fn | slot_norm | reg_dist | f1-score | disentanglement | completeness | directory |
|:-----------|:----:|:----:|:----:|:----:|:----:|:-------:|:-------:|:-----------|
| Baseline | `sigmoid` |   `-`   |  `-`   |   `-`   | `0.768` |  `0.0164`| `0.0085` | `outputs/2023-06-01/12-16-30` |
| Framework | `gumbel (0.01)` |   `-`   |   `-`   |  `-`   | `0.749` | `0.0222` | `0.0124` | `outputs/2023-06-01/13-18-08`  |


In [1]:
results = {

"E49-SHP":
[[0.981131, 0.873248, 0.973656, 0.924189, 0.971781],
[0.663345, 0.394986, 0.387052, 0.561059, 0.575503],
[0.505478, 0.364848, 0.391295, 0.460447, 0.642119],
[0.172368, 0.280508, 0.169319, 0.192731, 0.182917]],

"E50-SHP":
[[0.981165, 0.971696, 0.990583, 0.963805, 0.983096],
[0.588585, 0.573661, 0.56222, 0.553471, 0.477187],
[0.46946, 0.632319, 0.498188, 0.485164, 0.380766],
[0.143945, 0.175667, 0.154161, 0.149143, 0.161818]],

"E51-SHP":
[[0.447716, 0.389919, 0.472793, 0.665221, 0.489999],
[0.432098, 0.37835, 0.430014, 0.495095, 0.528005],
[0.419269, 0.382079, 0.424006, 0.461387, 0.431549],
[0.477292, 0.476659, 0.459475, 0.364721, 0.448022]],

"E52-SHP":
[[0.942825, 0.968106, 0.988683, 0.983082, 0.9887],
[0.472531, 0.45885, 0.638394, 0.511719, 0.509037],
[0.447773, 0.461443, 0.549162, 0.533417, 0.422075],
[0.231318, 0.14777, 0.142614, 0.159164, 0.139041]],

"E53-SHP":
[[0.881701, 0.827895, 0.879056, 0.874072, 0.682161],
[0.627323, 0.667616, 0.595665, 0.850418, 0.534167],
[0.529322, 0.517727, 0.527888, 0.692079, 0.511404],
[0.139276, 0.145541, 0.137512, 0.162982, 0.205537]],

"E54-SHP":
[[0.575949, 0.35089, 0.582664, 0.482592, 0.376687],
[0.485671, 0.460129, 0.661162, 0.617536, 0.583347],
[0.519757, 0.406376, 0.518161, 0.522811, 0.61405],
[0.177332, 0.366635, 0.146314, 0.171029, 0.265272]],

"E55-SHP":
[[0.614845, 0.42983, 0.584451, 0.477093, 0.393848],
[0.540889, 0.504733, 0.678941, 0.546654, 0.577525],
[0.425633, 0.505349, 0.588776, 0.478584, 0.560115],
[0.226377, 0.231023, 0.146473, 0.176644, 0.241622]],

"E56-SHP":
[[0.862637, 0.829518, 0.862029, 0.850979, 0.740367],
[0.618041, 0.618922, 0.505485, 0.73972, 0.538934],
[0.460178, 0.541834, 0.441986, 0.625805, 0.513224],
[0.13346, 0.175929, 0.154805, 0.153977, 0.179752]],

"E57-SHP":
[[0.494562, 0.657113, 0.52312, 0.514724, 0.508858],
[0.586914, 0.810653, 0.365879, 0.488906, 0.59746],
[0.54572, 0.579612, 0.399097, 0.43978, 0.65056],
[0.348297, 0.145581, 0.318479, 0.254663, 0.204913]],

"E58-SHP":
[[0.439655, 0.605264, 0.416931, 0.468073, 0.512713],
[0.525453, 0.775373, 0.408299, 0.5834, 0.549561],
[0.486188, 0.379191, 0.408362, 0.547149, 0.581411],
[0.319103, 0.174612, 0.328444, 0.241011, 0.177954]],

"E59-SHP":
[[0.605599, 0.6777, 0.646318, 0.581926, 0.673149],
[0.731953, 0.801716, 0.782887, 0.706306, 0.739048],
[0.776975, 0.721015, 0.769809, 0.739891, 0.747892],
[0.086086, 0.078189, 0.08505, 0.050539, 0.105751]],

"E60-SHP":
[[0.455141, 0.523521, 0.497093, 0.494261, 0.47096],
[0.564572, 0.594083, 0.54585, 0.645181, 0.679455],
[0.503765, 0.468938, 0.542489, 0.407685, 0.668507],
[0.283051, 0.184142, 0.215551, 0.276161, 0.207509]],

"E61-SHP":
[[0.881701, 0.827895, 0.879056, 0.874072, 0.682161],
[0.627323, 0.667616, 0.595665, 0.850418, 0.534167],
[0.529322, 0.517727, 0.527888, 0.692079, 0.511404],
[0.139276, 0.145541, 0.137512, 0.162982, 0.205537]],

"E62-SHP":
[[0.862637, 0.829518, 0.862029, 0.850979, 0.740367],
[0.618041, 0.618922, 0.505485, 0.73972, 0.538934],
[0.460178, 0.541834, 0.441986, 0.625805, 0.513224],
[0.13346, 0.175929, 0.154805, 0.153977, 0.179752]]

}

In [1]:
import numpy as np

f1 = np.array([0.754, 0.739333, 0.742667, 0.751333, 0.778])
print(f"f1: {f1.mean():.2f} ± {f1.std():.2f}")

D = np.array([0.019657, 0.020572, 0.017048, 0.022053, 0.022829])
print(f"disentanglement: {D.mean():.2f} ± {D.std():.2f}")

C = np.array([0.011669, 0.015166, 0.011676, 0.011553, 0.013123])
print(f"completeness:{C.mean():.2f} ± {C.std():.2f}")

I = np.array([0.991455, 1.000082, 0.999471, 0.993454, 0.993892])
print(f"informativeness: {I.mean():.2f} ± {I.std():.2f}")

f1: 0.75 ± 0.01
disentanglement: 0.02 ± 0.00
completeness:0.01 ± 0.00
informativeness: 1.00 ± 0.00


In [7]:
for i in range(49, 63):
    exp = f"E{i}-SHP"
    res = results[exp]
    f1 = np.array(res[0])
    D = np.array(res[1])
    C = np.array(res[2])
    I = np.array(res[3])
    print(exp)
    print(f"f1: {f1.mean():.2f} ± {f1.std():.2f}")
    print(f"disentanglement: {D.mean():.2f} ± {D.std():.2f}")
    print(f"completeness:{C.mean():.2f} ± {C.std():.2f}")
    print(f"informativeness: {I.mean():.2f} ± {I.std():.2f}")
    print()

E49-SHP
f1: 0.94 ± 0.04
disentanglement: 0.52 ± 0.11
completeness:0.47 ± 0.10
informativeness: 0.20 ± 0.04

E50-SHP
f1: 0.98 ± 0.01
disentanglement: 0.55 ± 0.04
completeness:0.49 ± 0.08
informativeness: 0.16 ± 0.01

E51-SHP
f1: 0.49 ± 0.09
disentanglement: 0.45 ± 0.05
completeness:0.42 ± 0.03
informativeness: 0.45 ± 0.04

E52-SHP
f1: 0.97 ± 0.02
disentanglement: 0.52 ± 0.06
completeness:0.48 ± 0.05
informativeness: 0.16 ± 0.03

E53-SHP
f1: 0.83 ± 0.08
disentanglement: 0.66 ± 0.11
completeness:0.56 ± 0.07
informativeness: 0.16 ± 0.03

E54-SHP
f1: 0.47 ± 0.10
disentanglement: 0.56 ± 0.08
completeness:0.52 ± 0.07
informativeness: 0.23 ± 0.08

E55-SHP
f1: 0.50 ± 0.09
disentanglement: 0.57 ± 0.06
completeness:0.51 ± 0.06
informativeness: 0.20 ± 0.04

E56-SHP
f1: 0.83 ± 0.05
disentanglement: 0.60 ± 0.08
completeness:0.52 ± 0.07
informativeness: 0.16 ± 0.02

E57-SHP
f1: 0.54 ± 0.06
disentanglement: 0.57 ± 0.15
completeness:0.52 ± 0.09
informativeness: 0.25 ± 0.07

E58-SHP
f1: 0.49 ± 0.07
dise

In [6]:
s = """E48-SHP
0.911634
0.714322
0.552646
0.100274

E48-SHP
0.924783
0.756357
0.678377
0.057948

E48-SHP
0.916744
0.723712
0.751705
0.076777

E48-SHP
0.915334
0.782168
0.82064
0.077664

E48-SHP
0.888455
0.762912
0.826717
0.080682

E47-SHP
0.926393
0.790332
0.512468
0.08803

E47-SHP
0.935305
0.801184
0.763281
0.038964

E47-SHP
0.947202
0.795011
0.81812
0.079581

E47-SHP
0.93199
0.782948
0.861406
0.066335

E47-SHP
0.912968
0.72203
0.726697
0.080838

E46-SHP
0.609361
0.75557
0.829323
0.122514

E46-SHP
0.688224
0.646946
0.587759
0.068016

E46-SHP
0.704962
0.673611
0.763867
0.076611

E46-SHP
0.598473
0.629381
0.694606
0.036437

E46-SHP
0.712253
0.735236
0.764935
0.111215

E45-SHP
0.605599
0.731953
0.776975
0.086086

E45-SHP
0.6777
0.801716
0.721015
0.078189

E45-SHP
0.646318
0.782887
0.769809
0.08505

E45-SHP
0.581926
0.706306
0.739891
0.050539

E45-SHP
0.673149
0.739048
0.747892
0.105751

E44-SHP
0.714243
0.833282
0.933392
0.056083

E44-SHP
0.746843
0.685071
0.537353
0.072866

E44-SHP
0.665712
0.736512
0.842934
0.062078

E44-SHP
0.689275
0.774483
0.88213
0.0672

E44-SHP
0.79202
0.662222
0.772932
0.114144

E43-SHP
0.796979
0.784924
0.80317
0.099218

E43-SHP
0.730684
0.758952
0.704489
0.082034

E43-SHP
0.68999
0.681042
0.726528
0.084849

E43-SHP
0.781944
0.613127
0.595106
0.073327

E43-SHP
0.729565
0.73328
0.704466
0.115084

E42-SHP
0.911634
0.714322
0.552646
0.100274

E42-SHP
0.924783
0.756357
0.678377
0.057948

E42-SHP
0.916744
0.723712
0.751705
0.076777

E42-SHP
0.915334
0.782168
0.82064
0.077664

E42-SHP
0.888455
0.762912
0.826717
0.080682

E41-SHP
0.593661
0.574505
0.633967
0.120356

E41-SHP
0.755893
0.679759
0.72005
0.071099

E41-SHP
0.66464
0.72415
0.770474
0.07496

E41-SHP
0.631355
0.703409
0.758989
0.078137

E41-SHP
0.602499
0.76411
0.701181
0.095788

E40-SHP
0.586194
0.612219
0.67942
0.086265

E40-SHP
0.686391
0.678793
0.733373
0.037829

E40-SHP
0.635703
0.706593
0.765801
0.121043

E40-SHP
0.592576
0.648599
0.738637
0.090218

E40-SHP
0.58641
0.699692
0.612774
0.092392

E39-SHP
0.926393
0.790332
0.512468
0.08803

E39-SHP
0.935305
0.801184
0.763281
0.038964

E39-SHP
0.947202
0.795011
0.81812
0.079581

E39-SHP
0.93199
0.782948
0.861406
0.066335

E39-SHP
0.912968
0.72203
0.726697
0.080838

E38-SHP
0.992482
0.554426
0.609996
0.102797

E38-SHP
0.99435
0.675867
0.69221
0.103424

E38-SHP
0.998117
0.664008
0.704597
0.019946

E38-SHP
0.994349
0.778436
0.706883
0.072356

E38-SHP
0.992466
0.523756
0.688129
0.086632

E37-SHP
0.988745
0.526579
0.504826
0.092013

E37-SHP
0.994365
0.489267
0.540975
0.114545

E37-SHP
0.994366
0.506091
0.52436
0.075384

E37-SHP
0.998117
0.523942
0.664761
0.046922

E37-SHP
0.992466
0.50006
0.575982
0.083993

E36-SHP
0.996234
0.578681
0.677627
0.087798

E36-SHP
0.996233
0.806794
0.706332
0.095678

E36-SHP
0.99435
0.652738
0.71896
0.059986

E36-SHP
0.996233
0.775874
0.800519
0.079704

E36-SHP
0.998117
0.575105
0.676851
0.065531

E35-SHP
0.996233
0.563723
0.550603
0.074319

E35-SHP
0.994366
0.611488
0.570011
0.082178

E35-SHP
0.996234
0.525652
0.576321
0.076154

E35-SHP
0.996233
0.678788
0.803945
0.051122

E35-SHP
0.99435
0.597293
0.7082
0.084083
"""

s = s.split("\n")

results = {}
offset = -1
pred_t = 0
for t in s:
    if t == "":
        continue
    if t[0] == "E":
        if t not in results:
            results[t] = [list(), list(), list(), list()]
        offset = -1
        pred_t = t
    else:
        t = float(t)
        offset += 1
        results[pred_t][offset].append(t)

results


{'E48-SHP': [[0.911634, 0.924783, 0.916744, 0.915334, 0.888455],
  [0.714322, 0.756357, 0.723712, 0.782168, 0.762912],
  [0.552646, 0.678377, 0.751705, 0.82064, 0.826717],
  [0.100274, 0.057948, 0.076777, 0.077664, 0.080682]],
 'E47-SHP': [[0.926393, 0.935305, 0.947202, 0.93199, 0.912968],
  [0.790332, 0.801184, 0.795011, 0.782948, 0.72203],
  [0.512468, 0.763281, 0.81812, 0.861406, 0.726697],
  [0.08803, 0.038964, 0.079581, 0.066335, 0.080838]],
 'E46-SHP': [[0.609361, 0.688224, 0.704962, 0.598473, 0.712253],
  [0.75557, 0.646946, 0.673611, 0.629381, 0.735236],
  [0.829323, 0.587759, 0.763867, 0.694606, 0.764935],
  [0.122514, 0.068016, 0.076611, 0.036437, 0.111215]],
 'E45-SHP': [[0.605599, 0.6777, 0.646318, 0.581926, 0.673149],
  [0.731953, 0.801716, 0.782887, 0.706306, 0.739048],
  [0.776975, 0.721015, 0.769809, 0.739891, 0.747892],
  [0.086086, 0.078189, 0.08505, 0.050539, 0.105751]],
 'E44-SHP': [[0.714243, 0.746843, 0.665712, 0.689275, 0.79202],
  [0.833282, 0.685071, 0.736512, 

In [8]:
import numpy as np
for i in range(35, 49):
    exp = f"E{i}-SHP"
    res = results[exp]
    f1 = np.array(res[0])
    D = np.array(res[1])
    C = np.array(res[2])
    I = np.array(res[3])
    print(exp)
    print(f"f1: {f1.mean():.2f} ± {f1.std():.2f}")
    print(f"disentanglement: {D.mean():.2f} ± {D.std():.2f}")
    print(f"completeness:{C.mean():.2f} ± {C.std():.2f}")
    print(f"informativeness: {I.mean():.2f} ± {I.std():.2f}")
    print()

E35-SHP
f1: 1.00 ± 0.00
disentanglement: 0.60 ± 0.05
completeness:0.64 ± 0.10
informativeness: 0.07 ± 0.01

E36-SHP
f1: 1.00 ± 0.00
disentanglement: 0.68 ± 0.10
completeness:0.72 ± 0.05
informativeness: 0.08 ± 0.01

E37-SHP
f1: 0.99 ± 0.00
disentanglement: 0.51 ± 0.01
completeness:0.56 ± 0.06
informativeness: 0.08 ± 0.02

E38-SHP
f1: 0.99 ± 0.00
disentanglement: 0.64 ± 0.09
completeness:0.68 ± 0.04
informativeness: 0.08 ± 0.03

E39-SHP
f1: 0.93 ± 0.01
disentanglement: 0.78 ± 0.03
completeness:0.74 ± 0.12
informativeness: 0.07 ± 0.02

E40-SHP
f1: 0.62 ± 0.04
disentanglement: 0.67 ± 0.03
completeness:0.71 ± 0.05
informativeness: 0.09 ± 0.03

E41-SHP
f1: 0.65 ± 0.06
disentanglement: 0.69 ± 0.06
completeness:0.72 ± 0.05
informativeness: 0.09 ± 0.02

E42-SHP
f1: 0.91 ± 0.01
disentanglement: 0.75 ± 0.03
completeness:0.73 ± 0.10
informativeness: 0.08 ± 0.01

E43-SHP
f1: 0.75 ± 0.04
disentanglement: 0.71 ± 0.06
completeness:0.71 ± 0.07
informativeness: 0.09 ± 0.01

E44-SHP
f1: 0.72 ± 0.04
dise