In [1]:
import os
import sys
import argparse
import IPython 
from PIL import Image

import torch
import torchvision
import numpy as np
import pandas as pd
import skimage
from scipy import sparse
import matplotlib.pyplot as plt 
import torchxrayvision as xrv

from dinov2.data import SamplerType, make_data_loader, make_dataset
from dinov2.data.datasets import NIHChestXray
from dinov2.data.transforms import make_xray_classification_eval_transform, make_classification_eval_transform
from dinov2.eval.setup import setup_and_build_model
from dinov2.eval.utils import ModelWithNormalize, evaluate, extract_features
from dinov2.utils import show_image_from_tensor

In [2]:
args = argparse.Namespace(config_file='dinov2/configs/eval/vits14_pretrain.yaml', pretrained_weights='models/dinov2_vits14_pretrain.pth', output_dir='results/NIH/dinov2_vits14/knn', opts=[], train_dataset_str='NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train_tmp', val_dataset_str='NIHChestXray:split=VAL:root=/mnt/d/data/NIH/test_tmp', nb_knn=[5, 20, 50, 100, 200], temperature=0.07, gather_on_cpu=False, batch_size=8, n_per_class_list=[-1], n_tries=1, ngpus=1, nodes=1, timeout=2800, partition='learnlab', use_volta32=False, comment='', exclude='')
model, autocast_dtype = setup_and_build_model(args)
model = ModelWithNormalize(model)

I20230813 21:39:27 683 dinov2 config.py:60] git:
  sha: e326bc6424a2557c160a3cdb326324ed5f8f1ebe, status: has uncommitted changes, branch: main

I20230813 21:39:27 683 dinov2 config.py:61] batch_size: 8
comment: 
config_file: dinov2/configs/eval/vits14_pretrain.yaml
exclude: 
gather_on_cpu: False
n_per_class_list: [-1]
n_tries: 1
nb_knn: [5, 20, 50, 100, 200]
ngpus: 1
nodes: 1
opts: ['train.output_dir=/mnt/c/Users/user/Desktop/dinov2/results/NIH/dinov2_vits14/knn']
output_dir: /mnt/c/Users/user/Desktop/dinov2/results/NIH/dinov2_vits14/knn
partition: learnlab
pretrained_weights: models/dinov2_vits14_pretrain.pth
temperature: 0.07
timeout: 2800
train_dataset_str: NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train_tmp
use_volta32: False
val_dataset_str: NIHChestXray:split=VAL:root=/mnt/d/data/NIH/test_tmp
I20230813 21:39:27 683 dinov2 config.py:27] sqrt scaling learning rate; base: 0.004, new: 0.001
I20230813 21:39:27 683 dinov2 config.py:34] MODEL:
  WEIGHTS: ''
compute_precision:
  gra

In [3]:
transform = make_classification_eval_transform()
train_dataset = make_dataset(
    dataset_str=args.train_dataset_str,
    transform=transform,
)
val_dataset = make_dataset(
    dataset_str=args.val_dataset_str,
    transform=transform,
)

I20230813 21:39:31 683 dinov2 loaders.py:89] using dataset: "NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train_tmp"
I20230813 21:39:34 683 dinov2 nih_chest_xray.py:67] 86480 x-ray's are missing from TRAIN set
I20230813 21:39:34 683 dinov2 loaders.py:94] # of dataset samples: 44
I20230813 21:39:34 683 dinov2 loaders.py:89] using dataset: "NIHChestXray:split=VAL:root=/mnt/d/data/NIH/test_tmp"
I20230813 21:39:38 683 dinov2 nih_chest_xray.py:67] 86480 x-ray's are missing from VAL set
I20230813 21:39:38 683 dinov2 loaders.py:94] # of dataset samples: 44


In [4]:
train_dataset_str = args.train_dataset_str
val_dataset_str = args.val_dataset_str
batch_size = args.batch_size
gather_on_cpu = args.gather_on_cpu
num_workers = 0

In [5]:
with torch.cuda.amp.autocast(dtype=autocast_dtype):
    train_features, train_labels = extract_features(
        model, train_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )
    model.eval()
    val_features, val_labels = extract_features(
        model, val_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )

train_features = train_features.cpu().numpy()
train_labels = train_labels.cpu().numpy()
val_features = val_features.cpu().numpy()
val_labels = val_labels.cpu().numpy()

I20230813 21:39:38 683 dinov2 loaders.py:164] sampler: none
I20230813 21:39:38 683 dinov2 loaders.py:211] using PyTorch data loader
I20230813 21:39:38 683 dinov2 loaders.py:224] # of batches: 6
I20230813 21:39:39 683 dinov2 utils.py:139] Storing features into tensor of shape torch.Size([44, 384])
I20230813 21:39:39 683 dinov2 helpers.py:103]   [0/6]  eta: 0:00:09    time: 1.540404  data: 0.497542  max mem: 160
I20230813 21:39:42 683 dinov2 helpers.py:103]   [5/6]  eta: 0:00:00    time: 0.792999  data: 0.611803  max mem: 162
I20230813 21:39:42 683 dinov2 helpers.py:131]  Total time: 0:00:04 (0.793415 s / it)
I20230813 21:39:42 683 dinov2 utils.py:151] Features shape: (44, 384)
I20230813 21:39:42 683 dinov2 utils.py:152] Labels shape: (44, 10)
I20230813 21:39:42 683 dinov2 loaders.py:164] sampler: none
I20230813 21:39:42 683 dinov2 loaders.py:211] using PyTorch data loader
I20230813 21:39:42 683 dinov2 loaders.py:224] # of batches: 6
I20230813 21:39:43 683 dinov2 utils.py:139] Storing fe

In [6]:
import inspect
from enum import Enum
from typing import Any, Dict, Optional
from torchmetrics import Metric, MetricCollection
from torchmetrics.wrappers import ClasswiseWrapper
from torchmetrics.classification import (MultilabelAUROC, MultilabelF1Score, MultilabelAccuracy, MulticlassF1Score,
                                        MulticlassAccuracy, MulticlassAUROC, Accuracy, BinaryF1Score, BinaryAUROC)

In [7]:
class MetricAveraging(Enum):
    MACRO = "macro"
    MEAN_ACCURACY = "micro"
    MEAN_PER_CLASS_ACCURACY = "macro"
    MULTILABEL_ACCURACY = "macro"
    MULTILABEL_AUROC = "macro"
    PER_CLASS_ACCURACY = "none"

    def __str__(self):
        return self.value

In [92]:

def build_multilabel_auroc_metric(average_type: MetricAveraging, num_labels: int, labels=None):
    metrics: Dict[str, Metric] = {
        f"auroc": MultilabelAUROC(num_labels=num_labels, average=average_type.value),
        "class-specific": MetricCollection({
            "auroc": ClasswiseWrapper(MultilabelAUROC(num_labels=num_labels, average=None), labels=labels, prefix="_"),
        }) 
    }
    return MetricCollection(metrics)

In [93]:
metric = build_multilabel_auroc_metric(MetricAveraging.MACRO, 10, ["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k"])

In [94]:
for m in metric.values():
    m = m.to('cuda')

In [95]:
tar = np.expand_dims(val_labels[0, :], axis=0)
preds = np.expand_dims(np.array([0.7, 0.5, 0.4, 0.4, 0.4, 0.3, 0.8, 0.2, 0.1, 0.5]), axis=0) 

In [96]:
res = {"target": torch.tensor(tar, device='cuda'), "preds": torch.tensor(preds, device='cuda')}

In [97]:
res

{'target': tensor([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]], device='cuda:0'),
 'preds': tensor([[0.7000, 0.5000, 0.4000, 0.4000, 0.4000, 0.3000, 0.8000, 0.2000, 0.1000,
          0.5000]], device='cuda:0', dtype=torch.float64)}

In [98]:
print({**res})

{'target': tensor([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0]], device='cuda:0'), 'preds': tensor([[0.7000, 0.5000, 0.4000, 0.4000, 0.4000, 0.3000, 0.8000, 0.2000, 0.1000,
         0.5000]], device='cuda:0', dtype=torch.float64)}


In [99]:
metric.update(**res)

In [100]:
for i in metric.items():
    print(i)

('auroc', MultilabelAUROC())
('class-specific_auroc', ClasswiseWrapper(
  (metric): MultilabelAUROC()
))


In [None]:
with torch.cuda.amp.autocast(dtype=autocast_dtype):
    train_features, train_labels = extract_features(
        model, train_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )
    model.eval()
    val_features, val_labels = extract_features(
        model, val_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )

train_features = train_features.cpu().numpy()
train_labels = train_labels.cpu().numpy()
val_features = val_features.cpu().numpy()
val_labels = val_labels.cpu().numpy()

I20230813 21:39:38 683 dinov2 loaders.py:164] sampler: none
I20230813 21:39:38 683 dinov2 loaders.py:211] using PyTorch data loader
I20230813 21:39:38 683 dinov2 loaders.py:224] # of batches: 6
I20230813 21:39:39 683 dinov2 utils.py:139] Storing features into tensor of shape torch.Size([44, 384])
I20230813 21:39:39 683 dinov2 helpers.py:103]   [0/6]  eta: 0:00:09    time: 1.540404  data: 0.497542  max mem: 160
I20230813 21:39:42 683 dinov2 helpers.py:103]   [5/6]  eta: 0:00:00    time: 0.792999  data: 0.611803  max mem: 162
I20230813 21:39:42 683 dinov2 helpers.py:131]  Total time: 0:00:04 (0.793415 s / it)
I20230813 21:39:42 683 dinov2 utils.py:151] Features shape: (44, 384)
I20230813 21:39:42 683 dinov2 utils.py:152] Labels shape: (44, 10)
I20230813 21:39:42 683 dinov2 loaders.py:164] sampler: none
I20230813 21:39:42 683 dinov2 loaders.py:211] using PyTorch data loader
I20230813 21:39:42 683 dinov2 loaders.py:224] # of batches: 6
I20230813 21:39:43 683 dinov2 utils.py:139] Storing fe

In [169]:
metrics = {"a": metric}

In [170]:
metric.items()

odict_items([('auroc', MultilabelAUROC()), ('class-specific_auroc', ClasswiseWrapper(
  (metric): MultilabelAUROC()
))])

In [171]:
stats = {k: m.compute() for k, m in metric.items()}
stats

{'auroc': tensor(0., device='cuda:0', dtype=torch.float64),
 'class-specific_auroc': {'_a': tensor(0., device='cuda:0', dtype=torch.float64),
  '_b': tensor(0., device='cuda:0', dtype=torch.float64),
  '_c': tensor(0., device='cuda:0', dtype=torch.float64),
  '_d': tensor(0., device='cuda:0', dtype=torch.float64),
  '_e': tensor(0., device='cuda:0', dtype=torch.float64),
  '_f': tensor(0., device='cuda:0', dtype=torch.float64),
  '_g': tensor(0., device='cuda:0', dtype=torch.float64),
  '_h': tensor(0., device='cuda:0', dtype=torch.float64),
  '_i': tensor(0., device='cuda:0', dtype=torch.float64),
  '_j': tensor(0., device='cuda:0', dtype=torch.float64)}}

In [176]:
def apply_method_to_nested_values(d, method_name):
    result = {}
    print(d)
    for key, value in d.items():
        if isinstance(value, MetricCollection):
            result[key] = apply_method_to_nested_values(value, method_name)
        else:
            method = getattr(value, method_name)
            result[key] = method()
    return result

In [178]:
stats

{'a': {'auroc': tensor(0., device='cuda:0', dtype=torch.float64),
  'class-specific_auroc': {'_a': tensor(0., device='cuda:0', dtype=torch.float64),
   '_b': tensor(0., device='cuda:0', dtype=torch.float64),
   '_c': tensor(0., device='cuda:0', dtype=torch.float64),
   '_d': tensor(0., device='cuda:0', dtype=torch.float64),
   '_e': tensor(0., device='cuda:0', dtype=torch.float64),
   '_f': tensor(0., device='cuda:0', dtype=torch.float64),
   '_g': tensor(0., device='cuda:0', dtype=torch.float64),
   '_h': tensor(0., device='cuda:0', dtype=torch.float64),
   '_i': tensor(0., device='cuda:0', dtype=torch.float64),
   '_j': tensor(0., device='cuda:0', dtype=torch.float64)}}}

In [71]:
train_loader = torch.utils.data.DataLoader(train_dataset)

In [78]:
train_loader.dataset.class_names

array(['Atelectasis', 'Cardiomegaly', 'Effusion', 'Emphysema', 'Fibrosis',
       'Infiltration', 'Mass', 'No Finding', 'Nodule',
       'Pleural_Thickening'], dtype=object)

## MLkNN

In [224]:
metric = build_multilabel_auroc_metric(MetricAveraging.MACRO, 10, list(train_dataset.class_names))

In [225]:
import numpy as np
import sklearn.metrics
from dinov2.eval.utils import MLkNN


classifier = MLkNN(5)
classifier.fit(train_features, train_labels)
results = classifier.predict_proba(val_features).toarray()

In [227]:
import numpy as np
import sklearn.metrics
from dinov2.eval.utils import MLkNN

results_dict = {}

for k in [20]:
    results_dict[f"{k}"] = {}

    classifier = MLkNN(k)
    classifier.fit(train_features, train_labels)
    results = torch.tensor(classifier.predict_proba(val_features).toarray(), dtype=torch.float64).cuda()

    metric.update(**{"target": torch.tensor(val_labels).cuda(), "preds": results})

    # Disease-specific scores
    # disease_results = {"AUC": {}, "Accuracy": {}, "F1": {}}
    # for index, disease in enumerate(train_dataset.class_names):
    #     disease_results["AUC"][disease] =  sklearn.metrics.roc_auc_score(val_labels[:, index], results[:, index])

    # results_dict[f"{k}"]["Disease-specific"] = disease_results

In [229]:
metric.compute()

{'auroc': tensor(0.5880, device='cuda:0'),
 '_Atelectasis': tensor(0.6365, device='cuda:0'),
 '_Cardiomegaly': tensor(0.9884, device='cuda:0'),
 '_Effusion': tensor(0.2558, device='cuda:0'),
 '_Emphysema': tensor(0.5943, device='cuda:0'),
 '_Fibrosis': tensor(0.6820, device='cuda:0'),
 '_Infiltration': tensor(0.4884, device='cuda:0'),
 '_Mass': tensor(0.5250, device='cuda:0'),
 '_No Finding': tensor(0.8837, device='cuda:0'),
 '_Nodule': tensor(0.5993, device='cuda:0'),
 '_Pleural_Thickening': tensor(0.2262, device='cuda:0')}

In [218]:
import numpy as np
import sklearn.metrics
from dinov2.eval.utils import MLkNN

ab = {}

for k in [20]:
    ab[f"{k}"] = {}

    classifier = MLkNN(k)
    classifier.fit(train_features, train_labels)
    results = classifier.predict_proba(val_features).toarray()

    ab[f"{k}"]["mAUC Combined"]  = sklearn.metrics.roc_auc_score(val_labels, results, average="macro")

    disease_results = {"AUC": {}, "Accuracy": {}, "F1": {}}
    for index, disease in enumerate(train_dataset.class_names):
        disease_results["AUC"][disease] =  sklearn.metrics.roc_auc_score(val_labels[:, index], results[:, index])

    ab[f"{k}"]["Disease-specific"] = disease_results

In [188]:
import numpy as np
import sklearn.metrics
from dinov2.eval.utils import MLkNN

results_dict = {}

for k in args.nb_knn:
    results_dict[f"{k}"] = {}

    classifier = MLkNN(k)
    classifier.fit(train_features, train_labels)
    results = classifier.predict_proba(val_features).toarray()
    
    results_dict[f"{k}"]["Hamming Loss"]  = sklearn.metrics.hamming_loss(val_labels, results>0.5)
    results_dict[f"{k}"]["Accuracy"]  = sklearn.metrics.accuracy_score(val_labels, results>0.5)
    results_dict[f"{k}"]["mAUC Combined"]  = sklearn.metrics.roc_auc_score(val_labels, results, average="macro")
    results_dict[f"{k}"]["F1"]  = sklearn.metrics.f1_score(val_labels, results, average="macro")

    # Disease-specific scores
    disease_results = {"AUC": {}, "Accuracy": {}, "F1": {}}
    for index, disease in enumerate(train_dataset.class_names):
        disease_results["AUC"][disease] =  sklearn.metrics.roc_auc_score(val_labels[:, index], results[:, index])
        disease_results["Accuracy"][disease] =  sklearn.metrics.accuracy_score(val_labels[:, index], results[:, index])
        disease_results["F1"][disease] =  sklearn.metrics.f1_score(val_labels[:, index], results[:, index])

    results_dict[f"{k}"]["Disease-specific"] = disease_results

(44, 10)


ValueError: Classification metrics can't handle a mix of multilabel-indicator and continuous-multioutput targets