In [48]:
import os
import sys
import argparse
import IPython 
from PIL import Image

import torch
import torchvision
import numpy as np
import pandas as pd
import skimage
from scipy import sparse
import matplotlib.pyplot as plt 
import torchxrayvision as xrv

from dinov2.data import SamplerType, make_data_loader, make_dataset
from dinov2.data.datasets import NIHChestXray
from dinov2.data.transforms import make_xray_classification_eval_transform, make_classification_eval_transform
from dinov2.eval.setup import setup_and_build_model
from dinov2.eval.utils import ModelWithNormalize, evaluate, extract_features
from dinov2.utils import show_image_from_tensor

In [3]:
# Settings handed to setup_and_build_model() and read again by later cells
# (dataset strings, batch size, list of k values, ...).
# NOTE(review): the dataset roots are absolute, machine-specific paths —
# consider making them configurable before sharing this notebook.
args = argparse.Namespace(
    config_file='dinov2/configs/eval/vits14_pretrain.yaml',
    pretrained_weights='models/dinov2_vits14_pretrain.pth',
    output_dir='results/NIH/dinov2_vits14/knn',
    opts=[],
    train_dataset_str='NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train',
    val_dataset_str='NIHChestXray:split=TEST:root=/mnt/d/data/NIH/test',
    nb_knn=[5, 20, 50, 100, 200],
    temperature=0.07,
    gather_on_cpu=False,
    batch_size=8,
    n_per_class_list=[-1],
    n_tries=1,
    ngpus=1,
    nodes=1,
    timeout=2800,
    partition='learnlab',
    use_volta32=False,
    comment='',
    exclude='',
)

# Build the network together with the autocast dtype, then wrap it in
# ModelWithNormalize (presumably normalizes the output features — see
# dinov2.eval.utils to confirm).
backbone, autocast_dtype = setup_and_build_model(args)
model = ModelWithNormalize(backbone)

I20230809 00:13:48 6875 dinov2 config.py:60] git:
  sha: 6cc52e4279135e8bb60fd4caadb5071dd4d82445, status: has uncommitted changes, branch: main

I20230809 00:13:48 6875 dinov2 config.py:61] batch_size: 8
comment: 
config_file: dinov2/configs/eval/vits14_pretrain.yaml
exclude: 
gather_on_cpu: False
n_per_class_list: [-1]
n_tries: 1
nb_knn: [5, 20, 50, 100, 200]
ngpus: 1
nodes: 1
opts: ['train.output_dir=/mnt/c/Users/user/Desktop/dinov2/results/NIH/dinov2_vits14/knn']
output_dir: /mnt/c/Users/user/Desktop/dinov2/results/NIH/dinov2_vits14/knn
partition: learnlab
pretrained_weights: models/dinov2_vits14_pretrain.pth
temperature: 0.07
timeout: 2800
train_dataset_str: NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train
use_volta32: False
val_dataset_str: NIHChestXray:split=TEST:root=/mnt/d/data/NIH/test
I20230809 00:13:48 6875 dinov2 config.py:27] sqrt scaling learning rate; base: 0.004, new: 0.001
I20230809 00:13:48 6875 dinov2 config.py:34] MODEL:
  WEIGHTS: ''
compute_precision:
  grad_s

In [49]:
# One evaluation transform instance is shared by both splits.
# NOTE(review): make_xray_classification_eval_transform is imported at the top
# of the notebook but not used in the visible cells — confirm that the generic
# classification transform is the intended one for this dataset.
transform = make_classification_eval_transform()

# Datasets are created in order: train split first, then validation split.
train_dataset, val_dataset = (
    make_dataset(dataset_str=split_str, transform=transform)
    for split_str in (args.train_dataset_str, args.val_dataset_str)
)

I20230809 13:18:13 6875 dinov2 loaders.py:89] using dataset: "NIHChestXray:split=TEST:root=/mnt/d/data/NIH/test"
I20230809 13:18:16 6875 dinov2 nih_chest_xray.py:67] 0 x-ray's are missing from TEST set
I20230809 13:18:16 6875 dinov2 loaders.py:94] # of dataset samples: 25,596


In [19]:
# Short module-level aliases for the settings used by the extraction cell.
train_dataset_str, val_dataset_str = args.train_dataset_str, args.val_dataset_str
batch_size, gather_on_cpu = args.batch_size, args.gather_on_cpu

# Passed positionally as the worker count to extract_features().
# NOTE(review): presumably 0 means "load data in the main process" — confirm
# against the data loader that extract_features builds.
num_workers = 0

In [20]:
# Switch the model to evaluation mode BEFORE any features are extracted.
# Previously this call sat between the two extraction passes, so the train
# features were not guaranteed to be computed in the same module mode as the
# validation features. Calling it up front makes both passes consistent,
# whatever mode the earlier setup cell left the model in.
model.eval()

# Both passes run under CUDA autocast using the dtype returned by
# setup_and_build_model().
with torch.cuda.amp.autocast(dtype=autocast_dtype):
    train_features, train_labels = extract_features(
        model, train_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )
    val_features, val_labels = extract_features(
        model, val_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )

# Bring everything to host memory as NumPy arrays for the classifier cells
# further down the notebook.
train_features = train_features.cpu().numpy()
train_labels = train_labels.cpu().numpy()
val_features = val_features.cpu().numpy()
val_labels = val_labels.cpu().numpy()

I20230809 00:19:11 6875 dinov2 loaders.py:164] sampler: none
I20230809 00:19:11 6875 dinov2 loaders.py:211] using PyTorch data loader
I20230809 00:19:11 6875 dinov2 loaders.py:224] # of batches: 10,816
I20230809 00:19:20 6875 dinov2 utils.py:139] Storing features into tensor of shape torch.Size([86524, 384])
I20230809 00:19:20 6875 dinov2 helpers.py:103]   [    0/10816]  eta: 1 day, 2:31:13    time: 8.827078  data: 5.333767  max mem: 277
I20230809 00:19:38 6875 dinov2 helpers.py:103]   [   10/10816]  eta: 7:21:02    time: 2.448832  data: 2.125461  max mem: 300
I20230809 00:19:53 6875 dinov2 helpers.py:103]   [   20/10816]  eta: 5:58:25    time: 1.650230  data: 1.643480  max mem: 300
I20230809 00:20:06 6875 dinov2 helpers.py:103]   [   30/10816]  eta: 5:19:00    time: 1.403729  data: 1.397043  max mem: 300
I20230809 00:20:17 6875 dinov2 helpers.py:103]   [   40/10816]  eta: 4:47:39    time: 1.191907  data: 1.185574  max mem: 300
I20230809 00:20:31 6875 dinov2 helpers.py:103]   [   50/10

## MLkNN

In [63]:
import numpy as np
import sklearn.metrics
from dinov2.eval.utils import MLkNN

# Fit and score one MLkNN classifier per neighbourhood size in args.nb_knn.
# results_dict maps str(k) -> aggregate metrics plus a "Disease-specific"
# sub-dictionary keyed by metric name and then by class name.
#
# NOTE(review): every AUC below is computed from the output of predict(), not
# from scores or probabilities. If predict() yields hard 0/1 labels, the AUC is
# evaluated at a single operating point only — consider a score-based output
# if MLkNN provides one.
results_dict = {}

for k in args.nb_knn:
    # Register the entry up front so an interrupted run still leaves the
    # partially filled metrics for this k in results_dict.
    scores = results_dict[f"{k}"] = {}

    classifier = MLkNN(k)
    classifier.fit(train_features, train_labels)
    # predict() output is densified with .toarray() before scoring.
    results = classifier.predict(val_features).toarray()

    print(results.shape)

    # Metrics over all labels at once.
    scores["Hamming Loss"] = sklearn.metrics.hamming_loss(val_labels, results)
    scores["Accuracy"] = sklearn.metrics.accuracy_score(val_labels, results)
    scores["mAUC Combined"] = sklearn.metrics.roc_auc_score(
        val_labels, results, average="macro"
    )
    scores["F1"] = sklearn.metrics.f1_score(val_labels, results, average="macro")

    # Metrics per label column; column order follows train_dataset.class_names.
    disease_results = {"AUC": {}, "Accuracy": {}, "F1": {}}
    for index, disease in enumerate(train_dataset.class_names):
        column_true = val_labels[:, index]
        column_pred = results[:, index]
        disease_results["AUC"][disease] = sklearn.metrics.roc_auc_score(column_true, column_pred)
        disease_results["Accuracy"][disease] = sklearn.metrics.accuracy_score(column_true, column_pred)
        disease_results["F1"][disease] = sklearn.metrics.f1_score(column_true, column_pred)

    scores["Disease-specific"] = disease_results

KeyboardInterrupt: 