In [73]:
import os
import sys
import argparse
import IPython 
from PIL import Image

import torch
import torchvision
import numpy as np
import pandas as pd
import skimage
from scipy import sparse
import matplotlib.pyplot as plt 
import torchxrayvision as xrv

from dinov2.data import SamplerType, make_data_loader, make_dataset
from dinov2.data.datasets import NIHChestXray
from dinov2.data.transforms import make_xray_classification_eval_transform, make_classification_eval_transform
from dinov2.eval.setup import setup_and_build_model
from dinov2.eval.utils import ModelWithNormalize, evaluate, extract_features
from dinov2.MLkNN import MLkNN 
from dinov2.utils import show_image_from_tensor

In [None]:
args = argparse.Namespace(config_file='dinov2/configs/eval/vits14_pretrain.yaml', pretrained_weights='models/dinov2_vits14_pretrain.pth', output_dir='results/NIH/dinov2_vits14/knn', opts=[], train_dataset_str='NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train_tmp', val_dataset_str='NIHChestXray:split=VAL:root=/mnt/d/data/NIH/test_tmp', nb_knn=[5, 20, 50, 100, 200], temperature=0.07, gather_on_cpu=False, batch_size=8, n_per_class_list=[-1], n_tries=1, ngpus=1, nodes=1, timeout=2800, partition='learnlab', use_volta32=False, comment='', exclude='')
model, autocast_dtype = setup_and_build_model(args)
model = ModelWithNormalize(model)

In [None]:
transform = make_classification_eval_transform()
train_dataset = make_dataset(
    dataset_str=args.train_dataset_str,
    transform=transform,
)
val_dataset = make_dataset(
    dataset_str=args.val_dataset_str,
    transform=transform,
)

In [None]:
train_dataset_str = args.train_dataset_str
val_dataset_str = args.val_dataset_str
batch_size = args.batch_size
gather_on_cpu = args.gather_on_cpu
num_workers = 0

In [None]:
with torch.cuda.amp.autocast(dtype=autocast_dtype):
    train_features, train_labels = extract_features(
        model, train_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )
    val_features, val_labels = extract_features(
        model, val_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )

train_features = train_features.cpu().numpy()
train_labels = train_labels.cpu().numpy()
val_features = val_features.cpu().numpy()
val_labels = val_labels.cpu().numpy()

In [91]:
import numpy as np
import sklearn.metrics

for k in args.nb_knn:
    classifier = MLkNN(k)
    classifier.fit(train_features, train_labels)
    results = classifier.predict(train_features).toarray()
    print(f"### for {k}-NN ###")
    print("### Hamming loss: %f" % sklearn.metrics.hamming_loss(train_labels, results))
    print("### Accuracy score: %f" % sklearn.metrics.accuracy_score(train_labels, results))
    print("### mAUC score combined: %f" % sklearn.metrics.roc_auc_score(train_labels, results, average="weighted"))
    print("### F1 score: %f" % sklearn.metrics.f1_score(train_labels, results, average="micro"))

    # Disease-specific scores
    disease_results = {"AUC": {}, "Accuracy": {}, "F1": {}}
    for index, disease in enumerate(train_dataset.class_names):
        disease_results["AUC"][disease] =  sklearn.metrics.roc_auc_score(train_labels[:, index], results[:, index])
        disease_results["Accuracy"][disease] =  sklearn.metrics.accuracy_score(train_labels[:, index], results[:, index])
        disease_results["F1"][disease] =  sklearn.metrics.f1_score(train_labels[:, index], results[:, index])
    print("## Disease-specific AUC scores")    
    print(pd.DataFrame(disease_results))

### for 5-NN ###
### Hamming loss: 0.093182
### Accuracy score: 0.409091
### mAUC score combined: 0.633929
### F1 score: 0.481013
## Disease-specific AUC scores
                         AUC  Accuracy        F1
Atelectasis         0.500000  0.954545  0.000000
Cardiomegaly        0.500000  0.909091  0.000000
Effusion            0.500000  0.863636  0.000000
Emphysema           0.500000  0.954545  0.000000
Fibrosis            0.500000  0.954545  0.000000
Infiltration        0.666667  0.863636  0.500000
Mass                0.500000  0.954545  0.000000
No Finding          0.772727  0.772727  0.761905
Nodule              0.500000  0.909091  0.000000
Pleural_Thickening  0.500000  0.931818  0.000000
### for 20-NN ###
### Hamming loss: 0.104545
### Accuracy score: 0.363636
### mAUC score combined: 0.589286
### F1 score: 0.410256
## Disease-specific AUC scores
                         AUC  Accuracy        F1
Atelectasis         0.500000  0.954545  0.000000
Cardiomegaly        0.500000  0.909091  

ValueError: Expected n_neighbors <= n_samples,  but n_samples = 44, n_neighbors = 50