In [1]:
import os
import sys
import argparse
import IPython 
from PIL import Image

import torch
import torchvision
import numpy as np
import skimage
from scipy import sparse
import matplotlib.pyplot as plt 
import torchxrayvision as xrv

from dinov2.data import SamplerType, make_data_loader, make_dataset
from dinov2.data.datasets import NIHChestXray
from dinov2.data.transforms import make_xray_classification_eval_transform, make_classification_eval_transform
from dinov2.eval.setup import setup_and_build_model
from dinov2.eval.utils import ModelWithNormalize, evaluate, extract_features
from dinov2.MLkNN import MLkNN 
from dinov2.utils import show_image_from_tensor

In [2]:
args = argparse.Namespace(config_file='dinov2/configs/eval/vits14_pretrain.yaml', pretrained_weights='models/dinov2_vits14_pretrain.pth', output_dir='results/NIH/dinov2_vits14/knn', opts=[], train_dataset_str='NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train_tmp', val_dataset_str='NIHChestXray:split=VAL:root=/mnt/d/data/NIH/test_tmp', nb_knn=[5, 20, 50, 100, 200], temperature=0.07, gather_on_cpu=False, batch_size=8, n_per_class_list=[-1], n_tries=1, ngpus=1, nodes=1, timeout=2800, partition='learnlab', use_volta32=False, comment='', exclude='')
model, autocast_dtype = setup_and_build_model(args)
model = ModelWithNormalize(model)

I20230803 13:36:17 3210 dinov2 config.py:60] git:
  sha: d9d3e00a8c109a3671cfa08caa07d9c98fa22b30, status: has uncommitted changes, branch: main

I20230803 13:36:17 3210 dinov2 config.py:61] batch_size: 8
comment: 
config_file: dinov2/configs/eval/vits14_pretrain.yaml
exclude: 
gather_on_cpu: False
n_per_class_list: [-1]
n_tries: 1
nb_knn: [5, 20, 50, 100, 200]
ngpus: 1
nodes: 1
opts: ['train.output_dir=/mnt/c/Users/user/Desktop/dinov2/results/NIH/dinov2_vits14/knn']
output_dir: /mnt/c/Users/user/Desktop/dinov2/results/NIH/dinov2_vits14/knn
partition: learnlab
pretrained_weights: models/dinov2_vits14_pretrain.pth
temperature: 0.07
timeout: 2800
train_dataset_str: NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train_tmp
use_volta32: False
val_dataset_str: NIHChestXray:split=VAL:root=/mnt/d/data/NIH/test_tmp
I20230803 13:36:17 3210 dinov2 config.py:27] sqrt scaling learning rate; base: 0.004, new: 0.001
I20230803 13:36:17 3210 dinov2 config.py:34] MODEL:
  WEIGHTS: ''
compute_precision:
 

In [3]:
transform = make_classification_eval_transform()
train_dataset = make_dataset(
    dataset_str=args.train_dataset_str,
    transform=transform,
)
val_dataset = make_dataset(
    dataset_str=args.val_dataset_str,
    transform=transform,
)

I20230803 13:36:19 3210 dinov2 loaders.py:89] using dataset: "NIHChestXray:split=TRAIN:root=/mnt/d/data/NIH/train_tmp"
I20230803 13:36:21 3210 dinov2 nih_chest_xray.py:67] 86480 x-ray's are missing from TRAIN set
I20230803 13:36:21 3210 dinov2 loaders.py:94] # of dataset samples: 44
I20230803 13:36:21 3210 dinov2 loaders.py:89] using dataset: "NIHChestXray:split=VAL:root=/mnt/d/data/NIH/test_tmp"
I20230803 13:36:24 3210 dinov2 nih_chest_xray.py:67] 86480 x-ray's are missing from VAL set
I20230803 13:36:24 3210 dinov2 loaders.py:94] # of dataset samples: 44


In [4]:
train_dataset_str = args.train_dataset_str
val_dataset_str = args.val_dataset_str
batch_size = args.batch_size
gather_on_cpu = args.gather_on_cpu
num_workers = 0

In [57]:
with torch.cuda.amp.autocast(dtype=autocast_dtype):
    train_features, train_labels = extract_features(
        model, train_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )
    val_features, val_labels = extract_features(
        model, val_dataset, batch_size, num_workers, gather_on_cpu=gather_on_cpu
    )

train_features = train_features.cpu().numpy()
train_labels = train_labels.cpu().numpy()
val_features = val_features.cpu().numpy()
val_labels = val_labels.cpu().numpy()

I20230803 14:17:57 3210 dinov2 loaders.py:164] sampler: none
I20230803 14:17:57 3210 dinov2 loaders.py:211] using PyTorch data loader
I20230803 14:17:57 3210 dinov2 loaders.py:224] # of batches: 6
I20230803 14:17:57 3210 dinov2 utils.py:131] Storing features into tensor of shape torch.Size([44, 384])
I20230803 14:17:57 3210 dinov2 helpers.py:103]   [0/6]  eta: 0:00:01    time: 0.262892  data: 0.256785  max mem: 162
I20230803 14:17:58 3210 dinov2 helpers.py:103]   [5/6]  eta: 0:00:00    time: 0.218041  data: 0.191460  max mem: 162
I20230803 14:17:58 3210 dinov2 helpers.py:131]  Total time: 0:00:01 (0.218460 s / it)
I20230803 14:17:58 3210 dinov2 utils.py:143] Features shape: (44, 384)
I20230803 14:17:58 3210 dinov2 utils.py:144] Labels shape: (44, 10)
I20230803 14:17:58 3210 dinov2 loaders.py:164] sampler: none
I20230803 14:17:58 3210 dinov2 loaders.py:211] using PyTorch data loader
I20230803 14:17:58 3210 dinov2 loaders.py:224] # of batches: 6
I20230803 14:17:59 3210 dinov2 utils.py:13

In [59]:
import numpy as np
import sklearn.metrics

for i in args.nb_knn:
    classifier = MLkNN(i)
    classifier.fit(train_features, train_labels)
    results = classifier.predict(train_features).toarray()
    print(f"### for {i} NN ###")
    print("### Hamming loss ###")
    print("SKML: %f" % sklearn.metrics.hamming_loss(train_labels, results))
    print("### Accuracy score ###")
    print("SKML: %f" % sklearn.metrics.accuracy_score(train_labels, results))
    print("### F1 score ###")
    print("SKML: %f" % sklearn.metrics.f1_score(train_labels, results, average="micro"))

### for 5 NN ###
### Hamming loss ###
SKML: 0.093182
### Accuracy score ###
SKML: 0.409091
### F1 score ###
SKML: 0.481013
### for 20 NN ###
### Hamming loss ###
SKML: 0.104545
### Accuracy score ###
SKML: 0.363636
### F1 score ###
SKML: 0.410256


ValueError: Expected n_neighbors <= n_samples,  but n_samples = 44, n_neighbors = 50