In [1]:
import os
os.getcwd()

'/content'

In [2]:
os.chdir('/content/drive/MyDrive/DINO')

In [3]:
import torch
from torch import nn
import torch.distributed as dist
import torch.backends.cudnn as cudnn
from torchvision import datasets
from torchvision import transforms as pth_transforms

import numpy as np

import utils
import vision_transformer as vits

In [4]:
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self, images, labels=None, transforms=None):
        self.X = images
        self.y = labels
        self.transforms = transforms
         
    def __len__(self):
        return (len(self.X))
    
    def __getitem__(self, i):
        data = self.X[i]
        #print(data.shape)
        data = np.asarray(data).astype(np.float32)
        #data = np.asarray(data).astype(np.uint8).reshape(28, 28, 1)
        
        if self.transforms:
            data = self.transforms(data)
            
        if self.y is not None:
            return (data, self.y[i])
        else:
            return data

In [5]:
def extract_feature_pipeline(args):
    # ============ preparing data ... ============
    train_dataset = datasets.FashionMNIST(args.data_path, download=True, train=False)
    val_dataset = datasets.FashionMNIST(args.data_path, download=True, train=True)
    X_train = torch.flatten(train_dataset.data, start_dim=1).numpy()/255
    y_train = train_dataset.targets.numpy()

    X_val = torch.flatten(val_dataset.data, start_dim=1).numpy()/255
    y_val = val_dataset.targets.numpy()
    dataset_train = CustomDataset(X_train, y_train)
    dataset_val = CustomDataset(X_val, y_val)
    sampler = torch.utils.data.DistributedSampler(dataset_train, shuffle=False)
    data_loader_train = torch.utils.data.DataLoader(
        dataset_train,
        sampler=sampler,
        batch_size=args.batch_size_per_gpu,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
    )
    data_loader_val = torch.utils.data.DataLoader(
        dataset_val,
        batch_size=args.batch_size_per_gpu,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
    )
    print(f"Data loaded with {len(dataset_train)} train and {len(dataset_val)} val imgs.")

    # ============ building network ... ============
    model = vits.__dict__[args.arch](patch_size=args.patch_size, num_classes=0)
    print(f"Model {args.arch} {args.patch_size}x{args.patch_size} built.")
    model.cuda()
    utils.load_pretrained_weights(model, args.pretrained_weights, args.checkpoint_key, args.arch, args.patch_size)
    model.eval()

    # ============ extract features ... ============
    print("Extracting features for train set...")
    train_features = extract_features(model, data_loader_train)
    print("Extracting features for val set...")
    test_features = extract_features(model, data_loader_val)

    if utils.get_rank() == 0:
        train_features = nn.functional.normalize(train_features, dim=1, p=2)
        test_features = nn.functional.normalize(test_features, dim=1, p=2)

    train_labels = torch.tensor([s for s in dataset_train.y]).long()
    test_labels = torch.tensor([s for s in dataset_val.y]).long()
    # save features and labels
    if args.dump_features and dist.get_rank() == 0:
        torch.save(train_features.cpu(), os.path.join(args.dump_features, "trainfeat.pth"))
        torch.save(test_features.cpu(), os.path.join(args.dump_features, "testfeat.pth"))
        torch.save(train_labels.cpu(), os.path.join(args.dump_features, "trainlabels.pth"))
        torch.save(test_labels.cpu(), os.path.join(args.dump_features, "testlabels.pth"))
    return train_features, test_features, train_labels, test_labels


In [6]:
@torch.no_grad()
def extract_features(model, data_loader):
    metric_logger = utils.MetricLogger(delimiter="  ")
    features = None
    for samples, index in metric_logger.log_every(data_loader, 10):
        samples = samples.cuda(non_blocking=True)
        index = index.cuda(non_blocking=True)
        feats = model(samples).clone()

        # init storage feature matrix
        if dist.get_rank() == 0 and features is None:
            features = torch.zeros(len(data_loader.dataset), feats.shape[-1])
            if args.use_cuda:
                features = features.cuda(non_blocking=True)
            print(f"Storing features into tensor of shape {features.shape}")

        # get indexes from all processes
        y_all = torch.empty(dist.get_world_size(), index.size(0), dtype=index.dtype, device=index.device)
        y_l = list(y_all.unbind(0))
        y_all_reduce = torch.distributed.all_gather(y_l, index, async_op=True)
        y_all_reduce.wait()
        index_all = torch.cat(y_l)

        # share features between processes
        feats_all = torch.empty(
            dist.get_world_size(),
            feats.size(0),
            feats.size(1),
            dtype=feats.dtype,
            device=feats.device,
        )
        output_l = list(feats_all.unbind(0))
        output_all_reduce = torch.distributed.all_gather(output_l, feats, async_op=True)
        output_all_reduce.wait()

        # update storage feature matrix
        if dist.get_rank() == 0:
            if args.use_cuda:
                features.index_copy_(0, index_all, torch.cat(output_l))
            else:
                features.index_copy_(0, index_all.cpu(), torch.cat(output_l).cpu())
    return features

In [7]:
@torch.no_grad()
def knn_classifier(train_features, train_labels, test_features, test_labels, k, T, num_classes=1000):
    top1, top5, total = 0.0, 0.0, 0
    train_features = train_features.t()
    num_test_images, num_chunks = test_labels.shape[0], 100
    imgs_per_chunk = num_test_images // num_chunks
    retrieval_one_hot = torch.zeros(k, num_classes).cuda()
    for idx in range(0, num_test_images, imgs_per_chunk):
        # get the features for test images
        features = test_features[
            idx : min((idx + imgs_per_chunk), num_test_images), :
        ]
        targets = test_labels[idx : min((idx + imgs_per_chunk), num_test_images)]
        batch_size = targets.shape[0]

        # calculate the dot product and compute top-k neighbors
        similarity = torch.mm(features, train_features)
        distances, indices = similarity.topk(k, largest=True, sorted=True)
        candidates = train_labels.view(1, -1).expand(batch_size, -1)
        retrieved_neighbors = torch.gather(candidates, 1, indices)

        retrieval_one_hot.resize_(batch_size * k, num_classes).zero_()
        retrieval_one_hot.scatter_(1, retrieved_neighbors.view(-1, 1), 1)
        distances_transform = distances.clone().div_(T).exp_()
        probs = torch.sum(
            torch.mul(
                retrieval_one_hot.view(batch_size, -1, num_classes),
                distances_transform.view(batch_size, -1, 1),
            ),
            1,
        )
        _, predictions = probs.sort(1, True)

        # find the predictions that match the target
        correct = predictions.eq(targets.data.view(-1, 1))
        top1 = top1 + correct.narrow(1, 0, 1).sum().item()
        top5 = top5 + correct.narrow(1, 0, 5).sum().item()
        total += targets.size(0)
    top1 = top1 * 100.0 / total
    top5 = top5 * 100.0 / total
    return top1, top5

In [8]:
class ReturnIndexDataset(datasets.ImageFolder):
    def __getitem__(self, idx):
        img, lab = super(ReturnIndexDataset, self).__getitem__(idx)
        return img, idx

In [9]:
args={"batch_size_per_gpu": 128,
      "nb_knn": [10, 20, 100, 200], 
      "temperature": 0.07,
      "pretrained_weights": './checkpoint.pth',
      "use_cuda": True,
      "arch": "vit_tiny",
      "patch_size": 4, 
      "checkpoint_key": "teacher", 
      "dump_features": None,
      "load_features": None, 
      "num_workers": 10,
      "dist_url": "env://", 
      "local_rank": 0, 
      "data_path": './data/FashionMNIST'}

class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args=AttrDict(args)

In [10]:
utils.init_distributed_mode(args)
print("git:\n  {}\n".format(utils.get_sha()))
print("\n".join("%s: %s" % (k, str(v)) for k, v in sorted(dict(vars(args)).items())))
cudnn.benchmark = True

Will run the code on one GPU.
| distributed init (rank 0): env://
git:
  sha: N/A, status: clean, branch: N/A

arch: vit_tiny
batch_size_per_gpu: 128
checkpoint_key: teacher
data_path: ./data/FashionMNIST
dist_url: env://
dump_features: None
gpu: 0
load_features: None
local_rank: 0
nb_knn: [10, 20, 100, 200]
num_workers: 10
patch_size: 4
pretrained_weights: ./checkpoint.pth
rank: 0
temperature: 0.07
use_cuda: True
world_size: 1


In [11]:
#if args.load_features:
#    train_features = torch.load(os.path.join(args.load_features, "trainfeat.pth"))
#    test_features = torch.load(os.path.join(args.load_features, "testfeat.pth"))
#    train_labels = torch.load(os.path.join(args.load_features, "trainlabels.pth"))
#    test_labels = torch.load(os.path.join(args.load_features, "testlabels.pth"))
#else:
#    # need to extract features !
#    train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

if utils.get_rank() == 0:
    if args.use_cuda:
        train_features = train_features.cuda()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
        test_labels = test_labels.cuda()

    print("Features are ready!\nStart the k-NN classification.")
    for k in args.nb_knn:
        top1, top5 = knn_classifier(train_features, train_labels,
            test_features, test_labels, k, args.temperature)
        print(f"{k}-NN classifier result: Top1: {top1}, Top5: {top5}")
dist.barrier()

  cpuset_checked))


Data loaded with 10000 train and 60000 val imgs.
Model vit_tiny 4x4 built.
Take key teacher in provided checkpoint dict
Pretrained weights found at ./checkpoint.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])
Extracting features for train set...
Storing features into tensor of shape torch.Size([10000, 192])
  [ 0/79]  eta: 0:00:37    time: 0.469524  data: 0.378897  max mem: 301
  [10/79]  eta: 0:00:05    time: 0.075526  data: 0.034621  max mem: 308
  [20/79]  eta: 0:00:03    time: 0.043028  data: 0.000220  max mem: 403
  [30/79]  eta: 0:00:02    time: 0.049933  data: 0.000251  max mem: 439
  [40/79]  eta: 0:00:02    time: 0.049903  data: 0.000244  max mem: 439
  [50/79]  eta: 0:00:01    time: 0.049874  data: 0.000384  max mem: 439
  [60/79]  eta: 0:00:01    time: 0.049887  data: 0.00

In [12]:
args={"batch_size_per_gpu": 128,
      "nb_knn": [10, 20, 100, 200], 
      "temperature": 0.07,
      "pretrained_weights": './checkpoint0000.pth',
      "use_cuda": True,
      "arch": "vit_tiny",
      "patch_size": 4, 
      "checkpoint_key": "teacher", 
      "dump_features": None,
      "load_features": None, 
      "num_workers": 10,
      "dist_url": "env://", 
      "local_rank": 0, 
      "data_path": './data/FashionMNIST'}

class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args=AttrDict(args)

train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

if utils.get_rank() == 0:
    if args.use_cuda:
        train_features = train_features.cuda()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
        test_labels = test_labels.cuda()

    print("Features are ready!\nStart the k-NN classification.")
    for k in args.nb_knn:
        top1, top5 = knn_classifier(train_features, train_labels,
            test_features, test_labels, k, args.temperature)
        print(f"{k}-NN classifier result: Top1: {top1}, Top5: {top5}")
dist.barrier()

Data loaded with 10000 train and 60000 val imgs.


  cpuset_checked))


Model vit_tiny 4x4 built.
Take key teacher in provided checkpoint dict
Pretrained weights found at ./checkpoint0000.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])
Extracting features for train set...
Storing features into tensor of shape torch.Size([10000, 192])
  [ 0/79]  eta: 0:00:35    time: 0.445867  data: 0.384794  max mem: 1007
  [10/79]  eta: 0:00:04    time: 0.071056  data: 0.035155  max mem: 1007
  [20/79]  eta: 0:00:03    time: 0.041723  data: 0.000200  max mem: 1007
  [30/79]  eta: 0:00:02    time: 0.049900  data: 0.000242  max mem: 1007
  [40/79]  eta: 0:00:02    time: 0.049928  data: 0.000275  max mem: 1007
  [50/79]  eta: 0:00:01    time: 0.049915  data: 0.000274  max mem: 1007
  [60/79]  eta: 0:00:01    time: 0.049913  data: 0.000269  max mem: 1007
  [70/79]  eta: 0:0

In [13]:
args={"batch_size_per_gpu": 128,
      "nb_knn": [10, 20, 100, 200], 
      "temperature": 0.07,
      "pretrained_weights": './checkpoint0020.pth',
      "use_cuda": True,
      "arch": "vit_tiny",
      "patch_size": 4, 
      "checkpoint_key": "teacher", 
      "dump_features": None,
      "load_features": None, 
      "num_workers": 10,
      "dist_url": "env://", 
      "local_rank": 0, 
      "data_path": './data/FashionMNIST'}

class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args=AttrDict(args)

train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

if utils.get_rank() == 0:
    if args.use_cuda:
        train_features = train_features.cuda()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
        test_labels = test_labels.cuda()

    print("Features are ready!\nStart the k-NN classification.")
    for k in args.nb_knn:
        top1, top5 = knn_classifier(train_features, train_labels,
            test_features, test_labels, k, args.temperature)
        print(f"{k}-NN classifier result: Top1: {top1}, Top5: {top5}")
dist.barrier()

Data loaded with 10000 train and 60000 val imgs.


  cpuset_checked))


Model vit_tiny 4x4 built.
Take key teacher in provided checkpoint dict
Pretrained weights found at ./checkpoint0020.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])
Extracting features for train set...
Storing features into tensor of shape torch.Size([10000, 192])
  [ 0/79]  eta: 0:00:34    time: 0.440798  data: 0.377149  max mem: 1007
  [10/79]  eta: 0:00:04    time: 0.070591  data: 0.034461  max mem: 1007
  [20/79]  eta: 0:00:03    time: 0.041736  data: 0.000280  max mem: 1007
  [30/79]  eta: 0:00:02    time: 0.049936  data: 0.000371  max mem: 1007
  [40/79]  eta: 0:00:02    time: 0.049964  data: 0.000359  max mem: 1007
  [50/79]  eta: 0:00:01    time: 0.049933  data: 0.000294  max mem: 1007
  [60/79]  eta: 0:00:01    time: 0.049926  data: 0.000247  max mem: 1007
  [70/79]  eta: 0:0

In [14]:
args={"batch_size_per_gpu": 128,
      "nb_knn": [10, 20, 100, 200], 
      "temperature": 0.07,
      "pretrained_weights": './checkpoint0040.pth',
      "use_cuda": True,
      "arch": "vit_tiny",
      "patch_size": 4, 
      "checkpoint_key": "teacher", 
      "dump_features": None,
      "load_features": None, 
      "num_workers": 10,
      "dist_url": "env://", 
      "local_rank": 0, 
      "data_path": './data/FashionMNIST'}

class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args=AttrDict(args)

train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

if utils.get_rank() == 0:
    if args.use_cuda:
        train_features = train_features.cuda()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
        test_labels = test_labels.cuda()

    print("Features are ready!\nStart the k-NN classification.")
    for k in args.nb_knn:
        top1, top5 = knn_classifier(train_features, train_labels,
            test_features, test_labels, k, args.temperature)
        print(f"{k}-NN classifier result: Top1: {top1}, Top5: {top5}")
dist.barrier()

  cpuset_checked))


Data loaded with 10000 train and 60000 val imgs.
Model vit_tiny 4x4 built.
Take key teacher in provided checkpoint dict
Pretrained weights found at ./checkpoint0040.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])
Extracting features for train set...
Storing features into tensor of shape torch.Size([10000, 192])
  [ 0/79]  eta: 0:00:36    time: 0.461666  data: 0.402214  max mem: 1007
  [10/79]  eta: 0:00:05    time: 0.072504  data: 0.037064  max mem: 1007
  [20/79]  eta: 0:00:03    time: 0.041732  data: 0.000357  max mem: 1007
  [30/79]  eta: 0:00:02    time: 0.049911  data: 0.000357  max mem: 1007
  [40/79]  eta: 0:00:02    time: 0.049942  data: 0.000401  max mem: 1007
  [50/79]  eta: 0:00:01    time: 0.049845  data: 0.000231  max mem: 1007
  [60/79]  eta: 0:00:01    time: 0.049841  

In [15]:
args={"batch_size_per_gpu": 128,
      "nb_knn": [10, 20, 100, 200], 
      "temperature": 0.07,
      "pretrained_weights": './checkpoint0060.pth',
      "use_cuda": True,
      "arch": "vit_tiny",
      "patch_size": 4, 
      "checkpoint_key": "teacher", 
      "dump_features": None,
      "load_features": None, 
      "num_workers": 10,
      "dist_url": "env://", 
      "local_rank": 0, 
      "data_path": './data/FashionMNIST'}

class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args=AttrDict(args)

train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

if utils.get_rank() == 0:
    if args.use_cuda:
        train_features = train_features.cuda()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
        test_labels = test_labels.cuda()

    print("Features are ready!\nStart the k-NN classification.")
    for k in args.nb_knn:
        top1, top5 = knn_classifier(train_features, train_labels,
            test_features, test_labels, k, args.temperature)
        print(f"{k}-NN classifier result: Top1: {top1}, Top5: {top5}")
dist.barrier()

Data loaded with 10000 train and 60000 val imgs.


  cpuset_checked))


Model vit_tiny 4x4 built.
Take key teacher in provided checkpoint dict
Pretrained weights found at ./checkpoint0060.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])
Extracting features for train set...
Storing features into tensor of shape torch.Size([10000, 192])
  [ 0/79]  eta: 0:00:35    time: 0.453005  data: 0.387749  max mem: 1007
  [10/79]  eta: 0:00:04    time: 0.071704  data: 0.035455  max mem: 1007
  [20/79]  eta: 0:00:03    time: 0.041763  data: 0.000226  max mem: 1007
  [30/79]  eta: 0:00:02    time: 0.049926  data: 0.000198  max mem: 1007
  [40/79]  eta: 0:00:02    time: 0.049937  data: 0.000226  max mem: 1007
  [50/79]  eta: 0:00:01    time: 0.049975  data: 0.000260  max mem: 1007
  [60/79]  eta: 0:00:01    time: 0.049962  data: 0.000203  max mem: 1007
  [70/79]  eta: 0:0

In [16]:
args={"batch_size_per_gpu": 128,
      "nb_knn": [10, 20, 100, 200], 
      "temperature": 0.07,
      "pretrained_weights": './checkpoint0080.pth',
      "use_cuda": True,
      "arch": "vit_tiny",
      "patch_size": 4, 
      "checkpoint_key": "teacher", 
      "dump_features": None,
      "load_features": None, 
      "num_workers": 10,
      "dist_url": "env://", 
      "local_rank": 0, 
      "data_path": './data/FashionMNIST'}

class AttrDict(dict):
    def __init__(self, *args, **kwargs):
        super(AttrDict, self).__init__(*args, **kwargs)
        self.__dict__ = self

args=AttrDict(args)

train_features, test_features, train_labels, test_labels = extract_feature_pipeline(args)

if utils.get_rank() == 0:
    if args.use_cuda:
        train_features = train_features.cuda()
        test_features = test_features.cuda()
        train_labels = train_labels.cuda()
        test_labels = test_labels.cuda()

    print("Features are ready!\nStart the k-NN classification.")
    for k in args.nb_knn:
        top1, top5 = knn_classifier(train_features, train_labels,
            test_features, test_labels, k, args.temperature)
        print(f"{k}-NN classifier result: Top1: {top1}, Top5: {top5}")
dist.barrier()

Data loaded with 10000 train and 60000 val imgs.


  cpuset_checked))


Model vit_tiny 4x4 built.
Take key teacher in provided checkpoint dict
Pretrained weights found at ./checkpoint0080.pth and loaded with msg: _IncompatibleKeys(missing_keys=[], unexpected_keys=['head.mlp.0.weight', 'head.mlp.0.bias', 'head.mlp.2.weight', 'head.mlp.2.bias', 'head.mlp.4.weight', 'head.mlp.4.bias', 'head.last_layer.weight_g', 'head.last_layer.weight_v'])
Extracting features for train set...
Storing features into tensor of shape torch.Size([10000, 192])
  [ 0/79]  eta: 0:00:34    time: 0.435320  data: 0.375431  max mem: 1007
  [10/79]  eta: 0:00:04    time: 0.070094  data: 0.034303  max mem: 1007
  [20/79]  eta: 0:00:03    time: 0.041756  data: 0.000174  max mem: 1007
  [30/79]  eta: 0:00:02    time: 0.049940  data: 0.000201  max mem: 1007
  [40/79]  eta: 0:00:02    time: 0.049932  data: 0.000279  max mem: 1007
  [50/79]  eta: 0:00:01    time: 0.049908  data: 0.000265  max mem: 1007
  [60/79]  eta: 0:00:01    time: 0.049895  data: 0.000196  max mem: 1007
  [70/79]  eta: 0:0