In [4]:
import kagglehub

# Download the latest version of the dataset and keep the returned location in
# `path`; the path-setup cell further down derives the split folders from it.
DATASET_HANDLE = "ayaroshevskiy/downsampled-imagenet-64x64"
path = kagglehub.dataset_download(DATASET_HANDLE)
print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Path to dataset files: /Users/igor.varha/.cache/kagglehub/datasets/ayaroshevskiy/downsampled-imagenet-64x64/versions/1


In [5]:
from pathlib import Path

# Both splits live in a doubly nested folder under the downloaded dataset root.
dataset_root = Path(path)
train_path = dataset_root / "train_64x64" / "train_64x64"
test_path = dataset_root / "valid_64x64" / "valid_64x64"

# File names for serialized train/test datasets (not referenced in the cells shown here).
train_dataset_p = "tr_dataset.pt"
test_dataset_p = "te_dataset.pt"


In [6]:
from torch.utils.data import DataLoader
from image_toolkit.data_processor import FragmentDataset
import torchvision.transforms as T
import random


class RandomPatchAugment:
    """Callable that augments a patch with probability `prob`, else returns it unchanged.

    The augmentation is a fixed torchvision pipeline: a random rotation
    followed by a colour jitter.
    """

    def __init__(self, prob=0.5):
        """Build the augmentation pipeline.

        Args:
            prob: chance that a given call applies the pipeline; compared
                against a fresh `random.random()` draw on every call.
        """
        pipeline_steps = [
            T.RandomRotation(degrees=90),
            # The three positional values are presumably brightness, contrast
            # and saturation -- confirm against the torchvision signature.
            T.ColorJitter(0.1, 0.1, 0.1),
            # Disabled step, kept for reference: T.GaussianBlur(kernel_size=3)
        ]
        self.augment = T.Compose(pipeline_steps)
        self.prob = prob

    def __call__(self, patch):
        """Return `self.augment(patch)` when the random draw is below `prob`, else `patch`."""
        should_augment = random.random() < self.prob
        return self.augment(patch) if should_augment else patch

import torch  # local to this cell: only needed here to seed torch's default generator

# Seed the RNGs that drive the augmentation so the augmented metrics can be
# reproduced after a kernel restart. `random` decides whether RandomPatchAugment
# fires; torchvision's random transforms draw from torch's default generator.
SEED = 42
random.seed(SEED)
torch.manual_seed(SEED)

augmenter = RandomPatchAugment(prob=0.6)

# Augmented variant of the test data, capped at 1000 items.
test_dataset_aug = FragmentDataset(test_path, limit=1000, augment=True, augmenter=augmenter)

# Author's note: 1000 images served as the validation set during training, so
# the larger, un-augmented set below is treated as the final test run.
# NOTE(review): the two datasets use different limits (1000 vs 10000) on the same
# directory, so the augmented and non-augmented scores are not computed over the
# same images. If `limit` keeps the first N files, the 10000 presumably also
# contain the 1000 validation images -- confirm against FragmentDataset.
test_dataset = FragmentDataset(test_path, limit=10000, augment=False)

# Fixed order (shuffle=False), batches of 10, for both evaluation loaders.
dataloader_test_aug = DataLoader(test_dataset_aug, batch_size=10, shuffle=False)
dataloader_test = DataLoader(test_dataset, batch_size=10, shuffle=False)

  2%|▏         | 999/49999 [00:00<00:29, 1640.95it/s]
 20%|█▉        | 9999/49999 [00:12<00:50, 799.01it/s] 


In [14]:
from image_toolkit.nets import TransformerPatchCluster
import torch

# Pick the compute device: prefer MPS, then CUDA, and fall back to the CPU.
if torch.backends.mps.is_available():
    device_name = "mps"
elif torch.cuda.is_available():
    device_name = "cuda"
else:
    device_name = "cpu"
DEVICE = torch.device(device_name)

# Build the network and move it to the chosen device.
# (The original line carried the bare note "0.72"; its meaning is not stated here.)
model = TransformerPatchCluster(embed_dim=256, nhead=8, device=DEVICE, num_layers=10)
model = model.to(DEVICE)

# Restore the saved checkpoint.
model.load_weights("best_ttc_256_8_10_ARI76.5(100K)/best_model_epoch_15.pth")

# Kept as the last expression so the cell still displays the model repr.
model.eval()

  self.load_state_dict(torch.load(path, map_location=self.device))


Weights loaded from best_ttc_256_8_10_ARI76.5(100K)/best_model_epoch_15.pth


TransformerPatchCluster(
  (encoder): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (4): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_run

In [15]:

from image_toolkit.nets import evaluate_clustering_on_validation_p

# Evaluate the loaded model on the un-augmented loader first; the helper's three
# return values are reported as ARI, NMI and Silhouette.
clean_metrics = evaluate_clustering_on_validation_p(dataloader_test, model, device=DEVICE)
ari, nmi, sil = clean_metrics
print(f"ARI No Augmentation: {ari}, NMI: {nmi}, Silhouette: {sil}")

# Same evaluation on the augmented loader; ari/nmi/sil end up holding these values.
augmented_metrics = evaluate_clustering_on_validation_p(dataloader_test_aug, model, device=DEVICE)
ari, nmi, sil = augmented_metrics
print(f"ARI Augmentation: {ari}, NMI: {nmi}, Silhouette: {sil}")


ARI No Augmentation: 0.7848565605303293, NMI: 0.8732883395767997, Silhouette: 0.7746378779411316
ARI Augmentation: 0.38438785103562173, NMI: 0.6087712346609347, Silhouette: 0.5479543805122375
