In [2]:
%pip install transformers[torch] huggingface_hub datasets evaluate torchvision ipywidgets

Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
Collecting huggingface_hub
  Downloading huggingface_hub-0.28.1-py3-none-any.whl.metadata (13 kB)
Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting ipywidgets
  Downloading ipywidgets-8.1.5-py3-none-any.whl.metadata (2.3 kB)
Collecting transformers[torch]
  Downloading transformers-4.48.3-py3-none-any.whl.metadata (44 kB)
Collecting tokenizers<0.22,>=0.21 (from transformers[torch])
  Downloading tokenizers-0.21.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting accelerate>=0.26.0 (from transformers[torch])
  Downloading accelerate-1.3.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylin

In [1]:
from transformers import AutoModelForImageClassification
from torch.utils.data import ConcatDataset, DataLoader
from tqdm.notebook import tqdm
import torch
import time

import base

In [8]:
# Pick the compute device once: CUDA when PyTorch can see a GPU, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Report which device was selected (and the name of GPU 0 when applicable).
if device.type == "cuda":
    print("GPU is available and will be used:", torch.cuda.get_device_name(0))
else:
    print("GPU is not available, using CPU.")

GPU is available and will be used: NVIDIA A100 80GB PCIe MIG 2g.20gb


In [3]:
# Seed the run with 42 through the project helper.
# NOTE(review): which RNGs are seeded is defined in base.reset_seed, not visible here.
base.reset_seed(42)

In [4]:
# Build both preprocessing pipelines up front: the plain one and the augmenting one.
# The right-hand side is evaluated left to right, so base_transforms() still runs first.
transform, augment_transform = base.base_transforms(), base.aug_transforms()

In [5]:
# Re-seed with 42 right before the model is loaded.
# NOTE(review): this repeats the earlier base.reset_seed(42) call; presumably it resets
# any RNG state consumed while building the transforms. Confirm it is still needed.
base.reset_seed(42)

In [10]:
# Hub identifier of the checkpoint used as the logits-producing model in this notebook.
pretrained_checkpoint = "aaraki/vit-base-patch16-224-in21k-finetuned-cifar10"

# Load the image classifier with a 10-way head.
model = AutoModelForImageClassification.from_pretrained(pretrained_checkpoint, num_labels=10)

# Move the weights to the selected device; as the cell's last expression this also
# displays the module structure.
model.to(device)

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [None]:
# Benchmark single-image (batch_size=1) inference latency on the GPU using CUDA events.
N_WARMUP = 10    # untimed forward passes, so one-off start-up cost is not averaged in
N_TIMED = 1000   # number of test samples whose latency is recorded

dataset_part = base.get_dataset_part()
transform = base.base_transforms()
device = "cuda"  # NOTE: rebinds the notebook-level `device` to a plain string
model.to(device)

test = base.CustomCIFAR10L(root='./data/10-logits', dataset_part=dataset_part.TEST, transform=transform)
test_loader = DataLoader(test, batch_size=1, shuffle=False)
starter, ender = torch.cuda.Event(enable_timing=True), torch.cuda.Event(enable_timing=True)
timings = []

# Warm-up: repeat the first sample a few times without timing it. The first passes
# through a freshly moved model typically pay initialisation costs that would
# otherwise inflate the reported average.
warmup_batch = next(iter(test_loader), None)
if warmup_batch is not None:
    warmup_input = warmup_batch["pixel_values"].to(device)
    with torch.no_grad():
        for _ in range(N_WARMUP):
            model(warmup_input)

for i, batch in enumerate(test_loader):
    if i >= N_TIMED:
        break
    # Drain pending GPU work (including warm-up) so it is not attributed to this sample.
    torch.cuda.synchronize()
    starter.record()
    with torch.no_grad():
        # The host-to-device copy sits between the two events, so it is part of the
        # measured time, exactly as in the original measurement.
        _ = model(batch["pixel_values"].to(device))
    ender.record()
    # elapsed_time() is only valid once both events have completed.
    torch.cuda.synchronize()
    timings.append(starter.elapsed_time(ender))  # milliseconds

# Guard against an empty loader instead of failing with ZeroDivisionError.
if timings:
    print(f"Average Inference Time on GPU: {sum(timings) / len(timings):.3f} ms")
else:
    print("No samples were timed on GPU: the test loader is empty.")

Average Inference Time on GPU: 8.831 ms


In [None]:
# Benchmark single-image (batch_size=1) inference latency on the CPU with a wall clock.
N_WARMUP = 10    # untimed forward passes, so one-off start-up cost is not averaged in
N_TIMED = 1000   # number of test samples whose latency is recorded

dataset_part = base.get_dataset_part()
transform = base.base_transforms()
# NOTE: this rebinds the notebook-level `device` and leaves the model on the CPU
# for every cell executed afterwards.
device = "cpu"
model.to(device)
test = base.CustomCIFAR10L(root='./data/10-logits', dataset_part=dataset_part.TEST, transform=transform)
test_loader = DataLoader(test, batch_size=1, shuffle=False)
timings = []

# Warm-up: repeat the first sample a few times without timing it, so first-call
# overhead does not inflate the reported average.
warmup_batch = next(iter(test_loader), None)
if warmup_batch is not None:
    warmup_input = warmup_batch["pixel_values"].to(device)
    with torch.no_grad():
        for _ in range(N_WARMUP):
            model(warmup_input)

for i, batch in enumerate(test_loader):
    if i >= N_TIMED:
        break
    start_time = time.perf_counter()
    with torch.no_grad():
        _ = model(batch["pixel_values"].to(device))
    end_time = time.perf_counter()
    timings.append((end_time - start_time) * 1000)  # seconds -> milliseconds

# Guard against an empty loader instead of failing with ZeroDivisionError.
if timings:
    print(f"Average Inference Time on CPU: {sum(timings) / len(timings):.3f} ms")
else:
    print("No samples were timed on CPU: the test loader is empty.")

Average Inference Time on CPU: 343.113 ms


In [14]:
# Summarise the model's parameters with the project helper. As the cell's last
# expression, whatever count_parameters prints and returns is what gets displayed.
base.count_parameters(model)

model size: 327.325MB.
Total Trainable Params: 85806346.


Unnamed: 0,Modules,Parameters
0,vit.embeddings.cls_token,768
1,vit.embeddings.position_embeddings,151296
2,vit.embeddings.patch_embeddings.projection.weight,589824
3,vit.embeddings.patch_embeddings.projection.bias,768
4,vit.encoder.layer.0.attention.attention.query....,589824
...,...,...
195,vit.encoder.layer.11.layernorm_after.bias,768
196,vit.layernorm.weight,768
197,vit.layernorm.bias,768
198,classifier.weight,7680


In [7]:
# Put the model in evaluation mode before it is used to generate logits.
# eval() returns the module, so its structure is displayed as the cell output.
model.eval()

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [8]:
# Fetch the dataset-part selector from base; its TRAIN / TEST members are what this
# notebook passes as `dataset_part=` when constructing base.CustomCIFAR10L.
dataset_part = base.get_dataset_part()

In [10]:
def _export_with_logits(source_path, target_path, **dataset_kwargs):
    """Attach model logits to one pickled CIFAR-10 batch and write it back out.

    Loads the batch at ``source_path``, builds the matching ``base.CustomCIFAR10``
    dataset (``dataset_kwargs`` select the split), runs ``base.generate_logits``
    over it in batches of 128 without shuffling, stores the result under the
    ``b"logits"`` key, and pickles the batch to ``target_path``.

    Returns the tuple ``(batch_dict, dataset, dataloader, logits)``.
    """
    batch_dict = base.unpickle(source_path)
    dataset = base.CustomCIFAR10(root='./data/10', transform=transform, **dataset_kwargs)
    loader = DataLoader(dataset, batch_size=128, shuffle=False)

    logits = base.generate_logits(loader, model)
    batch_dict[b"logits"] = logits
    base.pickle_up(target_path, batch_dict)
    return batch_dict, dataset, loader, logits


# Test split: the official CIFAR-10 test batch.
testing, test_data, test_dataloader, logits_test = _export_with_logits(
    "data/10/cifar-10-batches-py/test_batch",
    "data/10-logits/cifar-10-batches-py/test",
    train=False,
)

# Evaluation split: training batch 5.
evaluating, eval_data, eval_dataloader, logits_eval = _export_with_logits(
    "data/10/cifar-10-batches-py/data_batch_5",
    "data/10-logits/cifar-10-batches-py/eval",
    train=True,
    batch=5,
)

  0%|          | 0/79 [00:00<?, ?it/s]

  0%|          | 0/79 [00:00<?, ?it/s]

In [11]:
def _collect_logits(loader, desc):
    """Run ``model`` over ``loader`` without gradients and return per-sample logits.

    Each batch is unpacked as ``(pixel_values, labels)``; the labels are ignored.
    Inputs are moved to the device the model's parameters currently live on, so
    the cell works whether the model was left on the CPU or on the GPU.

    Returns a flat list with one NumPy logits row per sample, in loader order.
    """
    model_device = next(model.parameters()).device
    rows = []
    for pixel_values, _ in tqdm(loader, desc=desc):
        with torch.no_grad():
            logits = model(pixel_values.to(model_device)).logits
        # Extending with a 2-D array appends its rows one by one, which flattens
        # the batches into a single per-sample list.
        rows.extend(logits.cpu().numpy())
    return rows


base.reset_seed(42)
# Training batches 1-4; range(1, 5) excludes 5.
for index in range(1, 5):
    data = base.unpickle(f"data/10/cifar-10-batches-py/data_batch_{index}")

    train = base.CustomCIFAR10(root='./data/10', batch=index, train=True, transform=transform)
    train_augmented = base.CustomCIFAR10(root='./data/10', batch=index, train=True, transform=augment_transform)

    # shuffle=False keeps the logits aligned with the sample order in `data`.
    train_dataloader = DataLoader(train, batch_size=64, shuffle=False)
    train_dataloader_augmented = DataLoader(train_augmented, batch_size=64, shuffle=False)

    # Plain pass first, augmented pass second (same order as before).
    data[b"logits"] = _collect_logits(train_dataloader, f"Progress for file {index}.")
    data[b"logits_aug"] = _collect_logits(
        train_dataloader_augmented, f"Progress for file {index} with augmentation."
    )
    base.pickle_up(f"data/10-logits/cifar-10-batches-py/train_batch_{index}", data)

Progress for file 1.:   0%|          | 0/157 [00:00<?, ?it/s]

Progress for file 1 with augmentation.:   0%|          | 0/157 [00:00<?, ?it/s]

Progress for file 2.:   0%|          | 0/157 [00:00<?, ?it/s]

Progress for file 2 with augmentation.:   0%|          | 0/157 [00:00<?, ?it/s]

Progress for file 3.:   0%|          | 0/157 [00:00<?, ?it/s]

Progress for file 3 with augmentation.:   0%|          | 0/157 [00:00<?, ?it/s]

Progress for file 4.:   0%|          | 0/157 [00:00<?, ?it/s]

Progress for file 4 with augmentation.:   0%|          | 0/157 [00:00<?, ?it/s]

In [12]:
# Both views read the same logits-annotated training data; only the transform differs.
train_split_kwargs = dict(root='./data/10-logits', dataset_part=dataset_part.TRAIN)

train_aug = base.CustomCIFAR10L(transform=augment_transform, **train_split_kwargs)
train = base.CustomCIFAR10L(transform=transform, **train_split_kwargs)

# Plain samples first, augmented samples second.
train_combo = ConcatDataset([train, train_aug])

In [13]:
# Report accuracy for the plain, augmented and combined training sets, in that order.
for subset in (train, train_aug, train_combo):
    print(base.check_acc(subset))

Progress for base train set:   0%|          | 0/40000 [00:00<?, ?it/s]

Accuracy for base train set: 0.954925


Progress for base train set:   0%|          | 0/40000 [00:00<?, ?it/s]

Accuracy for base train set: 0.686


Progress for base train set:   0%|          | 0/80000 [00:00<?, ?it/s]

Accuracy for base train set: 0.8204625


In [14]:
# Replace train_aug with the result of base.remove_diff_pred_class(train, train_aug).
# NOTE(review): going by the helper's name, this presumably drops augmented samples whose
# predicted class differs from the un-augmented one. Confirm against base.
train_aug = base.remove_diff_pred_class(train, train_aug)
# train_aug was rebound above, so rebuild the combined dataset to pick up the new one.
train_combo = ConcatDataset([train, train_aug])

In [15]:
# Re-check accuracy for the filtered augmented set and the rebuilt combined set.
for subset in (train_aug, train_combo):
    print(base.check_acc(subset))

Progress for base train set:   0%|          | 0/28176 [00:00<?, ?it/s]

Accuracy for base train set: 0.9614565587734242


Progress for base train set:   0%|          | 0/68176 [00:00<?, ?it/s]

Accuracy for base train set: 0.9576243839474302
