In [1]:
from torch.utils.data import DataLoader, Subset, ConcatDataset
from transformers import AutoModelForImageClassification
from sklearn.model_selection import train_test_split
import numpy as np
import torch
import os 

import base

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/jovyan/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!


In [2]:
# Choose the compute device once: CUDA when a GPU is visible, CPU otherwise,
# and report which one was selected.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    print("GPU is available and will be used:", torch.cuda.get_device_name(0))
else:
    print("GPU is not available, using CPU.")

GPU is available and will be used: NVIDIA H100 PCIe


In [3]:
# Reset the seed to a fixed value (42) through the project helper before any
# dataset or model work happens in this notebook.
# NOTE(review): presumably this seeds the Python/NumPy/torch RNGs — confirm in
# base.reset_seed.
base.reset_seed(42)

In [4]:
# Two preprocessing pipelines supplied by the project helpers: the base one and
# the augmenting one.
transform = base.base_transforms()
augment_transform = base.aug_transforms()

# Every dataset below reads from the same CIFAR-100 root directory.
home_dir = os.path.expanduser('~')
cifar_root = f"{home_dir}/data/100"

# Training images, once per pipeline.
train = base.CustomCIFAR100(root=cifar_root, train=True, transform=transform, device=device)
train_aug = base.CustomCIFAR100(root=cifar_root, train=True, transform=augment_transform, device=device)

# Test images (train=False), base pipeline only.
test = base.CustomCIFAR100(root=cifar_root, train=False, transform=transform, device=device)

# A second handle on the training images with the base pipeline.
# NOTE(review): the name `eval` shadows the Python builtin from here on; it is
# kept because other cells refer to it by this name.
eval = base.CustomCIFAR100(root=cifar_root, train=True, transform=transform, device=device)

In [5]:
# Split the positional sample indices 80/20 into train and validation parts,
# stratified on the dataset labels, with a fixed random_state so the split is
# repeatable.
sample_indices = np.arange(len(train))
split_options = dict(
    test_size=0.2,
    random_state=42,
    shuffle=True,
    stratify=train.labels,
)
train_idx, validation_idx = train_test_split(sample_indices, **split_options)

In [6]:
# Restrict each dataset to its share of the split: both training variants get
# train_idx, the evaluation handle gets validation_idx.
# NOTE(review): the names are rebound to subsets of themselves, so re-running
# this cell without first rebuilding the datasets wraps the subsets a second
# time.
train, train_aug, eval = (
    Subset(train, train_idx),
    Subset(train_aug, train_idx),
    Subset(eval, validation_idx),
)

In [None]:
# Both training loaders share the same settings. shuffle=False keeps iteration
# in dataset order.
# NOTE(review): presumably required so generated outputs line up with the rows
# selected by train_idx — confirm.
train_loader_options = {"batch_size": 128, "shuffle": False}
train_dataloader = DataLoader(train, **train_loader_options)
train_dataloader_aug = DataLoader(train_aug, **train_loader_options)

In [None]:
# Loaders for the held-out splits, iterated in dataset order (shuffle=False).
# The spelling `eval_dataloder` is kept as-is because other cells read the
# loader under that name.
heldout_loader_options = {"batch_size": 128, "shuffle": False}
eval_dataloder = DataLoader(eval, **heldout_loader_options)
test_dataloader = DataLoader(test, **heldout_loader_options)

In [9]:
# Load the pretrained image-classification checkpoint used as the teacher,
# configured for 100 labels, and move it to the selected device.
model = AutoModelForImageClassification.from_pretrained(
    "Ahmed9275/Vit-Cifar100",
    num_labels=100,
)
model.to(device)

# Persist the teacher weights. torch.save does not create missing parent
# directories, so make sure the target folder exists before writing.
teacher_path = f"{os.path.expanduser('~')}/models/cifar100/teacher.pth"
os.makedirs(os.path.dirname(teacher_path), exist_ok=True)
torch.save(model.state_dict(), teacher_path)

config.json:   0%|          | 0.00/4.68k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/344M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/344M [00:00<?, ?B/s]

In [10]:
# Put the teacher in evaluation mode. As the last expression of the cell, the
# returned module is also displayed, which produces the architecture summary
# in the cell output.
model.eval()

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTSdpaAttention(
            (attention): ViTSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_fe

In [11]:
# Run the teacher over each loader and keep what the project helper returns:
# one result for the base training split, one for the augmented training split.
# NOTE(review): if the augmenting transform is random, logits_aug describes the
# particular augmented samples drawn during this pass — confirm that whatever
# later pairs these logits with images accounts for that.
logits = base.generate_logits(train_dataloader, model)
logits_aug = base.generate_logits(train_dataloader_aug, model)

# Same for the held-out evaluation and test loaders.
logits_eval = base.generate_logits(eval_dataloder, model)
logits_test = base.generate_logits(test_dataloader, model)

Generating logits for given dataset:   0%|          | 0/313 [00:00<?, ?it/s]

Generating logits for given dataset:   0%|          | 0/313 [00:00<?, ?it/s]

Generating logits for given dataset:   0%|          | 0/79 [00:00<?, ?it/s]

Generating logits for given dataset:   0%|          | 0/79 [00:00<?, ?it/s]

In [12]:
# Read the raw CIFAR-100 train and test batch files through the project's
# unpickle helper.
home_dir = os.path.expanduser('~')
data_file = base.unpickle(f"{home_dir}/data/100/cifar-100-python/train")
testing = base.unpickle(f"{home_dir}/data/100/cifar-100-python/test")

In [13]:
def _take_rows(columns, row_indices):
    """Return a dict with, for every key except b"batch_label", the entries of
    that key's value picked at ``row_indices`` (in that order, as a list)."""
    return {
        name: [column[row] for row in row_indices]
        for name, column in columns.items()
        if name != b"batch_label"
    }


# Carve the raw training batch into the train and validation parts using the
# index arrays from the split.
data = _take_rows(data_file, train_idx)
eval_data = _take_rows(data_file, validation_idx)

In [None]:
# Store the teacher outputs next to the samples they were generated from:
# the training part carries both the base and the augmented logits ...
data.update({b"logits": logits, b"logits_aug": logits_aug})

# ... while the evaluation and test parts carry a single set each.
eval_data[b"logits"] = logits_eval
testing[b"logits"] = logits_test

In [15]:
# Write the three logit-augmented dictionaries into a separate dataset folder.
# The folder differs from the one the raw batches were read from, so create it
# first in case it does not exist yet (harmless when it already does).
logits_dir = f"{os.path.expanduser('~')}/data/100-logits/cifar-100-python"
os.makedirs(logits_dir, exist_ok=True)

base.pickle_up(f"{logits_dir}/test", testing)
base.pickle_up(f"{logits_dir}/train", data)
base.pickle_up(f"{logits_dir}/eval", eval_data)

In [None]:
# Fetch the project's dataset-part selector; its TEST / EVAL / TRAIN members
# are what the CustomCIFAR100L constructors in this notebook receive.
# NOTE(review): presumably an Enum class — confirm in base.get_dataset_part.
dataset_part = base.get_dataset_part()

In [17]:
# Re-open the test and eval parts from the logit-augmented dataset folder
# (this rebinds `test` and `eval`).
logits_root = f"{os.path.expanduser('~')}/data/100-logits"
test = base.CustomCIFAR100L(root=logits_root, dataset_part=dataset_part.TEST, transform=transform)
eval = base.CustomCIFAR100L(root=logits_root, dataset_part=dataset_part.EVAL, transform=transform)

# Report the accuracy computed from the stored logits for each part.
# NOTE(review): the eval part was carved out of the CIFAR-100 training batch;
# if the pretrained teacher was trained on that data, its eval accuracy is
# optimistic compared with test — confirm.
for split, message in (
    (test, "Accuracy for test dataset is:"),
    (eval, "Accuracy for eval dataset is:"),
):
    print(base.check_acc(split, message))

Calculating accuracy based on the saved logits:   0%|          | 0/10000 [00:00<?, ?it/s]

Accuracy for test dataset is: 0.8182


Calculating accuracy based on the saved logits:   0%|          | 0/10000 [00:00<?, ?it/s]

Accuracy for eval dataset is: 0.9445


In [18]:
# Re-open the training part from the logit-augmented folder twice — first with
# the augmenting pipeline, then with the base one — and chain the two into a
# single combined dataset (base samples first).
logits_root = f"{os.path.expanduser('~')}/data/100-logits"
train_aug = base.CustomCIFAR100L(
    logits_root,
    dataset_part=dataset_part.TRAIN,
    transform=augment_transform,
)
train = base.CustomCIFAR100L(
    logits_root,
    dataset_part=dataset_part.TRAIN,
    transform=transform,
)
train_combo = ConcatDataset([train, train_aug])

In [19]:
# Report the accuracy computed from the stored logits for the base training
# set, the augmented training set, and the concatenation of the two.
# (The label for the augmented set previously read "augmeted"; the captured
# output keeps the old spelling until the cell is re-run.)
print(base.check_acc(train, "Accuracy for train dataset is:"))
print(base.check_acc(train_aug, "Accuracy for augmented train dataset is:"))
print(base.check_acc(train_combo, "Accuracy for combined dataset is:"))

Calculating accuracy based on the saved logits:   0%|          | 0/40000 [00:00<?, ?it/s]

Accuracy for train dataset is: 0.94035


Calculating accuracy based on the saved logits:   0%|          | 0/40000 [00:00<?, ?it/s]

Accuracy for augmeted train dataset is: 0.631525


Calculating accuracy based on the saved logits:   0%|          | 0/80000 [00:00<?, ?it/s]

Accuracy for combined dataset is: 0.7859375


In [20]:
# Replace the augmented training set with the filtered version returned by the
# project helper, then rebuild the combined dataset so it uses that version.
# NOTE(review): judging by the helper's progress message, it drops augmented
# entries whose saved-logit prediction differs from the base entry's — confirm.
# NOTE(review): train_aug is rebound to its own filtered result, so re-running
# this cell filters an already-filtered dataset.
train_aug = base.remove_diff_pred_class(train, train_aug, pytorch_dataset=True)
train_combo = ConcatDataset([train, train_aug])

Removing entries from augmented dataset that are different from the base one - based on saved logits:   0%|   …

In [21]:
# Show how many augmented samples are currently in train_aug.
print(len(train_aug))

25914


In [22]:
# Report stored-logit accuracy for the current augmented set and for the
# combined dataset that contains it.
for split, message in (
    (train_aug, "Accuracy for filtered augmented dataset is:"),
    (train_combo, "Accuracy for combined dataset is:"),
):
    print(base.check_acc(split, message))

Calculating accuracy based on the saved logits:   0%|          | 0/25914 [00:00<?, ?it/s]

Accuracy for filtered augmented dataset is: 0.9609477502508297


Calculating accuracy based on the saved logits:   0%|          | 0/65914 [00:00<?, ?it/s]

Accuracy for combined dataset is: 0.9484479776678703


In [23]:
# Summarise the teacher: the helper prints the model size and the total number
# of trainable parameters, and the cell displays a per-module parameter table.
base.count_parameters(model)

model size: 327.589MB.
Total Trainable Params: 85875556.


Unnamed: 0,Modules,Parameters
0,vit.embeddings.cls_token,768
1,vit.embeddings.position_embeddings,151296
2,vit.embeddings.patch_embeddings.projection.weight,589824
3,vit.embeddings.patch_embeddings.projection.bias,768
4,vit.encoder.layer.0.attention.attention.query....,589824
...,...,...
195,vit.encoder.layer.11.layernorm_after.bias,768
196,vit.layernorm.weight,768
197,vit.layernorm.bias,768
198,classifier.weight,76800


In [None]:
# Benchmark the teacher on CPU, feeding it one training image at a time
# (batch_size=1, fixed order).
cpu_root = f"{os.path.expanduser('~')}/data/100"
train_part_cpu = base.CustomCIFAR100(
    root=cpu_root,
    train=True,
    transform=transform,
    device="cpu",
)
cpu_data_loader = DataLoader(train_part_cpu, batch_size=1, shuffle=False)

# NOTE(review): the last argument (1000) is presumably the number of timed
# runs — confirm in base.BenchMarkRunner.
cpu_benchmark = base.BenchMarkRunner(model, cpu_data_loader, "cpu", 1000)
cpu_measurement = cpu_benchmark.run_benchmark()

print(cpu_measurement)

In [None]:
# Benchmark the teacher on the GPU, feeding it one training image at a time
# (batch_size=1, fixed order). This cell targets "cuda" explicitly, so it is
# only run when a CUDA device is actually present; on a CPU-only machine it
# reports that the benchmark was skipped instead of failing.
if torch.cuda.is_available():
    train_part_gpu = base.CustomCIFAR100(root=f"{os.path.expanduser('~')}/data/100", train=True, transform=transform, device="cuda")
    gpu_data_loader = DataLoader(train_part_gpu, batch_size=1, shuffle=False)
    # NOTE(review): the last argument (1000) matches the "1000 runs" in the
    # reported measurement — presumably the number of timed runs; confirm.
    gpu_benchmark = base.BenchMarkRunner(model, gpu_data_loader, "cuda", 1000)

    print(gpu_benchmark.run_benchmark())
else:
    print("GPU is not available, skipping the CUDA benchmark.")

<torch.utils.benchmark.utils.common.Measurement object at 0x79fe2b4ce500>
self.infer_speed_comp()
  11.26 ms
  1 measurement, 1000 runs , 4 threads
