# Init

In [9]:
# Mount Google Drive so that files under MyDrive can be reached from this runtime.
from google.colab import drive
drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# Switch the working directory to the project checkout on Drive.
# NOTE(review): presumably this is what makes the local `tome`, `new_tome` and `kmeans`
# packages importable in the cells below — confirm.
%cd /content/drive/MyDrive/ViT-pytorch

/content/drive/MyDrive/ViT-pytorch


In [11]:
!pip install -q timm

In [12]:
import torch
import timm, tome
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

In [13]:
# Preprocessing applied to every evaluation image: resize, centre-crop to 224,
# convert to a tensor, then normalise each channel.
eval_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# CIFAR-100 split selected by train=False, downloaded to /content/tmp if missing.
# NOTE(review): despite its name, `imagenet_data` holds CIFAR-100 samples.
imagenet_data = datasets.CIFAR100(
    root='/content/tmp',
    train=False,
    download=True,
    transform=eval_transform,
)

Files already downloaded and verified


In [14]:
# Serve the dataset one sample per batch, in its stored order.
data_loader = DataLoader(dataset=imagenet_data, shuffle=False, batch_size=1)

In [15]:
# Model evaluation function
def evaluate(model, data_loader, flag):
    """Return the top-1 accuracy of ``model`` over ``data_loader`` as a percentage.

    Each batch is moved to the device the model lives on, the class with the
    highest score along dim 1 is taken as the prediction, and the share of
    predictions equal to the labels is reported. Gradients are disabled for
    the whole pass. The model's train/eval mode is left untouched.

    Args:
        model: Callable mapping a batch of images to per-class scores. When it
            exposes ``parameters()``, the device of its first parameter is used
            for the inputs; otherwise CUDA is used when available, else the CPU.
        data_loader: Iterable yielding ``(images, labels)`` tensor pairs.
        flag: Truthy to run a smoke test: only the first batch is processed
            and the string ``'debug'`` is returned instead of an accuracy.

    Returns:
        ``100 * correct / total`` as a float, or ``'debug'`` when ``flag`` is
        truthy and the loader yields at least one batch.

    Raises:
        ValueError: If no sample was seen, so an accuracy cannot be computed.
    """
    # Follow the model's device instead of assuming a GPU, so the function also
    # works on CPU-only runtimes.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration):
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    total = 0
    correct = 0
    with torch.no_grad():
        for images, labels in tqdm(data_loader):
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            if flag:
                # Debug mode: stop right after the first batch.
                return 'debug'

    if total == 0:
        raise ValueError("data_loader yielded no samples; accuracy is undefined")
    return 100 * correct / total

# Default Model

In [16]:
# Build DeiT-Base with a 100-class head, then restore the weights stored in the
# checkpoint on Drive.
model = timm.create_model("deit_base_patch16_224.fb_in1k", pretrained=True, num_classes= 100)
# map_location="cpu" keeps the load working on runtimes without a GPU; the model is
# moved to the selected device afterwards.
model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/ViT-pytorch/checkpoints/deit_base_patch16_224.fb_in1k_1.bin',
        map_location="cpu",
    )
)

<All keys matched successfully>

In [17]:
# Prefer the GPU when one is attached, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = model.to(device)
# Switch to evaluation mode; as the cell's last expression this also displays the module.
model.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [18]:
# Accuracy of the unpatched model (disabled in this run; uncomment to execute):
# evaluate(model, data_loader, 0)

In [None]:
# Settings for the throughput benchmark of the unpatched model.
input_size = model.default_cfg["input_size"]  # input shape taken from the model's config
batch_size = 256  # Lower this if you don't have that much memory
runs = 50
device = "cuda:0"

In [None]:
# Measure the throughput of the model as it stands before any patching.
baseline_throughput = tome.utils.benchmark(
    model, input_size=input_size, batch_size=batch_size, runs=runs, device=device, verbose=True
)

# Tome Model

In [33]:
# Patch the timm model in place using the `tome` package.
tome.patch.timm(model)
# NOTE(review): `r` is presumably the number of tokens merged per block — confirm
# against the tome implementation. It is set after patching; keep that order.
model.r = 20

In [34]:
# Accuracy (%) of the tome-patched model over the whole loader; flag=0 disables the debug early exit.
evaluate(model, data_loader, flag=0)

100%|██████████| 10000/10000 [03:20<00:00, 49.78it/s]


73.26

In [35]:
# Settings for the throughput benchmark of the tome-patched model.
input_size = model.default_cfg["input_size"]  # input shape taken from the model's config
batch_size = 256  # Lower this if you don't have that much memory
runs = 50
device = "cuda:0"

In [36]:
# Measure the throughput of the tome-patched model.
tome_throughput = tome.utils.benchmark(
    model, input_size=input_size, batch_size=batch_size, runs=runs, device=device, verbose=True
)

Benchmarking: 100%|██████████| 50/50 [00:57<00:00,  1.15s/it]


Throughput: 217.28 im/s


# New Tome (using x)

In [37]:
import new_tome

In [38]:
# Rebuild a fresh, unpatched DeiT-Base with a 100-class head and restore the
# weights stored in the checkpoint on Drive.
model = timm.create_model("deit_base_patch16_224.fb_in1k", pretrained=True, num_classes= 100)
# map_location="cpu" keeps the load working on runtimes without a GPU; the model is
# moved to the selected device afterwards.
model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/ViT-pytorch/checkpoints/deit_base_patch16_224.fb_in1k_1.bin',
        map_location="cpu",
    )
)

<All keys matched successfully>

In [39]:
# Prefer the GPU when one is attached, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = model.to(device)
# Switch to evaluation mode; as the cell's last expression this also displays the module.
model.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [40]:
# Patch the freshly loaded timm model in place using the `new_tome` package.
new_tome.patch.timm(model)
# NOTE(review): `r` is presumably the number of tokens merged per block — confirm
# against the new_tome implementation. It is set after patching; keep that order.
model.r = 20

In [41]:
# Accuracy (%) of the new_tome-patched model over the whole loader; flag=0 disables the debug early exit.
evaluate(model, data_loader, flag=0)

100%|██████████| 10000/10000 [03:24<00:00, 48.82it/s]


76.06

In [42]:
# Settings for the throughput benchmark of the new_tome-patched model.
input_size = model.default_cfg["input_size"]  # input shape taken from the model's config
batch_size = 256  # Lower this if you don't have that much memory
runs = 50
device = "cuda:0"

In [43]:
# Measure the throughput of the new_tome-patched model with the benchmark helper from `tome`.
# NOTE(review): the result is stored in `tome_throughput`, replacing the value measured
# for the original tome patch — consider a separate name if both are needed.
tome_throughput = tome.utils.benchmark(
    model, input_size=input_size, batch_size=batch_size, runs=runs, device=device, verbose=True
)

Benchmarking: 100%|██████████| 50/50 [01:03<00:00,  1.27s/it]


Throughput: 196.93 im/s


In [44]:
# Apply the `new_tome` patch again and lower `r` to 16.
# NOTE(review): `model` was already patched by new_tome in an earlier cell, and no
# evaluation or benchmark for r = 16 follows in this section — confirm this cell is
# still needed and that patching twice is safe.
new_tome.patch.timm(model)
model.r = 16

# Kmeans

In [8]:
import kmeans

In [9]:
# Rebuild a fresh, unpatched DeiT-Base with a 100-class head and restore the
# weights stored in the checkpoint on Drive.
model = timm.create_model("deit_base_patch16_224.fb_in1k", pretrained=True, num_classes= 100)
# map_location="cpu" keeps the load working on runtimes without a GPU; the model is
# moved to the selected device afterwards.
model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/ViT-pytorch/checkpoints/deit_base_patch16_224.fb_in1k_1.bin',
        map_location="cpu",
    )
)

<All keys matched successfully>

In [10]:
# Prefer the GPU when one is attached, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

model = model.to(device)
# Switch to evaluation mode; as the cell's last expression this also displays the module.
model.eval()

VisionTransformer(
  (patch_embed): PatchEmbed(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
    (norm): Identity()
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (patch_drop): Identity()
  (norm_pre): Identity()
  (blocks): Sequential(
    (0): Block(
      (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=True)
        (q_norm): Identity()
        (k_norm): Identity()
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=True)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (ls1): Identity()
      (drop_path1): Identity()
      (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
      (mlp): Mlp(
        (fc1): Linear(in_features=768, out_features=3072, bias=True)
        (act): GELU(approximate='none')
        (drop1): Dropout(p=0.0, inplace=False)
        (norm): Identity(

In [11]:
# Patch the freshly loaded timm model in place using the `kmeans` package.
kmeans.patch.timm(model)
# NOTE(review): the meaning of `r` for the kmeans variant is not visible here —
# presumably a per-block token reduction amount; confirm against the kmeans implementation.
model.r = 16

In [12]:
# Accuracy (%) of the kmeans-patched model over the whole loader; flag=0 disables the debug early exit.
evaluate(model, data_loader, flag=0)

100%|██████████| 10000/10000 [22:25<00:00,  7.43it/s]


68.11