In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install transformers

In [39]:
import os
import time
from tqdm.notebook import tqdm
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image
from sklearn.metrics import accuracy_score

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
from torchvision import models
from torch.nn.utils import prune
from torch.utils.data import DataLoader
from transformers import AutoFeatureExtractor, AutoModelForImageClassification


In [None]:
path_to_model = "/content/drive/MyDrive/model_compression/my_model"

processor = AutoFeatureExtractor.from_pretrained(path_to_model)
vit_model = AutoModelForImageClassification.from_pretrained(path_to_model)

In [20]:
def model_use(model, img):
    with torch.no_grad():
        logits = model(**img).logits

    pred_label = logits.argmax(-1).item()

    return model.config.id2label[pred_label]

In [22]:
images_list = os.listdir('/content/drive/MyDrive/model_compression/data')

In [25]:
# num of parameters
vit_model.num_parameters()

85800194

In [37]:
# size in Mb
param_size = 0
for param in vit_model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in vit_model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('model ViT size: {:.3f}MB'.format(size_all_mb))

model ViT size: 327.302MB


# Labels:

{
  "cat": 0,
  "dog": 1,
}


In [29]:
images_list[:3]

['cat.0.jpg', 'cat.1.jpg', 'cat.10.jpg']

In [36]:
start = time.time()

target_lst = []
predict_lst = []

for img_name in images_list:
    img_path = os.path.join('/content/drive/MyDrive/model_compression/data', img_name)  # Full image path

    image = Image.open(img_path, mode='r')

    inputs = processor(image, return_tensors="pt")
    predicts = model_use(vit_model, inputs)
    target = img_name[:img_name.find(".")]

    if target == "dog":
        label = 1
    else:
        label = 0

    target_lst.append(label)

    if predicts == "dogs":
        pr = 1
    else:
        pr = 0

    predict_lst.append(pr)

end = time.time()

acc = accuracy_score(target_lst, predict_lst)

In [34]:
print("accuracy исходной модели= ", acc)
print("Время обработки изображений исходной модели= ", end-start, " секунд")
print("Скорость обработки изображений у исходной модели составила  ", len(images_list)/(end-start), " картинок в секунду")

accuracy исходной модели=  0.9875
Время обработки изображений исходной модели=  83.3334059715271  секунд
Скорость обработки изображений у исходной модели составила   1.9199983264174745  картинок в секунду


In [41]:
infer_time = ((end - start) / len(images_list)) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

Avg inference time: 527.7798 ms


**Квантизация:**

Dynamic quantization is a method of quantization where the weights and activations are quantized at runtime

In [42]:
vit_model

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=7

In [65]:
# PTSQ - Post Training Static Quantization

quantized_model = torch.quantization.quantize_dynamic(
    vit_model,
    {torch.nn.Linear},
    dtype=torch.qint8
)


In [66]:
param_size = 0
for param in quantized_model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in quantized_model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('quantized_model size: {:.3f}MB'.format(size_all_mb))

quantized_model size: 2.979MB


In [79]:
start = time.time()

target_lst = []
predict_lst = []

for img_name in images_list:
    img_path = os.path.join('/content/drive/MyDrive/model_compression/data', img_name)  # Full image path

    image = Image.open(img_path, mode='r')

    inputs = processor(image, return_tensors="pt")
    predicts = model_use(quantized_model, inputs)
    target = img_name[:img_name.find(".")]

    label = 1 if img_name.startswith("dog") else 0
    target_lst.append(label)

    pr = 1 if predicts == "dogs" else 0
    predict_lst.append(pr)

end = time.time()

print('quantized_model:\n')
print("accuracy исходной модели= ", acc)
print("Время обработки изображений исходной модели= ", end-start, " секунд")
print("Скорость обработки изображений у исходной модели составила  ", len(images_list)/(end-start), " картинок в секунду")
print(f'Avg inference time : {infer_time:.4f} ms')

quantized_model:

accuracy исходной модели=  0.975
Время обработки изображений исходной модели=  54.9720938205719  секунд
Скорость обработки изображений у исходной модели составила   2.9105676877114712  картинок в секунду
Avg inference time : 360.0799 ms


# Прунинг

In [85]:
vit_model.modules()

<generator object Module.modules at 0x7b2514aa5cb0>

In [100]:
import copy
vit_model_copy = copy.deepcopy(vit_model)

In [102]:
import torch.nn.utils.prune as prune

pruning_ratio = 0.5

parameters_to_prune = []

# prune the classifier layer
parameters_to_prune.append((vit_model_copy.classifier, 'weight'))

pruning_method = torch.nn.utils.prune.L1Unstructured

for layer, parameter_name in parameters_to_prune:
    prune_amount = int(layer.weight.numel() * pruning_ratio)
    pruning_method.apply(layer, parameter_name, amount=prune_amount)

# Remove re-parametrization after pruning
for layer, parameter_name in parameters_to_prune:
    prune.remove(layer, 'weight')

start = time.time()

target_lst = []
predict_lst = []

for img_name in images_list:
    img_path = os.path.join('/content/drive/MyDrive/model_compression/data', img_name)  # Full image path

    image = Image.open(img_path, mode='r')

    inputs = processor(image, return_tensors="pt")
    predicts = model_use(vit_model_copy, inputs)
    target = img_name[:img_name.find(".")]

    label = 1 if img_name.startswith("dog") else 0
    target_lst.append(label)

    pr = 1 if predicts == "dogs" else 0
    predict_lst.append(pr)

end = time.time()

print('pruned_model:\n')
print("accuracy = ", acc)
print("Время обработки изображений модели= ", end-start, " секунд")
print("Скорость обработки изображений модели составила  ", len(images_list)/(end-start), " картинок в секунду")
print(f'Avg inference time : {infer_time:.4f} ms')


pruned_model:

accuracy =  0.975
Время обработки изображений модели=  87.96074986457825  секунд
Скорость обработки изображений модели составила   1.8189931332592235  картинок в секунду
Avg inference time : 360.0799 ms
