In [203]:
import os
import time
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
import torch
import torch.nn as nn
from PIL import Image
from sklearn.metrics import accuracy_score
import torch.nn.utils.prune as prune
import copy
from sklearn.cluster import KMeans
from scipy.sparse import csc_matrix, csr_matrix
import numpy as np

In [204]:
# Directory that both the feature extractor and the classification model are loaded from.
path_to_model = "weights/my_model"

# `extractor` turns PIL images into model inputs; `vit_model` is the image classifier
# that the rest of the notebook copies, clusters and evaluates.
extractor = AutoFeatureExtractor.from_pretrained(path_to_model)
vit_model = AutoModelForImageClassification.from_pretrained(path_to_model)



In [205]:
def model_use(model, img):
    """Classify one preprocessed input and return the predicted label name.

    Args:
        model: Callable model; called as ``model(**img)`` and expected to return
            an object with a ``logits`` tensor. Must expose ``config.id2label``.
        img: Mapping of keyword arguments forwarded to the model.

    Returns:
        The ``model.config.id2label`` entry for the arg-max class index.
    """
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        output = model(**img)

    # `.item()` requires a single prediction, i.e. a batch of one.
    class_index = output.logits.argmax(dim=-1).item()
    return model.config.id2label[class_index]

In [206]:
# Weight clustering (weight sharing) for a list of modules.
def apply_weight_clusterization(module_list, bits=2, verbose=True):
    """Replace each module's non-zero weights by their nearest k-means centroid.

    For every module, the non-zero entries of ``module.weight`` are clustered
    with one-dimensional k-means, initialised with centroids evenly spaced
    between the smallest and largest non-zero weight. Each non-zero weight is
    then overwritten with the centroid of its cluster. Exact zeros are left
    untouched. The parameter is updated in place, so its shape, dtype and
    device are preserved.

    Args:
        module_list: Iterable of modules exposing a ``weight`` parameter.
            Modules whose ``weight`` is missing or ``None`` are skipped.
        bits: Number of bits per shared weight; at most ``2 ** bits`` distinct
            non-zero values remain in each module.
        verbose: When true (the default), print the weights before and after
            clustering, flattened to 2-D for weights with more than two
            dimensions.

    Returns:
        None. The modules are modified in place.
    """
    for module in module_list:
        param = getattr(module, "weight", None)
        if param is None:
            # Nothing to cluster, e.g. a normalisation layer without affine weights.
            continue

        weight = param.detach().cpu().numpy()
        shape = weight.shape

        # Flatten everything after the first axis so that weights of any rank
        # (not only 4-D convolution kernels) are handled and printed uniformly.
        weight_2d = weight.reshape(shape[0], -1) if weight.ndim > 2 else weight

        if verbose:
            print("Веса до кластеризации")
            print(weight_2d)
            print()

        # Only non-zero weights take part in clustering; zeros stay zero.
        mask = weight_2d != 0
        nonzero = weight_2d[mask]
        if nonzero.size == 0:
            # An all-zero weight has nothing to cluster.
            continue

        # k-means cannot fit more clusters than it has samples.
        n_clusters = min(2 ** bits, nonzero.size)
        space = np.linspace(nonzero.min(), nonzero.max(), num=n_clusters)

        # The explicit initial centroids plus n_init=1 make the fit deterministic.
        kmeans = KMeans(n_clusters=n_clusters, init=space.reshape(-1, 1), n_init=1, algorithm="lloyd")
        kmeans.fit(nonzero.reshape(-1, 1))

        # zeros_like keeps the original dtype; the centroid values are cast on assignment.
        clustered_2d = np.zeros_like(weight_2d)
        clustered_2d[mask] = kmeans.cluster_centers_[kmeans.labels_].reshape(-1)
        new_weight = torch.from_numpy(clustered_2d)

        if verbose:
            print("Веса после кластеризации")
            print(new_weight, new_weight.shape)

        # copy_ writes into the existing parameter, so it stays on its device
        # and keeps its dtype instead of being replaced by a new CPU tensor.
        with torch.no_grad():
            param.copy_(new_weight.reshape(shape))

In [207]:
# Directory with the evaluation images. The evaluation cell reads the class
# from the part of each file name before the first dot (e.g. "dog.<...>").
path = "data/"

# os.listdir returns entries in arbitrary order and also lists sub-directories
# and hidden files. Keep only regular, non-hidden files, and sort them so that
# every run processes the same files in the same order.
images_list = sorted(
    name
    for name in os.listdir(path)
    if not name.startswith(".") and os.path.isfile(os.path.join(path, name))
)

In [208]:
# Report the in-memory size of a model.
def size_measurement(model):
    """Print the combined size of the model's parameters and buffers in MB.

    The size is the sum of ``nelement() * element_size()`` over
    ``model.parameters()`` and ``model.buffers()``, divided by 1024**2.

    Args:
        model: Object exposing ``parameters()`` and ``buffers()`` iterables of tensors.

    Returns:
        None. The result is printed with three decimal places.
    """
    param_bytes = sum(p.nelement() * p.element_size() for p in model.parameters())
    buffer_bytes = sum(b.nelement() * b.element_size() for b in model.buffers())

    total_mb = (param_bytes + buffer_bytes) / (1024 ** 2)
    print('model size: {:.3f}MB'.format(total_mb))

In [209]:
# Measure the size of the original (unmodified) model.
size_measurement(vit_model)

model size: 327.302MB


In [210]:
# Work on a deep copy so that the original `vit_model` keeps its weights.
vit_model_copy = copy.deepcopy(vit_model)

# Modules of the copy whose weights will be clustered: the patch-embedding
# projection, the final layer norm and the classification head.
parameters_to_clusterization = [
    vit_model_copy.vit.embeddings.patch_embeddings.projection,
    vit_model_copy.vit.layernorm,
    vit_model_copy.classifier,
]


In [211]:
# Show which modules were selected for weight clustering.
print(parameters_to_clusterization)

[Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16)), LayerNorm((768,), eps=1e-12, elementwise_affine=True), Linear(in_features=768, out_features=2, bias=True)]


In [212]:
# Cluster the weights of the selected modules of the copied model in place,
# using the function's default of bits=2.
apply_weight_clusterization(parameters_to_clusterization)

Веса до кластеризации
[[ 0.01599822  0.05142277  0.05571121 ... -0.08067208 -0.05774073
  -0.03657478]
 [-0.02955145  0.02847983  0.02802685 ...  0.02751332 -0.03092413
  -0.04187002]
 [-0.0159509  -0.00071795 -0.01075949 ...  0.00088374  0.00579313
   0.00175963]
 ...
 [-0.0403337   0.01777354  0.06060625 ... -0.02783299 -0.03638085
  -0.04375472]
 [ 0.06298763  0.07997435  0.08153521 ... -0.0289927  -0.00268504
  -0.04663738]
 [-0.01337854 -0.00126218  0.00302414 ... -0.00116881 -0.0155547
  -0.00858315]]

Веса после кластеризации
tensor([[ 0.0155,  0.0851,  0.0851,  ..., -0.0953, -0.0259, -0.0259],
        [-0.0259,  0.0155,  0.0155,  ...,  0.0155, -0.0259, -0.0259],
        [-0.0259,  0.0155, -0.0259,  ...,  0.0155,  0.0155,  0.0155],
        ...,
        [-0.0259,  0.0155,  0.0851,  ..., -0.0259, -0.0259, -0.0259],
        [ 0.0851,  0.0851,  0.0851,  ..., -0.0259,  0.0155, -0.0259],
        [-0.0259,  0.0155,  0.0155,  ...,  0.0155, -0.0259, -0.0259]]) torch.Size([768, 768])
Веса



In [213]:
# Measure the model size after weight clustering.
# The clustered weights are written back as dense tensors of the original shape,
# and size_measurement counts elements x element size, so clustering alone does
# not reduce this number.
size_measurement(vit_model_copy)

model size: 327.302MB


In [214]:
# Inspect the classifier weights to confirm clustering was applied:
# the same few values now repeat across the tensor.
print(vit_model_copy.classifier.weight.data)

tensor([[-0.0323, -0.0323,  0.0117,  ...,  0.0347,  0.0117, -0.0323],
        [ 0.0117,  0.0117,  0.0117,  ...,  0.0117,  0.0347,  0.0117]])


In [215]:
# Evaluate the clustered model on every image in `images_list` and time the whole pass.
start_time = time.time()

# Label encoding: dog = 1, cat = 0.
target_list = []
predict_list = []

for element in images_list:

    # The context manager closes the image file as soon as it has been preprocessed.
    # convert("RGB") guarantees the three channels the model's patch projection expects.
    with Image.open(os.path.join(path, element)) as image:
        inputs = extractor(image.convert("RGB"), return_tensors="pt")

    predict = model_use(vit_model_copy, inputs)

    # The ground-truth class is the part of the file name before the first dot.
    # partition() returns the whole name when there is no dot, whereas slicing
    # with find() would silently drop the last character in that case.
    target = element.partition(".")[0]

    label = 1 if target == "dog" else 0
    target_list.append(label)

    # The model reports the dog class under the label name "dogs".
    pr = 1 if predict == "dogs" else 0
    predict_list.append(pr)

end_time = time.time()
elapsed = end_time - start_time

acc = accuracy_score(target_list, predict_list)
print("Точность модели после кластеризации весов = ", acc)
print("Время обработки изображений модели после кластеризации весов = ", elapsed, " секунд")
print("Скорость обработки изображений у модели после кластеризации весов составила  ", len(images_list)/elapsed, " картинок в секунду")

Точность модели после кластеризации весов =  0.96875
Время обработки изображений модели после кластеризации весов =  924.4877910614014  секунд
Скорость обработки изображений у модели после кластеризации весов составила   0.17306880798967017  картинок в секунду
