## ДЗ 4:
### Кластеризация весов

In [1]:
import ultralytics
from ultralytics import YOLO

In [2]:
import torch
import time
import torch.quantization
from torch import profiler
from torch.profiler import profile, record_function, ProfilerActivity
import matplotlib.pyplot as plt

In [3]:
# Seed PyTorch's global RNG for reproducibility. Assigning an int to `torch.seed`
# would only rebind that function's name and seed nothing.
# The trailing semicolon keeps the notebook from echoing the returned Generator.
torch.manual_seed(42);

In [4]:
def get_model_size(model: torch.nn.Module) -> float:
    """Return the combined size of a module's parameters and buffers in MiB.

    Every tensor yielded by ``model.parameters()`` and ``model.buffers()``
    contributes ``nelement() * element_size()`` bytes; the total is divided
    by 1024**2.
    """
    def _total_bytes(tensors) -> int:
        # Bytes occupied by the elements of each tensor, summed over the iterable.
        return sum(t.nelement() * t.element_size() for t in tensors)

    occupied = _total_bytes(model.parameters()) + _total_bytes(model.buffers())
    return occupied / 1024**2

In [5]:
def profile_model(model, input_example: torch.Tensor = torch.randn(1, 3, 224, 224)):
    """Profile a single forward pass of ``model`` and return a one-row summary table.

    The model and the input are moved to CUDA when it is available, otherwise
    to CPU. Three warm-up passes are executed first, then one pass is recorded
    inside a ``model_inference`` region.

    Args:
        model: Module to profile. Note that ``model.to(device)`` moves the
            module itself, so the caller's object ends up on that device.
        input_example: Tensor passed to ``model``; defaults to a random
            ``(1, 3, 224, 224)`` tensor created once at definition time.

    Returns:
        str: The profiler table sorted by total CPU time, limited to its top row.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    input_example = input_example.to(device)

    # Record GPU kernels as well when the model actually runs on the GPU.
    activities = [ProfilerActivity.CPU]
    if device.type == "cuda":
        activities.append(ProfilerActivity.CUDA)

    # Warm-up and measurement share inference mode, so the warm-up exercises
    # the same code path as the measured pass and builds no autograd graph.
    with torch.inference_mode():
        # warm-up runs (lazy initialisation, allocator caches, ...)
        for _ in range(3):
            model(input_example)

        with profile(activities=activities, profile_memory=True, with_flops=True) as prof:
            with record_function("model_inference"):
                model(input_example)
                if device.type == "cuda":
                    # CUDA kernels are asynchronous: wait for them so the
                    # region covers the whole forward pass, not just launches.
                    torch.cuda.synchronize()

    profiling_info = prof.key_averages().table(sort_by="cpu_time_total", row_limit=1)
    return profiling_info

### Исходная модель

In [6]:
# Load the YOLOv8-nano checkpoint and take the underlying torch module.
yolo = YOLO('yolov8n.pt')
model = yolo.model
# Fuse the model (the summary logged below reports "(fused)") and switch it to eval mode.
model.fuse()
model.eval()
# Empty print as the last statement, so the cell does not echo the value returned by model.eval().
print()

[W NNPACK.cpp:64] Could not initialize NNPACK! Reason: Unsupported hardware.
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs





In [7]:
# Baseline measurement: profile one forward pass of the original model and
# report the size of its parameters and buffers (value returned by get_model_size).
print(profile_model(model))
print(f'model size: {get_model_size(model)}')

---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Total KFLOPs  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         7.56%       3.970ms       100.00%      52.508ms      52.508ms           0 b     -13.75 Mb             1            --  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
Self CPU time total: 52.508ms

model size: 12.0235595703125


STAGE:2023-10-13 13:27:15 77229:2817424 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-10-13 13:27:15 77229:2817424 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-10-13 13:27:15 77229:2817424 ActivityProfilerController.cpp:321] Completed Stage: Post Processing


### Кластеризация весов

In [8]:
import numpy as np
import sklearn
from sklearn.cluster import KMeans

In [10]:
def cluster_weights(weights, n_clusters=8, random_state=42):
    """Replace every weight tensor's values with their nearest K-means centroid.

    Each entry whose key contains ``'weight'`` is flattened, clustered in one
    dimension with K-means, and rebuilt with every element replaced by the
    centroid of its cluster. Shape, dtype and device of the tensor are kept.

    Args:
        weights: Mapping from parameter name to tensor (e.g. a ``state_dict``).
            It is updated in place and also returned.
        n_clusters: Number of centroids per tensor (8 is the KMeans default,
            which the function used implicitly before).
        random_state: Seed forwarded to ``KMeans`` so runs are reproducible;
            pass ``None`` for a different random initialisation on every call.

    Returns:
        The same ``weights`` mapping, with clustered tensors.

    Raises:
        ValueError: If ``n_clusters`` is smaller than 1.
    """
    if n_clusters < 1:
        raise ValueError(f"n_clusters must be >= 1, got {n_clusters}")

    for name, tensor in weights.items():
        if 'weight' not in name:
            continue  # skip tensors that are not weights
        if not torch.is_floating_point(tensor) or tensor.numel() == 0:
            continue  # nothing meaningful to cluster in integer or empty tensors

        # Column vector of samples for KMeans; upcast so half/bfloat16 tensors
        # can be converted to NumPy.
        values = tensor.detach().cpu().float().numpy().reshape(-1, 1)
        if np.unique(values).size <= n_clusters:
            # Already uses no more distinct values than requested; KMeans would
            # fail (too few samples) or degenerate here.
            continue

        kmeans = KMeans(
            n_clusters=n_clusters,
            n_init=10,
            init='random',  # random centroid initialisation
            random_state=random_state,
        )
        labels = kmeans.fit_predict(values)  # index of the assigned centroid per element
        centroids = kmeans.cluster_centers_[:, 0]  # 1-D array of centroid values
        quantized = centroids[labels].reshape(tuple(tensor.shape))
        # Rebuild the tensor with the original dtype and device.
        weights[name] = torch.as_tensor(quantized, dtype=tensor.dtype, device=tensor.device)
    return weights

In [11]:
# Take the state dict of the original model as the input for clustering.
weights = yolo.model.state_dict()

In [12]:
# Cluster the weight tensors with K-means. cluster_weights updates the dict it
# receives and returns it, so `weights_clustered` and `weights` are the same object.
weights_clustered = cluster_weights(weights)



In [13]:
# Build a second, independent model from the same checkpoint; it will receive the clustered weights.
clustered_yolo = YOLO('yolov8n.pt')
clustered_model = clustered_yolo.model
# Fuse and switch to eval mode, mirroring the preparation of the baseline model.
clustered_model.fuse()
clustered_model.eval()
# Empty print as the last statement, so the cell does not echo the value returned by eval().
print()

YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs





In [14]:
# Load the clustered tensors into the second model.
clustered_yolo.model.load_state_dict(weights_clustered)

<All keys matched successfully>

In [15]:
# Persist the clustered state dict to disk.
torch.save(weights_clustered, 'weights_clustered.pt')

In [16]:
# Round trip: read the saved state dict back from disk and load it into the model.
# NOTE(review): torch.load unpickles the file; fine for a file this notebook just
# wrote, but consider weights_only=True if the file could come from elsewhere.
clustered_yolo.model.load_state_dict(torch.load('weights_clustered.pt'))

<All keys matched successfully>

In [17]:
# Profile the model that actually holds the clustered weights. `model` is the
# untouched baseline, so measuring it here would only repeat the baseline numbers.
# The reported size is not expected to shrink: the clustered values are still
# stored as dense tensors of the same shape.
print('model with clustered weights')
print(profile_model(clustered_model))
print(f'model size: {get_model_size(clustered_model)}')

model with clustered weights
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                             Name    Self CPU %      Self CPU   CPU total %     CPU total  CPU time avg       CPU Mem  Self CPU Mem    # of Calls  Total KFLOPs  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
                  model_inference         1.72%     756.000us       100.00%      43.885ms      43.885ms           0 b     -13.68 Mb             1            --  
---------------------------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  ------------  
Self CPU time total: 43.885ms

model size: 12.0235595703125


STAGE:2023-10-13 13:27:54 77229:2817424 ActivityProfilerController.cpp:311] Completed Stage: Warm Up
STAGE:2023-10-13 13:27:54 77229:2817424 ActivityProfilerController.cpp:317] Completed Stage: Collection
STAGE:2023-10-13 13:27:54 77229:2817424 ActivityProfilerController.cpp:321] Completed Stage: Post Processing
