## Google Colab dep installation

In [1]:
!git clone "https://github.com/PredelinaAsya/ModelCompression.git"
%cd ModelCompression/hw3
!pip3 install -qr "requirements.txt"

Cloning into 'ModelCompression'...
remote: Enumerating objects: 25, done.[K
remote: Counting objects: 100% (25/25), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 25 (delta 4), reused 20 (delta 1), pack-reused 0[K
Receiving objects: 100% (25/25), 10.63 KiB | 2.13 MiB/s, done.
Resolving deltas: 100% (4/4), done.
/content/ModelCompression/hw3
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.0/17.0 MB[0m [31m66.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m615.6/615.6 kB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
[?25h

## Importing deps

In [2]:
import numpy as np
import pickle
from sklearn.cluster import KMeans
import torch
import torch.nn as nn
from tqdm import tqdm
from ultralytics import YOLO

## Clustering

In [3]:
def apply_weight_clustering(layer, max_clusters=64, collect_data=False):
    """Quantize a layer's weights in place by snapping them to k-means centroids.

    The weight tensor is flattened to a column of scalars, clustered with
    k-means, and every weight is replaced by the centroid of its cluster.
    The layer keeps its original weight shape, dtype and device.

    Args:
        layer: Module exposing a ``weight`` parameter (e.g. ``nn.Conv2d``).
        max_clusters: Upper bound on the number of clusters; the actual number
            is ``min(number of distinct weight values, max_clusters)``.
        collect_data: When True, also return the fitted centroids and the
            per-weight cluster labels.

    Returns:
        A ``(centroids, indices)`` tuple. Both entries are ``None`` unless
        ``collect_data`` is True and clustering was actually performed.
        ``centroids`` is ``kmeans.cluster_centers_`` and ``indices`` is
        ``kmeans.labels_`` (labels follow the flattened weight order).
    """
    weight = layer.weight
    original_shape = weight.data.shape
    # KMeans expects a 2-D (n_samples, n_features) array: one scalar feature per weight.
    flattened_weights = weight.data.cpu().numpy().reshape(-1, 1)
    n_clusters = min(len(np.unique(flattened_weights)), max_clusters)

    centroids, indices = None, None

    # With a single distinct value (or max_clusters <= 1) there is nothing to cluster.
    if n_clusters > 1:
        # Fixed random_state keeps the k-means initialisation seed constant between runs.
        kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=1).fit(flattened_weights)
        # Vectorised centroid lookup: one fancy-indexing pass instead of a Python loop per weight.
        new_weights = kmeans.cluster_centers_[kmeans.labels_].reshape(original_shape)
        # Preserve the layer's dtype and device; torch.Tensor(...) would force float32.
        layer.weight.data = torch.tensor(new_weights, dtype=weight.dtype, device=weight.device)

        if collect_data:
            centroids = kmeans.cluster_centers_
            indices = kmeans.labels_

    return centroids, indices

Loading the model

In [6]:
# Build the Ultralytics YOLO wrapper from the "yolov8n.pt" weights file name.
model = YOLO("yolov8n.pt")

Verifying model performance before clustering

In [6]:
# Baseline: validate the unmodified model on the dataset described by coco128.yaml.
metrics = model.val(data="coco128.yaml")

Ultralytics YOLOv8.0.190 🚀 Python-3.10.12 torch-2.0.1+cu118 CPU (Intel Xeon 2.20GHz)
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs

Dataset 'coco128.yaml' images not found ⚠️, missing path '/content/ModelCompression/hw3/datasets/coco128/images/train2017'
Downloading https://ultralytics.com/assets/coco128.zip to '/content/ModelCompression/hw3/datasets/coco128.zip'...
100%|██████████| 6.66M/6.66M [00:00<00:00, 12.6MB/s]
Unzipping /content/ModelCompression/hw3/datasets/coco128.zip to /content/ModelCompression/hw3/datasets/coco128...: 100%|██████████| 263/263 [00:00<00:00, 3624.94file/s]
Dataset download success ✅ (1.7s), saved to [1m/content/ModelCompression/hw3/datasets[0m

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...
100%|██████████| 755k/755k [00:00<00:00, 4.29MB/s]
[34m[1mval: [0mScanning /content/ModelCompression/hw3/datasets/coco128/labels/train2017... 126 images, 2 backgrounds, 0 corrupt: 10

Applying weight clustering to each convolutional layer

In [7]:
# Collect every Conv2d submodule (with its qualified name) once, so the
# progress bar length and the loop below share the same list.
conv_layers = [
    (name, module)
    for name, module in model.named_modules()
    if isinstance(module, nn.Conv2d)
]
total = len(conv_layers)

# Per-layer clustering results, keyed by module name and stored as plain lists.
centroid_dict, index_dict = {}, {}

with tqdm(total=total, desc="Processing layers", dynamic_ncols=True) as pbar:
    for name, layer in conv_layers:
        centroids, indices = apply_weight_clustering(layer, collect_data=True)
        pbar.update(1)
        # Layers that were not clustered return None and are left out of the dicts.
        if centroids is None or indices is None:
            continue
        centroid_dict[name] = centroids.tolist()
        index_dict[name] = indices.tolist()

Processing layers: 100%|██████████| 64/64 [06:46<00:00,  6.35s/it]


Saving centroids and indices

In [8]:
# Serialize both lookup tables with pickle, one file per dictionary.
for filename, payload in (('centroids.pkl', centroid_dict), ('indices.pkl', index_dict)):
    with open(filename, 'wb') as f:
        pickle.dump(payload, f)

Verifying model performance after clustering

In [9]:
# Repeat the same validation now that the conv weights have been clustered.
# NOTE: this rebinds `metrics`, so the pre-clustering result is no longer reachable by name.
metrics = model.val(data="coco128.yaml")

Ultralytics YOLOv8.0.190 🚀 Python-3.10.12 torch-2.0.1+cu118 CPU (Intel Xeon 2.20GHz)
YOLOv8n summary (fused): 168 layers, 3151904 parameters, 0 gradients, 8.7 GFLOPs

Dataset 'coco128.yaml' images not found ⚠️, missing path '/content/ModelCompression/hw3/datasets/coco128/images/train2017'
Downloading https://ultralytics.com/assets/coco128.zip to '/content/ModelCompression/hw3/datasets/coco128.zip'...
100%|██████████| 6.66M/6.66M [00:00<00:00, 65.4MB/s]
Unzipping /content/ModelCompression/hw3/datasets/coco128.zip to /content/ModelCompression/hw3/datasets/coco128...: 100%|██████████| 263/263 [00:00<00:00, 2328.57file/s]
Dataset download success ✅ (0.9s), saved to [1m/content/ModelCompression/hw3/datasets[0m

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...
100%|██████████| 755k/755k [00:00<00:00, 13.0MB/s]
[34m[1mval: [0mScanning /content/ModelCompression/hw3/datasets/coco128/labels/train2017... 126 images, 2 backgrounds, 0 corrupt: 10