In [1]:
import numpy as np
import torch
from ultralytics import YOLO
import matplotlib.pyplot as plt
import os
from datetime import datetime
import time
from torch.nn.utils import prune
# Default to the CPU and switch to the GPU only when PyTorch reports CUDA as available.
device = 'cpu'
if torch.cuda.is_available():
    device = 'cuda'

In [17]:
def get_model_size(model):
    """Print the total parameter storage of ``model`` in units of 2**20 32-bit words.

    The storage of every tensor yielded by ``model.parameters()`` is summed in
    bits and divided by ``32 * 1024 * 1024``. For a model whose parameters are
    all 32-bit floats the printed number is therefore the parameter count
    divided by 2**20; multiply it by 4 to obtain the size in MiB.

    Args:
        model: Object exposing ``parameters()`` that yields tensors
            (for example a ``torch.nn.Module``).

    Returns:
        None. The value is printed, not returned.
    """
    # element_size() (bytes per element) is defined for every dtype, including
    # bool and complex, whereas torch.finfo / torch.iinfo each accept only one
    # dtype family and raise TypeError for anything else.
    total_bits = sum(p.numel() * p.element_size() * 8 for p in model.parameters())
    print(total_bits / (32 * 1024 * 1024))

Загрузим модель YOLOv8l и посчитаем долю нулевых значений среди её весов

In [2]:
def estimate_sparsity(model):
    """Return the fraction of parameter elements that are exactly zero.

    Every tensor yielded by ``model.parameters()`` is counted, not only the
    weights of pruned layers.

    Args:
        model: Object exposing ``parameters()`` that yields tensors.

    Returns:
        A 0-dim tensor holding ``zero_elements / all_elements``.

    Raises:
        ZeroDivisionError: If ``model`` yields no parameters at all.
    """
    n_zero = 0
    n_all = 0
    for tensor in model.parameters():
        # The first addition turns the int accumulator into a 0-dim tensor.
        n_zero += (tensor == 0).sum()
        n_all += tensor.numel()
    return n_zero / n_all

In [3]:
# Load the YOLOv8l checkpoint 'yolov8l.pt'; this is the unpruned baseline model.
model = YOLO('yolov8l.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8l.pt to 'yolov8l.pt'...
100%|█████████████████████████████████████████████████████████████████████████████| 83.7M/83.7M [00:02<00:00, 31.4MB/s]


In [19]:
# Share of exactly-zero values among all parameters of the model as currently loaded.
estimate_sparsity(model)

tensor(1.1993e-05)

In [18]:
# Prints total parameter bits divided by 32 * 1024 * 1024 (see get_model_size).
get_model_size(model)

41.66748046875


Замерим время инференса (по логу Ultralytics) и общее время выполнения валидации

In [20]:
# Wall-clock duration of the whole model.val() call, i.e. everything the call
# does, not only the forward pass.
# NOTE: the dataset config 'coco1.yaml' must be resolvable; the recorded run of
# this cell failed with FileNotFoundError.
val_kwargs = dict(data='coco1.yaml', imgsz=640, iou=0.6, batch=2)

start_time = datetime.now()
model.val(**val_kwargs)
end_time = datetime.now()

elapsed = end_time - start_time
print(elapsed)

Ultralytics YOLOv8.0.181  Python-3.11.4 torch-2.2.0.dev20230918+cu121 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8l summary (fused): 268 layers, 43668288 parameters, 0 gradients


FileNotFoundError: 'coco1.yaml' does not exist

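Время инференса исходной модели: 17.5 мс на изображение на GPU NVIDIA RTX 4060 Mobile (значение, по-видимому, получено в более раннем запуске: в выводе выше валидация завершилась ошибкой, так как файл coco1.yaml не найден)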

In [6]:
# Reload the checkpoint so the pruning experiment below starts from a fresh model object.
model = YOLO('yolov8l.pt')

In [7]:
# Unstructured magnitude pruning: in every Conv2d, zero the `compression_ratio`
# share of `weight` entries selected by the L1 criterion.
compression_ratio = 0.3
lyr_names, is_conv = [], []
for layer_name, model_layer in model.model.named_modules():
    conv_layer = isinstance(model_layer, torch.nn.Conv2d)
    # Bookkeeping: module name and whether it is a Conv2d.
    lyr_names.append(layer_name)
    is_conv.append(conv_layer)
    if not conv_layer:
        continue
    prune.l1_unstructured(model_layer, name="weight", amount=compression_ratio)
    # Make the pruning permanent: drop the mask re-parametrization and keep
    # the zeroed values directly in `weight`.
    prune.remove(model_layer, 'weight')
# Sparsity is measured over all parameters, not only the pruned conv weights.
estimate_sparsity(model)

tensor(0.2997)

In [8]:
# Wall-clock duration of the whole model.val() call on the model pruned above;
# it covers everything the call does, not only the forward pass.
val_kwargs = dict(data='coco1.yaml', imgsz=640, iou=0.6, batch=2)

start_time = datetime.now()
model.val(**val_kwargs)
end_time = datetime.now()

elapsed = end_time - start_time
print(elapsed)

Ultralytics YOLOv8.0.181  Python-3.11.4 torch-2.2.0.dev20230918+cu121 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8l summary (fused): 268 layers, 43668288 parameters, 0 gradients
[34m[1mval: [0mScanning C:\Users\gbekh\JupiterProjects\ProjectCompression\datasets\coco\labels\val2017.cache... 500 images, 4 bac[0m


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 252/252 [00:
                   all        504       3552      0.161      0.105     0.0599     0.0327
                person        504       1081     0.0851      0.074     0.0339     0.0206
               bicycle        504         34      0.125     0.0882     0.0221     0.0069
                   car        504        169     0.0492     0.0178      0.017     0.0145
            motorcycle        504         45     0.0644     0.0667    0.00986    0.00248
              airplane        504         12     0.0508       0.25     0.0373     0.0134
                   bus        504         35      0.139      0.257        0.2      0.151
                 train        504         15      0.118        0.2     0.0445     0.0262
                 truck        504         29      0.177     0.0745      0.029     0.0237
                  boat        504         38          1          0   0.000342  

0:00:35.605759


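Точность модели после 30%-го неструктурированного прунинга сильно упала (mAP50-95 = 0.0327 в выводе выше), при этом время инференса снизилось с 17.5 до 16.1 мс, то есть примерно на 8%.
Посмотрим на структурированный прунинг (structured pruning) и сделаем его более агрессивным.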

In [9]:
# Reload the checkpoint so the structured-pruning experiment below starts from a fresh model object.
model = YOLO('yolov8l.pt')

In [10]:
# Structured pruning: in every Conv2d, zero whole slices of `weight` along
# dim 0, ranked by their L2 norm (n=2); `compression_ratio` is the share of
# slices removed.
compression_ratio = 0.9
lyr_names, is_conv = [], []
for layer_name, model_layer in model.model.named_modules():
    conv_layer = isinstance(model_layer, torch.nn.Conv2d)
    # Bookkeeping: module name and whether it is a Conv2d.
    lyr_names.append(layer_name)
    is_conv.append(conv_layer)
    if not conv_layer:
        continue
    prune.ln_structured(model_layer, name="weight", amount=compression_ratio, n=2, dim=0)
    # Make the pruning permanent: drop the mask re-parametrization and keep
    # the zeroed values directly in `weight`.
    prune.remove(model_layer, 'weight')
# Sparsity is measured over all parameters, not only the pruned conv weights.
estimate_sparsity(model)

tensor(0.8982)

In [11]:
# Wall-clock duration of the whole model.val() call on the structurally pruned
# model; it covers everything the call does, not only the forward pass.
val_kwargs = dict(data='coco1.yaml', imgsz=640, iou=0.6, batch=2)

start_time = datetime.now()
model.val(**val_kwargs)
end_time = datetime.now()

elapsed = end_time - start_time
print(elapsed)

Ultralytics YOLOv8.0.181  Python-3.11.4 torch-2.2.0.dev20230918+cu121 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
YOLOv8l summary (fused): 268 layers, 43668288 parameters, 0 gradients
[34m[1mval: [0mScanning C:\Users\gbekh\JupiterProjects\ProjectCompression\datasets\coco\labels\val2017.cache... 500 images, 4 bac[0m


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 252/252 [00:
                   all        504       3552          0          0          0          0
Speed: 0.2ms preprocess, 15.4ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns\detect\val6[0m


0:00:21.390253


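Был использован структурированный прунинг по выходным каналам (dim=0) весов свёрточных слоёв. Время инференса снизилось до 15.4 мс, но точность при этом упала до нуля (mAP = 0 в выводе выше). Дальнейшему ускорению мешает то, что обнулённые веса по-прежнему хранятся в плотных тензорах и обрабатываются теми же параллельными инструкциями GPU.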
