In [1]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.0.181-py3-none-any.whl (617 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m617.1/617.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: ultralytics
Successfully installed ultralytics-8.0.181


In [2]:
from ultralytics import YOLO

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.datasets as datasets
from torchvision import models
from torch.nn.utils import prune
from torch.utils.data import DataLoader

In [13]:
from torchsummary import summary

In [4]:
import time
from tqdm.notebook import tqdm

In [5]:
model = YOLO('yolov8n.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...
100%|██████████| 6.23M/6.23M [00:00<00:00, 11.7MB/s]


In [6]:
import os

print("%.2f MB" %(os.path.getsize("yolov8n.pt")/1e6))

6.53 MB


In [None]:
model = models.resnet18(pretrained=True)



In [7]:
param_size = 0
for param in model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 12.085MB


In [8]:
inp = torch.randn(1, 3, 224, 224)

num_samples = 100
start_time = time.time()
for _ in tqdm(range(num_samples)):
    output = model(inp / 255)
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} sec')

  0%|          | 0/100 [00:00<?, ?it/s]


0: 224x224 (no detections), 219.3ms
Speed: 0.0ms preprocess, 219.3ms inference, 12.0ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 69.1ms
Speed: 0.0ms preprocess, 69.1ms inference, 1.6ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 63.8ms
Speed: 0.0ms preprocess, 63.8ms inference, 1.8ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 78.4ms
Speed: 0.0ms preprocess, 78.4ms inference, 1.7ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 46.0ms
Speed: 0.0ms preprocess, 46.0ms inference, 1.1ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 39.4ms
Speed: 0.0ms preprocess, 39.4ms inference, 1.1ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 49.4ms
Speed: 0.0ms preprocess, 49.4ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 40.1ms
Speed: 0.0ms preprocess, 40.1m

Avg inference time: 85.0392 sec


In [9]:
infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

Avg inference time: 85.0392 ms


In [10]:
print(model)

YOLO(
  (model): DetectionModel(
    (model): Sequential(
      (0): Conv(
        (conv): Conv2d(3, 16, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (act): SiLU(inplace=True)
      )
      (1): Conv(
        (conv): Conv2d(16, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (act): SiLU(inplace=True)
      )
      (2): C2f(
        (cv1): Conv(
          (conv): Conv2d(32, 32, kernel_size=(1, 1), stride=(1, 1))
          (act): SiLU(inplace=True)
        )
        (cv2): Conv(
          (conv): Conv2d(48, 32, kernel_size=(1, 1), stride=(1, 1))
          (act): SiLU(inplace=True)
        )
        (m): ModuleList(
          (0): Bottleneck(
            (cv1): Conv(
              (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
              (act): SiLU(inplace=True)
            )
            (cv2): Conv(
              (conv): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
              (act): SiLU(inplace=True)
   

In [11]:
quantized_model = torch.quantization.quantize_dynamic(
    model,
    {torch.nn.Conv2d},
    dtype=torch.qint8
)

Ultralytics YOLOv8.0.181 🚀 Python-3.10.12 torch-2.0.1+cu118 CPU (Intel Xeon 2.20GHz)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=coco8.yaml, epochs=100, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, format=torchscript, keras=False, optimize=F

In [12]:
param_size = 0
for param in quantized_model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in quantized_model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('quantized_model size: {:.3f}MB'.format(size_all_mb))

quantized_model size: 12.085MB


In [13]:
inp = torch.randn(1, 3, 224, 224)

num_samples = 100
start_time = time.time()
for _ in tqdm(range(num_samples)):
    output = quantized_model(inp)
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

  0%|          | 0/100 [00:00<?, ?it/s]


0: 224x224 (no detections), 60.4ms
Speed: 0.0ms preprocess, 60.4ms inference, 1.3ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 45.2ms
Speed: 0.0ms preprocess, 45.2ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 45.4ms
Speed: 0.0ms preprocess, 45.4ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 40.8ms
Speed: 0.8ms preprocess, 40.8ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 46.6ms
Speed: 0.0ms preprocess, 46.6ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 39.7ms
Speed: 0.0ms preprocess, 39.7ms inference, 1.3ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 48.5ms
Speed: 0.0ms preprocess, 48.5ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 44.9ms
Speed: 0.0ms preprocess, 44.9ms i

Avg inference time: 63.7313 ms


In [18]:
from torch.nn.utils import prune

In [39]:
for name, m in model.named_modules():
  params = list(m.named_parameters())
  if len(params) and params[0][0] == 'weight':
    prune.l1_unstructured(m, name=params[0][0], amount=0.3)

In [40]:
param_size = 0
for param in model.parameters():
    param_size += param.nelement() * param.element_size()
buffer_size = 0
for buffer in model.buffers():
    buffer_size += buffer.nelement() * buffer.element_size()

size_all_mb = (param_size + buffer_size) / 1024**2
print('model size: {:.3f}MB'.format(size_all_mb))

model size: 24.025MB


In [41]:
inp = torch.randn(1, 3, 224, 224)

num_samples = 100
start_time = time.time()
for _ in tqdm(range(num_samples)):
    output = model(inp / 255)
end_time = time.time()

infer_time = ((end_time - start_time) / num_samples) * 1000
print(f'Avg inference time: {infer_time:.4f} ms')

  0%|          | 0/100 [00:00<?, ?it/s]


0: 224x224 (no detections), 83.7ms
Speed: 0.1ms preprocess, 83.7ms inference, 3.6ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 72.9ms
Speed: 0.0ms preprocess, 72.9ms inference, 1.8ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 51.5ms
Speed: 0.0ms preprocess, 51.5ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 48.7ms
Speed: 0.0ms preprocess, 48.7ms inference, 1.3ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 50.5ms
Speed: 0.0ms preprocess, 50.5ms inference, 1.8ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 53.7ms
Speed: 0.0ms preprocess, 53.7ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 46.6ms
Speed: 0.0ms preprocess, 46.6ms inference, 1.2ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 49.6ms
Speed: 0.0ms preprocess, 49.6ms i

Avg inference time: 72.6070 sec
