In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
!pip install ultralytics

Original weights Valid and Test eval.

In [6]:
from ultralytics import YOLO
import torch

# Load your trained model
weights = "/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_s11.pt"
model = YOLO(weights)

data_yaml = "/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/data.yaml"




# Move model to device (GPU recommended)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print(f"Model device: {model.device}")

Model device: cuda:0


# Original model evaluation (before pruning)

In [None]:
results = model.val(data=data_yaml, split="val")  # Change dataset.yaml path if needed
print(f"Validation mAP50: {results.box.map:.4f}")

results = model.val(data=data_yaml, split="test")  # For test set evaluation
print(f"Test mAP50: {results.box.map:.4f}")


[34m[1mval: [0mFast image access ✅ (ping: 0.2±0.1 ms, read: 22.1±5.5 MB/s, size: 29.0 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/valid/labels.cache... 90 images, 0 backgrounds, 0 corrupt: 100%|██████████| 90/90 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:02<00:00,  2.65it/s]


                   all         90         93      0.921      0.923      0.951      0.784      0.525      0.502      0.371      0.127
            half-squat         15         16      0.927      0.791      0.899      0.736      0.604        0.5      0.397     0.0977
                 plank         38         38       0.99          1      0.995      0.846       0.94      0.947      0.895       0.37
                 squat         18         20      0.878        0.9      0.927      0.777      0.367       0.35       0.15     0.0297
                 stand         19         19      0.889          1      0.982      0.775      0.188      0.211     0.0432     0.0114
Speed: 2.4ms preprocess, 4.5ms inference, 0.0ms loss, 2.5ms postprocess per image
Results saved to [1mruns/pose/val3[0m
Validation mAP50: 0.7836
[34m[1mval: [0mFast image access ✅ (ping: 0.3±0.1 ms, read: 31.6±3.9 MB/s, size: 32.7 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/test/labels.cache... 48 images, 0 backgrounds, 0 corrupt: 100%|██████████| 48/48 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  1.80it/s]


                   all         48         48      0.805       0.87      0.796      0.689      0.462      0.511      0.367      0.129
            half-squat          3          3      0.494      0.655      0.409       0.37      0.481      0.629      0.333     0.0889
                 plank         22         22       0.98          1      0.995      0.863      0.892      0.909       0.87      0.377
                 squat         12         12      0.785      0.917      0.819      0.638      0.379      0.417       0.25     0.0452
                 stand         11         11      0.962      0.909       0.96      0.883     0.0966     0.0909     0.0159    0.00319
Speed: 3.0ms preprocess, 4.6ms inference, 0.0ms loss, 2.4ms postprocess per image
Results saved to [1mruns/pose/val4[0m
Test mAP50: 0.6886


# Pruning 1
L1 Norm unstrcuted pruning

In [None]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
from ultralytics import YOLO
import copy

# Load base model
base_model = YOLO("/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_s11.pt")
model = base_model.model
baseline_metrics = base_model.val(data=data_yaml, verbose=False)
baseline_map50 = baseline_metrics.box.map50
print(f"\nBaseline mAP50: {baseline_map50:.4f}")

prune_ratio = 0.8
blocks_to_check = [2, 4, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 22]
target_layers = ["cv1.conv", "cv2.conv", "cv3.conv"]  # Top-level convs
sensitivity_results = []

for i in blocks_to_check:
    block = model.model[i]

    for name in target_layers:
        # Split nested attributes: e.g., "cv1.conv"
        parts = name.split('.')
        sub = block
        for p in parts:
            sub = getattr(sub, p, None)
            if sub is None:
                break

        if isinstance(sub, nn.Conv2d):
            conv = sub
            print(f"Pruning: Block {i}, Layer {name}")

            # Backup
            original_weight = conv.weight.data.clone()

            # Prune
            prune.l1_unstructured(conv, name="weight", amount=prune_ratio)

            # Evaluate
            temp_model = copy.deepcopy(base_model)
            temp_model.model.model[i] = block
            metrics = temp_model.val(data=data_yaml, verbose=False)
            pruned_map50 = metrics.box.map50
            drop = baseline_map50 - pruned_map50
            sensitivity_results.append((f"Block {i} - {name}", round(drop, 4)))

            # Restore
            conv.weight.data.copy_(original_weight)
            prune.remove(conv, 'weight')
        else:
            print(f"Skipped: Block {i}, Layer {name} not found or not Conv2d")

# Sort and show results
sensitivity_results.sort(key=lambda x: x[1], reverse=True)

print("\nLayer-wise Sensitivity Summary (Sorted by drop in mAP50):")
for layer, drop in sensitivity_results:
    print(f"{layer}: mAP50 drop = {drop:.4f}")


In [None]:
# Confirm available layers per block
for i in range(len(model.model)):
    block = model.model[i]
    print(f"\nBlock {i}: {type(block)}")
    for name, submodule in block.named_modules():
        print(f"  {name}: {type(submodule)}")


In [7]:
import torch
import torch.nn as nn
import torch.nn.utils.prune as prune
import copy

# Backup original model for comparison
original_model = copy.deepcopy(model)
prune_ratio_high = 0.63  # Set pruning amount
prune_ratio_low = 0.5
numbers_to_check = range(1, 23)  # Blocks to focus pruning on
masks = {}  # Dictionary to store pruning masks

# Layers we want to prune (safely)
target_layer_keywords = ["cv1.conv", "cv2.conv", "cv3.conv", "cv4.conv", "cv5.conv",
    "stem.conv", "conv"]


for name, module in model.named_modules():
    if isinstance(module, nn.Conv2d):
        # Skip pose head
        if "head" in name or "pose" in name:
            continue

        for num in numbers_to_check:
            if any(k in name for k in target_layer_keywords) and f".{num}." in name:
                match num:
                    case 1:
                        prune_ratio = 0
                    case 2:
                        prune_ratio = 0.65
                    case 3:
                        prune_ratio = 0
                    case 4:
                        prune_ratio = 0.65
                    case 5:
                        prune_ratio = 0
                    case 6:
                        prune_ratio = 0.65
                    case 7:
                        prune_ratio = 0
                    case 8:
                        prune_ratio = 0.65
                    case 9:
                        prune_ratio = 0.65
                    case 10:
                        prune_ratio = 0.55
                    case 11:
                        prune_ratio = 0
                    case 12:
                        prune_ratio = 0
                    case 13:
                        prune_ratio = 0.55
                    case 14:
                        prune_ratio = 0
                    case 15:
                        prune_ratio = 0
                    case 16:
                        prune_ratio = 0.5
                    case 17:
                        prune_ratio = 0.1
                    case 18:
                        prune_ratio = 0.1
                    case 19:
                        prune_ratio = 0.4
                    case 20:
                        prune_ratio = 0.1
                    case 21:
                        prune_ratio = 0.1
                    case 22:
                        prune_ratio = 0.4

                print(f"Pruning layer (ratio={prune_ratio}): {name}")

                # Apply pruning
                prune.l1_unstructured(module, name='weight', amount=prune_ratio)

                # Store the mask
                mask_name = name + '.weight_mask'
                weight_mask = dict(module.named_buffers()).get('weight_mask', None)
                if weight_mask is not None:
                    masks[name + '.weight'] = weight_mask.clone()
                # Remove reparam
                prune.remove(module, 'weight')

                break

            # Store the mask
            weight_mask = dict(module.named_buffers()).get('weight_mask', None)
            if weight_mask is not None:
                masks[name + '.weight'] = weight_mask.clone()
                prune.remove(module, 'weight')  # 只有剪枝过的才 remove



# Save the entire pruned model
output_dir = "/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models"
model.save(f"{output_dir}/best_pruned_s11.pt")


Pruning layer (ratio=0): model.model.1.conv
Pruning layer (ratio=0.65): model.model.2.cv1.conv
Pruning layer (ratio=0.65): model.model.2.cv2.conv
Pruning layer (ratio=0.65): model.model.2.m.0.cv1.conv
Pruning layer (ratio=0.65): model.model.2.m.0.cv2.conv
Pruning layer (ratio=0): model.model.3.conv
Pruning layer (ratio=0.65): model.model.4.cv1.conv
Pruning layer (ratio=0.65): model.model.4.cv2.conv
Pruning layer (ratio=0.65): model.model.4.m.0.cv1.conv
Pruning layer (ratio=0.65): model.model.4.m.0.cv2.conv
Pruning layer (ratio=0): model.model.5.conv
Pruning layer (ratio=0.65): model.model.6.cv1.conv
Pruning layer (ratio=0.65): model.model.6.cv2.conv
Pruning layer (ratio=0.65): model.model.6.m.0.cv1.conv
Pruning layer (ratio=0.65): model.model.6.m.0.cv2.conv
Pruning layer (ratio=0.65): model.model.6.m.0.cv3.conv
Pruning layer (ratio=0.65): model.model.6.m.0.m.0.cv1.conv
Pruning layer (ratio=0.65): model.model.6.m.0.m.0.cv2.conv
Pruning layer (ratio=0): model.model.6.m.0.m.1.cv1.conv
Pru

#Pruned model test

In [8]:
# Load the FULL pruned model instead of just state_dict
model1 = YOLO("/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_pruned_s11.pt")

# Move to GPU if available
device = "cuda" if torch.cuda.is_available() else "cpu"
model1.to(device)

print("Pruned model successfully loaded for evaluation!")

results = model1.val(data=data_yaml, split="val")  # Change dataset.yaml path if needed
print(f"Validation mAP50: {results.box.map:.4f}")

results = model1.val(data=data_yaml, split="test")  # For test set evaluation
print(f"Test mAP50: {results.box.map:.4f}")


Pruned model successfully loaded for evaluation!
YOLO11s-pose summary (fused): 109 layers, 9,971,979 parameters, 0 gradients, 23.3 GFLOPs
[34m[1mval: [0mFast image access ✅ (ping: 0.3±0.1 ms, read: 22.6±6.8 MB/s, size: 33.6 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/valid/labels.cache... 90 images, 0 backgrounds, 0 corrupt: 100%|██████████| 90/90 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:02<00:00,  2.56it/s]


                   all         90         93      0.677      0.639      0.646      0.453      0.255      0.177      0.126     0.0163
            half-squat         15         16      0.819      0.849      0.831      0.595      0.222      0.188     0.0696     0.0101
                 plank         38         38      0.792      0.602      0.748      0.508       0.62      0.368      0.359      0.046
                 squat         18         20      0.521        0.6      0.553      0.397      0.108        0.1     0.0215    0.00366
                 stand         19         19      0.577      0.503      0.453      0.313     0.0705     0.0526     0.0551    0.00551
Speed: 2.4ms preprocess, 4.5ms inference, 0.0ms loss, 2.5ms postprocess per image
Results saved to [1mruns/pose/val3[0m
Validation mAP50: 0.4533
[34m[1mval: [0mFast image access ✅ (ping: 0.4±0.1 ms, read: 18.7±3.6 MB/s, size: 29.2 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/test/labels.cache... 48 images, 0 backgrounds, 0 corrupt: 100%|██████████| 48/48 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  1.84it/s]


                   all         48         48      0.725      0.734      0.755      0.579      0.248      0.199      0.133      0.017
            half-squat          3          3      0.328      0.654      0.544      0.345          0          0          0          0
                 plank         22         22      0.948      0.636      0.776      0.567      0.786      0.545      0.459     0.0585
                 squat         12         12      0.766      0.917      0.852      0.708      0.207       0.25     0.0717    0.00963
                 stand         11         11      0.858      0.727      0.846      0.695          0          0          0          0
Speed: 2.9ms preprocess, 4.6ms inference, 0.0ms loss, 3.3ms postprocess per image
Results saved to [1mruns/pose/val4[0m
Test mAP50: 0.5786


In [51]:
import torch.nn as nn

def compute_global_sparsity(model):
    total_params = 0
    zero_params = 0

    for name, module in model.named_modules():
        if isinstance(module, nn.Conv2d) and hasattr(module, "weight"):
            weight = module.weight.data
            total_params += weight.numel()
            zero_params += torch.sum(weight == 0).item()

    sparsity = 100.0 * zero_params / total_params if total_params > 0 else 0
    print(f"Global Sparsity: {sparsity:.2f}% ({zero_params}/{total_params})")


compute_global_sparsity(model.model)

Global Sparsity: 32.40% (3226171/9956270)


#Quantization: Export ONNX into FP16

In [52]:
from ultralytics import YOLO

# Load your trained YOLOv8 model (e.g., YOLOv8n/y/m/l or a custom model)
model = YOLO("/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_pruned_s11.pt")

# Move model to device (GPU recommended)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
print(f"Model device: {model.device}")

# Export to ONNX quantization
model.export(format="onnx", dynamic=True, half=True)

Model device: cuda:0
Ultralytics 8.3.125 🚀 Python-3.11.12 torch-2.6.0+cu124 CPU (Intel Xeon 2.20GHz)
YOLO11s-pose summary (fused): 109 layers, 9,971,979 parameters, 0 gradients, 23.3 GFLOPs

[34m[1mPyTorch:[0m starting from '/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_pruned_s11.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 65, 8400) (19.4 MB)
[31m[1mrequirements:[0m Ultralytics requirements ['onnx>=1.12.0', 'onnxslim>=0.1.46', 'onnxruntime-gpu'] not found, attempting AutoUpdate...
Collecting onnx>=1.12.0
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxslim>=0.1.46
  Downloading onnxslim-0.1.51-py3-none-any.whl.metadata (5.0 kB)
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.21.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata

'/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_pruned_s11.onnx'

In [53]:
!pip install onnxmltools

Collecting onnxmltools
  Downloading onnxmltools-1.13.0-py2.py3-none-any.whl.metadata (8.2 kB)
Collecting onnxconverter-common (from onnxmltools)
  Downloading onnxconverter_common-1.14.0-py2.py3-none-any.whl.metadata (4.2 kB)
Collecting protobuf>=3.20.2 (from onnx->onnxmltools)
  Downloading protobuf-3.20.2-py2.py3-none-any.whl.metadata (720 bytes)
Downloading onnxmltools-1.13.0-py2.py3-none-any.whl (328 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.0/329.0 kB[0m [31m22.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnxconverter_common-1.14.0-py2.py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.5/84.5 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading protobuf-3.20.2-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: protobuf, onnxconverter-common, onnxmltools
  Attem

In [54]:
from onnxmltools.utils.float16_converter import convert_float_to_float16
import onnx

model_fp32 = onnx.load("/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_pruned_s11.onnx")
model_fp16 = convert_float_to_float16(model_fp32)
onnx.save(model_fp16, "/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_pruned_fp16_s11.onnx")


In [None]:
pip install onnx onnxruntime onnxruntime-tools


Collecting onnx
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Collecting onnxruntime
  Downloading onnxruntime-1.21.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting onnxruntime-tools
  Downloading onnxruntime_tools-1.7.0-py3-none-any.whl.metadata (14 kB)
Collecting coloredlogs (from onnxruntime)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting py3nvml (from onnxruntime-tools)
  Downloading py3nvml-0.2.7-py3-none-any.whl.metadata (13 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Collecting xmltodict (from py3nvml->onnxruntime-tools)
  Downloading xmltodict-0.14.2-py2.py3-none-any.whl.metadata (8.0 kB)
Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[

In [2]:
!pip install onnx-tool

Collecting onnx-tool
  Downloading onnx_tool-0.9.0-py3-none-any.whl.metadata (9.6 kB)
Collecting onnx (from onnx-tool)
  Downloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (16 kB)
Downloading onnx_tool-0.9.0-py3-none-any.whl (44 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.5/44.5 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading onnx-1.17.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m115.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: onnx, onnx-tool
Successfully installed onnx-1.17.0 onnx-tool-0.9.0


#Parameter count

In [None]:
!pip install onnx_opcounter

In [None]:
!pip install fvcore

In [None]:
model_path = "/content/drive/MyDrive/S25-ECE556-ODDL/yolo_code/models/best_s11.pt"
yolo = YOLO(model_path)
net = yolo.model
net.eval()

dummy_input = torch.randn(1, 3, 640, 640)  # Adjust to your model's input size
flops = FlopCountAnalysis(net, dummy_input)
print("Total MACs:", flops.total() / 1e6, "MMACs")



Total MACs: 11787.5352 MMACs
