In [None]:
#| include: false

import argparse
import math
import os
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import List, Union

import numpy as np
import torch
import torch.nn as nn
from matplotlib import pyplot as plt
from ultralytics import YOLO, __version__
from ultralytics.nn.modules import Detect, C2f, Conv, Bottleneck
from ultralytics.nn.tasks import attempt_load_one_weight
from ultralytics.yolo.engine.model import TASK_MAP
from ultralytics.yolo.engine.trainer import BaseTrainer
from ultralytics.yolo.utils import yaml_load, LOGGER, RANK, DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS
from ultralytics.yolo.utils.checks import check_yaml
from ultralytics.yolo.utils.torch_utils import initialize_weights, de_parallel

import torch_pruning as tp
from fasterai.prune.all import *
from fastai.vision.all import *
from fastcore.basics import store_attr, listify, true
from torch_pruning.pruner import function

## Helpers

In [None]:
#| include: false

def infer_shortcut(bottleneck):
    """Report whether ``bottleneck`` uses a residual (shortcut) connection.

    Args:
        bottleneck: Object exposing ``cv1.conv.in_channels`` and
            ``cv2.conv.out_channels`` and, optionally, an ``add`` attribute.

    Returns:
        ``False`` when the input and output channel counts differ or when the
        object has no ``add`` attribute; otherwise the value of ``bottleneck.add``.
    """
    in_channels = bottleneck.cv1.conv.in_channels
    out_channels = bottleneck.cv2.conv.out_channels
    if in_channels != out_channels:
        return False
    # A missing ``add`` attribute is treated the same as ``add == False``.
    return getattr(bottleneck, 'add', False)


class C2f_v2(nn.Module):
    """CSP bottleneck block whose input projection is done by two separate convolutions.

    ``cv0`` and ``cv1`` each map the input to ``self.c`` hidden channels. They
    take the place of the single convolution followed by ``chunk(2, 1)`` that
    the original forward pass used (``list(self.cv1(x).chunk(2, 1))``).
    """

    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
        """Build the block.

        Args:
            c1: Input channels.
            c2: Output channels.
            n: Number of bottleneck blocks.
            shortcut: Shortcut flag forwarded to every bottleneck.
            g: Groups forwarded to every bottleneck.
            e: Expansion ratio; hidden channels are ``int(c2 * e)``.
        """
        super().__init__()
        hidden = int(c2 * e)
        self.c = hidden  # hidden channels
        self.cv0 = Conv(c1, hidden, 1, 1)
        self.cv1 = Conv(c1, hidden, 1, 1)
        # Consumes the two input branches plus one output per bottleneck.
        self.cv2 = Conv((2 + n) * hidden, c2, 1)
        bottlenecks = [Bottleneck(hidden, hidden, shortcut, g, k=((3, 3), (3, 3)), e=1.0)
                       for _ in range(n)]
        self.m = nn.ModuleList(bottlenecks)

    def forward(self, x):
        """Run both input branches, chain the bottlenecks, and fuse everything with ``cv2``."""
        branches = [self.cv0(x), self.cv1(x)]
        for bottleneck in self.m:
            # Each bottleneck consumes the most recently produced feature map.
            branches.append(bottleneck(branches[-1]))
        return self.cv2(torch.cat(branches, 1))


def transfer_weights(c2f, c2f_v2):
    """Move the weights, buffers and plain attributes of ``c2f`` into ``c2f_v2``.

    ``cv2`` and ``m`` are reused as the very same module objects. The output
    channels of the source ``cv1`` (conv weight and batch-norm tensors) are cut
    in two: the first half goes to ``c2f_v2.cv0`` and the second half to
    ``c2f_v2.cv1``.

    Args:
        c2f: Source module with ``cv1``, ``cv2`` and ``m`` sub-modules.
        c2f_v2: Destination module with ``cv0``, ``cv1``, ``cv2`` and ``m``.
    """
    # Share (not copy) the output convolution and the bottleneck list.
    c2f_v2.cv2 = c2f.cv2
    c2f_v2.m = c2f.m

    source_state = c2f.state_dict()
    target_state = c2f_v2.state_dict()

    # Split point along the output-channel dimension of the old cv1.
    split_at = source_state['cv1.conv.weight'].shape[0] // 2

    # Conv weight first, then the batch-norm weights and running statistics.
    split_suffixes = ['conv.weight']
    split_suffixes += [f'bn.{stat}' for stat in ('weight', 'bias', 'running_mean', 'running_var')]
    for suffix in split_suffixes:
        tensor = source_state[f'cv1.{suffix}']
        target_state[f'cv0.{suffix}'] = tensor[:split_at]
        target_state[f'cv1.{suffix}'] = tensor[split_at:]

    # Everything that does not belong to the old cv1 is carried over unchanged.
    target_state.update({key: value for key, value in source_state.items()
                         if not key.startswith('cv1.')})

    # Copy non-callable attributes whose name contains no underscore.
    for attr_name in dir(c2f):
        attr_value = getattr(c2f, attr_name)
        if callable(attr_value) or '_' in attr_name:
            continue
        setattr(c2f_v2, attr_name, attr_value)

    c2f_v2.load_state_dict(target_state)


def replace_c2f_with_c2f_v2(module):
    """Recursively swap every ``C2f`` child of ``module`` for a ``C2f_v2`` carrying the same weights.

    The replacement is built from the hyper-parameters read off the existing
    block (channel counts, number of bottlenecks, shortcut flag, groups and
    expansion ratio) and is filled through ``transfer_weights``. Children that
    are not ``C2f`` are descended into.

    Args:
        module: Root module; it is modified in place.
    """
    for child_name, child in module.named_children():
        if not isinstance(child, C2f):
            replace_c2f_with_c2f_v2(child)
            continue

        first_bottleneck = child.m[0]
        in_channels = child.cv1.conv.in_channels
        out_channels = child.cv2.conv.out_channels
        replacement = C2f_v2(
            in_channels,
            out_channels,
            n=len(child.m),
            shortcut=infer_shortcut(first_bottleneck),
            g=first_bottleneck.cv2.conv.groups,
            # Expansion ratio recovered from the hidden width stored on the block.
            e=child.c / out_channels,
        )
        transfer_weights(child, replacement)
        setattr(module, child_name, replacement)


def save_model_v2(self: BaseTrainer):
    """Write the training checkpoint(s) for the current epoch.

    Per the original note this is adapted from
    ultralytics/yolo/engine/trainer.py with half-precision saving disabled:
    the model and EMA are deep-copied as they are.

    The checkpoint is always written to ``self.last``; it is also written to
    ``self.best`` when the current fitness equals the best fitness, and to
    ``epoch<N>.pt`` in ``self.wdir`` when periodic saving is due.
    """
    checkpoint = dict(
        epoch=self.epoch,
        best_fitness=self.best_fitness,
        model=deepcopy(de_parallel(self.model)),
        ema=deepcopy(self.ema.ema),
        updates=self.ema.updates,
        optimizer=self.optimizer.state_dict(),
        train_args=vars(self.args),  # stored as a plain dict
        date=datetime.now().isoformat(),
        version=__version__,
    )

    torch.save(checkpoint, self.last)

    is_best = self.best_fitness == self.fitness
    if is_best:
        torch.save(checkpoint, self.best)

    periodic_due = (self.epoch > 0) and (self.save_period > 0) and (self.epoch % self.save_period == 0)
    if periodic_due:
        torch.save(checkpoint, self.wdir / f'epoch{self.epoch}.pt')

    del checkpoint

def final_eval_v2(self: BaseTrainer):
    """Strip the saved checkpoints and validate the best one.

    Per the original note this is adapted from
    ultralytics/yolo/engine/trainer.py. Each of ``self.last`` and ``self.best``
    that exists on disk is passed through ``strip_optimizer_v2``. The best
    checkpoint is additionally validated: the resulting metrics are stored on
    ``self.metrics`` (without the ``'fitness'`` entry) and the
    ``'on_fit_epoch_end'`` callbacks are run.
    """
    for ckpt_path in (self.last, self.best):
        if not ckpt_path.exists():
            continue
        strip_optimizer_v2(ckpt_path)  # strip optimizers
        if ckpt_path is not self.best:
            continue
        LOGGER.info(f'\nValidating {ckpt_path}...')
        self.metrics = self.validator(model=ckpt_path)
        self.metrics.pop('fitness', None)
        self.run_callbacks('on_fit_epoch_end')

def strip_optimizer_v2(f: Union[str, Path] = 'best.pt', s: str = '') -> None:
    """Remove training-only state from a checkpoint file.

    Per the original note this is adapted from
    ultralytics/yolo/utils/torch_utils.py with half-precision saving disabled.

    Args:
        f: Path of the checkpoint to load.
        s: Optional output path; when empty the checkpoint is overwritten in place.

    The EMA weights (when present) replace ``'model'``; ``'optimizer'``,
    ``'ema'`` and ``'updates'`` are set to ``None``; all model parameters get
    ``requires_grad = False``; and ``'train_args'`` is reduced to the keys found
    in ``DEFAULT_CFG_KEYS``. The model's own ``args`` attribute is left untouched.
    """
    checkpoint = torch.load(f, map_location=torch.device('cpu'))
    # Defaults first so the values stored in the checkpoint take precedence.
    merged_args = {**DEFAULT_CFG_DICT, **checkpoint['train_args']}

    if checkpoint.get('ema'):
        checkpoint['model'] = checkpoint['ema']  # replace model with ema
    checkpoint.update(dict.fromkeys(('optimizer', 'ema', 'updates')))

    for param in checkpoint['model'].parameters():
        param.requires_grad = False

    # Keep only recognised configuration keys.
    checkpoint['train_args'] = {key: value for key, value in merged_args.items()
                                if key in DEFAULT_CFG_KEYS}

    destination = s or f
    torch.save(checkpoint, destination)
    size_mb = os.path.getsize(destination) / 1E6  # filesize
    saved_as = f' saved as {s},' if s else ''
    LOGGER.info(f"Optimizer stripped from {f},{saved_as} {size_mb:.1f}MB")


def train_v2(self: YOLO, pruning=False, **kwargs):
    """Train the wrapped model, optionally keeping the in-memory (pruned) network.

    Per the original note this is adapted from
    ultralytics/yolo/engine/model.py; with ``pruning=True`` no new model is
    built, the trainer receives ``self.model`` directly.

    Args:
        pruning: When true, train ``self.model`` as it is and install
            ``save_model_v2`` / ``final_eval_v2`` on the trainer.
        **kwargs: Training overrides. A ``cfg`` entry replaces all overrides
            with the content of that YAML file.

    Raises:
        AttributeError: If no ``data`` entry is present in the final overrides.
    """
    self._check_is_pytorch_model()

    if self.session:  # Ultralytics HUB session
        if any(kwargs):
            LOGGER.warning('WARNING ⚠️ using HUB training arguments, ignoring local training arguments.')
        kwargs = self.session.train_args

    overrides = self.overrides.copy()
    overrides.update(kwargs)

    cfg_file = kwargs.get('cfg')
    if cfg_file:
        LOGGER.info(f"cfg file passed. Overriding default params with {cfg_file}.")
        overrides = yaml_load(check_yaml(cfg_file))

    overrides['mode'] = 'train'
    if not overrides.get('data'):
        raise AttributeError("Dataset required but missing, i.e. pass 'data=coco128.yaml'")
    if overrides.get('resume'):
        overrides['resume'] = self.ckpt_path

    self.task = overrides.get('task') or self.task
    trainer_cls = TASK_MAP[self.task][1]
    self.trainer = trainer_cls(overrides=overrides, _callbacks=self.callbacks)

    if pruning:
        # Pruning mode: hand the current network to the trainer unchanged.
        self.trainer.pruning = True
        self.trainer.model = self.model

        # Bind the replacement save/eval routines (half-precision saving disabled).
        self.trainer.save_model = save_model_v2.__get__(self.trainer)
        self.trainer.final_eval = final_eval_v2.__get__(self.trainer)
    elif not overrides.get('resume'):
        # Manually set the model only when not resuming.
        initial_weights = self.model if self.ckpt else None
        self.trainer.model = self.trainer.get_model(weights=initial_weights, cfg=self.model.yaml)
        self.model = self.trainer.model

    self.trainer.hub_session = self.session  # attach optional HUB session
    self.trainer.train()

    # Update model and cfg after training
    if RANK in (-1, 0):
        best_model, _ = attempt_load_one_weight(str(self.trainer.best))
        self.model = best_model
        self.overrides = self.model.args
        self.metrics = getattr(self.trainer.validator, 'metrics', None)

In [None]:
#| include: false

def _enable_gradients(module):
    """Mark every parameter of ``module`` as trainable."""
    for param in module.parameters():
        param.requires_grad = True


def prune(args):
    """Iteratively prune and fine-tune a YOLOv8 model, then export it to ONNX.

    Workflow: load the model, replace ``C2f`` blocks by ``C2f_v2``, validate,
    run one initial training pass, then for each of ``args.iterative_steps``
    steps prune, validate, fine-tune and validate again. The loop stops early
    once the mAP drop relative to the post-training baseline exceeds
    ``args.max_map_drop``.

    Args:
        args: Namespace-like object providing
            ``model`` (weights passed to ``YOLO``),
            ``cfg`` (YAML file with the training/validation settings),
            ``iterative_steps`` (number of prune/fine-tune rounds),
            ``target_prune_rate`` (multiplied by 100 and passed to ``sched``),
            ``sched`` (callable returning a sequence whose first item is the
            amount passed to ``Pruner.prune_model``) and
            ``max_map_drop`` (early-stop threshold).
            Optional ``data`` and ``epochs`` attributes override the defaults
            ``"coco128.yaml"`` and ``10``.

    Returns:
        None. The per-step MACs / parameter / mAP lists are collected but not
        returned.
    """
    # load trained yolov8 model
    model = YOLO(args.model)
    setattr(model, "train_v2", train_v2.__get__(model))
    pruning_cfg = yaml_load(check_yaml(args.cfg))
    batch_size = pruning_cfg['batch']

    # These settings deliberately override whatever the YAML file contains.
    pruning_cfg['data'] = getattr(args, 'data', None) or "coco128.yaml"
    pruning_cfg['epochs'] = getattr(args, 'epochs', None) or 10
    pruning_cfg['verbose'] = False

    model.model.train()
    replace_c2f_with_c2f_v2(model.model)
    # NOTE(review): presumably needed to configure the freshly created C2f_v2
    # layers (e.g. batch-norm settings) - confirm against ultralytics.
    initialize_weights(model.model)

    # Validation runs on a copy so that the network being pruned is never the
    # object handed to ``val`` (the log reports a "fused" model there).
    validation_model = deepcopy(model)
    metric = validation_model.val(**pruning_cfg)
    init_map = metric.box.map
    example_inputs = torch.randn(1, 3, pruning_cfg["imgsz"], pruning_cfg["imgsz"]).to(model.device)

    base_macs, base_nparams = tp.utils.count_ops_and_params(model.model, example_inputs)
    print(f"Before Pruning: MACs={base_macs / 1e9: .5f} G, #Params={base_nparams / 1e6: .5f} M, mAP={init_map: .5f}")

    # Initial training pass on the converted (still unpruned) model.
    _enable_gradients(model.model)
    model.train_v2(pruning=True, **pruning_cfg)

    macs_list, nparams_list, map_list, pruned_map_list = [], [], [], []
    base_macs, base_nparams = tp.utils.count_ops_and_params(model.model, example_inputs)

    pruning_cfg['name'] = "baseline_val"
    pruning_cfg['batch'] = 1

    # Baseline after the initial training pass; early stopping compares against it.
    validation_model.model.model = deepcopy(model.model.model)
    metric = validation_model.val(**pruning_cfg)
    init_map = metric.box.map
    macs_list.append(base_macs)
    nparams_list.append(100)  # parameter count is tracked as a percentage of the baseline
    map_list.append(init_map)
    pruned_map_list.append(init_map)
    print(f"Before Pruning: MACs={base_macs / 1e9: .5f} G, #Params={base_nparams / 1e6: .5f} M, mAP={init_map: .5f}")

    _enable_gradients(model.model)

    # Built once, outside any parameter loop: the detection head is excluded
    # from pruning.
    ignored_layers = [m for m in model.model.modules() if isinstance(m, (Detect,))]

    pruner = Pruner(model.model, 'local', large_final, ignored_layers=ignored_layers)
    print(model.model.model[0].conv)

    for i in range(args.iterative_steps):

        # The schedule receives the target scaled by 100 and the fraction of
        # steps completed; only the first returned value is used.
        pruning_ratio = args.sched(args.target_prune_rate*100, i/args.iterative_steps)

        pruner.prune_model(pruning_ratio[0])
        print(pruning_ratio[0])

        print('After Pruning')
        print('Model', model.model.model[0].conv)
        print('Pruner', pruner.model.model[0].conv)

        # pre fine-tuning validation
        pruning_cfg['name'] = f"step_{i}_pre_val"
        pruning_cfg['batch'] = 1
        validation_model.model.model = deepcopy(pruner.model.model)
        metric = validation_model.val(**pruning_cfg)
        pruned_map = metric.box.map
        pruned_macs, pruned_nparams = tp.utils.count_ops_and_params(pruner.model.to(default_device()), example_inputs.to(default_device()))

        print('After post-pruning Validation')
        print('Model', model.model.model[0].conv)
        print('Pruner', pruner.model.model[0].conv)

        current_speed_up = float(macs_list[0]) / pruned_macs
        print(f"After pruning iter {i + 1}: MACs={pruned_macs / 1e9} G, #Params={pruned_nparams / 1e6} M, "
              f"mAP={pruned_map}, speed up={current_speed_up}")

        # fine-tuning
        # Attach the pruned network first, then unfreeze it: after a previous
        # train_v2 call ``model.model`` is the checkpoint-loaded copy, not the
        # network held by the pruner, so unfreezing before this assignment
        # would act on the wrong object.
        model.model = pruner.model
        _enable_gradients(model.model)
        pruning_cfg['name'] = f"step_{i}_finetune"
        pruning_cfg['batch'] = batch_size  # restore batch size
        model.train_v2(pruning=True, **pruning_cfg)

        print('After fine-tuning')
        print('Model', model.model.model[0].conv)
        print('Pruner', pruner.model.model[0].conv)

        # post fine-tuning validation
        pruning_cfg['name'] = f"step_{i}_post_val"
        pruning_cfg['batch'] = 1
        validation_model = YOLO(model.trainer.best)
        validation_model.model = deepcopy(model.model)
        metric = validation_model.val(**pruning_cfg)
        current_map = metric.box.map
        print(f"After fine tuning mAP={current_map}")

        print('After post fine-tuning validation')
        print('Model', model.model.model[0].conv)
        print('Pruner', pruner.model.model[0].conv)

        macs_list.append(pruned_macs)
        nparams_list.append(pruned_nparams / base_nparams * 100)
        pruned_map_list.append(pruned_map)
        map_list.append(current_map)

        if init_map - current_map > args.max_map_drop:
            print("Pruning early stop")
            break

    model.export(format='onnx')

## Training

In [None]:
class Args(argparse.Namespace):
    """Settings consumed by ``prune``."""

    model = 'yolov8l.pt'       # weights handed to YOLO(...)
    cfg = 'default.yaml'       # YAML file with the training/validation settings
    iterative_steps = 15       # number of prune / fine-tune rounds
    target_prune_rate = 0.15   # multiplied by 100 before being passed to the schedule
    max_map_drop = 0.2         # stop once mAP falls by more than this from the baseline
    sched = Schedule(partial(sched_onecycle, α=10, β=4))


args = Args()
prune(args)

Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
YOLOv8l summary (fused): 285 layers, 43668288 parameters, 0 gradients, 165.2 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.731      0.768      0.828      0.659
Speed: 0.1ms preprocess, 7.7ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1mruns/detect/val59[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=

Before Pruning: MACs= 82.72641 G, #Params= 43.69152 M, mAP= 0.65869


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/train49/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train49[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      14.4G     0.8537     0.7447      1.082        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.776      0.741      0.832      0.667

 

Before Pruning: MACs= 82.72641 G, #Params= 43.69152 M, mAP= 0.75438
Conv2d(3, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


0.27046189978777607
After Pruning
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 43325836 parameters, 74176 gradients, 163.3 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.883      0.849      0.903      0.743
Speed: 0.2ms preprocess, 12.4ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_0_pre_val131[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_0_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, co

After post-pruning Validation
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 1: MACs=81.8125668 G, #Params=43.348966 M, mAP=0.7428735001565969, speed up=1.0111699172357467


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_0_finetune103/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_0_finetune103[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      13.6G     0.7161     0.4777     0.9953        122        640: 100%|██████████| 8/8 [00:03
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929       0.92      0.841   

After fine-tuning
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 43325836 parameters, 0 gradients, 163.3 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.897      0.863      0.922      0.787
Speed: 0.2ms preprocess, 12.4ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_0_post_val75[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.7869655326484724
After post fine-tuning validation
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
0.5179586515491672
After Pruning
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 43081939 parameters, 74176 gradients, 162.7 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.913      0.872      0.929      0.788
Speed: 0.1ms preprocess, 12.5ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_1_pre_val66[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_1_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos

After post-pruning Validation
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 2: MACs=81.5020432 G, #Params=43.105009 M, mAP=0.7879549975477981, speed up=1.0150224847369225


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_1_finetune62/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_1_finetune62[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      13.3G     0.5906     0.3832     0.9224        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.903      0.868     

After fine-tuning
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 43081939 parameters, 0 gradients, 162.7 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.907      0.888      0.937      0.804
Speed: 0.1ms preprocess, 12.5ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_1_post_val48[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.804165683147925
After post fine-tuning validation
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
0.9769531739708688
After Pruning
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 42712366 parameters, 74176 gradients, 161.3 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.923      0.871      0.933      0.794
Speed: 0.2ms preprocess, 12.5ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_2_pre_val50[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_2_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos

After post-pruning Validation
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 3: MACs=80.7933916 G, #Params=42.735334 M, mAP=0.7940590327289188, speed up=1.0239254072854147


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_2_finetune48/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_2_finetune48[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      13.4G      0.548     0.3528     0.9023        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.923      0.871     

After fine-tuning
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 42712366 parameters, 0 gradients, 161.3 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929       0.92      0.888      0.943      0.813
Speed: 0.1ms preprocess, 12.5ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_2_post_val42[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8133375576554807
After post fine-tuning validation
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
1.7924759478681729
After Pruning
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 42094706 parameters, 74176 gradients, 158.8 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929       0.94      0.864      0.936      0.804
Speed: 0.1ms preprocess, 12.7ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_3_pre_val36[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_3_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos

After post-pruning Validation
Model Conv2d(3, 63, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 4: MACs=79.5541908 G, #Params=42.117503 M, mAP=0.8043294271876973, speed up=1.0398749024796818


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_3_finetune36/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_3_finetune36[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      13.9G     0.5395     0.3534      0.897        122        640: 100%|██████████| 8/8 [00:42
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.932      0.875     

After fine-tuning
Model Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 42094706 parameters, 0 gradients, 158.8 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.945      0.875      0.943      0.824
Speed: 0.2ms preprocess, 12.7ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_3_post_val34[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8242263974664863
After post fine-tuning validation
Model Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
3.1368842425083825
After Pruning
Model Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 40919781 parameters, 74176 gradients, 154.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.913      0.876      0.935      0.792
Speed: 0.2ms preprocess, 12.6ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_4_pre_val32[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_4_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos

After post-pruning Validation
Model Conv2d(3, 62, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 5: MACs=77.3600192 G, #Params=40.942254 M, mAP=0.7920074671210469, speed up=1.0693690003634333


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_4_finetune32/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_4_finetune32[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      13.8G      0.573     0.3665     0.9011        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.926      0.881     

After fine-tuning
Model Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 40919781 parameters, 0 gradients, 154.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.929      0.891      0.944       0.82
Speed: 0.1ms preprocess, 12.5ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_4_post_val31[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.820206153122929
After post fine-tuning validation
Model Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
5.101267981852869
After Pruning
Model Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 39455305 parameters, 74176 gradients, 149.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.916      0.864      0.929      0.789
Speed: 0.2ms preprocess, 13.0ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_5_pre_val31[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_5_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos

After post-pruning Validation
Model Conv2d(3, 61, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 6: MACs=74.8418608 G, #Params=39.477376 M, mAP=0.7891253582912163, speed up=1.1053494062777232


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_5_finetune31/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_5_finetune31[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      13.5G     0.5773     0.3687     0.8973        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.904      0.881     

After fine-tuning
Model Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 39455305 parameters, 0 gradients, 149.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.928      0.891      0.943       0.82
Speed: 0.2ms preprocess, 12.9ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_5_post_val31[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8197008467567249
After post fine-tuning validation
Model Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
7.518590641324997
After Pruning
Model Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 37708749 parameters, 74176 gradients, 143.2 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.904      0.848      0.923       0.76
Speed: 0.2ms preprocess, 10.9ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_6_pre_val28[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_6_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos

After post-pruning Validation
Model Conv2d(3, 60, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 7: MACs=71.732976 G, #Params=37.730325 M, mAP=0.7604747253578685, speed up=1.1532549046898597


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_6_finetune28/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_6_finetune28[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      13.3G     0.6267     0.3973     0.9214        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.909      0.859     

After fine-tuning
Model Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 37708749 parameters, 0 gradients, 143.2 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.937      0.878      0.946      0.808
Speed: 0.1ms preprocess, 10.8ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_6_post_val27[0m


After fine tuning mAP=0.8082043641470185
After post fine-tuning validation
Model Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
9.935913300797125
After Pruning
Model Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
YOLOv8l summary (fused): 285 layers, 35995675 parameters, 74176 gradients, 136.7 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.838      0.847      0.905      0.744
Speed: 0.2ms preprocess, 12.1ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_7_pre_val25[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_7_finetune, exist_ok=False, pretrained=Tru

After post-pruning Validation
Model Conv2d(3, 59, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 8: MACs=68.4860368 G, #Params=36.016747 M, mAP=0.7439133908787243, speed up=1.207930992438447


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_7_finetune25/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_7_finetune25[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10        13G     0.6576     0.4219     0.9433        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.884      0.852     

After fine-tuning
Model Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 35995675 parameters, 0 gradients, 136.7 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.934      0.877      0.942      0.805
Speed: 0.1ms preprocess, 12.0ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_7_post_val25[0m


After fine tuning mAP=0.8047311680941978
After post fine-tuning validation
Model Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
11.900297040141613
After Pruning
Model Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
YOLOv8l summary (fused): 285 layers, 34583399 parameters, 74176 gradients, 131.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.861      0.846      0.915      0.747
Speed: 0.2ms preprocess, 12.0ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_8_pre_val24[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_8_finetune, exist_ok=False, pretrained=Tru

After post-pruning Validation
Model Conv2d(3, 57, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 9: MACs=65.8289424 G, #Params=34.604045 M, mAP=0.746892685800743, speed up=1.2566874597092115


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_8_finetune24/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_8_finetune24[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      12.7G     0.6527     0.4186     0.9399        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.878       0.86     

After fine-tuning
Model Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 34583399 parameters, 0 gradients, 131.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.939      0.885       0.94      0.804
Speed: 0.1ms preprocess, 12.5ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_8_post_val24[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8042272329558376
After post fine-tuning validation
Model Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
13.24470533478182
After Pruning
Model Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 33747610 parameters, 74176 gradients, 128.5 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.919      0.872      0.923      0.774
Speed: 0.2ms preprocess, 13.1ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_9_pre_val23[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_9_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, cos

After post-pruning Validation
Model Conv2d(3, 56, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 10: MACs=64.3900056 G, #Params=33.768007 M, mAP=0.77353892505729, speed up=1.2847709148203583


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_9_finetune23/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_9_finetune23[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      12.6G     0.6022     0.3899     0.9207        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.921      0.881     

After fine-tuning
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
YOLOv8l summary (fused): 285 layers, 33747610 parameters, 0 gradients, 128.5 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.943      0.875      0.932      0.804
Speed: 0.2ms preprocess, 14.0ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_9_post_val23[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8042200149576527
After post fine-tuning validation
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
14.060228108679125
After Pruning
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 33209910 parameters, 74176 gradients, 126.7 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.933      0.855      0.928      0.782
Speed: 0.2ms preprocess, 13.6ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_10_pre_val17[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_10_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, c

After post-pruning Validation
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 11: MACs=63.4942128 G, #Params=33.230145 M, mAP=0.7824563352367453, speed up=1.302896795658832


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_10_finetune17/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_10_finetune17[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      12.3G     0.5909     0.3739      0.911        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.938      0.863   

After fine-tuning
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 33209910 parameters, 0 gradients, 126.7 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.949      0.873      0.938      0.803
Speed: 0.2ms preprocess, 14.4ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_10_post_val17[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8030008974391184
After post fine-tuning validation
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
14.519222631100824
After Pruning
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32703049 parameters, 74176 gradients, 124.6 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.926      0.871      0.929      0.785
Speed: 0.1ms preprocess, 15.1ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_11_pre_val15[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_11_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, c

After post-pruning Validation
Model Conv2d(3, 55, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 12: MACs=62.4345712 G, #Params=32.723122 M, mAP=0.7849986248769537, speed up=1.3250096030130178


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_11_finetune15/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_11_finetune15[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      12.4G      0.592     0.3808     0.9108        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.912      0.894   

After fine-tuning
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32703049 parameters, 0 gradients, 124.6 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.932      0.885      0.938      0.803
Speed: 0.2ms preprocess, 15.9ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_11_post_val14[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8028105881367777
After post fine-tuning validation
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
14.766719382862217
After Pruning
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32669140 parameters, 74176 gradients, 124.6 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.945      0.883      0.942      0.806
Speed: 0.1ms preprocess, 15.9ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_12_pre_val14[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_12_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, c

After post-pruning Validation
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 13: MACs=62.4070664 G, #Params=32.689204 M, mAP=0.8058915915724488, speed up=1.325593577332454


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_12_finetune13/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_12_finetune13[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      12.7G      0.499     0.3319     0.8801        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.933      0.887   

After fine-tuning
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
YOLOv8l summary (fused): 285 layers, 32669140 parameters, 0 gradients, 124.6 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.937      0.892      0.941      0.811
Speed: 0.2ms preprocess, 16.7ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_12_post_val13[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8111457220640336
After post fine-tuning validation
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
14.89709551315643
After Pruning
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32416863 parameters, 74176 gradients, 123.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.939      0.886       0.94      0.805
Speed: 0.2ms preprocess, 16.6ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_13_pre_val13[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_13_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, c

After post-pruning Validation
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 14: MACs=61.8488912 G, #Params=32.436843 M, mAP=0.8050285863373501, speed up=1.3375568226839933


[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_13_finetune13/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_13_finetune13[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      12.1G     0.5096      0.332     0.8815        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.935      0.891   

After fine-tuning
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32416863 parameters, 0 gradients, 123.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.951      0.887      0.946      0.815
Speed: 0.2ms preprocess, 17.3ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_13_post_val13[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)


After fine tuning mAP=0.8146198835662797
After post fine-tuning validation
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
14.96493134246744
After Pruning
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32416863 parameters, 74176 gradients, 123.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.951      0.886      0.945      0.815
Speed: 0.2ms preprocess, 17.4ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_14_pre_val13[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24253MiB)
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=None, data=coco128.yaml, epochs=10, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=step_14_finetune, exist_ok=False, pretrained=True, optimizer=auto, verbose=False, seed=0, deterministic=True, single_cls=False, rect=False, c

After post-pruning Validation
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
After pruning iter 15: MACs=61.8488912 G, #Params=32.436843 M, mAP=0.8153698637584581, speed up=1.3375568226839933


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgr[0m
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
Plotting labels to runs/detect/step_14_finetune13/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.000119, momentum=0.9) with parameter groups 105 weight(decay=0.0), 112 weight(decay=0.0005), 111 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/step_14_finetune13[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
       1/10      11.4G     0.4922     0.3236     0.8733        122        640: 100%|██████████| 8/8 [00:02
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128  

After fine-tuning
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32416863 parameters, 0 gradients, 123.4 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/train2017.cache... 126 images, 2 backgrou[0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████|
                   all        128        929      0.947      0.899      0.943      0.819
Speed: 0.2ms preprocess, 17.2ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/step_14_post_val13[0m
Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CPU


After fine tuning mAP=0.8192818206570706
After post fine-tuning validation
Model Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
Pruner Conv2d(3, 54, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)


YOLOv8l summary (fused): 285 layers, 32416863 parameters, 0 gradients, 123.4 GFLOPs

[34m[1mPyTorch:[0m starting from runs/detect/step_14_finetune13/weights/best.pt with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (124.2 MB)

[34m[1mONNX:[0m starting export with onnx 1.16.0 opset 17...
[34m[1mONNX:[0m export success ✅ 2.6s, saved as runs/detect/step_14_finetune13/weights/best.onnx (123.9 MB)

Export complete (3.5s)
Results saved to [1m/home/HubensN/fasterai/nbs/runs/detect/step_14_finetune13/weights[0m
Predict:         yolo predict task=detect model=runs/detect/step_14_finetune13/weights/best.onnx imgsz=640 
Validate:        yolo val task=detect model=runs/detect/step_14_finetune13/weights/best.onnx imgsz=640 data=/home/HubensN/miniconda3/envs/fasterai/lib/python3.9/site-packages/ultralytics/datasets/coco128.yaml 
Visualize:       https://netron.app


## Post-Training Checks

In [None]:
# Load the fine-tuned, pruned checkpoint for the post-training checks.
# The path is relative to the notebook's working directory (the recorded logs show
# results being written under <cwd>/runs/detect/...), so the cell is not tied to one
# user's home directory.
# NOTE(review): the pruning log recorded in this notebook saved its weights under
# step_14_finetune13, while this cell loads step_14_finetune4 -- confirm which run
# is the intended one.
model = YOLO('runs/detect/step_14_finetune4/weights/best.pt')

In [None]:
# Count the MACs and parameters of the loaded network on the example input,
# then show both numbers as the cell output.
base_macs, base_nparams = tp.utils.count_ops_and_params(model.model, example_inputs)
base_macs, base_nparams

(57692198400.0, 30077028)

In [None]:
# Validate the loaded checkpoint on coco128, one 640-px image per batch,
# with verbose reporting turned off; keep the returned metrics object.
results = model.val(data='coco128.yaml', batch=1, imgsz=640, verbose=False)

Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CUDA:0 (NVIDIA GeForce RTX 3090, 24268MiB)
YOLOv8l summary (fused): 285 layers, 30057792 parameters, 0 gradients, 115.1 GFLOPs
[34m[1mval: [0mScanning /home/HubensN/fasterai/nbs/datasets/coco128/labels/tra[0m
                 Class     Images  Instances      Box(P          R  
                   all        128        929      0.917      0.907      0.945      0.809
Speed: 0.2ms preprocess, 24.4ms inference, 0.0ms loss, 0.4ms postprocess per image
Results saved to [1mruns/detect/val35[0m


In [None]:
# Display the validation metrics object returned by model.val().
results

ultralytics.yolo.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 11, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 46, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 68, 69, 71, 72, 73, 74, 75, 76, 77, 79])
box: ultralytics.yolo.utils.metrics.Metric object
confusion_matrix: <ultralytics.yolo.utils.metrics.ConfusionMatrix object>
fitness: 0.8221835652536718
keys: ['metrics/precision(B)', 'metrics/recall(B)', 'metrics/mAP50(B)', 'metrics/mAP50-95(B)']
maps: array([    0.75668,     0.47387,      0.3594,     0.91994,     0.94661,     0.90211,     0.94289,     0.68531,     0.68927,     0.34436,     0.80851,      0.8955,     0.80851,     0.86091,     0.84563,     0.96863,     0.89911,      0.9501,     0.80851,     0.80851,     0.88959,       0.995,       0.995,     0.93382,
            0.8273,     0.84511,      0.6686,     0.7

In [None]:
# Export the loaded model to ONNX with half-precision (FP16) weights.
# The recorded output of this cell shows the export running on "CPU" and producing a
# 166.8 MB file for ~43.7M parameters, which matches FP32 size -- i.e. half=True did
# not take effect. Ultralytics only honours half=True for ONNX when exporting on a
# GPU, so request one explicitly when available; on CPU-only machines this falls back
# to the previous (CPU) behaviour.
# NOTE(review): the recorded output also refers to yolov8l.pt rather than the pruned
# checkpoint loaded above -- it looks stale; re-run after Restart & Run All.
model.export(format='onnx', half=True, device=0 if torch.cuda.is_available() else 'cpu')

Ultralytics YOLOv8.0.132 🚀 Python-3.9.0 torch-2.2.1 CPU
YOLOv8l summary (fused): 268 layers, 43668288 parameters, 0 gradients, 165.2 GFLOPs

[34m[1mPyTorch:[0m starting from yolov8l.pt with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (83.7 MB)

[34m[1mONNX:[0m starting export with onnx 1.16.0 opset 17...
[34m[1mONNX:[0m export success ✅ 2.8s, saved as yolov8l.onnx (166.8 MB)

Export complete (4.0s)
Results saved to [1m/home/HubensN/fasterai/nbs[0m
Predict:         yolo predict task=detect model=yolov8l.onnx imgsz=640 
Validate:        yolo val task=detect model=yolov8l.onnx imgsz=640 data=coco.yaml 
Visualize:       https://netron.app


'yolov8l.onnx'