with https://github.com/neuralmagic/sparseml and https://github.com/neuralmagic/sparseml/tree/main/integrations/ultralytics-yolov5

# Setup

Clone repo, install dependencies and check PyTorch and GPU.

In [None]:
!git clone https://github.com/neuralmagic/sparseml # clone repo
%cd /content/sparseml/integrations/ultralytics-yolov5
!pwd
!bash setup_integration.sh

Cloning into 'sparseml'...
remote: Enumerating objects: 9436, done.[K
remote: Counting objects: 100% (1561/1561), done.[K
remote: Compressing objects: 100% (655/655), done.[K
remote: Total 9436 (delta 1115), reused 1149 (delta 876), pack-reused 7875[K
Receiving objects: 100% (9436/9436), 10.62 MiB | 30.45 MiB/s, done.
Resolving deltas: 100% (6449/6449), done.
/content/sparseml/integrations/ultralytics-yolov5
/content/sparseml/integrations/ultralytics-yolov5
Cloning into 'yolov5'...
remote: Enumerating objects: 6349, done.[K
remote: Counting objects: 100% (15/15), done.[K
remote: Compressing objects: 100% (13/13), done.[K
remote: Total 6349 (delta 4), reused 8 (delta 2), pack-reused 6334[K
Receiving objects: 100% (6349/6349), 8.58 MiB | 34.44 MiB/s, done.
Resolving deltas: 100% (4339/4339), done.
Collecting PyYAML>=5.3.1
[?25l  Downloading https://files.pythonhosted.org/packages/7a/a5/393c087efdc78091afa2af9f1378762f9821c9c1d7a22c5753fb5ac5f97a/PyYAML-5.4.1-cp37-cp37m-manylinux

In [None]:
# The upstream YOLOv5 setup commands are kept below only as an inert string
# literal; the output of the previous cell shows yolov5 being cloned and its
# requirements being installed by the integration script already.
'''!git clone https://github.com/ultralytics/yolov5  # clone repo
%cd yolov5
%pip install -qr requirements.txt  # install dependencies'''

import torch
from IPython.display import Image, clear_output  # to display images

clear_output()

# Report the GPU model when CUDA is usable, otherwise fall back to the literal 'CPU'.
device_name = torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'
print(f"Setup complete. Using torch {torch.__version__} ({device_name})")

# Train

In [None]:
%cd /content
!mkdir sent2
%cd sent2
!pwd

In [None]:
!rm -r images
!rm -r labels
!mkdir images
!mkdir labels
%cd /content

Train a YOLOv5s model on [COCO128](https://www.kaggle.com/ultralytics/coco128) with `--data coco128.yaml`, starting from pretrained `--weights yolov5s.pt`, or from randomly initialized `--weights '' --cfg yolov5s.yaml`. Models are downloaded automatically from the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases), and **COCO, COCO128, and VOC datasets are downloaded automatically** on first use.

All training results are saved to `runs/train/` with incrementing run directories, e.g. `runs/train/exp2`, `runs/train/exp3`, etc.


In [None]:
%cd /content
!cat './sparseml/integrations/ultralytics-yolov5/yolov5/data/coco128.yaml'

In [None]:
!pip3 install torch==1.7.0+cu101 torchvision==0.8.1+cu101 torchaudio==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html


In [None]:
# Train YOLOv5s on COCO128 for 3 epochs
%cd /content/sparseml/integrations/ultralytics-yolov5/yolov5
!python train.py --img 640 --batch 16 --epochs 3 --data './data/coco128.yaml' --weights yolov5s.pt --nosave --cache

In [None]:
# Show the three diagnostic images of the training run.
# A notebook cell only renders the value of its last expression, so the images
# are handed to display() explicitly; as bare expressions only the final
# (predictions) image would appear.
from IPython.display import Image, display

display(
    Image(filename='runs/train/exp/train_batch0.jpg', width=800),  # train batch 0 mosaics and labels
    Image(filename='runs/train/exp/test_batch0_labels.jpg', width=800),  # test batch 0 labels
    Image(filename='runs/train/exp/test_batch0_pred.jpg', width=800),  # test batch 0 predictions
)

> <img src="https://user-images.githubusercontent.com/26833433/83667642-90fcb200-a583-11ea-8fa3-338bbf7da194.jpeg" width="750">  
`train_batch0.jpg` shows train batch 0 mosaics and labels

> <img src="https://user-images.githubusercontent.com/26833433/83667626-8c37fe00-a583-11ea-997b-0923fe59b29b.jpeg" width="750">  
`test_batch0_labels.jpg` shows test batch 0 labels

> <img src="https://user-images.githubusercontent.com/26833433/83667635-90641b80-a583-11ea-8075-606316cebb9c.jpeg" width="750">  
`test_batch0_pred.jpg` shows test batch 0 _predictions_


In [None]:
%cd /content/sparseml/integrations/ultralytics-yolov5/yolov5
from utils.plots import plot_results 
plot_results(save_dir='runs/train/exp')  # plot all results*.txt as results.png
Image(filename='runs/train/exp/results.png', width=800)

<img src="https://user-images.githubusercontent.com/26833433/97808309-8182b180-1c66-11eb-8461-bffe1a79511d.png" width="800">


# Exporting for Inference

In [None]:
# Switch to the YOLOv5 checkout; the export cell further down runs models/export.py from here.
%cd /content/sparseml/integrations/ultralytics-yolov5/yolov5
# Uncomment to print the export script for inspection:
#!cat models/export.py

In [None]:
# DISABLED CELL: the assignment on the next line is commented out, so the
# triple-quoted text that follows is only a bare string expression. Nothing is
# bound to `code`, and the write to models/export_patched.py at the bottom of
# the cell is commented out as well, so no file is produced.
#
# The text is an export script whose ONNX branch calls torch.onnx.export when
# `sparseml_wrapper.enabled` is false and SparseML's ModuleExporter otherwise.
#
# NOTE(review) before re-enabling (uncomment the assignment and drop the
# leading ''' of the following line):
#   * `import argparse` only exists on the commented-out line, while the
#     `__main__` section of the text calls argparse.ArgumentParser().
#   * the torch.onnx.export call hard-codes opset_version=12 instead of using
#     the parsed --opset-version option.
#   * the bare `except: pass` around skip_onnx_input_quantize hides any failure
#     of that step.
#code = '''import argparse
'''from copy import deepcopy
import sys
import time

sys.path.append('./')  # to run '$ python *.py' files in subdirectories

import torch
import torch.nn as nn
from torch.utils.mobile_optimizer import optimize_for_mobile

from sparseml.pytorch.utils import ModuleExporter
from sparseml.pytorch.utils.quantization import skip_onnx_input_quantize

import models
from models.experimental import attempt_load
from models.yolo import Model
from utils.activations import Hardswish, SiLU
from utils.general import colorstr, check_img_size, check_requirements, file_size, set_logging
from utils.google_utils import attempt_download
from utils.sparse import SparseMLWrapper
from utils.torch_utils import select_device, intersect_dicts, is_parallel, torch_distributed_zero_first


def create_checkpoint(epoch, model, optimizer, ema, sparseml_wrapper, **kwargs):
    pickle = not sparseml_wrapper.qat_active(epoch)  # qat does not support pickled exports
    ckpt_model = deepcopy(model.module if is_parallel(model) else model).float()
    yaml = ckpt_model.yaml
    if not pickle:
        ckpt_model = ckpt_model.state_dict()

    return {'epoch': epoch,
            'model': ckpt_model,
            'optimizer': optimizer.state_dict(),
            'yaml': yaml,
            **ema.state_dict(pickle),
            **sparseml_wrapper.state_dict(),
            **kwargs}


def load_checkpoint(type_, weights, device, cfg=None, hyp=None, nc=None, recipe=None, resume=None, rank=-1):
    with torch_distributed_zero_first(rank):
        attempt_download(weights)  # download if not found locally
    ckpt = torch.load(weights, map_location=device)  # load checkpoint
    start_epoch = ckpt['epoch'] + 1 if 'epoch' in ckpt else 0
    pickled = isinstance(ckpt['model'], nn.Module)

    if pickled and type_ == 'ensemble':
        # load ensemble using pickled
        cfg = None
        model = attempt_load(weights, map_location=device)  # load FP32 model
        state_dict = model.state_dict()
    else:
        # load model from config and weights
        cfg = cfg or (ckpt['yaml'] if 'yaml' in ckpt else None) or \
              (ckpt['model'].yaml if pickled else None)
        model = Model(cfg, ch=3, nc=ckpt['nc'] if ('nc' in ckpt and not nc) else nc,
                      anchors=hyp.get('anchors') if hyp else None).to(device)
        model_key = 'ema' if (type_ in ['ema', 'ensemble'] and 'ema' in ckpt and ckpt['ema']) else 'model'
        state_dict = ckpt[model_key].float().state_dict() if pickled else ckpt[model_key]

    # turn gradients for params back on in case they were removed
    for p in model.parameters():
        p.requires_grad = True

    # load sparseml recipe for applying pruning and quantization
    recipe = recipe or (ckpt['recipe'] if 'recipe' in ckpt else None)
    sparseml_wrapper = SparseMLWrapper(model, recipe)
    if type_ in ['ema', 'ensemble']:
        # apply the recipe to create the final state of the model when not training
        sparseml_wrapper.apply()
    else:
        # intialize the recipe for training
        sparseml_wrapper.initialize(start_epoch)

    if type_ == 'train':
        # load any missing weights from the model
        exclude = ['anchor'] if (cfg or hyp.get('anchors')) and not resume else []  # exclude keys
        state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude)  # intersect

    model.load_state_dict(state_dict, strict=type_ != 'train')  # load
    model.float()
    report = 'Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)

    return model, {
        'ckpt': ckpt,
        'state_dict': state_dict,
        'start_epoch': start_epoch,
        'sparseml_wrapper': sparseml_wrapper,
        'report': report,
    }


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--weights', type=str, default='./yolov3.pt', help='weights path')  # from yolov3/models/
    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
    parser.add_argument('--device', default='cpu', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
    parser.add_argument('--include', nargs='+', default=['torchscript', 'onnx', 'coreml'], help='include formats')
    parser.add_argument('--half', action='store_true', help='FP16 half-precision export')
    parser.add_argument('--inplace', action='store_true', help='set YOLOv5 Detect() inplace=True')
    parser.add_argument('--train', action='store_true', help='model.train() mode')
    parser.add_argument('--optimize', action='store_true', help='optimize TorchScript for mobile')  # TorchScript-only
    parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')  # ONNX-only
    parser.add_argument('--simplify', action='store_true', help='simplify ONNX model')  # ONNX-only
    parser.add_argument('--opset-version', type=int, default=12, help='ONNX opset version')  # ONNX-only
    parser.add_argument("--remove-grid", action="store_true", help="remove export of Detect() layer grid")
    opt = parser.parse_args()
    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
    opt.include = [x.lower() for x in opt.include]
    print(opt)
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(opt.device)
    model, extras = load_checkpoint('ensemble', opt.weights, device)  # load FP32 model
    sparseml_wrapper = extras['sparseml_wrapper']
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples
    assert not (opt.device.lower() == 'cpu' and opt.half), '--half only compatible with GPU export, i.e. use --device 0'

    # Input
    img = torch.zeros(opt.batch_size, 3, *opt.img_size).to(device)  # image size(1,3,320,192) iDetection

    # Update model
    if opt.half:
        img, model = img.half(), model.half()  # to FP16
    if opt.train:
        model.train()  # training mode (no grid construction in Detect layer)
    for k, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, models.common.Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        elif isinstance(m, models.yolo.Detect):
            m.inplace = opt.inplace
            m.onnx_dynamic = opt.dynamic
            # m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not opt.remove_grid  # set Detect() layer grid export

    for _ in range(2):
        y = model(img)  # dry runs

    # ONNX export ------------------------------------------------------------------------------------------------------
    if 'onnx' in opt.include:
        prefix = colorstr('ONNX:')
        try:
            import onnx

            print(f'{prefix} starting export with onnx {onnx.__version__}...')
            f = opt.weights.replace('.pt', '.onnx')  # filename
            if not sparseml_wrapper.enabled:
                # Jonathan: fixed opt.opset_version to 7 to make it work with onnx.js
                # but this gave other problems, so try 8, 14 (unsupported), 12 (MaxPool' with opsets: ai.onnx v12), 9 (step!=1 not supported)
                # 6 not supported, 9 (ONNX: export failure: step!=1 is currently not supported)
                torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
                                  dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'},  # size(1,3,640,640)
                                                'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)
            else:
                # export through SparseML so quantized and pruned graphs can be corrected
                save_dir = '/'.join(f.split('/')[:-1])
                save_name = f.split('/')[-1]
                exporter = ModuleExporter(model, save_dir)
                exporter.export_onnx(img, name=save_name, convert_qat=True)
                try:
                    skip_onnx_input_quantize(f, f)
                except:
                    pass

            # Checks
            model_onnx = onnx.load(f)  # load onnx model
            onnx.checker.check_model(model_onnx)  # check onnx model
            # print(onnx.helper.printable_graph(model_onnx.graph))  # print

            # Simplify
            if opt.simplify:
                try:
                    check_requirements(['onnx-simplifier'])
                    import onnxsim

                    print(f'{prefix} simplifying with onnx-simplifier {onnxsim.__version__}...')
                    model_onnx, check = onnxsim.simplify(
                        model_onnx,
                        dynamic_input_shape=opt.dynamic,
                        input_shapes={'images': list(img.shape)} if opt.dynamic else None)
                    assert check, 'assert check failed'
                    onnx.save(model_onnx, f)
                except Exception as e:
                    print(f'{prefix} simplifier failure: {e}')
            print(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
        except Exception as e:
            print(f'{prefix} export failure: {e}')'''
# Disabled write-out of the script text to models/export_patched.py; it needs
# the `code` variable that the commented-out assignment at the top would define.
#with open('models/export_patched.py', 'w') as fp:
#    fp.write(code)


In [None]:
# Disabled: models/export_patched.py only exists if the write-out in the
# previous cell is re-enabled, so there is nothing to print by default.
#!cat models/export_patched.py

In [None]:
#!pip install onnx --upgrade # make sure onnx is 1.9 instead of 1.7
# ONNX: export failure: step!=1 is currently not supported --- with pytorch 1.7.0
# also does not work. ONNX.js seems to have bad maintenance!
!pip3 install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html


In [None]:
# Run the export script on the trained checkpoint with a 512x512 input size
# (training above used --img 640). The next cell expects the result as
# best.onnx beside the weights.
%cd /content/sparseml/integrations/ultralytics-yolov5/yolov5
!python models/export.py --weights ./runs/train/exp/weights/best.pt --img-size 512 512

In [None]:
# Report the on-disk size of the exported ONNX model in human-readable units.
!du -h "/content/sparseml/integrations/ultralytics-yolov5/yolov5/runs/train/exp/weights/best.onnx"

In [None]:
# Make sure the working directory is the YOLOv5 checkout for whatever follows.
%cd /content/sparseml/integrations/ultralytics-yolov5/yolov5/