# Dependencies

Re-install YOLOv7 package and all the dependencies.

In [None]:
# Remove any previously installed yolov7 package (-y skips the confirmation prompt)
# so that the install cell below starts from a clean state.
%pip uninstall -y yolov7

In [None]:
# Requirement files are fetched from the master branch of the ValV/yolov7 repository.
# NOTE(review): requirements.torch.cpu.txt presumably selects CPU-only PyTorch builds - confirm.
%pip install -r https://raw.githubusercontent.com/ValV/yolov7/master/requirements.torch.cpu.txt
%pip install -r https://raw.githubusercontent.com/ValV/yolov7/master/requirements.txt
# Install (or upgrade, -U) the yolov7 package itself from the repository's `package` branch.
%pip install -U git+https://github.com/ValV/yolov7.git@package#egg=yolov7

# Export

The `export` function is adapted from [export.py](https://github.com/ValV/yolov7/blob/master/export.py).

## Dependencies

Extra dependencies (including Nvidia TensorRT):
* `nvidia-pyindex` - necessary for correct Graph Surgeon installation;
* `onnx-simplifier` - model optimization support during export;
* `onnx-graphsurgeon` - Graph Surgeon for adding NMS into ONNX model;
* `protobuf` - correct version of protobuf (pain).

> Comment out the Nvidia dependencies to avoid trouble when working in a CPU-only environment.

In [None]:
# The commented-out lines are the variants that also install the Nvidia packages
# (nvidia-pyindex, onnx-graphsurgeon); the active lines are the subset without them.
# %pip install -U 'setuptools' 'nvidia-pyindex'
%pip install -U 'setuptools'
# %pip install 'onnx>=1.9.0'
# %pip install 'onnx-simplifier>=0.3.6' 'onnx-graphsurgeon' 'protobuf~=3.19.6'
%pip install 'onnx-simplifier>=0.3.6' 'protobuf~=3.19.6'

## Function

NOTE: this function was intended for ONNX export, so export to other formats will require some code modifications.

In [None]:
import sys
import time
import warnings

from os import makedirs, path as osp
from glob import glob

import torch

from torch import nn

from yolov7.models.common import Conv
from yolov7.models.experimental import attempt_load, End2End
from yolov7.utils.activations import Hardswish, SiLU
from yolov7.utils.general import set_logging, check_img_size
from yolov7.utils.torch_utils import select_device
from yolov7.utils.add_nms import RegisterNMS


def export(
    weights: str,  # weights path
    img_size: "int | list | tuple" = (640, 640),  # image size: one int or [height, width]
    batch_size: int = 1,  # batch size
    dynamic: bool = False,  # dynamic ONNX axes
    dynamic_batch: bool = False,  # dynamic batch onnx for tensorrt and onnx-runtime (disables dynamic axes)
    grid: bool = False,  # export Detect() layer grid
    end2end: bool = False,  # export end2end onnx (disables dynamic axes)
    max_wh: int = None,  # None for tensorrt nms, int value for onnx-runtime nms
    topk_all: int = 4000,  # topk objects for every image
    iou_thres: float = 0.45,  # iou threshold for NMS
    conf_thres: float = 0.25,  # conf threshold for NMS
    device: str = "cpu",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    simplify: bool = False,  # simplify onnx model
    include_nms: bool = False,  # export end2end onnx
    fp16: bool = False,  # CoreML FP16 half-precision export
    int8: bool = False,  # CoreML INT8 quantization
):
    """Export a PyTorch checkpoint to TorchScript, CoreML, TorchScript-Lite and ONNX.

    Output files are written next to ``weights``; each one reuses the weights
    path with its extension replaced (``.torchscript.pt``, ``.mlmodel``,
    ``.torchscript.ptl``, ``.onnx``).

    Args:
        weights: Path to the PyTorch checkpoint.
        img_size: Input size. A single int or a one-element sequence is
            expanded to a square; otherwise the values follow the batch and
            channel axes of the dummy input (axes the dynamic ONNX export
            names "height" and "width"). The caller's sequence is never
            modified.
        batch_size: Batch dimension of the dummy input.
        dynamic: Export ONNX with dynamic batch/height/width axes. Ignored
            when ``end2end`` or ``dynamic_batch`` is set.
        dynamic_batch: Export ONNX with a dynamic batch axis only.
        grid: Export the grid of the last (Detect) layer.
        end2end: With ``grid``, wrap the model in ``End2End`` before ONNX export.
        max_wh: Passed to ``End2End``. ``None`` selects the TensorRT output
            layout, an int selects the onnxruntime one.
        topk_all: Passed to ``End2End``.
        iou_thres: IoU threshold passed to ``End2End``.
        conf_thres: Confidence threshold passed to ``End2End``.
        device: Device string passed to ``select_device``.
        simplify: Run ``onnxsim.simplify`` on the exported ONNX model.
        include_nms: Set ``include_nms`` on the last layer and run
            ``RegisterNMS`` on the saved ONNX file.
        fp16: Quantize CoreML weights to 16 bits (macOS only).
        int8: Quantize CoreML weights to 8 bits (macOS only); wins over ``fp16``.

    Raises:
        Exception: Any failure in the ONNX stage is reported and re-raised.
            Failures in the other stages are only printed.
    """
    # Build a fresh two-element list instead of `img_size *= ...`, which would
    # extend a caller-owned list in place.
    if isinstance(img_size, int):
        img_size = [img_size, img_size]
    else:
        img_size = list(img_size)
        if len(img_size) == 1:
            img_size = img_size * 2  # expand
    dynamic = dynamic and not end2end and not dynamic_batch
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(device)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    img_size = [check_img_size(x, gs) for x in img_size]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(batch_size, 3, *img_size).to(device)  # image size(1, 3, 320, 192) iDetection

    # Update model
    for _, m in model.named_modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
    model.model[-1].export = not grid  # set Detect() layer grid export
    y = model(img)  # dry run
    if include_nms:
        model.model[-1].include_nms = True
        y = None  # marks the two-output ("classes", "boxes") ONNX layout below

    # Strip only the final extension: str.replace(".pt", ...) would also rewrite
    # a ".pt" inside a directory name and would return the weights path itself
    # (overwriting the checkpoint) when the suffix is missing.
    stem = osp.splitext(weights)[0]

    # TorchScript export
    ts = None  # stays None when tracing fails so the CoreML stage can report it clearly
    try:
        print("\nStarting TorchScript export with torch %s..." % torch.__version__)
        f = stem + ".torchscript.pt"  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print("TorchScript export success, saved as %s" % f)
    except Exception as e:
        print("TorchScript export failure: %s" % e)

    # CoreML export
    try:
        import coremltools as ct

        if ts is None:
            raise RuntimeError("TorchScript model is unavailable")

        print("\nStarting CoreML export with coremltools %s..." % ct.__version__)
        # convert model from torchscript and apply pixel scaling as per detect.py
        ct_model = ct.convert(
            ts,
            inputs=[
                ct.ImageType("image", shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])
            ],
        )
        if int8:
            bits, mode = 8, "kmeans_lut"
        elif fp16:
            bits, mode = 16, "linear"
        else:
            bits, mode = 32, None
        if bits < 32:
            if sys.platform.lower() == "darwin":  # quantization only supported on macOS
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore", category=DeprecationWarning
                    )  # suppress numpy==1.20 float warning
                    ct_model = (
                        ct.models.neural_network.quantization_utils.quantize_weights(
                            ct_model, bits, mode
                        )
                    )
            else:
                print("quantization only supported on macOS, skipping...")

        f = stem + ".mlmodel"  # filename
        ct_model.save(f)
        print("CoreML export success, saved as %s" % f)
    except Exception as e:
        print("CoreML export failure: %s" % e)

    # TorchScript-Lite export
    try:
        # Imported here (like the other optional exporters); without it this
        # stage always failed with a NameError.
        from torch.utils.mobile_optimizer import optimize_for_mobile

        print("\nStarting TorchScript-Lite export with torch %s..." % torch.__version__)
        f = stem + ".torchscript.ptl"  # filename
        tsl = torch.jit.trace(model, img, strict=False)
        tsl = optimize_for_mobile(tsl)
        tsl._save_for_lite_interpreter(f)
        print("TorchScript-Lite export success, saved as %s" % f)
    except Exception as e:
        print("TorchScript-Lite export failure: %s" % e)

    # ONNX export
    shapes = None  # per-dimension labels for the TensorRT end2end outputs, if any
    try:
        import onnx

        print("\nStarting ONNX export with onnx %s..." % onnx.__version__)
        f = stem + ".onnx"  # filename
        model.eval()
        output_names = ["classes", "boxes"] if y is None else ["output"]
        dynamic_axes = None
        if dynamic:
            dynamic_axes = {
                "images": {
                    0: "batch",
                    2: "height",
                    3: "width",
                },  # size(1, 3, 640, 640)
                "output": {0: "batch", 2: "y", 3: "x"},
            }
        if dynamic_batch:
            batch_size = "batch"
            dynamic_axes = {
                "images": {
                    0: "batch",
                },
            }
            if end2end and max_wh is None:
                # TensorRT end2end
                output_axes = {
                    "num_dets": {0: "batch"},
                    "det_boxes": {0: "batch"},
                    "det_scores": {0: "batch"},
                    "det_classes": {0: "batch"},
                }
            else:
                # Onnxruntime
                output_axes = {
                    "output": {0: "batch"},
                }
            dynamic_axes.update(output_axes)
        if grid:
            if end2end:
                # End2end Detect() layer grid export
                # Parentheses are required: `%` binds tighter than the
                # conditional expression, so without them the onnxruntime
                # case printed only the bare word "onnxruntime".
                print(
                    "\nStarting export end2end onnx model for %s..."
                    % ("TensorRT" if max_wh is None else "onnxruntime")
                )
                model = End2End(
                    model,
                    topk_all,
                    iou_thres,
                    conf_thres,
                    max_wh,
                    device,
                    len(labels),
                )
                if max_wh is None:
                    # TensorRT end2end
                    output_names = [
                        "num_dets",
                        "det_boxes",
                        "det_scores",
                        "det_classes",
                    ]
                    # Flattened dims of the four outputs, consumed in order below.
                    shapes = [
                        batch_size,
                        1,
                        batch_size,
                        topk_all,
                        4,
                        batch_size,
                        topk_all,
                        batch_size,
                        topk_all,
                    ]
                else:
                    # Onnxruntime end2end
                    output_names = ["output"]
            else:
                # Basic Detect() layer grid export
                model.model[-1].concat = True

        torch.onnx.export(
            model,
            img,
            f,
            verbose=False,
            opset_version=12,
            input_names=["images"],
            output_names=output_names,
            dynamic_axes=dynamic_axes,
        )

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model

        # Only relabel output dims when the TensorRT end2end wrapper was
        # actually applied; `shapes` is not defined for end2end without grid.
        if shapes:
            for i in onnx_model.graph.output:
                for j in i.type.tensor_type.shape.dim:
                    j.dim_param = str(shapes.pop(0))

        if simplify:
            try:
                import onnxsim

                print("\nStarting to simplify ONNX...")
                simplified_model, check = onnxsim.simplify(onnx_model)
                assert check, "assert check failed"
                # Adopt the simplified graph only after its check passed, so a
                # failed check leaves the validated model to be saved.
                onnx_model = simplified_model
            except Exception as e:
                print(f"Simplifier failure: {e}")

        onnx.save(onnx_model, f)
        print("ONNX export success, saved as %s" % f)

        if include_nms:
            print("Registering NMS plugin for ONNX...")
            mo = RegisterNMS(f)
            mo.register_nms()
            mo.save(f)

    except Exception as e:
        # Report first, then propagate: the message used to sit after `raise`
        # and could never be printed.
        print("ONNX export failure: %s" % e)
        raise

    # Finish
    print(
        "\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron."
        % (time.time() - t)
    )

## Config

Dummy config. The only parameter it requires is `save_dir` - that is the path where experiment results have been written. By default the path `./runs/train/exp-xxx` is set before training.

To convert a model without running the whole training procedure, set `opt.save_dir` to a custom path that contains a `weights` subdirectory with PyTorch models.

In [None]:
from dataclasses import dataclass


@dataclass
class Config:
    """Minimal stand-in for the training options object.

    `save_dir` is declared as a real dataclass field so that it shows up in
    the repr and in comparisons, and so that a fresh `Config()` already has it.
    """

    # Directory whose `weights` subdirectory holds the PyTorch models.
    save_dir: str = "."


opt = Config(save_dir=".")

The custom path constant is extracted intentionally, because this notebook is separate from training and its config. Setting this constant to a custom directory lifts the path conventions and restrictions.

In [None]:
# Directory scanned for PyTorch models: the `weights` subdirectory of opt.save_dir.
PATH_EXPORT_SOURCE = osp.join(opt.save_dir, "weights")

## Export

Some notes on exporting:
* `path_export_source` - is the path where PyTorch models reside;
* `size_input` - the ONNX model input size (necessary for CPU version of NMS).

Parameters explanation:
* `weights` - a path to a PyTorch model;
* `iou_thres` - NMS IoU threshold value (merge boxes that overlap more than this value); use higher values for higher object density/overlap;
* `conf_thres` - NMS object confidence threshold (bboxes below this value will be dropped);
* `grid` - export last Detect() layer (not quite sure about this argument, but it works);
* `end2end` - export the model with NMS embedded into ONNX graph;
* `max_wh` - maximum size of NMS matrix;
* `simplify` - optimize (fuse some nodes, etc).

> If `max_wh` is `None` (default), then NMS in the ONNX model will be TensorRT ops and will not run on CPU.

Options that should not be enabled when exporting end-to-end ONNX model with NMS for CPU:
* `dynamic`;
* `dynamic_batch`;
* `include_nms`.

> TODO: try dynamic batch.

In [None]:
path_export_source = PATH_EXPORT_SOURCE
# Exported files are written next to each source checkpoint, because export()
# derives every output name from the weights path.

# [width, height]; reversed below because export() puts the first value on
# the axis it names "height".
size_input = [3840, 2176]  # [3840, 2160] (4K) + multiple of 32

# glob() returns matches in arbitrary order; sort for a reproducible run.
# "????.pt" matches file names with a four-character stem.
paths_weights = sorted(glob(osp.join(path_export_source, "????.pt")))
if not paths_weights:
    print(f"No '????.pt' checkpoints found in {path_export_source}")

for path_weights in paths_weights:
    # Compare the file name itself: a substring test on the whole path would
    # also skip every checkpoint under a directory containing "init.pt".
    if osp.basename(path_weights) == "init.pt":
        continue
    print(f"Exporting {osp.basename(path_weights)}...")
    export(
        weights=path_weights,
        img_size=size_input[::-1],  # opt.img_size,
        # dynamic=True,
        # dynamic_batch=True,
        iou_thres=0.25,  # iou threshold for NMS
        conf_thres=0.25,  # conf threshold for NMS
        grid=True,
        end2end=True,
        max_wh=max(size_input),
        simplify=True,
        # include_nms=True,
    )
    print("\n\n")

Check exported ONNX models.

In [None]:
# List the export directory; export() writes its outputs next to the source weights.
%ls {path_export_source}

# Archive

Pack training and export results into an archive for downloading.

> Not necessary for local conversion.

In [None]:
# from os import path as osp
# from shutil import make_archive


# make_archive(opt.save_dir, "zip", osp.dirname(opt.save_dir), osp.basename(opt.save_dir))