# Dependencies

Re-install YOLOv7 package.

In [None]:
# Remove any previously installed yolov7 package (-y skips the confirmation prompt)
# so that the install cell below starts from a clean state.
%pip uninstall -y yolov7

In [None]:
# Install the packaged fork straight from GitHub, using the `package` ref.
# NOTE(review): `package` looks like a branch name rather than a pinned commit/tag,
# so the installed code may change between runs — consider pinning to a commit hash.
%pip install -U git+https://github.com/ValV/yolov7.git@package#egg=yolov7

# Functions

This training notebook is built around three functions:
* `train` - the main function that trains a YOLOv7 model;
* `test` - called by `train` during the validation step;
* `export` - an auxiliary function defined at the end of the notebook.

Imports for `train` and `test` functions necessary for the train loop are separated from imports for the `export` function.

In [None]:
import logging
import math
import os
import random
import sys
import time
from copy import deepcopy
from pathlib import Path
from threading import Thread

import numpy as np
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# import test  # FIXME: import test.py to get mAP after each epoch

from yolov7 import PACKAGE_ROOT
from yolov7.models.experimental import attempt_load
from yolov7.models.yolo import Model
from yolov7.utils.autoanchor import check_anchors
from yolov7.utils.datasets import create_dataloader
from yolov7.utils.general import (
    labels_to_class_weights,
    increment_path,
    labels_to_image_weights,
    init_seeds,
    strip_optimizer,
    get_latest_run,
    check_dataset,
    check_file,
    check_git_status,
    check_img_size,
    check_requirements,
    print_mutation,
    set_logging,
    one_cycle,
    colorstr,
)
from yolov7.utils.general import (
    coco80_to_coco91_class,
    check_dataset,
    check_file,
    check_img_size,
    check_requirements,
    box_iou,
    non_max_suppression,
    scale_coords,
    xyxy2xywh,
    xywh2xyxy,
    set_logging,
    increment_path,
    colorstr,
)  # Test
from yolov7.utils.google_utils import attempt_download
from yolov7.utils.loss import ComputeLoss, ComputeLossOTA
from yolov7.utils.metrics import fitness
from yolov7.utils.metrics import ap_per_class, ConfusionMatrix  # Test
from yolov7.utils.plots import (
    plot_images,
    plot_labels,
    plot_results,
    plot_evolution,
)
from yolov7.utils.plots import (
    plot_images,
    output_to_target,
    plot_study_txt,
)  # Test
from yolov7.utils.torch_utils import (
    ModelEMA,
    select_device,
    intersect_dicts,
    torch_distributed_zero_first,
    is_parallel,
)
from yolov7.utils.torch_utils import (
    select_device,
    time_synchronized,
    TracedModel,
)  # Test
from yolov7.utils.wandb_logging.wandb_utils import (
    WandbLogger,
    check_wandb_resume,
)


# Module-level logger; `train` reports its progress through it.
logger = logging.getLogger(__name__)

## Test

The `test` function must be defined before `train`, since the latter depends on it.

This function is taken from [test.py](https://github.com/ValV/yolov7/blob/master/test.py) so that it can be easily modified in place.

In [None]:
def test(
    data,
    weights=None,
    batch_size=32,
    imgsz=640,
    conf_thres=0.001,
    iou_thres=0.45,  # for NMS (default: 0.6)
    save_json=False,
    single_cls=False,
    augment=False,
    verbose=False,
    model=None,
    dataloader=None,
    save_dir=Path(""),  # for saving images
    save_txt=False,  # for auto-labelling
    save_hybrid=False,  # for hybrid auto-labelling
    save_conf=False,  # save auto-label confidences
    plots=True,
    wandb_logger=None,
    compute_loss=None,
    half_precision=True,
    trace=False,
    is_coco=False,
    v5_metric=False,
):
    """Evaluate a detection model on a dataset and return its metrics.

    The function runs in one of two modes:

    * **training mode** (``model`` is given): the device is taken from the
      model's parameters and the supplied ``dataloader`` is iterated;
    * **standalone mode** (``model`` is ``None``): the model is loaded from
      ``weights`` and a dataloader is created here. This mode reads a
      module-level ``opt`` object (``opt.device``, ``opt.project``,
      ``opt.name``, ``opt.exist_ok``, ``opt.task``; ``opt`` is also passed to
      ``create_dataloader``), so ``opt`` must be defined before the call.

    Args:
        data: Dataset description, either a dict or a path to a YAML file.
            When a path is given, it is loaded with ``yaml.SafeLoader`` and
            ``is_coco`` is overwritten by ``data.endswith("coco.yaml")``.
        weights: Checkpoint passed to ``attempt_load`` in standalone mode;
            its stem also names the predictions JSON file.
        batch_size: Batch size for the standalone dataloader and the speed
            report.
        imgsz: Inference image size (checked against the model stride in
            standalone mode).
        conf_thres: Confidence threshold passed to ``non_max_suppression``.
        iou_thres: IoU threshold passed to ``non_max_suppression``.
        save_json: Collect predictions as COCO-style records, dump them to
            ``<save_dir>/<weights stem>_predictions.json`` and try to score
            them with pycocotools.
        single_cls: Treat the dataset as having a single class (``nc = 1``).
        augment: Forwarded to the model call as ``augment``.
        verbose: Print per-class results.
        model: Model to evaluate; ``None`` selects standalone mode.
        dataloader: Dataloader to iterate in training mode.
        save_dir: Directory for plots, label files and the predictions JSON.
        save_txt: Append predictions to ``<save_dir>/labels/<image stem>.txt``.
        save_hybrid: Pass the ground-truth labels to NMS as ``labels``.
        save_conf: Include the confidence in the saved label lines.
        plots: Produce batch plots and the confusion matrix.
        wandb_logger: Optional W&B logger wrapper.
        compute_loss: Optional callable; when given, the first three
            components of its second return value are accumulated as the loss.
        half_precision: Use FP16 when the device is not a CPU.
        trace: Wrap the loaded model in ``TracedModel`` (standalone mode).
        is_coco: Map class indices through ``coco80_to_coco91_class`` in the
            JSON output and restrict the pycocotools image ids.
        v5_metric: Forwarded to ``ap_per_class``.

    Returns:
        A 3-tuple ``(results, maps, t)`` where ``results`` is
        ``(mp, mr, map50, map, *loss)`` with the loss averaged over the number
        of batches, ``maps`` is an array of per-class mAP values of length
        ``nc``, and ``t`` is ``(inference ms, NMS ms, total ms, imgsz, imgsz,
        batch_size)`` with the timings averaged per image.
    """
    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train
        device = next(model.parameters()).device  # get model device

    else:  # called directly (relies on the module-level `opt`)
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)

        # Directories
        save_dir = Path(
            increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok)
        )  # increment run
        (save_dir / "labels" if save_txt else save_dir).mkdir(
            parents=True, exist_ok=True
        )  # make dir

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
        imgsz = check_img_size(imgsz, s=gs)  # check img_size

        if trace:
            model = TracedModel(model, device, imgsz)

    # Half
    half = (
        device.type != "cpu" and half_precision
    )  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    if isinstance(data, str):
        is_coco = data.endswith("coco.yaml")
        with open(data) as f:
            data = yaml.load(f, Loader=yaml.SafeLoader)
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data["nc"])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Logging
    log_imgs = 0  # stays 0 without W&B, which also disables the media-panel block below
    if wandb_logger and wandb_logger.wandb:
        log_imgs = min(wandb_logger.log_imgs, 100)
    # Dataloader
    if not training:
        if device.type != "cpu":
            model(
                torch.zeros(1, 3, imgsz, imgsz)
                .to(device)
                .type_as(next(model.parameters()))
            )  # run once
        task = (
            opt.task if opt.task in ("train", "val", "test") else "val"
        )  # path to train/val/test images
        dataloader = create_dataloader(
            data[task],
            imgsz,
            batch_size,
            gs,
            opt,
            pad=0.5,
            rect=True,
            prefix=colorstr(f"{task}: "),
        )[0]

    if v5_metric:
        print("Testing with YOLOv5 AP metric...")

    seen = 0
    confusion_matrix = ConfusionMatrix(nc=nc)
    names = {
        k: v
        for k, v in enumerate(
            model.names if hasattr(model, "names") else model.module.names
        )
    }
    coco91class = coco80_to_coco91_class()
    s = ("%20s" + "%12s" * 6) % (
        "Class",
        "Images",
        "Labels",
        "P",
        "R",
        "mAP@.5",
        "mAP@.5:.95",
    )
    p, r, f1, mp, mr, map50, map, t0, t1 = (
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
        0.0,
    )
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class, wandb_images = [], [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width

        with torch.no_grad():
            # Run model
            t = time_synchronized()
            out, train_out = model(
                img, augment=augment
            )  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if compute_loss:
                loss += compute_loss([x.float() for x in train_out], targets)[1][
                    :3
                ]  # box, obj, cls

            # Run NMS
            targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(
                device
            )  # to pixels
            lb = (
                [targets[targets[:, 0] == i, 1:] for i in range(nb)]
                if save_hybrid
                else []
            )  # for autolabelling
            t = time_synchronized()
            out = non_max_suppression(
                out,
                conf_thres=conf_thres,
                iou_thres=iou_thres,
                labels=lb,
                multi_label=True,
            )
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(out):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            path = Path(paths[si])
            seen += 1

            if len(pred) == 0:
                if nl:
                    stats.append(
                        (
                            torch.zeros(0, niou, dtype=torch.bool),
                            torch.Tensor(),
                            torch.Tensor(),
                            tcls,
                        )
                    )
                continue

            # Predictions
            predn = pred.clone()
            scale_coords(
                img[si].shape[1:], predn[:, :4], shapes[si][0], shapes[si][1]
            )  # native-space pred

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[
                    [1, 0, 1, 0]
                ]  # normalization gain whwh
                # Open the label file once per image; `pred` is non-empty here,
                # so at least one line is always written.
                with open(save_dir / "labels" / (path.stem + ".txt"), "a") as f:
                    for *xyxy, conf, cls in predn.tolist():
                        xywh = (
                            (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn)
                            .view(-1)
                            .tolist()
                        )  # normalized xywh
                        line = (
                            (cls, *xywh, conf) if save_conf else (cls, *xywh)
                        )  # label format
                        f.write(("%g " * len(line)).rstrip() % line + "\n")

            # W&B logging - Media Panel Plots
            # (log_imgs is 0 when there is no W&B logger, so the first test
            # short-circuits before wandb_logger is dereferenced)
            if (
                len(wandb_images) < log_imgs and wandb_logger.current_epoch > 0
            ):  # Check for test operation
                if wandb_logger.current_epoch % wandb_logger.bbox_interval == 0:
                    box_data = [
                        {
                            "position": {
                                "minX": xyxy[0],
                                "minY": xyxy[1],
                                "maxX": xyxy[2],
                                "maxY": xyxy[3],
                            },
                            "class_id": int(cls),
                            "box_caption": "%s %.3f" % (names[cls], conf),
                            "scores": {"class_score": conf},
                            "domain": "pixel",
                        }
                        for *xyxy, conf, cls in pred.tolist()
                    ]
                    boxes = {
                        "predictions": {
                            "box_data": box_data,
                            "class_labels": names,
                        }
                    }  # inference-space
                    wandb_images.append(
                        wandb_logger.wandb.Image(
                            img[si], boxes=boxes, caption=path.name
                        )
                    )
            if wandb_logger and wandb_logger.wandb_run:
                wandb_logger.log_training_progress(predn, path, names)

            # Append to pycocotools JSON dictionary
            if save_json:
                # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ...
                image_id = int(path.stem) if path.stem.isnumeric() else path.stem
                box = xyxy2xywh(predn[:, :4])  # xywh
                box[:, :2] -= box[:, 2:] / 2  # xy center to top-left corner
                # Distinct loop names so the precision metric `p` is not clobbered.
                for pred_row, bbox in zip(pred.tolist(), box.tolist()):
                    jdict.append(
                        {
                            "image_id": image_id,
                            "category_id": (
                                coco91class[int(pred_row[5])]
                                if is_coco
                                else int(pred_row[5])
                            ),
                            "bbox": [round(x, 3) for x in bbox],
                            "score": round(pred_row[4], 5),
                        }
                    )

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5])
                scale_coords(
                    img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]
                )  # native-space labels
                if plots:
                    confusion_matrix.process_batch(
                        predn, torch.cat((labels[:, 0:1], tbox), 1)
                    )

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (
                        (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)
                    )  # target indices (labels of this class)
                    pi = (
                        (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)
                    )  # prediction indices (predictions of this class)

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(predn[pi, :4], tbox[ti]).max(
                            1
                        )  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if (
                                    len(detected) == nl
                                ):  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 3:
            f = save_dir / f"test_batch{batch_i}_labels.jpg"  # labels
            Thread(
                target=plot_images,
                args=(img, targets, paths, f, names),
                daemon=True,
            ).start()
            f = save_dir / f"test_batch{batch_i}_pred.jpg"  # predictions
            Thread(
                target=plot_images,
                args=(img, output_to_target(out), paths, f, names),
                daemon=True,
            ).start()

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(
            *stats,
            plot=plots,
            v5_metric=v5_metric,
            save_dir=save_dir,
            names=names,
        )
        ap50, ap = ap[:, 0], ap.mean(1)  # AP@0.5, AP@0.5:0.95
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(
            stats[3].astype(np.int64), minlength=nc
        )  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = "%20s" + "%12i" * 2 + "%12.3g" * 4  # print format
    print(pf % ("all", seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    # max(seen, 1) avoids ZeroDivisionError when no image was processed.
    t = tuple(x / max(seen, 1) * 1e3 for x in (t0, t1, t0 + t1)) + (
        imgsz,
        imgsz,
        batch_size,
    )  # tuple
    if not training:
        print(
            "Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g"
            % t
        )

    # Plots
    if plots:
        confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
        if wandb_logger and wandb_logger.wandb:
            val_batches = [
                wandb_logger.wandb.Image(str(f), caption=f.name)
                for f in sorted(save_dir.glob("test*.jpg"))
            ]
            wandb_logger.log({"Validation": val_batches})
    if wandb_images:
        wandb_logger.log({"Bounding Box Debugger/Images": wandb_images})

    # Save JSON
    if save_json and len(jdict):
        import json  # only needed on this path

        w = (
            Path(weights[0] if isinstance(weights, list) else weights).stem
            if weights is not None
            else ""
        )  # weights
        anno_json = "./coco/annotations/instances_val2017.json"  # annotations json
        pred_json = str(save_dir / f"{w}_predictions.json")  # predictions json
        print("\nEvaluating pycocotools mAP... saving %s..." % pred_json)
        with open(pred_json, "w") as f:
            json.dump(jdict, f)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            anno = COCO(anno_json)  # init annotations api
            coco_pred = anno.loadRes(pred_json)  # init predictions api
            coco_eval = COCOeval(anno, coco_pred, "bbox")
            if is_coco:
                coco_eval.params.imgIds = [
                    int(Path(x).stem) for x in dataloader.dataset.img_files
                ]  # image IDs to evaluate
            coco_eval.evaluate()
            coco_eval.accumulate()
            coco_eval.summarize()
            map, map50 = coco_eval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            # Best effort: keep the metrics computed above if pycocotools fails.
            print(f"pycocotools unable to run: {e}")

    # Return results
    model.float()  # for training
    if not training:
        s = (
            f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}"
            if save_txt
            else ""
        )
        print(f"Results saved to {save_dir}{s}")
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (
        (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()),
        maps,
        t,
    )

## Train

Main training entry-point, uses `test` function for evaluation (as mentioned above).

This `train` function is taken from [train.py](https://github.com/ValV/yolov7/blob/master/train.py) for the same purpose as the `test` function (ease of modification).

In [None]:
def train(hyp, opt, device, tb_writer=None):
    if PACKAGE_ROOT in sys.path:
        del sys.path[sys.path.index(PACKAGE_ROOT)]
    if PACKAGE_ROOT not in sys.path:
        # print(f"DEBUG: adding '{PACKAGE_ROOT}' to sys.path...")
        sys.path.insert(0, PACKAGE_ROOT)  # FIXME: add models to path for pretrain
    logger.info(
        colorstr("hyperparameters: ") + ", ".join(f"{k}={v}" for k, v in hyp.items())
    )
    save_dir, epochs, batch_size, total_batch_size, weights, rank, freeze = (
        Path(opt.save_dir),
        opt.epochs,
        opt.batch_size,
        opt.total_batch_size,
        opt.weights,
        opt.global_rank,
        opt.freeze,
    )

    # Directories
    wdir = save_dir / "weights"
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / "last.pt"
    best = wdir / "best.pt"
    results_file = save_dir / "results.txt"

    # Save run settings
    with open(save_dir / "hyp.yaml", "w") as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / "opt.yaml", "w") as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = device.type != "cpu"
    init_seeds(2 + rank)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
    is_coco = opt.data.endswith("coco.yaml")

    # Logging - doing this before checking the dataset. Might update data_dict
    loggers = {"wandb": None}  # loggers dict
    if rank in [-1, 0]:
        opt.hyp = hyp  # add hyperparameters
        run_id = (
            torch.load(weights, map_location=device).get("wandb_id")
            if weights.endswith(".pt") and os.path.isfile(weights)
            else None
        )
        wandb_logger = WandbLogger(opt, Path(opt.save_dir).stem, run_id, data_dict)
        loggers["wandb"] = wandb_logger.wandb
        data_dict = wandb_logger.data_dict
        if wandb_logger.wandb:
            weights, epochs, hyp = (
                opt.weights,
                opt.epochs,
                opt.hyp,
            )  # WandbLogger might update weights, epochs if resuming

    nc = 1 if opt.single_cls else int(data_dict["nc"])  # number of classes
    names = (
        ["item"]
        if opt.single_cls and len(data_dict["names"]) != 1
        else data_dict["names"]
    )  # class names
    assert len(names) == nc, "%g names found for nc=%g dataset in %s" % (
        len(names),
        nc,
        opt.data,
    )  # check

    # Model
    pretrained = weights.endswith(".pt")
    # print(f"DEBUG: weights = {weights}")
    if pretrained:
        with torch_distributed_zero_first(rank):
            # print(f"DEBUG: trying to download '{weights}'")
            attempt_download(weights)  # download if not found locally
        ckpt = torch.load(weights, map_location=device)  # load checkpoint
        model = Model(
            opt.cfg or ckpt["model"].yaml,
            ch=3,
            nc=nc,
            anchors=hyp.get("anchors"),
        ).to(device)  # create
        exclude = (
            ["anchor"] if (opt.cfg or hyp.get("anchors")) and not opt.resume else []
        )  # exclude keys
        state_dict = ckpt["model"].float().state_dict()  # to FP32
        state_dict = intersect_dicts(
            state_dict, model.state_dict(), exclude=exclude
        )  # intersect
        model.load_state_dict(state_dict, strict=False)  # load
        logger.info(
            "Transferred %g/%g items from %s"
            % (len(state_dict), len(model.state_dict()), weights)
        )  # report
    else:
        model = Model(opt.cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(
            device
        )  # create
    with torch_distributed_zero_first(rank):
        check_dataset(data_dict)  # check
    train_path = data_dict["train"]
    test_path = data_dict["val"]

    # Freeze
    freeze = [
        f"model.{x}." for x in (freeze if len(freeze) > 1 else range(freeze[0]))
    ]  # parameter names to freeze (full or partial)
    for k, v in model.named_parameters():
        v.requires_grad = True  # train all layers
        if any(x in k for x in freeze):
            print("freezing %s" % k)
            v.requires_grad = False

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(
        round(nbs / total_batch_size), 1
    )  # accumulate loss before optimizing
    hyp["weight_decay"] *= total_batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, "bias") and isinstance(v.bias, nn.Parameter):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm2d):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, "weight") and isinstance(v.weight, nn.Parameter):
            pg1.append(v.weight)  # apply decay
        if hasattr(v, "im"):
            if hasattr(v.im, "implicit"):
                pg0.append(v.im.implicit)
            else:
                for iv in v.im:
                    pg0.append(iv.implicit)
        if hasattr(v, "imc"):
            if hasattr(v.imc, "implicit"):
                pg0.append(v.imc.implicit)
            else:
                for iv in v.imc:
                    pg0.append(iv.implicit)
        if hasattr(v, "imb"):
            if hasattr(v.imb, "implicit"):
                pg0.append(v.imb.implicit)
            else:
                for iv in v.imb:
                    pg0.append(iv.implicit)
        if hasattr(v, "imo"):
            if hasattr(v.imo, "implicit"):
                pg0.append(v.imo.implicit)
            else:
                for iv in v.imo:
                    pg0.append(iv.implicit)
        if hasattr(v, "ia"):
            if hasattr(v.ia, "implicit"):
                pg0.append(v.ia.implicit)
            else:
                for iv in v.ia:
                    pg0.append(iv.implicit)
        if hasattr(v, "attn"):
            if hasattr(v.attn, "logit_scale"):
                pg0.append(v.attn.logit_scale)
            if hasattr(v.attn, "q_bias"):
                pg0.append(v.attn.q_bias)
            if hasattr(v.attn, "v_bias"):
                pg0.append(v.attn.v_bias)
            if hasattr(v.attn, "relative_position_bias_table"):
                pg0.append(v.attn.relative_position_bias_table)
        if hasattr(v, "rbr_dense"):
            if hasattr(v.rbr_dense, "weight_rbr_origin"):
                pg0.append(v.rbr_dense.weight_rbr_origin)
            if hasattr(v.rbr_dense, "weight_rbr_avg_conv"):
                pg0.append(v.rbr_dense.weight_rbr_avg_conv)
            if hasattr(v.rbr_dense, "weight_rbr_pfir_conv"):
                pg0.append(v.rbr_dense.weight_rbr_pfir_conv)
            if hasattr(v.rbr_dense, "weight_rbr_1x1_kxk_idconv1"):
                pg0.append(v.rbr_dense.weight_rbr_1x1_kxk_idconv1)
            if hasattr(v.rbr_dense, "weight_rbr_1x1_kxk_conv2"):
                pg0.append(v.rbr_dense.weight_rbr_1x1_kxk_conv2)
            if hasattr(v.rbr_dense, "weight_rbr_gconv_dw"):
                pg0.append(v.rbr_dense.weight_rbr_gconv_dw)
            if hasattr(v.rbr_dense, "weight_rbr_gconv_pw"):
                pg0.append(v.rbr_dense.weight_rbr_gconv_pw)
            if hasattr(v.rbr_dense, "vector"):
                pg0.append(v.rbr_dense.vector)

    if opt.adam:
        optimizer = optim.Adam(
            pg0, lr=hyp["lr0"], betas=(hyp["momentum"], 0.999)
        )  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(
            pg0, lr=hyp["lr0"], momentum=hyp["momentum"], nesterov=True
        )

    optimizer.add_param_group(
        {"params": pg1, "weight_decay": hyp["weight_decay"]}
    )  # add pg1 with weight_decay
    optimizer.add_param_group({"params": pg2})  # add pg2 (biases)
    logger.info(
        "Optimizer groups: %g .bias, %g conv.weight, %g other"
        % (len(pg2), len(pg1), len(pg0))
    )
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    if opt.linear_lr:
        lf = (
            lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp["lrf"]) + hyp["lrf"]
        )  # linear
    else:
        lf = one_cycle(1, hyp["lrf"], epochs)  # cosine 1->hyp['lrf']
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    # EMA
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # Resume
    start_epoch, best_fitness = 0, 0.0
    if pretrained:
        # Optimizer
        if ckpt["optimizer"] is not None:
            optimizer.load_state_dict(ckpt["optimizer"])
            best_fitness = ckpt["best_fitness"]

        # EMA
        if ema and ckpt.get("ema"):
            ema.ema.load_state_dict(ckpt["ema"].float().state_dict())
            ema.updates = ckpt["updates"]

        # Results
        if ckpt.get("training_results") is not None:
            results_file.write_text(ckpt["training_results"])  # write results.txt

        # Epochs
        start_epoch = ckpt["epoch"] + 1
        if opt.resume:
            assert start_epoch > 0, (
                "%s training to %g epochs is finished, nothing to resume."
                % (
                    weights,
                    epochs,
                )
            )
        if epochs < start_epoch:
            logger.info(
                "%s has been trained for %g epochs. Fine-tuning for %g additional epochs."
                % (weights, ckpt["epoch"], epochs)
            )
            epochs += ckpt["epoch"]  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [
        check_img_size(x, gs) for x in opt.img_size
    ]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info("Using SyncBatchNorm()")

    # Train loader
    dataloader, dataset = create_dataloader(
        train_path,
        imgsz,
        batch_size,
        gs,
        opt,
        hyp=hyp,
        augment=True,
        cache=opt.cache_images,
        rect=opt.rect,
        rank=rank,
        world_size=opt.world_size,
        workers=opt.workers,
        image_weights=opt.image_weights,
        quad=opt.quad,
        prefix=colorstr("train: "),
    )
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, (
        "Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g"
        % (
            mlc,
            nc,
            opt.data,
            nc - 1,
        )
    )

    # Process 0
    if rank in [-1, 0]:
        testloader = create_dataloader(
            test_path,
            imgsz_test,
            batch_size * 2,
            gs,
            opt,  # testloader
            hyp=hyp,
            cache=opt.cache_images and not opt.notest,
            rect=True,
            rank=-1,
            world_size=opt.world_size,
            workers=opt.workers,
            pad=0.5,
            prefix=colorstr("val: "),
        )[0]

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            if plots:
                # plot_labels(labels, names, save_dir, loggers)
                if tb_writer:
                    tb_writer.add_histogram("classes", c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz)
            model.half().float()  # pre-reduce anchor precision

    # DDP mode
    if cuda and rank != -1:
        model = DDP(
            model,
            device_ids=[opt.local_rank],
            output_device=opt.local_rank,
            # nn.MultiheadAttention incompatibility with DDP https://github.com/pytorch/pytorch/issues/26698
            find_unused_parameters=any(
                isinstance(layer, nn.MultiheadAttention) for layer in model.modules()
            ),
        )

    # Model parameters
    hyp["box"] *= 3.0 / nl  # scale to layers
    hyp["cls"] *= nc / 80.0 * 3.0 / nl  # scale to classes and layers
    hyp["obj"] *= (imgsz / 640) ** 2 * 3.0 / nl  # scale to image size and layers
    hyp["label_smoothing"] = opt.label_smoothing
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = (
        labels_to_class_weights(dataset.labels, nc).to(device) * nc
    )  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(
        round(hyp["warmup_epochs"] * nb), 1000
    )  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (
        0,
        0,
        0,
        0,
        0,
        0,
        0,
    )  # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    compute_loss_ota = ComputeLossOTA(model)  # init loss class
    compute_loss = ComputeLoss(model)  # init loss class
    logger.info(
        f"Image sizes {imgsz} train, {imgsz_test} test\n"
        f"Using {dataloader.num_workers} dataloader workers\n"
        f"Logging results to {save_dir}\n"
        f"Starting training for {epochs} epochs..."
    )
    torch.save(model, wdir / "init.pt")
    for epoch in range(
        start_epoch, epochs
    ):  # epoch ------------------------------------------------------------------
        model.train()

        # print(f"DEBUG: {epoch=} / image weights")
        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                cw = (
                    model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc
                )  # class weights
                iw = labels_to_image_weights(
                    dataset.labels, nc=nc, class_weights=cw
                )  # image weights
                dataset.indices = random.choices(
                    range(dataset.n), weights=iw, k=dataset.n
                )  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = (
                    torch.tensor(dataset.indices)
                    if rank == 0
                    else torch.zeros(dataset.n)
                ).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(
            ("\n" + "%10s" * 8)
            % (
                "Epoch",
                "gpu_mem",
                "box",
                "obj",
                "cls",
                "total",
                "labels",
                "img_size",
            )
        )
        if rank in [-1, 0]:
            pbar = tqdm(pbar, total=nb)  # progress bar
            ...
        optimizer.zero_grad()
        for (
            i,
            (
                imgs,
                targets,
                paths,
                _,
            ),
        ) in (
            pbar
        ):  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = (
                imgs.to(device, non_blocking=True).float() / 255.0
            )  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                # print(f"DEBUG: {epoch=} / {i=} / warmup")
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(
                    1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()
                )
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x["lr"] = np.interp(
                        ni,
                        xi,
                        [
                            hyp["warmup_bias_lr"] if j == 2 else 0.0,
                            x["initial_lr"] * lf(epoch),
                        ],
                    )
                    if "momentum" in x:
                        x["momentum"] = np.interp(
                            ni, xi, [hyp["warmup_momentum"], hyp["momentum"]]
                        )

            # Multi-scale
            if opt.multi_scale:
                # print(f"DEBUG: {epoch=} / {i=} / multi-scale")
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [
                        math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]
                    ]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(
                        imgs, size=ns, mode="bilinear", align_corners=False
                    )

            # Forward
            with amp.autocast(enabled=cuda):
                # print(f"DEBUG: {epoch=} / {i=} / forward / {imgs.shape=}")
                pred = model(imgs)  # forward
                if "loss_ota" not in hyp or hyp["loss_ota"] == 1:
                    # print(f"DEBUG: {epoch=} / {i=} / loss OTA")
                    loss, loss_items = compute_loss_ota(
                        pred, targets.to(device), imgs
                    )  # loss scaled by batch_size
                else:
                    # print(f"DEBUG: {epoch=} / {i=} / loss")
                    loss, loss_items = compute_loss(
                        pred, targets.to(device)
                    )  # loss scaled by batch_size
                if rank != -1:
                    loss *= (
                        opt.world_size
                    )  # gradient averaged between devices in DDP mode
                if opt.quad:
                    loss *= 4.0

            # Backward
            # print(f"DEBUG: {epoch=} / {i=} / backward")
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                # print(f"DEBUG: {epoch=} / {i=} / optimize")
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                # print(f"DEBUG: {epoch=} / {i=} / loss")
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = "%.3gG" % (
                    torch.cuda.memory_reserved() / 1e9
                    if torch.cuda.is_available()
                    else 0
                )  # (GB)
                s = ("%10s" * 2 + "%10.4g" * 6) % (
                    "%g/%g" % (epoch, epochs - 1),
                    mem,
                    *mloss,
                    targets.shape[0],
                    imgs.shape[-1],
                )
                if hasattr(pbar, "set_description"):
                    pbar.set_description(s)

                # Plot
                # print(f"DEBUG: {epoch=} / {i=} / plot")
                if plots and ni < 10:
                    f = save_dir / f"train_batch{ni}.jpg"  # filename
                    Thread(
                        target=plot_images,
                        args=(imgs, targets, paths, f),
                        daemon=True,
                    ).start()
                    # if tb_writer:
                    #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                    #     tb_writer.add_graph(torch.jit.trace(model, imgs, strict=False), [])  # add model graph
                elif plots and ni == 10 and wandb_logger.wandb:
                    wandb_logger.log(
                        {
                            "Mosaics": [
                                wandb_logger.wandb.Image(str(x), caption=x.name)
                                for x in save_dir.glob("train*.jpg")
                                if x.exists()
                            ]
                        }
                    )

            # end batch ------------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------------

        # Scheduler
        # print(f"DEBUG: {epoch=} / scheduler")
        lr = [x["lr"] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            # print(f"DEBUG: {epoch=} / mAP -->")
            ema.update_attr(
                model,
                include=[
                    "yaml",
                    "nc",
                    "hyp",
                    "gr",
                    "names",
                    "stride",
                    "class_weights",
                ],
            )
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                wandb_logger.current_epoch = epoch + 1
                results, maps, times = test(
                    data_dict,
                    batch_size=batch_size * 2,
                    imgsz=imgsz_test,
                    model=ema.ema,
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=save_dir,
                    verbose=nc < 50 and final_epoch,
                    plots=plots and final_epoch,
                    wandb_logger=wandb_logger,
                    compute_loss=compute_loss,
                    is_coco=is_coco,
                    v5_metric=opt.v5_metric,
                )

            # Write
            # print(f"DEBUG: {epoch=} / write + log + save")
            with open(results_file, "a") as f:
                f.write(s + "%10.4g" * 7 % results + "\n")  # append metrics, val_loss
            if len(opt.name) and opt.bucket:
                os.system(
                    "gsutil cp %s gs://%s/results/results%s.txt"
                    % (results_file, opt.bucket, opt.name)
                )

            # Log
            tags = [
                "train/box_loss",
                "train/obj_loss",
                "train/cls_loss",  # train loss
                "metrics/precision",
                "metrics/recall",
                "metrics/mAP_0.5",
                "metrics/mAP_0.5:0.95",
                "val/box_loss",
                "val/obj_loss",
                "val/cls_loss",  # val loss
                "x/lr0",
                "x/lr1",
                "x/lr2",
            ]  # params
            for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                if tb_writer:
                    tb_writer.add_scalar(tag, x, epoch)  # tensorboard
                if wandb_logger.wandb:
                    wandb_logger.log({tag: x})  # W&B

            # Update best mAP
            fi = fitness(
                np.array(results).reshape(1, -1)
            )  # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
            if fi > best_fitness:
                best_fitness = fi
            wandb_logger.end_epoch(best_result=best_fitness == fi)

            # Save model
            if (not opt.nosave) or (final_epoch and not opt.evolve):  # if save
                ckpt = {
                    "epoch": epoch,
                    "best_fitness": best_fitness,
                    "training_results": results_file.read_text(),
                    "model": deepcopy(
                        model.module if is_parallel(model) else model
                    ).half(),
                    "ema": deepcopy(ema.ema).half(),
                    "updates": ema.updates,
                    "optimizer": optimizer.state_dict(),
                    "wandb_id": (
                        wandb_logger.wandb_run.id if wandb_logger.wandb else None
                    ),
                }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                if (best_fitness == fi) and (epoch >= 200):
                    torch.save(ckpt, wdir / "best_{:03d}.pt".format(epoch))
                if epoch == 0:
                    torch.save(ckpt, wdir / "epoch_{:03d}.pt".format(epoch))
                elif ((epoch + 1) % 25) == 0:
                    torch.save(ckpt, wdir / "epoch_{:03d}.pt".format(epoch))
                elif epoch >= (epochs - 5):
                    torch.save(ckpt, wdir / "epoch_{:03d}.pt".format(epoch))
                if wandb_logger.wandb:
                    if (
                        (epoch + 1) % opt.save_period == 0 and not final_epoch
                    ) and opt.save_period != -1:
                        wandb_logger.log_model(
                            last.parent,
                            opt,
                            epoch,
                            fi,
                            best_model=best_fitness == fi,
                        )
                del ckpt

        # end epoch ----------------------------------------------------------------------------------------------------
    # end training
    # print(f"DEBUG: {epoch=} / END --> plots / final")
    if rank in [-1, 0]:
        # Plots
        if plots:
            plot_results(save_dir=save_dir)  # save as results.png
            if wandb_logger.wandb:
                files = [
                    "results.png",
                    "confusion_matrix.png",
                    *[f"{x}_curve.png" for x in ("F1", "PR", "P", "R")],
                ]
                wandb_logger.log(
                    {
                        "Results": [
                            wandb_logger.wandb.Image(str(save_dir / f), caption=f)
                            for f in files
                            if (save_dir / f).exists()
                        ]
                    }
                )
        # Test best.pt
        logger.info(
            "%g epochs completed in %.3f hours.\n"
            % (epoch - start_epoch + 1, (time.time() - t0) / 3600)
        )
        if opt.data.endswith("coco.yaml") and nc == 80:  # if COCO
            for m in (last, best) if best.exists() else (last):  # speed, mAP tests
                results, _, _ = test.test(
                    opt.data,
                    batch_size=batch_size * 2,
                    imgsz=imgsz_test,
                    conf_thres=0.001,
                    iou_thres=0.7,
                    model=attempt_load(m, device).half(),
                    single_cls=opt.single_cls,
                    dataloader=testloader,
                    save_dir=save_dir,
                    save_json=True,
                    plots=False,
                    is_coco=is_coco,
                    v5_metric=opt.v5_metric,
                )

        # Strip optimizers
        final = best if best.exists() else last  # final model
        for f in last, best:
            if f.exists():
                strip_optimizer(f)  # strip optimizers
        if opt.bucket:
            os.system(f"gsutil cp {final} gs://{opt.bucket}/weights")  # upload
        if wandb_logger.wandb and not opt.evolve:  # Log the stripped model
            wandb_logger.wandb.log_artifact(
                str(final),
                type="model",
                name="run_" + wandb_logger.wandb_run.id + "_model",
                aliases=["last", "best", "stripped"],
            )
        wandb_logger.finish_run()
    else:
        dist.destroy_process_group()
    torch.cuda.empty_cache()
    return results

# Config

## Hyperparameters

The CLI scripts read two configuration files: one for the model and one for the hyperparameters. The hyperparameters configuration is the one overridden here.

The hyperparameters file contains training parameters and augmentation settings.

In [None]:
from yaml import dump


# Training hyperparameters and augmentation settings. The dict is written to
# `hyp.yaml`, the file `Config.hyp` points to by default.
hyp = {
    # --- optimizer and warmup ---
    "lr0": 0.001,  # initial learning rate
    "lrf": 0.25,  # final OneCycleLR learning rate (lr0 * lrf)
    "momentum": 0.9,  # SGD momentum / Adam beta1
    "weight_decay": 0.0005,  # optimizer weight decay
    "warmup_epochs": 0.01,  # warmup epochs (fractions ok)
    "warmup_momentum": 0.8,  # warmup initial momentum
    "warmup_bias_lr": 0.1,  # warmup initial bias lr
    # --- loss ---
    "box": 0.35,  # box loss gain
    "cls": 0.5,  # cls loss gain
    "cls_pw": 1.0,  # cls BCELoss positive_weight
    "obj": 1.0,  # obj loss gain (scaled with image size in `train`)
    "obj_pw": 2.0,  # obj BCELoss positive_weight
    "iou_t": 0.2,  # IoU training threshold (superseded by 'anchor_t')
    "anchor_t": 4.0,  # anchor-multiple threshold
    "fl_gamma": 0.0,  # focal loss gamma
    # --- augmentation ---
    "hsv_h": 0.015,  # image HSV-Hue augmentation (fraction)
    "hsv_s": 0.65,  # image HSV-Saturation augmentation (fraction)
    "hsv_v": 0.35,  # image HSV-Value augmentation (fraction)
    "degrees": 30.0,  # image rotation (+/- deg)
    "translate": 0.1,  # image translation (+/- fraction)
    "scale": 0.5,  # image scale (+/- gain)
    "shear": 10.0,  # image shear (+/- deg)
    "perspective": 0.000001,  # image perspective (+/- fraction), range 0-0.001
    "flipud": 0.0,  # image flip up-down (probability)
    "fliplr": 0.5,  # image flip left-right (probability)
    "mosaic": 0.65,  # mosaic augmentation (probability)
    "mixup": 0.0,  # image mixup (probability)
    "copy_paste": 0.0,  # segment copy-paste (probability), instance segmentation
    "paste_in": 0.0,  # paste-in / cutout augmentation (probability)
    # --- loss selection ---
    "loss_ota": 0,  # 1 (or key absent) -> OTA loss in `train`; otherwise plain loss
}

# Keep insertion order in the file (sort_keys=False) so it mirrors the dict above.
with open("hyp.yaml", mode="w") as hyp_stream:
    dump(hyp, stream=hyp_stream, sort_keys=False)

## Options

The CLI script uses Python's `ArgumentParser` for training configuration. `ArgumentParser` does not work well in Jupyter notebooks, so a mock object with the same fields is required.

A dataclass instance serves as this mock options object.

Three fields of the `opt` mock object point to external config files:
* `opt.cfg` - model architecture config file (empty);
* `opt.data` - dataset config file;
* `opt.hyp` - hyperparameters file (created by the cell above).

> The `opt.cfg` field is left empty when resuming training. Fine-tuning a pretrained model is effectively a form of resuming, so it is left empty here as well.

In [None]:
from dataclasses import dataclass
from os import getenv
from typing import List, Optional, Union


@dataclass
class Config:
    """Notebook stand-in for the options namespace built by the CLI's ArgumentParser.

    Field names and defaults mirror the command-line options; the "(default: ...)"
    notes record the CLI defaults where this notebook deviates from them.
    Annotations reflect what the run cell actually accepts:

    * ``img_size`` may be a single int or a list; the run cell expands it to
      two sizes (train, test).
    * ``resume`` may be a bool or a checkpoint path (str).
    * ``freeze`` may be an int or a list of ints; the run cell wraps a scalar
      into a one-element list.
    * ``world_size`` / ``global_rank`` are read from the ``WORLD_SIZE`` / ``RANK``
      environment variables once, when the class is defined.
    """

    weights: str = "yolov7-tiny.pt"
    cfg: str = ""  # (default: '')
    data: str = "data/config/Pelicans.yaml"
    # hyp: str = 'runs/train/evolve/hyp_evolved.yaml'
    # hyp: str = 'data/hyp.scratch.tiny.yaml'
    hyp: str = "hyp.yaml"
    epochs: int = 1500  # total epochs (default: 300)
    batch_size: int = 2  # training batch size (default: 16)
    total_batch_size: int = 1  # (stub, calculated)
    img_size: Union[int, List[int]] = (
        3840  # input shape (value or list [w, h], default: 640)
    )
    rect: bool = False  # (default: False)
    resume: Union[bool, str] = False  # True or a checkpoint path (default: False)
    nosave: bool = False  # do not save checkpoints (default: False)
    notest: bool = False  # do not test (default: False)
    noautoanchor: bool = False
    evolve: bool = False  # run HPO with evolution algorithm (default: False)
    bucket: str = ""  # (default: '')
    cache_images: bool = False  # (default: False)
    image_weights: bool = False
    device: str = "cpu"  # target device (default: 'cuda')
    multi_scale: bool = False  # random shapes (BoF, default: False)
    single_cls: bool = (
        False  # squeeze all non-background classes to 'item' (default: False)
    )
    adam: bool = True  # use Adam optimizer instead of SGD (default: False)
    sync_bn: bool = False  # SyncBatchNorm (CUDA only, master node, BoF, default: False)
    local_rank: int = -1  # (stub)
    workers: int = 1  # data loader workers (default: 2)
    project: str = "runs/train"  # parent directory of run folders (project/name)
    entity: Optional[str] = None  # W&B entity (default: None)
    name: str = "exp"  # experiment base name (default: 'exp')
    exist_ok: bool = False  # overwrite experiment (default: False)
    quad: bool = False  # x4 loss value (default: False)
    linear_lr: bool = False  # linear vs cosine LR scheduler (BoF, default: False)
    label_smoothing: float = 0.05  # BCE loss class label smoothing (BoF, default: 0.0)
    upload_dataset: bool = False  # W&B save dataset as artifact (default: False)
    bbox_interval: int = -1  # W&B bbox image logging interval (default: -1)
    save_period: int = -1  # log model every n epoch (default: -1)
    artifact_alias: str = "latest"  # (default: 'latest')
    freeze: Union[int, List[int]] = 0  # layers to freeze (default: 0)
    v5_metric: bool = False  # use YOLOv5 metrics (default: False)
    world_size: int = int(getenv("WORLD_SIZE", "1"))  # number of nodes (stub)
    global_rank: int = int(getenv("RANK", "-1"))  # current node rank (stub)
    save_dir: str = ""  # filled in by the run cell


opt = Config()
# opt.img_size = [1280, 736]
# opt.freeze = [0, 1, 2, 3, 4, 5]
opt

# Run loop

This cell is the single entry point for model training.

Depending on the value of `opt.evolve`, it runs either HPO (hyperparameter optimization with a genetic algorithm) or regular training.

* HPO is used for generating optimal hyperparameter config.

* Training uses either the generated hyperparameters config or a hyperparameters config defined manually.

Like the `train` function, this code is taken from [train.py](https://github.com/ValV/yolov7/blob/master/train.py).

In [None]:
# Prepare `opt` for a run: configure logging, normalise list-valued options and
# either restore the options of an interrupted run or validate a fresh setup.
set_logging(opt.global_rank)
# if opt.global_rank in [-1, 0]:
#    check_git_status()
#    check_requirements()

# `freeze` is consumed as a list; wrap a scalar value.
if not isinstance(opt.freeze, list):
    opt.freeze = [int(opt.freeze)]

# Resume
wandb_run = check_wandb_resume(opt)
if opt.resume and not wandb_run:  # resume an interrupted run
    # Checkpoint: the given path, or the most recent run when `resume` is just a flag.
    if isinstance(opt.resume, str):
        ckpt = opt.resume
    else:
        ckpt = get_latest_run()
    assert os.path.isfile(ckpt), "ERROR: --resume checkpoint does not exist"

    # Ranks belong to the current process, so remember them before `opt` is replaced.
    apriori = (opt.global_rank, opt.local_rank)
    opt_yaml = Path(ckpt).parent.parent / "opt.yaml"
    with open(opt_yaml) as f:
        opt = Config(**yaml.safe_load(f))  # replace

    # Reinstate the values that must not come from the saved options.
    opt.cfg = ""
    opt.weights = ckpt
    opt.resume = True
    opt.batch_size = opt.total_batch_size
    opt.global_rank, opt.local_rank = apriori
    logger.info("Resuming training from %s" % ckpt)
else:
    # opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
    # Check all three config files before any of them is stored back on `opt`.
    opt.data, opt.cfg, opt.hyp = map(check_file, (opt.data, opt.cfg, opt.hyp))
    assert len(opt.cfg) or len(
        opt.weights
    ), "either --cfg or --weights must be specified"

    # Image size: always a list of two sizes (train, test).
    if not isinstance(opt.img_size, list):
        opt.img_size = [opt.img_size]
    while len(opt.img_size) < 2:
        opt.img_size.append(opt.img_size[-1])

    if opt.evolve:
        opt.name = "evolve"
    opt.save_dir = increment_path(
        Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve
    )  # increment run

# Device selection. `total_batch_size` keeps the batch size requested for the
# whole job; in DDP mode `batch_size` becomes the per-process share.
opt.total_batch_size = opt.batch_size
device = select_device(opt.device, batch_size=opt.batch_size)

use_ddp = opt.local_rank != -1
if use_ddp:
    assert torch.cuda.device_count() > opt.local_rank
    torch.cuda.set_device(opt.local_rank)
    device = torch.device(f"cuda:{opt.local_rank}")
    dist.init_process_group(backend="nccl", init_method="env://")  # distributed backend
    assert (
        opt.batch_size % opt.world_size == 0
    ), "--batch-size must be multiple of CUDA device count"
    opt.batch_size = opt.total_batch_size // opt.world_size

# Hyperparameters: read back from the file `opt.hyp` points to.
with open(opt.hyp) as f:
    hyp = yaml.safe_load(f)  # load hyps

# Train
# Either run one regular training (`opt.evolve` is False) or a hyperparameter
# evolution: repeated short trainings whose hyperparameters are mutated from the
# best previous results stored in `evolve.txt`.
logger.info(opt)
if not opt.evolve:
    tb_writer = None  # init loggers
    if opt.global_rank in [-1, 0]:
        prefix = colorstr("tensorboard: ")
        logger.info(
            f"{prefix}Start with 'tensorboard --logdir {opt.project}', view at http://localhost:6006/"
        )
        tb_writer = SummaryWriter(opt.save_dir)  # Tensorboard
    train(hyp, opt, device, tb_writer)

# Evolve hyperparameters (optional)
else:
    # Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
    meta = {
        # 'lr0': (1, 1e-5, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
        "lr0": (0.5, 1e-6, 1e-1),  # initial learning rate (SGD=1E-2, Adam=1E-3)
        # 'lrf': (1, 0.01, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
        "lrf": (1, 0.001, 1.0),  # final OneCycleLR learning rate (lr0 * lrf)
        "momentum": (0.3, 0.6, 0.98),  # SGD momentum/Adam beta1
        "weight_decay": (1, 0.0, 0.001),  # optimizer weight decay
        "warmup_epochs": (1, 0.0, 15.0),  # warmup epochs (fractions ok)
        "warmup_momentum": (1, 0.0, 0.95),  # warmup initial momentum
        "warmup_bias_lr": (1, 0.0, 0.2),  # warmup initial bias lr
        # 'box': (1, 0.02, 0.2),  # box loss gain
        "box": (1, 0.01, 1.0),  # box loss gain
        "cls": (1, 0.2, 4.0),  # cls loss gain
        "cls_pw": (1, 0.5, 2.0),  # cls BCELoss positive_weight
        "obj": (1, 0.2, 4.0),  # obj loss gain (scale with pixels)
        "obj_pw": (1, 0.5, 2.0),  # obj BCELoss positive_weight
        # 'iou_t': (0, 0.1, 0.7),  # IoU training threshold
        "iou_t": (0, 0.1, 0.9),  # IoU training threshold
        "anchor_t": (1, 2.0, 8.0),  # anchor-multiple threshold
        # 'anchors': (2, 2.0, 10.0),  # anchors per output grid (0 to ignore)
        "anchors": (0, 2.0, 10.0),  # anchors per output grid (0 to ignore)
        "fl_gamma": (
            0,
            0.0,
            2.0,
        ),  # focal loss gamma (efficientDet default gamma=1.5)
        # 'hsv_h': (1, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
        "hsv_h": (0, 0.0, 0.1),  # image HSV-Hue augmentation (fraction)
        # 'hsv_s': (1, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
        "hsv_s": (0, 0.0, 0.9),  # image HSV-Saturation augmentation (fraction)
        # 'hsv_v': (1, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
        "hsv_v": (0, 0.0, 0.9),  # image HSV-Value augmentation (fraction)
        # 'degrees': (1, 0.0, 45.0),  # image rotation (+/- deg)
        "degrees": (0, 0.0, 45.0),  # image rotation (+/- deg)
        # 'translate': (1, 0.0, 0.9),  # image translation (+/- fraction)
        "translate": (0, 0.0, 0.9),  # image translation (+/- fraction)
        # 'scale': (1, 0.0, 0.9),  # image scale (+/- gain)
        "scale": (0, 0.0, 0.1),  # image scale (+/- gain)
        # 'shear': (1, 0.0, 10.0),  # image shear (+/- deg)
        "shear": (0, 0.0, 10.0),  # image shear (+/- deg)
        # 'perspective': (0, 0.0, 0.001),  # image perspective (+/- fraction), range 0-0.001
        "perspective": (
            0,
            0.0,
            0.001,
        ),  # image perspective (+/- fraction), range 0-0.001
        # 'flipud': (1, 0.0, 1.0),  # image flip up-down (probability)
        "flipud": (0, 0.0, 1.0),  # image flip up-down (probability)
        # 'fliplr': (0, 0.0, 1.0),  # image flip left-right (probability)
        "fliplr": (0, 0.0, 1.0),  # image flip left-right (probability)
        # 'mosaic': (1, 0.0, 1.0),  # image mixup (probability)
        "mosaic": (0, 0.0, 1.0),  # image mixup (probability)
        # 'mixup': (1, 0.0, 1.0),   # image mixup (probability)
        "mixup": (0, 0.0, 1.0),  # image mixup (probability)
        # 'copy_paste': (1, 0.0, 1.0),  # segment copy-paste (probability)
        "copy_paste": (0, 0.0, 1.0),  # segment copy-paste (probability)
        # 'paste_in': (1, 0.0, 1.0)     # segment copy-paste (probability)
        "paste_in": (0, 0.0, 1.0),  # segment copy-paste (probability)
    }

    with open(opt.hyp, errors="ignore") as f:
        hyp = yaml.safe_load(f)  # load hyps dict
        if "anchors" not in hyp:  # anchors commented in hyp.yaml
            hyp["anchors"] = 3

    assert opt.local_rank == -1, "DDP mode not implemented for --evolve"
    opt.notest, opt.nosave = True, True  # only test/save final epoch
    # ei = [isinstance(x, (int, float)) for x in hyp.values()]  # evolvable indices
    yaml_file = Path(opt.save_dir) / "hyp_evolved.yaml"  # save best result here
    if opt.bucket:
        os.system(
            "gsutil cp gs://%s/evolve.txt ." % opt.bucket
        )  # download evolve.txt if exists

    for _ in range(100):  # FIXME: generations to evolve
        if Path(
            "evolve.txt"
        ).exists():  # if evolve.txt exists: select best hyps and mutate
            # Select parent(s)
            parent = "single"  # parent selection method: 'single' or 'weighted'
            x = np.loadtxt("evolve.txt", ndmin=2)
            n = min(5, len(x))  # number of previous results to consider
            x = x[np.argsort(-fitness(x))][:n]  # top n mutations
            w = fitness(x) - fitness(x).min()  # weights
            w = w if w.any() else None
            if parent == "single" or len(x) == 1:
                # x = x[random.randint(0, n - 1)]  # random selection
                x = x[random.choices(range(n), weights=w)[0]]  # weighted selection
            elif parent == "weighted":
                x = (x * w.reshape(n, 1)).sum(0) / w.sum()  # weighted combination

            # Mutate
            mp, s = 0.8, 0.2  # mutation probability, sigma
            npr = np.random
            npr.seed(int(time.time()))
            g = np.array([limits[0] for limits in meta.values()])  # gains 0-1
            ng = len(meta)
            v = np.ones(ng)
            while all(v == 1):  # mutate until a change occurs (prevent duplicates)
                v = (
                    g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1
                ).clip(0.3, 3.0)

            # `v` is ordered like `meta`, which contains "anchors", while the keys
            # mutated below are those of `hyp` without "anchors" (plus keys such as
            # "loss_ota" that `meta` does not list). Applying `v` by position would
            # shift every factor after "anchor_t" onto the wrong hyperparameter, so
            # the factor is looked up by name; keys without metadata keep factor 1.
            factor_by_key = dict(zip(meta, v))

            # for i, k in enumerate(hyp.keys()):  # plt.hist(v.ravel(), 300)
            keys = [k for k in hyp.keys() if k != "anchors"]
            # print(len(keys))
            for i, k in enumerate(keys):  # plt.hist(v.ravel(), 300)
                # column i + 7: the first 7 columns of the row are skipped
                hyp[k] = float(x[i + 7] * factor_by_key.get(k, 1.0))  # mutate

        # Constrain to limits
        for k, (_, lower, upper) in meta.items():
            hyp[k] = max(hyp[k], lower)  # lower limit
            hyp[k] = min(hyp[k], upper)  # upper limit
            hyp[k] = round(hyp[k], 5)  # significant digits

        # Train mutation
        # NOTE: `opt_evolve` is the same object as `opt`, so the epochs override
        # below also changes `opt`.
        opt_evolve = opt  # .copy()
        opt_evolve.epochs = 50  # FIXME: evolution epochs
        results = train(hyp.copy(), opt_evolve, device)

        # Write mutation results
        # print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
        # "anchors" is dropped before logging so that the logged hyperparameters
        # match the `keys` list read back above.
        hyp_plot = hyp.copy()
        del hyp_plot["anchors"]
        print_mutation(hyp_plot, results, yaml_file, opt.bucket)

    # Plot results
    plot_evolution(yaml_file)
    print(
        f"Hyperparameter evolution complete. Best results saved as: {yaml_file}\n"
        f"Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}"
    )

# Results

Visualize results for the trained model.

## Plot

This cell is adapted from the `plot_results` function in the [plots.py](https://github.com/ValV/yolov7/blob/master/yolov7/utils/plots.py) file.

This cell displays training and validation metrics in separate plots.

In [None]:
%matplotlib inline
from os import path as osp

from matplotlib import pyplot as plt


# Plot settings. With the defaults every `results*.txt` found in `opt.save_dir`
# is plotted over its full length and labelled automatically.
ids = ()  # run ids fetched as results<id>.txt (only used when `opt.bucket` is set)
labels = ()  # optional legend labels, one per results file
start, stop = 0, 0  # row range to plot; `stop = 0` means "up to the last row"

fig, ax = plt.subplots(5, 2, figsize=(12, 24), tight_layout=True)
ax = ax.ravel()

# Subplot titles in grid order: each loss row shows train (left) / val (right).
s = [
    "Box",
    "val Box",
    "Objectness",
    "val Objectness",
    "Classification",
    "val Classification",
    "Precision",
    "Recall",
    "mAP@0.5",
    "mAP@0.5:0.95",
]

if opt.bucket:
    # files = ['https://storage.googleapis.com/%s/results%g.txt' % (bucket, x) for x in id]
    # Keep Path objects in both branches so the labelling code below can rely
    # on `.parent` / `.stem` (plain strings have neither).
    files = [Path("results%g.txt" % x) for x in ids]
    if files:  # nothing to download when no ids are configured
        c = ("gsutil cp " + "%s " * len(files) + ".") % tuple(
            "gs://%s/results%g.txt" % (opt.bucket, x) for x in ids
        )
        os.system(c)
else:
    files = list(Path(opt.save_dir).glob("results*.txt"))

if len(files):
    for fi, f in enumerate(files):
        try:
            # Columns are loaded in the same order as the titles in `s`, so
            # row i of `results` belongs to subplot i. Columns 2-4 / 12-14 are
            # the train / val loss triplets and 8-11 the metrics.
            # NOTE(review): layout taken from the results.txt written by
            # train() -- confirm if that format ever changes.
            results = np.loadtxt(
                f, usecols=[2, 12, 3, 13, 4, 14, 8, 9, 10, 11], ndmin=2
            ).T
            n = results.shape[1]  # number of rows
            x = range(start, min(stop, n) if stop else n)
            # Fall back to the file name when the parent directory name is
            # empty (e.g. `save_dir = "."`), otherwise the legend has no entry.
            label = labels[fi] if len(labels) else (f.parent.stem or f.stem)
            for i in range(10):
                y = results[i, x]
                if i < 6:  # the first six subplots are losses
                    y[y == 0] = np.nan  # don't show zero loss values
                    # y /= y[0]  # normalize
                ax[i].plot(x, y, marker=".", label=label, linewidth=2, markersize=8)
                ax[i].set_title(s[i])
        except Exception as e:
            # Best effort: a broken file must not prevent plotting the others.
            print("Warning: Plotting error for %s; %s" % (f, e))

    ax[1].legend()
    # fig.savefig(Path(save_dir) / 'results.png', dpi=200)
    # plt.show() instead of fig.show(): the latter only warns on the non-GUI
    # inline backend selected by `%matplotlib inline`.
    plt.show()
else:
    print(
        "No results.txt files found in %s, nothing to plot." % osp.abspath(opt.save_dir)
    )

## Combine

Display training and validation metrics combined in a single plot.

In [None]:
# Legends for the ten loaded curves: curve i and curve i + 5 share subplot i.
s = [
    "train",
    "train",
    "train",
    "Precision",
    "mAP@0.5",
    "val",
    "val",
    "val",
    "Recall",
    "mAP@0.5:0.95",
]  # legends
# Subplot titles; the last panel shows the two mAP curves.
t = ["Box", "Objectness", "Classification", "P-R", "mAP-F1"]  # titles

for f in sorted(files):
    # Column order matches the legends in `s`: rows 0-2 / 5-7 are the train /
    # val losses, rows 3 / 8 are Precision / Recall and rows 4 / 9 the two mAP
    # values, so the [i, i + 5] pairing below puts P with R and mAP with mAP.
    # NOTE(review): assumes columns 8, 9, 10, 11 of results.txt hold
    # P, R, mAP@0.5, mAP@0.5:0.95 as written by train() -- confirm.
    results = np.loadtxt(f, usecols=[2, 3, 4, 8, 10, 12, 13, 14, 9, 11], ndmin=2).T
    n = results.shape[1]  # number of rows
    x = range(start, min(stop, n) if stop else n)
    fig, ax = plt.subplots(5, 1, figsize=(12, 24), tight_layout=True)
    ax = ax.ravel()
    for i in range(5):
        for j in [i, i + 5]:
            y = results[j, x]
            ax[i].plot(x, y, marker=".", label=s[j])
            # y_smooth = butter_lowpass_filtfilt(y)
            # ax[i].plot(x, np.gradient(y_smooth), marker='.', label=s[j])

        ax[i].set_title(t[i])
        ax[i].legend()
        # ax[i].set_ylabel(f.parent.stem) if i == 0 else None  # add filename
    # fig.savefig(f.replace('.txt', '.png'), dpi=200)
    # plt.show() instead of fig.show(): the latter only warns on the non-GUI
    # inline backend used by the notebook.
    plt.show()

# Export

The third elephant - `export` function. Export PyTorch model to ONNX format.

The `export` function is rendered from [export.py](https://github.com/ValV/yolov7/blob/master/export.py).

## Dependencies

Extra dependencies (including Nvidia TensorRT):
* `nvidia-pyindex` - necessary for correct Graph Surgeon installation;
* `onnx-simplifier` - model optimization support during export;
* `onnx-graphsurgeon` - Graph Surgeon for adding NMS into ONNX model;
* `protobuf` - correct version of protobuf (pain).

In [None]:
%pip install -U 'setuptools' 'nvidia-pyindex'
# %pip install 'onnx>=1.9.0'
%pip install 'onnx-simplifier>=0.3.6' 'onnx-graphsurgeon' 'protobuf~=3.19.6'

## Function

NOTE: this function was intended for ONNX export, so export to other formats will require some code modifications.

In [None]:
from yolov7.models.common import Conv
from yolov7.models.experimental import attempt_load, End2End
from yolov7.utils.activations import Hardswish, SiLU
from yolov7.utils.general import set_logging, check_img_size
from yolov7.utils.torch_utils import select_device
from yolov7.utils.add_nms import RegisterNMS


def export(
    weights: str,  # weights path
    img_size: "int | list" = [640, 640],  # image size: [height, width], [size] or size
    batch_size: int = 1,  # batch size
    dynamic: bool = False,  # dynamic ONNX axes
    dynamic_batch: bool = False,  # dynamic batch onnx for tensorrt and onnx-runtime (disables dynamic axes)
    grid: bool = False,  # export Detect() layer grid
    end2end: bool = False,  # export end2end onnx (disables dynamic axes)
    max_wh: int = None,  # None for tensorrt nms, int value for onnx-runtime nms
    topk_all: int = 100,  # topk objects for every image
    iou_thres: float = 0.45,  # iou threshold for NMS
    conf_thres: float = 0.25,  # conf threshold for NMS
    device: str = "cpu",  # cuda device, i.e. 0 or 0,1,2,3 or cpu
    simplify: bool = False,  # simplify onnx model
    include_nms: bool = False,  # export end2end onnx
    fp16: bool = False,  # CoreML FP16 half-precision export
    int8: bool = False,  # CoreML INT8 quantization
):
    """Export a PyTorch checkpoint to TorchScript, CoreML, TorchScript-Lite and ONNX.

    Every output file is written next to `weights`, with the `.pt` suffix
    replaced by `.torchscript.pt`, `.mlmodel`, `.torchscript.ptl` and `.onnx`.

    Failures of the TorchScript, CoreML and TorchScript-Lite exports are printed
    and skipped (best effort). A failure of the ONNX export is printed and then
    re-raised.

    Args:
        weights: path to the `.pt` checkpoint to load.
        img_size: input size as `[height, width]`; a single-item list or a plain
            int is expanded to a square. The caller's object is never modified.
        batch_size: batch dimension of the dummy input used for tracing.
        dynamic: export ONNX with dynamic batch/height/width axes (ignored when
            `end2end` or `dynamic_batch` is set).
        dynamic_batch: export ONNX with a dynamic batch axis only.
        grid: export the last layer with its grid; required for `end2end` to
            take effect.
        end2end: wrap the model in `End2End` so that NMS is part of the graph.
        max_wh: `None` selects the TensorRT NMS outputs, an int selects the
            onnxruntime variant.
        topk_all, iou_thres, conf_thres: NMS settings forwarded to `End2End`.
        device: device string passed to `select_device`.
        simplify: run `onnxsim.simplify` on the exported ONNX model.
        include_nms: register the NMS plugin on the saved ONNX file.
        fp16, int8: CoreML weight quantisation (only applied on macOS).

    Returns:
        None.
    """
    import warnings  # only needed for the CoreML quantisation branch

    # Normalise to a private two-item list. The former `img_size *= 2` extended
    # the caller's list in place and failed on a plain int.
    if isinstance(img_size, int):
        img_size = [img_size]
    img_size = list(img_size)
    if len(img_size) == 1:
        img_size = img_size * 2  # expand
    dynamic = dynamic and not end2end and not dynamic_batch
    set_logging()
    t = time.time()

    # Load PyTorch model
    device = select_device(device)
    model = attempt_load(weights, map_location=device)  # load FP32 model
    labels = model.names

    # Checks
    gs = int(max(model.stride))  # grid size (max stride)
    img_size = [
        check_img_size(x, gs) for x in img_size
    ]  # verify img_size are gs-multiples

    # Input
    img = torch.zeros(batch_size, 3, *img_size).to(
        device
    )  # image size(1, 3, 320, 192) iDetection

    # Update model
    for m in model.modules():
        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
        if isinstance(m, Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.Hardswish):
                m.act = Hardswish()
            elif isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        # elif isinstance(m, models.yolo.Detect):
        #     m.forward = m.forward_export  # assign forward (optional)
    model.model[-1].export = not grid  # set Detect() layer grid export
    y = model(img)  # dry run
    if include_nms:
        model.model[-1].include_nms = True
        y = None  # marks the two-output ("classes", "boxes") ONNX layout below

    # TorchScript export
    try:
        print("\nStarting TorchScript export with torch %s..." % torch.__version__)
        f = weights.replace(".pt", ".torchscript.pt")  # filename
        ts = torch.jit.trace(model, img, strict=False)
        ts.save(f)
        print("TorchScript export success, saved as %s" % f)
    except Exception as e:
        print("TorchScript export failure: %s" % e)

    # CoreML export
    try:
        import coremltools as ct

        print("\nStarting CoreML export with coremltools %s..." % ct.__version__)
        # convert model from torchscript and apply pixel scaling as per detect.py
        ct_model = ct.convert(
            ts,
            inputs=[
                ct.ImageType("image", shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])
            ],
        )
        bits, mode = (
            (8, "kmeans_lut") if int8 else (16, "linear") if fp16 else (32, None)
        )
        if bits < 32:
            if sys.platform.lower() == "darwin":  # quantization only supported on macOS
                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        "ignore", category=DeprecationWarning
                    )  # suppress numpy==1.20 float warning
                    ct_model = (
                        ct.models.neural_network.quantization_utils.quantize_weights(
                            ct_model, bits, mode
                        )
                    )
            else:
                print("quantization only supported on macOS, skipping...")

        f = weights.replace(".pt", ".mlmodel")  # filename
        ct_model.save(f)
        print("CoreML export success, saved as %s" % f)
    except Exception as e:
        print("CoreML export failure: %s" % e)

    # TorchScript-Lite export
    try:
        # Imported here so that a missing mobile optimizer only fails this step.
        from torch.utils.mobile_optimizer import optimize_for_mobile

        print("\nStarting TorchScript-Lite export with torch %s..." % torch.__version__)
        f = weights.replace(".pt", ".torchscript.ptl")  # filename
        tsl = torch.jit.trace(model, img, strict=False)
        tsl = optimize_for_mobile(tsl)
        tsl._save_for_lite_interpreter(f)
        print("TorchScript-Lite export success, saved as %s" % f)
    except Exception as e:
        print("TorchScript-Lite export failure: %s" % e)

    # ONNX export
    try:
        import onnx

        print("\nStarting ONNX export with onnx %s..." % onnx.__version__)
        f = weights.replace(".pt", ".onnx")  # filename
        model.eval()
        output_names = ["classes", "boxes"] if y is None else ["output"]
        dynamic_axes = None
        # Symbolic output dims for the TensorRT end2end graph; stays None for
        # every other configuration so the renaming step below is skipped.
        shapes = None
        if dynamic:
            dynamic_axes = {
                "images": {
                    0: "batch",
                    2: "height",
                    3: "width",
                },  # size(1, 3, 640, 640)
                "output": {0: "batch", 2: "y", 3: "x"},
            }
        if dynamic_batch:
            batch_size = "batch"
            dynamic_axes = {
                "images": {
                    0: "batch",
                },
            }
            if end2end and max_wh is None:
                # TensorRT end2end
                output_axes = {
                    "num_dets": {0: "batch"},
                    "det_boxes": {0: "batch"},
                    "det_scores": {0: "batch"},
                    "det_classes": {0: "batch"},
                }
            else:
                # Onnxruntime
                output_axes = {
                    "output": {0: "batch"},
                }
            dynamic_axes.update(output_axes)
        if grid:
            if end2end:
                # End2end Detect() layer grid export
                # Parenthesised: `%` binds tighter than the conditional, which
                # used to print a bare "onnxruntime" when max_wh was set.
                print(
                    "\nStarting export end2end onnx model for %s..."
                    % ("TensorRT" if max_wh is None else "onnxruntime")
                )
                model = End2End(
                    model,
                    topk_all,
                    iou_thres,
                    conf_thres,
                    max_wh,
                    device,
                    len(labels),
                )
                if max_wh is None:
                    # TensorRT end2end
                    output_names = [
                        "num_dets",
                        "det_boxes",
                        "det_scores",
                        "det_classes",
                    ]
                    # Flattened dims of the four outputs, consumed in order below.
                    shapes = [
                        batch_size,
                        1,
                        batch_size,
                        topk_all,
                        4,
                        batch_size,
                        topk_all,
                        batch_size,
                        topk_all,
                    ]
                else:
                    # Onnxruntime end2end
                    output_names = ["output"]
            else:
                # Basic Detect() layer grid export
                model.model[-1].concat = True

        torch.onnx.export(
            model,
            img,
            f,
            verbose=False,
            opset_version=12,
            input_names=["images"],
            output_names=output_names,
            dynamic_axes=dynamic_axes,
        )

        # Checks
        onnx_model = onnx.load(f)  # load onnx model
        onnx.checker.check_model(onnx_model)  # check onnx model

        if shapes is not None:
            # TensorRT end2end: name the output dims. Guarded by `shapes`
            # because `end2end` without `grid` never builds the End2End model
            # and previously hit an undefined `shapes` here.
            for i in onnx_model.graph.output:
                for j in i.type.tensor_type.shape.dim:
                    j.dim_param = str(shapes.pop(0))

        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model

        # # Metadata
        # d = {'stride': int(max(model.stride))}
        # for k, v in d.items():
        #     meta = onnx_model.metadata_props.add()
        #     meta.key, meta.value = k, str(v)
        # onnx.save(onnx_model, f)

        if simplify:
            try:
                import onnxsim

                print("\nStarting to simplify ONNX...")
                onnx_model, check = onnxsim.simplify(onnx_model)
                assert check, "assert check failed"
            except Exception as e:
                # Best effort: the unsimplified model is still saved below.
                print(f"Simplifier failure: {e}")

        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
        onnx.save(onnx_model, f)
        print("ONNX export success, saved as %s" % f)

        if include_nms:
            print("Registering NMS plugin for ONNX...")
            mo = RegisterNMS(f)
            mo.register_nms()
            mo.save(f)

    except Exception as e:
        # ONNX is the primary target of this function: report, then propagate
        # (the message used to sit after `raise` and was never printed).
        print("ONNX export failure: %s" % e)
        raise

    # Finish
    print(
        "\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron."
        % (time.time() - t)
    )

In [None]:
from os import makedirs
from glob import glob

## Config

Dummy config. The only parameter it requires is `save_dir` - that is the path where experiment results have been written. By default the path `./runs/train/exp-xxx` is set before training.

To convert a model without running the whole training procedure, set `opt.save_dir` to a custom path that contains a `weights` subdirectory with the PyTorch models.

> This cell **must** stay commented out for automatic export after training.

In [None]:
# import time

# from dataclasses import dataclass
# from os import path as osp


# @dataclass
# class Config: ...


# opt = Config()
# opt.save_dir = "."

## Export

Some notes on exporting:
* `path_export_source` - is the path where PyTorch models reside;
* `size_input` - the ONNX model input size (necessary for CPU version of NMS).

Parameters explanation:
* `weights` - a path to a PyTorch model;
* `iou_thres` - NMS IoU threshold value (merge boxes that overlap more than this value); use higher values for higher object density/overlap;
* `conf_thres` - NMS object confidence threshold (bboxes below this value will be dropped);
* `grid` - export last Detect() layer (not quite sure about this argument, but it works);
* `end2end` - export the model with NMS embedded into ONNX graph;
* `max_wh` - maximum size of NMS matrix;
* `simplify` - optimize (fuse some nodes, etc).

> If `max_wh` is `None` (default), then NMS in the ONNX model will be a TensorRT op and will not run on CPU.

Options that should not be enabled when exporting end-to-end ONNX model with NMS for CPU:
* `dynamic`;
* `dynamic_batch`;
* `include_nms`.

> TODO: try dynamic batch.

In [None]:
from os import path as osp  # keeps this cell runnable without the plot cell

path_export_source = osp.join(opt.save_dir, "weights")
# path_export_target = osp.join(opt.save_dir, 'models')

# makedirs(path_export_target, exist_ok=True)

size_input = [3840, 2176]  # [3840, 2160] (4K) + multiple of 32

# Resolve the export resolution once. `opt.img_size` may be a sequence or a
# single number; without it fall back to `size_input` in reversed order.
if hasattr(opt, "img_size"):
    if hasattr(opt.img_size, "__iter__"):
        export_img_size = list(opt.img_size)
    else:
        export_img_size = [int(opt.img_size)]
else:
    export_img_size = size_input[::-1]
export_max_wh = max(export_img_size)  # size of the NMS matrix for the CPU (onnxruntime) NMS

# Sorted for a reproducible export order; the pattern matches four-character
# checkpoint names and the `init.pt` checkpoint is skipped.
for path_weights in sorted(glob(osp.join(path_export_source, "????.pt"))):
    if osp.basename(path_weights) == "init.pt":
        continue
    print(f"Exporting {osp.basename(path_weights)}...")
    export(
        weights=path_weights,
        img_size=list(export_img_size),  # fresh copy: export() may modify its argument
        iou_thres=0.25,  # intersection over union threshold for NMS
        conf_thres=0.25,  # confidence threshold for NMS
        grid=True,
        end2end=True,
        max_wh=export_max_wh,
        simplify=True,
    )
    print("\n\n")

Check exported ONNX models.

In [None]:
%ls {path_export_source}

# Archive

Pack training and export results into an archive for downloading.

In [None]:
from os import path as osp
from shutil import make_archive


# Resolve the run directory first: with a relative "." (the dummy export config)
# or a trailing slash, dirname()/basename() would yield an empty root or base
# directory and the archive call would fail or pack the wrong tree.
archive_source = osp.abspath(opt.save_dir)
# Writes <run directory>.zip next to the run directory, containing that directory.
make_archive(
    archive_source, "zip", osp.dirname(archive_source), osp.basename(archive_source)
)