In [1]:
import argparse
import os
import sys
from pathlib import Path

# The working directory is used as the YOLOv5 root here, in place of the
# commented-out ``Path(__file__).resolve()`` approach.
ROOT = os.getcwd()
if ROOT not in sys.path:  # register the root on the import path only once
    sys.path.append(ROOT)
ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # root re-expressed relative to the cwd

from utils.general import (
    LOGGER,
    TQDM_BAR_FORMAT,
    check_amp,
    check_dataset,
    check_file,
    check_git_info,
    check_git_status,
    check_img_size,
    check_requirements,
    check_suffix,
    check_yaml,
    colorstr,
    get_latest_run,
    increment_path,
    init_seeds,
    intersect_dicts,
    labels_to_class_weights,
    labels_to_image_weights,
    methods,
    one_cycle,
    print_args,
    print_mutation,
    strip_optimizer,
    yaml_save,
)

# Distributed-run settings read from the environment, each with a fallback used when
# the variable is unset (see https://pytorch.org/docs/stable/elastic/run.html).
LOCAL_RANK = int(os.environ.get("LOCAL_RANK", -1))
RANK = int(os.environ.get("RANK", -1))
WORLD_SIZE = int(os.environ.get("WORLD_SIZE", 1))
GIT_INFO = check_git_info()  # whatever utils.general.check_git_info() reports

def parse_opt(known=False):
    """
    Build the YOLOv5 training argument parser and parse the process arguments.

    Args:
        known (bool): When True, parse with ``parse_known_args()`` and return only the namespace of
            recognised options; when False, parse with ``parse_args()``.

    Returns:
        argparse.Namespace: The parsed options.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument  # short alias so each option fits on one line

    # Training arguments
    add("--weights", type=str, default=ROOT / "yolov5s.pt", help="initial weights path")
    add("--cfg", type=str, default="", help="model.yaml path")
    add("--data", type=str, default=ROOT / "data/SRSDD.yaml", help="dataset.yaml path")
    add("--hyp", type=str, default=ROOT / "data/hyps/hyp.scratch-low.yaml", help="hyperparameters path")
    add("--epochs", type=int, default=30, help="total training epochs")
    add("--batch-size", type=int, default=16, help="total batch size for all GPUs, -1 for autobatch")
    add("--imgsz", "--img", "--img-size", type=int, default=640, help="train, val image size (pixels)")
    add("--rect", action="store_true", help="rectangular training")
    add("--resume", nargs="?", const=True, default=False, help="resume most recent training")
    add("--nosave", action="store_true", help="only save final checkpoint")
    add("--noval", action="store_true", help="only validate final epoch")
    add("--noautoanchor", action="store_true", help="disable AutoAnchor")
    add("--noplots", action="store_true", help="save no plot files")
    add("--evolve", type=int, nargs="?", const=300, help="evolve hyperparameters for x generations")
    add("--evolve_population", type=str, default=ROOT / "data/hyps", help="location for loading population")
    add("--resume_evolve", type=str, default=None, help="resume evolve from last generation")
    add("--bucket", type=str, default="", help="gsutil bucket")
    add("--cache", type=str, nargs="?", const="ram", help="image --cache ram/disk")
    add("--image-weights", action="store_true", help="use weighted image selection for training")
    add("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu")
    add("--multi-scale", action="store_true", help="vary img-size +/- 50%%")
    add("--single-cls", action="store_true", help="train multi-class data as single-class")
    add("--optimizer", type=str, choices=["SGD", "Adam", "AdamW"], default="SGD", help="optimizer")
    add("--sync-bn", action="store_true", help="use SyncBatchNorm, only available in DDP mode")
    add("--workers", type=int, default=0, help="max dataloader workers (per RANK in DDP mode)")
    add("--project", default=ROOT / "runs/train", help="save to project/name")
    add("--name", default="exp", help="save to project/name")
    add("--exist-ok", action="store_true", help="existing project/name ok, do not increment")
    add("--quad", action="store_true", help="quad dataloader")
    add("--cos-lr", action="store_true", help="cosine LR scheduler")
    add("--label-smoothing", type=float, default=0.0, help="Label smoothing epsilon")
    add("--patience", type=int, default=100, help="EarlyStopping patience (epochs without improvement)")
    add("--freeze", nargs="+", type=int, default=[0], help="Freeze layers: backbone=10, first3=0 1 2")
    add("--save-period", type=int, default=-1, help="Save checkpoint every x epochs (disabled if < 1)")
    add("--seed", type=int, default=0, help="Global training seed")
    add("--local_rank", type=int, default=-1, help="Automatic DDP Multi-GPU argument, do not modify")

    # Logger arguments
    add("--entity", default=None, help="Entity")
    add("--upload_dataset", nargs="?", const=True, default=False, help='Upload data, "val" option')
    add("--bbox_interval", type=int, default=-1, help="Set bounding-box image logging interval")
    add("--artifact_alias", type=str, default="latest", help="Version of dataset artifact to use")

    # NDJSON logging
    add("--ndjson-console", action="store_true", help="Log ndjson to console")
    add("--ndjson-file", action="store_true", help="Log ndjson to file")

    if known:
        # parse_known_args() returns (namespace, leftover argv); only the namespace is handed back
        namespace, _leftover = parser.parse_known_args()
        return namespace
    return parser.parse_args()

In [2]:
# Parse the arguments with known=True
opt = parse_opt(known=True)

# Only when RANK is -1 or 0: print the parsed arguments and check the git status
if RANK in (-1, 0):
    print_args(vars(opt))
    check_git_status()

[34m[1m4255116966: [0mweights=yolov5s.pt, cfg=, data=data\SRSDD.yaml, hyp=data\hyps\hyp.scratch-low.yaml, epochs=30, batch_size=16, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, evolve_population=data\hyps, resume_evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=0, project=runs\train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest, ndjson_console=False, ndjson_file=False


[34m[1mgithub: [0mskipping check (offline), for updates see https://github.com/ultralytics/yolov5


In [3]:
# Pass the dataset / model / hyperparameter options through their check helpers and
# store the weights and project options as plain strings.
opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
    check_file(opt.data), check_yaml(opt.cfg), check_yaml(opt.hyp), str(opt.weights), str(opt.project)
)

# Echo the five resulting values, one per line
print(opt.data, opt.cfg, opt.hyp, opt.weights, opt.project, sep="\n")

data\SRSDD.yaml

data\hyps\hyp.scratch-low.yaml
yolov5s.pt
runs\train


In [4]:
from utils.loggers.comet.comet_utils import check_comet_resume
from utils.downloads import is_url
import yaml
import torch

# Decide between resuming an earlier run and starting a new one.
# Resume is taken only when opt.resume is set, check_comet_resume(opt) is falsy and --evolve is not used.
if opt.resume and not check_comet_resume(opt) and not opt.evolve:
    # A string opt.resume is treated as a checkpoint path; otherwise get_latest_run() supplies it
    last = Path(check_file(opt.resume) if isinstance(opt.resume, str) else get_latest_run())
    opt_yaml = last.parent.parent / "opt.yaml"  # train options yaml
    opt_data = opt.data  # original dataset
    if opt_yaml.is_file():
        with open(opt_yaml, errors="ignore") as f:
            d = yaml.safe_load(f)
    else:
        # No opt.yaml two levels above the checkpoint: read the options stored under the checkpoint's "opt" key
        d = torch.load(last, map_location="cpu")["opt"]
    opt = argparse.Namespace(**d)  # replace
    opt.cfg, opt.weights, opt.resume = "", str(last), True  # reinstate
    if is_url(opt_data):
        opt.data = check_file(opt_data)  # avoid HUB resume auth timeout
else:   # start a new training run
    opt.data, opt.cfg, opt.hyp, opt.weights, opt.project = (
        check_file(opt.data),# original note: checks the file exists, otherwise raises or downloads — TODO confirm against check_file
        check_yaml(opt.cfg),
        check_yaml(opt.hyp),
        str(opt.weights),
        str(opt.project),
    )  # checks
    assert len(opt.cfg) or len(opt.weights), "either --cfg or --weights must be specified"
    if opt.evolve:
        if opt.project == str(ROOT / "runs/train"):  # if default project name, rename to runs/evolve
            opt.project = str(ROOT / "runs/evolve")
        opt.exist_ok, opt.resume = opt.resume, False  # pass resume to exist_ok and disable resume
    if opt.name == "cfg":
        opt.name = Path(opt.cfg).stem  # use model.yaml as name
    # original note: increment_path appends an incrementing suffix when the path already exists — TODO confirm
    opt.save_dir = str(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))


In [5]:
# Device selection and DDP (Distributed Data Parallel) setup.
# The DDP branch only runs when LOCAL_RANK != -1; in a single-process run it is skipped.
from datetime import timedelta  # required by the init_process_group timeout below

import torch.distributed as dist

from utils.torch_utils import select_device

device = select_device(opt.device, batch_size=opt.batch_size)
if LOCAL_RANK != -1:
    # Options and settings that are rejected for multi-GPU DDP training
    msg = "is not compatible with YOLOv5 Multi-GPU DDP training"
    assert not opt.image_weights, f"--image-weights {msg}"
    assert not opt.evolve, f"--evolve {msg}"
    assert opt.batch_size != -1, f"AutoBatch with --batch-size -1 {msg}, please pass a valid --batch-size"
    assert opt.batch_size % WORLD_SIZE == 0, f"--batch-size {opt.batch_size} must be multiple of WORLD_SIZE"
    assert torch.cuda.device_count() > LOCAL_RANK, "insufficient CUDA devices for DDP command"

    # Bind this process to the CUDA device whose index is LOCAL_RANK
    torch.cuda.set_device(LOCAL_RANK)
    device = torch.device("cuda", LOCAL_RANK)

    # Use the NCCL backend when available, otherwise gloo; timeout is 10800 seconds
    dist.init_process_group(
        backend="nccl" if dist.is_nccl_available() else "gloo", timeout=timedelta(seconds=10800)
    )

YOLOv5  299d3d7 Python-3.8.18 torch-1.13.1+cu116 CUDA:0 (NVIDIA GeForce RTX 3050 Laptop GPU, 4096MiB)



In [6]:
# Copy the options used by the following cells out of ``opt`` into notebook-level names
(save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze) = (
    Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights, opt.single_cls, opt.evolve, opt.data,
    opt.cfg, opt.resume, opt.noval, opt.nosave, opt.workers, opt.freeze,
)

# Echo every value as "<name>: <value>", one per line
print(
    "\n".join(
        f"{label}: {value}"
        for label, value in (
            ("save_dir", save_dir),
            ("epochs", epochs),
            ("batch_size", batch_size),
            ("weights", weights),
            ("single_cls", single_cls),
            ("evolve", evolve),
            ("data", data),
            ("cfg", cfg),
            ("resume", resume),
            ("noval", noval),
            ("nosave", nosave),
            ("workers", workers),
            ("freeze", freeze),
        )
    )
)



save_dir: runs\train\exp4
epochs: 30
batch_size: 16
weights: yolov5s.pt
single_cls: False
evolve: None
data: data\SRSDD.yaml
cfg: 
resume: False
noval: False
nosave: False
workers: 0
freeze: [0]


In [7]:
w = save_dir / "weights"  # directory for the checkpoint files

# With --evolve only the parent (run) directory is created; otherwise the weights directory itself
if evolve:
    w.parent.mkdir(parents=True, exist_ok=True)
else:
    w.mkdir(parents=True, exist_ok=True)

# Paths of the two checkpoint files inside the weights directory
last = w / "last.pt"
best = w / "best.pt"

print(f"weights_dir: {w}")
print(f"last pt: {last}")
print(f"best pt: {best}")

weights_dir: runs\train\exp4\weights
last pt: runs\train\exp4\weights\last.pt
best pt: runs\train\exp4\weights\best.pt


In [8]:
# opt.hyp is either a path string to a YAML file or an already-loaded mapping
hyp = opt.hyp
if isinstance(hyp, str):
    with open(hyp, errors="ignore") as f:
        hyp = yaml.safe_load(f)  # parse the YAML file into a dict

# Log every hyperparameter as "name=value", comma separated
LOGGER.info(
    colorstr("hyperparameters: ") + ", ".join("{}={}".format(name, value) for name, value in hyp.items())
)

# Keep a copy on opt so the hyperparameters are saved to checkpoints
opt.hyp = hyp.copy()

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0, copy_paste=0.0


In [9]:
# Unless evolving, write the hyperparameters and the options into the run directory
if not evolve:
    yaml_save(save_dir.joinpath("hyp.yaml"), hyp)
    yaml_save(save_dir.joinpath("opt.yaml"), vars(opt))


In [10]:
from utils.loggers import LOGGERS, Loggers
from utils.callbacks import Callbacks
callbacks = Callbacks()
callbacks.run("on_pretrain_routine_start")

# Loggers are only created when RANK is -1 or 0; data_dict stays None otherwise
data_dict = None
if RANK in {-1, 0}:
    # Start from the default logger list and add the NDJSON loggers whose flags are set on opt
    include_loggers = list(LOGGERS)
    include_loggers += [flag for flag in ("ndjson_console", "ndjson_file") if getattr(opt, flag, False)]

    loggers = Loggers(
        save_dir=save_dir, weights=weights, opt=opt, hyp=hyp, logger=LOGGER, include=tuple(include_loggers)
    )

    # Register every name returned by methods(loggers) as a callback action
    for k in methods(loggers):
        callbacks.register_action(k, callback=getattr(loggers, k))

    # Dataset provided through the loggers' remote_dataset attribute
    data_dict = loggers.remote_dataset
    if resume:
        # When resuming, re-read these settings from opt
        weights, epochs, hyp, batch_size = opt.weights, opt.epochs, opt.hyp, opt.batch_size


[34m[1mComet: [0mrun 'pip install comet_ml' to automatically track and visualize YOLOv5  runs in Comet
[34m[1mTensorBoard: [0mStart with 'tensorboard --logdir runs\train', view at http://localhost:6006/


In [11]:
from utils.torch_utils import torch_distributed_zero_first
 # Config
plots = not (evolve or opt.noplots)  # plots only when neither evolving nor --noplots
cuda = device.type != "cpu"
init_seeds(opt.seed + 1 + RANK, deterministic=True)

# Keep a dataset dict obtained earlier; otherwise get one from check_dataset
with torch_distributed_zero_first(LOCAL_RANK):
    if not data_dict:
        data_dict = check_dataset(data)
train_path, val_path = data_dict["train"], data_dict["val"]

# Number of classes: forced to 1 in single-class mode
if single_cls:
    nc = 1
else:
    nc = int(data_dict["nc"])

# Class names: a single generic "item" in single-class mode unless the dataset already has exactly one name
if single_cls and len(data_dict["names"]) != 1:
    names = {0: "item"}
else:
    names = data_dict["names"]

# True when the validation path is a string ending in the COCO val2017 list
is_coco = isinstance(val_path, str) and val_path.endswith("coco/val2017.txt")

print(f"class names: {names}")

class names: {0: 'Container', 1: 'ore-oil', 2: 'Dredger', 3: 'LawEnforc', 4: 'Cell-Container', 5: 'Fishing'}


In [12]:
from utils.downloads import attempt_download
from models.yolo import Model

# Model: build the network and, when a .pt checkpoint is given, transfer matching weights from it
check_suffix(weights, ".pt")  # check weights
pretrained = weights.endswith(".pt")
if pretrained:              # weights point at a .pt checkpoint, i.e. a pretrained model
    # original note: runs on local rank 0 only so DDP processes do not each repeat the download — TODO confirm
    with torch_distributed_zero_first(LOCAL_RANK):
        weights = attempt_download(weights)  # download if not found locally
    # NOTE(review): torch.load unpickles the checkpoint file; only load weights from a trusted source
    ckpt = torch.load(weights, map_location="cpu")  # load checkpoint to CPU to avoid CUDA memory leak
    # Model definition comes from cfg when given, otherwise from the yaml stored on the checkpoint's model
    model = Model(cfg or ckpt["model"].yaml, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create
    # "anchor" keys are excluded from the transfer when cfg or hyp anchors are set and the run is not a resume
    exclude = ["anchor"] if (cfg or hyp.get("anchors")) and not resume else []  # exclude keys
    csd = ckpt["model"].float().state_dict()  # checkpoint state_dict as FP32
    csd = intersect_dicts(csd, model.state_dict(), exclude=exclude)  # intersect
    model.load_state_dict(csd, strict=False)  # load
    LOGGER.info(f"Transferred {len(csd)}/{len(model.state_dict())} items from {weights}")  # report
else:
    # No checkpoint: build the model from cfg alone
    model = Model(cfg, ch=3, nc=nc, anchors=hyp.get("anchors")).to(device)  # create
amp = check_amp(model)  # check AMP

Overriding model.yaml nc=80 with nc=6

                 from  n    params  module                                  arguments                     
  0                -1  1      3520  models.common.Conv                      [3, 32, 6, 2, 2]              
  1                -1  1     18560  models.common.Conv                      [32, 64, 3, 2]                
  2                -1  1     18816  models.common.C3                        [64, 64, 1]                   
  3                -1  1     73984  models.common.Conv                      [64, 128, 3, 2]               
  4                -1  2    115712  models.common.C3                        [128, 128, 2]                 
  5                -1  1    295424  models.common.Conv                      [128, 256, 3, 2]              
  6                -1  3    625152  models.common.C3                        [256, 256, 3]                 
  7                -1  1   1180672  models.common.Conv                      [256, 512, 3, 2]             