In [1]:
import os

# 定义基础路径
base_dir = 'archive/Pest24/VOCdevkit/voc2007'
imagesets_dir = os.path.join(base_dir, 'ImageSets')
yolo_path_dir = os.path.join(base_dir, 'yolo_path')
images_dir = os.path.join(base_dir, 'images') # 假设图片在 images 目录

# 确保 yolo_path 目录存在
os.makedirs(yolo_path_dir, exist_ok=True)

# 处理 train, val, test 文件
for split in ['train', 'val', 'test']:
    imageset_file = os.path.join(imagesets_dir, f'{split}.txt')
    yolo_path_file = os.path.join(yolo_path_dir, f'{split}.txt')

    if not os.path.exists(imageset_file):
        print(f"警告：找不到文件 {imageset_file}")
        continue

    with open(imageset_file, 'r') as f:
        image_names = f.read().strip().split()

    # 写入相对路径
    with open(yolo_path_file, 'w') as f:
        for name in image_names:
            image_path = os.path.join(images_dir, f'{name}.jpg').replace('\\', '/')
            f.write(image_path + '\n')
    
    print(f"已成功生成 {yolo_path_file}")

print("\n所有路径文件已更新完毕。")


已成功生成 archive/Pest24/VOCdevkit/voc2007\yolo_path\train.txt
已成功生成 archive/Pest24/VOCdevkit/voc2007\yolo_path\val.txt
已成功生成 archive/Pest24/VOCdevkit/voc2007\yolo_path\test.txt

所有路径文件已更新完毕。


In [2]:
import os

# 返回到项目根目录
os.chdir('d:\\.FileModel')

# 列出 yolo_path 目录的内容
yolo_path_dir = 'archive/Pest24/VOCdevkit/voc2007/yolo_path'
if os.path.exists(yolo_path_dir):
    files = os.listdir(yolo_path_dir)
    print(f"'{yolo_path_dir}' 目录中的文件：")
    for file in files:
        print(file)
else:
    print(f"目录 '{yolo_path_dir}' 不存在。")

'archive/Pest24/VOCdevkit/voc2007/yolo_path' 目录中的文件：
test.cache
test.txt
train.cache
train.txt
val.txt


# 阶段一：Warmup 训练

本阶段只训练模型的头部，冻结主干网络，使用较小学习率，防止破坏预训练特征。

- 预训练模型：yolo11n.pt
- 数据集配置：pest24.yaml
- 冻结主干网络（backbone）
- 只训练头部（head）
- 学习率较小
- 训练周期较短
- 启用权重衰减、LoRA等正则化


In [2]:
import sys
import ultralytics
print(sys.executable)
print(ultralytics.__file__)
from ultralytics import YOLO
print("Successfully imported YOLO")

D:\.FileModel\.venv\Scripts\python.exe
D:\.FileModel\.venv\Lib\site-packages\ultralytics\__init__.py
Successfully imported YOLO


In [3]:
# 检查 GPU/CUDA 可用性并打印详细信息
import sys
import torch
import subprocess
import shutil

print(f"Python executable: {sys.executable}")
print(f"torch.__version__: {torch.__version__}")

cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")
print(f"CUDA device count: {torch.cuda.device_count()}")

for i in range(torch.cuda.device_count()):
    try:
        name = torch.cuda.get_device_name(i)
    except Exception:
        name = 'Unknown'
    print(f"GPU {i}: {name}")

# 尝试调用 nvidia-smi 获取更详细的状态（如果可用）
if shutil.which('nvidia-smi'):
    try:
        out = subprocess.check_output(['nvidia-smi', '--query-gpu=index,name,memory.total,memory.free,driver_version', '--format=csv,noheader,nounits'], stderr=subprocess.STDOUT)
        print('\nnvidia-smi output:')
        print(out.decode())
    except subprocess.CalledProcessError as e:
        print('nvidia-smi command failed:', e.output.decode() if e.output else e)
    except Exception as e:
        print('nvidia-smi query error:', e)
else:
    print('nvidia-smi not found in PATH')

# 建议用于 model.train 的 device 变量
device = 0 if cuda_available and torch.cuda.device_count() > 0 else 'cpu'
print(f"Recommended training device: {device}")

Python executable: D:\.FileModel\.venv\Scripts\python.exe
torch.__version__: 2.8.0+cu126
CUDA available: True
CUDA device count: 1
GPU 0: NVIDIA GeForce RTX 4060 Laptop GPU

nvidia-smi output:
0, NVIDIA GeForce RTX 4060 Laptop GPU, 8188, 7495, 560.94

Recommended training device: 0


In [1]:
# 训练参数优化探索
import psutil
import torch
from math import ceil
import gc
import sys
import subprocess
from pathlib import Path
import datetime

# 主动清理内存
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

def estimate_batch_size():
    # 获取系统内存信息
    mem = psutil.virtual_memory()
    gpu_mem = 0
    if torch.cuda.is_available():
        gpu_mem = torch.cuda.get_device_properties(0).total_memory / 1024**3  # GB
    
    # 估算合理的batch size
    # 假设每张图片约占用 100MB 显存/内存
    if gpu_mem > 0:
        max_batch = int(gpu_mem * 0.7 / 0.1)  # 使用70%显存
    else:
        max_batch = int(mem.available / 1024**3 * 0.5 / 0.1)  # 使用50%可用内存
    
    # 确保batch size是8的倍数（有利于GPU优化）
    batch_size = max(8, min(32, 8 * ceil(max_batch/8)))
    return batch_size

def estimate_workers():
    # 获取CPU核心数
    cpu_count = psutil.cpu_count(logical=False)  # 物理核心数
    if cpu_count is None:
        cpu_count = psutil.cpu_count()  # 逻辑核心数
    
    # workers数量：通常设置为CPU核心数的1/2到1/4
    workers = max(1, min(8, cpu_count // 2))
    return workers

# 估算最佳参数
batch_size = estimate_batch_size()
num_workers = estimate_workers()

print(f"Estimated optimal batch size: {batch_size}")
print(f"Estimated optimal num_workers: {num_workers}")

# 显示当前系统资源状态
print("\nSystem Resources:")
print(f"CPU Usage: {psutil.cpu_percent()}%")
mem = psutil.virtual_memory()
print(f"Memory: {mem.percent}% used, {mem.available/1024**3:.1f}GB available")
if torch.cuda.is_available():
    print(f"GPU Memory: {torch.cuda.memory_allocated()/1024**3:.1f}GB allocated")

# 配置 TensorBoard
try:
    from torch.utils.tensorboard import SummaryWriter
except ModuleNotFoundError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "tensorboard"])  # 安装 tensorboard
    from torch.utils.tensorboard import SummaryWriter

# 全局变量来存储 writer 和日志目录
writer = None
TB_LOG_DIR = None

def setup_tensorboard(phase_name: str):
    """为训练阶段设置 TensorBoard。"""
    global writer, TB_LOG_DIR
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    TB_LOG_DIR = Path(f"runs/tensorboard/{phase_name}/{timestamp}")
    TB_LOG_DIR.mkdir(parents=True, exist_ok=True)
    writer = SummaryWriter(log_dir=str(TB_LOG_DIR))
    print(f"TensorBoard for '{phase_name}' phase initialized. Logs will be saved to: {TB_LOG_DIR}")


def _safe_scalar_log(prefix: str, data: dict, step: int):
    if not isinstance(data, dict) or writer is None:
        return
    for k, v in data.items():
        if isinstance(v, (int, float)):
            writer.add_scalar(f"{prefix}/{k}", v, step)


def tb_on_fit_epoch_end(trainer):
    """每个 epoch 结束时记录 metrics、loss、学习率与显存占用。"""
    if writer is None: return
    epoch = getattr(trainer, "epoch", None)
    if epoch is None:
        return

    # 训练/验证指标
    metrics = getattr(trainer, "metrics", None) or {}
    _safe_scalar_log("metrics", metrics, epoch)

    # 训练损失（有的版本将损失汇总到 metrics 中）
    losses = getattr(trainer, "loss", None)
    if isinstance(losses, dict):
        _safe_scalar_log("loss", losses, epoch)

    # 学习率
    try:
        if getattr(trainer, "optimizer", None):
            lr = trainer.optimizer.param_groups[0].get("lr", None)
            if isinstance(lr, (int, float)):
                writer.add_scalar("opt/lr", lr, epoch)
    except Exception:
        pass

    # GPU 显存（如可用）
    try:
        if torch.cuda.is_available():
            mem = torch.cuda.memory_allocated() / (1024 ** 2)  # MB
            writer.add_scalar("gpu/memory_allocated_mb", mem, epoch)
    except Exception:
        pass

    writer.flush()


def tb_on_train_end(trainer):
    """训练结束时的回调。"""
    global writer
    if writer:
        writer.close()
        print(f"TensorBoard logs saved to: {TB_LOG_DIR}")
        writer = None # 重置 writer
    
    # 训练结束后清理内存
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

print("TensorBoard callback functions defined and setup_tensorboard function is ready.")


Estimated optimal batch size: 32
Estimated optimal num_workers: 4

System Resources:
CPU Usage: 4.7%
Memory: 45.1% used, 8.3GB available
GPU Memory: 0.0GB allocated
TensorBoard callback functions defined and setup_tensorboard function is ready.


In [None]:
# 加载模型并开始训练
from ultralytics import YOLO
import torch

# 1. 设置此阶段的 TensorBoard
setup_tensorboard("warmup")

# 2. 加载预训练模型
model = YOLO('yolo11n.pt')

# 3. 注册 TensorBoard 回调
try:
    model.add_callback("on_fit_epoch_end", tb_on_fit_epoch_end)
    model.add_callback("on_train_end", tb_on_train_end) # 使用 on_train_end
except NameError:
    print("TensorBoard callbacks not found. Please run the previous cell that defines them.")

# 4. 动态设备选择
_device = 0 if torch.cuda.is_available() else 'cpu'

# 5. 开始训练
results = model.train(
    data='pest24.yaml',
    epochs=10,                # warmup周期
    lr0=1e-4,                # 初始学习率
    dropout=0.2,             # Dropout
    weight_decay=0.01,       # 权重衰减
    batch=24,                 # 降低 batch_size
    workers=2,              # 降低 workers
    device=_device,          # 设备选择
    freeze=10,              # 冻结层数
    cache=True,            # 启用缓存
    overlap_mask=False,     # 减少mask计算开销
    profile=False,          # 关闭性能分析
    amp=True               # 自动混合精度训练
)

# 6. 保存warmup权重
model.save('yolo11n_warmup.pt')

print("\nWarmup complete. Launch TensorBoard with:\n  tensorboard --logdir runs/tensorboard\nThen open http://localhost:6006 in your browser.")


Ultralytics 8.3.208  Python-3.12.1 torch-2.8.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=pest24.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.2, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=10, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train4, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=False, patience=3, perspective=0.0, plo

In [None]:
# 阶段二：Finetune 训练
from ultralytics import YOLO
import torch

# 1. 设置此阶段的 TensorBoard
setup_tensorboard("finetune")

# 2. 加载 warmup 阶段训练好的模型
model = YOLO('yolo11n_warmup.pt')

# 3. 注册 TensorBoard 回调
try:
    model.add_callback("on_fit_epoch_end", tb_on_fit_epoch_end)
    model.add_callback("on_train_end", tb_on_train_end) # 使用 on_train_end
except NameError:
    print("TensorBoard callbacks not found. Please run the cell that defines them.")

# 4. 动态设备选择
_device = 0 if torch.cuda.is_available() else 'cpu'

# 5. 开始 Finetune 训练
# 解冻所有层，使用更大的学习率
results = model.train(
    seed=42,
    data='pest24.yaml',
    epochs=100,               # 更多周期
    lr0=1e-3,                # 更大的学习率
    lrf=0.05,                 # 学习率衰减到 5%
    optimizer='AdamW',      # 使用 AdamW 优化器
    weight_decay=0.001,       # 权重衰减
    dropout=0.2,             # Dropout
    cos_lr=False,            # 余弦退火学习率
    patience=30,             # 更多耐心
    batch=24,        # 使用估算的最佳 batch_size
    workers=2,     # 使用估算的最佳 workers
    device=_device,
    freeze=0,                # 解冻所有层
    cache=True,
    amp=True,
    overlap_mask=False,      # 减少mask计算开销
    save_period=10,           # 周期保存模型
    name='finetune2'
)

# 6. 保存最终模型
model.save('yolo11n_final.pt')

print("\nFinetune complete. Launch TensorBoard with:\n  tensorboard --logdir runs/tensorboard\nThen open http://localhost:6006 in your browser.")


TensorBoard for 'finetune' phase initialized. Logs will be saved to: runs\tensorboard\finetune\20251012_160626
New https://pypi.org/project/ultralytics/8.3.211 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.208  Python-3.12.1 torch-2.8.0+cu126 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=24, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=pest24.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.2, dynamic=False, embed=None, epochs=100, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=0, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.001, lrf=0.05, mask_ratio=4, max_det=300, mixup

单独的ECA注意力模块（待训练）

In [None]:
from ultralytics import YOLO
from eca_module import ECA
import torch.nn as nn

# === Step 1: 加载我们已有的最优模型 ===
model = YOLO("yolo11n_final4.pt")

# === Step 2: 插入 ECA 模块 ===
def inject_eca(model, layers=(4, 7, 10), k_size=3):
    for i in layers:
        old_layer = model.model[i]
        if isinstance(old_layer, nn.Sequential):
            ch = list(old_layer.children())[-1].out_channels
        elif hasattr(old_layer, 'conv'):
            ch = old_layer.conv.out_channels
        else:
            continue
        model.model[i] = nn.Sequential(old_layer, ECA(ch, k_size))
        print(f"✅ Inserted ECA after layer {i}, channels={ch}")
    return model

model = inject_eca(model, layers=[4, 7, 10])

# === Step 3: 保存注入后的结构版 ===
model.save("weights/yolov11n_eca_init.pt")

# === Step 4: 在ECA版本上finetune ===
results = model.train(
    data="pest24.yaml",
    seed=42,
    epochs=50,
    batch=24,
    workers=2,
    lr0=1e-3,                
    lrf=0.05,                 
    optimizer="AdamW",
    weight_decay=0.001,
    dropout=0.2,
    cos_lr=True,
    cache=True,
    overlap_mask=False,
    amp=True,
    device=0,
    patience=15,
    save_period=10,
    name='finetune_eca'
)

# === Step 5: 保存最终finetune权重 ===
model.save("weights/yolov11n_eca_finetuned.pt")


单独的跨尺度融合(采样偏移)模块（待训练）

In [None]:
# train_with_csab.py
from ultralytics import YOLO
import torch.nn as nn
from ultralytics import YOLO
from csab_offset import CSAB_Offset

# === Step 1: 加载之前的最佳模型 ===
model = YOLO("yolo11n_final4.pt")

# === Step 2: 定义插入函数 ===
def inject_csab_offset(model):
    import torch.nn as nn
    from modules.csab_offset import CSAB_Offset
    for name, m in model.model.named_modules():
        if isinstance(m, nn.Conv2d) and m.out_channels in [256, 512, 1024]:
            csab = CSAB_Offset(m.out_channels)
            m.add_module("csab_offset", csab)
    return model

# === Step 3: 注入模块 ===
model = inject_csab_offset(model)

# === Step 4: 保存结构版模型 ===
model.save("weights/yolov11n_csab_init.pt")

# === Step 5: finetune 训练 ===
model.train(
    data="pest24.yaml",
    seed=42,
    epochs=50,
    batch=24,
    workers=2,
    lr0=1e-3,
    lrf=0.05,
    optimizer="AdamW",
    weight_decay=0.001,
    dropout=0.2,
    cos_lr=True,
    cache=True,
    overlap_mask=False,
    amp=True,
    device=0,
    patience=15,
    save_period=10,
    name='finetune_csab'
)

# === Step 6: 保存finetune完成的模型 ===
model.save("weights/yolov11n_csab_offset_finetuned.pt")


多尺度自适应空间融合（待训练）

In [None]:
# csab_fusion.py
import torch
import torch.nn as nn
import torch.nn.functional as F
from csab_offset import CSAB_Offset

class AdaptiveSpatialFusion(nn.Module):
    """
    MSASF: Multi-Scale Adaptive Spatial Fusion
    对多层特征图自适应加权融合（轻量版）
    """
    def __init__(self, in_channels_list, out_channels):
        super().__init__()
        self.proj = nn.ModuleList([
            nn.Conv2d(c, out_channels, 1, bias=False) for c in in_channels_list
        ])
        self.spatial_att = nn.Sequential(
            nn.Conv2d(len(in_channels_list), 1, kernel_size=3, padding=1, bias=False),
            nn.Sigmoid()
        )
        self.out_conv = nn.Conv2d(out_channels, out_channels, 3, padding=1, bias=False)

    def forward(self, feats):
        # feats: list of multi-scale feature maps [P3, P4, P5]
        target_size = feats[0].shape[-2:]
        upsampled = [F.interpolate(self.proj[i](f), size=target_size, mode='bilinear', align_corners=False)
                     for i, f in enumerate(feats)]
        stacked = torch.stack(upsampled, dim=1)  # [N, n_scales, C, H, W]
        weight_map = self.spatial_att(stacked.mean(2))  # [N,1,H,W]
        fused = (stacked.mean(1) * weight_map + stacked.max(1).values * (1 - weight_map))
        return self.out_conv(fused)


class CrossLevelFeaturePyramid(nn.Module):
    """
    CLHFP: Cross-Level Hierarchical Feature Pyramid (简化版)
    跨层特征金字塔融合模块。
    """
    def __init__(self, channels=256):
        super().__init__()
        self.up = nn.ConvTranspose2d(channels, channels, 2, stride=2)
        self.down = nn.Conv2d(channels, channels, 3, stride=2, padding=1)
        self.mix = nn.Conv2d(channels, channels, 3, padding=1)
        self.gate = nn.Sequential(
            nn.Conv2d(channels, channels // 4, 1),
            nn.ReLU(),
            nn.Conv2d(channels // 4, 1, 1),
            nn.Sigmoid()
        )

    def forward(self, f_low, f_high):
        # f_low: 低层特征（高分辨率）
        # f_high: 高层特征（低分辨率）
        up_feat = F.interpolate(f_high, size=f_low.shape[-2:], mode='bilinear', align_corners=False)
        mix_feat = f_low + self.up(f_high) + self.mix(f_low)
        gate = self.gate(mix_feat)
        return gate * mix_feat + (1 - gate) * f_low


class CSAB_FusionBlock(nn.Module):
    """
    整合 CSAB + Offset + MSASF + CLHFP 的混合模块
    """
    def __init__(self, in_channels_list, out_channels):
        super().__init__()
        self.csab = CSAB_Offset(out_channels)
        self.msasf = AdaptiveSpatialFusion(in_channels_list, out_channels)
        self.clhfp = CrossLevelFeaturePyramid(out_channels)
        self.conv_out = nn.Conv2d(out_channels, out_channels, 3, padding=1)

    def forward(self, feats):
        """
        feats: list [P3, P4, P5]
        """
        fused = self.msasf(feats)
        fused = self.clhfp(fused, feats[-1])  # 使用最高层语义增强
        fused = self.csab(fused)
        return self.conv_out(fused)


单独使用DBL_TSD模块（待训练）

In [None]:
# train_with_dbl.py
from ultralytics import YOLO
from dbl_loss import DBL_TSD
import torch.nn as nn

# Step 1: 加载上一次训练的best模型
model = YOLO("yolo11n_final4.pt")

# Step 2: 初始化DBL损失
loss_wrapper = DBL_TSD(cls_w=1.0, box_w=5.0, iou_w=2.0, scale_alpha=1.0, trunc_thresh=0.4)

# Step 3: 定义loss替换callback
def custom_loss_callback(trainer):
    # trainer.loss_items / trainer.predictions / trainer.targets 可能名字不同
    # 我 use trainer.loss_items and trainer.targets as earlier examples; adapt if your version differs.
    preds = trainer.loss_items   # dict with keys used earlier (cls, box, iou)
    targets = trainer.targets
    loss_val, info = loss_wrapper(preds, targets)
    trainer.loss = loss_val
    # optionally log info to trainer.metrics or print
    return trainer.loss

# 注册callback
model.add_callback("on_train_batch_end", custom_loss_callback)

# Step 4: 保存结构版本
model.save("weights/yolov11n_dbl_init.pt")

# Step 5: 进行finetune训练
model.train(
    data="pest24.yaml",
    seed=42,
    epochs=50,
    batch=24,
    workers=2,
    lr0=0.001,
    lrf=0.05,
    optimizer="AdamW",
    weight_decay=0.001,
    dropout=0.2,
    cos_lr=True,
    cache=True,
    overlap_mask=False,
    amp=True,
    device=0,
    patience=15,
    save_period=10,
    name='finetune_dbl'
)

# Step 6: 保存最终模型
model.save("weights/yolov11n_dbl_finetuned.pt")
