# 模型训练

In [1]:
from PIL import Image  
# 增加PIL的解压炸弹限制
Image.MAX_IMAGE_PIXELS = None 

## CUDA版本配置

In [2]:
import os

# 添加cuda环境变量
os.environ["CUDA_PATH"] = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"

# 添加cudnn环境变量
cudnn_paths = [
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin",
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\libnvvp",
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include",
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\lib",
            ]
for cudnn_path in cudnn_paths:
    if cudnn_path not in os.environ["PATH"]:
        os.environ["PATH"] = cudnn_path + os.pathsep + os.environ["PATH"]

In [3]:
print(os.environ["PATH"])
print(os.environ["CUDA_PATH"])
!nvcc -V

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include;c:\Users\Rooki\.conda\envs\rtdetr;C:\Users\Rooki\.conda\envs\rtdetr;C:\Users\Rooki\.conda\envs\rtdetr\Library\mingw-w64\bin;C:\Users\Rooki\.conda\envs\rtdetr\Library\usr\bin;C:\Users\Rooki\.conda\envs\rtdetr\Library\bin;C:\Users\Rooki\.conda\envs\rtdetr\Scripts;C:\Users\Rooki\.conda\envs\rtdetr\bin;C:\ProgramData\miniconda3\condabin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\libnvvp;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\libnvvp;.;C:\windows\system32;C:\windows;C:\windows\System32\Wbem;C:\windows\System32\WindowsPowerShell\v1.0;C:\windows\System32\OpenSSH;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR;C:\Users\Administrator\AppData\Local\Microsoft\WindowsApps;C:\Program Files\HP\OMEN-Broadcast\Common;C:\Program F

In [4]:
!nvidia-smi

Thu Apr 17 14:15:43 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 561.19                 Driver Version: 561.19         CUDA Version: 12.6     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                  Driver-Model | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA GeForce RTX 4060 ...  WDDM  |   00000000:01:00.0  On |                  N/A |
| N/A   42C    P8              2W /   75W |     565MiB /   8188MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [5]:
import torch

print(f"torch.cuda.is_available(): {torch.cuda.is_available()}")
print(f"torch.cuda.get_device_name(0): {torch.cuda.get_device_name(0)}")


torch.cuda.is_available(): True
torch.cuda.get_device_name(0): NVIDIA GeForce RTX 4060 Laptop GPU


## 启动训练

In [6]:
# 导入必要的库
import os 
import sys 
# 将项目根目录添加到系统路径
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
import argparse

# 导入自定义模块
import src.misc.dist as dist 
from src.core import YAMLConfig 
from src.solver import TASKS

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
root_dir

'c:\\Users\\Rooki\\Desktop\\AI\\CV\\RT-DETR\\rtdetr_pytorch'

In [8]:
# 在jupyter中直接设置参数
class Args:
    def __init__(self):
        self.config = f'{root_dir}/configs/rtdetr/rtdetr_r50vd_6x_chengdu.yml'  # 配置文件路径
        self.resume = None  # 恢复训练的检查点路径
        self.tuning = None  # 微调模式的预训练模型路径
        self.test_only = False  # 是否只执行测试
        self.amp = False  # 是否启用自动混合精度
        self.seed = None  # 随机种子

args = Args()

In [9]:
# 初始化分布式训练
dist.init_distributed()

# 如果指定了随机种子则设置
if args.seed is not None:
    dist.set_seed(args.seed)

# 确保不同时使用tuning和resume模式
assert not all([args.tuning, args.resume]), \
    'Only support from_scrach or resume or tuning at one time'

# 创建配置对象
cfg = YAMLConfig(
    args.config,      # 配置文件路径
    resume=args.resume,   # 是否从检查点恢复训练
    use_amp=args.amp,     # 是否使用混合精度训练
    tuning=args.tuning    # 是否使用微调模式
)

Not init distributed mode.


In [10]:
cfg.yaml_cfg

{'task': 'detection',
 'num_classes': 17,
 'remap_mscoco_category': True,
 'train_dataloader': {'type': 'DataLoader',
  'dataset': {'type': 'CocoDetection',
   'img_folder': '\\\\?\\C:\\Users\\Rooki\\Desktop\\AI\\CV\\RT-DETR\\chengdu_dataset\\train',
   'ann_file': '\\\\?\\C:\\Users\\Rooki\\Desktop\\AI\\CV\\RT-DETR\\chengdu_dataset\\train\\_annotations.coco.json',
   'transforms': {'type': 'Compose',
    'ops': [{'type': 'RandomPhotometricDistort', 'p': 0.5},
     {'type': 'RandomZoomOut', 'fill': 0},
     {'type': 'RandomIoUCrop', 'p': 0.8},
     {'type': 'SanitizeBoundingBox', 'min_size': 1},
     {'type': 'RandomHorizontalFlip'},
     {'type': 'Resize', 'size': [640, 640]},
     {'type': 'ToImageTensor'},
     {'type': 'ConvertDtype'},
     {'type': 'SanitizeBoundingBox', 'min_size': 1},
     {'type': 'ConvertBox', 'out_fmt': 'cxcywh', 'normalize': True}]},
   'return_masks': False},
  'shuffle': True,
  'batch_size': 4,
  'num_workers': 4,
  'drop_last': True,
  'collate_fn': 'defa

In [11]:
cfg.yaml_cfg['task']

'detection'

In [15]:
cfg.model

RTDETR(
  (backbone): PResNet(
    (conv1): Sequential(
      (conv1_1): ConvNormLayer(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (norm): FrozenBatchNorm2d(32, eps=1e-05)
        (act): ReLU(inplace=True)
      )
      (conv1_2): ConvNormLayer(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (norm): FrozenBatchNorm2d(32, eps=1e-05)
        (act): ReLU(inplace=True)
      )
      (conv1_3): ConvNormLayer(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (norm): FrozenBatchNorm2d(64, eps=1e-05)
        (act): ReLU(inplace=True)
      )
    )
    (res_layers): ModuleList(
      (0): Blocks(
        (blocks): ModuleList(
          (0): BottleNeck(
            (branch2a): ConvNormLayer(
              (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (norm): FrozenBatchNorm2d(64, eps=1e-05)
    

In [21]:
from torchinfo import summary
summary(cfg.model.backbone, input_size=(1, 3, 640, 640))

Layer (type:depth-idx)                                       Output Shape              Param #
PResNet                                                      [1, 512, 80, 80]          --
├─Sequential: 1-1                                            [1, 64, 320, 320]         --
│    └─ConvNormLayer: 2-1                                    [1, 32, 320, 320]         --
│    │    └─Conv2d: 3-1                                      [1, 32, 320, 320]         (864)
│    │    └─FrozenBatchNorm2d: 3-2                           [1, 32, 320, 320]         --
│    │    └─ReLU: 3-3                                        [1, 32, 320, 320]         --
│    └─ConvNormLayer: 2-2                                    [1, 32, 320, 320]         --
│    │    └─Conv2d: 3-4                                      [1, 32, 320, 320]         (9,216)
│    │    └─FrozenBatchNorm2d: 3-5                           [1, 32, 320, 320]         --
│    │    └─ReLU: 3-6                                        [1, 32, 320, 320]         

In [13]:
cfg.postprocessor

RTDETRPostProcessor(use_focal_loss=True, num_classes=17, num_top_queries=300)

In [14]:
cfg.epoches = 10    # 设置训练轮数


In [15]:
# 根据配置创建对应任务的训练器
solver = TASKS[cfg.yaml_cfg['task']](cfg)

In [16]:
# solver.val()

In [None]:
solver.fit()

start training
Initial lr:  [1e-05, 0.0001, 0.0001, 0.0001]
loading annotations into memory...
Done (t=0.15s)
creating index...
index created!
loading annotations into memory...
Done (t=0.02s)
creating index...
index created!
Model parameters: 42733395
Total epochs: 10
