# 模型训练

In [1]:
from PIL import Image  
# 增加PIL的解压炸弹限制
Image.MAX_IMAGE_PIXELS = None 

## CUDA版本配置

In [2]:
import os

# 添加cuda环境变量
os.environ["CUDA_PATH"] = r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"

# 添加cudnn环境变量
cudnn_paths = [
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin",
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\libnvvp",
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include",
             r"C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\lib",
            ]
for cudnn_path in cudnn_paths:
    if cudnn_path not in os.environ["PATH"]:
        os.environ["PATH"] = cudnn_path + os.pathsep + os.environ["PATH"]

In [3]:
print(os.environ["PATH"])
print(os.environ["CUDA_PATH"])
!nvcc -V

C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\include;c:\Users\Rooki\.conda\envs\rtdetr;C:\Users\Rooki\.conda\envs\rtdetr;C:\Users\Rooki\.conda\envs\rtdetr\Library\mingw-w64\bin;C:\Users\Rooki\.conda\envs\rtdetr\Library\usr\bin;C:\Users\Rooki\.conda\envs\rtdetr\Library\bin;C:\Users\Rooki\.conda\envs\rtdetr\Scripts;C:\Users\Rooki\.conda\envs\rtdetr\bin;C:\ProgramData\miniconda3\condabin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\libnvvp;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.1\libnvvp;.;C:\windows\system32;C:\windows;C:\windows\System32\Wbem;C:\windows\System32\WindowsPowerShell\v1.0;C:\windows\System32\OpenSSH;C:\Program Files (x86)\NVIDIA Corporation\PhysX\Common;C:\Program Files\NVIDIA Corporation\NVIDIA NvDLISR;C:\Users\Administrator\AppData\Local\Microsoft\WindowsApps;C:\Program Files\HP\OMEN-Broadcast\Common;C:\Program F

In [4]:
import torch

print(f"torch.cuda.is_available(): {torch.cuda.is_available()}")
print(f"torch.cuda.get_device_name(0): {torch.cuda.get_device_name(0)}")


torch.cuda.is_available(): True
torch.cuda.get_device_name(0): NVIDIA GeForce RTX 4060 Laptop GPU


In [5]:
# 导入必要的库
import os 
import sys 
# 将项目根目录添加到系统路径
sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..')))
import argparse

# 导入自定义模块
import src.misc.dist as dist 
from src.core import YAMLConfig 
from src.solver import TASKS

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
root_dir = os.path.abspath(os.path.join(os.getcwd(), '..'))
root_dir

'c:\\Users\\Rooki\\Desktop\\AI\\CV\\RT-DETR\\rtdetr_pytorch'

In [7]:
# 在jupyter中直接设置参数
class Args:
    def __init__(self):
        self.config = f'{root_dir}/configs/rtdetr/rtdetr_r50vd_6x_chengdu.yml'  # 配置文件路径
        self.resume = None  # 恢复训练的检查点路径
        self.tuning = None  # 微调模式的预训练模型路径
        self.test_only = False  # 是否只执行测试
        self.amp = False  # 是否启用自动混合精度
        self.seed = None  # 随机种子

args = Args()

In [8]:
# 初始化分布式训练
dist.init_distributed()

# 如果指定了随机种子则设置
if args.seed is not None:
    dist.set_seed(args.seed)

# 确保不同时使用tuning和resume模式
assert not all([args.tuning, args.resume]), \
    'Only support from_scrach or resume or tuning at one time'

# 创建配置对象
cfg = YAMLConfig(
    args.config,      # 配置文件路径
    resume=args.resume,   # 是否从检查点恢复训练
    use_amp=args.amp,     # 是否使用混合精度训练
    tuning=args.tuning    # 是否使用微调模式
)

cfg.yaml_cfg

Not init distributed mode.


{'task': 'detection',
 'num_classes': 17,
 'remap_mscoco_category': True,
 'train_dataloader': {'type': 'DataLoader',
  'dataset': {'type': 'CocoDetection',
   'img_folder': '\\\\?\\C:\\Users\\Rooki\\Desktop\\AI\\CV\\RT-DETR\\chengdu_dataset\\train',
   'ann_file': '\\\\?\\C:\\Users\\Rooki\\Desktop\\AI\\CV\\RT-DETR\\chengdu_dataset\\train\\_annotations.coco.json',
   'transforms': {'type': 'Compose',
    'ops': [{'type': 'RandomPhotometricDistort', 'p': 0.5},
     {'type': 'RandomZoomOut', 'fill': 0},
     {'type': 'RandomIoUCrop', 'p': 0.8},
     {'type': 'SanitizeBoundingBox', 'min_size': 1},
     {'type': 'RandomHorizontalFlip'},
     {'type': 'Resize', 'size': [640, 640]},
     {'type': 'ToImageTensor'},
     {'type': 'ConvertDtype'},
     {'type': 'SanitizeBoundingBox', 'min_size': 1},
     {'type': 'ConvertBox', 'out_fmt': 'cxcywh', 'normalize': True}]},
   'return_masks': False},
  'shuffle': True,
  'batch_size': 4,
  'num_workers': 4,
  'drop_last': True,
  'collate_fn': 'defa

In [9]:
cfg.yaml_cfg['task']

'detection'

In [10]:
type(cfg.model)

Load PResNet50 state_dict


src.zoo.rtdetr.rtdetr.RTDETR

In [11]:
# 根据配置创建对应任务的训练器
solver = TASKS[cfg.yaml_cfg['task']](cfg)

In [12]:
solver.cfg.model

RTDETR(
  (backbone): PResNet(
    (conv1): Sequential(
      (conv1_1): ConvNormLayer(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        (norm): FrozenBatchNorm2d(32, eps=1e-05)
        (act): ReLU(inplace=True)
      )
      (conv1_2): ConvNormLayer(
        (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (norm): FrozenBatchNorm2d(32, eps=1e-05)
        (act): ReLU(inplace=True)
      )
      (conv1_3): ConvNormLayer(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (norm): FrozenBatchNorm2d(64, eps=1e-05)
        (act): ReLU(inplace=True)
      )
    )
    (res_layers): ModuleList(
      (0): Blocks(
        (blocks): ModuleList(
          (0): BottleNeck(
            (branch2a): ConvNormLayer(
              (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
              (norm): FrozenBatchNorm2d(64, eps=1e-05)
    

In [13]:
solver.cfg.postprocessor

RTDETRPostProcessor(use_focal_loss=True, num_classes=17, num_top_queries=300)

In [14]:
# solver.val()

In [15]:
solver.fit()

start training
Initial lr:  [1e-05, 0.0001, 0.0001, 0.0001]
loading annotations into memory...
Done (t=0.10s)
creating index...
index created!
loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
模型参数数量: 42733395
Epoch: [0]  [   0/1348]  eta: 4:41:44  lr: 0.000010  loss: 32.1872 (32.1872)  loss_vfl: 0.2446 (0.2446)  loss_bbox: 0.5230 (0.5230)  loss_giou: 1.7820 (1.7820)  loss_vfl_aux_0: 0.2453 (0.2453)  loss_bbox_aux_0: 0.5734 (0.5734)  loss_giou_aux_0: 1.7573 (1.7573)  loss_vfl_aux_1: 0.2535 (0.2535)  loss_bbox_aux_1: 0.5223 (0.5223)  loss_giou_aux_1: 1.7907 (1.7907)  loss_vfl_aux_2: 0.2170 (0.2170)  loss_bbox_aux_2: 0.5070 (0.5070)  loss_giou_aux_2: 1.8338 (1.8338)  loss_vfl_aux_3: 0.2742 (0.2742)  loss_bbox_aux_3: 0.4943 (0.4943)  loss_giou_aux_3: 1.7994 (1.7994)  loss_vfl_aux_4: 0.2602 (0.2602)  loss_bbox_aux_4: 0.5297 (0.5297)  loss_giou_aux_4: 1.7572 (1.7572)  loss_vfl_aux_5: 0.2019 (0.2019)  loss_bbox_aux_5: 0.5331 (0.5331)  loss_giou_aux_5: 1.8132 