In [None]:
import matplotlib.pyplot as plt
import torch
import torchvision
from torch.utils.data import DataLoader
from torchvision.datasets import VOCDetection
from torchvision.models.detection import fasterrcnn_resnet50_fpn
# from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.transforms import ToTensor
from torchvision.utils import draw_bounding_boxes

In [None]:
# Hyper-parameters shared by the training and evaluation cells.
num_epochs = 10
batch_size = 2
num_classes = 20 + 1  # 20 VOC object categories plus the background class

In [None]:
# Load the Pascal VOC 2007 detection splits (train first, then test).
# Both are rooted at './', fetched on demand (download=True) and use their
# own ToTensor() image transform.
train_dataset, test_dataset = (
    VOCDetection(
        root='./',
        year='2007',
        image_set=split,
        download=True,
        transform=ToTensor(),
    )
    for split in ('train', 'test')
)

In [None]:
# Build the data loaders.

# The 20 Pascal VOC foreground categories. Label 0 is reserved for the
# background class, so the name at position i is encoded as label i + 1.
VOC_CLASSES = (
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
)
_VOC_LABELS = {name: label for label, name in enumerate(VOC_CLASSES, start=1)}


def voc_target_to_detection(target):
    """Convert one VOCDetection annotation into a detection-model target.

    Args:
        target: the nested dict produced by ``VOCDetection`` (the parsed XML,
            rooted at the ``'annotation'`` key).

    Returns:
        ``{'boxes': FloatTensor[N, 4], 'labels': Int64Tensor[N]}`` with boxes
        in ``(xmin, ymin, xmax, ymax)`` pixel coordinates.

    Raises:
        KeyError: if an object name is not one of ``VOC_CLASSES``.
    """
    objects = target['annotation'].get('object', [])
    if isinstance(objects, dict):
        # Some torchvision versions return a bare dict for a single object.
        objects = [objects]

    boxes = [
        [float(obj['bndbox'][key]) for key in ('xmin', 'ymin', 'xmax', 'ymax')]
        for obj in objects
    ]
    labels = [_VOC_LABELS[obj['name']] for obj in objects]
    return {
        # reshape keeps the (N, 4) layout even when an image has no objects.
        'boxes': torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4),
        'labels': torch.tensor(labels, dtype=torch.int64),
    }


def detection_collate(batch):
    """Collate ``(image, annotation)`` samples without stacking them.

    VOC images differ in size and in number of objects, so the default collate
    (which stacks tensors and zips lists) cannot build a batch from them.

    Returns:
        ``(images, targets)``: a list of image tensors and a list of target
        dicts as produced by ``voc_target_to_detection``.
    """
    images, annotations = zip(*batch)
    return list(images), [voc_target_to_detection(a) for a in annotations]


train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=detection_collate,
)
# Single-image batches collate fine with the default behaviour, so the test
# loader keeps yielding a (1, 3, H, W) tensor plus the raw annotation.
test_loader = DataLoader(test_dataset, batch_size=1)

In [None]:
# Build the Faster R-CNN detector (ResNet-50 + FPN) without pretrained
# detection weights, with a box predictor sized for `num_classes` outputs.
model = fasterrcnn_resnet50_fpn(
    pretrained=False,
    num_classes=num_classes,
)

In [None]:
# Optimiser: SGD with momentum and weight decay over every model parameter.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=5e-3,
    momentum=0.9,
    weight_decay=5e-4,
)
# Scheduler: multiply the learning rate by 0.1 every 3 scheduler steps.
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [None]:
# Train the model.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)
model.train()

for epoch in range(num_epochs):
    epoch_loss = 0.0
    num_batches = 0

    for images, targets in train_loader:
        # In training mode the detector takes a list of image tensors and a
        # list of per-image target dicts; every target value must be a tensor
        # because it is moved to the device here.
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # The model returns a dict of loss terms; optimise their sum.
        loss_dict = model(images, targets)
        total_loss = sum(loss_dict.values())

        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

        epoch_loss += total_loss.item()
        num_batches += 1

    lr_scheduler.step()
    # Report the mean over the epoch rather than the last mini-batch, which is
    # very noisy at small batch sizes; max() guards against an empty loader.
    mean_loss = epoch_loss / max(num_batches, 1)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {mean_loss}")

In [None]:
# Evaluate the model and visualise the RPN proposal boxes.
model.eval()

max_images = 4       # the test split is large; preview only a handful of images
max_proposals = 50   # boxes drawn per image; set to None to draw every proposal

for i, (image, target) in enumerate(test_loader):
    if i >= max_images:
        break

    # test_loader uses batch_size=1, so `image` is a (1, 3, H, W) float batch.
    image = image.to(device)
    with torch.no_grad():
        # model.rpn cannot consume a raw tensor: it needs the ImageList built
        # by model.transform together with the backbone feature maps.
        image_list, _ = model.transform(list(image))
        features = model.backbone(image_list.tensors)
        proposals, _ = model.rpn(image_list, features)

    boxes = proposals[0][:max_proposals, :4]

    # Proposals are expressed in the resized image's coordinates; scale them
    # back to the original resolution before drawing.
    resized_h, resized_w = image_list.image_sizes[0]
    orig_h, orig_w = image.shape[-2:]
    scale = boxes.new_tensor([orig_w / resized_w, orig_h / resized_h] * 2)
    boxes = (boxes * scale).cpu()

    # draw_bounding_boxes expects a (C, H, W) uint8 image and returns one.
    canvas = image[0].mul(255).round().clamp(0, 255).to(torch.uint8).cpu()
    image_with_proposals = draw_bounding_boxes(canvas, boxes, width=2)

    # matplotlib wants channels last.
    plt.imshow(image_with_proposals.permute(1, 2, 0).numpy())
    plt.axis('off')
    plt.show()

In [None]:
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import ToTensor
from torchvision.datasets import VOCDetection
from torch.utils.data import DataLoader
import torchvision.transforms.functional as F
import matplotlib.pyplot as plt

# Training / evaluation settings (kept so this cell can run on its own).
num_classes = 21  # 20 VOC object categories plus background
batch_size = 2
num_epochs = 10

# Load the VOC 2007 test split from './', the root the dataset-loading cell
# downloads into. The former '/path/to/VOC_dataset' root was a placeholder.
# No download is attempted here, so the data must already be present.
test_dataset = VOCDetection(root='./', year='2007', image_set='test', transform=ToTensor())

# One image per batch.
test_loader = DataLoader(test_dataset, batch_size=1)

# Load the pretrained Faster R-CNN and switch it to inference mode.
model = fasterrcnn_resnet50_fpn(pretrained=True)
model.eval()

# Move the model to the GPU when one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Run the detector on the first test images and draw its predicted boxes.
for i, (image, target) in enumerate(test_loader):
    # The loader yields a (1, 3, H, W) batch; the model takes a list of
    # (3, H, W) tensors, so drop the batch dimension.
    images = [image.squeeze(0).to(device)]

    with torch.no_grad():
        predictions = model(images)  # inference: no targets are passed

    # These are the model's final detections (the output of the whole
    # detector), not the first-stage RPN proposals.
    boxes = predictions[0]['boxes']

    # Show the image with every predicted box outlined in red.
    image = F.to_pil_image(image.squeeze(0).cpu())
    plt.imshow(image)
    ax = plt.gca()

    for xmin, ymin, xmax, ymax in boxes.tolist():
        rect = plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor='r', linewidth=2)
        ax.add_patch(rect)

    plt.axis('off')
    plt.show()

    if i == 3:  # only visualise four test images
        break


In [None]:
# Visualise one training batch together with its ground-truth boxes.
# NOTE(review): `train_data_loader`, `classes`, `np` and `cv2` are not defined
# or imported in any cell visible here; confirm they exist before running.
images, targets = next(iter(train_data_loader))
images = [image.to(device) for image in images]
targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

plt.figure(figsize=(20, 20))
for i, (image, target) in enumerate(zip(images, targets)):
    plt.subplot(2, 2, i + 1)  # 2x2 grid: at most four images fit
    boxes = target['boxes'].cpu().numpy().astype(np.int32)
    names = target['labels'].cpu().numpy().astype(np.int64)
    # permute() only returns a strided view. OpenCV draws in place only on a
    # C-contiguous array, so materialise an HWC copy first.
    sample = np.ascontiguousarray(image.permute(1, 2, 0).cpu().numpy())

    # tolist() yields plain Python ints, which OpenCV accepts as coordinates.
    # The separate loop variables also avoid shadowing the outer index `i`.
    for (x1, y1, x2, y2), name in zip(boxes.tolist(), names.tolist()):
        cv2.rectangle(sample,
                      (x1, y1),
                      (x2, y2),
                      (0, 0, 220), 2)
        cv2.putText(sample, classes[name], (x1, y1 + 15), cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 220, 0), 1, cv2.LINE_AA)

    plt.axis('off')
    plt.imshow(sample)
    