In [8]:
import os
import shutil

# Folder holding the raw data, plus one destination folder per file type
data_dir = '.'
xml_dir = './xml'
png_dir = './png'

# Make sure both destination folders exist before anything is moved
for destination in (xml_dir, png_dir):
    os.makedirs(destination, exist_ok=True)

# Route each entry of data_dir by its suffix; entries with any other suffix stay put
suffix_routes = (('.xml', xml_dir), ('.png', png_dir))
for filename in os.listdir(data_dir):
    for suffix, destination in suffix_routes:
        if filename.endswith(suffix):
            shutil.move(os.path.join(data_dir, filename),
                        os.path.join(destination, filename))
            break

print('文件已经分别移动到 xml 和 png 文件夹。')


文件已经分别移动到 xml 和 png 文件夹。


In [9]:
import os
import xmltodict
import torch
from PIL import Image


def parse_xml(xml_file, encoding='utf-8'):
    """Read an XML annotation file and parse it with ``xmltodict``.

    Args:
        xml_file: Path of the XML file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so the
            result does not depend on the platform's default encoding; pass
            ``None`` to fall back to that platform default.

    Returns:
        Whatever ``xmltodict.parse`` returns for the file's text.
    """
    with open(xml_file, 'r', encoding=encoding) as file:
        return xmltodict.parse(file.read())

# 遍历 xml 文件夹，解析 XML 文件
def prepare_data(xml_dir, png_dir, class_to_idx=None):
    """Build detection samples from paired XML annotations and PNG images.

    Args:
        xml_dir: Directory containing the ``.xml`` annotation files.
        png_dir: Directory containing the ``.png`` images. An annotation is paired
            with the image that has the same name with a ``.png`` extension.
        class_to_idx: Optional mapping from an object's ``name`` field to an
            integer label. When omitted, every object receives label 1.

    Returns:
        A list of dicts with keys ``'image'`` (path of the PNG), ``'bboxes'``
        (float32 tensor of shape ``(N, 4)`` holding ``[xmin, ymin, xmax, ymax]``)
        and ``'labels'`` (int64 tensor of shape ``(N,)``). Annotations without a
        matching PNG are skipped; annotations without any object yield a sample
        with ``N == 0``.

    Raises:
        KeyError: If the parsed document has no ``annotation`` root, an object
            lacks ``bndbox``, or ``class_to_idx`` is given and does not contain
            an object's name.
    """
    xml_suffix = '.xml'
    coord_keys = ('xmin', 'ymin', 'xmax', 'ymax')

    dataset = []
    # Sorted so the sample order does not depend on the filesystem's listing order.
    for xml_file in sorted(os.listdir(xml_dir)):
        if not xml_file.endswith(xml_suffix):
            continue

        xml_path = os.path.join(xml_dir, xml_file)
        # Swap only the trailing extension; str.replace would also rewrite a
        # '.xml' occurring earlier in the file name.
        png_file = xml_file[:-len(xml_suffix)] + '.png'
        png_path = os.path.join(png_dir, png_file)

        # Only keep annotations whose image actually exists.
        if not os.path.exists(png_path):
            continue

        annotation = parse_xml(xml_path)

        # An empty root or a missing 'object' entry means "no objects"; a lone
        # object arrives as a single mapping rather than a list.
        root = annotation['annotation'] or {}
        objects = root.get('object') or []
        if not isinstance(objects, list):
            objects = [objects]

        bboxes = []
        labels = []
        for obj in objects:
            bndbox = obj['bndbox']
            # float() accepts both "12" and "12.0"; the tensor is float32 anyway.
            bboxes.append([float(bndbox[key]) for key in coord_keys])
            labels.append(1 if class_to_idx is None else class_to_idx[obj['name']])

        dataset.append({
            'image': png_path,
            # reshape keeps the (N, 4) layout even when there are no boxes.
            'bboxes': torch.tensor(bboxes, dtype=torch.float32).reshape(-1, 4),
            'labels': torch.tensor(labels, dtype=torch.int64),
        })

    return dataset


# Build the in-memory sample list from the xml/ and png/ folders
dataset = prepare_data(xml_dir, png_dir)


KeyboardInterrupt: 

In [14]:
import os
import shutil
from glob import glob
import random

# Data root (holds the png/ and xml/ folders) and destination root for the split
data_root = './'
target_root = './GRP'

# Share of matched samples that goes to the training split, and the shuffle seed
TRAIN_RATIO = 0.7
SPLIT_SEED = 42

# Collect images and annotations; sorted so the starting order does not depend
# on the order in which the filesystem lists the files
png_files = sorted(glob(os.path.join(data_root, 'png', '*.png')))
xml_files = sorted(glob(os.path.join(data_root, 'xml', '*.xml')))

# Map each annotation's base name (no directory, no extension) to its full path
xml_map = {os.path.basename(os.path.splitext(f)[0]): f for f in xml_files}

# Keep only the images that have an annotation with the same base name
matched_files = [f for f in png_files if os.path.basename(os.path.splitext(f)[0]) in xml_map]

# Shuffle with a dedicated seeded generator: re-running this cell then yields the
# same split, so files copied by an earlier run cannot end up in both train and test
random.Random(SPLIT_SEED).shuffle(matched_files)
split_point = int(TRAIN_RATIO * len(matched_files))
train_files = matched_files[:split_point]
test_files = matched_files[split_point:]

# Create the destination folders
for folder in ['train/png', 'train/xml', 'test/png', 'test/xml']:
    os.makedirs(os.path.join(target_root, folder), exist_ok=True)

# 复制文件到新目录
def copy_files(files, folder):
    """Copy each image and its matching annotation into the split folders.

    Args:
        files: Paths of the PNG images to copy. Each image's base name must be
            a key of the module-level ``xml_map``.
        folder: Image destination relative to ``target_root`` (for example
            ``'train/png'``). Annotations go to the sibling ``xml`` folder.

    A failure while copying one pair is printed and does not stop the
    remaining copies.
    """
    png_dest_dir = os.path.join(target_root, folder)

    # Derive the annotation folder by swapping only the last path component;
    # a blanket str.replace would also rewrite "png" appearing in a parent folder.
    parent, leaf = os.path.split(folder)
    if leaf == 'png':
        xml_folder = os.path.join(parent, 'xml')
    else:
        xml_folder = folder.replace('png', 'xml')
    xml_dest_dir = os.path.join(target_root, xml_folder)

    for f in files:
        png_dest_path = os.path.join(png_dest_dir, os.path.basename(f))
        xml_file = xml_map[os.path.basename(os.path.splitext(f)[0])]
        xml_dest_path = os.path.join(xml_dest_dir, os.path.basename(xml_file))
        try:
            shutil.copy(f, png_dest_path)
            shutil.copy(xml_file, xml_dest_path)
        except Exception as e:
            # Best effort: report the problem and carry on with the next pair.
            print(f"Error copying file {os.path.basename(f)} or its XML: {e}")

# Copy the training and test images; each call also copies the matching annotations
copy_files(train_files, 'train/png')
copy_files(test_files, 'test/png')

print("数据集划分完成。")


数据集划分完成。


In [15]:
def verify_dataset(folder):
    """Print the file counts of one split and whether images and annotations pair up.

    Looks in ``<target_root>/<folder>/png`` for ``*.png`` and in
    ``<target_root>/<folder>/xml`` for ``*.xml``. Prints the two counts, then
    either a confirmation that the base names are identical on both sides or the
    set of base names found on only one side.
    """
    def stems(paths):
        # Base name of every path, without directory and extension
        return {os.path.splitext(os.path.basename(p))[0] for p in paths}

    split_dir = os.path.join(target_root, folder)
    images = glob(os.path.join(split_dir, 'png', '*.png'))
    annotations = glob(os.path.join(split_dir, 'xml', '*.xml'))

    # Report the counts
    print(f"在 {folder} 中有 {len(images)} 张图片和 {len(annotations)} 个标注文件。")

    # Compare the two sets of base names
    image_stems = stems(images)
    annotation_stems = stems(annotations)
    if image_stems != annotation_stems:
        print(f"{folder} 文件夹中有不匹配的文件：{image_stems ^ annotation_stems}")
        return
    print(f"{folder} 文件夹中的图片和标注文件匹配完整。")

# Check both the training and the test split
verify_dataset('train')
verify_dataset('test')


在 train 中有 1472 张图片和 1472 个标注文件。
train 文件夹中的图片和标注文件匹配完整。
在 test 中有 632 张图片和 632 个标注文件。
test 文件夹中的图片和标注文件匹配完整。


In [16]:
import os
from glob import glob
import xml.etree.ElementTree as ET
from collections import Counter

# Root directory of the data
data_root = './'

# Every XML annotation file under <data_root>/xml
xml_files = glob(os.path.join(data_root, 'xml', '*.xml'))

# Tally of instances per category name
category_counts = Counter()

# File by file, feed the <name> text of each direct <object> child into the tally
for xml_file in xml_files:
    root = ET.parse(xml_file).getroot()
    category_counts.update(obj.find('name').text for obj in root.findall('object'))

# Report the total for every category, in first-seen order
for category in category_counts:
    count = category_counts[category]
    print(f"类别 '{category}' 有 {count} 个实例。")



类别 'manhole' 有 289 个实例。
类别 'cavity' 有 1413 个实例。
类别 'loose zone' 有 2056 个实例。
类别 'rebar' 有 1188 个实例。
类别 'water bearing zone' 有 169 个实例。
类别 'concave' 有 3 个实例。
类别 'pipeline' 有 20 个实例。


In [None]:
def split_dataset(dataset, train_ratio=0.8, seed=None):
    """Randomly shuffle the samples and split them into training and test lists.

    Args:
        dataset: Indexable collection of samples that supports ``len()``.
        train_ratio: Fraction of the samples placed in the training list; must
            lie in ``[0, 1]``.
        seed: Optional integer seed. When given, the permutation is drawn from a
            dedicated ``torch.Generator`` seeded with it, so the same split is
            returned on every call. When ``None``, torch's global random state
            is used.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple of lists.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    # A negative or >1 ratio would otherwise slice silently into a wrong split.
    if not 0 <= train_ratio <= 1:
        raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")

    generator = None
    if seed is not None:
        generator = torch.Generator()
        generator.manual_seed(seed)

    # Shuffle the indices, then cut them at the requested ratio
    indices = torch.randperm(len(dataset), generator=generator).tolist()
    split = int(len(indices) * train_ratio)

    train_dataset = [dataset[i] for i in indices[:split]]
    test_dataset = [dataset[i] for i in indices[split:]]

    return train_dataset, test_dataset

# Split the prepared sample list into training and test parts
train_dataset, test_dataset = split_dataset(dataset)

print(f"训练集数量: {len(train_dataset)}")
print(f"测试集数量: {len(test_dataset)}")


训练集数量: 1683
测试集数量: 421


In [None]:
from torch.utils.data import DataLoader
from torchvision import transforms

class CustomDataset(torch.utils.data.Dataset):
    """Detection dataset over a list of sample dicts.

    Each sample dict provides an image path under ``'image'``, a box tensor under
    ``'bboxes'`` and a label tensor under ``'labels'``.

    Args:
        data: Sequence of sample dicts.
        transform: Optional callable applied to the RGB PIL image. It is assumed
            to do nothing geometric beyond resizing the whole image; the boxes
            are rescaled to follow such a resize. Crops or flips are not
            accounted for.
    """

    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, target)`` where target has ``'boxes'`` and ``'labels'``."""
        item = self.data[idx]
        # Close the underlying file as soon as the pixels are converted.
        with Image.open(item['image']) as raw:
            image = raw.convert('RGB')
        orig_w, orig_h = image.size

        # Keep the (N, 4) layout even for samples without any box.
        boxes = item['bboxes'].reshape(-1, 4)

        if self.transform:
            image = self.transform(image)
            new_size = self._width_height(image)
            # Boxes are stored in original pixel coordinates; if the transform
            # changed the image size they must be scaled by the same factors.
            if new_size is not None and tuple(new_size) != (orig_w, orig_h):
                scale_x = new_size[0] / orig_w
                scale_y = new_size[1] / orig_h
                boxes = boxes * torch.tensor(
                    [scale_x, scale_y, scale_x, scale_y], dtype=boxes.dtype)

        target = {'boxes': boxes, 'labels': item['labels']}
        return image, target

    @staticmethod
    def _width_height(image):
        """Return ``(width, height)`` of a tensor or PIL-style image, else ``None``."""
        if isinstance(image, torch.Tensor):
            # Tensors are laid out as (..., H, W).
            return int(image.shape[-1]), int(image.shape[-2])
        size = getattr(image, 'size', None)
        if isinstance(size, tuple) and len(size) == 2:
            return size
        return None

# Image transform: convert the PIL image to a tensor, then resize it to 1024x1024
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((1024, 1024))
])


def collate_detection_batch(batch):
    """Regroup a list of ``(image, target)`` pairs into ``(images, targets)`` tuples.

    Images and targets are kept as tuples rather than stacked, because each
    target holds a different number of boxes. A named module-level function is
    used instead of a lambda so the loaders can be pickled when worker
    processes are enabled.
    """
    return tuple(zip(*batch))


# Wrap the two sample lists as PyTorch datasets
train_ds = CustomDataset(train_dataset, transform=transform)
test_ds = CustomDataset(test_dataset, transform=transform)

# Data loaders: shuffle only the training data
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True, collate_fn=collate_detection_batch)
test_loader = DataLoader(test_ds, batch_size=4, shuffle=False, collate_fn=collate_detection_batch)


In [None]:
from torchvision.ops import box_iou
def calculate_accuracy(tp, fp, fn):
    """Return ``tp / (tp + fp + fn)``, or 0 when that sum is not positive."""
    total = tp + fp + fn
    if total > 0:
        return tp / total
    return 0

def evaluate(model, data_loader, device, iou_threshold=0.5, score_threshold=0.0):
    """Score a detection model on a data loader with one-to-one box matching.

    For every image, predictions are visited from highest to lowest score. A
    prediction is a true positive when the best still-unmatched ground-truth box
    with the same label overlaps it with IoU above ``iou_threshold``; that box is
    then marked as used. Every other prediction is a false positive and every
    ground-truth box left unmatched is a false negative.

    Args:
        model: Detection model. It is switched to eval mode and called with a
            list of image tensors; it must return one dict per image with
            ``'boxes'``, ``'labels'`` and ``'scores'``.
        data_loader: Iterable yielding ``(images, targets)`` batches, where each
            target is a dict with ``'boxes'`` and ``'labels'``.
        device: Device the images are moved to before the forward pass.
        iou_threshold: IoU a prediction must exceed to match a ground-truth box.
        score_threshold: Predictions scoring below this value are discarded
            before matching. The default of 0.0 keeps every prediction.

    Returns:
        The value of ``calculate_accuracy(tp, fp, fn)`` over the whole loader.
    """
    model.eval()
    tp = 0  # true positives
    fp = 0  # false positives
    fn = 0  # false negatives

    with torch.no_grad():
        for images, targets in data_loader:
            images = [img.to(device) for img in images]
            outputs = model(images)

            for output, target in zip(outputs, targets):
                pred_boxes = output['boxes'].detach().cpu()
                pred_labels = output['labels'].detach().cpu()
                pred_scores = output['scores'].detach().cpu()

                true_boxes = target['boxes'].detach().cpu()
                true_labels = target['labels'].detach().cpu()

                # Drop low-confidence predictions before matching.
                keep = pred_scores >= score_threshold
                pred_boxes = pred_boxes[keep]
                pred_labels = pred_labels[keep]
                pred_scores = pred_scores[keep]

                num_pred = pred_boxes.size(0)
                num_true = true_boxes.size(0)
                if num_pred == 0 or num_true == 0:
                    # Nothing can match: all predictions are false positives
                    # and all ground-truth boxes are false negatives.
                    fp += num_pred
                    fn += num_true
                    continue

                iou = box_iou(pred_boxes, true_boxes)
                # Pairs with different labels can never match.
                label_mismatch = pred_labels.unsqueeze(1) != true_labels.unsqueeze(0)
                iou = iou.masked_fill(label_mismatch, -1.0)

                gt_taken = torch.zeros(num_true, dtype=torch.bool)
                matches = 0
                for p in torch.argsort(pred_scores, descending=True).tolist():
                    # A ground-truth box may be claimed by one prediction only,
                    # so duplicate detections of it count as false positives.
                    candidates = iou[p].masked_fill(gt_taken, -1.0)
                    best_iou, best_gt = candidates.max(0)
                    if best_iou.item() > iou_threshold:
                        gt_taken[best_gt] = True
                        matches += 1

                tp += matches
                fp += num_pred - matches
                fn += num_true - matches

    accuracy = calculate_accuracy(tp, fp, fn)
    return accuracy


In [None]:
import torchvision.models.detection as models
import torchvision.models.detection.faster_rcnn as fasterrcnn
import torch

# Load a Faster R-CNN (ResNet-50 FPN) model with pretrained weights.
# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; left unchanged because the installed version is not visible here.
model = models.fasterrcnn_resnet50_fpn(pretrained=True)

# Replace the box-predictor head so that it outputs `num_classes` scores.
# NOTE(review): in torchvision detection models this count includes the background
# class. The category statistics in this notebook list 7 object names, which would
# need 8 outputs, while the data-preparation cell as called here labels every
# object 1 - confirm the intended number before training.
num_classes = 7
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = fasterrcnn.FastRCNNPredictor(in_features, num_classes)

# Move the model to the GPU when one is available
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optimizer: hand over only the parameters that are actually trainable
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(trainable_params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    num_batches = 0
    for images, targets in train_loader:
        images = list(img.to(device) for img in images)
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of loss terms; optimise their sum
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        total_loss += losses.item()
        num_batches += 1
        if num_batches % 10 == 0:  # report the running loss every 10 iterations
            print(f"Iteration #{num_batches} loss: {losses.item()}")

    # Mean loss over the epoch; an empty loader yields NaN instead of a ZeroDivisionError
    avg_loss = total_loss / num_batches if num_batches else float('nan')
    print(f"Epoch #{epoch+1} average loss: {avg_loss}")

    # Evaluate on the test loader after every epoch
    accuracy = evaluate(model, test_loader, device)
    print(f"Epoch #{epoch+1} accuracy: {accuracy}")


Iteration #10 loss: 0.26811137795448303
Iteration #20 loss: 0.2633698582649231
Iteration #30 loss: 0.2819766700267792
Iteration #40 loss: 0.34112468361854553
Iteration #50 loss: 0.4944613575935364
Iteration #60 loss: 0.3240409791469574
Iteration #70 loss: 0.3577042818069458
Iteration #80 loss: 0.8500251770019531
Iteration #90 loss: 0.37111034989356995
Iteration #100 loss: 0.17871831357479095
Iteration #110 loss: 0.19433918595314026
Iteration #120 loss: 0.6450316309928894
Iteration #130 loss: 0.19023685157299042
Iteration #140 loss: 0.5023806691169739
Iteration #150 loss: 0.4475024342536926
Iteration #160 loss: 0.32887524366378784
Iteration #170 loss: 0.2788355052471161
Iteration #180 loss: 0.4263101816177368
Iteration #190 loss: 0.2986884117126465
Iteration #200 loss: 0.37839964032173157
Iteration #210 loss: 0.37307289242744446
Iteration #220 loss: 0.3672666549682617
Iteration #230 loss: 0.3832627534866333
Iteration #240 loss: 0.3036424219608307
Iteration #250 loss: 0.31010740995407104

KeyboardInterrupt: 