In [1]:
import os
import numpy as np
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import torch
import torch.nn as nn
import torch.optim as optim
from UNet import UNet

In [2]:
class SegmentationDataset(Dataset):
    """Dataset that pairs grayscale images with label masks stored as ``.npy`` files.

    Every ``.jpg``/``.png`` file found in ``image_dir`` is one sample. Its mask
    is looked up in ``mask_dir`` under the same file name with the image
    extension replaced by ``.npy``.

    Args:
        image_dir: Directory containing the input images.
        mask_dir: Directory containing the ``.npy`` label arrays.
        transform: Optional callable applied to the PIL image only (the mask
            is never passed through it).
    """

    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        # Keep image files only. os.listdir() returns entries in arbitrary
        # order, so sort to make the index -> sample mapping reproducible.
        self.images = sorted(
            f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png'))
        )

    def __len__(self):
        """Return the number of image files discovered in ``image_dir``."""
        return len(self.images)

    def _mask_path(self, img_name):
        """Return the path of the ``.npy`` mask that belongs to ``img_name``."""
        # splitext strips only the final extension, so names that contain
        # extra dots (e.g. "scan.v2.png") keep their full stem.
        stem, _ext = os.path.splitext(img_name)
        return os.path.join(self.mask_dir, stem + '.npy')

    def __getitem__(self, idx):
        """Load and return the ``(image, mask)`` pair at position ``idx``.

        Returns:
            A tuple ``(image, mask)``. ``image`` is the grayscale PIL image,
            or whatever ``transform`` returns for it when a transform is set.
            ``mask`` is the loaded array converted to a ``torch.long`` tensor.
        """
        img_name = self.images[idx]
        img_path = os.path.join(self.image_dir, img_name)
        mask_path = self._mask_path(img_name)

        # 'L' mode = single-channel 8-bit grayscale.
        image = Image.open(img_path).convert('L')
        mask = np.load(mask_path)

        # Image preprocessing; the mask is left at its stored resolution.
        # NOTE(review): assumes the stored masks already match the spatial
        # size produced by `transform` -- confirm against the data.
        if self.transform:
            image = self.transform(image)

        # Integer class indices are expected as LongTensor by CrossEntropyLoss.
        mask = torch.from_numpy(mask).long()

        return image, mask

# Image and label directories.
# Build the path with os.path.join instead of a hard-coded backslash so the
# notebook also runs on Linux/macOS (on Windows the result is unchanged).
image_dir = os.path.join('images', 'training')
# NOTE(review): training images are paired with a directory named
# 'test_ground_truths' -- confirm this is the intended label set.
mask_dir = 'test_ground_truths'

# Image preprocessing applied to every input image (not to the masks).
transform = transforms.Compose([
    transforms.Resize((200, 200)),  # force a 200x200 input
    transforms.ToTensor(),  # PIL image -> float tensor, channel-first
])

# Build the dataset and a shuffled mini-batch loader.
train_dataset = SegmentationDataset(image_dir, mask_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

In [3]:
# UNet comes from the local UNet module: one input channel (grayscale),
# four output classes.
model = UNet(in_channels=1, num_classes=4)

# Train on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Loss and optimizer. CrossEntropyLoss handles multi-class segmentation:
# it takes per-class scores and integer class-index targets.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=2e-4)

# Training loop
num_epochs = 25

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for images, masks in train_loader:
        # Move the batch to the same device as the model.
        images = images.to(device)
        masks = masks.to(device)

        # Forward pass.
        # NOTE(review): presumably outputs are [batch, num_classes, H, W] and
        # masks are [batch, H, W] -- confirm against the UNet implementation.
        outputs = model(images)
        loss = criterion(outputs, masks)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        # No torch.cuda.empty_cache() here: calling it every step makes the
        # caching allocator release and re-acquire its blocks each iteration,
        # which slows training without freeing memory for PyTorch itself.

    # Mean batch loss for the epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}')

# Checkpoint destination.
save_path = 'model.pth'

# Persist only the learned parameters (state dict), not the whole module.
torch.save(model.state_dict(), save_path)

Epoch [1/25], Loss: 0.4263
Epoch [2/25], Loss: 0.1957
Epoch [3/25], Loss: 0.1371
Epoch [4/25], Loss: 0.1143
Epoch [5/25], Loss: 0.1016
Epoch [6/25], Loss: 0.0921
Epoch [7/25], Loss: 0.0887
Epoch [8/25], Loss: 0.0837
Epoch [9/25], Loss: 0.0805
Epoch [10/25], Loss: 0.0788
Epoch [11/25], Loss: 0.0760
Epoch [12/25], Loss: 0.0751
Epoch [13/25], Loss: 0.0709
Epoch [14/25], Loss: 0.0752
Epoch [15/25], Loss: 0.0706
Epoch [16/25], Loss: 0.0677
Epoch [17/25], Loss: 0.0660
Epoch [18/25], Loss: 0.0698
Epoch [19/25], Loss: 0.0665
Epoch [20/25], Loss: 0.0643
Epoch [21/25], Loss: 0.0617
Epoch [22/25], Loss: 0.0616
Epoch [23/25], Loss: 0.0632
Epoch [24/25], Loss: 0.0603
Epoch [25/25], Loss: 0.0592
