In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torch.nn.functional as F
from torchvision import transforms
from PIL import Image
import os

# Định nghĩa mô hình UNet lớn hơn
class UNet(nn.Module):
    def __init__(self, in_channels, out_channels):
        super(UNet, self).__init__()
        
        # Encoder
        self.encoder1 = self.conv_block(in_channels, 32)
        self.encoder2 = self.conv_block(32, 64)
        self.encoder3 = self.conv_block(64, 128)
        self.encoder4 = self.conv_block(128, 256)
        self.encoder5 = self.conv_block(256, 512)
        
        # Bottleneck
        self.bottleneck = self.conv_block(512, 1024)
        
        # Decoder
        self.upconv5 = self.upconv_block(1024, 512)
        self.decoder5 = self.conv_block(1024, 512)
        
        self.upconv4 = self.upconv_block(512, 256)
        self.decoder4 = self.conv_block(512, 256)
        
        self.upconv3 = self.upconv_block(256, 128)
        self.decoder3 = self.conv_block(256, 128)
        
        self.upconv2 = self.upconv_block(128, 64)
        self.decoder2 = self.conv_block(128, 64)
        
        self.upconv1 = self.upconv_block(64, 32)
        self.decoder1 = self.conv_block(64, 32)
        
        # Final layer
        self.final_conv = nn.Conv2d(32, out_channels, kernel_size=1)
        
    def conv_block(self, in_channels, out_channels):
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True)
        )
    
    def upconv_block(self, in_channels, out_channels):
        return nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)
    
    def crop_tensor(self, tensor, target_size):
        _, _, H, W = tensor.size()
        diffY = (H - target_size[0]) // 2
        diffX = (W - target_size[1]) // 2
        return tensor[:, :, diffY:(diffY + target_size[0]), diffX:(diffX + target_size[1])]
    
    def forward(self, x):
        # Encoder
        enc1 = self.encoder1(x)
        enc2 = self.encoder2(F.max_pool2d(enc1, 2))
        enc3 = self.encoder3(F.max_pool2d(enc2, 2))
        enc4 = self.encoder4(F.max_pool2d(enc3, 2))
        enc5 = self.encoder5(F.max_pool2d(enc4, 2))
        
        # Bottleneck
        bottleneck = self.bottleneck(F.max_pool2d(enc5, 2))
        
        # Decoder
        dec5 = self.upconv5(bottleneck)
        enc5 = self.crop_tensor(enc5, dec5.shape[2:])
        dec5 = torch.cat((dec5, enc5), dim=1)
        dec5 = self.decoder5(dec5)
        
        dec4 = self.upconv4(dec5)
        enc4 = self.crop_tensor(enc4, dec4.shape[2:])
        dec4 = torch.cat((dec4, enc4), dim=1)
        dec4 = self.decoder4(dec4)
        
        dec3 = self.upconv3(dec4)
        enc3 = self.crop_tensor(enc3, dec3.shape[2:])
        dec3 = torch.cat((dec3, enc3), dim=1)
        dec3 = self.decoder3(dec3)
        
        dec2 = self.upconv2(dec3)
        enc2 = self.crop_tensor(enc2, dec2.shape[2:])
        dec2 = torch.cat((dec2, enc2), dim=1)
        dec2 = self.decoder2(dec2)
        
        dec1 = self.upconv1(dec2)
        enc1 = self.crop_tensor(enc1, dec1.shape[2:])
        dec1 = torch.cat((dec1, enc1), dim=1)
        dec1 = self.decoder1(dec1)
        
        # Final layer
        out = self.final_conv(dec1)
        
        return out



In [2]:
# Định nghĩa Dataset
class MedicalDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.image_files = os.listdir(image_dir)
    
    def __len__(self):
        return len(self.image_files)
    
    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_files[idx])
        mask_path = os.path.join(self.mask_dir, self.image_files[idx])
        
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")  # Grayscale mask
        
        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        
        return image, mask



In [3]:
# Chuẩn bị dữ liệu
transform = transforms.Compose([
    transforms.Resize((572, 572)),
    transforms.ToTensor()
])
image_url = "kvasir-dataset-for-classification-and-segmentation/kvasir-seg/Kvasir-SEG/images/"
mask_url = "kvasir-dataset-for-classification-and-segmentation/kvasir-seg/Kvasir-SEG/masks"
train_dataset = MedicalDataset(image_dir=image_url, mask_dir=mask_url, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)

In [None]:
# Khởi tạo mô hình, hàm loss và optimizer

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# device = 'cpu'

model = UNet(in_channels=3, out_channels=1).to(device)
criterion = nn.BCEWithLogitsLoss()  # Binary Cross-Entropy Loss
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [5]:
# Huấn luyện mô hình
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, masks in train_loader:
        images = images.to(device)
        masks = masks.to(device)
        
        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, masks)
        
        # Backward pass và tối ưu hóa
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

print("Training complete!")

ValueError: Target size (torch.Size([4, 1, 572, 572])) must be the same as input size (torch.Size([4, 1, 544, 544]))