In [15]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets, transforms, models
from PIL import Image
import torch.nn.functional as F


In [16]:
class SkinCancerDataset(Dataset):
    def __init__(self, metadata, images_dir, transform=None):
        self.metadata = metadata
        self.images_dir = images_dir
        self.transform = transform

    def __len__(self):
        return len(self.metadata)

    def __getitem__(self, idx):
        img_name = os.path.join(self.images_dir, self.metadata.iloc[idx]['isic_id'] + '.jpg')
        image = Image.open(img_name).convert("RGB")

        if self.transform:
            image = self.transform(image)
        
        label = torch.tensor(self.metadata.iloc[idx]['target'], dtype=torch.float32).unsqueeze(0).expand(224, 224)        
        return image, label

In [17]:
class SegmentationDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None, mask_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.image_filenames = os.listdir(image_dir)
        self.transform = transform
        self.mask_transform = mask_transform
        
    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_X = self.image_filenames[idx]
        img_Y = img_X.replace("X", "Y")
        image = Image.open(os.path.join(self.image_dir, img_X)).convert("RGB")
        mask = Image.open(os.path.join(self.mask_dir, img_Y)).convert("L")
        
        if self.transform:
            image = self.transform(image)
            mask = self.mask_transform(mask)

        mask = (mask > 0).float()
        
        return image, mask

In [18]:
class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1, init_features=64):
        super(UNet, self).__init__()

        features = init_features
        self.encoder1 = UNet._block(in_channels, features, name="enc1")
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder2 = UNet._block(features, features * 2, name="enc2")
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder3 = UNet._block(features * 2, features * 4, name="enc3")
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.encoder4 = UNet._block(features * 4, features * 8, name="enc4")
        self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.bottleneck = UNet._block(features * 8, features * 16, name="bottleneck")

        self.upconv4 = nn.ConvTranspose2d(
            features * 16, features * 8, kernel_size=2, stride=2
        )
        self.decoder4 = UNet._block((features * 8) * 2, features * 8, name="dec4")
        self.upconv3 = nn.ConvTranspose2d(
            features * 8, features * 4, kernel_size=2, stride=2
        )
        self.decoder3 = UNet._block((features * 4) * 2, features * 4, name="dec3")
        self.upconv2 = nn.ConvTranspose2d(
            features * 4, features * 2, kernel_size=2, stride=2
        )
        self.decoder2 = UNet._block((features * 2) * 2, features * 2, name="dec2")
        self.upconv1 = nn.ConvTranspose2d(
            features * 2, features, kernel_size=2, stride=2
        )
        self.decoder1 = UNet._block(features * 2, features, name="dec1")

        self.conv = nn.Conv2d(
            in_channels=features, out_channels=out_channels, kernel_size=1
        )

    def forward(self, x):
        enc1 = self.encoder1(x)
        enc2 = self.encoder2(self.pool1(enc1))
        enc3 = self.encoder3(self.pool2(enc2))
        enc4 = self.encoder4(self.pool3(enc3))

        bottleneck = self.bottleneck(self.pool4(enc4))

        dec4 = self.upconv4(bottleneck)
        dec4 = torch.cat((dec4, self.crop(enc4, dec4)), dim=1)
        dec4 = self.decoder4(dec4)
        
        dec3 = self.upconv3(dec4)
        dec3 = torch.cat((dec3, self.crop(enc3, dec3)), dim=1)
        dec3 = self.decoder3(dec3)
        
        dec2 = self.upconv2(dec3)
        dec2 = torch.cat((dec2, self.crop(enc2, dec2)), dim=1)
        dec2 = self.decoder2(dec2)
        
        dec1 = self.upconv1(dec2)
        dec1 = torch.cat((dec1, self.crop(enc1, dec1)), dim=1)
        dec1 = self.decoder1(dec1)

        return torch.sigmoid(self.conv(dec1))

    @staticmethod
    def _block(in_channels, features, name):
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=features,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(num_features=features),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                in_channels=features,
                out_channels=features,
                kernel_size=3,
                padding=1,
                bias=False,
            ),
            nn.BatchNorm2d(num_features=features),
            nn.ReLU(inplace=True),
        )
        
    def crop(self, enc, dec):
        _, _, H, W = dec.size()
        enc = F.interpolate(enc, size=(H, W), mode='bilinear', align_corners=False)
        return enc
    
# Kiểm tra mô hình
if __name__ == "__main__":
    model = UNet(in_channels=1, out_channels=1, init_features=32)
    x = torch.randn((1, 1, 572, 572))
    preds = model(x)
    print(preds.shape)


torch.Size([1, 1, 560, 560])


In [19]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
mask_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [20]:
# metadata = pd.read_csv(r"D:\SkinCancerPrediction\Original\isic-2024-challenge\train-metadata.csv")

In [21]:
# image_names = []

# # Read all the names of images in the folder
# for filename in os.listdir(r"D:\SkinCancerPrediction\Model\Test\Image\HairRemoved"):
#     if filename.endswith('.jpg') or filename.endswith('.png'):
#         image_names.append(filename)
# image_names_no_ext = [os.path.splitext(name)[0] for name in image_names]

# metadata = metadata[metadata['isic_id'].isin(image_names_no_ext)]
# metadata = metadata[['isic_id', 'target']]

In [22]:
# metadata

In [23]:
init_features = 32
learning_rate = 0.001
batch_size = 64
num_epochs = 100

In [24]:
train_dataset = SegmentationDataset(
    image_dir= r"D:\SkinCancerPrediction\PH2_segmentation\trainx",
    mask_dir= r"D:\SkinCancerPrediction\PH2_segmentation\trainy",
    transform=transform,
    mask_transform=mask_transform
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)

In [25]:
model = UNet(in_channels=3, out_channels=1, init_features=init_features)
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [26]:
# Move the model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

In [27]:
import torch

def test_shapes(data_loader, model, device):
    # Set model to evaluation mode
    model.eval()
    
    # Fetch a single batch
    images, masks = next(iter(data_loader))
    
    # Move data to the specified device
    images = images.to(device)
    masks = masks.to(device)
    
    # Forward pass through the model
    with torch.no_grad():
        outputs = model(images)
    
    # Print shapes
    print(f"Image batch shape: {images.shape}")
    print(f"Label batch shape: {masks.shape}")
    print(f"Model output shape: {outputs.shape}")


# Example usage:
if __name__ == "__main__":
    # Assuming you have the following variables defined:
    # - train_loader: your DataLoader
    # - model: your model instance
    # - device: your computing device (cpu or cuda)
    
    test_shapes(train_loader, model, device)


Image batch shape: torch.Size([64, 3, 224, 224])
Label batch shape: torch.Size([64, 1, 224, 224])
Model output shape: torch.Size([64, 1, 224, 224])


In [28]:
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for images, masks in train_loader:
        images = images.to(device)
        masks = masks.to(device)

        optimizer.zero_grad()
        
        outputs = model(images)

        outputs = outputs.float()
        masks = masks.float()

        
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    epoch_loss = running_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}')

# Save the trained model
torch.save(model.state_dict(), 'unet_skin_cancer.pth')

Epoch 1/100, Loss: 0.5626
Epoch 2/100, Loss: 0.4485
Epoch 3/100, Loss: 0.3992
Epoch 4/100, Loss: 0.3924
Epoch 5/100, Loss: 0.3509
Epoch 6/100, Loss: 0.3555
Epoch 7/100, Loss: 0.3330
Epoch 8/100, Loss: 0.3272
Epoch 9/100, Loss: 0.3446
Epoch 10/100, Loss: 0.3004
Epoch 11/100, Loss: 0.3051
Epoch 12/100, Loss: 0.2822
Epoch 13/100, Loss: 0.2601
Epoch 14/100, Loss: 0.2763
Epoch 15/100, Loss: 0.2669
Epoch 16/100, Loss: 0.2624
Epoch 17/100, Loss: 0.2390
Epoch 18/100, Loss: 0.2339
Epoch 19/100, Loss: 0.2331
Epoch 20/100, Loss: 0.2192
Epoch 21/100, Loss: 0.2366
Epoch 22/100, Loss: 0.2082
Epoch 23/100, Loss: 0.2010
Epoch 24/100, Loss: 0.2155
Epoch 25/100, Loss: 0.2026
Epoch 26/100, Loss: 0.1912
Epoch 27/100, Loss: 0.1856
Epoch 28/100, Loss: 0.2393
Epoch 29/100, Loss: 0.2096
Epoch 30/100, Loss: 0.2024
Epoch 31/100, Loss: 0.1973
Epoch 32/100, Loss: 0.1945
Epoch 33/100, Loss: 0.1875
Epoch 34/100, Loss: 0.1725
Epoch 35/100, Loss: 0.1641
Epoch 36/100, Loss: 0.1559
Epoch 37/100, Loss: 0.1544
Epoch 38/1