In [69]:
import os

# Define the base directory
base_dir = "/kaggle/input/helen-face-segmentation-dataset"

# Walk through the dataset directory
for dirname, _, filenames in os.walk(base_dir):
    for filename in filenames:
        print(os.path.join(dirname, filename))
        break

/kaggle/input/helen-face-segmentation-dataset/helenstar_release/f1_score.py
/kaggle/input/helen-face-segmentation-dataset/helenstar_release/test_resize/155093594_1_label.png
/kaggle/input/helen-face-segmentation-dataset/helenstar_release/test/155093594_1_label.png
/kaggle/input/helen-face-segmentation-dataset/helenstar_release/train/2056280349_1_image.jpg


In [70]:
from glob import glob

train_dir = "/kaggle/input/helen-face-segmentation-dataset/helenstar_release/train"

image_files = sorted(glob(os.path.join(train_dir, "*_image.jpg")))
label_files = sorted(glob(os.path.join(train_dir, "*_label.png")))
viz_files = sorted(glob(os.path.join(train_dir, "*_viz.jpg")))

In [71]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
from tqdm import tqdm

In [72]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [73]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

target_transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
])

In [75]:
base_dir = "/kaggle/input/helen-face-segmentation-dataset/helenstar_release/train"

dataset = CustomDataset(
    base_dir=base_dir,
    transform=transform,
    target_transform=target_transform
)

dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)


In [74]:
class CustomDataset(Dataset):
    def __init__(self, base_dir, img_size=(512, 512), transform=None, target_transform=None):
        self.base_dir = base_dir
        self.image_files = sorted(glob(os.path.join(base_dir, "*_image.jpg")))
        self.mask_files = sorted(glob(os.path.join(base_dir, "*_label.png")))

        assert len(self.image_files) == len(self.mask_files), "Number of images and masks do not match!"

        self.img_size = img_size

        self.transform = transform if transform else transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor()
        ])
        self.target_transform = transforms.Compose([
            transforms.Resize((256, 256)),  # Ensure masks match model output
            transforms.ToTensor()
        ])


    def __len__(self):
        return len(self.image_files)

    def __getitem__(self, idx):
        img_path = self.image_files[idx]
        mask_path = self.mask_files[idx]

        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L") 

        image = self.transform(image)
        mask = self.target_transform(mask)

        return image, mask


In [76]:

dataset = CustomDataset(base_dir = "/kaggle/input/helen-face-segmentation-dataset/helenstar_release/train"
, transform=transform)

dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=4, pin_memory=True)

In [77]:
import torch.nn.functional as F

def dice_loss(pred, target, smooth=1e-6):
    pred = torch.sigmoid(pred)
    
    pred_flat = pred.view(-1)
    target_flat = target.view(-1)
    
    intersection = torch.sum(pred_flat * target_flat)
    dice_score = (2. * intersection + smooth) / (torch.sum(pred_flat) + torch.sum(target_flat) + smooth)
    
    return 1 - dice_score 


In [78]:

# U-Net Model
class UNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1):
        super(UNet, self).__init__()

        def conv_block(in_c, out_c):
            return nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_c, out_c, kernel_size=3, padding=1),
                nn.ReLU(inplace=True)
            )

        self.encoder = nn.ModuleList([
            conv_block(in_channels, 64),
            conv_block(64, 128),
            conv_block(128, 256),
            conv_block(256, 512),
        ])

        self.pool = nn.MaxPool2d(2, 2)

        self.bottleneck = conv_block(512, 1024)

        self.upconv = nn.ModuleList([
            nn.ConvTranspose2d(1024, 512, kernel_size=2, stride=2),
            nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2),
            nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2),
            nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2),
        ])

        self.decoder = nn.ModuleList([
            conv_block(1024, 512),
            conv_block(512, 256),
            conv_block(256, 128),
            conv_block(128, 64),
        ])

        self.final = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        enc_features = []
        for enc in self.encoder:
            x = enc(x)
            enc_features.append(x)
            x = self.pool(x)

        x = self.bottleneck(x)

        for i, (up, dec) in enumerate(zip(self.upconv, self.decoder)):
            x = up(x)
            x = torch.cat([x, enc_features[-(i+1)]], dim=1)
            x = dec(x)

        return self.final(x)


# SegNet Model
class SegNet(nn.Module):
    def __init__(self, in_channels=3, out_channels=1):
        super(SegNet, self).__init__()

        def encoder_block(in_c, out_c):
            return nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True)
            )

        def decoder_block(in_c, out_c):
            return nn.Sequential(
                nn.Conv2d(in_c, out_c, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_c),
                nn.ReLU(inplace=True)
            )

        self.enc1 = encoder_block(in_channels, 64)
        self.enc2 = encoder_block(64, 128)
        self.enc3 = encoder_block(128, 256)
        self.enc4 = encoder_block(256, 512)

        self.pool = nn.MaxPool2d(2, 2, return_indices=True)

        self.dec4 = decoder_block(512, 256)
        self.dec3 = decoder_block(256, 128)
        self.dec2 = decoder_block(128, 64)
        self.dec1 = decoder_block(64, out_channels)

        self.unpool = nn.MaxUnpool2d(2, 2)

    def forward(self, x):
        x, idx1 = self.pool(self.enc1(x))
        x, idx2 = self.pool(self.enc2(x))
        x, idx3 = self.pool(self.enc3(x))
        x, idx4 = self.pool(self.enc4(x))

        x = self.unpool(x, idx4)
        x = self.dec4(x)
        x = self.unpool(x, idx3)
        x = self.dec3(x)
        x = self.unpool(x, idx2)
        x = self.dec2(x)
        x = self.unpool(x, idx1)
        x = self.dec1(x)

        return x


In [79]:
torch.cuda.empty_cache()

In [80]:
def train(model, dataloader, epochs=10, lr=0.001):
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()
    scaler = torch.amp.GradScaler('cuda')  
    
    for epoch in range(epochs):
        model.train()
        epoch_loss = 0
        for images, masks in tqdm(dataloader):
            images, masks = images.to(device), masks.to(device)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast(): 
                outputs = model(images)
                loss = criterion(outputs, masks) + dice_loss(outputs, masks)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            
            epoch_loss += loss.item()
        
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss/len(dataloader):.4f}")

    return model

# Train U-Net
print("Training U-Net...")
unet = UNet().to(device)
unet = train(unet, dataloader)


Training U-Net...


  with torch.cuda.amp.autocast():
100%|██████████| 250/250 [00:44<00:00,  5.59it/s]


Epoch [1/10], Loss: 1.0474


100%|██████████| 250/250 [00:44<00:00,  5.56it/s]


Epoch [2/10], Loss: 1.0269


100%|██████████| 250/250 [00:45<00:00,  5.47it/s]


Epoch [3/10], Loss: 1.1489


100%|██████████| 250/250 [00:46<00:00,  5.43it/s]


Epoch [4/10], Loss: 1.0256


100%|██████████| 250/250 [00:46<00:00,  5.32it/s]


Epoch [5/10], Loss: 1.0237


100%|██████████| 250/250 [00:46<00:00,  5.32it/s]


Epoch [6/10], Loss: 1.0227


100%|██████████| 250/250 [00:46<00:00,  5.33it/s]


Epoch [7/10], Loss: 1.0213


100%|██████████| 250/250 [00:47<00:00,  5.28it/s]


Epoch [8/10], Loss: 1.0202


100%|██████████| 250/250 [00:47<00:00,  5.30it/s]


Epoch [9/10], Loss: 1.0198


100%|██████████| 250/250 [00:47<00:00,  5.30it/s]

Epoch [10/10], Loss: 1.0193





In [81]:
import numpy as np
from sklearn.metrics import confusion_matrix

def compute_iou(pred, target, num_classes=2):
    iou_list = []
    pred = pred.view(-1)
    target = target.view(-1)
    
    for i in range(num_classes):
        intersection = torch.sum((pred == i) & (target == i)).item()
        union = torch.sum((pred == i) | (target == i)).item()
        iou = intersection / (union + 1e-6)  # To avoid division by zero
        iou_list.append(iou)
    
    return np.array(iou_list)

def compute_dice_coefficient(pred, target, num_classes=2):
    dice_list = []
    pred = pred.view(-1)
    target = target.view(-1)
    
    for i in range(num_classes):
        intersection = torch.sum((pred == i) & (target == i)).item()
        dice = (2 * intersection) / (torch.sum(pred == i).item() + torch.sum(target == i).item() + 1e-6)
        dice_list.append(dice)
    
    return np.array(dice_list)

def compute_pixel_accuracy(pred, target):
    pred = pred.view(-1)
    target = target.view(-1)
    
    correct_pixels = torch.sum(pred == target).item()
    total_pixels = target.numel()
    
    return correct_pixels / total_pixels

def evaluate(model, dataloader, device):
    model.eval()
    all_iou = []
    all_dice = []
    all_pixel_accuracy = []
    
    with torch.no_grad():
        for images, masks in dataloader:
            images, masks = images.to(device), masks.to(device)
            outputs = model(images)
            pred = torch.argmax(outputs, dim=1)
            
            iou = compute_iou(pred, masks)
            dice = compute_dice_coefficient(pred, masks)
            pixel_accuracy = compute_pixel_accuracy(pred, masks)
            
            all_iou.append(iou)
            all_dice.append(dice)
            all_pixel_accuracy.append(pixel_accuracy)
    
    mean_iou = np.mean(all_iou, axis=0)
    mean_dice = np.mean(all_dice, axis=0)
    mean_pixel_accuracy = np.mean(all_pixel_accuracy)
    
    print("Mean IoU: ", mean_iou)
    print("Mean Dice Coefficient: ", mean_dice)
    print("Mean Pixel Accuracy: ", mean_pixel_accuracy)

evaluate(unet, dataloader, device)


Mean IoU:  [0.67656085 0.        ]
Mean Dice Coefficient:  [0.80567518 0.        ]
Mean Pixel Accuracy:  0.6765608476911272
