## **Via Unet method**

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

In [4]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Check for GPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [6]:
def iou_loss(pred, target, smooth=1e-6):
    pred = torch.sigmoid(pred)
    intersection = (pred * target).sum()
    union = pred.sum() + target.sum() - intersection
    return 1 - (intersection + smooth) / (union + smooth)

In [7]:
# Define dataset class
class FaceMaskDataset(Dataset):
    def __init__(self, image_dir, mask_dir=None, transform=None, img_size=(512, 512)):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.image_filenames = os.listdir(image_dir)
        self.transform = transform
        self.img_size = img_size

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_filenames[idx])

        image = cv2.imread(img_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, self.img_size) / 255.0

        if self.mask_dir:
            mask_path = os.path.join(self.mask_dir, self.image_filenames[idx])
            if not os.path.exists(mask_path) or cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE) is None:
                mask = np.zeros(self.img_size, dtype=np.uint8)
            else:
                mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                mask = cv2.resize(mask, self.img_size)
            mask = np.expand_dims(mask, axis=0) / 255.0
            mask_tensor = torch.tensor(mask, dtype=torch.float32)
        else:
            mask_tensor = torch.zeros((1, *self.img_size), dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, mask_tensor

In [8]:
# Define transformations
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor()
])

# Load dataset
train_dataset = FaceMaskDataset("/content/drive/MyDrive/MSFD/1/face_crop", "/content/drive/MyDrive/MSFD/1/face_crop_segmentation", transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, num_workers=4, pin_memory=True)



In [9]:
# Define U-Net model
class UNet(nn.Module):
    def __init__(self):
        super(UNet, self).__init__()

        def conv_block(in_channels, out_channels):
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(inplace=True)
            )

        self.enc1 = conv_block(3, 32)
        self.enc2 = conv_block(32, 64)
        self.enc3 = conv_block(64, 128)
        self.enc4 = conv_block(128, 256)
        self.bottleneck = conv_block(256, 512)

        self.up1 = nn.ConvTranspose2d(512, 256, kernel_size=2, stride=2)
        self.dec1 = conv_block(512, 256)

        self.up2 = nn.ConvTranspose2d(256, 128, kernel_size=2, stride=2)
        self.dec2 = conv_block(256, 128)

        self.up3 = nn.ConvTranspose2d(128, 64, kernel_size=2, stride=2)
        self.dec3 = conv_block(128, 64)

        self.up4 = nn.ConvTranspose2d(64, 32, kernel_size=2, stride=2)
        self.dec4 = conv_block(64, 32)

        self.final = nn.Conv2d(32, 1, kernel_size=1)
        self.pool = nn.MaxPool2d(2)

    def forward(self, x):
        e1 = self.enc1(x)
        e2 = self.enc2(self.pool(e1))
        e3 = self.enc3(self.pool(e2))
        e4 = self.enc4(self.pool(e3))
        b = self.bottleneck(self.pool(e4))

        d1 = self.dec1(torch.cat([self.up1(b), e4], dim=1))
        d2 = self.dec2(torch.cat([self.up2(d1), e3], dim=1))
        d3 = self.dec3(torch.cat([self.up3(d2), e2], dim=1))
        d4 = self.dec4(torch.cat([self.up4(d3), e1], dim=1))

        return self.final(d4)

In [None]:
# Initialize model
model = UNet().to(device)
optimizer = optim.Adam(model.parameters(), lr=0.0005)
# criterion = nn.BCEWithLogitsLoss()
criterion = iou_loss

def train(model, dataloader, epochs=10):
    model.train()
    scaler = torch.cuda.amp.GradScaler()
    for epoch in range(epochs):
        epoch_loss = 0
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, masks in progress_bar:
            images, masks = images.to(device).float(), masks.to(device).float()
            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                outputs = model(images)
                loss = criterion(outputs, masks)
            scaler.scale(loss).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)  # Clip gradients
            scaler.step(optimizer)
            scaler.update()
            epoch_loss += loss.item()
            progress_bar.set_postfix(loss=epoch_loss / len(dataloader))

# Train the model
train(model, train_loader, epochs=10)


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():
Epoch 1/10:   1%|          | 4/587 [00:28<39:21,  4.05s/it, loss=0.00513]  

In [None]:
torch.save(model.state_dict(), "unet_face_mask.pth")

In [None]:
def test_and_visualize(model, dataloader):
    model.eval()
    with torch.no_grad():
        fig, ax = plt.subplots(5, 2, figsize=(10, 25))
        for i, (images, _) in enumerate(dataloader):
            if i >= 5:
                break
            images = images.to(device).float()
            outputs = torch.sigmoid(model(images))
            image_np = images[0].cpu().permute(1, 2, 0).numpy()
            output_np = outputs[0].cpu().squeeze().numpy()
            ax[i, 0].imshow(image_np)
            ax[i, 0].set_title("Original Image")
            ax[i, 1].imshow(output_np, cmap='gray')
            ax[i, 1].set_title("Predicted Mask")
        plt.show()

In [None]:
test_dataset = FaceMaskDataset("/content/drive/MyDrive/MSFD/2/img", mask_dir=None, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=True)
test_and_visualize(model, test_loader)