<a href="https://colab.research.google.com/github/Ayush310803/ViT/blob/main/unet_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [29]:
import os
import zipfile

os.environ["KAGGLE_CONFIG_DIR"] = "/content"
!kaggle datasets download -d rajkumarl/people-clothing-segmentation

dataset_zip = "/content/people-clothing-segmentation.zip"
extract_path = "/content/dataset"
os.makedirs(extract_path, exist_ok=True)

with zipfile.ZipFile(dataset_zip, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("✅ Dataset extracted successfully.")
!ls /content/dataset

image_dir = "/content/dataset/jpeg_images/IMAGES"
mask_dir = "/content/dataset/jpeg_masks/MASKS"
label_path = "/content/dataset/labels.csv"

Dataset URL: https://www.kaggle.com/datasets/rajkumarl/people-clothing-segmentation
License(s): CC0-1.0
people-clothing-segmentation.zip: Skipping, found more recently modified local copy (use --force to force download)
✅ Dataset extracted successfully.
 jpeg_images   jpeg_masks  'labels (1).csv'   labels.csv   png_images   png_masks


In [30]:
import numpy as np
import pandas as pd
import os
import torch
import torchvision.transforms.functional as tfunc
from torch import nn
from PIL import Image
from torch.utils.data import Dataset, DataLoader, random_split
from torchvision import transforms
import matplotlib.pyplot as plt
import torchvision.transforms.functional as F
import torch.optim as optim

In [31]:
class DoubleConvolution(nn.Module):
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.first = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1)
        self.act1 = nn.ReLU()
        self.second = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.act2 = nn.ReLU()

    def forward(self, x: torch.Tensor):
        x = self.first(x)
        x = self.act1(x)
        x = self.second(x)
        return self.act2(x)

In [32]:
class DownSample(nn.Module):
    def __init__(self):
        super().__init__()
        self.pool = nn.MaxPool2d(2)

    def forward(self, x: torch.Tensor):
        return self.pool(x)

# Define the UpSample class


# Define the CropAndConcat class


In [33]:
class UpSample(nn.Module):
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.up = nn.ConvTranspose2d(in_channels, out_channels, kernel_size=2, stride=2)

    def forward(self, x: torch.Tensor):
        return self.up(x)

In [34]:
class CropAndConcat(nn.Module):
    def forward(self, x: torch.Tensor, x_contract: torch.Tensor):
        x_contract = tfunc.center_crop(x_contract, [x.shape[2], x.shape[3]])
        x = torch.cat([x, x_contract], dim=1)
        return x

In [35]:
class UNet(nn.Module):
    def __init__(self, in_channels: int, out_channels: int):
        super().__init__()
        self.down_conv = nn.ModuleList([DoubleConvolution(i, o) for i, o in
                                        [(in_channels, 64), (64, 128), (128, 256), (256, 512)]])
        self.down_sample = nn.ModuleList([DownSample() for _ in range(4)])
        self.mid_conv = DoubleConvolution(512, 1024)
        self.up_sample = nn.ModuleList([UpSample(i, o) for i, o in
                                        [(1024, 512), (512, 256), (256, 128), (128, 64)]])
        self.up_conv = nn.ModuleList([DoubleConvolution(i, o) for i, o in
                                      [(1024, 512), (512, 256), (256, 128), (128, 64)]])
        self.concat = nn.ModuleList([CropAndConcat() for _ in range(4)])
        self.final_conv = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x: torch.Tensor):
        pass_thru = []
        for i in range(len(self.down_conv)):
            x = self.down_conv[i](x)
            pass_thru.append(x)
            x = self.down_sample[i](x)
        x = self.mid_conv(x)
        for i in range(len(self.up_conv)):
            x = self.up_sample[i](x)
            x = self.concat[i](x, pass_thru.pop())
            x = self.up_conv[i](x)
        x = self.final_conv(x)
        return x

In [36]:
class ClothingSegmentationDataset(Dataset):
    def __init__(self, image_dir, mask_dir, transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.image_filenames = sorted(os.listdir(image_dir))
        self.mask_filenames = sorted(os.listdir(mask_dir))

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_path = os.path.join(self.image_dir, self.image_filenames[idx])
        mask_path = os.path.join(self.mask_dir, self.mask_filenames[idx])
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")
        if self.transform:
            image = self.transform(image)
            mask = self.transform(mask)
        return image, mask


In [37]:
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor()
])

dataset = ClothingSegmentationDataset(image_dir, mask_dir, transform=transform)

In [38]:
test_size = int(0.1 * len(dataset))
val_size = int(0.1 * len(dataset))
train_size = len(dataset) - val_size - test_size
xtrain, xval, xtest = random_split(dataset, [train_size, val_size, test_size])

train_loader = DataLoader(xtrain, batch_size=16, shuffle=True)
val_loader = DataLoader(xval, batch_size=16, shuffle=False)

label_path = "/content/dataset/labels.csv"
labels = pd.read_csv(label_path)

In [39]:
model = UNet(3, 59)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.99)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.1, patience=5)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

UNet(
  (down_conv): ModuleList(
    (0): DoubleConvolution(
      (first): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act1): ReLU()
      (second): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act2): ReLU()
    )
    (1): DoubleConvolution(
      (first): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act1): ReLU()
      (second): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act2): ReLU()
    )
    (2): DoubleConvolution(
      (first): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act1): ReLU()
      (second): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act2): ReLU()
    )
    (3): DoubleConvolution(
      (first): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act1): ReLU()
      (second): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (act2): ReLU()
    )
 

In [None]:
num_epochs = 25
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for images, masks in train_loader:
        images, masks = images.to(device), masks.to(device)
        masks = masks.squeeze(1)
        masks = masks.long()
        outputs = model(images)
        loss = criterion(outputs, masks)
        epoch_loss += loss.item()
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print(f"Epoch: {epoch+1}/{num_epochs}, Loss: {epoch_loss/len(train_loader)}")

    if (epoch+1)%5 == 0:
        model.eval()
        val_loss = 0
        for images, masks in val_loader:
            images, masks = images.to(device), masks.to(device)
            masks = masks.squeeze(1)
            masks = masks.long()
            outputs = model(images)
            loss = criterion(outputs, masks)
            val_loss += loss.item()
            scheduler.step(val_loss)
        print(f"Validation Loss: {val_loss/len(val_loader)}")

Epoch: 1/25, Loss: 3.3288028228282927


In [None]:
test_loader = DataLoader(xtest, batch_size=16, shuffle=False)
model.eval()
test_loss = 0
for images, masks in test_loader:
    images, masks = images.to(device), masks.to(device)
    masks = masks.squeeze(1)
    masks = masks.long()
    outputs = model(images)
    loss = criterion(outputs, masks)
    test_loss += loss.item()
print(f"Test Loss: {test_loss/len(test_loader)}")

In [None]:
val_iter = iter(val_loader)
images, masks = next(val_iter)
img_idx = 0
image = images[img_idx]
mask = masks[img_idx]
output = model(image.to(device))
image = F.to_pil_image(output)
mask = F.to_pil_image(mask)
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
ax[0].imshow(image)
ax[0].set_title("Image")
ax[0].axis("off")
ax[1].imshow(mask, cmap="gray")
ax[1].set_title("Mask")
ax[1].axis("off")
plt.tight_layout()
plt.show()