In [8]:
import torch
import segmentation_models_pytorch as smp
import torchvision.models.segmentation as models

In [9]:
model = smp.DeepLabV3Plus(
    encoder_name="mobilenet_v2",  # MobileNetV2 as backbone
    encoder_weights="imagenet",   # Use pretrained ImageNet weights
    classes=1,  # Number of output classes (e.g., clothing vs. background)
    activation="sigmoid"  # Use "softmax" for multi-class segmentation
)

In [10]:
import os 
from torch.utils.data import Dataset,DataLoader
from PIL import Image
import torchvision.transforms as transforms


class ClothingData(Dataset):
    def __init__(self, image_dir,mask_dir, transform=None, mask_transform=None):
        self.image_dir = image_dir
        self.mask_dir = mask_dir
        self.transform = transform
        self.mask_transform = mask_transform
        self.image_filenames = sorted(os.listdir(image_dir))
        self.mask_filenames = sorted(os.listdir(mask_dir))

    def __len__(self):  
        return len(self.image_filenames)

    def __getitem__(self, idx):
        image_path=os.path.join(self.image_dir,self.image_filenames[idx])
        mask_path = os.path.join(self.mask_dir, self.mask_filenames[idx])

        image = Image.open(image_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")

        if self.transform:
            image =self.transform(image)
        if self.mask_transform:
            mask= self.mask_transform(mask)

        return image, mask, idx

In [11]:
transform = transforms.Compose([
    transforms.Pad((0, 0, 10, 7)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

mask_transform = transforms.Compose([
    transforms.Pad((0, 0, 10, 7)),
    transforms.ToTensor()
])

dataset = ClothingData(image_dir="archive\jpeg_images\IMAGES",mask_dir="archive\jpeg_masks\MASKS",transform=transform, mask_transform=mask_transform)

  dataset = ClothingData(image_dir="archive\jpeg_images\IMAGES",mask_dir="archive\jpeg_masks\MASKS",transform=transform, mask_transform=mask_transform)
  dataset = ClothingData(image_dir="archive\jpeg_images\IMAGES",mask_dir="archive\jpeg_masks\MASKS",transform=transform, mask_transform=mask_transform)


In [12]:
train_loader = DataLoader(dataset, batch_size=4, shuffle=True)

In [13]:
images, masks,_ = next(iter(train_loader))
print("Batch shape:", images.shape, masks.shape)

Batch shape: torch.Size([4, 3, 832, 560]) torch.Size([4, 1, 832, 560])


In [14]:
from IPython.display import clear_output
import time
import torch.nn as nn
criterion = nn.BCEWithLogitsLoss()
device = torch.device("cpu")
optimizer = torch.optim.Adam(params=model.parameters(),lr=0.001)
idx = 0
EPOCHS = 10
# def calculate_loss()
for i in range(EPOCHS):
    for idx, _input in enumerate(train_loader):
        optimizer.zero_grad()
        print(idx+1)
        images, masks = _input[0].to(device), _input[1].to(device)
        outputs = model(images)
        # print(images.shape)
        # print(masks.shape)
        # print(outputs.shape)
        # print(outputs[0])
        # print(outputs[1])
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()
        idx+=1
        print(loss.item())
    print(f"Epoch: {i}")

1
0.9747364521026611
2
0.9550235271453857
3
0.9232414960861206
4
0.8893241882324219
5
0.8604052066802979
6
0.8367282748222351
7
0.8203077912330627
8
0.8035755157470703
9
0.7906456589698792
10
0.7816969752311707
11
0.7710587382316589
12
0.7713474035263062
13
0.7601804137229919
14
0.7533364295959473
15
0.7513597011566162
16
0.7443769574165344
17
0.7400384545326233
18
0.7351234555244446
19
0.7319625616073608
20
0.7293255925178528
21
0.7263880372047424
22


KeyboardInterrupt: 