In [1]:
cd drive/MyDrive/2021-Creative

/content/drive/MyDrive/2021-Creative


In [2]:
import os
import glob
import json

import cv2
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from pycocotools.coco import COCO

import torch
import torch.optim as optim
from torch import nn, Tensor
from torch.utils.data import Dataset, DataLoader
import torchvision
import torchvision.transforms.functional as TF
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [3]:
classes = (
    'top', 'blouse', 't-shirt', 'Knitted fabri', 'shirt', 'bra top', 
    'hood', 'blue jeans', 'pants', 'skirt', 'leggings', 'jogger pants', 
    'coat', 'jacket', 'jumper', 'padding jacket', 'best', 'kadigan', 
    'zip up', 'dress', 'jumpsuit')


class FashionDataset(Dataset):
    def __init__(self, path, transforms=None):
        self.coco = COCO(path)
        self.image_ids = list(self.coco.imgToAnns.keys())
        self.transforms = transforms

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        file_name = self.coco.loadImgs(image_id)[0]['file_name']
        file_name = f'/content/drive/MyDrive/2021-K_fashion/train_new_all/{file_name}'
        image = Image.open(file_name).convert('RGB')

        annot_ids = self.coco.getAnnIds(imgIds=image_id)
        annots = [x for x in self.coco.loadAnns(annot_ids) if x['image_id'] == image_id]
        
        boxes = np.array([annot['bbox'] for annot in annots], dtype=np.float32)
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

        labels = np.array([annot['category_id'] for annot in annots], dtype=np.int32)
        masks = np.array([self.coco.annToMask(annot) for annot in annots], dtype=np.uint8)

        area = np.array([annot['area'] for annot in annots], dtype=np.float32)
        iscrowd = np.array([annot['iscrowd'] for annot in annots], dtype=np.uint8)

        target = {
            'boxes': boxes,
            'masks': masks,
            'labels': labels,
            'area': area,
            'iscrowd': iscrowd}
        
        if self.transforms is not None:
            image, target = self.transforms(image, target)
            
        target['boxes'] = torch.as_tensor(target['boxes'], dtype=torch.float32)
        target['masks'] = torch.as_tensor(target['masks'], dtype=torch.uint8)
        target['labels'] = torch.as_tensor(target['labels'], dtype=torch.int64)
        target['area'] = torch.as_tensor(target['area'], dtype=torch.float32)
        target['iscrowd'] = torch.as_tensor(target['iscrowd'], dtype=torch.uint8)            

        return image, target

In [4]:
class Compose:
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for transform in self.transforms:
            image, target = transform(
                image, target)

        return image, target


class Resize:
    def __init__(self, size, interpolation=Image.BILINEAR):
        self.size = size
        self.interpolation = interpolation

    def __call__(self, image, target):
        w, h = image.size
        image = image.resize(self.size)

        _masks = target['masks'].copy()
        masks = np.zeros((_masks.shape[0], self.size[0], self.size[1]))
        
        for i, v in enumerate(_masks):
            v = Image.fromarray(v).resize(self.size, resample=Image.BILINEAR)
            masks[i] = np.array(v, dtype=np.uint8)

        target['masks'] = masks
        target['boxes'][:, [0, 2]] *= self.size[0] / w
        target['boxes'][:, [1, 3]] *= self.size[1] / h
        
        return image, target
        

class ToTensor:
    def __call__(self, image, target):
        image = TF.to_tensor(image)
        
        return image, target

In [5]:
model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, len(classes)+1)
in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
hidden_layer = 256
model.roi_heads.mask_predictor = MaskRCNNPredictor(
    in_features_mask, hidden_layer, len(classes)+1)

In [6]:
def save_checkpoint(epoch, model, optimizer):
    """
    Save model checkpoint.

    :param epoch: epoch number
    :param model: model
    :param optimizer: optimizer
    """
    state = {'epoch': epoch,
             'model': model,
             'optimizer': optimizer}
    filename = 'checkpoint.tar'
    torch.save(state, filename)

In [7]:
batch_size = 16
lr = 1e-3
max_size = 800
num_workers = 2
print_freq = 500
num_epochs = 5
checkpoint = "checkpoint.tar" # checkpoint path
device = 'cuda:0'

transforms_train = Compose([
    Resize((max_size, max_size)),
    ToTensor()])


def collate_fn(batch):
    return tuple(zip(*batch))

# Load Checkpoint
if checkpoint is None:
    start_epoch = 0
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=lr, weight_decay=1e-5)
else:
    checkpoint = torch.load(checkpoint)
    start_epoch = checkpoint['epoch'] + 1
    print('\nLoaded checkpoint from epoch %d.\n' % start_epoch)
    model = checkpoint['model']
    optimizer = checkpoint['optimizer']

# Data Loader
dataset = FashionDataset('/content/drive/MyDrive/2021-K_fashion/train.json', transforms=transforms_train)
train_loader = DataLoader(
    dataset, batch_size=batch_size, shuffle=True, 
    num_workers=num_workers, collate_fn=collate_fn)

model.to(device)


def train_fn():
    model.train()
    for epoch in range(start_epoch, num_epochs):
        for i, (images, targets) in enumerate(train_loader):
            optimizer.zero_grad()
            images = [image.to(device) for image in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            losses = model(images, targets)
            loss = sum(loss for loss in losses.values())
            
            if i % print_freq == 0:
              print(
                  f"{epoch}, {i}, C: {losses['loss_classifier'].item():.5f}, M: {losses['loss_mask'].item():.5f}, "\
                  f"B: {losses['loss_box_reg'].item():.5f}, O: {losses['loss_objectness'].item():.5f}, T: {loss.item():.5f}")
              
            loss.backward()
            optimizer.step()
        del images, targets, losses
        save_checkpoint(epoch, model, optimizer)


Loaded checkpoint from epoch 1.

loading annotations into memory...
Done (t=1.88s)
creating index...
index created!


In [None]:
train_fn()

1, 0, C: 0.63482, M: 0.66967, B: 0.01352, O: 0.02180, T: 1.48984
1, 500, C: 0.43182, M: 0.65483, B: 0.11669, O: 0.04842, T: 1.25779
1, 1000, C: 172.60710, M: 0.59062, B: 235.00777, O: 43.19550, T: 491.26004
1, 1500, C: 1.09248, M: 0.65886, B: 0.30623, O: 0.25132, T: 2.56951
1, 2000, C: 2.75686, M: 0.65495, B: 0.33983, O: 7.24888, T: 19.57147
1, 2500, C: 0.56451, M: 0.65537, B: 0.12288, O: 0.04438, T: 1.39352
1, 3000, C: 0.41165, M: 0.66035, B: 0.12372, O: 0.04163, T: 1.24574
1, 3500, C: 0.32919, M: 0.66092, B: 0.11382, O: 0.01930, T: 1.12990
1, 4000, C: 0.32494, M: 0.64419, B: 0.15721, O: 0.04611, T: 1.17864
1, 4500, C: 0.23894, M: 0.66690, B: 0.10348, O: 0.04521, T: 1.06423
1, 5000, C: 0.24160, M: 0.67656, B: 0.11298, O: 0.03652, T: 1.07305
1, 5500, C: 0.26479, M: 0.68033, B: 0.13774, O: 0.03671, T: 1.12326
2, 0, C: 0.23628, M: 0.63138, B: 0.12727, O: 0.03895, T: 1.04416
2, 500, C: 0.20308, M: 0.65073, B: 0.09975, O: 7.51303, T: 8.47223
2, 1000, C: 0.24078, M: 0.65304, B: 0.12874, O: 