In [2]:
import os
import glob
import numpy as np
import torch
import torchvision
from PIL import Image, ImageDraw
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T

In [3]:
from decouple import config

BASE_DIR = config('BASE_DIR', default='', cast=str)
dataset_path = os.path.join(BASE_DIR, 'data-science-bowl-2018')

In [4]:
import cv2
class Rescale(object):
    """Rescale the image in a sample to a given size.

    Args:
        output_size (tuple or int): Desired output size. If tuple, output is
            matched to output_size. If int, smaller of image edges is matched
            to output_size keeping aspect ratio the same.
    """

    def __init__(self, output_size):
        assert isinstance(output_size, (int, tuple))
        self.output_size = output_size

    def __call__(self, image, targets):

        h, w = image.shape[:2]
        if isinstance(self.output_size, int):
            if h > w:
                new_h, new_w = self.output_size * h / w, self.output_size
            else:
                new_h, new_w = self.output_size, self.output_size * w / h
        else:
            new_h, new_w = self.output_size

        new_h, new_w = int(new_h), int(new_w)

        #image = np.resize(image, (new_h, new_w, 3))
        image = cv2.resize(image, dsize=(new_h, new_w), interpolation=cv2.INTER_CUBIC)

        # h and w are swapped for landmarks because for images,
        # x and y axes are axis 1 and 0 respectively
        ratio_height = new_h / h
        ratio_width = new_w / w

        xmin, ymin, xmax, ymax = targets[0]["boxes"].unbind(1)

        xmin = xmin * ratio_width
        xmax = xmax * ratio_width
        ymin = ymin * ratio_height
        ymax = ymax * ratio_height
        
        targets[0]["boxes"] = torch.stack((xmin, ymin, xmax, ymax), dim=1)

        return image, targets

In [5]:
class ToTensor(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, image, targets):

        # swap color axis because
        # numpy image: H x W x C
        # torch image: C X H X W

        image = image.transpose((2, 0, 1))
        image = torch.from_numpy(image).float()
        return image, targets

In [6]:
class Compose(object):
    """Composes several transforms together.

    Args:
        transforms (list of ``Transform`` objects): list of transforms to compose.

    Example:
        >>> transforms.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, targets):
        for t in self.transforms:
            image, targets = t(image, targets)
        return image, targets

In [7]:
class MyDataset(Dataset):
    def __init__(self, transforms=None, split="stage1_train", path=dataset_path):
        self.split = split
        self.path = path + '/' + split

        self.transforms = transforms

        self.path_id_list = glob.glob(os.path.join(self.path, '*'))
        self.id_list = []
        self.image_list = []
        self.mask_list = []

        for path_id in self.path_id_list:
            images = glob.glob(path_id + '/images/*png')
            masks = glob.glob(path_id + '/masks/*png')
            self.id_list.append(os.path.basename(path_id))
            self.image_list.extend(images)
            self.mask_list.append(masks)

    def __len__(self):
        return len(self.path_id_list)

    def __getitem__(self, index):
        image = np.array(Image.open(self.image_list[index]), dtype=np.uint8)
        image = image[:, :, :3]  # remove alpha channel
        boxes, labels = self.mask_to_bbox(self.mask_list[index])
        targets = [
            {
                'boxes':torch.FloatTensor(boxes),
                'labels':torch.LongTensor(labels),
                'name': self.id_list[index]
            }
        ]
        
        #sample = {'image': image, 'boxes': boxes, 'labels': labels, 'name': self.id_list[index]}

        if self.transforms is not None:
            image, targets = self.transforms(image, targets)

        return image, targets

    def mask_to_bbox(self, mask_paths):
        boxes = []
        labels = []
        for path in mask_paths:
            mask = Image.open(path)
            mask = np.array(mask)
            pos = np.where(mask)
            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])
            boxes.append([xmin, ymin, xmax, ymax])
            labels.append(1)
        #boxes = np.asarray(boxes,dtype=np.float32)
        #labels = np.asarray(labels, dtype=np.int8)
        return boxes, labels

In [8]:
def get_transform(train=False):
    transforms = []
    if train:
        transforms.append(Rescale((256,256)))
    transforms.append(ToTensor())
    return Compose(transforms)

In [9]:
dataset = MyDataset(split='stage1_train', transforms=get_transform(train=True))

In [151]:
def my_collate(batch):
    image = batch[0]
    target = [item[1] for item in batch]
    return image, target


In [152]:
# sample from dataloader
trainloader = DataLoader(dataset, num_workers=0, shuffle=True, drop_last=True, collate_fn=my_collate)
it = iter(trainloader)
image, targets = next(it)

In [153]:
print(targets[0])
print(image[0].shape)

[{'boxes': tensor([[ 84.5283, 137.5522,  86.9434, 141.3731],
        [184.9560, 180.4312, 187.9748, 191.4693],
        [ 72.4528, 135.0050,  74.2641, 138.8259],
        ...,
        [125.3836, 181.7048, 127.3962, 187.6484],
        [ 83.9245, 106.5605,  85.5346, 110.3814],
        [105.8616, 169.3930, 107.4717, 172.3648]]), 'labels': tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1

In [160]:
# sample from dataset
image, targets = dataset[0]
image = image[None, :, :, :]

In [10]:
for image, targets in (dataset):
    print(targets[0]["name"])

ValueError: not enough values to unpack (expected 3, got 2)

In [169]:
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
# load a pre-trained model for classification and return
# only the features
backbone = torchvision.models.mobilenet_v2(pretrained=True).features
# FasterRCNN needs to know the number of
# output channels in a backbone. For mobilenet_v2, it's 1280
# so we need to add it here
backbone.out_channels = 1280

# let's make the RPN generate 5 x 3 anchors per spatial
# location, with 5 different sizes and 3 different aspect
# ratios. We have a Tuple[Tuple[int]] because each feature
# map could potentially have different sizes and
# aspect ratios
anchor_generator = AnchorGenerator(sizes=((8, 16, 32, 64, 128),),
                                aspect_ratios=((0.5, 1.0, 2.0),))

# let's define what are the feature maps that we will
# use to perform the region of interest cropping, as well as
# the size of the crop after rescaling.
# if your backbone returns a Tensor, featmap_names is expected to
# be ['0']. More generally, the backbone should return an
# OrderedDict[Tensor], and in featmap_names you can choose which
# feature maps to use.
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                             output_size=7,
                                             sampling_ratio=2)

# put the pieces together inside a FasterRCNN model
model = FasterRCNN(
                backbone,
                num_classes=2,
                rpn_anchor_generator=anchor_generator,
                box_roi_pool=roi_pooler,
                min_size=256,
                max_size=256,
                  )

#model.train()
#loss = model(image, targets)

#model.eval()
#prediction = model(image)

In [170]:
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.01,
                            momentum=0.9, weight_decay=0.0005)

In [171]:
for epoch in range(10):    
    model.train()
    loss = model(image, targets)
    loss_sum = sum(lss for lss in loss.values())
    optimizer.zero_grad()
    loss_sum.backward()
    optimizer.step()
    
    model.eval()
    prediction = model(image)
    image2 = Image.fromarray(image.numpy()[0, 0, :, :])
    if image2.mode != "RGB":
        image2 = image2.convert("RGB")
    draw = ImageDraw.Draw(image2)
    for box, score in zip(prediction[0]["boxes"], prediction[0]["scores"]):
        x0, y0, x1, y1  = box
        draw.rectangle([(x0, y0), (x1, y1)], outline=(255, 0, 255))

    image2.show()

    print(f"{epoch}, {loss_sum}")

0, 1.5226142406463623
1, 1.0527878999710083
2, 0.8521511554718018
3, 0.7292611598968506
4, 0.6765128970146179


KeyboardInterrupt: 

In [161]:
model.eval()
prediction = model(image)

In [162]:
prediction

[{'boxes': tensor([[1.4711e+02, 2.1045e+02, 1.9949e+02, 2.4327e+02],
          [1.3220e+02, 2.1952e+02, 2.1291e+02, 2.5600e+02],
          [7.1682e+01, 1.3358e+00, 1.9804e+02, 8.5147e+01],
          [1.5647e+02, 2.1592e+02, 1.6529e+02, 2.3014e+02],
          [1.5531e+02, 2.1968e+02, 1.6531e+02, 2.2424e+02],
          [6.3368e+01, 1.8782e+02, 6.9085e+01, 1.9211e+02],
          [1.5143e+02, 2.1942e+02, 1.5798e+02, 2.3232e+02],
          [4.5302e+01, 1.7995e+02, 9.3009e+01, 2.0629e+02],
          [4.5983e+01, 1.7382e+02, 1.3717e+02, 2.3771e+02],
          [6.5989e+01, 1.9506e+02, 1.2608e+02, 2.3859e+02],
          [7.5612e+01, 1.8812e+02, 1.0642e+02, 2.0272e+02],
          [1.5630e+02, 2.2624e+02, 1.6693e+02, 2.3921e+02],
          [5.7844e+01, 1.8586e+02, 6.9134e+01, 2.0332e+02],
          [5.8511e+01, 1.8783e+02, 6.7629e+01, 1.9848e+02],
          [5.6321e+01, 1.8569e+02, 6.4863e+01, 2.0169e+02],
          [0.0000e+00, 6.0301e+01, 2.3205e+02, 1.6185e+02],
          [5.0454e+01, 2.1590e+

In [164]:
image.shape

torch.Size([1, 3, 256, 256])

In [163]:
image2 = Image.fromarray(image[0].numpy()[0, :, :])
if image2.mode != "RGB":
    image2 = image2.convert("RGB")
draw = ImageDraw.Draw(image2)
for box, score in zip(prediction[0]["boxes"], prediction[0]["scores"]):
    x0, y0, x1, y1  = box
    draw.rectangle([(x0, y0), (x1, y1)], outline=(255, 0, 255))

image2.show()

In [165]:
image3 = Image.fromarray(image.numpy()[0, 0, :, :])
if image3.mode != "RGB":
    image3 = image3.convert("RGB")
draw = ImageDraw.Draw(image3)
for box in targets[0]["boxes"]:
    x0, y0, x1, y1  = box
    draw.rectangle([(x0, y0), (x1, y1)], outline=(255, 0, 255))

image3.show()