In [4]:
# -*- coding: utf-8 -*-
"""Creates integer-based masks from RLE data."""
from os import path
import pandas as pd
import numpy as np
from PIL import Image

DATA_DIR = r'C:\Users\ZEPHYRUS\Downloads\kaggle data'
MASK_DIR = path.join(DATA_DIR, r'C:\Users\ZEPHYRUS\Downloads\kaggle data')

def get_mask(encoded_pixels, dims):
    """Get integer-based mask from multiple run length encoded ships."""

    # Init mask with all 0-class pixels.
    mask = np.zeros(dims[0] * dims[1], dtype = np.uint8)

    for obj_id, curr_encoded in enumerate(encoded_pixels):

        s = curr_encoded.split()

        for i in range(len(s) // 2):
            start = int(s[2 * i]) - 1
            length = int(s[2 * i + 1])
            mask[start : start + length] = obj_id + 1

    return mask.reshape(dims).T


if __name__ == '__main__':

    # Load training metadata.
    metadata = path.join(DATA_DIR, 'train_ship_segmentations_v2.csv')
    df_metadata = pd.read_csv(metadata)

    # Corrupted images.
    exclude_list = ['6384c3e78.jpg', '13703f040.jpg', '14715c06d.jpg',
        '33e0ff2d5.jpg', '4d4e09f2a.jpg', '877691df8.jpg', '8b909bb20.jpg',
        'a8d99130e.jpg', 'ad55c3143.jpg', 'c8260c541.jpg', 'd6c7f17c7.jpg',
        'dc3e7c901.jpg', 'e44dffe88.jpg', 'ef87bad36.jpg', 'f083256d8.jpg']

    for corrupted in exclude_list:
        df_metadata = df_metadata[~df_metadata.ImageId.str.contains(corrupted)]

    # Remove images without ships.
    df_metadata.dropna(inplace = True)

    n_ships = len(df_metadata)
    image_ids = np.unique(df_metadata.ImageId.tolist())
    print(f'There are {n_ships} ships across {len(image_ids)} images.')

    print('Starting to create masks...')

    for image_id in image_ids:

        fn_mask = path.join(MASK_DIR, image_id.replace('.jpg', '_mask.png'))

        samples = df_metadata[df_metadata['ImageId'] == image_id]
        encoded_pixels = samples.EncodedPixels.tolist()
        mask = get_mask(encoded_pixels, (768, 768))

        im = Image.fromarray(mask)
        im.save(fn_mask)

    print('Done.')
    

There are 81723 ships across 42556 images.
Starting to create masks...
Done.


In [13]:
# -*- coding: utf-8 -*-
"""Custom dataset for Airbus images."""
from torch.utils.data import Dataset
from os import path
from PIL import Image
import numpy as np
import torch

class AirbusShipDetection(Dataset):

    def __init__(self, image_ids, dir_images, dir_masks, transforms = None):

        self.image_ids = image_ids
        self.dir_images = dir_images
        self.dir_masks = dir_masks
        self._transforms = transforms

    def __getitem__(self, idx):

        # Read the RGB image.
        fn_image = f'{self.image_ids[idx]}.jpg'
        path_image = path.join(self.dir_images, fn_image)
        image = Image.open(path_image).convert("RGB")

        # Read the integer-based mask.
        fn_mask = f'{self.image_ids[idx]}_mask.png'
        path_mask = path.join(self.dir_masks, fn_mask)
        mask = np.array(Image.open(path_mask))

        # Instances are encoded with different integers.
        obj_ids = np.unique(mask)

        # We remove the background (id=0) from the mask.
        obj_ids = obj_ids[1:]
        num_objs = len(obj_ids)

        # Split the mask into a set of binary masks
        # masks.shape[0] = number of istances
        masks = mask == obj_ids[:, None, None]

        # Get bounding box of each mask.
        boxes = []
        for mask in masks:

            pos = np.where(mask)

            xmin = np.min(pos[1])
            xmax = np.max(pos[1])
            ymin = np.min(pos[0])
            ymax = np.max(pos[0])

            # Enforce a positive area.
            if xmax - xmin < 1:
                xmax += 1
            if ymax - ymin < 1:
                ymax += 1

            boxes.append([xmin, ymin, xmax, ymax])

        boxes = torch.as_tensor(boxes, dtype = torch.float32)

        # Compute the area.
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])

        # Only one class (ships).
        labels = torch.ones((num_objs,), dtype = torch.int64)
        masks = torch.as_tensor(masks, dtype = torch.uint8)

        # Crowd flag not applicable here.
        iscrowd = torch.zeros((num_objs,), dtype=torch.int64)

        image_id = torch.tensor([idx])

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['masks'] = masks
        target['image_id'] = image_id
        target['area'] = area
        target['iscrowd'] = iscrowd

        # Apply image augmentation.
        if self._transforms:
            image, target = self._transforms(image, target)

        return image, target

    def __len__(self):
        # return length of
        return len(self.image_ids)

In [12]:
masks = pd.read_csv("C:\Users\ZEPHYRUS\Downloads\kaggle data\trainship")
masks.head(10)

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 2-3: truncated \UXXXXXXXX escape (635657696.py, line 1)