## 1.1 读入coco数据集

In [1]:
from __future__ import print_function, division
import sys
import os
import torch
import numpy as np
import random
import csv

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
from torch.utils.data.sampler import Sampler

from pycocotools.coco import COCO

import skimage.io
import skimage.transform
import skimage.color
import skimage

from PIL import Image

In [2]:
class CocoDataset(Dataset):
    """Coco dataset."""

    def __init__(self, root_dir, set_name='train2017', transform=None):
        """
        Args:
            root_dir (string): COCO directory.
            transform (callable, optional): Optional transform to be applied
                on a sample.
        """
        self.root_dir = root_dir  # COCO的路径
        self.set_name = set_name  # 存有图片名称信息的部分路径名
        self.transform = transform  # 对于每一个图片，设定某个或某些变换

        # COCO函数是pycocotools中的读入图片信息文件并得到其标注（anns），图片类别（catToImgs），总的类别数和名称（cats），
        # 数据集基本信息（dataset），每一张图片对应的anns（imgToAnns），图片（imgs）
        self.coco      = COCO(os.path.join(self.root_dir, 'annotations', 'instances_' + self.set_name + '.json'))
        # self.coco      = COCO('coco/annotations/image_info_test2017.json')
        self.image_ids = self.coco.getImgIds()  # 获得所有图片所对应的ID信息

        self.load_classes()

    def load_classes(self):
        # load class names (name -> label) 生成名字到数字的对照表
        categories = self.coco.loadCats(self.coco.getCatIds())  # 获得总的categroy数，80个和对应的名称
        categories.sort(key=lambda x: x['id'])

        self.classes             = {}
        self.coco_labels         = {}
        self.coco_labels_inverse = {}
        for c in categories:
            self.coco_labels[len(self.classes)] = c['id']
            self.coco_labels_inverse[c['id']] = len(self.classes)
            self.classes[c['name']] = len(self.classes)

        # also load the reverse (label -> name) 生成数字到名字的对照表
        self.labels = {}
        for key, value in self.classes.items():
            self.labels[value] = key

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):

        img = self.load_image(idx)
        annot = self.load_annotations(idx)
        sample = {'img': img, 'annot': annot}
        if self.transform:
            sample = self.transform(sample)

        return sample

    def load_image(self, image_index):
        image_info = self.coco.loadImgs(self.image_ids[image_index])[0]  # 读入对应图片的基本信息
        path       = os.path.join(self.root_dir, 'images', self.set_name, image_info['file_name'])
        img = skimage.io.imread(path)  # 用skimage读入图片

        if len(img.shape) == 2:  # 如果是灰度图，转化为rgb的图片
            img = skimage.color.gray2rgb(img)

        return img.astype(np.float32)/255.0  # 图片的数值转化到0～1之间

    def load_annotations(self, image_index):
        # get ground truth annotations
        annotations_ids = self.coco.getAnnIds(imgIds=self.image_ids[image_index], iscrowd=False)
        annotations     = np.zeros((0, 5))

        # some images appear to miss annotations (like image with id 257034)
        if len(annotations_ids) == 0:  # 如果没有标注，直接返回[0,0,0,0,0]
            return annotations

        # parse annotations
        coco_annotations = self.coco.loadAnns(annotations_ids)
        for idx, a in enumerate(coco_annotations):

            # some annotations have basically no width / height, skip them 有一些注释没有宽和高，则不采用
            if a['bbox'][2] < 1 or a['bbox'][3] < 1:
                continue

            annotation        = np.zeros((1, 5)) # 将annotation记录为前4位为标注框的位置信息，第5位为label信息
            annotation[0, :4] = a['bbox']
            annotation[0, 4]  = self.coco_label_to_label(a['category_id'])
            annotations       = np.append(annotations, annotation, axis=0)

        # transform from [x, y, w, h] to [x1, y1, x2, y2] 将[x, y, w, h]信息转换为[x1, y1, x2, y2]的形式
        annotations[:, 2] = annotations[:, 0] + annotations[:, 2]
        annotations[:, 3] = annotations[:, 1] + annotations[:, 3]

        return annotations

    def coco_label_to_label(self, coco_label):
        return self.coco_labels_inverse[coco_label]


    def label_to_coco_label(self, label):
        return self.coco_labels[label]

    def image_aspect_ratio(self, image_index):
        image = self.coco.loadImgs(self.image_ids[image_index])[0]
        return float(image['width']) / float(image['height'])

    def num_classes(self):
        return 80

## 1.2 一些数据增强函数

In [3]:
class Resizer(object):
    """Convert ndarrays in sample to Tensors."""

    def __call__(self, sample, min_side=608, max_side=1024):
        image, annots = sample['img'], sample['annot']

        rows, cols, cns = image.shape

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale 为了保证长和宽都是32的倍数，在图片的外围补了一圈
        image = skimage.transform.resize(image, (int(round(rows*scale)), int(round((cols*scale)))))
        rows, cols, cns = image.shape

        pad_w = 32 - rows%32
        pad_h = 32 - cols%32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)

        annots[:, :4] *= scale

        return {'img': torch.from_numpy(new_image), 'annot': torch.from_numpy(annots), 'scale': scale}

In [4]:
class Augmenter(object):
    """Convert ndarrays in sample to Tensors."""
    # 有50%的概率给图片做一个翻转
    def __call__(self, sample, flip_x=0.5):

        if np.random.rand() < flip_x:
            image, annots = sample['img'], sample['annot']
            image = image[:, ::-1, :]

            rows, cols, channels = image.shape

            x1 = annots[:, 0].copy()
            x2 = annots[:, 2].copy()
            
            x_tmp = x1.copy()

            annots[:, 0] = cols - x2
            annots[:, 2] = cols - x_tmp

            sample = {'img': image, 'annot': annots}

        return sample

In [5]:
class Normalizer(object):
    # 按照经验的均值和方差处理了每一个通道上的值
    def __init__(self):
        self.mean = np.array([[[0.485, 0.456, 0.406]]])
        self.std = np.array([[[0.229, 0.224, 0.225]]])

    def __call__(self, sample):

        image, annots = sample['img'], sample['annot']

        return {'img':((image.astype(np.float32)-self.mean)/self.std), 'annot': annots}

## 1.3 结果展示

In [6]:
dataset_train = CocoDataset('coco', set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

loading annotations into memory...
Done (t=10.59s)
creating index...
index created!


In [8]:
temp = dataset_train[10]
print('img:', temp['img'])
print('annot:', temp['annot'])

img: tensor([[[-0.8321, -1.5966, -1.7681],
         [-0.8342, -1.5987, -1.7703],
         [-0.8499, -1.6147, -1.7862],
         ...,
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.8243, -1.5886, -1.7602],
         [-0.8379, -1.6024, -1.7740],
         [-0.8504, -1.6153, -1.7867],
         ...,
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        [[-0.8281, -1.5925, -1.7788],
         [-0.8480, -1.6128, -1.7895],
         [-0.8511, -1.6160, -1.7874],
         ...,
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

        ...,

        [[ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000]],

     

&emsp;该dataloader函数返回对应的img和所有的annotation的结果