载入必要的包：

In [3]:
import os
import zipfile
from mxnet import image

In [None]:
def get_image_names(root=None):
    """Return the paths of all entries found under ``<root>/images``.

    Args:
        root: Dataset root directory. When omitted, the module-level
            ``root`` (set e.g. via ``cocox.root = ...``) is used, which is
            what the other helpers in this module read as well.

    Returns:
        A list of ``'<root>/images/<entry>'`` strings, one per directory
        entry, in the order reported by ``os.listdir``.

    Raises:
        NameError: If ``root`` is omitted and no module-level ``root`` is set.
    """
    if root is None:
        # The parameter shadows the module global, so look it up explicitly.
        try:
            root = globals()['root']
        except KeyError:
            raise NameError("name 'root' is not defined") from None
    return [
        '{}/images/{}'.format(root, entry)
        for entry in os.listdir(root + '/images')
    ]


def get_stuff_names():
    """Build the paths of the stuff pixel-map archives under the module-level ``root``.

    Returns:
        A ``(train_path, val_path)`` tuple of strings for the ``train2017``
        and ``val2017`` splits, in that order.
    """
    annotation_dir = root + '/annotations'
    train_archive, val_archive = (
        annotation_dir + '/stuff_%s_pixelmaps.zip' % split
        for split in ('train2017', 'val2017')
    )
    return train_archive, val_archive


def data_iter(dataType):
    """Yield every image stored in a zip archive, decoded, without extracting it.

    Args:
        dataType: Path of the image archive to read; it is passed straight
            to ``zipfile.ZipFile`` (e.g. one of the paths returned by
            ``get_image_names``).

    Yields:
        The result of ``image.imdecode`` for each file member of the
        archive, in archive order.
    """
    # The context manager closes the archive once the generator is
    # exhausted or closed.
    with zipfile.ZipFile(dataType) as archive:
        for member in archive.infolist():
            # Directory entries (such as the leading 'test2017/') carry no
            # image bytes, so they are skipped instead of being decoded.
            if member.is_dir():
                continue
            yield image.imdecode(archive.read(member))


def unzip_annotations():
    """Extract every ``.zip`` archive found directly in the module-level ``root``.

    Each archive is extracted into ``root`` itself. The archives are left in
    place afterwards; call ``del_annotations`` to remove them. Nothing
    happens when ``root`` contains no ``.zip`` files.
    """
    for entry in os.listdir(root):
        # Only the compressed annotation files are of interest.
        if not entry.endswith('.zip'):
            continue
        # The context manager closes each archive as soon as it has been
        # extracted, so no file handle outlives its loop iteration.
        with zipfile.ZipFile(root + '/' + entry) as archive:
            archive.extractall(root)


def del_annotations():
    """Delete every ``.zip`` file located directly in the module-level ``root``."""
    # Collect the full paths first, then remove them one by one.
    zip_paths = [
        root + '/' + entry
        for entry in os.listdir(root)
        if entry.endswith('.zip')
    ]
    for zip_path in zip_paths:
        os.remove(zip_path)

In [None]:
import cocox
import zipfile

In [None]:
cocox.root = 'F:/coco'

查看 `coco/images/` 文件夹下的数据：

In [None]:
image_names = cocox.get_image_names()
image_names

查看 `coco/` 文件夹的文件：

In [None]:
import os
dataDir = cocox.root

In [None]:
os.listdir(dataDir)

我们只需要获取 annotations 的信息（这里都是以 `.zip` 结尾）：

In [None]:
annDir = [z_name for z_name in os.listdir(dataDir) if z_name.endswith('.zip')]
annDir

解压 annotations 的文件：

In [None]:
for ann_name in annDir:
    # The context manager releases the file handle once each archive is extracted.
    with zipfile.ZipFile(dataDir + '/' + ann_name) as z:
        # Extract every member into dataDir
        z.extractall(dataDir)

In [None]:
# 封装为函数
cocox.unzip_annotations()

In [None]:
# 删除标签的压缩文件
cocox.del_annotations()

由于图片数据比较大，我就不解压了，不过可以通过 `MXNet + zipfile` 来直接获取图片信息。

# 获取图片数据

我以 `test2017.zip` 为例：

In [None]:
image_names

In [None]:
z = zipfile.ZipFile(image_names[0])

In [None]:
# 测试集的图片名称列表
z.namelist()

我们可以看出，第一个是目录名，之后的才是图片。下面我们来看看第一张图片：

In [None]:
from mxnet import image

In [None]:
r = z.read(z.namelist()[1])    # bytes
data = image.imdecode(r)       # 转换为 NDArray 数组，可以做数值运算
data

In [None]:
x = data.asnumpy()   # 转换为 array

In [None]:
# 显示图片
%pylab inline 
plt.imshow(x)

为此，我们可以将其封装为一个迭代器：`cocox.data_iter(dataType)`，其中参数 `dataType` 是图片压缩包的路径（例如 `image_names[0]`）。

# 获取标签信息（利用官方给定教程）

- 安装 python API：
```sh
pip install -U pycocotools
```

Windows 下有许多的坑：[Windows 10 编译 Pycocotools 踩坑记](https://www.jianshu.com/p/de455d653301)

In [None]:
!pip install -U pycocotools

In [6]:
import sys
# Raw string: '\A', '\c' and '\P' are not valid escape sequences, so a plain
# literal only works by accident and triggers an invalid-escape warning.
sys.path.append(r'D:\API\cocoapi\PythonAPI')

In [7]:
%pylab inline
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
pylab.rcParams['figure.figsize'] = (8.0, 10.0)

Populating the interactive namespace from numpy and matplotlib


In [8]:
dataDir = cocox.root
dataType = 'val2017'
annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)

NameError: name 'cocox' is not defined

In [None]:
# initialize COCO api for instance annotations
coco=COCO(annFile)

`COCO` 是一个类：
```text
Constructor of Microsoft COCO helper class for reading and visualizing annotations.
:param annotation_file (str): location of annotation file
:param image_folder (str): location to the folder that hosts images.
```

## display COCO categories and supercategories

In [None]:
# Load every category record, then print the category names
cats = coco.loadCats(coco.getCatIds())
nms = [cat['name'] for cat in cats]
print('COCO categories: \n{}\n'.format(' '.join(nms)))

# A set comprehension keeps each supercategory once without building a
# throwaway list first
nms = {cat['supercategory'] for cat in cats}
print('COCO supercategories: \n{}'.format(' '.join(nms)))

In [None]:
# get all images containing given categories, select one at random
catIds = coco.getCatIds(catNms=['person', 'dog', 'skateboard'])
imgIds = coco.getImgIds(catIds=catIds)
imgIds = coco.getImgIds(imgIds=[335328])
img = coco.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]

In [None]:
img

官方给的这个代码需要将图片数据集解压：
```py
# load and display image
# use url to load image
# I = io.imread(img['coco_url'])
I = io.imread('%s/images/%s/%s' % (dataDir, dataType, img['file_name']))
plt.axis('off')
plt.imshow(I)
plt.show()
```

我们可以使用 `zipfile` 模块直接读取图片，而无须解压：

In [None]:
image_names[-1]

In [None]:
# Read the image bytes straight out of the archive; the context manager closes
# the archive as soon as the image has been decoded.
with zipfile.ZipFile(image_names[-1]) as val_z:
    I = image.imdecode(val_z.read('%s/%s' % (dataType, img['file_name']))).asnumpy()
plt.axis('off')
plt.imshow(I)
plt.show()

## load and display instance annotations

In [None]:
plt.imshow(I)
plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
coco.showAnns(anns)

## initialize COCO api for person keypoints annotations

In [None]:
annFile = '{}/annotations/person_keypoints_{}.json'.format(dataDir, dataType)
coco_kps = COCO(annFile)

## load and display keypoints annotations

In [None]:
plt.imshow(I)
plt.axis('off')
ax = plt.gca()
annIds = coco_kps.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco_kps.loadAnns(annIds)
coco_kps.showAnns(anns)

## initialize COCO api for caption annotations

In [None]:
annFile = '{}/annotations/captions_{}.json'.format(dataDir, dataType)
coco_caps = COCO(annFile)

## load and display caption annotations

In [None]:
annIds = coco_caps.getAnnIds(imgIds=img['id'])
anns = coco_caps.loadAnns(annIds)
coco_caps.showAnns(anns)
plt.imshow(I)
plt.axis('off')
plt.show()