In [7]:
import os
import zipfile


class Bunch(dict):
    """Dictionary whose items are also reachable as attributes.

    The instance's attribute namespace is aliased to the dictionary itself,
    so ``b.key`` and ``b['key']`` read and write the same entry.
    """

    def __init__(self, *args, **kwds):
        # Populate the mapping exactly as a plain ``dict`` would.
        super().__init__(*args, **kwds)
        # Alias the attribute namespace to the mapping: attribute access and
        # item access now share one storage.
        self.__dict__ = self


def buffer2array(Z, image_name, flags=None):
    '''
    Decode an image stored inside an archive without extracting it to disk.

    Parameters
    ===========
    Z:: archive object exposing ``read(name) -> bytes`` (a ZipFile object)
    image_name:: name of the image member inside ``Z``
    flags:: optional OpenCV ``cv2.IMREAD_*`` decoding mode. Defaults to
        ``cv2.IMREAD_COLOR`` (the previous hard-coded behaviour). Pass e.g.
        ``cv2.IMREAD_UNCHANGED`` to keep single-channel label maps as stored.

    Returns
    ===========
    The value returned by ``cv2.imdecode`` for the member's bytes. Per the
    OpenCV documentation this is ``None`` when the data cannot be decoded;
    callers should check for that.
    '''
    if flags is None:
        flags = cv2.IMREAD_COLOR
    raw = Z.read(image_name)
    # View the raw bytes as a flat uint8 array, the input cv2.imdecode expects.
    encoded = np.frombuffer(raw, dtype=np.uint8)
    return cv2.imdecode(encoded, flags)


class NameBunch:
    """Index of the ZIP archives located directly inside a directory.

    Attributes set by ``__init__``:
      * ``B`` - the ``Bunch`` class used to build the containers.
      * ``root`` - the directory that was scanned.
      * ``name2bunch`` - ``Bunch`` mapping each archive's file name without
        its extension to a ``Bunch`` whose ``Z`` entry is the opened
        ``zipfile.ZipFile``. The archives are left open for the caller.
    """

    def __init__(self, root):
        '''
        Build a Bunch of all the ZIP files found under ``root``.

        ``root`` may be given with or without a trailing path separator.
        '''
        self.B = Bunch
        self.root = root
        self.name2bunch = self.__nameBunch()

    def __nameBunch(self):
        """Scan ``self.root`` and open every entry that is a valid ZIP file."""
        bunch = self.B()
        for name in os.listdir(self.root):
            # Join instead of concatenating so that a root without a trailing
            # separator still yields a valid path.
            path = os.path.join(self.root, name)
            if zipfile.is_zipfile(path):
                dataType, _ = os.path.splitext(name)
                bunch[dataType] = self.B({'Z': zipfile.ZipFile(path)})
        return bunch

In [20]:
class Image(dict):
    """Attribute-style dictionary recording where a dataset's images live.

    Parameters
    ----------
    root : str
        Dataset root directory.
    *args, **kwds
        Forwarded unchanged to ``dict``.

    After construction, ``root`` (both as attribute and as key) holds
    ``os.path.join(root, 'images')``.
    """

    def __init__(self, root, *args, **kwds):
        images_dir = os.path.join(root, 'images')
        super().__init__(*args, **kwds)
        # Share one storage between attributes and dictionary items.
        self.__dict__ = self
        self.root = images_dir

In [21]:
# Build the Image container for the dataset root.
# NOTE(review): `root` is not defined in any cell visible here; presumably it
# comes from an earlier cell - confirm it exists on a fresh kernel.
I = Image(root)

In [25]:
# Collect, for every entry of the images directory (I.root), the part of its
# name that precedes the first '.'.
a = []
for name in os.listdir(I.root):
    a.append(name.split('.')[0])

In [36]:
class TypeName(dict):
    """Attribute-style dictionary of the entry names found under a dataset root.

    Two entries are stored, readable as keys or as attributes:
      * ``images_names`` - one name per entry of ``<root>/images``
      * ``annotations``  - one name per entry of ``<root>/annotations``

    Each name is the text before the first ``.`` of the directory entry.
    Every entry is listed; no filtering by file type is performed.
    """

    def __init__(self, root, *args, **kwds):
        '''
        Collect the file names of the archives stored under ``root``.

        ``*args`` and ``**kwds`` are forwarded unchanged to ``dict``.
        '''
        super().__init__(*args, **kwds)
        self.__dict__ = self

        def stems(directory):
            # Keep only the text that precedes the first '.' of each entry.
            return [entry.partition('.')[0] for entry in os.listdir(directory)]

        images_root = os.path.join(root, 'images')
        annotations_root = os.path.join(root, 'annotations')
        self.images_names = stems(images_root)
        self.annotations = stems(annotations_root)

In [37]:
# Gather the image and annotation entry names found under the dataset root.
# NOTE(review): `root` is not defined in any cell visible here - confirm it is
# set before this cell on a fresh kernel.
type_names = TypeName(root)

In [38]:
# Display the collected names (a bare last expression is rendered by the notebook).
type_names

{'images_names': ['test2014',
  'test2015',
  'test2017',
  'train2014',
  'train2017',
  'unlabeled2017',
  'val2014',
  'val2017'],
 'annotations': ['annotations_trainval2014',
  'annotations_trainval2017',
  'image_info_test2014',
  'image_info_test2015',
  'image_info_test2017',
  'image_info_unlabeled2017',
  'panoptic_annotations_trainval2017',
  'stuff_annotations_trainval2017',
  'stuff_annotations_trainval2017_2']}

In [43]:
# NOTE(review): `ImageZ` (and `root`) are not defined in any cell visible
# here; this cell fails on a fresh kernel unless another cell provides them.
zI = ImageZ(root, 'val2017')

In [45]:
# List the member names of the archive held by zI.Z (presumably a ZipFile).
# NOTE(review): this displays the entire listing; slicing it to the first few
# entries would keep the output short - confirm nothing relies on the full dump.
zI.Z.namelist()

['val2017/',
 'val2017/000000212226.jpg',
 'val2017/000000231527.jpg',
 'val2017/000000578922.jpg',
 'val2017/000000062808.jpg',
 'val2017/000000119038.jpg',
 'val2017/000000114871.jpg',
 'val2017/000000463918.jpg',
 'val2017/000000365745.jpg',
 'val2017/000000320425.jpg',
 'val2017/000000481404.jpg',
 'val2017/000000314294.jpg',
 'val2017/000000335328.jpg',
 'val2017/000000513688.jpg',
 'val2017/000000158548.jpg',
 'val2017/000000132116.jpg',
 'val2017/000000415238.jpg',
 'val2017/000000321333.jpg',
 'val2017/000000081738.jpg',
 'val2017/000000577584.jpg',
 'val2017/000000346905.jpg',
 'val2017/000000433980.jpg',
 'val2017/000000228144.jpg',
 'val2017/000000041872.jpg',
 'val2017/000000117492.jpg',
 'val2017/000000368900.jpg',
 'val2017/000000376900.jpg',
 'val2017/000000352491.jpg',
 'val2017/000000330790.jpg',
 'val2017/000000384850.jpg',
 'val2017/000000032735.jpg',
 'val2017/000000197004.jpg',
 'val2017/000000526751.jpg',
 'val2017/000000041488.jpg',
 'val2017/000000153632.jpg',
 

In [None]:
'''
COCO 2017 dataset
Example:
root = 'E:/Data/coco/'
f = NameBunch(root)

f.name2bunch.keys()

stuffZ = f.name2bunch.stuff_annotations_trainval2017.Z

train_stuffZ = get_content(stuffZ,  'annotations/stuff_train2017_pixelmaps.zip')

print(train_stuffZ)

X = get_content(train_stuffZ, 'stuff_train2017_pixelmaps/000000013992.png')

%pylab inline 
plt.imshow(X)
plt.show()
'''

from collections import defaultdict
import zipfile
import os
import time
import json

import numpy as np
import cv2

from pycocotools.coco import COCO






def get_content(Z, filename, is_estract=None, extract_dir='D:/coco/'):
    '''
    Return the data stored under ``filename`` inside the archive ``Z``.

    Parameters
    ===========
    Z:: opened ZipFile object that contains ``filename``
    filename:: member name inside ``Z``; its suffix selects the handling
    is_estract:: only used for ``.json`` members - when truthy the member is
        extracted to disk instead of being parsed
    extract_dir:: directory that receives extracted members
        (defaults to the previously hard-coded ``'D:/coco/'``)

    Returns
    ===========
    * ``.json``: the parsed JSON content, or the path of the extracted file
      when ``is_estract`` is truthy
    * ``.jpg`` / ``.png``: the result of ``buffer2array(Z, filename)``
    * ``.zip``: the nested archive is extracted to ``extract_dir`` and
      returned as an opened ZipFile object
    * anything else: ``None``
    '''
    if filename.endswith('.json'):
        if is_estract:
            return Z.extract(filename, extract_dir)
        print('loading annotations into memory...')
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start = time.perf_counter()
        with Z.open(filename) as fp:
            content = json.load(fp)
        print('Done (t = %gs)' % (time.perf_counter() - start))
        return content
    if filename.endswith(('.jpg', '.png')):
        return buffer2array(Z, filename)
    # The member lives inside Z, so it cannot be probed with
    # zipfile.is_zipfile(filename) (that inspects a path on the local disk);
    # recognise nested archives by their suffix instead.
    if filename.endswith('.zip'):
        path = Z.extract(filename, extract_dir)
        return zipfile.ZipFile(path)  # opened ZipFile of the nested archive
    return None





class NameBunch:
    """Index of the ZIP archives located directly inside a directory.

    Attributes set by ``__init__``:
      * ``B`` - the ``Bunch`` class used to build the containers.
      * ``root`` - the directory that was scanned.
      * ``name2bunch`` - ``Bunch`` mapping each archive's file name without
        its extension to a ``Bunch`` whose ``Z`` entry is the opened
        ``zipfile.ZipFile``. The archives are left open for the caller.

    NOTE(review): a class with this name is also defined in an earlier cell
    of this notebook; whichever cell runs last wins.
    """

    def __init__(self, root):
        '''
        Build a Bunch of all the ZIP files found under ``root``.

        ``root`` may be given with or without a trailing path separator.
        '''
        self.B = Bunch
        self.root = root
        self.name2bunch = self.__nameBunch()

    def __nameBunch(self):
        """Scan ``self.root`` and open every entry that is a valid ZIP file."""
        bunch = self.B()
        for name in os.listdir(self.root):
            # Join instead of concatenating so that a root without a trailing
            # separator still yields a valid path.
            path = os.path.join(self.root, name)
            if zipfile.is_zipfile(path):
                dataType, _ = os.path.splitext(name)
                bunch[dataType] = self.B({'Z': zipfile.ZipFile(path)})
        return bunch


class ZFile:
    """Split the members of an opened ZIP archive into images and annotations.

    Attributes set by ``__init__``:
      * ``B`` - the ``Bunch`` class used to build the container.
      * ``Z2B`` - ``Bunch`` with two lists of member names:
        ``Anns`` holds the members ending in ``.txt``; ``Images`` holds
        every other member that is not a directory entry.
    """

    def __init__(self, Z):
        """Classify the member names of ``Z``, an opened ``zipfile.ZipFile``."""
        self.B = Bunch
        # Use the public infolist() API rather than the undocumented
        # NameToInfo mapping; it walks the same members as namelist() below.
        Images = [
            info.filename for info in Z.infolist()
            if not (info.is_dir() or info.filename.endswith('.txt'))
        ]
        Anns = [
            filename for filename in Z.namelist() if filename.endswith('.txt')
        ]
        self.Z2B = self.B(Images=Images, Anns=Anns)


class COCOX(COCO):
    """COCO API variant that loads its annotations from inside a ZIP archive."""

    def __init__(self, annZ, annotation_name):
        """
        Adaptation of the COCO API provided by Microsoft.
        With this API the annotations are read directly from the ZIP file,
        without extracting it first.

        annZ: archive object passed to ``get_content``
        annotation_name: name of the annotation member inside ``annZ``
        """
        # Call the base initializer without an annotation file; the dataset
        # is filled in below from the archive instead.
        super().__init__()
        self.B = Bunch
        annotations = get_content(annZ, annotation_name)
        self.dataset = self.B(annotations)
        # Rebuild the lookup tables now that the dataset is in place.
        self.createIndex()