In [1]:
"""解析coco数据集的标注信息
"""
import json
import os

In [121]:
# Class names used for the exported labels. A class's position in this list is
# the index written to the label files (later cells call CLASSES.index(name)).
CLASSES = [
    'person', 'bicycle', 'car', 'motorcycle', 'airplane',                      # 0-4
    'bus', 'train', 'truck', 'boat', 'traffic light',                          # 5-9
    'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',             # 10-14
    'cat', 'dog', 'horse', 'sheep', 'cow',                                     # 15-19
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack',                        # 20-24
    'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',                       # 25-29
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',                # 30-34
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',    # 35-39
    'wine glass', 'cup', 'fork', 'knife', 'spoon',                             # 40-44
    'bowl', 'banana', 'apple', 'sandwich', 'orange',                           # 45-49
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut',                         # 50-54
    'cake', 'chair', 'couch', 'potted plant', 'bed',                           # 55-59
    'dining table', 'toilet', 'tv', 'laptop', 'mouse',                         # 60-64
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',                   # 65-69
    'toaster', 'sink', 'refrigerator', 'book', 'clock',                        # 70-74
    'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',              # 75-79
]

In [122]:
# Load the COCO instance-annotation file and pull out the three top-level
# lists that the following cells consume.
json_path = "annotations/instances_val2017.json"
# A context manager closes the file handle deterministically; the previous
# bare open() left it to the garbage collector.
with open(json_path, "r", encoding="utf-8") as json_file:
    json_labels = json.load(json_file)
annotations = json_labels['annotations']  # list
images = json_labels['images']  # list
categories = json_labels['categories']  # list

In [123]:
# Index image metadata by image id: id -> [file_name, height, width].
idtoimage = {
    record['id']: [record['file_name'], record['height'], record['width']]
    for record in images
}

In [124]:
# Map each category id to its class name.
# A comprehension keeps the loop variables local, so the builtin `id` is no
# longer shadowed in the notebook's global namespace for later cells.
idtoclss = {category['id']: category['name'] for category in categories}

In [106]:
# Export segmentation labels, one text line per annotation:
#   <class-index> <x1> <y1> <x2> <y2> ... <xn> <yn>
# where every x is divided by the image width and every y by the image height.
# NOTE(review): a later cell reads label files from 'labels_instances', while
# this cell writes to 'instances' -- confirm which directory name is intended.
seg_dir = 'instances'
# Create the output directory up front; previously a missing directory raised
# inside the loop and was silently swallowed, producing no output at all.
os.makedirs(seg_dir, exist_ok=True)

seg_written = set()  # label files already started during this run
for annotation in annotations:
    try:
        # Only the first polygon of each annotation is exported. A segmentation
        # that is not a non-empty list (e.g. a dict) raises here and is skipped.
        segmentation = annotation['segmentation'][0]  # flat [x1, y1, x2, y2, ...]
        image_id = annotation['image_id']
        category_id = annotation['category_id']  # category id from the JSON

        classname = idtoclss[category_id]  # class name
        category_id = CLASSES.index(classname)  # position in CLASSES

        filename, h, w = idtoimage[image_id]
        if not segmentation or len(segmentation) % 2:
            raise ValueError('polygon must hold a non-empty, even number of coordinates')
        xs = [value / w for value in segmentation[0::2]]  # normalized x
        ys = [value / h for value in segmentation[1::2]]  # normalized y
    except (KeyError, IndexError, TypeError, ValueError, ZeroDivisionError):
        # Best effort: skip annotations that cannot be converted. I/O errors
        # are deliberately NOT caught, so a failed write is visible.
        continue

    # Separate every coordinate with a space. The previous string
    # concatenation glued each pair onto the next ("x1 y1x2 y2...").
    coords = ' '.join(str(x) + ' ' + str(y) for x, y in zip(xs, ys))
    line = str(category_id) + ' ' + coords + '\n'

    outfile = os.path.join(seg_dir, os.path.splitext(filename)[0] + '.txt')
    # Truncate a file the first time it is touched in this run and append
    # afterwards, so re-running the cell does not duplicate label lines.
    mode = 'a' if outfile in seg_written else 'w'
    with open(outfile, mode) as f:
        f.write(line)
    seg_written.add(outfile)

In [125]:
# Export detection labels, one text line per annotation:
#   <object-class> <cx> <cy> <width> <height>
# where the box centre and size are divided by the image width / height.
# NOTE(review): a later cell reads label files from 'labels_bbox', while this
# cell writes to 'bbox' -- confirm which directory name is intended.
bbox_dir = 'bbox'
# Create the output directory up front; previously a missing directory raised
# inside the loop and was silently swallowed, producing no output at all.
os.makedirs(bbox_dir, exist_ok=True)

bbox_written = set()  # label files already started during this run
for annotation in annotations:
    try:
        # bbox is read as [top-left x, top-left y, width, height]
        left, top, raw_w, raw_h = annotation['bbox'][:4]
        image_id = annotation['image_id']
        category_id = annotation['category_id']  # category id from the JSON

        classname = idtoclss[category_id]  # class name
        category_id = CLASSES.index(classname)  # position in CLASSES

        filename, h, w = idtoimage[image_id]

        box_w, box_h = raw_w / w, raw_h / h
        cx = (left + raw_w / 2) / w
        cy = (top + raw_h / 2) / h
    except (KeyError, IndexError, TypeError, ValueError, ZeroDivisionError):
        # Best effort: skip annotations that cannot be converted. I/O errors
        # are deliberately NOT caught, so a failed write is visible.
        continue

    line = ' '.join(str(value) for value in (category_id, cx, cy, box_w, box_h)) + '\n'

    outfile = os.path.join(bbox_dir, os.path.splitext(filename)[0] + '.txt')
    # Truncate a file the first time it is touched in this run and append
    # afterwards, so re-running the cell does not duplicate label lines.
    mode = 'a' if outfile in bbox_written else 'w'
    with open(outfile, mode) as f:
        f.write(line)
    bbox_written.add(outfile)

In [107]:
# list(zip(x,y))[0]

In [16]:
# Build a small 500-image subset: copy the segmentation labels, the bbox
# labels and the matching .jpg images into parallel '*500' directories.
import shutil

SAMPLE_SIZE = 500

# os.listdir returns entries in arbitrary order, so sort to pick the same
# files on every run. Keep only .txt label files so stray entries (for
# example a '.ipynb_checkpoints' directory) do not break the copy.
names = [
    entry for entry in sorted(os.listdir('labels_instances'))
    if entry.endswith('.txt')
][:SAMPLE_SIZE]

# shutil.copy does not create missing destination directories.
for subset_dir in ('labels_instances500', 'labels_bbox500', 'images500'):
    os.makedirs(subset_dir, exist_ok=True)

for name in names:
    image = os.path.splitext(name)[0] + '.jpg'  # image that shares the label's stem
    for src_dir, dst_dir, entry in (
        ('labels_instances', 'labels_instances500', name),
        ('labels_bbox', 'labels_bbox500', name),
        ('images', 'images500', image),
    ):
        shutil.copy(os.path.join(src_dir, entry), os.path.join(dst_dir, entry))

In [20]:
# Split the images into a training list and a validation list, written as
# one image path per line to train.txt and val.txt.
# os.listdir returns entries in arbitrary order; sorting makes the split
# reproducible across runs and machines.
image_files = sorted(os.listdir('images'))
# Context managers close both files even if a write fails part-way through.
with open('train.txt', 'w') as ftrain, open('val.txt', 'w') as fval:
    for i, image in enumerate(image_files):
        name = f'/home/data/images/{image}\n'
        # One image in ten (index % 10 == 2) goes to validation, the rest to training.
        if i % 10 == 2:
            fval.write(name)
        else:
            ftrain.write(name)