In [1]:
import json
import os

# Drop images without annotations, renumber the remaining ones, and delete the
# dropped image files from the image folder.
def clear_coco(coco_json, img_src, json_dst):
    """Remove un-annotated images from a COCO dict and renumber image ids.

    Images that are referenced by at least one annotation are kept, in their
    original order, and receive consecutive ids starting at 0. Every
    annotation's ``image_id`` is rewritten to the new id of its image. Images
    without annotations are dropped from ``coco_json['images']`` and their
    files are deleted from ``img_src`` when they exist.

    WARNING: this function is destructive -- it unlinks files on disk and
    mutates ``coco_json`` in place.

    Args:
        coco_json: Parsed COCO dict with ``'images'`` and ``'annotations'``.
        img_src: Directory holding the image files named by ``file_name``.
        json_dst: Path the cleaned COCO JSON is written to (UTF-8).

    Returns:
        The same ``coco_json`` object, updated in place.
    """
    # A set gives O(1) membership tests and de-duplicates the ids.
    annotated_ids = {anno['image_id'] for anno in coco_json['annotations']}
    print(f"一共有{len(annotated_ids)}张有标注的图片。")

    kept_images = []
    id_map = {}  # old image id -> new consecutive image id
    sum_remove = 0
    for img in coco_json['images']:
        old_id = img['id']
        if old_id in annotated_ids:
            new_id = len(kept_images)
            # setdefault: if an id appears twice, annotations follow the first entry.
            id_map.setdefault(old_id, new_id)
            img['id'] = new_id
            kept_images.append(img)
        else:
            # Delete the image file that has no annotations.
            img_file = os.path.join(img_src, img['file_name'])
            if os.path.exists(img_file):
                os.unlink(img_file)
                sum_remove += 1

    # Remap all annotations in ONE pass using the complete old->new table.
    # Rewriting ids while still iterating over the images (as was done before)
    # lets a freshly assigned id collide with a not-yet-processed old id, which
    # silently merges the annotations of different images.
    # Annotations whose image is not listed in 'images' are left untouched.
    for anno in coco_json['annotations']:
        anno['image_id'] = id_map.get(anno['image_id'], anno['image_id'])

    print(f"一共移除了{sum_remove}张图片。")
    coco_json['images'] = kept_images

    # Save the cleaned annotation file.
    with open(json_dst, "w", encoding='utf-8') as sf:
        json.dump(coco_json, sf, indent=2)
    return coco_json

if __name__ == '__main__':
    main_dir = './CatBack_0129-0206'
    # CLEAR: drop un-annotated images and write a renumbered annotation file.
    # NOTE: clear_coco deletes image files under <main_dir>/images.
    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale (the cleaned file is written as UTF-8).
    with open(os.path.join(main_dir, 'annotations/instances_default.json'), encoding='utf-8') as f:
        data = json.load(f)
    data = clear_coco(
        data,
        img_src=os.path.join(main_dir, 'images'),
        json_dst=os.path.join(main_dir, './annotations/instances_default_cleared.json'),
    )


一共有513张有标注的图片。
一共移除了2140张图片。


In [2]:
import cv2
import numpy as np
import os
import json
from collections import defaultdict
from tqdm import tqdm
 
def cocojson2png(main_dir, json_path='instances_train2017.json', save_path = './gtFine',
                 lst_prefix='CatBack_0122_0125'):
    """Rasterize COCO polygon annotations into one PNG label mask per image.

    For every image that has annotations, each annotation's polygons are
    filled and given the value ``1000 * category_id + idx`` (``idx`` is the
    annotation's position within that image). The per-image sum is clipped to
    ``[0, 255]`` and stored as a single-channel 8-bit PNG, so any annotation
    with ``category_id >= 1`` ends up as 255 in the saved mask.

    A ``train.lst`` file is written into ``main_dir`` with one
    ``"<image path> <mask path>"`` line per processed image.

    Args:
        main_dir: Dataset root containing ``annotations/`` and ``images/``.
        json_path: Annotation file name inside ``<main_dir>/annotations``.
        save_path: Output directory for the PNG masks (created if missing).
        lst_prefix: Directory prefix used for the paths written to
            ``train.lst``. NOTE(review): the default is the previously
            hard-coded value and does not match the ``main_dir`` used by the
            call in this notebook -- confirm which prefix the consumer expects.

    Raises:
        FileNotFoundError: If an annotated image cannot be read from disk.

    Segmentations are assumed to be polygon lists (flat ``[x0, y0, x1, y1, ...]``).
    """
    os.makedirs(save_path, exist_ok=True)
    annotation_file = os.path.join(main_dir, 'annotations', json_path)
    with open(annotation_file, 'r', encoding='utf-8') as annf:
        annotations = json.load(annf)

    # image id -> file name; also serves as an O(1) "is this image known" test.
    imgid_file = {im['id']: im['file_name'] for im in annotations['images']}

    # Group annotations by image in a single pass.
    img_anno = defaultdict(list)
    for anno in annotations['annotations']:
        if anno['image_id'] in imgid_file:
            img_anno[anno['image_id']].append(anno)

    # The context manager guarantees the list file is flushed and closed.
    with open(os.path.join(main_dir, "train.lst"), "w") as lst:
        for img_idx in tqdm(img_anno):
            file_name = imgid_file[img_idx]
            image_path = os.path.join(main_dir, 'images/', file_name)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals failure by returning None instead of raising.
                raise FileNotFoundError(f"cannot read image: {image_path}")
            h, w = image.shape[:2]

            # Accumulate in int32: label values (>= 1000) do not fit in uint8.
            instance_png = np.zeros((h, w), dtype=np.int32)
            for idx, ann in enumerate(img_anno[img_idx]):
                im_mask = np.zeros((h, w), dtype=np.uint8)
                # Flat [x0, y0, x1, y1, ...] -> (N, 1, 2) int32 point array,
                # the contour layout and dtype OpenCV expects.
                contours = [
                    np.array(polygon).reshape(-1, 1, 2).astype(np.int32)
                    for polygon in ann['segmentation']
                ]
                cv2.drawContours(im_mask, contours, -1, 1, -1)
                # Use the current annotation's category (``ann``), not the
                # variable left over from the grouping loop above.
                instance_png += im_mask.astype(np.int32) * (1000 * ann['category_id'] + idx)
                np.clip(instance_png, 0, 255, out=instance_png)

            # Single-channel uint8 is written directly; converting to three
            # identical channels and back to gray would not change any value.
            stem = file_name.split('.')[0]
            cv2.imwrite(os.path.join(save_path, stem + ".png"), instance_png.astype(np.uint8))

            lst_s = lst_prefix + '/images/' + stem + ".jpg" + ' ' + \
                    lst_prefix + '/gtFine/' + stem + ".png"
            lst.write(lst_s + '\n')
 
if __name__ == '__main__':
    # Export the PNG masks for the cleaned annotation file into <main_dir>/gtFine.
    main_dir = "./CatBack_0129-0206"
    cocojson2png(
        main_dir,
        json_path='instances_default_cleared.json',
        save_path=os.path.join(main_dir, 'gtFine'),
    )

100%|██████████| 513/513 [00:13<00:00, 37.31it/s]


In [None]:
import cv2
import random
import json, os
from pycocotools.coco import COCO
from skimage import io
from matplotlib import pyplot as plt

 
def visualization_seg(num_image, json_path, img_path, coco_classes, c = 'cat'):
    """Show one image with its boxes, class names and segmentation overlays.

    Args:
        num_image: 1-based position of the image within the list of images
            that contain category ``c``.
        json_path: Path to the COCO annotation JSON.
        img_path: Directory containing the image files.
        coco_classes: Sequence mapping ``category_id`` to a display name.
        c: Category name (a string) used to select images and annotations.

    Raises:
        IndexError: If ``num_image`` is outside ``1..len(matching images)``.
    """
    coco = COCO(json_path)

    catIds = coco.getCatIds(catNms=[c])  # ids of the requested category

    imgIds = coco.getImgIds(catIds=catIds)  # ids of images containing it
    if not 1 <= num_image <= len(imgIds):
        # Without this check, 0 would silently wrap around to the last image.
        raise IndexError(
            f"num_image={num_image} is out of range; {len(imgIds)} image(s) available"
        )
    # loadImgs() returns a list holding a single dict; [0] unwraps it.
    img = coco.loadImgs(imgIds[num_image - 1])[0]
    # Read from the img_path argument rather than a module-level global.
    image = io.imread(os.path.join(img_path, img['file_name']))

    annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
    anns = coco.loadAnns(annIds)

    for ann in anns:
        x, y, w, h = ann['bbox']  # bounding box as x, y, width, height
        name = coco_classes[ann['category_id']]
        # Draw the bounding box.
        image = cv2.rectangle(image, (int(x), int(y)), (int(x + w), int(y + h)), (0, 255, 255), 1)
        # Label at the box centre; the named constant equals the former literal 5.
        cv2.putText(image, name, (int(x + w / 2), int(y + h / 2)),
                    cv2.FONT_HERSHEY_COMPLEX_SMALL, 3, (255, 0, 0), 3)

    # For images hosted online one could read img['coco_url'] instead.

    plt.imshow(image)
    coco.showAnns(anns)
    plt.show()



# Image folder and annotation file used for the visual spot check.
train_path = './images/'
train_json = './annotations/instances_default.json'

# Display names indexed by category_id.
category_names = ["background", "cat", "nocat"]

visualization_seg(136, train_json, train_path, coco_classes=category_names)
