# Labelme转COCO-批量

同济子豪兄 呕心沥血写成

兼容目标检测、图像分割、关键点检测三种标注

2023-3-9

2023-3-10

2023-4-15

2023-4-16

2023-4-21

## 导入工具包

In [1]:
import os
import json
import numpy as np

## 删除系统自动生成的多余文件

### 查看待删除的多余文件

In [2]:
!find . -iname '__MACOSX'

In [3]:
!find . -iname '.DS_Store'

In [4]:
!find . -iname '.ipynb_checkpoints'

./【C】Labelme转COCO-单个文件/.ipynb_checkpoints
./Ruler_15_Dataset/labelme_jsons/val_labelme_jsons/.ipynb_checkpoints
./.ipynb_checkpoints


### 删除多余文件

In [5]:
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done

In [6]:
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done

In [7]:
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done

### 验证多余文件已删除

In [8]:
!find . -iname '__MACOSX'

In [9]:
!find . -iname '.DS_Store'

In [10]:
!find . -iname '.ipynb_checkpoints'

## 指定数据集信息

In [12]:
Dataset_root = 'Ruler_15_Dataset'

In [13]:
class_list= [
    {'id': 0, 'name': 'Triangle_30'},
    {'id': 1, 'name': 'Triangle_45'},
    {'id': 2, 'name': 'Angle'},
    {'id': 3, 'name': 'Ruler'}
]

In [14]:
label2id = {}
for each in class_list:
    label2id[each['name']] = each['id']

In [15]:
label2id

{'Triangle_30': 0, 'Triangle_45': 1, 'Angle': 2, 'Ruler': 3}

## 函数-处理单个labelme标注json文件

In [16]:
def process_single_json(labelme, image_id=1):
    '''
    输入labelme的json数据，输出coco格式的每个框的关键点标注信息
    '''
    
    global ANN_ID
    
    coco_annotations = []
    
    for each_ann in labelme['shapes']: # 遍历该json文件中的所有标注

        if each_ann['shape_type'] == 'rectangle': # 筛选出框

            ## 该框的元数据
            bbox_dict = {}
            # 该框的类别 信息
            bbox_dict['category_id'] = label2id[each_ann['label']]
            bbox_dict['segmentation'] = []
            
            bbox_dict['iscrowd'] = 0
            bbox_dict['image_id'] = image_id
            bbox_dict['id'] = ANN_ID
            ANN_ID += 1

            # 获取框坐标
            bbox_left_top_x = min(int(each_ann['points'][0][0]), int(each_ann['points'][1][0]))
            bbox_left_top_y = min(int(each_ann['points'][0][1]), int(each_ann['points'][1][1]))
            bbox_right_bottom_x = max(int(each_ann['points'][0][0]), int(each_ann['points'][1][0]))
            bbox_right_bottom_y = max(int(each_ann['points'][0][1]), int(each_ann['points'][1][1]))
            bbox_w = bbox_right_bottom_x - bbox_left_top_x
            bbox_h = bbox_right_bottom_y - bbox_left_top_y
            bbox_dict['bbox'] = [bbox_left_top_x, bbox_left_top_y, bbox_w, bbox_h] # 左上角x、y、框的w、h
            bbox_dict['area'] = bbox_w * bbox_h
            
            # 筛选出分割多段线
            for each_ann in labelme['shapes']: # 遍历所有标注
                if each_ann['shape_type'] == 'polygon': # 筛选出分割多段线标注
                    # 第一个点的坐标
                    first_x = each_ann['points'][0][0]
                    first_y = each_ann['points'][0][1]
                    if (first_x>bbox_left_top_x) & (first_x<bbox_right_bottom_x) & (first_y<bbox_right_bottom_y) & (first_y>bbox_left_top_y): # 筛选出在该个体框中的关键点
                        bbox_dict['segmentation'] = list(map(lambda x: list(map(lambda y: round(y, 2), x)), each_ann['points'])) # 坐标保留两位小数
                    
            coco_annotations.append(bbox_dict)
            
    return coco_annotations

## 函数-转换当前目录下所有labelme格式的json文件

In [17]:
def process_folder():
    IMG_ID = 0
    ANN_ID = 0

    # 遍历所有 labelme 格式的 json 文件
    for labelme_json in os.listdir(): 

        if labelme_json.split('.')[-1] == 'json':

            with open(labelme_json, 'r', encoding='utf-8') as f:

                labelme = json.load(f)

                ## 提取图像元数据
                img_dict = {}
                img_dict['file_name'] = labelme['imagePath']
                img_dict['height'] = labelme['imageHeight']
                img_dict['width'] = labelme['imageWidth']
                img_dict['id'] = IMG_ID
                coco['images'].append(img_dict)

                ## 提取框和关键点信息
                coco_annotations = process_single_json(labelme, image_id=IMG_ID)
                coco['annotations'] += coco_annotations

                IMG_ID += 1

                print(labelme_json, '已处理完毕')

        else:
            pass

## 转换训练集的所有labelme标注文件

In [18]:
coco = {}

coco['categories'] = class_list

coco['images'] = []
coco['annotations'] = []

IMG_ID = 0
ANN_ID = 0

In [19]:
path = os.path.join(Dataset_root, 'labelme_jsons', 'train_labelme_jsons')
os.chdir(path)

In [20]:
process_folder()

2.json 已处理完毕
10.json 已处理完毕
13.json 已处理完毕
4.json 已处理完毕
14.json 已处理完毕
9.json 已处理完毕
15.json 已处理完毕
5.json 已处理完毕
12.json 已处理完毕
7.json 已处理完毕
11.json 已处理完毕
6.json 已处理完毕


In [21]:
# 保存coco标注文件
coco_path = '../../train_coco.json'
with open(coco_path, 'w') as f:
    json.dump(coco, f, indent=2)
    
os.chdir('../../')

## 验证训练集的MS COCO格式标注

In [22]:
os.listdir()

['images', 'labelme_jsons', 'train_coco.json']

In [23]:
from pycocotools.coco import COCO

my_coco = COCO('train_coco.json')

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


## 转换测试集的所有labelme标注文件

In [24]:
coco = {}

coco['categories'] = class_list

coco['images'] = []
coco['annotations'] = []

IMG_ID = 0
ANN_ID = 0

In [25]:
path = os.path.join('labelme_jsons', 'val_labelme_jsons')
os.chdir(path)

In [26]:
process_folder()

8.json 已处理完毕
1.json 已处理完毕
3.json 已处理完毕


In [27]:
# 保存coco标注文件
coco_path = '../../val_coco.json'
with open(coco_path, 'w') as f:
    json.dump(coco, f, indent=2)
    
os.chdir('../../')

## 验证测试集的MS COCO格式标注

In [28]:
from pycocotools.coco import COCO

my_coco = COCO('val_coco.json')

loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


## 删除labelme标注目录

In [29]:
!rm -rf labelme_jsons

In [30]:
os.listdir()

['images', 'train_coco.json', 'val_coco.json']

## 删除系统自动生成的多余文件

### 查看待删除的多余文件

In [1]:
!find . -iname '__MACOSX'

In [2]:
!find . -iname '.DS_Store'

In [3]:
!find . -iname '.ipynb_checkpoints'

./【C】Labelme转COCO-单个文件/.ipynb_checkpoints
./Ruler_15_Dataset/.ipynb_checkpoints
./.ipynb_checkpoints


### 删除多余文件

In [4]:
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done

In [5]:
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done

In [6]:
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done

### 验证多余文件已删除

In [7]:
!find . -iname '__MACOSX'

In [8]:
!find . -iname '.DS_Store'

In [9]:
!find . -iname '.ipynb_checkpoints'

## 得到完整的MS COCO格式的数据集