# 汇总labelme标注信息-所有labelme标注文件

同济子豪兄 2023-4-24

## 导入工具包

In [1]:
import os
import json

import numpy as np
import pandas as pd
import cv2

from tqdm import tqdm

## 进入labelme标注文件目录

In [2]:
Dataset_root = 'Triangle_215_Keypoint_Labelme'

In [3]:
os.chdir(os.path.join(Dataset_root, 'labelme_jsons'))

In [4]:
print('共有 {} 个 labelme 格式的 json 标注文件'.format(len(os.listdir())))

共有 215 个 labelme 格式的 json 标注文件


## 函数-处理单个labelme标注文件

In [5]:
def process_sinlge_labelme(labelme_path):
    
    global df
    
    # 读入labelme格式的json文件
    with open(labelme_path, 'r', encoding='utf-8') as f:
        labelme = json.load(f)
    
    # 获取图像元数据
    imagePath = labelme['imagePath']
    imageWidth = labelme['imageWidth']
    imageHeight = labelme['imageHeight']
    
    for each_ann in labelme['shapes']: # 遍历每一个标注

        df_temp = {}

        # 图像信息
        df_temp['imagePath'] = imagePath
        df_temp['imageWidth'] = imageWidth
        df_temp['imageHeight'] = imageHeight

        if each_ann['shape_type'] == 'rectangle': # 筛选出框标注

            # 框的两点坐标
            bbox_keypoints = each_ann['points'] 
            bbox_keypoint_A_xy = bbox_keypoints[0]
            bbox_keypoint_B_xy = bbox_keypoints[1]
            # 左上角坐标
            bbox_top_left_x = int(min(bbox_keypoint_A_xy[0], bbox_keypoint_B_xy[0]))
            bbox_top_left_y = int(min(bbox_keypoint_A_xy[1], bbox_keypoint_B_xy[1]))
            # 右下角坐标
            bbox_bottom_right_x = int(max(bbox_keypoint_A_xy[0], bbox_keypoint_B_xy[0]))
            bbox_bottom_right_y = int(max(bbox_keypoint_A_xy[1], bbox_keypoint_B_xy[1]))

            # 标注信息
            df_temp['label_type'] = each_ann['shape_type']
            df_temp['label'] = each_ann['label'] 

            # 框坐标信息
            df_temp['bbox_top_left_x'] = bbox_top_left_x
            df_temp['bbox_top_left_y'] = bbox_top_left_y
            df_temp['bbox_bottom_right_x'] = bbox_bottom_right_x
            df_temp['bbox_bottom_right_y'] = bbox_bottom_right_y
            df_temp['bbox_width_pix'] = bbox_bottom_right_x - bbox_top_left_x
            df_temp['bbox_height_pix'] = bbox_bottom_right_y - bbox_top_left_y
            df_temp['bbox_width_norm'] = df_temp['bbox_width_pix'] / df_temp['imageWidth']
            df_temp['bbox_height_norm'] = df_temp['bbox_height_pix'] / df_temp['imageHeight']
            df_temp['bbox_center_x_norm'] = (bbox_top_left_x + bbox_bottom_right_x) / 2 / df_temp['imageWidth']
            df_temp['bbox_center_y_norm'] = (bbox_top_left_y + bbox_bottom_right_y) / 2 / df_temp['imageHeight']

        if each_ann['shape_type'] == 'point': # 筛选出关键点标注

            # 该点的 XY 坐标
            kpt_xy = each_ann['points'][0]
            kpt_x, kpt_y = int(kpt_xy[0]), int(kpt_xy[1])

            # 标注信息
            df_temp['label_type'] = each_ann['shape_type']
            df_temp['label'] = each_ann['label'] 

            # 关键点坐标信息
            df_temp['kpt_x_pix'] = kpt_x
            df_temp['kpt_y_pix'] = kpt_y
            df_temp['kpt_x_norm'] = kpt_x / df_temp['imageWidth']
            df_temp['kpt_y_norm'] = kpt_y / df_temp['imageHeight']

        if each_ann['shape_type'] == 'polygon': # 筛选出多段线（polygon）标注

            poly_points = np.array(each_ann['points']).astype('uint32').tolist() # 该多段线每个点的坐标
            poly_num_points = len(poly_points) # 该多段线点的个数

            # 计算多段线区域面积
            poly_pts = [np.array(each_ann['points'], np.int32).reshape((-1, 1, 2))] # 该多段线每个点的坐标
            img_bgr = cv2.imread('../images/' + imagePath)
            img_zeros = np.zeros(img_bgr.shape[:2], dtype='uint8')
            img_mask = cv2.fillPoly(img_zeros, poly_pts, 1)
            poly_area = np.sum(img_mask)

            # 标注信息
            df_temp['label_type'] = each_ann['shape_type']
            df_temp['label'] = each_ann['label']

            # 多段线信息
            df_temp['poly_points'] = poly_points
            df_temp['poly_num_points'] = poly_num_points
            df_temp['poly_area'] = poly_area


        df = df.append(df_temp, ignore_index=True)

## 删除系统自动生成的多余文件

### 查看待删除的多余文件

In [6]:
!find . -iname '__MACOSX'

In [7]:
!find . -iname '.DS_Store'

In [8]:
!find . -iname '.ipynb_checkpoints'

### 删除多余文件

In [9]:
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done

In [10]:
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done

In [11]:
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done

### 验证多余文件已删除

In [12]:
!find . -iname '__MACOSX'

In [13]:
!find . -iname '.DS_Store'

In [14]:
!find . -iname '.ipynb_checkpoints'

## 批量处理所有labelme格式的标注文件

In [15]:
df = pd.DataFrame()

In [16]:
for labelme_path in tqdm(os.listdir()):
    process_sinlge_labelme(labelme_path)

100%|██████████| 215/215 [00:14<00:00, 15.30it/s]


In [17]:
df

Unnamed: 0,imagePath,imageWidth,imageHeight,label_type,label,bbox_top_left_x,bbox_top_left_y,bbox_bottom_right_x,bbox_bottom_right_y,bbox_width_pix,...,bbox_height_norm,bbox_center_x_norm,bbox_center_y_norm,kpt_x_norm,kpt_x_pix,kpt_y_norm,kpt_y_pix,poly_area,poly_num_points,poly_points
0,IMG_20230417_173752.jpg,3648.0,2736.0,rectangle,sjb_rect,259.0,338.0,1426.0,2485.0,1167.0,...,0.784722,0.230948,0.515899,,,,,,,
1,IMG_20230417_173752.jpg,3648.0,2736.0,rectangle,sjb_rect,1712.0,753.0,3532.0,1856.0,1820.0,...,0.403143,0.718750,0.476791,,,,,,,
2,IMG_20230417_173752.jpg,3648.0,2736.0,point,angle_30,,,,,,...,,,,0.078125,285.0,0.897661,2456.0,,,
3,IMG_20230417_173752.jpg,3648.0,2736.0,point,angle_30,,,,,,...,,,,0.964364,3518.0,0.284722,779.0,,,
4,IMG_20230417_173752.jpg,3648.0,2736.0,point,angle_60,,,,,,...,,,,0.157895,576.0,0.130117,356.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1661,IMG_20230417_174145.jpg,3648.0,2736.0,point,angle_30,,,,,,...,,,,0.255482,932.0,0.089181,244.0,,,
1662,IMG_20230417_174145.jpg,3648.0,2736.0,point,angle_60,,,,,,...,,,,0.342654,1250.0,0.862208,2359.0,,,
1663,IMG_20230417_174145.jpg,3648.0,2736.0,point,angle_60,,,,,,...,,,,0.424068,1547.0,0.640716,1753.0,,,
1664,IMG_20230417_174145.jpg,3648.0,2736.0,point,angle_90,,,,,,...,,,,0.052357,191.0,0.721126,1973.0,,,


In [18]:
df.to_csv('../../kpt_dataset_eda.csv', index=False)