# Labelme转mask-批量



## 下载样例数据集

In [1]:
!rm -rf Watermelon87_Semantic_Seg_Labelme.zip Watermelon87_Semantic_Seg_Labelme # Remove any previous archive and extracted folder
!wget https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20230130-mmseg/dataset/watermelon/Watermelon87_Semantic_Seg_Labelme.zip # Download the archive
!unzip Watermelon87_Semantic_Seg_Labelme.zip >> /dev/null # Extract the archive (stdout is discarded)
!rm -rf Watermelon87_Semantic_Seg_Labelme.zip # Delete the archive once it has been extracted

--2023-06-09 09:09:27--  https://zihao-openmmlab.obs.cn-east-3.myhuaweicloud.com/20230130-mmseg/dataset/watermelon/Watermelon87_Semantic_Seg_Labelme.zip
正在连接 172.16.0.13:5848... 已连接。
已发出 Proxy 请求，正在等待回应... 200 OK
长度： 12978831 (12M) [application/zip]
正在保存至: “Watermelon87_Semantic_Seg_Labelme.zip”


2023-06-09 09:09:28 (26.4 MB/s) - 已保存 “Watermelon87_Semantic_Seg_Labelme.zip” [12978831/12978831])



## 整理数据集文件并查看目录结构

In [2]:
import os
import shutil

In [4]:
def extract_files_by_extension(source_folder, destination_folder, extension=".txt"):
    """Move the files in ``source_folder`` whose names end with ``extension``.

    Only the top level of ``source_folder`` is scanned (no recursion), and only
    regular files are moved: a sub-directory whose name happens to end with
    ``extension`` is left where it is.

    Args:
        source_folder: Directory to scan.
        destination_folder: Directory that receives the matching files. It is
            created, including missing parents, when it does not exist yet.
        extension: Case-sensitive file-name suffix to match, e.g. ``".json"``.

    Returns:
        None. One ``Moved: <name>`` line is printed per moved file.
    """
    # exist_ok=True replaces the separate "exists?" check, so there is no
    # window in which the folder can appear between the check and the creation.
    os.makedirs(destination_folder, exist_ok=True)

    # Sorted so that the printed log is the same on every run and file system.
    for file in sorted(os.listdir(source_folder)):
        if not file.endswith(extension):
            continue

        source_path = os.path.join(source_folder, file)
        # Skip directories (and anything else that is not a regular file).
        if not os.path.isfile(source_path):
            continue

        destination_path = os.path.join(destination_folder, file)
        shutil.move(source_path, destination_path)
        print(f"Moved: {file}")

# Example usage
source_directory = "mmsegmentation/LSR16F656"  # Path of the source folder
# Alternative settings (disabled): move the .tiff files into an images/ folder instead
# destination_directory = "mmsegmentation/LSR16F656/images"  # Path of the destination folder
# file_extension = ".tiff"  # Extension of the files to extract

destination_directory = "mmsegmentation/LSR16F656/labelme_jsons"  # Path of the destination folder
file_extension = ".json"  # Extension of the files to extract

extract_files_by_extension(source_directory, destination_directory, file_extension)


Moved: 001 (502).json
Moved: 001 (503).json
Moved: 001 (505).json
Moved: 001 (506).json
Moved: 001 (507).json
Moved: 001 (508).json
Moved: 001 (510).json
Moved: 001 (511).json
Moved: 001 (515).json
Moved: 001 (520).json
Moved: 001 (525).json
Moved: 001 (527).json
Moved: 001 (530).json
Moved: 001 (531).json
Moved: 001 (535).json
Moved: 001 (537).json
Moved: 001 (539).json
Moved: 001 (543).json
Moved: 001 (550).json
Moved: 001 (549).json
Moved: 001 (546).json
Moved: 001 (553).json
Moved: 001 (557).json
Moved: 001 (556).json
Moved: 001 (533).json
Moved: 001 (561).json
Moved: 001 (560).json
Moved: 001 (562).json
Moved: 001 (563).json
Moved: 001 (564).json
Moved: 001 (521).json
Moved: 001 (526).json
Moved: 001 (528).json
Moved: 001 (534).json
Moved: 001 (536).json
Moved: 001 (538).json
Moved: 001 (540).json
Moved: 001 (544).json
Moved: 001 (547).json
Moved: 001 (545).json
Moved: 001 (551).json
Moved: 001 (559).json
Moved: 001 (558).json
Moved: 001 (554).json
Moved: 001 (512).json
Moved: 001

In [10]:
!pip install seedir emoji

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple


In [21]:
import seedir as sd
# Print the dataset folder as a tree, one level deep, with emoji icons
sd.seedir('mmsegmentation/LSR16F656', style='emoji', depthlimit=1)

📁 LSR16F656/
├─📁 images/
└─📁 labelme_jsons/


## 删除系统自动生成的多余文件

### 查看待删除的多余文件

In [6]:
!find . -iname '__MACOSX' # List every __MACOSX entry below the current directory

In [7]:
!find . -iname '.DS_Store' # List every .DS_Store entry below the current directory

In [12]:
!find . -iname '.ipynb_checkpoints' # List every .ipynb_checkpoints entry below the current directory

./.ipynb_checkpoints
./mmsegmentation/LSR16F656/.ipynb_checkpoints
./mmsegmentation/.ipynb_checkpoints


### 删除多余文件

In [14]:
# NUL-delimited hand-off keeps paths containing spaces intact; -prune stops find descending into what is being deleted
!find . -iname '__MACOSX' -prune -print0 | xargs -0 rm -rf

In [15]:
# NUL-delimited hand-off keeps paths containing spaces intact; -prune stops find descending into what is being deleted
!find . -iname '.DS_Store' -prune -print0 | xargs -0 rm -rf

In [16]:
# NUL-delimited hand-off keeps paths containing spaces intact; -prune stops find descending into what is being deleted
!find . -iname '.ipynb_checkpoints' -prune -print0 | xargs -0 rm -rf

### 验证多余文件已删除

In [18]:
!find . -iname '__MACOSX' # Empty output means no __MACOSX entries remain

In [19]:
!find . -iname '.DS_Store' # Empty output means no .DS_Store entries remain

In [20]:
!find . -iname '.ipynb_checkpoints' # List the .ipynb_checkpoints entries that still exist

./.ipynb_checkpoints


## 导入工具包

In [22]:
import os
import json
import numpy as np
import cv2
import shutil

from tqdm import tqdm

## 数据集及类别信息

In [23]:
Dataset_Path = 'mmsegmentation/LSR16F656'  # Dataset root, given relative to the current working directory

## 每个类别的信息及画mask的顺序（按照由大到小，由粗到精的顺序）

In [24]:
# Value 0 is the background, so class values start at 1.
# Each entry gives the Labelme label, its annotation type and the value drawn into the mask.
class_info = [
    {'label':'outer_ring', 'type':'polygon', 'color':1},                    # polygon annotation (closed outline)
    {'label':'inner_ring', 'type':'polygon', 'color':2}
]

## 单张图像labelme转mask函数

In [25]:
def labelme2mask_single_img(img_path, labelme_json_path):
    '''
    Rasterise the Labelme annotations of one image into a single-channel mask.

    The image is read only to obtain its height and width. Classes are drawn in
    the order they appear in the module-level ``class_info`` list, so a class
    listed later overwrites the pixels of a class listed earlier.

    Args:
        img_path: Path of the source image.
        labelme_json_path: Path of the Labelme JSON file; its ``shapes`` list is
            read, and each shape's ``label`` and ``points`` are used.

    Returns:
        A 2-D NumPy array of shape (height, width). It is created by ``np.zeros``
        without a dtype, so it keeps NumPy's default float dtype. Background
        pixels are 0 and annotated pixels hold the class's ``color`` value.

    Raises:
        OSError: if ``cv2.imread`` cannot read ``img_path``.
        KeyError: if a ``line``/``linestrip``/``circle`` entry of ``class_info``
            has no ``thickness`` key.
    '''

    img_bgr = cv2.imread(img_path)
    if img_bgr is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a message that names the offending file.
        raise OSError(f'cv2.imread could not read image: {img_path}')
    img_mask = np.zeros(img_bgr.shape[:2]) # blank canvas, 0 = background

    with open(labelme_json_path, 'r', encoding='utf-8') as f:
        labelme = json.load(f)

    for one_class in class_info: # draw the classes in their configured order
        shape_type = one_class['type']

        for each in labelme['shapes']: # pick the annotations that belong to this class
            if each['label'] != one_class['label']:
                continue

            if shape_type == 'polygon': # closed region, filled
                points = [np.array(each['points'], dtype=np.int32).reshape((-1, 1, 2))]
                img_mask = cv2.fillPoly(img_mask, points, color=one_class['color'])

            elif shape_type == 'line' or shape_type == 'linestrip': # open poly-line
                points = [np.array(each['points'], dtype=np.int32).reshape((-1, 1, 2))]
                img_mask = cv2.polylines(img_mask, points, isClosed=False, color=one_class['color'], thickness=one_class['thickness'])

            elif shape_type == 'circle': # first point = centre, second point = on the circumference
                points = np.array(each['points'], dtype=np.int32)

                # Plain Python ints are handed to OpenCV rather than NumPy scalars.
                center_x, center_y = int(points[0][0]), int(points[0][1])
                edge_x, edge_y = int(points[1][0]), int(points[1][1])

                # Euclidean distance between the two points, truncated to an integer.
                radius = int(np.linalg.norm(np.array([center_x, center_y]) - np.array([edge_x, edge_y])))

                img_mask = cv2.circle(img_mask, (center_x, center_y), radius, one_class['color'], one_class['thickness'])

            else:
                print('未知标注类型', one_class['type'])

    return img_mask

## labelme转mask-批量

In [26]:
# Later cells use paths relative to the images/ folder, so the working directory is moved there.
os.chdir(Dataset_Path)
os.makedirs('masks', exist_ok=True)  # do not fail when masks/ is left over from an earlier run
os.chdir('images')

In [27]:
# Convert every file in the current directory (images/) into a mask saved under ../masks.
for img_path in tqdm(os.listdir()):

    try:
        # One stem (everything before the last dot) is used for both the JSON and
        # the mask name, so file names that contain extra dots stay paired.
        img_stem = os.path.splitext(img_path)[0]

        labelme_json_path = os.path.join('../', 'labelme_jsons', img_stem + '.json')

        img_mask = labelme2mask_single_img(img_path, labelme_json_path)

        mask_path = img_stem + '.png'

        # cv2.imwrite signals failure through its return value, not an exception.
        if not cv2.imwrite(os.path.join('../','masks',mask_path), img_mask):
            raise OSError('cv2.imwrite could not write ' + mask_path)

    except Exception as E:
        # Best effort: report the failing file and carry on with the rest.
        print(img_path, '转换失败', E)
        

100%|██████████| 656/656 [00:42<00:00, 15.56it/s]


## 转换之后的mask保存在`masks`文件夹中

## 重命名和删除文件夹

In [32]:
# Relative chdir: only works when the current working directory contains mmsegmentation/.
os.chdir('mmsegmentation/LSR16F656')
os.getcwd()

'/home/featurize/work/MMSegmentation_Tutorials-20230816/mmsegmentation/LSR16F656'

In [33]:
!rm -rf labelme_jsons # Delete the labelme_jsons folder in the current working directory

In [30]:
# NOTE(review): every path here is relative and the cell changes the working
# directory twice, so it is meant to run exactly once; when 'images' is not in
# the directory reached by the first chdir, shutil.move raises FileNotFoundError.
os.chdir('../')
shutil.move('images', 'img_dir')  # rename images/ to img_dir/
shutil.move('masks', 'ann_dir')   # rename masks/ to ann_dir/
!rm -rf labelme_jsons
os.chdir('../')

FileNotFoundError: [Errno 2] No such file or directory: 'images'

## 得到最终的语义分割数据集

## 查看数据集目录结构

In [36]:
import seedir as sd
# Print the final dataset folder as a tree, one level deep, with emoji icons
sd.seedir('LSR16F656', style='emoji', depthlimit=1)

📁 LSR16F656/
├─📁 img_dir/
└─📁 ann_dir/
