Skip to content

Commit

Permalink
Add labelme to coco
Browse files Browse the repository at this point in the history
  • Loading branch information
SWHL committed Oct 31, 2023
1 parent ff8d081 commit 05debad
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 178 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ D(COCO) --> A
B --> D
E(YOLOv5 YAML) --> D
F(darknet) --> D
G(labelme) --> D
```

## Installation
Expand Down
File renamed without changes
File renamed without changes.
File renamed without changes
File renamed without changes.
9 changes: 0 additions & 9 deletions demo.py

This file was deleted.

242 changes: 78 additions & 164 deletions label_convert/labelme_to_coco.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
import random
import shutil
import time
import warnings
from pathlib import Path
from typing import List, Optional, Union

import cv2
import numpy as np
from tqdm import tqdm


Expand All @@ -33,7 +32,8 @@ def __init__(
if out_dir is None:
save_dir_name = f"{Path(self.raw_data_dir).name}_COCO_format"
self.output_dir = self.raw_data_dir.parent / save_dir_name
self.output_dir = Path(out_dir)
else:
self.output_dir = Path(out_dir)
self.mkdir(self.output_dir)

self.anno_dir = self.output_dir / "annotations"
Expand All @@ -52,7 +52,14 @@ def __init__(

self.cur_year = time.strftime("%Y", time.localtime(time.time()))

def __call__(self, mode_list: List[str] = None):
self.cls_to_idx = {}
self.object_id = 1

self.categories = self._get_category()

def __call__(
self,
):
img_list = self.get_img_list()
if not img_list:
raise ValueError(f"{self.raw_data_dir} is empty!")
Expand All @@ -66,48 +73,15 @@ def __call__(self, mode_list: List[str] = None):
)
train_list, val_list, test_list = split_list

# 遍历所有的json,得到所有类别字段
# TODO

anno = self._init_json()
for i, img_path in enumerate(train_list):
img_id = i + 1
train_anno = self.generate_json(train_list, self.train_dir)
self.write_json(self.anno_dir / "instances_train2017.json", train_anno)

new_img_name = f"{img_id:012d}.jpg"
new_img_path = self.train_dir / new_img_name
val_anno = self.generate_json(val_list, self.val_dir)
self.write_json(self.anno_dir / "instances_val2017.json", val_anno)

# 将图像复制到指定目录下
self.cp_file(img_path, new_img_path)

raw_json_path = img_path.with_suffix(".json")
raw_json_data = self.read_json(raw_json_path)

# 写入到json中
img_info = {
"date_captured": str(self.cur_year),
"file_name": new_img_name,
"id": img_id,
"height": raw_json_data.get("imageHeight"),
"width": raw_json_data.get("imageWidth"),
}

# 记录类别

print("ok")

for mode in mode_list:
# Create the directory of saving the new image.
save_img_dir = self.output_dir / f"{mode}2017"
self.mkdir(save_img_dir)

# Generate json file.
anno_dir = self.output_dir / "annotations"
self.mkdir(anno_dir)

save_json_path = anno_dir / f"instances_{mode}2017.json"
json_data = self.convert(img_list, save_img_dir, mode)

self.write_json(save_json_path, json_data)
if test_list:
test_anno = self.generate_json(test_list, self.test_dir)
self.write_json(self.anno_dir / "instances_test2017.json", test_anno)
print(f"Successfully convert, detail in {self.output_dir}")

def get_img_list(self):
Expand All @@ -119,7 +93,7 @@ def gen_image_label_dir(self, img_list):
new_image_list = []
for img_path in tqdm(img_list):
right_label_path = img_path.with_name(f"{img_path.stem}.json")
if right_label_path.exists() and self.read_txt(str(right_label_path)):
if right_label_path.exists() and self.read_json(str(right_label_path)):
new_image_list.append(img_path)
return new_image_list

Expand Down Expand Up @@ -161,141 +135,81 @@ def _init_json(self):
"url": "https://github.com/RapidAI/LabelConvert/LICENSE",
}
],
"categories": [],
"categories": self.categories,
}
return annotation_info

def _get_category(
self,
):
# 这个放在扫描全部json的中获取
class_list = self.read_txt(classes_path)
categories = []
for i, category in enumerate(class_list, 1):
categories.append(
json_list = Path(self.raw_data_dir).glob("*.json")
all_categories = []
for json_path in json_list:
json_info = self.read_json(json_path)
shapes = json_info.get("shapes", [])
all_categories.extend([v["label"] for v in shapes])

categories = list(set(all_categories))
categories.sort(key=all_categories.index)

coco_categories = []
for i, cls_name in enumerate(categories):
coco_categories.append(
{
"supercategory": category,
"id": i,
"name": category,
"supercategory": cls_name,
"id": i + 1,
"name": cls_name,
}
)
return categories

def convert(self, img_list, save_img_dir, mode):
images, annotations = [], []
for img_id, img_path in enumerate(tqdm(img_list, desc=mode), 1):
image_dict = self.get_image_info(img_path, img_id, save_img_dir)
images.append(image_dict)
self.cls_to_idx = {v: i + 1 for i, v in enumerate(categories)}
return coco_categories

label_path = self.raw_data_dir / "labels" / f"{Path(img_path).stem}.txt"
annotation = self.get_annotation(
label_path, img_id, image_dict["height"], image_dict["width"]
)
annotations.extend(annotation)

json_data = {
"info": self.info,
"images": images,
"licenses": self.licenses,
"type": self.type,
"annotations": annotations,
"categories": self.categories,
}
return json_data

def get_image_info(self, img_path, img_id, save_img_dir):
img_path = Path(img_path)
if self.raw_data_dir.as_posix() not in img_path.as_posix():
# relative path (relative to the raw_data_dir)
# e.g. images/images(3).jpg
img_path = self.raw_data_dir / img_path

self.verify_exists(img_path)

new_img_name = f"{img_id:012d}.jpg"
save_img_path = save_img_dir / new_img_name
img_src = cv2.imread(str(img_path))
if img_path.suffix.lower() == ".jpg":
shutil.copyfile(img_path, save_img_path)
else:
cv2.imwrite(str(save_img_path), img_src)

height, width = img_src.shape[:2]
image_info = {
"date_captured": self.cur_year,
"file_name": new_img_name,
"id": img_id,
"height": height,
"width": width,
}
return image_info

def get_annotation(self, label_path: Path, img_id, height, width):
def get_box_info(vertex_info, height, width):
cx, cy, w, h = [float(i) for i in vertex_info]

cx = cx * width
cy = cy * height
box_w = w * width
box_h = h * height

# left top
x0 = max(cx - box_w / 2, 0)
y0 = max(cy - box_h / 2, 0)
def generate_json(self, img_list, save_dir):
anno = self._init_json()
for i, img_path in enumerate(img_list):
img_id = i + 1

# right bottom
x1 = min(x0 + box_w, width)
y1 = min(y0 + box_h, height)
new_img_name = f"{img_id:012d}{Path(img_path).suffix}"
new_img_path = save_dir / new_img_name
self.cp_file(img_path, new_img_path)

segmentation = [[x0, y0, x1, y0, x1, y1, x0, y1]]
bbox = [x0, y0, box_w, box_h]
area = box_w * box_h
return segmentation, bbox, area
raw_json_path = img_path.with_suffix(".json")
raw_json_data = self.read_json(raw_json_path)

if not label_path.exists():
annotation = [
{
"segmentation": [],
"area": 0,
"iscrowd": 0,
"image_id": img_id,
"bbox": [],
"category_id": -1,
"id": self.annotation_id,
}
]
self.annotation_id += 1
return annotation

annotation = []
label_list = self.read_txt(str(label_path))
for i, one_line in enumerate(label_list):
label_info = one_line.split(" ")
if len(label_info) < 5:
warnings.warn(f"The {i+1} line of the {label_path} has been corrupted.")
continue

category_id, vertex_info = label_info[0], label_info[1:]
segmentation, bbox, area = get_box_info(vertex_info, height, width)
annotation.append(
{
"segmentation": segmentation,
img_info = {
"date_captured": str(self.cur_year),
"file_name": new_img_name,
"id": img_id,
"height": raw_json_data.get("imageHeight"),
"width": raw_json_data.get("imageWidth"),
}
anno["images"].append(img_info)

shapes = raw_json_data.get("shapes", [])
anno_list = []
for shape in shapes:
label_name = shape.get("label")
label_id = self.cls_to_idx[label_name]

points = np.array(shape.get("points"))
x0, y0 = np.min(points, axis=0)
x1, y1 = np.max(points, axis=0)
area = (x1 - x0) * (y1 - y0)

seg_points = [np.ravel(points, order="C").tolist()]
one_anno_dict = {
"segmentation": seg_points,
"area": area,
"iscrowd": 0,
"image_id": img_id,
"bbox": bbox,
"category_id": int(category_id) + 1,
"id": self.annotation_id,
"bbox": [x0, y0, x1, y1],
"category_id": label_id,
"id": self.object_id,
}
)
self.annotation_id += 1
return annotation

@staticmethod
def read_txt(txt_path):
with open(str(txt_path), "r", encoding="utf-8") as f:
data = list(map(lambda x: x.rstrip("\n"), f))
return data
anno_list.append(one_anno_dict)
self.object_id += 1
anno["annotations"].extend(anno_list)
return anno

@staticmethod
def read_json(json_path: Union[str, Path]):
Expand All @@ -313,7 +227,7 @@ def verify_exists(file_path: Union[Path, str]):
raise FileNotFoundError(f"The {file_path} is not exists!!!")

@staticmethod
def write_json(json_path, content: dict):
def write_json(json_path: Union[str, Path], content: dict):
with open(json_path, "w", encoding="utf-8") as f:
json.dump(content, f, ensure_ascii=False)

Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
opencv_python
tqdm
tqdm
numpy
7 changes: 3 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ def get_readme() -> str:
VERSION_NUM = matched_versions
sys.argv = sys.argv[:2]

project_urls = {
'Documentation': 'https://rapidai.github.io/LabelConvert/docs'
}
project_urls = {"Documentation": "https://rapidai.github.io/LabelConvert/docs"}

setuptools.setup(
name=MODULE_NAME,
Expand Down Expand Up @@ -82,7 +80,8 @@ def get_readme() -> str:
f"labelImg_to_yolov5={MODULE_NAME}.labelImg_to_yolov5:main",
f"yolov5_to_coco={MODULE_NAME}.yolov5_to_coco:main",
f"yolov5_yaml_to_coco={MODULE_NAME}.yolov5_yaml_to_coco:main",
f'labelImg_to_publaynet={MODULE_NAME}.labelImg_to_publaynet:main',
f"labelImg_to_publaynet={MODULE_NAME}.labelImg_to_publaynet:main",
f"labelme_to_coco={MODULE_NAME}.labelme_to_coco:main",
],
},
)

0 comments on commit 05debad

Please sign in to comment.