### 生成YOLO数据集标注

In [8]:
import shutil
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import json
import glob
import cv2
# Root directory that holds the raw images and receives the generated dataset.
BASE_DATA_PATH = "/home/tipriest/data/seg_risky"

# Annotation table; the code below relies on its 'Path' and 'Polygons' columns.
training_anno = pd.read_csv(
    'http://mirror.coggle.club/seg_risky_training_anno.csv')

# Images available locally in sub-folder "4", expressed relative to
# BASE_DATA_PATH so they can be matched against the 'Path' column.
train_jpgs = [os.path.relpath(x, BASE_DATA_PATH)
              for x in glob.glob(os.path.join(BASE_DATA_PATH, '4/*.jpg'))]
# .copy() detaches the filtered rows from the original frame, so the column
# assignment below modifies a real DataFrame and not a temporary selection
# (which would trigger pandas' SettingWithCopyWarning).
training_anno = training_anno[training_anno['Path'].isin(train_jpgs)].copy()
# 'Polygons' is stored as JSON text in the CSV; decode it into Python objects.
training_anno['Polygons'] = training_anno['Polygons'].apply(json.loads)

# Output locations of the generated dataset. The trailing "/" on the split
# directories is intentional: later code builds file names by concatenation.
yolo_seg_dataset_path = os.path.join(BASE_DATA_PATH, "datasetBaseDetr")
yolo_seg_dataset_train_path = os.path.join(yolo_seg_dataset_path, "train/")
yolo_seg_dataset_valid_path = os.path.join(yolo_seg_dataset_path, "valid/")


def normalize_polygon(polygon, img_width, img_height):
    """Rescale polygon vertices from absolute units to fractions of the image.

    Args:
        polygon: Iterable of two-element ``(x, y)`` vertices.
        img_width: Divisor applied to every x value.
        img_height: Divisor applied to every y value.

    Returns:
        A list of ``(x / img_width, y / img_height)`` tuples, in input order.
    """
    scaled = []
    for px, py in polygon:
        scaled.append((px / img_width, py / img_height))
    return scaled


def create_classes_file(folder_path, class_list=None):
    """Write ``classes.txt`` (one class name per line) into *folder_path*.

    Args:
        folder_path: Existing directory that receives ``classes.txt``; an
            existing file of that name is overwritten.
        class_list: Optional iterable of class names to write. When omitted,
            the previously hard-coded list ``['cat', 'dog', 'bird']`` is used,
            so existing one-argument calls behave as before.
    """
    if class_list is None:
        class_list = ['cat', 'dog', 'bird']
    classes_file_path = os.path.join(folder_path, 'classes.txt')
    with open(classes_file_path, 'w') as file:
        file.writelines(f'{class_name}\n' for class_name in class_list)

# Start from an empty dataset tree: drop whatever a previous run left behind,
# then create the train and valid directories afresh.
if os.path.exists(yolo_seg_dataset_path):
    shutil.rmtree(yolo_seg_dataset_path)
for split_dir in (yolo_seg_dataset_train_path, yolo_seg_dataset_valid_path):
    os.makedirs(split_dir)

def _export_yolo_split(rows, split_dir):
    """Copy the images of *rows* into *split_dir* and write their YOLO labels.

    A ``classes.txt`` file is created in *split_dir* first. For every row the
    image ``BASE_DATA_PATH/<Path>`` is copied next to a ``<stem>.txt`` label
    file holding one line per polygon: class id ``0`` followed by the polygon
    vertices normalised by the image width/height (three decimals).

    Args:
        rows: DataFrame slice with a ``Path`` column (image path relative to
            ``BASE_DATA_PATH``) and a ``Polygons`` column (list of polygons).
        split_dir: Existing destination directory for images and labels.
    """
    create_classes_file(split_dir)
    for _, row in rows.iterrows():
        src_path = os.path.join(BASE_DATA_PATH, row.Path)
        img = cv2.imread(src_path)
        if img is None:
            # cv2.imread reports a missing/undecodable file by returning None
            # instead of raising; without the image size no label can be
            # normalised, so the sample is skipped entirely.
            print(f'skip unreadable image: {src_path}')
            continue
        img_height, img_width = img.shape[:2]

        shutil.copy(src_path, split_dir)
        stem = os.path.splitext(os.path.basename(row.Path))[0]
        txt_filename = os.path.join(split_dir, stem + '.txt')
        with open(txt_filename, 'w') as up:
            for polygon in row.Polygons:
                normalized_polygon = normalize_polygon(
                    polygon, img_width, img_height)
                normalized_coords = ' '.join(
                    f'{x:.3f} {y:.3f}' for x, y in normalized_polygon)
                # Every polygon is written with class id 0.
                up.write(f'0 {normalized_coords}\n')


# Training split: rows 1000..10999 of the filtered annotation table.
_export_yolo_split(training_anno.iloc[1000:11000], yolo_seg_dataset_train_path)

# Validation split: the first 1000 rows of the filtered annotation table.
_export_yolo_split(training_anno.iloc[:1000], yolo_seg_dataset_valid_path)

### 从YOLO数据集标注生成COCO数据集标注

In [9]:
import json
import os
from PIL import Image

def yolo_polygon_to_coco_bbox(polygon_points, img_width, img_height):
    """Return the bounding rectangle of a normalised polygon in COCO layout.

    Args:
        polygon_points: Sequence of points whose first two items are the
            normalised x and y coordinates.
        img_width: Factor that scales x coordinates.
        img_height: Factor that scales y coordinates.

    Returns:
        ``[x_min, y_min, width, height]`` computed from the scaled vertices.
    """
    xs = []
    ys = []
    for point in polygon_points:
        xs.append(point[0] * img_width)
        ys.append(point[1] * img_height)
    left, top = min(xs), min(ys)
    return [left, top, max(xs) - left, max(ys) - top]

def convert_yolo_polygon_to_coco(yolo_folder, output_file):
    """Convert a folder of YOLO polygon labels into one COCO JSON file.

    The folder must contain ``classes.txt`` (one category name per line; the
    zero-based line index becomes the category id). Every other ``*.txt``
    file is treated as a label file for the ``.jpg`` of the same stem; label
    files without a matching image are ignored. Each label line has the form
    ``<class_id> x1 y1 x2 y2 ...`` with coordinates normalised to [0, 1]
    relative to the image size.

    Args:
        yolo_folder: Directory holding ``classes.txt``, label files and images.
        output_file: Path of the JSON file to write (``images``,
            ``annotations`` and ``categories`` keys).

    Raises:
        ValueError: If a label line has no coordinates or an odd number of
            coordinate values.
    """
    with open(os.path.join(yolo_folder, "classes.txt"), 'r') as f:
        category_names = [line.strip() for line in f]

    categories = [
        {"id": category_id, "name": category_name, "supercategory": "none"}
        for category_id, category_name in enumerate(category_names)
    ]
    images = []
    annotations = []

    # os.listdir returns entries in arbitrary order; sorting makes the
    # assigned image/annotation ids reproducible between runs.
    for filename in sorted(os.listdir(yolo_folder)):
        if not filename.endswith('.txt') or filename == 'classes.txt':
            continue

        label_path = os.path.join(yolo_folder, filename)
        # splitext swaps only the final extension (str.replace would also
        # rewrite a '.txt' occurring in the middle of the name).
        img_filename = os.path.splitext(filename)[0] + '.jpg'
        img_path = os.path.join(yolo_folder, img_filename)

        # Labels without their image cannot be scaled back to pixels.
        if not os.path.exists(img_path):
            continue

        # Context manager closes the file handle right after reading the size;
        # otherwise one handle per image stays open for the whole run.
        with Image.open(img_path) as img:
            img_width, img_height = img.size

        img_id = len(images)
        images.append({
            "id": img_id,
            "file_name": img_filename,
            "width": img_width,
            "height": img_height
        })

        with open(label_path, 'r') as f:
            for line in f:
                label_info = line.split()
                if not label_info:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                category_id = int(label_info[0])
                polygon_coords = list(map(float, label_info[1:]))
                if not polygon_coords or len(polygon_coords) % 2:
                    raise ValueError(
                        f"malformed polygon in {label_path}: {line.strip()!r}")
                polygon_points = list(
                    zip(polygon_coords[0::2], polygon_coords[1::2]))

                # Bounding rectangle in pixels: [x_min, y_min, width, height].
                coco_bbox = yolo_polygon_to_coco_bbox(
                    polygon_points, img_width, img_height)
                # Flat pixel list [x1, y1, x2, y2, ...] for the segmentation.
                segmentation = []
                for x, y in polygon_points:
                    segmentation.append(x * img_width)
                    segmentation.append(y * img_height)

                annotations.append({
                    "id": len(annotations),
                    "image_id": img_id,
                    "category_id": category_id,
                    "segmentation": [segmentation],
                    "bbox": coco_bbox,
                    # NOTE: this is the bounding-box area, not the polygon area.
                    "area": coco_bbox[2] * coco_bbox[3],
                    "iscrowd": 0
                })

    coco_format = {
        "images": images,
        "annotations": annotations,
        "categories": categories
    }

    with open(output_file, 'w') as f:
        json.dump(coco_format, f, indent=4)

# Example usage: convert the train and valid folders, writing each JSON file
# into the folder it describes.
# NOTE(review): the dataset-generation cell writes to ".../datasetBaseDetr",
# while these paths point at ".../datasetCOCO" — confirm the folder is
# copied/renamed before this cell runs.
yolo_train_folder = '/home/tipriest/data/seg_risky/datasetCOCO/train'
coco_train_file = '/home/tipriest/data/seg_risky/datasetCOCO/train/train_annotations.json'
yolo_valid_folder = '/home/tipriest/data/seg_risky/datasetCOCO/valid'
coco_valid_file = '/home/tipriest/data/seg_risky/datasetCOCO/valid/valid_annotations.json'
for _yolo_dir, _coco_json in (
        (yolo_train_folder, coco_train_file),
        (yolo_valid_folder, coco_valid_file)):
    convert_yolo_polygon_to_coco(_yolo_dir, _coco_json)


### 删除文件夹中的YOLO标注

In [10]:
import os
import glob

def delete_txt_files(folder_path):
    """Delete every ``*.txt`` file located directly inside *folder_path*.

    Each file is reported on stdout, either as deleted or, when removal
    fails, together with the error; a failure does not stop the loop.

    Args:
        folder_path: Directory whose ``.txt`` files are removed.
    """
    pattern = os.path.join(folder_path, '*.txt')
    for txt_path in glob.glob(pattern):
        try:
            os.remove(txt_path)
            print(f"已删除文件: {txt_path}")
        except Exception as err:
            print(f"删除文件失败: {txt_path}，错误信息: {err}")

# Example usage: strip the YOLO label files from both split folders.
for _split_folder in (yolo_train_folder, yolo_valid_folder):
    delete_txt_files(_split_folder)

已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/47ff59ecaa028ebf74348f534f20569c.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/465eed279eb4d90338e5978ed0731de6.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/415ac15767f349fce946b6bfab90747d.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/409f2f6ea591f61939d72cd448921254.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/4a43dcbdb8760eef02cfaf288c5a053f.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/4f014e167b7408ad6c5f90675962a025.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/464fbf33102ba21c327c4c51808108f0.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/4ed64252d4fade5ff110f8178f9600b1.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/4484b746ea442d55f9e582de44ef7dc1.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/4bbe5aa19923e36f615e650e5d4ba1b2.txt
已删除文件: /home/tipriest/data/seg_risky/datasetCOCO/train/4e4d59ad500e69df7fa5f1549