# In [2]:
import os
import re
import pandas as pd

def natural_sort_key(s):
    """Return a key that sorts strings in natural (human) order.

    The string is split on runs of ASCII digits. Digit runs become ``int``
    values and the text in between is lower-cased, so ``"img2"`` sorts
    before ``"img12"`` and letter case is ignored.

    Args:
        s: The string (typically a file name) to build a key for.

    Returns:
        A list alternating lower-cased text and ``int`` chunks, always
        starting with a text chunk (possibly empty).
    """
    chunks = re.split('([0-9]+)', s)
    # With a capturing group, re.split puts the matched digit runs at the odd
    # indices. Deciding by position instead of str.isdigit() keeps non-ASCII
    # "digit" characters (e.g. superscripts) as text, so int() cannot fail and
    # every key has the same str/int layout for comparison.
    return [
        int(chunk) if index % 2 else chunk.lower()
        for index, chunk in enumerate(chunks)
    ]

def summarize_labels(label_dir, output_csv):
    """Count labelled objects per class across a directory of label files.

    Every ``.txt`` file directly inside ``label_dir`` is read line by line.
    The first whitespace-separated token of each line is treated as a class
    id; lines whose id is not in the known mapping are ignored, as are blank
    lines. The totals are written to ``output_csv`` with the columns
    ``label`` and ``total_count`` (one row per known class, including
    classes with a count of zero), and a confirmation message is printed.

    Args:
        label_dir: Directory containing the ``.txt`` label files.
        output_csv: Path of the CSV file to create.
    """
    # Maps the class id at the start of each label line to its class name.
    class_names = {
        '0': 'bus',
        '1': 'traffic light',
        '2': 'traffic sign',
        '3': 'person',
        '4': 'bike',
        '5': 'truck',
        '6': 'motor',
        '7': 'car',
        '8': 'rider'
    }

    # Start every known class at zero so it appears in the CSV even if unused.
    counts = {name: 0 for name in class_names.values()}

    # Walk the files in natural order.
    for file_name in sorted(os.listdir(label_dir), key=natural_sort_key):
        if not file_name.endswith('.txt'):
            continue
        with open(os.path.join(label_dir, file_name), 'r') as file:
            for line in file:
                tokens = line.split()
                if not tokens:
                    # Blank or whitespace-only line: there is no class id to
                    # read, and indexing tokens[0] would raise IndexError.
                    continue
                class_name = class_names.get(tokens[0])
                if class_name is not None:
                    counts[class_name] += 1

    # Convert the totals to a DataFrame.
    df_label_counts = pd.DataFrame(list(counts.items()), columns=['label', 'total_count'])

    # Save to the CSV file.
    df_label_counts.to_csv(output_csv, index=False)
    print(f"Summary CSV has been created at {output_csv}")

# Directory that holds the label files
label_dir = r"E:\yolov5-master\trainB\new_labels"
# Destination path of the summary CSV
output_csv = r"E:\yolov5-master\everone_trainB_labels_test.csv"

# Build the per-class summary and write it out
summarize_labels(label_dir=label_dir, output_csv=output_csv)

# Output: Summary CSV has been created at E:\yolov5-master\everone_trainB_labels_test.csv
