# 计数工具

In [None]:
import os
import pandas as pd
from collections import Counter

def analyze_folders(base_paths, output_excel="folder_file_counts.xlsx"):
    """
    Recursively summarize every folder found under the given base paths.

    One row is produced per folder (each base path itself plus all of its
    nested subfolders) with the folder path, folder name, number of direct
    subfolders, number of direct files, and one column per file extension
    holding how many direct files carry that extension. Extensions are
    compared case-insensitively; files without an extension (including
    dotfiles such as ``.gitignore``) are counted under "(no extension)", so
    the extension columns of a row always add up to its "File Count".

    Args:
        base_paths: A directory path, or an iterable of directory paths.
            Entries that are not existing directories are reported on stdout
            and skipped.
        output_excel: Destination of the Excel report. Pass ``None`` to skip
            writing a file and only get the DataFrame back.

    Returns:
        pandas.DataFrame: The table that was (or would have been) exported.
    """
    # Accept a single path as well as an iterable of paths; iterating a bare
    # string would otherwise treat every character as a separate path.
    if isinstance(base_paths, (str, os.PathLike)):
        base_paths = [base_paths]

    base_columns = ["Folder Path", "Folder Name", "Subfolder Count", "File Count"]
    all_suffixes = set()
    folder_stats = []

    # First pass: collect per-folder statistics and every suffix encountered.
    for base_path in base_paths:
        if not os.path.isdir(base_path):
            print(f"⚠️ {base_path} 不是有效的文件夹，跳过。")
            continue

        for root, dirs, files in os.walk(base_path):
            # Sorting in place makes os.walk descend in a stable order, so
            # the report rows do not depend on directory listing order.
            dirs.sort()

            # Count every file; splitext() yields "" for names without an
            # extension, which is reported as "(no extension)" below.
            suffix_counter = Counter(
                os.path.splitext(name)[1].lower() for name in files
            )
            all_suffixes.update(suffix_counter)

            folder_stats.append({
                "Folder Path": root,
                # normpath drops a trailing separator, which would otherwise
                # make basename() return an empty folder name.
                "Folder Name": os.path.basename(os.path.normpath(root)),
                "Subfolder Count": len(dirs),
                "File Count": len(files),
                "Suffix Counter": suffix_counter,
            })

    # Second pass: expand the suffix counters into one column per suffix.
    # Sorting keeps the column order reproducible between runs.
    suffix_order = sorted(all_suffixes)
    suffix_columns = [suffix or "(no extension)" for suffix in suffix_order]

    results = []
    for stat in folder_stats:
        row = {column: stat[column] for column in base_columns}
        counter = stat["Suffix Counter"]
        for suffix, column in zip(suffix_order, suffix_columns):
            row[column] = counter[suffix]  # Counter returns 0 when absent
        results.append(row)

    # Explicit columns keep the header row even when no folder was scanned.
    df = pd.DataFrame(results, columns=base_columns + suffix_columns)

    if output_excel is not None:
        df.to_excel(output_excel, index=False)
        print(f"✅ 统计结果已导出到: {output_excel}")

    return df


# Example usage: runs only when this file is executed directly.
if __name__ == "__main__":
    # Replace with the directories you want to analyze.
    base_paths = ["/workspace/models/SAHI/run_v7"]
    analyze_folders(
        base_paths,
        output_excel="folder_analysis_with_types.xlsx",
    )


✅ 统计结果已导出到: folder_analysis_with_types.xlsx
