# 查看根目录下需要运行的文件夹

In [1]:
from pathlib import Path

if __name__ == "__main__":
    # === 1) 给定一个根目录 ===
    root_dir = Path("/workspace/models/SAHI/run_v8")
    # end_with = "_sliced"
    end_with = "_data"

    # === 2) 遍历所有子目录 ===
    sub_dirs = list(root_dir.glob("**/*" + end_with))


    if not sub_dirs:
        print(f"没有找到 *{end_with} 目录")
        exit(0)

    print(f"找到以下 {end_with} 数据集：")
    for i, d in enumerate(sub_dirs):
        print(f"[{i}] {d}")

    # === 3) 让你选择要跑的目录 ===
    idx_str = input("请输入要处理的编号 (多个用逗号分隔, 回车默认全选): ").strip()
    if idx_str:
        indices = [int(x) for x in idx_str.split(",")]
        chosen_dirs = [sub_dirs[i] for i in indices]
    else:
        chosen_dirs = sub_dirs
    
    print(f"将处理以下 {end_with} 目录：")
    for i, d in enumerate(chosen_dirs):
        print(f"- {i+1}. {d}")

    # # 如果有的文件夹的raw_data里面没有图片，就移除
    # chosen_dirs = [d for d in chosen_dirs if (d.parent / "raw_data").exists() and any((d.parent / "raw_data").glob("*.jpg"))]
    # if not chosen_dirs:
    #     print(f"没有找到包含图片的 *{end_with} 目录")
    #     exit(0)

找到以下 _data 数据集：
[0] /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data
[1] /workspace/models/SAHI/run_v8/cropped_objects/others_data
[2] /workspace/models/SAHI/run_v8/cropped_objects/swd_data
将处理以下 _data 目录：
- 1. /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data
- 2. /workspace/models/SAHI/run_v8/cropped_objects/others_data
- 3. /workspace/models/SAHI/run_v8/cropped_objects/swd_data


# 通过模型,回溯重新精炼数据集

In [3]:
import os
import re
import json
from collections import Counter
from ultralytics import YOLO
from pathlib import Path


MODEL_PATH = "/workspace/models/best_model/yolo11n-cls-best_v5.pt"

# 解析文件名的正则表达式
UUID_RE = re.compile(r"uuid_([a-f0-9\-]+)\.jpg", re.IGNORECASE)
ORIG_RE = re.compile(r"^(\d+_\d+_\d+)_obj", re.IGNORECASE)

def get_probs_fields(res, name_map):
    """安全获取 top1/top5 字段（缺失时给空值/空列表）。"""
    probs = getattr(res, "probs", None)
    if probs is None:
        return None, None, None, [], [], []

    # top1
    try:
        top1_id = int(probs.top1)
    except Exception:
        top1_id = None
    top1_name = name_map.get(top1_id) if top1_id is not None else None

    # top1 conf
    try:
        top1_conf = float(getattr(probs.top1conf, "item", lambda: probs.top1conf)())
    except Exception:
        top1_conf = None

    # top5
    try:
        top5_id = [int(x) for x in list(probs.top5)]
    except Exception:
        top5_id = []
    top5_name = [name_map.get(i, str(i)) for i in top5_id]
    try:
        top5_conf = [float(x) for x in list(probs.top5conf)]
    except Exception:
        top5_conf = []

    return top1_id, top1_name, top1_conf, top5_id, top5_name, top5_conf


# ==== 主流程 ====
if __name__ == "__main__":
    # 1) 加载模型
    model = YOLO(MODEL_PATH)
    class_names = model.names  # dict: {0: "...", 1: "..."}

    for d in chosen_dirs:
        print(f"\n=== 处理目录: {d} ===")
        input_dir  = d
        output_json = d.parent / f"{d.name}_classification_predicted_results.json"

        # 确保输入目录不为空
        if not any(Path(input_dir).glob("*.jpg")):
            print(f"⚠️ 输入目录 {input_dir} 没有 JPG 文件，跳过")
            continue

        # 2) 执行预测（Ultralytics 支持目录）
        results = model(input_dir)

        data = []
        counts = Counter()

        for res in results:
            path = getattr(res, "path", "")
            fname = os.path.basename(path)

            # 提取 uuid / 原图名
            uuid_match = UUID_RE.search(fname)
            uuid_str = uuid_match.group(1) if uuid_match else None
            orig_match = ORIG_RE.match(fname)
            original_name = orig_match.group(1) if orig_match else None

            # 概率字段
            top1_id, top1_name, top1_conf, top5_id, top5_name, top5_conf = get_probs_fields(res, class_names)
            if top1_id is not None:
                counts[top1_id] += 1

            # 记录一条
            data.append({
                "path": path,
                "uuid": uuid_str,
                "original_name": original_name,
                "top1_id": top1_id,
                "top1_name": top1_name,
                "top1_conf": top1_conf,
                "top5_id": top5_id,
                
                "top5_name": top5_name,
                "top5_conf": top5_conf,
            })

        # 3) 打印统计
        print("分类统计结果：")
        for cls_id, num in counts.items():
            print(f"{class_names.get(cls_id, cls_id)}: {num}")
        total = sum(counts.values())
        print(f"总计: {total}")

        # 4) 保存 JSON（保持与你原来一致的结构）
        with open(output_json, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)

        print(f"✅ 已保存到 {output_json}")



=== 处理目录: /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data ===

image 1/42 /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/0618_0635_780_obj27_swd_uuid_0bf091b9-2d79-4cfa-ac91-93e72292453b.jpg: 64x64 others 0.80, swd 0.20, mayswd 0.00, 1.6ms
image 2/42 /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/0718_0635_600_obj115_swd_uuid_124a9204-9cdf-4edb-8375-5434540c4c38.jpg: 64x64 swd 0.48, others 0.35, mayswd 0.18, 1.6ms
image 3/42 /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/0718_0635_600_obj40_swd_uuid_f875d675-5b10-4e1f-bab8-09deff6a9b58.jpg: 64x64 mayswd 0.64, others 0.25, swd 0.11, 1.6ms
image 4/42 /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/0718_0635_600_obj42_swd_uuid_e9b71ac3-0ec7-462c-a666-63d288f94000.jpg: 64x64 swd 0.88, others 0.06, mayswd 0.05, 1.5ms
image 5/42 /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/0718_0635_600_obj70_swd_uuid_498e3bbc-3444-4616-866c-d60da75819d6.jpg: 64x64 mayswd 0.71, others 0.19, s

In [4]:
# 根据classification_predicted_results.json，将文件夹里的图片，移动到对应的分类子目录里


import os
import json
import shutil
from pathlib import Path

def move_images_by_json(json_path, base_dir):
    """
    根据 classification_predicted_results.json 把图片移动到对应的分类子目录
    :param json_path: JSON 文件路径
    :param base_dir: 图片所在的根目录
    """
    with open(json_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    base_dir = Path(base_dir)

    for item in data:
        img_path = Path(item["path"])
        cls_name = item.get("top1_name")
        if not cls_name:
            print(f"⚠️ {img_path.name} 没有预测类别，跳过")
            continue

        # 目标子目录
        target_dir = base_dir / cls_name
        target_dir.mkdir(parents=True, exist_ok=True)

        target_path = target_dir / img_path.name
        try:
            shutil.move(str(img_path), str(target_path))
            print(f"✅ {img_path.name} -> {target_dir}")
        except Exception as e:
            print(f"❌ 移动 {img_path.name} 失败: {e}")

if __name__ == "__main__":
    # 示例：你的输入目录和 JSON 路径
    for d in chosen_dirs:
        print(f"\n=== 处理目录: {d} ===")
        input_dir  = d
        classification_predicted_results_json = d.parent / f"{d.name}_classification_predicted_results.json"

        if not classification_predicted_results_json.exists():
            print(f"⚠️ JSON 文件 {classification_predicted_results_json} 不存在，跳过")
            continue

        move_images_by_json(classification_predicted_results_json, input_dir)






=== 处理目录: /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data ===
✅ 0618_0635_780_obj27_swd_uuid_0bf091b9-2d79-4cfa-ac91-93e72292453b.jpg -> /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/others
✅ 0718_0635_600_obj115_swd_uuid_124a9204-9cdf-4edb-8375-5434540c4c38.jpg -> /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/swd
✅ 0718_0635_600_obj40_swd_uuid_f875d675-5b10-4e1f-bab8-09deff6a9b58.jpg -> /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/mayswd
✅ 0718_0635_600_obj42_swd_uuid_e9b71ac3-0ec7-462c-a666-63d288f94000.jpg -> /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/swd
✅ 0718_0635_600_obj70_swd_uuid_498e3bbc-3444-4616-866c-d60da75819d6.jpg -> /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/mayswd
✅ 0718_0636_640_obj11_swd_uuid_5ba7f846-3801-47f5-b151-33a0999f5f77.jpg -> /workspace/models/SAHI/run_v8/cropped_objects/mayswd_data/swd
✅ 0718_0636_640_obj42_swd_uuid_adc1155a-6bfd-4c00-ba07-08fc35d47f69.jpg -> /workspace/models/SA