# 分割

In [3]:
import os
from PIL import Image
import random
from tqdm import tqdm   # 重点：加进度条

# ========== Edit the paths below ==========
images_dir = "/data_lg/keru/project/yolov5/dataset/train/images"   # folder with the images
labels_dir = "/data_lg/keru/project/yolov5/dataset/train/labels"   # folder with the YOLO label files
save_root = "/data_lg/keru/project/part2/yolo_cutting"     # output root: positive & negative folders
log_path = os.path.join(save_root, "log.txt")  # location of log.txt

# ========== Negative-to-positive ratio and patch size ==========
negative_ratio = 2            # negatives requested per positive sample
patch_size = 224              # patch edge length in pixels

# === Create both output folders if they are missing ===
for _subdir in ("positive", "negative"):
    os.makedirs(os.path.join(save_root, _subdir), exist_ok=True)

# === Create log.txt (or truncate an existing one) and write the header row ===
with open(log_path, mode="w") as log:
    log.write("Image_Name | Positive_Count | Negative_Count | Positive_Boxes | Negative_Boxes\n")
def yolo_to_box(txt_line, img_w, img_h):
    """Convert one YOLO label line into a pixel box clamped to the image.

    Args:
        txt_line: Label line of the form ``"cls xc yc w h"`` where the box
            centre and size are fractions of the image size. Columns after
            the fifth (e.g. a confidence score) are ignored.
        img_w: Image width in pixels.
        img_h: Image height in pixels.

    Returns:
        ``[xmin, ymin, xmax, ymax]`` as integer pixel coordinates, with x
        limited to ``[0, img_w]`` and y limited to ``[0, img_h]``.

    Raises:
        ValueError: If the line has fewer than five fields or a coordinate
            field is not numeric.
    """
    fields = txt_line.split()
    if len(fields) < 5:
        raise ValueError(f"expected 'cls xc yc w h', got: {txt_line!r}")

    # Only the four geometry fields are needed; the class id is not used here.
    xc, yc, w, h = map(float, fields[1:5])
    half_w = w / 2
    half_h = h / 2

    xmin = int((xc - half_w) * img_w)
    ymin = int((yc - half_h) * img_h)
    xmax = int((xc + half_w) * img_w)
    ymax = int((yc + half_h) * img_h)

    # Boxes touching the border can spill outside the image; clamp them so a
    # crop of this box never includes area outside the image.
    return [
        min(max(xmin, 0), img_w),
        min(max(ymin, 0), img_h),
        min(max(xmax, 0), img_w),
        min(max(ymax, 0), img_h),
    ]

def boxes_overlap(box1, box2):
    """Return True when two ``[xmin, ymin, xmax, ymax]`` boxes intersect.

    Boxes that merely share an edge or a corner count as overlapping.
    """
    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Separated along x: one box lies entirely to the side of the other.
    if right_a < left_b or left_a > right_b:
        return False
    # Separated along y: one box lies entirely above or below the other.
    if bottom_a < top_b or top_a > bottom_b:
        return False
    return True

# Image files to process; str.endswith accepts a tuple of suffixes.
images = [f for f in os.listdir(images_dir) if f.endswith((".jpg", ".png"))]

# === Main loop, wrapped in a tqdm progress bar ===
for img_name in tqdm(images, desc="Processing Images"):
    # splitext removes only the real extension; str.replace would also rewrite
    # ".jpg"/".png" appearing in the middle of a file name.
    stem = os.path.splitext(img_name)[0]
    img_path = os.path.join(images_dir, img_name)
    label_path = os.path.join(labels_dir, stem + ".txt")

    # Unlabelled images produce no patches, so skip them before decoding.
    if not os.path.exists(label_path):
        continue

    with open(label_path, "r") as f:
        # Keep the original line index for file naming, but drop blank lines
        # (e.g. a trailing newline) that cannot be parsed as a box.
        label_lines = [(i, line) for i, line in enumerate(f) if line.strip()]
    if not label_lines:
        continue

    # convert() returns a separate in-memory image, so the file handle can be
    # released immediately instead of staying open until garbage collection.
    with Image.open(img_path) as src:
        image = src.convert("RGB")
    W, H = image.size

    pos_boxes = []
    neg_boxes = []

    # === 1) Positive patches: one resized crop per labelled box ===
    for i, line in label_lines:
        box = yolo_to_box(line, W, H)
        if box[2] <= box[0] or box[3] <= box[1]:
            continue  # zero-area box: nothing to crop
        crop = image.crop(box).resize((patch_size, patch_size))
        crop.save(os.path.join(save_root, "positive", f"{stem}_pos{i}.jpg"))
        pos_boxes.append(box)

    pos_count = len(pos_boxes)
    if pos_count == 0:
        continue  # no usable annotation: skip this image

    neg_target = pos_count * negative_ratio

    # === 2) Negative patches: random crops that avoid every positive box ===
    neg_count = 0

    # Top-left corners are drawn so the whole patch stays inside the central
    # 20%-80% region of the image.
    x_min = int(W * 0.2)
    x_max = int(W * 0.8) - patch_size
    y_min = int(H * 0.2)
    y_max = int(H * 0.8) - patch_size

    # random.randint raises ValueError on an empty range, which happens when
    # the central region is smaller than one patch; such images simply get no
    # negative patches.
    if x_min <= x_max and y_min <= y_max:
        for _ in range(50):  # at most 50 attempts per image
            if neg_count >= neg_target:
                break
            x = random.randint(x_min, x_max)
            y = random.randint(y_min, y_max)
            box = [x, y, x + patch_size, y + patch_size]
            if not any(boxes_overlap(box, pbox) for pbox in pos_boxes):
                crop = image.crop(box)
                crop.save(os.path.join(save_root, "negative", f"{stem}_neg{neg_count}.jpg"))
                neg_boxes.append(box)
                neg_count += 1

    # === 3) Append this image's summary line to log.txt ===
    with open(log_path, "a") as log:
        log.write(f"{img_name} | {pos_count} | {neg_count} | {pos_boxes} | {neg_boxes}\n")

print(f"✅ 全部搞定！positive 和 negative 已保存到 {save_root}/ ，详情见 log.txt")



Processing Images: 100%|██████████| 2973/2973 [01:35<00:00, 31.07it/s]

✅ 全部搞定！positive 和 negative 已保存到 /data_lg/keru/project/part2/yolo_cutting/ ，详情见 log.txt





# 计算


In [1]:

import os

def count_files_in_directory(directory_path):
    """Count the files under a directory, including those in subdirectories.

    Args:
        directory_path: Path of the directory to scan.

    Returns:
        Total number of files found by walking the directory tree.

    Raises:
        FileNotFoundError: If ``directory_path`` does not exist.
        NotADirectoryError: If ``directory_path`` exists but is not a directory.
    """
    if not os.path.isdir(directory_path):
        # Tell a path that exists but is not a directory apart from a missing one.
        if os.path.exists(directory_path):
            raise NotADirectoryError(f"不是有效的目录: {directory_path}")
        raise FileNotFoundError(f"目录不存在: {directory_path}")

    return sum(len(filenames) for _, _, filenames in os.walk(directory_path))

# Example usage: count the files in the negative-patch output folder.
folder_path = "/data_lg/keru/project/part2/yolo_cutting/negative"  # directory to count; replace with your own path
try:
    count = count_files_in_directory(folder_path)
except OSError as e:
    # FileNotFoundError and NotADirectoryError are both OSError subclasses;
    # anything else indicates a programming error and should surface.
    print(f"错误: {e}")
else:
    print(f"文件夹 '{folder_path}' 中共有 {count} 个文件")

文件夹 '/data_lg/keru/project/part2/yolo_cutting/negative' 中共有 16623 个文件
