# In [11]:
import os
import shutil
import random

def _list_stems(directory, suffix):
    """Return the names, minus ``suffix``, of regular files in ``directory`` ending in ``suffix``."""
    return [
        name[:-len(suffix)]
        for name in os.listdir(directory)
        if name.endswith(suffix) and os.path.isfile(os.path.join(directory, name))
    ]


def select_and_copy_files(source_dir_images, source_dir_labels, dest_dir_images, dest_dir_labels, num_files, categories):
    """Copy a random sample of matching image/label pairs for each category.

    For every entry in ``categories``, the ``.jpg`` files found in
    ``source_dir_images/<category>`` are paired by base name with the ``.txt``
    files found in ``source_dir_labels/<category>``. ``num_files[category]``
    pairs are drawn with ``random.sample`` and copied into
    ``dest_dir_images/<category>`` and ``dest_dir_labels/<category>``, which
    are created if they do not exist.

    Every category is validated and sampled before any directory is created
    or any file is copied, so a shortfall in a later category does not leave
    a partially populated destination behind.

    Args:
        source_dir_images: Root directory holding one image sub-directory per category.
        source_dir_labels: Root directory holding one label sub-directory per category.
        dest_dir_images: Root directory that receives the copied images.
        dest_dir_labels: Root directory that receives the copied labels.
        num_files: Mapping from category name to the number of pairs to copy.
        categories: Iterable of category (sub-directory) names to process.

    Raises:
        ValueError: If a category has fewer matching pairs than requested.
        KeyError: If a category has no entry in ``num_files``.
    """
    # Phase 1: inspect the sources and draw every sample; nothing is written yet.
    selections = []
    for category in categories:
        image_stems = _list_stems(os.path.join(source_dir_images, category), '.jpg')
        label_stems = _list_stems(os.path.join(source_dir_labels, category), '.txt')

        # Keep only base names present in both directories. Sorting removes
        # the hash-dependent ordering of the set, so a caller that seeds
        # ``random`` gets the same selection on every run.
        common_files = sorted(set(image_stems) & set(label_stems))

        wanted = num_files[category]
        if len(common_files) < wanted:
            raise ValueError(f"Only {len(common_files)} common files found in {category}. Cannot select {wanted} files.")

        selections.append((category, random.sample(common_files, wanted)))

    # Phase 2: every request is known to be satisfiable; create targets and copy.
    for category, selected_files in selections:
        source_subdir_images = os.path.join(source_dir_images, category)
        source_subdir_labels = os.path.join(source_dir_labels, category)
        dest_subdir_images = os.path.join(dest_dir_images, category)
        dest_subdir_labels = os.path.join(dest_dir_labels, category)

        # Create the destination sub-directories if they do not exist yet.
        os.makedirs(dest_subdir_images, exist_ok=True)
        os.makedirs(dest_subdir_labels, exist_ok=True)

        for stem in selected_files:
            shutil.copy(os.path.join(source_subdir_images, stem + '.jpg'),
                        os.path.join(dest_subdir_images, stem + '.jpg'))
            shutil.copy(os.path.join(source_subdir_labels, stem + '.txt'),
                        os.path.join(dest_subdir_labels, stem + '.txt'))

# Directories to operate on.
source_directory_images = "/data/jiayuan/BDDcoco/yolo_v8/images"
source_directory_labels = "/data/jiayuan/BDDcoco/yolo_v8/labels"
destination_directory_images = "/data/jiayuan/BDDcoco/yolo_v8_toy/images"
destination_directory_labels = "/data/jiayuan/BDDcoco/yolo_v8_toy/labels"

# Sub-directories to select files from, and how many files to take from each.
num_of_files = {'train2017': 100, 'val2017': 10}
categories = ['train2017', 'val2017']

# Select and copy the .jpg and .txt files.
select_and_copy_files(
    source_dir_images=source_directory_images,
    source_dir_labels=source_directory_labels,
    dest_dir_images=destination_directory_images,
    dest_dir_labels=destination_directory_labels,
    num_files=num_of_files,
    categories=categories,
)
