# 分离--防止卡顿

In [3]:
import fiftyone as fo
import math

SRC_DATASET = "ms1_0710-0726_36_ok_v2"
CHUNK_SIZE = 100

src = fo.load_dataset(SRC_DATASET)
total = len(src)
num_parts = math.ceil(total / CHUNK_SIZE)

print(f"Total samples: {total}")
print(f"Splitting into {num_parts} datasets\n")

for i in range(num_parts):
    start = i * CHUNK_SIZE
    end = start + CHUNK_SIZE

    print(f"Creating dataset {i+1}/{num_parts}")

    view = src.skip(start).limit(CHUNK_SIZE)

    new_name = f"{SRC_DATASET}_part_{i+1:03d}"

    # ⭐ 旧版写法：用 clone 创建新 dataset
    new_ds = view.clone(name=new_name)

print("\nDone!")


Total samples: 1664
Splitting into 17 datasets

Creating dataset 1/17
Creating dataset 2/17
Creating dataset 3/17
Creating dataset 4/17
Creating dataset 5/17
Creating dataset 6/17
Creating dataset 7/17
Creating dataset 8/17
Creating dataset 9/17
Creating dataset 10/17
Creating dataset 11/17
Creating dataset 12/17
Creating dataset 13/17
Creating dataset 14/17
Creating dataset 15/17
Creating dataset 16/17
Creating dataset 17/17

Done!


# 合并

In [8]:
import fiftyone as fo

BASE_NAME = "ms1_0710-0726_36_ok_v2"   # 原始大 dataset 名
part_names = [
            'ms1_0710-0726_36_ok_v2_part_001',
            'ms1_0710-0726_36_ok_v2_part_002',
            'ms1_0710-0726_36_ok_v2_part_003',
            'ms1_0710-0726_36_ok_v2_part_004',
            'ms1_0710-0726_36_ok_v2_part_005',
            'ms1_0710-0726_36_ok_v2_part_006',
            'ms1_0710-0726_36_ok_v2_part_007',
            'ms1_0710-0726_36_ok_v2_part_008',
            'ms1_0710-0726_36_ok_v2_part_009',
            'ms1_0710-0726_36_ok_v2_part_010',
            'ms1_0710-0726_36_ok_v2_part_011',
            'ms1_0710-0726_36_ok_v2_part_012',
            'ms1_0710-0726_36_ok_v2_part_013',
            'ms1_0710-0726_36_ok_v2_part_014',
            'ms1_0710-0726_36_ok_v2_part_015',
            'ms1_0710-0726_36_ok_v2_part_016',
]

LABEL_FIELDS = ["01_swd_seg_results_coco"]  # 可以多写几个


base = fo.load_dataset(BASE_NAME)

for name in part_names:
    print(f"Merging tags from: {name}")
    part = fo.load_dataset(name)

    base.merge_samples(
        part,
        key_field="filepath",          # ⭐ 必须：按 filepath 找到同一张图
        fields=LABEL_FIELDS,               # ⭐ 只同步 tags（你也可以加别的字段）
        merge_lists=True,              # list 类型字段（tags）会合并
        overwrite=True,                # 同一张图的 tags 用小 dataset 里的覆盖
        insert_new=False,              # 不插入新样本，只更新已有的
    )

print("✅ Done. 直接在 App 里打开:", BASE_NAME)


Merging tags from: ms1_0710-0726_36_ok_v2_part_001
Merging tags from: ms1_0710-0726_36_ok_v2_part_002
Merging tags from: ms1_0710-0726_36_ok_v2_part_003
Merging tags from: ms1_0710-0726_36_ok_v2_part_004
Merging tags from: ms1_0710-0726_36_ok_v2_part_005
Merging tags from: ms1_0710-0726_36_ok_v2_part_006
Merging tags from: ms1_0710-0726_36_ok_v2_part_007
Merging tags from: ms1_0710-0726_36_ok_v2_part_008
Merging tags from: ms1_0710-0726_36_ok_v2_part_009
Merging tags from: ms1_0710-0726_36_ok_v2_part_010
Merging tags from: ms1_0710-0726_36_ok_v2_part_011
Merging tags from: ms1_0710-0726_36_ok_v2_part_012
Merging tags from: ms1_0710-0726_36_ok_v2_part_013
Merging tags from: ms1_0710-0726_36_ok_v2_part_014
Merging tags from: ms1_0710-0726_36_ok_v2_part_015
Merging tags from: ms1_0710-0726_36_ok_v2_part_016
✅ Done. 直接在 App 里打开: ms1_0710-0726_36_ok_v2


In [5]:
import fiftyone as fo
# 获取所有dataset
datasets = fo.list_datasets()
display(datasets)

['00_try',
 'jeff_0613-0624_04_ok',
 'lloyd_0715-0729_04_ok',
 'ms1_0605-0621_40_ok',
 'ms1_0710-0726_36_ok',
 'ms1_0710-0726_36_ok_v2',
 'ms1_0710-0726_36_ok_v2_part_001',
 'ms1_0710-0726_36_ok_v2_part_002',
 'ms1_0710-0726_36_ok_v2_part_003',
 'ms1_0710-0726_36_ok_v2_part_004',
 'ms1_0710-0726_36_ok_v2_part_005',
 'ms1_0710-0726_36_ok_v2_part_006',
 'ms1_0710-0726_36_ok_v2_part_007',
 'ms1_0710-0726_36_ok_v2_part_008',
 'ms1_0710-0726_36_ok_v2_part_009',
 'ms1_0710-0726_36_ok_v2_part_010',
 'ms1_0710-0726_36_ok_v2_part_011',
 'ms1_0710-0726_36_ok_v2_part_012',
 'ms1_0710-0726_36_ok_v2_part_013',
 'ms1_0710-0726_36_ok_v2_part_014',
 'ms1_0710-0726_36_ok_v2_part_015',
 'ms1_0710-0726_36_ok_v2_part_016',
 'ms1_0710-0726_36_ok_v2_part_017',
 'ms1_0726-0809_11_ok',
 'ms1_0809-0823_34_ok',
 'ms1_0809-0823_34_ok_v2',
 'ms2_0726-0809_13_ok',
 'ms2_0726-0809_13_ok_v2',
 'ms2_0809-0823_10_ok',
 'sw1_0605-0613_07_ok',
 'sw1_0605-0613_07_ok_v2',
 'sw1_0711-0725_03_ok',
 'sw2_0808-0823_04_ok']

In [None]:
# import fiftyone as fo
# # delete all datasets
# datasets = [ 
#                 'ms1_0710-0726_36_ok_v2_part_001',
#             'ms1_0710-0726_36_ok_v2_part_002',
#             'ms1_0710-0726_36_ok_v2_part_003',
#             'ms1_0710-0726_36_ok_v2_part_004',
#             'ms1_0710-0726_36_ok_v2_part_005',
#             'ms1_0710-0726_36_ok_v2_part_006',
#             'ms1_0710-0726_36_ok_v2_part_007',
#             'ms1_0710-0726_36_ok_v2_part_008',
#             'ms1_0710-0726_36_ok_v2_part_009',
#             'ms1_0710-0726_36_ok_v2_part_010',
#             'ms1_0710-0726_36_ok_v2_part_011',
#             'ms1_0710-0726_36_ok_v2_part_012',
#             'ms1_0710-0726_36_ok_v2_part_013',
#             'ms1_0710-0726_36_ok_v2_part_014',
#             'ms1_0710-0726_36_ok_v2_part_015',
#             'ms1_0710-0726_36_ok_v2_part_016',
#             'ms1_0710-0726_36_ok_v2_part_017',
#             'ms1_0710-0726_36_ok_v3',
# ]
# for ds_name in datasets:
#     fo.delete_dataset(ds_name) 