# FiftyOne 脚本

In [1]:
import fiftyone as fo
# Launch the FiftyOne App and keep the returned session handle in `session`.
session = fo.launch_app()


  from .autonotebook import tqdm as notebook_tqdm


Connected to FiftyOne on port 5151 at localhost.
If you are not connecting to a remote session, you may need to start a new session and specify a port


In [2]:
import fiftyone as fo

# Fetch the names of all datasets FiftyOne currently knows about
datasets = fo.list_datasets()
display(datasets)

['ms1_0710-0726_36_ok_obb_v1',
 'ms1_0710-0726_36_ok_v2',
 'ms1_0710-0726_36_ok_v22',
 'ms1_0710-0726_36_ok_v22_conf09_patches',
 'ms1_0726-0809_11_ok_v3',
 'ms1_0809-0823_34_ok_obb_v1',
 'ms1_0809-0823_34_ok_v2',
 'ms1_0809-0823_34_ok_v22',
 'ms1_0809-0823_34_ok_v22_conf09_patches',
 'ms2_0726-0809_13_ok_obb_v1',
 'ms2_0726-0809_13_ok_v2',
 'ms2_0726-0809_13_ok_v22',
 'ms2_0809-0823_10_ok_v3',
 'quickstart',
 'sw1_0605-0613_07_ok_obb_v1',
 'sw1_0605-0613_07_ok_v2',
 'sw1_0605-0613_07_ok_v22']

In [3]:
# # delete all datasets (destructive: removes every dataset named in `datasets`)
# for ds_name in datasets:
#     fo.delete_dataset(ds_name) 

In [4]:
from pathlib import Path

def fetch_subsequent_dir(data_root: Path, target_subdir_name: Path):
    """
    Find the immediate children of ``data_root`` that contain ``target_subdir_name``.

    The search pattern is ``<data_root>/*/<target_subdir_name>``; for every match
    the parent of the match (i.e. the child of ``data_root``) is reported.

    Args:
        data_root: Directory whose direct children are inspected.
        target_subdir_name: Relative name that must exist inside a child for
            that child to be reported.

    Returns:
        A tuple ``(paths, names)`` of two parallel lists: the matching child
        paths and their final path components. The order is whatever
        ``Path.glob`` yields.
    """
    parent_dirs = []
    parent_names = []
    for hit in data_root.glob(f"*/{target_subdir_name}"):
        owner = hit.parent
        parent_dirs.append(owner)
        parent_names.append(owner.name)
    return parent_dirs, parent_names


# Alternative root (presumably a smaller test set, judging by its name - confirm before use):
# data_root = Path("/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/00_test")
# Root directory whose direct children are searched for dataset folders.
data_root = Path("/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/01_16mp_2024_pipeline_data_obb_v1")

# A child of `data_root` is selected only if it contains this sub-directory.
target_subdir_name = Path("raw_data")
subdir_path_list, subdir_name_list = fetch_subsequent_dir(data_root, target_subdir_name)
display(subdir_path_list)
display(subdir_name_list)
# Last expression of the cell: rendered as the cell output (both lengths).
len(subdir_path_list), len(subdir_name_list)

[PosixPath('/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/01_16mp_2024_pipeline_data_obb_v1/ms1_0710-0726_36_ok_obb_v1'),
 PosixPath('/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/01_16mp_2024_pipeline_data_obb_v1/sw1_0605-0613_07_ok_obb_v1'),
 PosixPath('/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/01_16mp_2024_pipeline_data_obb_v1/ms1_0809-0823_34_ok_obb_v1'),
 PosixPath('/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/01_16mp_2024_pipeline_data_obb_v1/ms2_0726-0809_13_ok_obb_v1')]

['ms1_0710-0726_36_ok_obb_v1',
 'sw1_0605-0613_07_ok_obb_v1',
 'ms1_0809-0823_34_ok_obb_v1',
 'ms2_0726-0809_13_ok_obb_v1']

(4, 4)

In [5]:
import fiftyone.core.labels as fol
import json
from collections import defaultdict
from datetime import datetime
from pathlib import Path
import fiftyone as fo

def attach_dot_to_fiftyone(
    dataset: fo.Dataset,
    coco_json_path: Path,
    dot_field: str,
):
    """
    Attach the "dot" boxes stored in a COCO JSON file to a FiftyOne dataset.

    Each COCO annotation may carry a list under
    ``annotation["attributes"]["dot_detections"]``. Every entry of that list
    must provide a pixel-space ``bbox`` of the form ``[x, y, w, h]`` and may
    provide ``score`` and ``category_id``. All dots that belong to one image are
    gathered into a single ``fo.Detections`` stored at ``sample[dot_field]``.

    Samples are matched to COCO images by comparing the base name of
    ``sample.filepath`` with the COCO ``file_name``.

    Side effects:
        - ``dataset.default_skeleton`` is set to a skeleton with the labels
          ``["h", "lp", "rp"]`` and the edges ``[[0, 1], [0, 2]]``.
        - Every sample that receives at least one dot is saved.
        - Image metadata is computed for such a sample when it is missing,
          because the image size is needed to normalise the boxes.

    Args:
        dataset: Dataset whose samples are updated in place.
        coco_json_path: Path of the COCO JSON file to read.
        dot_field: Name of the sample field that receives the detections.

    Raises:
        KeyError: If an annotation references an ``image_id`` that is not
            present in the ``images`` section, or a dot has no ``bbox``.
    """
    # 1. Read the COCO JSON
    with coco_json_path.open("r", encoding="utf-8") as f:
        coco = json.load(f)

    images = coco["images"]
    annotations = coco["annotations"]

    # image_id -> file_name
    image_id_to_fname = {img["id"]: img["file_name"] for img in images}

    # file_name -> [annotations...] (original order is preserved)
    anns_by_fname = defaultdict(list)
    for ann in annotations:
        fname = image_id_to_fname[ann["image_id"]]
        anns_by_fname[fname].append(ann)

    # Skeleton definition applied to the dataset as a whole
    dataset.default_skeleton = fo.KeypointSkeleton(
        labels=["h", "lp", "rp"],
        edges=[[0, 1], [0, 2]],
    )

    # 2. Walk over the samples and attach the dots of the matching image
    for sample in dataset:
        fname = Path(sample.filepath).name

        # Flatten attributes.dot_detections of every annotation of this image
        dots = [
            dot
            for ann in anns_by_fname.get(fname, [])
            for dot in ((ann.get("attributes") or {}).get("dot_detections") or [])
        ]
        if not dots:
            # Nothing to write: leave the sample untouched (and unsaved)
            continue

        # The image size is required to convert pixel boxes to relative boxes.
        # Samples that were added without metadata would otherwise fail with
        # an AttributeError on `None.width`.
        if sample.metadata is None:
            sample.compute_metadata()

        W = sample.metadata.width
        H = sample.metadata.height

        dot_dets_list = []
        for dot in dots:
            # bbox: [x, y, w, h] in pixel coordinates
            x, y, w, h = dot["bbox"]
            dot_dets_list.append(
                fol.Detection(
                    bounding_box=[x / W, y / H, w / W, h / H],
                    confidence=dot.get("score", None),
                    # category_id is used as the label; a fixed "dot" would also work
                    label=str(dot.get("category_id", "")),
                )
            )

        # 3. Store the detections on the sample and persist the change
        sample[dot_field] = fol.Detections(detections=dot_dets_list)
        sample.save()

    print(f"✅ 已将 dot 检测框写入字段: {dot_field}")


def extract_time_info(file_name: str, year: int = 2024) -> datetime:
    """
    Parse the capture time encoded at the start of a file name.

    Everything before the last ``_`` of ``file_name`` is interpreted as
    ``MMDD_HHMM``; e.g. ``"0606_0617_760.jpg"`` yields June 6th, 06:17.

    The year is not part of the file name, so it is supplied separately. It is
    included in the parsed string (rather than patched in afterwards) so that
    the date is validated against the right calendar: parsing ``"0229"`` with
    the implicit default year 1900 would be rejected even though February 29th
    exists in 2024.

    Args:
        file_name: File name of the form ``MMDD_HHMM_<rest>``.
        year: Year to assume for the timestamp. Defaults to 2024.

    Returns:
        The corresponding naive ``datetime``.

    Raises:
        ValueError: If the prefix does not match ``MMDD_HHMM`` or does not
            describe a valid date in ``year``.
    """
    time_info = "_".join(file_name.split("_")[:-1])
    return datetime.strptime(f"{year:04d}_{time_info}", "%Y_%m%d_%H%M")

def extract_focus_info(file_name: str) -> str:
    """
    Return the focus token of a file name.

    The token is the text after the last ``_``, cut at the first ``.`` that
    follows; e.g. ``"0606_0617_760.jpg"`` yields ``"760"``.
    """
    _, _, last_token = file_name.rpartition("_")
    focus, _, _ = last_token.partition(".")
    return focus

In [6]:
import fiftyone as fo

def add_tags_to_all_labels(
    dataset: fo.Dataset,
    bool_attr: str,
    tag_name: str | None = None,
    mode: str = "both",
):
    """
    Tag every label whose attribute ``bool_attr`` is truthy.

    All label fields of the dataset are inspected. A field value is handled
    when it exposes a ``keypoints`` list or a ``detections`` list (subject to
    ``mode``); every label in that list for which ``label.<bool_attr>`` exists
    and is truthy receives ``tag_name`` unless it already carries it.

    The container type is determined for each sample individually, so a field
    is processed even when the first sample of the dataset has no value for it.
    The dataset is traversed once and a sample is saved only when at least one
    of its labels was modified.

    Args:
        dataset: Dataset whose labels are tagged in place.
        bool_attr: Name of the boolean attribute on the labels,
            e.g. ``"is_final_swd"``.
        tag_name: Tag to add. Defaults to ``bool_attr``.
        mode: ``"keypoints"``, ``"detections"`` or ``"both"``
            (case-insensitive).

    Raises:
        AssertionError: If ``mode`` is not one of the accepted values.
    """
    if tag_name is None:
        tag_name = bool_attr

    mode = mode.lower()
    assert mode in ("keypoints", "detections", "both"), "mode 必须是 keypoints/detections/both"

    label_fields = dataset._get_label_fields()
    print("所有 label 字段:", label_fields)

    want_keypoints = mode in ("keypoints", "both")
    want_detections = mode in ("detections", "both")

    for sample in dataset:
        sample_changed = False

        for field in label_fields:
            labels_layer = sample[field]
            if not labels_layer:
                continue

            # Pick the label list that matches the container and the mode
            if hasattr(labels_layer, "keypoints"):
                if not want_keypoints:
                    continue
                labels = labels_layer.keypoints
            elif hasattr(labels_layer, "detections"):
                if not want_detections:
                    continue
                labels = labels_layer.detections
            else:
                # Neither a keypoints nor a detections container
                continue

            field_changed = False
            for label in labels or []:
                # getattr with a default covers both "missing" and "falsy"
                if not getattr(label, bool_attr, False):
                    continue

                if label.tags is None:
                    label.tags = []
                if tag_name not in label.tags:
                    label.tags.append(tag_name)
                    field_changed = True

            if field_changed:
                # Re-assign so the in-place edit is registered on the sample
                sample[field] = labels_layer
                sample_changed = True

        if sample_changed:
            sample.save()

    print(f"✅ 已根据 `{bool_attr}` 添加 tag: `{tag_name}` (mode={mode})")


In [None]:
import fiftyone.utils.coco as fouc  

# When True, a dataset that already exists under the same name is deleted and
# re-imported. When False, the existing dataset is loaded and reused instead of
# attempting to create a second dataset with the same name.
RESET_DATASET = True

for subdir_path, subdir_name in zip(subdir_path_list, subdir_name_list):
    dataset_exists = fo.dataset_exists(subdir_name)
    if dataset_exists and RESET_DATASET:
        fo.delete_dataset(subdir_name)
        dataset_exists = False
    print(f"subdir_path: {subdir_path}, subdir_name: {subdir_name}")

    # Alternative import of the same labels as detections instead of polylines:
    # dataset = fo.Dataset.from_dir(
    #     dataset_type=fo.types.COCODetectionDataset,
    #     name=f"{subdir_name}",
    #     data_path=subdir_path / "raw_data",
    #     # labels_path=subdir_path / "output" / "swd_seg_results_coco.json",
    #     labels_path=subdir_path / "output" / "01_swd_seg_results_coco.json",
    #     label_field="01_swd_seg_results_coco",
    #     label_types="detections",
    # )

    if dataset_exists:
        # Only reachable with RESET_DATASET = False: reuse what is already there
        dataset = fo.load_dataset(subdir_name)
    else:
        dataset = fo.Dataset.from_dir(
            dataset_type=fo.types.COCODetectionDataset,
            name=f"{subdir_name}",
            data_path=subdir_path / "raw_data",
            # labels_path=subdir_path / "output" / "swd_seg_results_coco.json",
            labels_path=subdir_path / "output" / "01_swd_seg_results_coco.json",
            label_field="01_swd_seg_results_coco",
            label_types="polylines",
        )

    # Add the remaining images of raw_data that have no annotations
    dataset.merge_dir(
        dataset_dir=subdir_path / "raw_data",
        dataset_type=fo.types.ImageDirectory,
        skip_existing=True,  # keep samples that are already in the dataset
        insert_new=True,     # insert samples that are not yet in the dataset
    )

    # Optional extra label layers (disabled):
    # fouc.add_coco_labels(
    #     dataset,
    #     label_field="02_combined_annotations_dedup",
    #     labels_or_path=str(subdir_path / "output" / "02_combined_annotations_dedup.json"),
    #     categories={1: "swd"},
    #     label_type="detections",
    # )
    # fouc.add_coco_labels(
    #     dataset,
    #     label_field="03_coco_with_pose_dot_keypoints",
    #     labels_or_path=str(subdir_path / "output" / "03_coco_with_pose_dot.json"),
    #     categories={1: "swd"},
    #     label_type="keypoints",
    # )

    # fouc.add_coco_labels(
    #     dataset,
    #     label_field="03_coco_with_pose_dot_detections",
    #     labels_or_path=str(subdir_path / "output" / "03_coco_with_pose_dot.json"),
    #     categories={1: "swd"},
    #     label_type="detections",
    # )

    # attach_dot_to_fiftyone(
    #     dataset=dataset,
    #     coco_json_path=subdir_path / "output" / "03_coco_with_pose_dot.json",
    #     dot_field="04_dot_boxes",
    # )

    # add_tags_to_all_labels(dataset, "is_final_swd", "fswd","detections")

    # Add time and focus information derived from the file name, e.g.
    # 0606_0617_760.jpg: "0606_0617" is the timestamp, "760" the focus distance
    for sample in dataset:
        # Path(...).name is separator-agnostic, unlike splitting on "/"
        file_name = Path(sample.filepath).name
        sample["Date"] = extract_time_info(file_name)
        sample["focus"] = extract_focus_info(file_name)
        sample.save()
    print("✅ 已完成添加时间和focus信息")
    # Create indexes on the two new fields
    dataset.create_index("Date")
    dataset.create_index("focus")
    print("✅ 已完成时间和focus信息索引创建")

subdir_path: /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/01_16mp_2024_pipeline_data_obb_v1/ms1_0710-0726_36_ok_obb_v1, subdir_name: ms1_0710-0726_36_ok_obb_v1
 100% |███████████████| 1495/1495 [8.0s elapsed, 0s remaining, 143.9 samples/s]      
 100% |███████████████| 1664/1664 [173.4ms elapsed, 0s remaining, 9.7K samples/s]   
Converting existing index 'filepath' to unique on dataset '2025.12.11.01.20.35.476014'
Converting existing index 'filepath' to unique on dataset 'ms1_0710-0726_36_ok_obb_v1'
✅ 已完成添加时间和focus信息
✅ 已完成时间和focus信息索引创建
subdir_path: /home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/01_16mp_2024_pipeline_data_obb_v1/sw1_0605-0613_07_ok_obb_v1, subdir_name: sw1_0605-0613_07_ok_obb_v1
 100% |█████████████████| 151/151 [390.4ms elapsed, 0s remaining, 388.0 samples/s]      
 100% |█████████████████| 207/207 [32.6ms elapsed, 0s remaining, 6.4K samples/s]   
Converting existing index 'filepath' to unique on dataset 