In [23]:
import fiftyone as fo  

# Load an existing dataset by name and open it in the FiftyOne App.
# `session` is kept because a later cell assigns to `session.view`.
dataset = fo.load_dataset("my-dataset")
session = fo.launch_app(dataset)

# FiftyOne 常见规范操作

## 数据集管理规范


### 创建和加载



In [None]:
import fiftyone as fo  
import fiftyone.zoo as foz  

# Create the dataset, or reuse it when one with this name already exists.
# A bare `fo.Dataset(name)` raises if the name is taken, and an earlier cell
# already loads "my-dataset", so the unconditional constructor call failed
# whenever this cell was re-run.
if fo.dataset_exists("my-dataset"):
    dataset = fo.load_dataset("my-dataset")
else:
    dataset = fo.Dataset("my-dataset")

# # Load a dataset stored in a standard format from a directory
# dataset = fo.Dataset.from_dir(
#     dataset_dir="/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/00_test/00_try/raw_data",
#     dataset_type=fo.types.COCODetectionDataset,
#     name="my-dataset"
# )

# # Load a predefined dataset from the Zoo
# dataset = foz.load_zoo_dataset("quickstart")

### 持久化管理


In [None]:
import fiftyone as fo  

# Load an existing dataset and print its summary followed by a separator line
dataset = fo.load_dataset("ms1_0605-0621_40_ok_v3")
print(dataset, "\n==========================\n", sep="\n")

# Mark the dataset as persistent (recommended for important datasets),
# then report its name and the resulting flag in aligned columns
dataset.persistent = True
print(f"Persistent-{dataset.name:<40}|{dataset.persistent!s:>8}")

In [26]:
import fiftyone as fo  

# List every dataset and report whether it is persistent.
# The assignment below is commented out, so this cell only reports; uncomment
# it to mark every dataset as persistent.
# The loop variable is `ds` (as in the backup cell) so that the notebook-level
# `dataset` used by other cells is not rebound to the last dataset listed.
datasets_list = fo.list_datasets()
for dataset_name in datasets_list:
    ds = fo.load_dataset(dataset_name)
    # ds.persistent = True
    print(f"Persistent-{ds.name:<40}|{ds.persistent!s:>8}")

Persistent-my-dataset                              |    True
Persistent-v1_ms1_0710-0726_36_ok                  |    True
Persistent-v1_ms1_0809-0823_34_ok                  |    True
Persistent-v1_ms2_0726-0809_13_ok                  |    True
Persistent-v1_sw1_0605-0613_07_ok                  |    True
Persistent-v2_ms1_0605-0621_40_ok                  |    True
Persistent-v2_ms1_0726-0809_11_ok                  |    True
Persistent-v2_ms2_0809-0823_10_ok                  |    True
Persistent-v3_sahi_ms1_0605-0621_40_ok             |    True
Persistent-v3_sahi_ms1_0710-0726_36_ok             |    True
Persistent-v3_sahi_ms1_0726-0809_11_ok             |    True
Persistent-v3_sahi_ms1_0809-0823_34_ok             |    True
Persistent-v3_sahi_ms2_0726-0809_13_ok             |    True
Persistent-v3_sahi_ms2_0809-0823_10_ok             |    True
Persistent-v3_sahi_sw1_0605-0613_07_ok             |    True


In [27]:
import fiftyone as fo  

# Delete all non-persistent datasets (destructive); verbose=True is passed so
# the call reports what it does
fo.delete_non_persistent_datasets(verbose=True)

In [None]:
# Delete a single dataset by name (kept commented out because it is destructive)
# fo.delete_dataset("ms1_0605-0621_40_ok_sahi_v1")

# Delete several datasets by name
# datasets = [
#     'ms1_0710-0726_36_ok_v22',
#     'ms1_0710-0726_36_ok_v22_conf09_patches',
#     'ms1_0809-0823_34_ok_v22',
#     'ms1_0809-0823_34_ok_v22_conf09_patches',
#     'ms2_0726-0809_13_ok_v22',
#     # 'my-dataset',
#     'sw1_0605-0613_07_ok_v22',
#     ]
# for ds in datasets:
#     fo.delete_dataset(ds)

## 数据操作规范


### 样本管理


In [None]:
# Create a sample from an image file path and print it.
# This cell has no import of its own; it relies on `fo` from an earlier cell.
sample = fo.Sample(filepath="/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/00_test/00_try/raw_data/0719_0111_760.jpg")
print(sample)

In [None]:
import fiftyone as fo

# Add a single sample to the dataset.
# `sample` is the object created in the previous cell.
dataset = fo.load_dataset("my-dataset")
dataset.add_sample(sample)

In [None]:
import fiftyone as fo

# Add several samples in one call.
# `dataset` is whatever the notebook-level name currently refers to; this cell
# does not load it itself.
filepaths = [
    "/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/00_test/00_try/raw_data/0719_0111_760.jpg",
    "/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/00_test/00_try/raw_data/0731_2037_760.jpg",
    # "/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/00_test/00_try/raw_data/0731_2039_800.jpg",
    # "/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/01_data/00_test/00_try/raw_data/0731_2242_800.jpg"
]
# One Sample per path, in list order
samples = [fo.Sample(filepath=image_path) for image_path in filepaths]
dataset.add_samples(samples)

In [None]:
# Modify a sample, then save it so the change is written back.
# `sample` is the object created in an earlier cell.
sample.tags.append("test")
sample.save()

### 字段操作


In [None]:
# Compute metadata for the dataset
dataset.compute_metadata()


In [None]:
import fiftyone as fo
from fiftyone import ViewField as F

# Add a float sample field named "confidence" and print the dataset summary.
# Per the original note, the field is listed under PRIMITIVES in the App's
# left sidebar.
dataset = fo.load_dataset("my-dataset")
dataset.add_sample_field("confidence", fo.FloatField)
print(dataset)

In [None]:

# Clamp every detection confidence in "predictions" to a lower bound of 0.5.
# The result is assigned to `view`; `F` comes from the import in the cell above.
view = dataset.set_field(
    "predictions.detections.confidence",
    F("confidence").max(0.5),
)
# Persist the view on the dataset as a named saved view.
# view.save("confidence")   # would save the view's values into the dataset itself; not recommended because it overwrites the original data
# overwrite=True keeps this cell re-runnable: without it, save_view() raises
# once a saved view named "confidence_0.5" already exists.
dataset.save_view("confidence_0.5", view, overwrite=True)


In [None]:
# Read the values of a (nested) field across the dataset and print them
values = dataset.values("predictions.detections.confidence")
print(values)

In [None]:
# Rename a single field
dataset.rename_sample_field("base_model", "ground_truth")

# Rename several fields in one call (old name -> new name).
# NOTE(review): later cells still reference "predictions" and "ground_truth";
# confirm the intended run order before executing this rename.
dataset.rename_sample_fields({
    "ground_truth": "new_label_field",
    "predictions": "model_predictions"
})

In [None]:
# Delete the "confidence" sample field, then print the dataset summary
dataset.delete_sample_field("confidence")
print(dataset)


## 视图操作规范


### 字段过滤


In [None]:
from fiftyone import ViewField as F  

# Create a view of the dataset; the bare `view` expression displays it as the
# cell output
view = dataset.view()
view

In [None]:
# Filter samples: keep those whose filepath starts with "/path"
view = dataset.match(F("filepath").starts_with("/path"))
view

In [None]:
# Limit and sort. The stages are chained as limit(2) then sort_by("filepath"),
# i.e. the limit comes before the sort in the chain.
view = dataset.limit(2).sort_by("filepath")
view

In [None]:
# Filter on a field's value (the original comment calls this a numeric field).
# NOTE(review): "created_at" is presumably a datetime rather than a number, so
# comparing it with 0 may not be a meaningful numeric filter — confirm the
# intended field and threshold.
view = dataset.filter_field("created_at", F() > 0)
view

In [None]:
# Field existence: build a view with exists() on the "predictions" field
view = dataset.exists("predictions")
view

In [None]:
# Push the current view to the App.
# `session` is the object returned by fo.launch_app() in the first cell.
session.view = view

### 标签过滤


In [None]:
# Filter labels in the "01_swd_seg_results_coco" field by confidence > 0.5.
# This rebinds `dataset` to a different dataset for the cells that follow.
dataset = fo.load_dataset("ms2_0726-0809_13_ok_v22")
view = dataset.filter_labels("01_swd_seg_results_coco", F("confidence") > 0.5)
view

In [None]:
# Select samples that contain labels with confidence above 0.9.
# The expression must be passed as `filter=`: the first positional parameter of
# match_labels() is `labels` (a list of label dicts such as
# session.selected_labels), so passing the expression positionally is wrong.
view = dataset.match_labels(filter=F("confidence") > 0.9)

In [None]:
# Exclude specific labels: those carrying the tag "test"
view = dataset.exclude_labels(tags="test")

In [None]:
# Match by tag "test".
# NOTE(review): per the FiftyOne API, match_tags() works on sample-level tags,
# not on label tags — confirm this is the intended filter.
view = dataset.match_tags("test")
view

In [None]:
# Map several animal labels ("cat", "dog") to the single label "animal" in the
# "predictions" field
view = dataset.map_labels("predictions", {"cat": "animal", "dog": "animal"})

## 模型操作规范


### 模型应用


In [None]:
import fiftyone.zoo as foz
import fiftyone as fo
from ultralytics import YOLO
import fiftyone.utils.ultralytics as fou

# Load the model (an Ultralytics YOLO checkpoint; the zoo alternative is kept
# commented out)
# model = foz.load_zoo_model("clip-vit-base32-torch")
model = YOLO("yolo11s.pt")

# Apply the model to "my-dataset", writing results to the "predictions" field
# with a confidence threshold of 0.1
dataset = fo.load_dataset("my-dataset")
dataset.apply_model(model, label_field="predictions", confidence_thresh=0.1)

# Compute embeddings
# embeddings = dataset.compute_embeddings(model)

### 模型评估


In [None]:
# Evaluate a detection model; the run is stored under eval_key "eval2".
# NOTE(review): the predictions field and gt_field are both "predictions2", so
# the predictions are scored against themselves — confirm whether gt_field
# should name the ground-truth field instead.
results = dataset.evaluate_detections(
    "predictions2",             # predictions field
    gt_field="predictions2",    # ground truth field
    eval_key="eval2"
)

In [None]:
# Evaluate a classification model: "predictions" against "ground_truth",
# stored under eval_key "eval"
results = dataset.evaluate_classifications(
    "predictions",
    gt_field="ground_truth",
    eval_key="eval"
)

## 标注工作流规范


### 标注流程


### 标签操作


## Brain分析规范


In [None]:
# List all available brain runs on the dataset

print(dataset.list_brain_runs())

In [None]:
# Delete every brain run currently registered on the dataset (destructive)
existing_brain_keys = dataset.list_brain_runs()
for brain_key in existing_brain_keys:
    dataset.delete_brain_run(brain_key)

### 可视化分析-embeddings


In [None]:
import fiftyone.brain as fob 
import fiftyone.zoo as foz
from matplotlib import patches


# Compute embeddings
model = foz.load_zoo_model("clip-vit-base32-torch")
emb_field = "embeddings2"
patches_field = "small_slices"  # key point: the bbox/mask in this field define the patches
# dataset.compute_embeddings(model, embeddings_field=emb_field)
dataset.compute_patch_embeddings(
    model,
    patches_field=patches_field,   # key point: the bbox/mask in this field define the patches
    embeddings_field=emb_field,
)

In [None]:
# # Compute the visualization (sample-level variant, kept for reference)
# results = fob.compute_visualization(
#     dataset,
#     embeddings=emb_field,
#     method="umap",   # optional: pca, tsne, umap
#     brain_key=f"{emb_field}_vis",
#     seed=51,
# )

# Patch-level visualization built from the embeddings computed in the previous
# cell (`emb_field` and `patches_field` are defined there).
fob.compute_visualization(
    dataset,
    patches_field=patches_field,   # tells the brain this is a patches field
    embeddings=emb_field,            # use the embedding field computed in the previous step
    method="umap",                # NOTE(review): the original comment said "use pca first to avoid umap/numba issues", but the code passes "umap" — confirm
    seed=51,
    brain_key=f"{emb_field}_vis2",  # each dataset just needs its own run under this brain_key
)

In [None]:
# Load the visualization results stored by the compute_visualization() cell.
# The key must match the `brain_key` used there (f"{emb_field}_vis2"); the
# previously hard-coded "vis3" is not created anywhere in this notebook.
results = dataset.load_brain_results(f"{emb_field}_vis2")
results

### 按相似度排序

In [None]:
import fiftyone.brain as fob

# Index the images in the dataset by visual similarity; the index is stored
# under the brain key "similarity"
fob.compute_similarity(dataset, brain_key="similarity")

In [None]:
# Detect near duplicates.
# NOTE(review): in the FiftyOne Brain API `threshold` appears to be a distance
# threshold rather than a similarity score, so 0.95 may flag far more samples
# than intended — confirm.
fob.compute_near_duplicates(dataset, threshold=0.95)

In [None]:
# Compute uniqueness for the dataset
fob.compute_uniqueness(dataset)

In [None]:
# Compute mistakenness from the "predictions" and "ground_truth" fields
fob.compute_mistakenness(dataset, "predictions", "ground_truth")

In [None]:
import fiftyone.brain as fob 

# Detect exact duplicates in the dataset
fob.compute_exact_duplicates(dataset)

In [None]:
# Build a similarity index from the precomputed embeddings using the cosine
# metric. It is stored under its own brain key: "similarity" is already used
# by the default index created in an earlier cell, and reusing that key would
# collide with the existing run.
# NOTE(review): `emb_field` was filled by compute_patch_embeddings(); if the
# embeddings live on the patches, `patches_field=patches_field` is probably
# needed here as well — confirm.
fob.compute_similarity(
    dataset,
    embeddings=emb_field,
    brain_key="similarity_cosine",
    metric="cosine",
)

## 数据导入导出规范


### 导出数据


In [None]:
import fiftyone as fo
import os

# Back up every dataset into its own sub-directory of `root`, media included
root = "/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/03_code/04_fiftyone/backup"
os.makedirs(root, exist_ok=True)

datasets = fo.list_datasets()
# datasets = ["00_try"]

# Export options shared by every dataset; only the target directory varies
export_options = dict(
    dataset_type=fo.types.FiftyOneDataset,
    export_media=True,
)

for dataset_name in datasets:
    ds = fo.load_dataset(dataset_name)

    out = os.path.join(root, dataset_name)
    print("Backing up:", dataset_name)
    os.makedirs(out, exist_ok=True)

    ds.export(export_dir=out, **export_options)


### 导入数据

In [None]:
import fiftyone as fo
import os

# Restore every dataset found in the backup directory.
root = "/home/tianqi/D/01_Projects/01_swd/02_code/pipeline/ultralytics_ty/_ty/03_code/04_fiftyone/backup"
# Each backup is a sub-directory named after its dataset. os.listdir() also
# returns plain files, so keep directories only; sorting makes the restore
# order deterministic.
name_list = sorted(
    entry for entry in os.listdir(root)
    if os.path.isdir(os.path.join(root, entry))
)
# name_list = ["00_try"]

for dataset_name in name_list:
    # from_dir() raises when the name is already taken, which would abort the
    # whole loop; skip datasets that are already present instead.
    if fo.dataset_exists(dataset_name):
        print(f"{dataset_name} (already exists, skipped)")
        continue

    print(dataset_name)

    ds = fo.Dataset.from_dir(
        f"{root}/{dataset_name}",
        fo.types.FiftyOneDataset,
        name=dataset_name,
        # Restored backups should survive cleanup of non-persistent datasets
        # (see the delete_non_persistent_datasets() cell).
        persistent=True,
    )
