In [None]:
import numba, llvmlite, umap

# Report the installed version of each package imported above, one per line.
for pkg in (numba, llvmlite, umap):
    print(f"{pkg.__name__}:", pkg.__version__)


In [1]:
import fiftyone as fo

# Launch the FiftyOne App on the first available dataset, or with no dataset
# when none exist yet.
#
# `fo.list_datasets()` returns a list of names that may be empty, so take the
# first entry with a `None` fallback. Indexing `[0]` directly raised IndexError
# on an empty list, which made the `dataset_name is None` branch unreachable.
dataset_name = next(iter(fo.list_datasets()), None)
if dataset_name is None:
    # No datasets available: open the App without one.
    session = fo.launch_app(port=5151)
else:
    dataset = fo.load_dataset(dataset_name)
    print(f"Default dataset name: {dataset_name}")
    session = fo.launch_app(dataset, port=5151)

  from .autonotebook import tqdm as notebook_tqdm


Default dataset name: 00_try


In [None]:
import fiftyone as fo
# Collect the names of all datasets returned by FiftyOne and show them.
# Later cells iterate over `datasets`, so this cell must run before them.
datasets = fo.list_datasets()
display(datasets)

In [None]:
# # fo.delete_dataset("00_try")
# # remove datasets whose names end with "_patches"
# for ds in datasets:
#     if ds.endswith("_patches"):
#         fo.delete_dataset(ds)   

In [None]:
# for key in dataset.list_brain_runs():
#     dataset.delete_brain_run(key)

In [None]:
# Delete every brain run on every dataset named in `datasets` (defined in an
# earlier cell), printing each key as it is removed.
# Note: this rebinds the notebook-level names `dataset_name` and `dataset`.
for dataset_name in datasets:
    dataset = fo.load_dataset(dataset_name)
    # Each call to `list_brain_runs()` is made once per dataset, before any
    # deletion for that dataset starts.
    for key in dataset.list_brain_runs():
        print(f"Deleting existing brain run: {key}")
        dataset.delete_brain_run(key)

In [None]:
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob

# Label field whose objects (bbox/mask) define the patches to embed.
label_field = "01_swd_seg_results_coco"
# Attribute name under which each patch embedding is stored and later read.
embeddings_field = "emb_clip"

# Load the embedding model once and reuse it for every dataset.
model = foz.load_zoo_model("clip-vit-base32-torch")
# model = foz.load_zoo_model("dinov2-vits14-torch")
# model = foz.load_zoo_model("resnet50-imagenet-torch")
# model = foz.load_zoo_model("mobilenet-v2-imagenet-torch")

# (method, brain_key) pairs, one brain run per dimensionality-reduction method.
# Every dataset gets its own run under the same key names.
# Previously the "PCA" step actually ran umap under the umap key, so UMAP was
# computed twice into one key and no PCA run was ever produced.
visualization_runs = [
    ("pca", "patches_clip_pca_v1"),
    ("tsne", "patches_clip_tsne_v1"),
    ("umap", "patches_clip_umap_v1"),
]

for dataset_name in datasets:
    print(f"Dataset: {dataset_name}")
    dataset = fo.load_dataset(dataset_name)

    # 1) Compute an embedding for each annotation in `label_field`; patches
    #    are taken from the labels themselves, with no images exported.
    dataset.compute_patch_embeddings(
        model,
        patches_field=label_field,
        embeddings_field=embeddings_field,
    )

    # 2-4) Build PCA, t-SNE and UMAP visualizations from the embeddings
    #      computed above, each stored under its own brain key.
    for method, brain_key in visualization_runs:
        fob.compute_visualization(
            dataset,
            patches_field=label_field,    # tells the brain this is a patch field
            embeddings=embeddings_field,  # reuse the embeddings from step 1
            method=method,
            seed=51,
            brain_key=brain_key,
        )
