In [2]:
import fiftyone as fo
import fiftyone.core.labels as fol
from path_utils import path_all

## データセットの可視化


In [None]:
# Which on-disk format to load: "YOLO" (YOLOv5 layout) or "COCO".
to_see = "COCO"
# Which split to load: "train" or "val". Later cells reuse this value.
split = "train"
# Upper bound on the number of samples loaded, to keep loading fast.
MAX_SAMPLES = 1000

if to_see == "YOLO":
    dataset = fo.Dataset.from_dir(
        dataset_type=fo.types.YOLOv5Dataset,
        dataset_dir=str(path_all.DATASET_ROOT),
        name="synthetic-kaggle3",
        overwrite=True,
        split=split,  # load only the chosen split
        tags=[split],  # tag every sample with the split it came from
        max_samples=MAX_SAMPLES,
    )
elif to_see == "COCO":
    coco_split_dir = path_all.TRAIN_COCO_DIR / split
    dataset = fo.Dataset.from_dir(
        dataset_type=fo.types.COCODetectionDataset,
        data_path=str(coco_split_dir / "images"),
        labels_path=str(coco_split_dir / "instances.json"),
        max_samples=MAX_SAMPLES,
        include_id=True,
    )
else:
    # Fail loudly instead of leaving `dataset` undefined (or, worse, silently
    # reusing a dataset left over from a previous run of this cell).
    raise ValueError(
        f"Unsupported dataset format {to_see!r}; expected 'YOLO' or 'COCO'"
    )

print(dataset)


In [None]:
# Size thresholds, in the relative [0, 1] units of `bounding_box`; a box that
# falls below either one is flagged as a deletion candidate.
MIN_AREA_THRESHOLD = 0.0003
MIN_EDGE_THRESHOLD = 0.02

# When True, flagged boxes are also dropped from `ground_truth`; when False
# they are only recorded in the `del_target` field for inspection.
remove_bbox_mode = False

for sample in dataset:
    ground_truth = sample.ground_truth
    # Work on a snapshot of the detections so that removing boxes can never
    # disturb the iteration; a missing label is treated as "no boxes".
    detections = list(ground_truth.detections) if ground_truth is not None else []

    # bounding_box is [<left-top-x>, <left-top-y>, <width>, <height>]
    boxes = [tuple(det.bounding_box) for det in detections]
    areas = [w * h for _, _, w, h in boxes]
    edges = [min(w, h) for _, _, w, h in boxes]
    # Degenerate (zero-width or zero-height) boxes have no defined aspect
    # ratio, so they are left out of this statistic instead of dividing by 0.
    aspects = [max(w / h, h / w) for _, _, w, h in boxes if w > 0 and h > 0]

    # Smallest gap between the union of all boxes and any image border.
    if boxes:
        margin = min(
            min(x for x, _, _, _ in boxes),
            min(y for _, y, _, _ in boxes),
            1 - max(x + w for x, _, w, _ in boxes),
            1 - max(y + h for _, y, _, h in boxes),
        )
    else:
        margin = None

    # Judge every box on its own size (not on the running per-sample minimum)
    # and record each flagged box exactly once.
    del_targets, kept = [], []
    for det, area, edge in zip(detections, areas, edges):
        too_small = area < MIN_AREA_THRESHOLD or edge < MIN_EDGE_THRESHOLD
        (del_targets if too_small else kept).append(det)

    if remove_bbox_mode and del_targets:
        ground_truth.detections = kept

    sample["del_target"] = fol.Detections(detections=del_targets)

    # Per-sample summary statistics; None when the sample has no boxes.
    sample["min_area"] = min(areas, default=None)
    sample["max_area"] = max(areas, default=None)
    sample["margin"] = margin
    sample["aspect_ratio"] = max(aspects, default=None)
    sample["min_edge"] = min(edges, default=None)

    sample.save()


## 可視化


In [None]:
# Start a FiftyOne App session for the loaded dataset on port 5151.
# NOTE(review): with auto=False the App is presumably not displayed
# automatically; call `session.show()` in a cell to display it — confirm
# against the FiftyOne Session documentation.
session = fo.launch_app(
    dataset,
    port=5151,
    auto=False,
)


## 保存


In [None]:
# Write the dataset (media included) in YOLOv5 format under "02-2_Fix",
# using the same `split` that was chosen when the dataset was loaded.
fixed_export_dir = path_all.DATASET_ROOT / "02-2_Fix"
export_classes = ["cheerios", "soup"]

dataset.export(
    export_dir=str(fixed_export_dir),
    dataset_type=fo.types.YOLOv5Dataset,
    label_field="ground_truth",
    classes=export_classes,
    split=split,
    export_media=True,
)

In [None]:
# Print the file paths of the samples currently selected in the App.

selected_ids = session.selected

# Nothing is printed when the selection is empty.
if selected_ids:
    # Restrict the dataset to the selected sample IDs ...
    selected_samples = dataset.select(selected_ids)
    # ... and pull out their `filepath` values.
    filepaths = selected_samples.values("filepath")

    print(f"選択された {len(filepaths)} 個の画像のファイルパス:")
    for path in filepaths:
        print(f" - {path}")


In [None]:
# Collect the file paths of the samples in the session's current view.
# NOTE: this possibly only works for the view that was just saved.
current_view = session.view
min_edge_filepaths = current_view.values("filepath")

In [None]:
# Display the file paths collected from the session view.
min_edge_filepaths

In [None]:
# Collect the file paths of the samples in the session's current view.
# NOTE: this possibly only works for the view that was just saved.
current_view = session.view
min_area_filepaths = current_view.values("filepath")

In [None]:
# Export the `ground_truth` labels in YOLOv5 format under "my_dir".
# The path is converted with str(), as every other path handed to FiftyOne
# in this notebook is.
# NOTE(review): unlike the "02-2_Fix" export, no `split` or `classes` is
# passed here, so the exporter's defaults apply — confirm that is intended.
dataset.export(
    export_dir=str(path_all.DATASET_ROOT / "my_dir"),
    dataset_type=fo.types.YOLOv5Dataset,
    label_field="ground_truth",
)