In [1]:
import os
import pandas as pd
from collections import defaultdict

In [2]:
csv_path = "bbox_coordinate.csv"
npy_dir = "dataset"
max_bbox_count = 0  # Change to desired threshold (e.g., 2 for <=2 bboxes)

df = pd.read_csv(csv_path)

# Count bounding boxes per slice
bbox_counts = defaultdict(int)
for idx, row in df.iterrows():
    slice_id = row['img_slice_id']
    bbox_counts[slice_id] += 0 if row['x'] == 'NA' or pd.isna(row['x']) else 1

# Identify slices to delete
slices_to_delete = [sid for sid, count in bbox_counts.items() if count <= max_bbox_count]

# Delete corresponding .npy files
deleted = []
missing = []
for sid in slices_to_delete:
    file_path = os.path.join(npy_dir, f"{sid}.npy")
    if os.path.isfile(file_path):
        os.remove(file_path)
        deleted.append(sid)
    else:
        missing.append(sid)


print(f"Deleted {len(deleted)} .npy files with ≤ {max_bbox_count} bounding boxes.")
if missing:
    print(f"{len(missing)} files not found (possibly already deleted or never created).")

Deleted 42290 .npy files with ≤ 0 bounding boxes.
48516 files not found (possibly already deleted or never created).


In [3]:
print(deleted)

['r001s001_0', 'r001s001_1', 'r001s001_10', 'r001s001_11', 'r001s001_12', 'r001s001_13', 'r001s001_14', 'r001s001_140', 'r001s001_141', 'r001s001_142', 'r001s001_143', 'r001s001_144', 'r001s001_145', 'r001s001_146', 'r001s001_147', 'r001s001_148', 'r001s001_149', 'r001s001_15', 'r001s001_150', 'r001s001_151', 'r001s001_152', 'r001s001_153', 'r001s001_154', 'r001s001_155', 'r001s001_156', 'r001s001_157', 'r001s001_158', 'r001s001_159', 'r001s001_16', 'r001s001_160', 'r001s001_161', 'r001s001_162', 'r001s001_163', 'r001s001_164', 'r001s001_165', 'r001s001_166', 'r001s001_167', 'r001s001_168', 'r001s001_169', 'r001s001_17', 'r001s001_170', 'r001s001_171', 'r001s001_172', 'r001s001_173', 'r001s001_174', 'r001s001_175', 'r001s001_176', 'r001s001_177', 'r001s001_178', 'r001s001_179', 'r001s001_18', 'r001s001_180', 'r001s001_181', 'r001s001_182', 'r001s001_183', 'r001s001_184', 'r001s001_185', 'r001s001_186', 'r001s001_187', 'r001s001_188', 'r001s001_19', 'r001s001_2', 'r001s001_20', 'r001s00