In [1]:
import json
import os
import shutil

import pandas as pd

import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

In [2]:
# Read the per-parent image-count table, keep its first 10 rows, and use
# their "Parent" values as the list of classes to work with.
images_freq = pd.read_csv("../data_files/number_of_images_per_parent.csv").head(10)
classes = images_freq["Parent"].tolist()
print(classes)

['Person', 'Clothing', 'Plant', 'Human body', 'Tree', 'Vehicle', 'Building', 'Land vehicle', 'Animal', 'Footwear']


In [3]:
# Open Images V7, validation split, detection labels only, restricted to the
# classes in `classes`. The dataset is registered as "open-images-validation";
# per the FiftyOne log, an existing dataset with that name is loaded as-is
# instead of being re-read from disk.
dataset = foz.load_zoo_dataset(
    "open-images-v7",
    split="validation",
    classes=classes,
    label_types=["detections"],
    shuffle=True,  # randomize sample order so selections are varied
    seed=51,       # fixed seed keeps the shuffle reproducible
    dataset_name="open-images-validation",
)

Downloading split 'validation' to 'C:\Users\Admin\fiftyone\open-images-v7\validation' if necessary
Necessary images already downloaded
Existing download of split 'validation' is sufficient
Loading existing dataset 'open-images-validation'. To reload from disk, either delete the existing dataset or provide a custom `dataset_name` to use


In [4]:
# Show the dataset summary: name, media type, sample count, and field schema.
# The schema is what tells us the detections live in the `ground_truth` field.
print(dataset)

Name:        open-images-validation
Media type:  image
Num samples: 19208
Persistent:  False
Tags:        []
Sample fields:
    id:               fiftyone.core.fields.ObjectIdField
    filepath:         fiftyone.core.fields.StringField
    tags:             fiftyone.core.fields.ListField(fiftyone.core.fields.StringField)
    metadata:         fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.metadata.ImageMetadata)
    created_at:       fiftyone.core.fields.DateTimeField
    last_modified_at: fiftyone.core.fields.DateTimeField
    ground_truth:     fiftyone.core.fields.EmbeddedDocumentField(fiftyone.core.labels.Detections)


In [8]:
# Copy up to 500 images per class into `open_images_classes/<class name>/`.
#
# The detections of this dataset are stored in the `ground_truth` field (see
# the dataset summary), so the filter expression has to be rooted at
# `ground_truth.detections`. Rooting it at a non-existent field does not read
# the real labels, so no image is ever selected for copying.
base_dir = "open_images_classes"
os.makedirs(base_dir, exist_ok=True)

for class_name in classes:
    # One folder per class
    class_dir = os.path.join(base_dir, class_name)
    os.makedirs(class_dir, exist_ok=True)

    # Samples that carry at least one detection labelled with this class
    view = dataset.match(
        F("ground_truth.detections").filter(F("label") == class_name).length() > 0
    )

    # Take up to 500 samples (or fewer if less are available)
    samples = view[:500]

    # Copy each image, named after its sample ID so file names cannot collide
    copied = 0
    for sample in samples:
        src_path = sample.filepath  # path of the already-downloaded image
        dst_path = os.path.join(class_dir, f"{sample.id}.jpg")
        shutil.copyfile(src_path, dst_path)
        copied += 1

    # Surface empty classes instead of silently leaving an empty folder
    if copied == 0:
        print(f"Warning: no images found for class '{class_name}'")

print("Image download and organization complete!")

Image download and organization complete!


In [5]:
# Export one YOLOv5-format dataset per class under `open_images_classes_1/`,
# capped at the first 100 samples of each class view.
label_f = "ground_truth"
base_dir = "open_images_classes_1"
os.makedirs(base_dir, exist_ok=True)

for cls in classes:
    # Restrict the `ground_truth` labels to those whose label equals this class
    is_cls = F("label") == cls
    view = dataset.filter_labels(label_f, is_cls)
    print(f"Class: {cls} has {len(view)} samples")

    export_dir = f"{base_dir}/{cls}"
    view = view[:100]

    view.export(
        export_dir=export_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        label_field=label_f,
    )
    print(f"Exported {cls} to {export_dir}")

Class: Person has 7144 samples
 100% |█████████████████| 100/100 [380.0ms elapsed, 0s remaining, 263.1 samples/s]      
Exported Person to open_images_classes_1/Person
Class: Clothing has 6626 samples
 100% |█████████████████| 100/100 [606.0ms elapsed, 0s remaining, 165.0 samples/s]      
Exported Clothing to open_images_classes_1/Clothing
Class: Plant has 3838 samples
 100% |█████████████████| 100/100 [859.9ms elapsed, 0s remaining, 116.3 samples/s]      
Exported Plant to open_images_classes_1/Plant
Class: Human body has 5686 samples
 100% |█████████████████| 100/100 [533.2ms elapsed, 0s remaining, 187.5 samples/s]      
Exported Human body to open_images_classes_1/Human body
Class: Tree has 3611 samples
 100% |█████████████████| 100/100 [856.8ms elapsed, 0s remaining, 116.7 samples/s]      
Exported Tree to open_images_classes_1/Tree
Class: Vehicle has 1545 samples
 100% |█████████████████| 100/100 [866.7ms elapsed, 0s remaining, 115.4 samples/s]      
Exported Vehicle to open_image

In [3]:
# Export one YOLOv5-format dataset per class listed in
# classes_with_none_objects.json, under `./main/dataset/<class name>/`.
#
# NOTE(review): assumes the JSON file holds a flat list of class-name
# strings — confirm against the file.
# The file is opened as UTF-8 explicitly: without it, `open` uses the
# platform default encoding, which is not UTF-8 on Windows.
with open("../data_files/classes_with_none_objects.json", "r", encoding="utf-8") as f:
    subsets = json.load(f)

label_field = "ground_truth"

base_export_dir = "./main/dataset"

for cls in subsets:
    # Restrict the `ground_truth` labels to those whose label equals this class
    view = dataset.filter_labels(label_field, F("label") == cls)
    export_dir = f"{base_export_dir}/{cls}"

    view.export(
        export_dir=export_dir,
        dataset_type=fo.types.YOLOv5Dataset,
        label_field=label_field,
    )

 100% |███████████████████| 64/64 [3.0s elapsed, 0s remaining, 21.3 samples/s]      
 100% |█████████████████████| 1/1 [2.6s elapsed, 0s remaining, 0.4 samples/s] 
 100% |█████████████████████| 6/6 [2.5s elapsed, 0s remaining, 2.4 samples/s] 
 100% |█████████████████████| 1/1 [2.3s elapsed, 0s remaining, 0.4 samples/s] 
 100% |███████████████████| 14/14 [2.1s elapsed, 0s remaining, 6.7 samples/s] 
 100% |█████████████████████| 8/8 [2.0s elapsed, 0s remaining, 4.1 samples/s] 
 100% |███████████████████| 13/13 [2.0s elapsed, 0s remaining, 6.4 samples/s] 
 100% |█████████████████████| 0/0 [2.0s elapsed, ? remaining, ? samples/s] 
 100% |█████████████████████| 1/1 [2.1s elapsed, 0s remaining, 0.5 samples/s] 
 100% |█████████████████████| 8/8 [2.1s elapsed, 0s remaining, 3.9 samples/s] 
 100% |█████████████████████| 9/9 [2.0s elapsed, 0s remaining, 4.5 samples/s] 
 100% |█████████████████████| 0/0 [2.0s elapsed, ? remaining, ? samples/s] 
 100% |███████████████████| 11/11 [2.0s elapsed, 0s 

In [4]:
from glob import glob

In [23]:
def _image_id(path):
    """Return the file name of `path` without its directory or extension.

    Both forward slashes and backslashes are treated as separators, so the
    result is the same for POSIX-style and Windows-style paths, including
    relative ones that start with a dot.
    """
    file_name = path.replace("\\", "/").rsplit("/", 1)[-1]
    return os.path.splitext(file_name)[0]


# IDs (file stems) of every exported .jpg across all class subsets.
img_ids = []
for cls in subsets:
    img_path = glob(f"./main/dataset/{cls}/images/val/*.jpg")
    img_ids.extend(_image_id(path) for path in img_path)

# NOTE(review): assumes img_undetected.json is a list of image paths — confirm.
with open("./main/img_undetected.json", "r", encoding="utf-8") as f:
    img_path_none_detect = json.load(f)

img_id_none_detect = [_image_id(path) for path in img_path_none_detect]

# Drop every occurrence of an excluded ID. An image exported under several
# classes appears several times in `img_ids`, and `list.remove` would only
# delete the first one; a set lookup also avoids the quadratic scan.
excluded_ids = set(img_id_none_detect)
img_ids = [img_id for img_id in img_ids if img_id not in excluded_ids]

# NOTE(review): the output name reads "undected" (sic); kept unchanged because
# other code may read this exact path.
with open("./main/img_undected2.json", "w", encoding="utf-8") as f:
    json.dump(img_ids, f)