### Notebook for listing all datasets

Lists available datasets which might be helpful for finetuning specific detectors. Also creates a fiftyone session suitable for exploring them.

In [1]:
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F
import os

In [2]:
dataset_dirs = ['Bordeaux', 'Bonn', 'image_search']
dataset_type = fo.types.COCODetectionDataset
data_path = 'data/robocup_bordeaux_2023/robocup_data/coco_datasets'
num_datasets, num_frames, num_samples = 0, 0, 0
for dataset_dir in dataset_dirs:
    print('Loading datasets in directory \'' + dataset_dir + '\'')
    datasets = os.listdir(os.path.join(data_path, dataset_dir))
    print('Found datasets:', datasets, '\n')
    for dataset in datasets:
        num_datasets += 1
        print('Loading dataset ' + dataset)
        train_dataset = fo.Dataset.from_dir(
            dataset_type=dataset_type,
            data_path=os.path.join(data_path, dataset_dir, dataset, 'train'),
            labels_path=os.path.join(data_path, dataset_dir, dataset, 'annotations', 'instances_Train.json'),
            name=dataset + '_train',
        )
        num_frames += train_dataset.count()
        num_samples += sum(train_dataset.count_values("segmentations.detections.label").values())
        print('--> Available training labels are: \n', train_dataset.distinct("segmentations.detections.label"))
        if os.path.exists(os.path.join(data_path, dataset_dir, dataset, 'val')):
            valid_dataset = fo.Dataset.from_dir(
                dataset_type=dataset_type,
                data_path=os.path.join(data_path, dataset_dir, dataset, 'val'),
                labels_path=os.path.join(data_path, dataset_dir, dataset, 'annotations', 'instances_Validation.json'),
                name=dataset + '_valid',
            )
            num_frames += valid_dataset.count()
            num_samples += sum(valid_dataset.count_values("segmentations.detections.label").values())
            print('--> Available validation labels are: \n', valid_dataset.distinct("segmentations.detections.label"))
        print()

Loading datasets in directory 'Bordeaux'
Found datasets: ['bordeaux_day_2_coco', 'bordeaux_clean_table_coco', 'bordeaux_day_1_coco', 'bordeaux_bags_coco', 'arena_furniture_coco'] 

Loading dataset bordeaux_day_2_coco
 100% |█████████████████| 253/253 [5.7s elapsed, 0s remaining, 35.1 samples/s]       
--> Available training labels are: 
 ['apple', 'bag', 'banana', 'bed', 'bin', 'bowl', 'candle', 'cereals', 'chair', 'chocolate_pudding_box', 'cola', 'couch', 'couch_table', 'cracker_box', 'cupboard', 'decoration', 'dice', 'dishwasher', 'dishwasher_tray', 'door', 'door_handle', 'dresser', 'fridge', 'iced_tea', 'jacket_stand', 'juice_pack', 'lamp', 'lemon', 'milk_bottle', 'mustard_bottle', 'orange', 'orange_juice', 'peach', 'pear', 'person', 'plum', 'potted_meat_can', 'pringles', 'red_wine', 'rubiks_cube', 'shelf', 'shelf_door', 'sink', 'soccer_ball', 'sponge', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tennis_ball', 'tomato_soup_can', 'tray', 'tropical_juice', 'tuna_fis

 100% |███████████████████| 30/30 [764.6ms elapsed, 0s remaining, 39.3 samples/s]      
--> Available validation labels are: 
 ['almdudler', 'apple', 'apple_juice', 'bag', 'banana', 'baseball', 'booster', 'bowl', 'cereals', 'chair', 'chocolate_pudding_box', 'cleanser_bottle', 'coffee_can', 'coke', 'cracker_box', 'dice', 'dishwasher_tab', 'fork', 'golf_ball', 'iso_drink', 'knife', 'lemon', 'milk_bottle', 'milk_carton', 'mug', 'mustard_bottle', 'orange', 'orange_juice', 'peach', 'pear', 'person', 'plate', 'plum', 'potted_meat_can', 'pringles', 'racquet_ball', 'rubiks_cube', 'soccer_ball', 'soft_ball', 'spoon', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tennis_ball', 'tomato_soup_can', 'tuna_fish_can']

Loading dataset kitchenware_ais_coco
 100% |█████████████████| 150/150 [2.5s elapsed, 0s remaining, 60.6 samples/s]      
--> Available training labels are: 
 ['cooker_plate', 'cooker_temp', 'grill_grate', 'grill_temp', 'pan', 'pan_handle', 'person', 'pizza_grill', 'piz

In [3]:
print(f'Found a total of {num_samples} annotated object instances in {num_frames} frames from {num_datasets} datasets.')

Found a total of 30742 annotated object instances in 3130 frames from 21 datasets.


In [4]:
# The datasets can be explored right in this notebook, or alternatively under localhost:5151
# To select a dataset, use the menu near the top-left of the session.
session = fo.launch_app(train_dataset)