### Template notebook for finetuning MaskDINO on different datasets.

This notebook finetunes MaskDINO to detect and segment the object classes relevant for the Storing Groceries task at RoboCup@Home 2023. To finetune for other tasks, select the relevant datasets and labels in the same way as exemplified here.

In [1]:
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

import torch
import detectron2
from detectron2.utils.logger import setup_logger

setup_logger()

# import some common libraries
import numpy as np
import os, shutil, json, random, cv2
from datetime import datetime

# import some common detectron2 utilities
from detectron2.structures import BoxMode
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.projects.deeplab import add_deeplab_config
#from detectron2.data.datasets import register_coco_instances

import sys
sys.path.append('contrib/MaskDINO')

sys.path.append("..")
from helpers.util import get_timestamp, select_classes, merge_to_superclass, concat_datasets, make_archive
from helpers.fiftyone_detectron2_bridge import clean_instances, detectron_to_fo, get_fiftyone_dicts

In [2]:
# name of this experiment
experiment_name = "example_detector"
# backbone to finetune: "r50" uses fewer resources, "swin" is more performant
model_type = "swin"

#### Dataset creation

Usually we want to combine the following: the base dataset from Bonn or the day 1 dataset from Bordeaux, the older robotics hall dataset recorded at AIS, and probably at least one more dataset created in Bordeaux. For some tasks it can also be beneficial to add some of the auxiliary image search datasets. When working locally, it can make sense to select a different validation set. The available datasets and the object classes they cover are listed in *dataset_overview.ipynb*.

In [3]:
# add the base dataset (day 1 in Bordeaux)
# name prefix for the fiftyone datasets created in this cell
name = "robocup23_day1_coco"

# The directory containing the dataset to import
data_dir = "data/robocup_bordeaux_2023/robocup_data/coco_datasets/Bordeaux/bordeaux_day_1_coco"
labels_path = data_dir + '/annotations'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/val'
train_labels_path = labels_path + '/instances_Train.json'
valid_labels_path = labels_path + '/instances_Validation.json'

# setup the fiftyone dataset
dataset_type = fo.types.COCODetectionDataset

# fiftyone refuses to create a dataset under a name that is already taken, so
# re-running this cell in a live kernel would fail; drop stale copies first
for stale_name in (name + '_train', name + '_valid'):
    if fo.dataset_exists(stale_name):
        fo.delete_dataset(stale_name)

train_dataset = fo.Dataset.from_dir(
    dataset_type=dataset_type,
    data_path=train_dir,
    labels_path=train_labels_path,
    name=name+'_train',
)

valid_dataset = fo.Dataset.from_dir(
    dataset_type=dataset_type,
    data_path=valid_dir,
    labels_path=valid_labels_path,
    name=name+'_valid',
)

# most of the time we use all classes here and filter the total dataset at the end instead
relevant_classes = ['apple', 'bin', 'chair', 'chocolate_pudding_box', 'cleanser_bottle', 'coffee_can', 'couch', 
                    'couch_table', 'cupboard', 'dishwasher', 'door', 'dresser', 'fork', 'fridge', 'fridge_handle', 
                    'jacket_stand', 'knife', 'lamp', 'mustard_bottle', 'person', 'pringles', 'rubiks_cube', 'shelf', 
                    'sink', 'spoon', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tv']

train_dataset = select_classes(train_dataset, labels=relevant_classes)
valid_dataset = select_classes(valid_dataset, labels=relevant_classes)

print('Found train dataset labels:')
print(train_dataset.distinct("segmentations.detections.label"))
print('Found valid dataset labels:')
print(valid_dataset.distinct("segmentations.detections.label"))

# merge training and validation data here (because at competition time we are greedy)
# we will create a validation set further down the line
combined_train_dataset = concat_datasets([train_dataset, valid_dataset])
# the validation split now lives inside the training data, so drop the separate handle
valid_dataset = None

 100% |█████████████████| 100/100 [2.2s elapsed, 0s remaining, 44.6 samples/s]        
 100% |███████████████████| 25/25 [653.9ms elapsed, 0s remaining, 38.3 samples/s]      
Found train dataset labels:
['apple', 'bin', 'chair', 'chocolate_pudding_box', 'cleanser_bottle', 'coffee_can', 'couch', 'couch_table', 'cupboard', 'dishwasher', 'door', 'dresser', 'fork', 'fridge', 'fridge_handle', 'jacket_stand', 'knife', 'lamp', 'mustard_bottle', 'person', 'pringles', 'rubiks_cube', 'shelf', 'sink', 'spoon', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tv']
Found valid dataset labels:
['apple', 'bin', 'chair', 'chocolate_pudding_box', 'cleanser_bottle', 'coffee_can', 'couch', 'couch_table', 'cupboard', 'dishwasher', 'door', 'fork', 'knife', 'lamp', 'mustard_bottle', 'person', 'pringles', 'rubiks_cube', 'shelf', 'sink', 'spoon', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tv']


In [4]:
# we continue by adding the dataset containing furniture from the arenas.
# again use the validation set for training instead

USE_LOCAL_FURNITURE = True

if USE_LOCAL_FURNITURE:
    # add the arena furniture dataset
    name = "robocup23_local_furniture"

    # The directory containing the dataset to import
    data_dir = "data/robocup_bordeaux_2023/robocup_data/coco_datasets/Bordeaux/arena_furniture_coco"
    labels_path = data_dir + '/annotations'

    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/val'
    train_labels_path = labels_path + '/instances_Train.json'
    valid_labels_path = labels_path + '/instances_Validation.json'

    # The type of the dataset being imported
    dataset_type = fo.types.COCODetectionDataset

    # fiftyone refuses to create a dataset under a name that is already taken, so
    # re-running this cell in a live kernel would fail; drop stale copies first
    for stale_name in (name + '_train', name + '_valid'):
        if fo.dataset_exists(stale_name):
            fo.delete_dataset(stale_name)

    train_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=train_dir,
        labels_path=train_labels_path,
        name=name+'_train',
    )

    valid_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=valid_dir,
        labels_path=valid_labels_path,
        name=name+'_valid',
    )

    print('Adding dataset ' + name + '...')
    print('Found train dataset labels:')
    print(train_dataset_add.distinct("segmentations.detections.label"))
    print('Found valid dataset labels:')
    print(valid_dataset_add.distinct("segmentations.detections.label"))

    # most of the time we use all classes here and filter the total dataset at the end instead
    # NOTE(review): 'bed', 'bin' and 'microwave_oven' occur in this dataset but are
    # not selected below -- confirm that dropping them is intentional
    relevant_classes = ['chair', 'couch', 'couch_table', 'cupboard', 'dishwasher', 'dishwasher_slot', 
                        'dishwasher_tray', 'door', 'door_handle', 'dresser', 'fridge', 'fridge_handle', 'jacket_stand', 
                        'lamp', 'person', 'shelf', 'sink', 'table', 'tv']

    train_dataset_add = select_classes(train_dataset_add, labels=relevant_classes)
    valid_dataset_add = select_classes(valid_dataset_add, labels=relevant_classes)

    # both splits go into the training data; no validation set is built from this dataset
    combined_train_dataset = concat_datasets([combined_train_dataset, train_dataset_add])
    combined_train_dataset = concat_datasets([combined_train_dataset, valid_dataset_add])

 100% |█████████████████| 100/100 [1.3s elapsed, 0s remaining, 78.2 samples/s]         
 100% |███████████████████| 50/50 [611.2ms elapsed, 0s remaining, 81.8 samples/s]      
Adding dataset robocup23_local_furniture...
Found train dataset labels:
['bed', 'chair', 'couch', 'couch_table', 'cupboard', 'dishwasher', 'dishwasher_slot', 'dishwasher_tray', 'door', 'door_handle', 'dresser', 'fridge', 'fridge_handle', 'jacket_stand', 'lamp', 'microwave_oven', 'person', 'shelf', 'sink', 'table', 'tv']
Found valid dataset labels:
['bed', 'bin', 'chair', 'couch', 'couch_table', 'cupboard', 'dishwasher', 'dishwasher_slot', 'dishwasher_tray', 'door', 'door_handle', 'dresser', 'fridge', 'fridge_handle', 'jacket_stand', 'lamp', 'person', 'shelf', 'sink', 'table', 'tv']


In [5]:
# now we add our dataset from day 2 in Bordeaux
# at this point we actually initialize the validation set cleanly

USE_DAY2 = True
if USE_DAY2:
    # add the day 2 dataset from Bordeaux
    name = "bordeaux_day2"

    # The directory containing the dataset to import
    data_dir = "data/robocup_bordeaux_2023/robocup_data/coco_datasets/Bordeaux/bordeaux_day_2_coco"
    labels_path = data_dir + '/annotations'

    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/val'
    train_labels_path = labels_path + '/instances_Train.json'
    valid_labels_path = labels_path + '/instances_Validation.json'

    # The type of the dataset being imported
    dataset_type = fo.types.COCODetectionDataset

    # fiftyone refuses to create a dataset under a name that is already taken, so
    # re-running this cell in a live kernel would fail; drop stale copies first
    for stale_name in (name + '_train', name + '_valid'):
        if fo.dataset_exists(stale_name):
            fo.delete_dataset(stale_name)

    train_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=train_dir,
        labels_path=train_labels_path,
        name=name+'_train',
    )

    valid_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=valid_dir,
        labels_path=valid_labels_path,
        name=name+'_valid',
    )

    print('Adding dataset ' + name + '...')
    print('Found train dataset labels:')
    print(train_dataset_add.distinct("segmentations.detections.label"))
    print('Found valid dataset labels:')
    print(valid_dataset_add.distinct("segmentations.detections.label"))

    # most of the time we use all classes here and filter the total dataset at the end instead
    # NOTE(review): 'dice' occurs in the train split but is not selected below -- confirm intentional
    relevant_classes = ['apple', 'bag', 'banana', 'bed', 'bin', 'bowl', 'candle', 'cereals', 'chair',
                        'chocolate_pudding_box', 'cola', 'couch', 'couch_table', 'cracker_box', 'cupboard', 
                        'decoration', 'dishwasher', 'dishwasher_tray', 'door', 'door_handle', 'dresser', 'fridge', 
                        'iced_tea', 'jacket_stand', 'juice_pack', 'lamp', 'lemon', 'milk_bottle', 'mustard_bottle', 
                        'orange', 'orange_juice', 'peach', 'pear', 'person', 'plum', 'potted_meat_can', 'pringles', 
                        'red_wine', 'rubiks_cube', 'shelf', 'shelf_door', 'sink', 'soccer_ball', 'sponge', 'strawberry', 
                        'strawberry_gelatin_box', 'sugar_box', 'table', 'tennis_ball', 'tomato_soup_can', 'tray', 
                        'tropical_juice', 'tuna_fish_can', 'tv']

    train_dataset_add = select_classes(train_dataset_add, labels=relevant_classes)
    valid_dataset_add = select_classes(valid_dataset_add, labels=relevant_classes)

    combined_train_dataset = concat_datasets([combined_train_dataset, train_dataset_add])
    # the validation set starts out as the validation split of this dataset
    combined_valid_dataset = valid_dataset_add

 100% |█████████████████| 253/253 [5.2s elapsed, 0s remaining, 38.1 samples/s]      
 100% |███████████████████| 32/32 [572.5ms elapsed, 0s remaining, 55.9 samples/s]      
Adding dataset bordeaux_day2...
Found train dataset labels:
['apple', 'bag', 'banana', 'bed', 'bin', 'bowl', 'candle', 'cereals', 'chair', 'chocolate_pudding_box', 'cola', 'couch', 'couch_table', 'cracker_box', 'cupboard', 'decoration', 'dice', 'dishwasher', 'dishwasher_tray', 'door', 'door_handle', 'dresser', 'fridge', 'iced_tea', 'jacket_stand', 'juice_pack', 'lamp', 'lemon', 'milk_bottle', 'mustard_bottle', 'orange', 'orange_juice', 'peach', 'pear', 'person', 'plum', 'potted_meat_can', 'pringles', 'red_wine', 'rubiks_cube', 'shelf', 'shelf_door', 'sink', 'soccer_ball', 'sponge', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tennis_ball', 'tomato_soup_can', 'tray', 'tropical_juice', 'tuna_fish_can', 'tv']
Found valid dataset labels:
['banana', 'bed', 'bin', 'bowl', 'candle', 'cereals', 'chair', 'c

In [6]:
# now we add a rather large dataset from Bonn. 
# here the orange juice looks a bit like the tropical juice at robocup, so we just remap that class name
# the validation set is combined with the validation set from this particular dataset

USE_AIS_OBJECTS = True
if USE_AIS_OBJECTS:
    # add the AIS object dataset from Bonn
    name = "robocup23_ais_coco"

    # The directory containing the dataset to import
    data_dir = "data/robocup_bordeaux_2023/robocup_data/coco_datasets/Bonn/robocup23_ais_coco"
    labels_path = data_dir + '/annotations'

    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/val'
    train_labels_path = labels_path + '/instances_Train.json'
    valid_labels_path = labels_path + '/instances_Validation.json'

    # The type of the dataset being imported
    dataset_type = fo.types.COCODetectionDataset

    # fiftyone refuses to create a dataset under a name that is already taken, so
    # re-running this cell in a live kernel would fail; drop stale copies first
    for stale_name in (name + '_train', name + '_valid'):
        if fo.dataset_exists(stale_name):
            fo.delete_dataset(stale_name)

    train_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=train_dir,
        labels_path=train_labels_path,
        name=name+'_train',
    )

    valid_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=valid_dir,
        labels_path=valid_labels_path,
        name=name+'_valid',
    )

    print('Adding dataset ' + name + '...')
    print('Found train dataset labels:')
    print(train_dataset_add.distinct("segmentations.detections.label"))
    print('Found valid dataset labels:')
    print(valid_dataset_add.distinct("segmentations.detections.label"))

    # most of the time we use all classes here and filter the total dataset at the end instead
    relevant_classes = ['apple', 'bag', 'banana', 'baseball', 'bowl', 'cereals',
                        'chair', 'chocolate_pudding_box', 'cleanser_bottle', 'coffee_can', 'cracker_box', 'dice', 
                        'dishwasher_tab', 'fork', 'knife', 'lemon', 'milk_bottle', 'mug', 'mustard_bottle', 
                        'orange', 'orange_juice', 'peach', 'pear', 'person', 
                        'plate', 'plum', 'potted_meat_can', 'pringles', 'rubiks_cube', 'soccer_ball', 
                        'spoon', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'table', 
                        'tennis_ball', 'tomato_soup_can', 'tuna_fish_can']

    train_dataset_add = select_classes(train_dataset_add, labels=relevant_classes)
    valid_dataset_add = select_classes(valid_dataset_add, labels=relevant_classes)

    # relabel orange_juice as tropical_juice (see the note at the top of this cell)
    old_class_labels = ['orange_juice']
    new_class_labels = ['tropical_juice']
    train_dataset_add = merge_to_superclass(train_dataset_add, new_class_labels, old_class_labels)
    valid_dataset_add = merge_to_superclass(valid_dataset_add, new_class_labels, old_class_labels)

    combined_train_dataset = concat_datasets([combined_train_dataset, train_dataset_add])

    # the validation set is only created by the day 2 cell when USE_DAY2 is enabled;
    # if it does not exist yet, start it here instead of failing with a NameError
    if 'combined_valid_dataset' in globals():
        combined_valid_dataset = concat_datasets([combined_valid_dataset, valid_dataset_add])
    else:
        combined_valid_dataset = valid_dataset_add

 100% |█████████████████| 300/300 [8.8s elapsed, 0s remaining, 35.2 samples/s]      
 100% |███████████████████| 30/30 [692.4ms elapsed, 0s remaining, 43.4 samples/s]      
Adding dataset robocup23_ais_coco...
Found train dataset labels:
['almdudler', 'apple', 'apple_juice', 'bag', 'banana', 'baseball', 'booster', 'bowl', 'cereals', 'chair', 'chocolate_pudding_box', 'cleanser_bottle', 'coffee_can', 'coke', 'cracker_box', 'dice', 'dishwasher_tab', 'fork', 'golf_ball', 'iso_drink', 'knife', 'lemon', 'milk_bottle', 'milk_carton', 'mug', 'mustard_bottle', 'orange', 'orange_juice', 'peach', 'pear', 'person', 'plate', 'plum', 'potted_meat_can', 'pringles', 'racquet_ball', 'rubiks_cube', 'soccer_ball', 'soft_ball', 'spoon', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tennis_ball', 'tomato_soup_can', 'tuna_fish_can']
Found valid dataset labels:
['almdudler', 'apple', 'apple_juice', 'bag', 'banana', 'baseball', 'booster', 'bowl', 'cereals', 'chair', 'chocolate_pudding_box', '

In [7]:
# this dataset contains some objects which we considered challenging after initial tests in Bonn

USE_DIFFICULT_OBJECTS = True

if USE_DIFFICULT_OBJECTS:
    # add the difficult objects dataset from Bonn
    name = "difficult_objects"

    # The directory containing the dataset to import
    data_dir = "data/robocup_bordeaux_2023/robocup_data/coco_datasets/Bonn/difficult_objects_ais_coco"
    labels_path = data_dir + '/annotations'

    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/val'
    train_labels_path = labels_path + '/instances_Train.json'
    valid_labels_path = labels_path + '/instances_Validation.json'

    # The type of the dataset being imported
    dataset_type = fo.types.COCODetectionDataset

    # fiftyone refuses to create a dataset under a name that is already taken, so
    # re-running this cell in a live kernel would fail; drop stale copies first
    for stale_name in (name + '_train', name + '_valid'):
        if fo.dataset_exists(stale_name):
            fo.delete_dataset(stale_name)

    train_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=train_dir,
        labels_path=train_labels_path,
        name=name+'_train',
    )

    valid_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=valid_dir,
        labels_path=valid_labels_path,
        name=name+'_valid',
    )

    print('Adding dataset ' + name + '...')
    print('Found train dataset labels:')
    print(train_dataset_add.distinct("segmentations.detections.label"))
    print('Found valid dataset labels:')
    print(valid_dataset_add.distinct("segmentations.detections.label"))

    # most of the time we use all classes here and filter the total dataset at the end instead
    relevant_classes = ['apple', 'bag', 'banana', 'baseball', 'bowl', 'cereals',
                        'chair', 'chocolate_pudding_box', 'cleanser_bottle', 'coffee_can', 'cracker_box', 'dice', 
                        'dishwasher_tab', 'fork', 'knife', 'lemon', 'milk_bottle', 'mug', 'mustard_bottle', 
                        'orange', 'orange_juice', 'peach', 'pear', 'person', 
                        'plate', 'plum', 'potted_meat_can', 'pringles', 'rubiks_cube', 'soccer_ball', 
                        'spoon', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'table', 
                        'tennis_ball', 'tomato_soup_can', 'tuna_fish_can']

    train_dataset_add = select_classes(train_dataset_add, labels=relevant_classes)
    valid_dataset_add = select_classes(valid_dataset_add, labels=relevant_classes)

    # note that we freeze the validation dataset: both splits of this dataset are
    # added to the training data and combined_valid_dataset is left untouched
    combined_train_dataset = concat_datasets([combined_train_dataset, train_dataset_add])
    combined_train_dataset = concat_datasets([combined_train_dataset, valid_dataset_add])

 100% |█████████████████| 132/132 [2.7s elapsed, 0s remaining, 54.1 samples/s]      
 100% |███████████████████| 12/12 [250.5ms elapsed, 0s remaining, 48.2 samples/s]     
Adding dataset difficult_objects...
Found train dataset labels:
['almdudler', 'apple', 'apple_juice', 'bowl', 'cereals', 'chair', 'coffee_can', 'coke', 'cracker_box', 'fork', 'iso_drink', 'knife', 'lemon', 'mug', 'peach', 'pear', 'plate', 'pringles', 'spoon', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'table', 'tennis_ball', 'tomato_soup_can']
Found valid dataset labels:
['almdudler', 'apple_juice', 'bowl', 'cereals', 'coffee_can', 'coke', 'cracker_box', 'fork', 'iso_drink', 'knife', 'lemon', 'mug', 'mustard_bottle', 'peach', 'pear', 'spoon', 'strawberry', 'strawberry_gelatin_box', 'sugar_box', 'tomato_soup_can']


In [8]:
# this is an older dataset, but it is quite large and contains some useful objects
# it does not contribute to the validation set

USE_OLD_DATASET = True
if USE_OLD_DATASET:
    # add the older robotics hall dataset
    name = "hall_manyobject"

    # The directory containing the dataset to import
    data_dir = "data/robocup_bordeaux_2023/robocup_data/coco_datasets/Bonn/base_dataset_ais_coco"
    labels_path = data_dir + '/annotations'

    train_dir = data_dir + '/train'
    valid_dir = data_dir + '/val'
    train_labels_path = labels_path + '/instances_Train.json'
    valid_labels_path = labels_path + '/instances_Validation.json'

    # The type of the dataset being imported
    dataset_type = fo.types.COCODetectionDataset

    # fiftyone refuses to create a dataset under a name that is already taken, so
    # re-running this cell in a live kernel would fail; drop stale copies first
    for stale_name in (name + '_train', name + '_valid'):
        if fo.dataset_exists(stale_name):
            fo.delete_dataset(stale_name)

    train_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=train_dir,
        labels_path=train_labels_path,
        name=name+'_train',
    )

    valid_dataset_add = fo.Dataset.from_dir(
        dataset_type=dataset_type,
        data_path=valid_dir,
        labels_path=valid_labels_path,
        name=name+'_valid',
    )

    print('Adding dataset ' + name + '...')
    print('Found train dataset labels:')
    print(train_dataset_add.distinct("segmentations.detections.label"))
    print('Found valid dataset labels:')
    print(valid_dataset_add.distinct("segmentations.detections.label"))

    # only a subset of this dataset's classes is useful for us, so we select them explicitly
    relevant_classes = ['bag', 'bowl', 'cereals', 'chair', 'cup', 'dishwasher_tab', 'fork', 
                        'knife', 'orange_juice', 'person', 'plate', 'pringles', 'spoon', 'table', 
                        'tomato_can', 'kidney_bean_can']

    train_dataset_add = select_classes(train_dataset_add, labels=relevant_classes)
    valid_dataset_add = select_classes(valid_dataset_add, labels=relevant_classes)

    # here tomato_can and kidney_bean_can are merged to tomato_soup_can
    # the false-positive risk is low because tomato_soup was the only can relevant in Bordeaux
    # old_class_labels[k] (a label or a list of labels) is mapped to new_class_labels[k]
    old_class_labels = ['orange_juice', 'cup', ['tomato_can', 'kidney_bean_can']]
    new_class_labels = ['tropical_juice', 'mug', 'tomato_soup_can']
    train_dataset_add = merge_to_superclass(train_dataset_add, new_class_labels, old_class_labels)
    valid_dataset_add = merge_to_superclass(valid_dataset_add, new_class_labels, old_class_labels)

    # both splits go into the training data; the validation set is not extended
    combined_train_dataset = concat_datasets([combined_train_dataset, train_dataset_add])
    combined_train_dataset = concat_datasets([combined_train_dataset, valid_dataset_add])

 100% |█████████████████| 507/507 [10.5s elapsed, 0s remaining, 57.9 samples/s]      
 100% |███████████████████| 30/30 [1.1s elapsed, 0s remaining, 28.5 samples/s]         
Adding dataset hall_manyobject...
Found train dataset labels:
['almdudler', 'apple_juice', 'bag', 'bell_pepper', 'booster', 'bowl', 'brush', 'cereals', 'chair', 'chamomile_tea', 'chocolate_candy', 'coke', 'cookies', 'couch', 'cup', 'dishwasher_tab', 'fork', 'iso_drink', 'ketchup', 'kidney_bean_can', 'kleenex', 'knife', 'mustard', 'noodles_chicken_flavor', 'noodles_duck_flavor', 'orange_juice', 'person', 'plate', 'pringles', 'sparkling_water', 'sponge', 'spoon', 'table', 'tomato_can', 'tuc', 'windex', 'wipes']
Found valid dataset labels:
['almdudler', 'apple_juice', 'bag', 'bell_pepper', 'booster', 'bowl', 'brush', 'cereals', 'chair', 'chamomile_tea', 'chocolate_candy', 'coke', 'cookies', 'couch', 'cup', 'dishwasher_tab', 'fork', 'iso_drink', 'ketchup', 'kidney_bean_can', 'kleenex', 'knife', 'mustard', 'noodles_chic

In [9]:
# add some auxiliary datasets from image searches here
# datasets are loaded, their labels merged and then filtered as defined below
# they also do not contribute to validation

# the four lists below are parallel: entry i of each list configures aux_dataset_names[i]
aux_dataset_names = ['fruits', 'balls', 'cereals']
merge_superclasses = [None, None, None]
merge_labels = [None, None, None]
use_classes = [['apple', 'lemon', 'orange', 'peach', 'pear', 'person', 'plum', 'strawberry'], 
               ['soccer_ball', 'baseball', 'tennis_ball', 'person'], 
               ['cereals', 'strawberry']]

# catch a forgotten entry early instead of failing with an IndexError mid-loop
assert len(aux_dataset_names) == len(merge_superclasses) == len(merge_labels) == len(use_classes), \
    'aux_dataset_names, merge_superclasses, merge_labels and use_classes must have the same length'

COMBINE = True
if COMBINE:
    # load more datasets that are relevant
    for i, dataset_name in enumerate(aux_dataset_names):
        # The directory containing the dataset to import
        data_dir_add = "data/robocup_bordeaux_2023/robocup_data/coco_datasets/image_search/" + dataset_name
        labels_path_add = data_dir_add + "/annotations"

        train_dir_add = data_dir_add + '/train'
        valid_dir_add = data_dir_add + '/val'
        train_labels_path_add = labels_path_add + '/instances_Train.json'
        valid_labels_path_add = labels_path_add + '/instances_Validation.json'

        # fiftyone refuses to create a dataset under a name that is already taken, so
        # re-running this cell in a live kernel would fail; drop stale copies first
        for stale_name in (dataset_name + '_train', dataset_name + '_valid'):
            if fo.dataset_exists(stale_name):
                fo.delete_dataset(stale_name)

        # name the dataset type explicitly instead of relying on a variable
        # that happens to be left over from an earlier cell
        train_dataset_add = fo.Dataset.from_dir(
            dataset_type=fo.types.COCODetectionDataset,
            data_path=train_dir_add,
            labels_path=train_labels_path_add,
            name=dataset_name+'_train',
        )

        valid_dataset_add = fo.Dataset.from_dir(
            dataset_type=fo.types.COCODetectionDataset,
            data_path=valid_dir_add,
            labels_path=valid_labels_path_add,
            name=dataset_name+'_valid',
        )

        # summarize dataset being added
        train_dataset_add_labels = train_dataset_add.distinct("segmentations.detections.label")
        valid_dataset_add_labels = valid_dataset_add.distinct("segmentations.detections.label")
        print('Adding dataset ' + dataset_name + '...')
        print('Training labels:', train_dataset_add_labels)
        print('Validation labels:', valid_dataset_add_labels)
        print()

        # merge some classes as specified if necessary
        train_dataset_add = merge_to_superclass(train_dataset_add, merge_superclasses[i], merge_labels[i])
        valid_dataset_add = merge_to_superclass(valid_dataset_add, merge_superclasses[i], merge_labels[i])
        # use only the classes we specified
        train_dataset_add = select_classes(train_dataset_add, labels=use_classes[i])
        valid_dataset_add = select_classes(valid_dataset_add, labels=use_classes[i])

        # combine these with the previous dataset (both splits go into training)
        combined_train_dataset = concat_datasets([combined_train_dataset, train_dataset_add])
        combined_train_dataset = concat_datasets([combined_train_dataset, valid_dataset_add])

 100% |███████████████████| 88/88 [2.8s elapsed, 0s remaining, 33.2 samples/s]        
 100% |███████████████████| 10/10 [156.7ms elapsed, 0s remaining, 63.8 samples/s]     
Adding dataset fruits...
Training labels: ['apple', 'bag', 'banana', 'bell_pepper', 'chair', 'dishwasher', 'lemon', 'orange', 'orange_juice', 'peach', 'pear', 'person', 'plum', 'schweppes_lemon', 'strawberry', 'table']
Validation labels: ['apple', 'banana', 'bell_pepper', 'chair', 'lemon', 'orange', 'orange_juice', 'pear', 'plum']

 100% |███████████████████| 89/89 [1.3s elapsed, 0s remaining, 66.3 samples/s]         
 100% |███████████████████| 11/11 [89.7ms elapsed, 0s remaining, 122.6 samples/s]     
Adding dataset balls...
Training labels: ['baseball', 'golf_ball', 'person', 'racquetball', 'soccer_ball', 'soft_ball', 'tennis_ball']
Validation labels: ['baseball', 'golf_ball', 'soccer_ball', 'tennis_ball']

 100% |███████████████████| 88/88 [1.2s elapsed, 0s remaining, 70.7 samples/s]          
 100% |██████████

In [10]:
# final, dataset-wide customization of what was assembled above:
# label merges/renames and class filtering that concern all contributing datasets

CUSTOMIZE = True
if not CUSTOMIZE:
    # pass the combined datasets through untouched
    train_dataset_processed = combined_train_dataset
    valid_dataset_processed = combined_valid_dataset
else:
    # rename to the official class labels used at RoboCup@Home23 (dataset label -> official label)
    official_label_for = {
        'chocolate_pudding_box': 'chocolate_jello',
        'cleanser_bottle': 'cleanser',
        'coffee_can': 'coffee_grounds',
        'cracker_box': 'cheezit',
        'jacket_stand': 'clothes_rack',
        'mug': 'cup',
        'mustard_bottle': 'mustard',
        'potted_meat_can': 'spam',
        'strawberry_gelatin_box': 'strawberry_jello',
        'sugar_box': 'sugar',
        'tomato_soup_can': 'tomato_soup',
        'tuna_fish_can': 'tuna',
        'milk_bottle': 'milk',
    }
    old_class_labels = list(official_label_for.keys())
    new_class_labels = list(official_label_for.values())
    train_dataset_processed = merge_to_superclass(combined_train_dataset, new_class_labels, old_class_labels)
    valid_dataset_processed = merge_to_superclass(combined_valid_dataset, new_class_labels, old_class_labels)

    # afterwards keep only the classes that matter for the task at hand
    task_specific_classes = [
        'apple', 'banana', 'baseball', 'cereals', 'chocolate_jello', 'cleanser', 'coffee_grounds', 'cola',
        'couch_table', 'cheezit', 'dice', 'fork', 'iced_tea', 'juice_pack', 'knife', 'lemon', 'milk',
        'peach', 'mustard', 'orange', 'orange_juice', 'pear', 'plum', 'spam', 'pringles', 'rubiks_cube',
        'soccer_ball', 'sponge', 'spoon', 'strawberry', 'strawberry_jello', 'sugar', 'tennis_ball',
        'tomato_soup', 'tropical_juice', 'tuna', 'shelf', 'shelf_door', 'red_wine',
    ]

    train_dataset_processed = select_classes(train_dataset_processed, labels=task_specific_classes)
    valid_dataset_processed = select_classes(valid_dataset_processed, labels=task_specific_classes)

In [11]:
# hand the processed fiftyone datasets over to detectron2; the printed label
# dictionary doubles as a sanity check of the constructed dataset
classes = train_dataset_processed.distinct("segmentations.detections.label")
# class label -> integer index, following the order of `classes`
labels_dict = dict(zip(classes, range(len(classes))))
print('Label dictionary for complete dataset')
print(labels_dict)

# write the class list to disk for deployment
os.makedirs('output', exist_ok=True)
with open("output/classes.json", "w") as fp:
    json.dump(classes, fp)

# register both splits in the detectron2 catalogs
for dataset, tag in ((train_dataset_processed, "train"), (valid_dataset_processed, "valid")):
    view = dataset
    catalog_name = "fiftyone_" + tag
    # drop an earlier registration first so this cell can be re-run
    if catalog_name in DatasetCatalog.list():
        DatasetCatalog.remove(catalog_name)
    # `view=view` binds the current dataset at definition time; without it both
    # registered loaders would see the last value of the loop variable
    DatasetCatalog.register(catalog_name, lambda view=view: get_fiftyone_dicts(view, labels_dict))
    MetadataCatalog.get(catalog_name).set(thing_classes=classes, evaluator_type="coco")
metadata = MetadataCatalog.get("fiftyone_train")

Label dictionary for complete dataset
{'apple': 0, 'banana': 1, 'baseball': 2, 'cereals': 3, 'cheezit': 4, 'chocolate_jello': 5, 'cleanser': 6, 'coffee_grounds': 7, 'cola': 8, 'couch_table': 9, 'dice': 10, 'fork': 11, 'iced_tea': 12, 'juice_pack': 13, 'knife': 14, 'lemon': 15, 'milk': 16, 'mustard': 17, 'orange': 18, 'orange_juice': 19, 'peach': 20, 'pear': 21, 'plum': 22, 'pringles': 23, 'red_wine': 24, 'rubiks_cube': 25, 'shelf': 26, 'shelf_door': 27, 'soccer_ball': 28, 'spam': 29, 'sponge': 30, 'spoon': 31, 'strawberry': 32, 'strawberry_jello': 33, 'sugar': 34, 'tennis_ball': 35, 'tomato_soup': 36, 'tropical_juice': 37, 'tuna': 38}


#### Detector Training

Now that the task-specific dataset is ready, we can finetune a pretrained MaskDINO model on the selected data.

In [12]:
# setup and launch the trainer
from helpers.model_trainer import Trainer
import maskdino
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 

# in case our dataset was changed discard previous cache
# this stays best-effort: a file that cannot be removed is reported, not fatal
inference_cache_dir = "output/inference"
if os.path.isdir(inference_cache_dir):
    for item in os.listdir(inference_cache_dir):
        try:
            os.remove(os.path.join(inference_cache_dir, item))
        except OSError as err:
            print('Could not remove cached file ' + item + ': ' + str(err))

cfg = get_cfg()
add_deeplab_config(cfg)
maskdino.add_maskdino_config(cfg)
if model_type == 'swin':
    cfg.merge_from_file('data/robocup_bordeaux_2023/robocup_data/trained_models/conf/swin_config.yaml')
    cfg.MODEL.WEIGHTS = "data/robocup_bordeaux_2023/robocup_data/trained_models/coco_pretrained_maskdino/swin/maskdino_swinl_50ep_300q_hid2048_3sd1_instance_maskenhanced_mask52.3ap_box59.0ap.pth" 
    cfg.SOLVER.IMS_PER_BATCH = 4 # batch size
elif model_type == 'r50':
    cfg.merge_from_file('data/robocup_bordeaux_2023/robocup_data/trained_models/conf/r50_config.yaml')
    cfg.MODEL.WEIGHTS = "data/robocup_bordeaux_2023/robocup_data/trained_models/coco_pretrained_maskdino/resnet/maskdino_r50_50ep_300q_hid2048_3sd1_instance_maskenhanced_mask46.3ap_box51.7ap.pth"
    cfg.SOLVER.IMS_PER_BATCH = 4 # batch size
else:
    # without this branch a typo in model_type would silently train a model
    # with neither the backbone config nor the pretrained weights loaded
    raise ValueError("Unknown model_type " + repr(model_type) + "; expected 'swin' or 'r50'")
cfg.DATASETS.TRAIN = ('fiftyone_train',)
cfg.DATASETS.TEST = ('fiftyone_valid',)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES = len(classes)
cfg.SOLVER.MAX_ITER = 2400  # corresponds to about 30 minutes of finetuning on an RTX4090
cfg.SOLVER.STEPS = (800, 1400, 2000) # when to decay learning rate

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
TRAIN = True
if TRAIN:
    trainer = Trainer(cfg)
    # resume=False: start from cfg.MODEL.WEIGHTS rather than a previous checkpoint
    trainer.resume_or_load(resume=False)
    trainer.train()

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


criterion.weight_dict  {'loss_ce': 4.0, 'loss_mask': 5.0, 'loss_dice': 5.0, 'loss_bbox': 5.0, 'loss_giou': 2.0, 'loss_ce_interm': 4.0, 'loss_mask_interm': 5.0, 'loss_dice_interm': 5.0, 'loss_bbox_interm': 5.0, 'loss_giou_interm': 2.0, 'loss_ce_dn': 4.0, 'loss_mask_dn': 5.0, 'loss_dice_dn': 5.0, 'loss_bbox_dn': 5.0, 'loss_giou_dn': 2.0, 'loss_ce_interm_dn': 4.0, 'loss_mask_interm_dn': 5.0, 'loss_dice_interm_dn': 5.0, 'loss_bbox_interm_dn': 5.0, 'loss_giou_interm_dn': 2.0, 'loss_ce_0': 4.0, 'loss_mask_0': 5.0, 'loss_dice_0': 5.0, 'loss_bbox_0': 5.0, 'loss_giou_0': 2.0, 'loss_ce_interm_0': 4.0, 'loss_mask_interm_0': 5.0, 'loss_dice_interm_0': 5.0, 'loss_bbox_interm_0': 5.0, 'loss_giou_interm_0': 2.0, 'loss_ce_dn_0': 4.0, 'loss_mask_dn_0': 5.0, 'loss_dice_dn_0': 5.0, 'loss_bbox_dn_0': 5.0, 'loss_giou_dn_0': 2.0, 'loss_ce_interm_dn_0': 4.0, 'loss_mask_interm_dn_0': 5.0, 'loss_dice_interm_dn_0': 5.0, 'loss_bbox_interm_dn_0': 5.0, 'loss_giou_interm_dn_0': 2.0, 'loss_ce_1': 4.0, 'loss_mask_1':

[32m[07/13 11:11:29 d2.data.build]: [0mRemoved 1 images with no usable annotations. 1512 images left.
[32m[07/13 11:11:29 d2.data.build]: [0mDistribution of instances among all 39 categories:
[36m|   category    | #instances   |   category    | #instances   |   category    | #instances   |
|:-------------:|:-------------|:-------------:|:-------------|:-------------:|:-------------|
|     apple     | 417          |    banana     | 139          |   baseball    | 234          |
|    cereals    | 1071         |    cheezit    | 258          | chocolate_j.. | 194          |
|   cleanser    | 178          | coffee_grou.. | 311          |     cola      | 144          |
|  couch_table  | 147          |     dice      | 73           |     fork      | 397          |
|   iced_tea    | 146          |  juice_pack   | 149          |     knife     | 390          |
|     lemon     | 336          |     milk      | 241          |    mustard    | 162          |
|    orange     | 277          | orange

Skip loading parameter 'sem_seg_head.predictor.class_embed.weight' to the model due to incompatible shapes: (80, 256) in the checkpoint but (39, 256) in the model! You might want to double check if this is expected.
Skip loading parameter 'sem_seg_head.predictor.class_embed.bias' to the model due to incompatible shapes: (80,) in the checkpoint but (39,) in the model! You might want to double check if this is expected.
Skip loading parameter 'sem_seg_head.predictor.label_enc.weight' to the model due to incompatible shapes: (80, 256) in the checkpoint but (39, 256) in the model! You might want to double check if this is expected.
Skip loading parameter 'criterion.empty_weight' to the model due to incompatible shapes: (81,) in the checkpoint but (40,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
[34mcriterion.empty_weight[0m
[34msem_seg_head.predictor.class_embed.{bias, weight}[0m
[34msem_seg_head.

[32m[07/13 11:11:29 d2.engine.train_loop]: [0mStarting training from iteration 0




[32m[07/13 11:11:48 d2.utils.events]: [0m eta: 0:29:42  iter: 19  total_loss: 5923  loss_ce: 459  loss_mask: 0.8046  loss_dice: 2.597  loss_bbox: 1.214  loss_giou: 1.742  loss_ce_dn: 48.84  loss_mask_dn: 0.1484  loss_dice_dn: 0.7376  loss_bbox_dn: 0.1079  loss_giou_dn: 0.3437  loss_ce_0: 477.8  loss_mask_0: 1.341  loss_dice_0: 4.244  loss_bbox_0: 1.326  loss_giou_0: 2.248  loss_ce_dn_0: 43.81  loss_mask_dn_0: 1.165  loss_dice_dn_0: 4.725  loss_bbox_dn_0: 0.3508  loss_giou_dn_0: 0.8503  loss_ce_1: 543.7  loss_mask_1: 1.121  loss_dice_1: 3.792  loss_bbox_1: 1.436  loss_giou_1: 2.098  loss_ce_dn_1: 53.86  loss_mask_dn_1: 0.4731  loss_dice_dn_1: 1.597  loss_bbox_dn_1: 0.2359  loss_giou_dn_1: 0.6011  loss_ce_2: 518.3  loss_mask_2: 0.9685  loss_dice_2: 3.575  loss_bbox_2: 1.403  loss_giou_2: 2.067  loss_ce_dn_2: 51.5  loss_mask_dn_2: 0.2135  loss_dice_dn_2: 0.8561  loss_bbox_dn_2: 0.1862  loss_giou_dn_2: 0.4675  loss_ce_3: 495.6  loss_mask_3: 0.9404  loss_dice_3: 3.415  loss_bbox_3: 1.403 

[32m[07/13 11:12:51 d2.utils.events]: [0m eta: 0:29:38  iter: 99  total_loss: 160.4  loss_ce: 2.858  loss_mask: 0.7085  loss_dice: 2.444  loss_bbox: 0.9981  loss_giou: 1.353  loss_ce_dn: 1.996  loss_mask_dn: 0.1536  loss_dice_dn: 0.6132  loss_bbox_dn: 0.1126  loss_giou_dn: 0.2771  loss_ce_0: 6.744  loss_mask_0: 0.8277  loss_dice_0: 2.637  loss_bbox_0: 1.173  loss_giou_0: 1.436  loss_ce_dn_0: 18.64  loss_mask_dn_0: 0.9692  loss_dice_dn_0: 4.659  loss_bbox_dn_0: 0.3746  loss_giou_dn_0: 0.8576  loss_ce_1: 3.084  loss_mask_1: 0.9013  loss_dice_1: 2.576  loss_bbox_1: 1.106  loss_giou_1: 1.46  loss_ce_dn_1: 1.928  loss_mask_dn_1: 0.1552  loss_dice_dn_1: 0.6929  loss_bbox_dn_1: 0.1802  loss_giou_dn_1: 0.4719  loss_ce_2: 2.957  loss_mask_2: 0.8823  loss_dice_2: 2.45  loss_bbox_2: 1.096  loss_giou_2: 1.364  loss_ce_dn_2: 1.919  loss_mask_dn_2: 0.1298  loss_dice_dn_2: 0.5793  loss_bbox_dn_2: 0.1338  loss_giou_dn_2: 0.3368  loss_ce_3: 2.951  loss_mask_3: 0.7922  loss_dice_3: 2.427  loss_bbox_3:

[32m[07/13 11:13:55 d2.utils.events]: [0m eta: 0:28:53  iter: 179  total_loss: 73.81  loss_ce: 1.733  loss_mask: 0.07292  loss_dice: 0.8007  loss_bbox: 0.09462  loss_giou: 0.467  loss_ce_dn: 1.116  loss_mask_dn: 0.06015  loss_dice_dn: 0.5522  loss_bbox_dn: 0.04301  loss_giou_dn: 0.257  loss_ce_0: 2.222  loss_mask_0: 0.06588  loss_dice_0: 0.8566  loss_bbox_0: 0.1268  loss_giou_0: 0.5627  loss_ce_dn_0: 10.71  loss_mask_dn_0: 0.4619  loss_dice_dn_0: 4.206  loss_bbox_dn_0: 0.2525  loss_giou_dn_0: 0.8613  loss_ce_1: 2.042  loss_mask_1: 0.06736  loss_dice_1: 0.8862  loss_bbox_1: 0.1003  loss_giou_1: 0.4732  loss_ce_dn_1: 1.226  loss_mask_dn_1: 0.07449  loss_dice_dn_1: 0.6616  loss_bbox_dn_1: 0.09939  loss_giou_dn_1: 0.4611  loss_ce_2: 1.933  loss_mask_2: 0.0684  loss_dice_2: 0.8655  loss_bbox_2: 0.09733  loss_giou_2: 0.4676  loss_ce_dn_2: 1.054  loss_mask_dn_2: 0.07496  loss_dice_dn_2: 0.594  loss_bbox_dn_2: 0.06094  loss_giou_dn_2: 0.322  loss_ce_3: 1.851  loss_mask_3: 0.08415  loss_dice_

[32m[07/13 11:14:58 d2.utils.events]: [0m eta: 0:27:53  iter: 259  total_loss: 49.14  loss_ce: 1.281  loss_mask: 0.06543  loss_dice: 0.4279  loss_bbox: 0.05621  loss_giou: 0.2599  loss_ce_dn: 0.5548  loss_mask_dn: 0.06901  loss_dice_dn: 0.3887  loss_bbox_dn: 0.04337  loss_giou_dn: 0.1726  loss_ce_0: 1.716  loss_mask_0: 0.04925  loss_dice_0: 0.4742  loss_bbox_0: 0.07607  loss_giou_0: 0.3293  loss_ce_dn_0: 6.912  loss_mask_dn_0: 0.8401  loss_dice_dn_0: 3.934  loss_bbox_dn_0: 0.3978  loss_giou_dn_0: 0.8518  loss_ce_1: 1.541  loss_mask_1: 0.06205  loss_dice_1: 0.4755  loss_bbox_1: 0.07538  loss_giou_1: 0.2857  loss_ce_dn_1: 0.7024  loss_mask_dn_1: 0.06716  loss_dice_dn_1: 0.5033  loss_bbox_dn_1: 0.1179  loss_giou_dn_1: 0.3867  loss_ce_2: 1.403  loss_mask_2: 0.06893  loss_dice_2: 0.4684  loss_bbox_2: 0.07074  loss_giou_2: 0.2714  loss_ce_dn_2: 0.5331  loss_mask_dn_2: 0.06546  loss_dice_dn_2: 0.4201  loss_bbox_dn_2: 0.06541  loss_giou_dn_2: 0.2443  loss_ce_3: 1.381  loss_mask_3: 0.07931  l

[32m[07/13 11:16:01 d2.utils.events]: [0m eta: 0:26:51  iter: 339  total_loss: 44.23  loss_ce: 1.058  loss_mask: 0.09724  loss_dice: 0.4453  loss_bbox: 0.05235  loss_giou: 0.2674  loss_ce_dn: 0.3426  loss_mask_dn: 0.08161  loss_dice_dn: 0.4239  loss_bbox_dn: 0.03368  loss_giou_dn: 0.1937  loss_ce_0: 1.489  loss_mask_0: 0.08411  loss_dice_0: 0.5234  loss_bbox_0: 0.0773  loss_giou_0: 0.3539  loss_ce_dn_0: 5.028  loss_mask_dn_0: 0.5879  loss_dice_dn_0: 3.698  loss_bbox_dn_0: 0.3401  loss_giou_dn_0: 0.8492  loss_ce_1: 1.339  loss_mask_1: 0.09238  loss_dice_1: 0.5232  loss_bbox_1: 0.07617  loss_giou_1: 0.3631  loss_ce_dn_1: 0.5727  loss_mask_dn_1: 0.07726  loss_dice_dn_1: 0.5188  loss_bbox_dn_1: 0.09621  loss_giou_dn_1: 0.346  loss_ce_2: 1.199  loss_mask_2: 0.07733  loss_dice_2: 0.5738  loss_bbox_2: 0.06465  loss_giou_2: 0.2818  loss_ce_dn_2: 0.4297  loss_mask_dn_2: 0.07443  loss_dice_dn_2: 0.4463  loss_bbox_dn_2: 0.05535  loss_giou_dn_2: 0.2499  loss_ce_3: 1.108  loss_mask_3: 0.08038  lo

[32m[07/13 11:17:05 d2.utils.events]: [0m eta: 0:25:48  iter: 419  total_loss: 46.21  loss_ce: 0.9875  loss_mask: 0.1043  loss_dice: 0.5378  loss_bbox: 0.07691  loss_giou: 0.2902  loss_ce_dn: 0.3439  loss_mask_dn: 0.06852  loss_dice_dn: 0.4614  loss_bbox_dn: 0.04132  loss_giou_dn: 0.2038  loss_ce_0: 1.449  loss_mask_0: 0.1081  loss_dice_0: 0.5587  loss_bbox_0: 0.07942  loss_giou_0: 0.4424  loss_ce_dn_0: 4.052  loss_mask_dn_0: 0.7075  loss_dice_dn_0: 3.527  loss_bbox_dn_0: 0.3572  loss_giou_dn_0: 0.8544  loss_ce_1: 1.246  loss_mask_1: 0.1286  loss_dice_1: 0.5662  loss_bbox_1: 0.09006  loss_giou_1: 0.3572  loss_ce_dn_1: 0.4763  loss_mask_dn_1: 0.09796  loss_dice_dn_1: 0.5626  loss_bbox_dn_1: 0.1064  loss_giou_dn_1: 0.3497  loss_ce_2: 1.141  loss_mask_2: 0.1036  loss_dice_2: 0.642  loss_bbox_2: 0.07989  loss_giou_2: 0.331  loss_ce_dn_2: 0.3646  loss_mask_dn_2: 0.08519  loss_dice_dn_2: 0.4922  loss_bbox_dn_2: 0.06694  loss_giou_dn_2: 0.2581  loss_ce_3: 1.065  loss_mask_3: 0.09728  loss_d

[32m[07/13 11:18:08 d2.utils.events]: [0m eta: 0:24:46  iter: 499  total_loss: 37.53  loss_ce: 0.9206  loss_mask: 0.04815  loss_dice: 0.4556  loss_bbox: 0.06666  loss_giou: 0.2554  loss_ce_dn: 0.2902  loss_mask_dn: 0.03436  loss_dice_dn: 0.4074  loss_bbox_dn: 0.03578  loss_giou_dn: 0.1757  loss_ce_0: 1.318  loss_mask_0: 0.05323  loss_dice_0: 0.5441  loss_bbox_0: 0.08112  loss_giou_0: 0.3897  loss_ce_dn_0: 3.383  loss_mask_dn_0: 0.4588  loss_dice_dn_0: 3.408  loss_bbox_dn_0: 0.3186  loss_giou_dn_0: 0.8562  loss_ce_1: 1.192  loss_mask_1: 0.05278  loss_dice_1: 0.5314  loss_bbox_1: 0.07257  loss_giou_1: 0.327  loss_ce_dn_1: 0.4459  loss_mask_dn_1: 0.03776  loss_dice_dn_1: 0.5093  loss_bbox_dn_1: 0.08937  loss_giou_dn_1: 0.3308  loss_ce_2: 1.058  loss_mask_2: 0.05586  loss_dice_2: 0.5006  loss_bbox_2: 0.06656  loss_giou_2: 0.2649  loss_ce_dn_2: 0.3503  loss_mask_dn_2: 0.03382  loss_dice_dn_2: 0.4339  loss_bbox_dn_2: 0.05614  loss_giou_dn_2: 0.2424  loss_ce_3: 1.038  loss_mask_3: 0.04757  

[32m[07/13 11:19:11 d2.utils.events]: [0m eta: 0:23:44  iter: 579  total_loss: 35.33  loss_ce: 0.7799  loss_mask: 0.06  loss_dice: 0.42  loss_bbox: 0.05682  loss_giou: 0.1962  loss_ce_dn: 0.2085  loss_mask_dn: 0.05664  loss_dice_dn: 0.3675  loss_bbox_dn: 0.03924  loss_giou_dn: 0.1574  loss_ce_0: 1.219  loss_mask_0: 0.05713  loss_dice_0: 0.4559  loss_bbox_0: 0.07249  loss_giou_0: 0.2573  loss_ce_dn_0: 2.925  loss_mask_dn_0: 0.6121  loss_dice_dn_0: 3.197  loss_bbox_dn_0: 0.4056  loss_giou_dn_0: 0.8458  loss_ce_1: 1.048  loss_mask_1: 0.05988  loss_dice_1: 0.4676  loss_bbox_1: 0.06547  loss_giou_1: 0.224  loss_ce_dn_1: 0.4038  loss_mask_dn_1: 0.05947  loss_dice_dn_1: 0.5157  loss_bbox_dn_1: 0.1031  loss_giou_dn_1: 0.2985  loss_ce_2: 0.9823  loss_mask_2: 0.06132  loss_dice_2: 0.4663  loss_bbox_2: 0.06713  loss_giou_2: 0.2208  loss_ce_dn_2: 0.3115  loss_mask_dn_2: 0.06006  loss_dice_dn_2: 0.4395  loss_bbox_dn_2: 0.06329  loss_giou_dn_2: 0.2169  loss_ce_3: 0.9013  loss_mask_3: 0.05971  loss

[32m[07/13 11:20:15 d2.utils.events]: [0m eta: 0:22:42  iter: 659  total_loss: 37.95  loss_ce: 0.819  loss_mask: 0.03757  loss_dice: 0.5824  loss_bbox: 0.04849  loss_giou: 0.3094  loss_ce_dn: 0.2299  loss_mask_dn: 0.03005  loss_dice_dn: 0.4728  loss_bbox_dn: 0.02664  loss_giou_dn: 0.2188  loss_ce_0: 1.294  loss_mask_0: 0.0317  loss_dice_0: 0.6211  loss_bbox_0: 0.06046  loss_giou_0: 0.4569  loss_ce_dn_0: 2.385  loss_mask_dn_0: 0.2801  loss_dice_dn_0: 3.093  loss_bbox_dn_0: 0.2417  loss_giou_dn_0: 0.8551  loss_ce_1: 1.179  loss_mask_1: 0.03465  loss_dice_1: 0.5623  loss_bbox_1: 0.04693  loss_giou_1: 0.3687  loss_ce_dn_1: 0.3636  loss_mask_dn_1: 0.03501  loss_dice_dn_1: 0.5277  loss_bbox_dn_1: 0.06488  loss_giou_dn_1: 0.3565  loss_ce_2: 0.9951  loss_mask_2: 0.03401  loss_dice_2: 0.5986  loss_bbox_2: 0.04428  loss_giou_2: 0.339  loss_ce_dn_2: 0.3005  loss_mask_dn_2: 0.0334  loss_dice_dn_2: 0.4793  loss_bbox_dn_2: 0.04421  loss_giou_dn_2: 0.263  loss_ce_3: 0.954  loss_mask_3: 0.03124  los

[32m[07/13 11:21:18 d2.utils.events]: [0m eta: 0:21:40  iter: 739  total_loss: 35.46  loss_ce: 0.7467  loss_mask: 0.03676  loss_dice: 0.5306  loss_bbox: 0.042  loss_giou: 0.29  loss_ce_dn: 0.1668  loss_mask_dn: 0.03029  loss_dice_dn: 0.4517  loss_bbox_dn: 0.02897  loss_giou_dn: 0.2092  loss_ce_0: 1.129  loss_mask_0: 0.03759  loss_dice_0: 0.4694  loss_bbox_0: 0.06404  loss_giou_0: 0.4288  loss_ce_dn_0: 2.002  loss_mask_dn_0: 0.3318  loss_dice_dn_0: 3.021  loss_bbox_dn_0: 0.289  loss_giou_dn_0: 0.855  loss_ce_1: 1.031  loss_mask_1: 0.04093  loss_dice_1: 0.4586  loss_bbox_1: 0.05634  loss_giou_1: 0.3017  loss_ce_dn_1: 0.3004  loss_mask_dn_1: 0.03659  loss_dice_dn_1: 0.475  loss_bbox_dn_1: 0.0653  loss_giou_dn_1: 0.3231  loss_ce_2: 0.9215  loss_mask_2: 0.03785  loss_dice_2: 0.5782  loss_bbox_2: 0.04786  loss_giou_2: 0.3019  loss_ce_dn_2: 0.2278  loss_mask_dn_2: 0.03399  loss_dice_dn_2: 0.4453  loss_bbox_dn_2: 0.03955  loss_giou_dn_2: 0.233  loss_ce_3: 0.8382  loss_mask_3: 0.04053  loss_d

[32m[07/13 11:22:22 d2.utils.events]: [0m eta: 0:20:38  iter: 819  total_loss: 35.02  loss_ce: 0.6241  loss_mask: 0.0614  loss_dice: 0.4352  loss_bbox: 0.07231  loss_giou: 0.2241  loss_ce_dn: 0.1553  loss_mask_dn: 0.06282  loss_dice_dn: 0.3822  loss_bbox_dn: 0.0366  loss_giou_dn: 0.1569  loss_ce_0: 1.016  loss_mask_0: 0.06742  loss_dice_0: 0.4698  loss_bbox_0: 0.09545  loss_giou_0: 0.3388  loss_ce_dn_0: 1.905  loss_mask_dn_0: 0.5292  loss_dice_dn_0: 2.949  loss_bbox_dn_0: 0.3729  loss_giou_dn_0: 0.8493  loss_ce_1: 0.9279  loss_mask_1: 0.06198  loss_dice_1: 0.4743  loss_bbox_1: 0.08994  loss_giou_1: 0.2539  loss_ce_dn_1: 0.2933  loss_mask_dn_1: 0.07313  loss_dice_dn_1: 0.4591  loss_bbox_dn_1: 0.08609  loss_giou_dn_1: 0.3048  loss_ce_2: 0.8151  loss_mask_2: 0.06467  loss_dice_2: 0.466  loss_bbox_2: 0.08161  loss_giou_2: 0.2417  loss_ce_dn_2: 0.2076  loss_mask_dn_2: 0.06884  loss_dice_dn_2: 0.3882  loss_bbox_dn_2: 0.05729  loss_giou_dn_2: 0.2212  loss_ce_3: 0.7478  loss_mask_3: 0.05824 

[32m[07/13 11:23:26 d2.utils.events]: [0m eta: 0:19:35  iter: 899  total_loss: 34.34  loss_ce: 0.6024  loss_mask: 0.03792  loss_dice: 0.5046  loss_bbox: 0.05427  loss_giou: 0.2498  loss_ce_dn: 0.1683  loss_mask_dn: 0.03389  loss_dice_dn: 0.3411  loss_bbox_dn: 0.02831  loss_giou_dn: 0.1503  loss_ce_0: 1.069  loss_mask_0: 0.04541  loss_dice_0: 0.5225  loss_bbox_0: 0.08711  loss_giou_0: 0.3907  loss_ce_dn_0: 1.904  loss_mask_dn_0: 0.4723  loss_dice_dn_0: 2.984  loss_bbox_dn_0: 0.3091  loss_giou_dn_0: 0.852  loss_ce_1: 0.928  loss_mask_1: 0.04659  loss_dice_1: 0.5285  loss_bbox_1: 0.06182  loss_giou_1: 0.2959  loss_ce_dn_1: 0.3238  loss_mask_dn_1: 0.04143  loss_dice_dn_1: 0.386  loss_bbox_dn_1: 0.06071  loss_giou_dn_1: 0.2716  loss_ce_2: 0.8115  loss_mask_2: 0.04778  loss_dice_2: 0.536  loss_bbox_2: 0.06002  loss_giou_2: 0.292  loss_ce_dn_2: 0.2391  loss_mask_dn_2: 0.03616  loss_dice_dn_2: 0.3692  loss_bbox_dn_2: 0.03804  loss_giou_dn_2: 0.187  loss_ce_3: 0.6912  loss_mask_3: 0.04252  lo

[32m[07/13 11:24:29 d2.utils.events]: [0m eta: 0:18:33  iter: 979  total_loss: 30.74  loss_ce: 0.5564  loss_mask: 0.03582  loss_dice: 0.3474  loss_bbox: 0.03588  loss_giou: 0.1784  loss_ce_dn: 0.1098  loss_mask_dn: 0.03692  loss_dice_dn: 0.342  loss_bbox_dn: 0.03092  loss_giou_dn: 0.1565  loss_ce_0: 0.984  loss_mask_0: 0.03714  loss_dice_0: 0.3822  loss_bbox_0: 0.05312  loss_giou_0: 0.324  loss_ce_dn_0: 1.825  loss_mask_dn_0: 0.4763  loss_dice_dn_0: 2.856  loss_bbox_dn_0: 0.3658  loss_giou_dn_0: 0.8602  loss_ce_1: 0.9022  loss_mask_1: 0.04006  loss_dice_1: 0.3731  loss_bbox_1: 0.04312  loss_giou_1: 0.2411  loss_ce_dn_1: 0.2582  loss_mask_dn_1: 0.043  loss_dice_dn_1: 0.3753  loss_bbox_dn_1: 0.07934  loss_giou_dn_1: 0.281  loss_ce_2: 0.7411  loss_mask_2: 0.04362  loss_dice_2: 0.3788  loss_bbox_2: 0.03888  loss_giou_2: 0.2109  loss_ce_dn_2: 0.1732  loss_mask_dn_2: 0.04153  loss_dice_dn_2: 0.3521  loss_bbox_dn_2: 0.04756  loss_giou_dn_2: 0.2033  loss_ce_3: 0.6238  loss_mask_3: 0.04092  l

[32m[07/13 11:25:05 d2.evaluation.coco_evaluation]: [0mPer-category bbox AP: 
| category         | AP     | category       | AP     | category        | AP     |
|:-----------------|:-------|:---------------|:-------|:----------------|:-------|
| apple            | 85.561 | banana         | 94.990 | baseball        | 71.931 |
| cereals          | 82.016 | cheezit        | 84.017 | chocolate_jello | 76.124 |
| cleanser         | 78.422 | coffee_grounds | 69.196 | cola            | 87.309 |
| couch_table      | 93.069 | dice           | 70.728 | fork            | 82.589 |
| iced_tea         | 99.233 | juice_pack     | 85.815 | knife           | 75.195 |
| lemon            | 88.112 | milk           | 82.198 | mustard         | 86.694 |
| orange           | 65.759 | orange_juice   | 98.216 | peach           | 76.609 |
| pear             | 85.901 | plum           | 89.311 | pringles        | 94.137 |
| red_wine         | 90.089 | rubiks_cube    | 84.841 | shelf           | 96.179 |
| shelf

[32m[07/13 11:25:36 d2.utils.events]: [0m eta: 0:17:46  iter: 1039  total_loss: 25.17  loss_ce: 0.4579  loss_mask: 0.05114  loss_dice: 0.2886  loss_bbox: 0.03645  loss_giou: 0.1312  loss_ce_dn: 0.1208  loss_mask_dn: 0.05071  loss_dice_dn: 0.2785  loss_bbox_dn: 0.03127  loss_giou_dn: 0.1219  loss_ce_0: 0.8757  loss_mask_0: 0.054  loss_dice_0: 0.2924  loss_bbox_0: 0.04873  loss_giou_0: 0.2318  loss_ce_dn_0: 1.829  loss_mask_dn_0: 0.5828  loss_dice_dn_0: 2.76  loss_bbox_dn_0: 0.3797  loss_giou_dn_0: 0.8492  loss_ce_1: 0.7379  loss_mask_1: 0.05165  loss_dice_1: 0.2887  loss_bbox_1: 0.04237  loss_giou_1: 0.1667  loss_ce_dn_1: 0.2558  loss_mask_dn_1: 0.06421  loss_dice_dn_1: 0.3173  loss_bbox_dn_1: 0.07361  loss_giou_dn_1: 0.2448  loss_ce_2: 0.6163  loss_mask_2: 0.05172  loss_dice_2: 0.2972  loss_bbox_2: 0.03949  loss_giou_2: 0.1461  loss_ce_dn_2: 0.1833  loss_mask_dn_2: 0.05836  loss_dice_dn_2: 0.2963  loss_bbox_dn_2: 0.04736  loss_giou_dn_2: 0.16  loss_ce_3: 0.5413  loss_mask_3: 0.05009 

[32m[07/13 11:26:40 d2.utils.events]: [0m eta: 0:16:44  iter: 1119  total_loss: 32.48  loss_ce: 0.6272  loss_mask: 0.05121  loss_dice: 0.453  loss_bbox: 0.03895  loss_giou: 0.2359  loss_ce_dn: 0.1404  loss_mask_dn: 0.0423  loss_dice_dn: 0.3595  loss_bbox_dn: 0.03164  loss_giou_dn: 0.1812  loss_ce_0: 1.053  loss_mask_0: 0.06274  loss_dice_0: 0.5752  loss_bbox_0: 0.05985  loss_giou_0: 0.4224  loss_ce_dn_0: 1.95  loss_mask_dn_0: 0.5  loss_dice_dn_0: 3.002  loss_bbox_dn_0: 0.3224  loss_giou_dn_0: 0.8537  loss_ce_1: 0.9329  loss_mask_1: 0.04607  loss_dice_1: 0.4705  loss_bbox_1: 0.05273  loss_giou_1: 0.2917  loss_ce_dn_1: 0.2923  loss_mask_dn_1: 0.04151  loss_dice_dn_1: 0.3783  loss_bbox_dn_1: 0.06872  loss_giou_dn_1: 0.291  loss_ce_2: 0.8137  loss_mask_2: 0.05009  loss_dice_2: 0.5049  loss_bbox_2: 0.05195  loss_giou_2: 0.2658  loss_ce_dn_2: 0.2218  loss_mask_dn_2: 0.03991  loss_dice_dn_2: 0.3719  loss_bbox_dn_2: 0.0411  loss_giou_dn_2: 0.2229  loss_ce_3: 0.732  loss_mask_3: 0.04949  loss

[32m[07/13 11:27:43 d2.utils.events]: [0m eta: 0:15:41  iter: 1199  total_loss: 30.88  loss_ce: 0.5689  loss_mask: 0.03992  loss_dice: 0.4282  loss_bbox: 0.04898  loss_giou: 0.2269  loss_ce_dn: 0.144  loss_mask_dn: 0.03631  loss_dice_dn: 0.402  loss_bbox_dn: 0.0289  loss_giou_dn: 0.1794  loss_ce_0: 0.965  loss_mask_0: 0.03847  loss_dice_0: 0.4618  loss_bbox_0: 0.07707  loss_giou_0: 0.3951  loss_ce_dn_0: 1.958  loss_mask_dn_0: 0.4711  loss_dice_dn_0: 2.853  loss_bbox_dn_0: 0.286  loss_giou_dn_0: 0.8602  loss_ce_1: 0.8654  loss_mask_1: 0.04665  loss_dice_1: 0.4631  loss_bbox_1: 0.0701  loss_giou_1: 0.2626  loss_ce_dn_1: 0.2863  loss_mask_dn_1: 0.0348  loss_dice_dn_1: 0.4338  loss_bbox_dn_1: 0.07031  loss_giou_dn_1: 0.2862  loss_ce_2: 0.7329  loss_mask_2: 0.03991  loss_dice_2: 0.4698  loss_bbox_2: 0.05435  loss_giou_2: 0.2484  loss_ce_dn_2: 0.2026  loss_mask_dn_2: 0.03552  loss_dice_dn_2: 0.4166  loss_bbox_dn_2: 0.04177  loss_giou_dn_2: 0.2192  loss_ce_3: 0.6429  loss_mask_3: 0.03733  l

[32m[07/13 11:28:46 d2.utils.events]: [0m eta: 0:14:38  iter: 1279  total_loss: 28.89  loss_ce: 0.4499  loss_mask: 0.06212  loss_dice: 0.417  loss_bbox: 0.04129  loss_giou: 0.1882  loss_ce_dn: 0.09904  loss_mask_dn: 0.06102  loss_dice_dn: 0.3661  loss_bbox_dn: 0.03062  loss_giou_dn: 0.1568  loss_ce_0: 0.9191  loss_mask_0: 0.06742  loss_dice_0: 0.464  loss_bbox_0: 0.07549  loss_giou_0: 0.3351  loss_ce_dn_0: 1.812  loss_mask_dn_0: 0.5081  loss_dice_dn_0: 2.997  loss_bbox_dn_0: 0.3941  loss_giou_dn_0: 0.8535  loss_ce_1: 0.8191  loss_mask_1: 0.06179  loss_dice_1: 0.4534  loss_bbox_1: 0.04976  loss_giou_1: 0.2515  loss_ce_dn_1: 0.2471  loss_mask_dn_1: 0.07308  loss_dice_dn_1: 0.4257  loss_bbox_dn_1: 0.07588  loss_giou_dn_1: 0.2693  loss_ce_2: 0.6605  loss_mask_2: 0.05989  loss_dice_2: 0.3972  loss_bbox_2: 0.05494  loss_giou_2: 0.2302  loss_ce_dn_2: 0.1728  loss_mask_dn_2: 0.06483  loss_dice_dn_2: 0.4005  loss_bbox_dn_2: 0.04941  loss_giou_dn_2: 0.199  loss_ce_3: 0.5579  loss_mask_3: 0.061

[32m[07/13 11:29:50 d2.utils.events]: [0m eta: 0:13:35  iter: 1359  total_loss: 32.62  loss_ce: 0.5713  loss_mask: 0.05508  loss_dice: 0.4716  loss_bbox: 0.03819  loss_giou: 0.2156  loss_ce_dn: 0.1486  loss_mask_dn: 0.04271  loss_dice_dn: 0.414  loss_bbox_dn: 0.0353  loss_giou_dn: 0.1778  loss_ce_0: 0.9645  loss_mask_0: 0.04884  loss_dice_0: 0.5663  loss_bbox_0: 0.07992  loss_giou_0: 0.3998  loss_ce_dn_0: 1.783  loss_mask_dn_0: 0.5149  loss_dice_dn_0: 2.869  loss_bbox_dn_0: 0.3981  loss_giou_dn_0: 0.8531  loss_ce_1: 0.8448  loss_mask_1: 0.06465  loss_dice_1: 0.4835  loss_bbox_1: 0.06483  loss_giou_1: 0.2739  loss_ce_dn_1: 0.2873  loss_mask_dn_1: 0.05096  loss_dice_dn_1: 0.4035  loss_bbox_dn_1: 0.07706  loss_giou_dn_1: 0.288  loss_ce_2: 0.7077  loss_mask_2: 0.05045  loss_dice_2: 0.4185  loss_bbox_2: 0.05616  loss_giou_2: 0.241  loss_ce_dn_2: 0.2155  loss_mask_dn_2: 0.04501  loss_dice_dn_2: 0.4077  loss_bbox_dn_2: 0.04719  loss_giou_dn_2: 0.2118  loss_ce_3: 0.6159  loss_mask_3: 0.0568 

[32m[07/13 11:30:54 d2.utils.events]: [0m eta: 0:12:33  iter: 1439  total_loss: 34.49  loss_ce: 0.6593  loss_mask: 0.04811  loss_dice: 0.4682  loss_bbox: 0.0554  loss_giou: 0.2422  loss_ce_dn: 0.1696  loss_mask_dn: 0.0421  loss_dice_dn: 0.355  loss_bbox_dn: 0.03138  loss_giou_dn: 0.1462  loss_ce_0: 1.039  loss_mask_0: 0.05102  loss_dice_0: 0.4284  loss_bbox_0: 0.07617  loss_giou_0: 0.3207  loss_ce_dn_0: 1.782  loss_mask_dn_0: 0.5479  loss_dice_dn_0: 2.956  loss_bbox_dn_0: 0.399  loss_giou_dn_0: 0.8534  loss_ce_1: 0.9174  loss_mask_1: 0.0581  loss_dice_1: 0.4265  loss_bbox_1: 0.0552  loss_giou_1: 0.2451  loss_ce_dn_1: 0.2858  loss_mask_dn_1: 0.05281  loss_dice_dn_1: 0.3966  loss_bbox_dn_1: 0.08182  loss_giou_dn_1: 0.2707  loss_ce_2: 0.8087  loss_mask_2: 0.0527  loss_dice_2: 0.4372  loss_bbox_2: 0.06719  loss_giou_2: 0.2547  loss_ce_dn_2: 0.2175  loss_mask_dn_2: 0.04685  loss_dice_dn_2: 0.3621  loss_bbox_dn_2: 0.04539  loss_giou_dn_2: 0.1903  loss_ce_3: 0.6946  loss_mask_3: 0.04866  lo

[32m[07/13 11:31:57 d2.utils.events]: [0m eta: 0:11:30  iter: 1519  total_loss: 22.76  loss_ce: 0.4168  loss_mask: 0.05527  loss_dice: 0.2486  loss_bbox: 0.036  loss_giou: 0.1184  loss_ce_dn: 0.08531  loss_mask_dn: 0.05942  loss_dice_dn: 0.2342  loss_bbox_dn: 0.03321  loss_giou_dn: 0.1134  loss_ce_0: 0.8847  loss_mask_0: 0.0604  loss_dice_0: 0.2714  loss_bbox_0: 0.04984  loss_giou_0: 0.2006  loss_ce_dn_0: 1.771  loss_mask_dn_0: 0.7653  loss_dice_dn_0: 2.49  loss_bbox_dn_0: 0.4456  loss_giou_dn_0: 0.8499  loss_ce_1: 0.706  loss_mask_1: 0.06289  loss_dice_1: 0.2686  loss_bbox_1: 0.04422  loss_giou_1: 0.1524  loss_ce_dn_1: 0.2276  loss_mask_dn_1: 0.06097  loss_dice_dn_1: 0.2755  loss_bbox_dn_1: 0.09357  loss_giou_dn_1: 0.2257  loss_ce_2: 0.5801  loss_mask_2: 0.06056  loss_dice_2: 0.2536  loss_bbox_2: 0.04199  loss_giou_2: 0.1336  loss_ce_dn_2: 0.1539  loss_mask_dn_2: 0.05632  loss_dice_dn_2: 0.253  loss_bbox_dn_2: 0.05207  loss_giou_dn_2: 0.1457  loss_ce_3: 0.5231  loss_mask_3: 0.06261 

[32m[07/13 11:33:01 d2.utils.events]: [0m eta: 0:10:28  iter: 1599  total_loss: 27.36  loss_ce: 0.5044  loss_mask: 0.03814  loss_dice: 0.3479  loss_bbox: 0.03582  loss_giou: 0.1658  loss_ce_dn: 0.1058  loss_mask_dn: 0.04249  loss_dice_dn: 0.3194  loss_bbox_dn: 0.02876  loss_giou_dn: 0.1453  loss_ce_0: 0.9505  loss_mask_0: 0.04004  loss_dice_0: 0.3797  loss_bbox_0: 0.06091  loss_giou_0: 0.2828  loss_ce_dn_0: 1.773  loss_mask_dn_0: 0.3923  loss_dice_dn_0: 2.723  loss_bbox_dn_0: 0.3093  loss_giou_dn_0: 0.8502  loss_ce_1: 0.8023  loss_mask_1: 0.04002  loss_dice_1: 0.3866  loss_bbox_1: 0.04841  loss_giou_1: 0.2204  loss_ce_dn_1: 0.2488  loss_mask_dn_1: 0.04094  loss_dice_dn_1: 0.3648  loss_bbox_dn_1: 0.06952  loss_giou_dn_1: 0.2667  loss_ce_2: 0.6544  loss_mask_2: 0.03804  loss_dice_2: 0.3801  loss_bbox_2: 0.04016  loss_giou_2: 0.1939  loss_ce_dn_2: 0.1711  loss_mask_dn_2: 0.04195  loss_dice_dn_2: 0.3454  loss_bbox_dn_2: 0.04048  loss_giou_dn_2: 0.1783  loss_ce_3: 0.5717  loss_mask_3: 0.0

[32m[07/13 11:34:04 d2.utils.events]: [0m eta: 0:09:25  iter: 1679  total_loss: 28.68  loss_ce: 0.4642  loss_mask: 0.05873  loss_dice: 0.4371  loss_bbox: 0.03531  loss_giou: 0.2174  loss_ce_dn: 0.08989  loss_mask_dn: 0.05494  loss_dice_dn: 0.3566  loss_bbox_dn: 0.02617  loss_giou_dn: 0.1516  loss_ce_0: 0.9093  loss_mask_0: 0.07022  loss_dice_0: 0.4549  loss_bbox_0: 0.0506  loss_giou_0: 0.3097  loss_ce_dn_0: 1.785  loss_mask_dn_0: 0.447  loss_dice_dn_0: 2.632  loss_bbox_dn_0: 0.3203  loss_giou_dn_0: 0.8564  loss_ce_1: 0.7812  loss_mask_1: 0.06591  loss_dice_1: 0.4022  loss_bbox_1: 0.03749  loss_giou_1: 0.2499  loss_ce_dn_1: 0.242  loss_mask_dn_1: 0.05977  loss_dice_dn_1: 0.3573  loss_bbox_dn_1: 0.07089  loss_giou_dn_1: 0.2552  loss_ce_2: 0.6239  loss_mask_2: 0.0579  loss_dice_2: 0.3818  loss_bbox_2: 0.0384  loss_giou_2: 0.2243  loss_ce_dn_2: 0.1772  loss_mask_dn_2: 0.05699  loss_dice_dn_2: 0.3481  loss_bbox_dn_2: 0.04141  loss_giou_dn_2: 0.184  loss_ce_3: 0.5548  loss_mask_3: 0.0606  

[32m[07/13 11:35:07 d2.utils.events]: [0m eta: 0:08:22  iter: 1759  total_loss: 23.49  loss_ce: 0.4373  loss_mask: 0.0316  loss_dice: 0.2801  loss_bbox: 0.0319  loss_giou: 0.1368  loss_ce_dn: 0.08571  loss_mask_dn: 0.03011  loss_dice_dn: 0.293  loss_bbox_dn: 0.02962  loss_giou_dn: 0.1261  loss_ce_0: 0.8207  loss_mask_0: 0.03232  loss_dice_0: 0.2726  loss_bbox_0: 0.05226  loss_giou_0: 0.2747  loss_ce_dn_0: 1.734  loss_mask_dn_0: 0.3952  loss_dice_dn_0: 2.582  loss_bbox_dn_0: 0.295  loss_giou_dn_0: 0.8523  loss_ce_1: 0.7042  loss_mask_1: 0.03651  loss_dice_1: 0.2662  loss_bbox_1: 0.04029  loss_giou_1: 0.1679  loss_ce_dn_1: 0.2317  loss_mask_dn_1: 0.03669  loss_dice_dn_1: 0.3458  loss_bbox_dn_1: 0.05881  loss_giou_dn_1: 0.235  loss_ce_2: 0.5875  loss_mask_2: 0.03699  loss_dice_2: 0.272  loss_bbox_2: 0.04212  loss_giou_2: 0.1605  loss_ce_dn_2: 0.1458  loss_mask_dn_2: 0.0337  loss_dice_dn_2: 0.3239  loss_bbox_dn_2: 0.03722  loss_giou_dn_2: 0.1654  loss_ce_3: 0.523  loss_mask_3: 0.03238  l

[32m[07/13 11:36:11 d2.utils.events]: [0m eta: 0:07:19  iter: 1839  total_loss: 26.98  loss_ce: 0.5005  loss_mask: 0.03397  loss_dice: 0.354  loss_bbox: 0.03834  loss_giou: 0.1878  loss_ce_dn: 0.09622  loss_mask_dn: 0.0348  loss_dice_dn: 0.3001  loss_bbox_dn: 0.02972  loss_giou_dn: 0.1481  loss_ce_0: 0.9524  loss_mask_0: 0.03575  loss_dice_0: 0.3871  loss_bbox_0: 0.06781  loss_giou_0: 0.3175  loss_ce_dn_0: 1.868  loss_mask_dn_0: 0.5791  loss_dice_dn_0: 2.83  loss_bbox_dn_0: 0.3725  loss_giou_dn_0: 0.8617  loss_ce_1: 0.8038  loss_mask_1: 0.03486  loss_dice_1: 0.3569  loss_bbox_1: 0.05612  loss_giou_1: 0.2214  loss_ce_dn_1: 0.2387  loss_mask_dn_1: 0.03989  loss_dice_dn_1: 0.3601  loss_bbox_dn_1: 0.07591  loss_giou_dn_1: 0.268  loss_ce_2: 0.6592  loss_mask_2: 0.03607  loss_dice_2: 0.3838  loss_bbox_2: 0.04865  loss_giou_2: 0.2049  loss_ce_dn_2: 0.1693  loss_mask_dn_2: 0.03557  loss_dice_dn_2: 0.33  loss_bbox_dn_2: 0.04202  loss_giou_dn_2: 0.1845  loss_ce_3: 0.5742  loss_mask_3: 0.03607 

[32m[07/13 11:37:14 d2.utils.events]: [0m eta: 0:06:16  iter: 1919  total_loss: 31.84  loss_ce: 0.591  loss_mask: 0.04117  loss_dice: 0.4338  loss_bbox: 0.05355  loss_giou: 0.2356  loss_ce_dn: 0.1117  loss_mask_dn: 0.03511  loss_dice_dn: 0.33  loss_bbox_dn: 0.026  loss_giou_dn: 0.1589  loss_ce_0: 0.9671  loss_mask_0: 0.04438  loss_dice_0: 0.4239  loss_bbox_0: 0.08055  loss_giou_0: 0.3478  loss_ce_dn_0: 1.79  loss_mask_dn_0: 0.3311  loss_dice_dn_0: 2.837  loss_bbox_dn_0: 0.3335  loss_giou_dn_0: 0.8548  loss_ce_1: 0.8971  loss_mask_1: 0.04541  loss_dice_1: 0.4311  loss_bbox_1: 0.07024  loss_giou_1: 0.2838  loss_ce_dn_1: 0.2555  loss_mask_dn_1: 0.03967  loss_dice_dn_1: 0.4067  loss_bbox_dn_1: 0.07065  loss_giou_dn_1: 0.2829  loss_ce_2: 0.8048  loss_mask_2: 0.04723  loss_dice_2: 0.4347  loss_bbox_2: 0.06217  loss_giou_2: 0.2647  loss_ce_dn_2: 0.1804  loss_mask_dn_2: 0.03798  loss_dice_dn_2: 0.3608  loss_bbox_dn_2: 0.04323  loss_giou_dn_2: 0.2037  loss_ce_3: 0.7256  loss_mask_3: 0.04404  

[32m[07/13 11:38:20 d2.data.dataset_mapper]: [0m[DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')]
[32m[07/13 11:38:20 d2.data.common]: [0mSerializing the dataset using: <class 'detectron2.data.common._TorchSerializedList'>
[32m[07/13 11:38:20 d2.data.common]: [0mSerializing 58 elements to byte tensors and concatenating them all ...
[32m[07/13 11:38:20 d2.data.common]: [0mSerialized dataset takes 0.14 MiB
[32m[07/13 11:38:20 d2.evaluation.evaluator]: [0mStart inference on 58 batches
[32m[07/13 11:38:23 d2.evaluation.evaluator]: [0mInference done 11/58. Dataloading: 0.0009 s/iter. Inference: 0.1710 s/iter. Eval: 0.0975 s/iter. Total: 0.2694 s/iter. ETA=0:00:12
[32m[07/13 11:38:28 d2.evaluation.evaluator]: [0mInference done 30/58. Dataloading: 0.0011 s/iter. Inference: 0.1714 s/iter. Eval: 0.0977 s/iter. Total: 0.2703 s/iter. ETA=0:00:07
[32m[07/13 11:38:33 d2.evaluation.evaluator]: [0m

[32m[07/13 11:38:36 d2.evaluation.testing]: [0mcopypaste: 76.6973,86.9207,83.1905,48.6512,89.1165,87.7209
[32m[07/13 11:38:36 d2.utils.events]: [0m eta: 0:05:13  iter: 1999  total_loss: 27.89  loss_ce: 0.42  loss_mask: 0.06687  loss_dice: 0.3675  loss_bbox: 0.03435  loss_giou: 0.1562  loss_ce_dn: 0.08987  loss_mask_dn: 0.05798  loss_dice_dn: 0.3414  loss_bbox_dn: 0.02906  loss_giou_dn: 0.1349  loss_ce_0: 0.8905  loss_mask_0: 0.07144  loss_dice_0: 0.3634  loss_bbox_0: 0.05899  loss_giou_0: 0.3189  loss_ce_dn_0: 1.752  loss_mask_dn_0: 0.4722  loss_dice_dn_0: 2.903  loss_bbox_dn_0: 0.3296  loss_giou_dn_0: 0.8522  loss_ce_1: 0.7729  loss_mask_1: 0.06873  loss_dice_1: 0.3338  loss_bbox_1: 0.04728  loss_giou_1: 0.235  loss_ce_dn_1: 0.239  loss_mask_dn_1: 0.07798  loss_dice_dn_1: 0.3722  loss_bbox_dn_1: 0.08199  loss_giou_dn_1: 0.2645  loss_ce_2: 0.6089  loss_mask_2: 0.07073  loss_dice_2: 0.3626  loss_bbox_2: 0.04109  loss_giou_2: 0.2211  loss_ce_dn_2: 0.1615  loss_mask_dn_2: 0.07032  los

[32m[07/13 11:39:40 d2.utils.events]: [0m eta: 0:04:11  iter: 2079  total_loss: 31.78  loss_ce: 0.6404  loss_mask: 0.04633  loss_dice: 0.3494  loss_bbox: 0.03883  loss_giou: 0.1846  loss_ce_dn: 0.1051  loss_mask_dn: 0.04357  loss_dice_dn: 0.3721  loss_bbox_dn: 0.03375  loss_giou_dn: 0.1658  loss_ce_0: 0.9723  loss_mask_0: 0.04983  loss_dice_0: 0.397  loss_bbox_0: 0.06236  loss_giou_0: 0.4095  loss_ce_dn_0: 1.862  loss_mask_dn_0: 0.5399  loss_dice_dn_0: 2.878  loss_bbox_dn_0: 0.3356  loss_giou_dn_0: 0.8491  loss_ce_1: 0.8534  loss_mask_1: 0.05035  loss_dice_1: 0.3122  loss_bbox_1: 0.05051  loss_giou_1: 0.2304  loss_ce_dn_1: 0.2659  loss_mask_dn_1: 0.05129  loss_dice_dn_1: 0.4204  loss_bbox_dn_1: 0.08252  loss_giou_dn_1: 0.2794  loss_ce_2: 0.7328  loss_mask_2: 0.0519  loss_dice_2: 0.3649  loss_bbox_2: 0.04497  loss_giou_2: 0.196  loss_ce_dn_2: 0.1859  loss_mask_dn_2: 0.04571  loss_dice_dn_2: 0.4058  loss_bbox_dn_2: 0.04993  loss_giou_dn_2: 0.2134  loss_ce_3: 0.6911  loss_mask_3: 0.0503

[32m[07/13 11:40:44 d2.utils.events]: [0m eta: 0:03:08  iter: 2159  total_loss: 31.07  loss_ce: 0.5557  loss_mask: 0.04721  loss_dice: 0.3548  loss_bbox: 0.04642  loss_giou: 0.2185  loss_ce_dn: 0.146  loss_mask_dn: 0.03953  loss_dice_dn: 0.399  loss_bbox_dn: 0.03131  loss_giou_dn: 0.1585  loss_ce_0: 0.9197  loss_mask_0: 0.05283  loss_dice_0: 0.4478  loss_bbox_0: 0.0576  loss_giou_0: 0.3352  loss_ce_dn_0: 1.753  loss_mask_dn_0: 0.5189  loss_dice_dn_0: 2.625  loss_bbox_dn_0: 0.3341  loss_giou_dn_0: 0.8559  loss_ce_1: 0.8172  loss_mask_1: 0.04773  loss_dice_1: 0.4306  loss_bbox_1: 0.04938  loss_giou_1: 0.2667  loss_ce_dn_1: 0.2787  loss_mask_dn_1: 0.04339  loss_dice_dn_1: 0.4685  loss_bbox_dn_1: 0.073  loss_giou_dn_1: 0.2742  loss_ce_2: 0.7154  loss_mask_2: 0.04675  loss_dice_2: 0.4357  loss_bbox_2: 0.05106  loss_giou_2: 0.2572  loss_ce_dn_2: 0.2174  loss_mask_dn_2: 0.0453  loss_dice_dn_2: 0.424  loss_bbox_dn_2: 0.04673  loss_giou_dn_2: 0.199  loss_ce_3: 0.6437  loss_mask_3: 0.04395  lo

[32m[07/13 11:41:47 d2.utils.events]: [0m eta: 0:02:05  iter: 2239  total_loss: 29.46  loss_ce: 0.5051  loss_mask: 0.05084  loss_dice: 0.3832  loss_bbox: 0.03742  loss_giou: 0.2049  loss_ce_dn: 0.1143  loss_mask_dn: 0.0432  loss_dice_dn: 0.3901  loss_bbox_dn: 0.02567  loss_giou_dn: 0.1624  loss_ce_0: 0.9104  loss_mask_0: 0.04914  loss_dice_0: 0.4205  loss_bbox_0: 0.06299  loss_giou_0: 0.3372  loss_ce_dn_0: 1.874  loss_mask_dn_0: 0.474  loss_dice_dn_0: 2.768  loss_bbox_dn_0: 0.3176  loss_giou_dn_0: 0.8564  loss_ce_1: 0.784  loss_mask_1: 0.04801  loss_dice_1: 0.4218  loss_bbox_1: 0.04958  loss_giou_1: 0.2401  loss_ce_dn_1: 0.2708  loss_mask_dn_1: 0.05419  loss_dice_dn_1: 0.4177  loss_bbox_dn_1: 0.07139  loss_giou_dn_1: 0.2676  loss_ce_2: 0.6458  loss_mask_2: 0.04449  loss_dice_2: 0.3963  loss_bbox_2: 0.04393  loss_giou_2: 0.2185  loss_ce_dn_2: 0.1756  loss_mask_dn_2: 0.04687  loss_dice_dn_2: 0.3944  loss_bbox_dn_2: 0.04166  loss_giou_dn_2: 0.1952  loss_ce_3: 0.5827  loss_mask_3: 0.0526

[32m[07/13 11:42:51 d2.utils.events]: [0m eta: 0:01:02  iter: 2319  total_loss: 30.72  loss_ce: 0.5373  loss_mask: 0.03038  loss_dice: 0.4391  loss_bbox: 0.04873  loss_giou: 0.2592  loss_ce_dn: 0.125  loss_mask_dn: 0.02873  loss_dice_dn: 0.4049  loss_bbox_dn: 0.02482  loss_giou_dn: 0.1723  loss_ce_0: 0.9794  loss_mask_0: 0.02902  loss_dice_0: 0.4785  loss_bbox_0: 0.06858  loss_giou_0: 0.4207  loss_ce_dn_0: 1.931  loss_mask_dn_0: 0.325  loss_dice_dn_0: 2.861  loss_bbox_dn_0: 0.2517  loss_giou_dn_0: 0.8578  loss_ce_1: 0.8569  loss_mask_1: 0.0313  loss_dice_1: 0.4741  loss_bbox_1: 0.05551  loss_giou_1: 0.2961  loss_ce_dn_1: 0.2691  loss_mask_dn_1: 0.02982  loss_dice_dn_1: 0.4245  loss_bbox_dn_1: 0.05456  loss_giou_dn_1: 0.2936  loss_ce_2: 0.7387  loss_mask_2: 0.03088  loss_dice_2: 0.4312  loss_bbox_2: 0.05365  loss_giou_2: 0.2816  loss_ce_dn_2: 0.19  loss_mask_dn_2: 0.02944  loss_dice_dn_2: 0.405  loss_bbox_dn_2: 0.03781  loss_giou_dn_2: 0.2202  loss_ce_3: 0.6518  loss_mask_3: 0.03087  

[32m[07/13 11:44:01 d2.utils.events]: [0m eta: 0:00:00  iter: 2399  total_loss: 25.16  loss_ce: 0.4077  loss_mask: 0.05206  loss_dice: 0.3056  loss_bbox: 0.04354  loss_giou: 0.1606  loss_ce_dn: 0.07873  loss_mask_dn: 0.05005  loss_dice_dn: 0.3178  loss_bbox_dn: 0.03255  loss_giou_dn: 0.1447  loss_ce_0: 0.8513  loss_mask_0: 0.05454  loss_dice_0: 0.3151  loss_bbox_0: 0.0685  loss_giou_0: 0.301  loss_ce_dn_0: 1.843  loss_mask_dn_0: 0.5186  loss_dice_dn_0: 2.667  loss_bbox_dn_0: 0.4041  loss_giou_dn_0: 0.8579  loss_ce_1: 0.7317  loss_mask_1: 0.05406  loss_dice_1: 0.3176  loss_bbox_1: 0.05487  loss_giou_1: 0.2004  loss_ce_dn_1: 0.2595  loss_mask_dn_1: 0.05548  loss_dice_dn_1: 0.3079  loss_bbox_dn_1: 0.09274  loss_giou_dn_1: 0.2534  loss_ce_2: 0.5933  loss_mask_2: 0.04833  loss_dice_2: 0.2975  loss_bbox_2: 0.04899  loss_giou_2: 0.179  loss_ce_dn_2: 0.1685  loss_mask_dn_2: 0.05068  loss_dice_dn_2: 0.3149  loss_bbox_dn_2: 0.04847  loss_giou_dn_2: 0.186  loss_ce_3: 0.4951  loss_mask_3: 0.0478

[32m[07/13 11:44:19 d2.evaluation.coco_evaluation]: [0mPer-category segm AP: 
| category         | AP     | category       | AP     | category        | AP     |
|:-----------------|:-------|:---------------|:-------|:----------------|:-------|
| apple            | 86.040 | banana         | 88.995 | baseball        | 72.414 |
| cereals          | 79.675 | cheezit        | 84.653 | chocolate_jello | 75.856 |
| cleanser         | 70.902 | coffee_grounds | 79.213 | cola            | 80.089 |
| couch_table      | 49.020 | dice           | 64.219 | fork            | 70.438 |
| iced_tea         | 98.193 | juice_pack     | 84.684 | knife           | 71.218 |
| lemon            | 88.575 | milk           | 83.384 | mustard         | 86.197 |
| orange           | 67.089 | orange_juice   | 99.345 | peach           | 74.010 |
| pear             | 86.717 | plum           | 84.873 | pringles        | 86.802 |
| red_wine         | 89.916 | rubiks_cube    | 82.102 | shelf           | 76.834 |
| shelf

#### Exporting the Detector

The detector is fully described by three artifacts: its model definition in the form of a detectron2 config, its weights after training, and a file enumerating the object class labels used in the particular training session. The cell below gathers these files into one directory and zips them.

In [13]:
# Save the model in a format suitable for deployment on the robot.
# The result is a zip file bundling the trained weights, the class label
# descriptions and the detectron2 model description (config).
if TRAIN:
    model_path = 'output/model_final.pth'
    # the config file has to match the backbone selected via model_type
    conf_dir = 'data/robocup_bordeaux_2023/robocup_data/trained_models/conf'
    conf_name = 'swin_config.yaml' if model_type == 'swin' else 'r50_config.yaml'
    conf_path = os.path.join(conf_dir, conf_name)
    classes_path = 'output/classes.json'

    time_stamp = get_timestamp()
    result_path = os.path.join('output', experiment_name + '_' + time_stamp)

    # Start from a clean result directory. A missing directory is the only
    # expected failure here; anything else (e.g. a permission problem) should
    # surface instead of being silently swallowed.
    try:
        shutil.rmtree(result_path)
    except FileNotFoundError:
        pass

    os.mkdir(result_path)

    print('Copy result files...')
    # the files are renamed to fixed names inside the archive
    shutil.copy(model_path, os.path.join(result_path, 'model.pth'))
    shutil.copy(conf_path, os.path.join(result_path, 'detectron2_config.yaml'))
    shutil.copy(classes_path, os.path.join(result_path, 'class_labels.json'))
    print('ZIP result files...')
    make_archive(result_path, result_path + '.zip')
    print('Done!')

Copy result files...
ZIP result files...
Done!


#### Evaluating the Detector

To make sure that the detector is well equipped to find objects relevant to the task at hand, its outputs on the validation set are brought back from detectron2 to fiftyone for visualization. In addition, precision, recall and f1-scores are computed class-wise to help identify any issues.

In [14]:
# To inspect the results of a previously trained model, set model_name to that
# model's experiment_name; keep it at None to use the most recently trained model.
model_name = None
if model_name is None:
    weight_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
else:
    weight_path = os.path.join(cfg.OUTPUT_DIR, model_name, 'model.pth')
# point the config at the selected weights before building the predictor
cfg.MODEL.WEIGHTS = weight_path
predictor = DefaultPredictor(cfg)

criterion.weight_dict  {'loss_ce': 4.0, 'loss_mask': 5.0, 'loss_dice': 5.0, 'loss_bbox': 5.0, 'loss_giou': 2.0, 'loss_ce_interm': 4.0, 'loss_mask_interm': 5.0, 'loss_dice_interm': 5.0, 'loss_bbox_interm': 5.0, 'loss_giou_interm': 2.0, 'loss_ce_dn': 4.0, 'loss_mask_dn': 5.0, 'loss_dice_dn': 5.0, 'loss_bbox_dn': 5.0, 'loss_giou_dn': 2.0, 'loss_ce_interm_dn': 4.0, 'loss_mask_interm_dn': 5.0, 'loss_dice_interm_dn': 5.0, 'loss_bbox_interm_dn': 5.0, 'loss_giou_interm_dn': 2.0, 'loss_ce_0': 4.0, 'loss_mask_0': 5.0, 'loss_dice_0': 5.0, 'loss_bbox_0': 5.0, 'loss_giou_0': 2.0, 'loss_ce_interm_0': 4.0, 'loss_mask_interm_0': 5.0, 'loss_dice_interm_0': 5.0, 'loss_bbox_interm_0': 5.0, 'loss_giou_interm_0': 2.0, 'loss_ce_dn_0': 4.0, 'loss_mask_dn_0': 5.0, 'loss_dice_dn_0': 5.0, 'loss_bbox_dn_0': 5.0, 'loss_giou_dn_0': 2.0, 'loss_ce_interm_dn_0': 4.0, 'loss_mask_interm_dn_0': 5.0, 'loss_dice_interm_dn_0': 5.0, 'loss_bbox_interm_dn_0': 5.0, 'loss_giou_interm_dn_0': 2.0, 'loss_ce_1': 4.0, 'loss_mask_1':

In [15]:
# Bridge back from detectron2 to fiftyone: run the predictor on every validation
# image and attach the resulting detections to the validation samples.

val_view = valid_dataset_processed
dataset_dicts = get_fiftyone_dicts(val_view, labels_dict)
predictions = {}
for d in dataset_dicts:
    img = cv2.imread(d["file_name"])
    # cv2.imread signals an unreadable/missing file by returning None instead of
    # raising, which would otherwise fail later with an obscure error in the predictor
    if img is None:
        raise FileNotFoundError(f"Could not read image: {d['file_name']}")
    outputs = predictor(img)
    # only the fiftyone detections are needed here; the second return value is unused
    detections, _ = detectron_to_fo(outputs, d["width"], d["height"], classes)
    # NOTE(review): image_id is presumably the fiftyone sample id, since the
    # predictions are matched via key_field="id" below - verify in get_fiftyone_dicts
    predictions[d["image_id"]] = detections

val_view.set_values("predictions", predictions, key_field="id")

In [16]:
# open the fiftyone app on the validation set to inspect the model predictions after training
session = fo.launch_app(dataset=valid_dataset_processed)

In [17]:
# Quick summary of model performance as a final sanity-check before deployment.
# The training process above additionally yields validation mAP scores as a side result.
eval_options = dict(
    gt_field="segmentations",
    eval_key="eval",
    use_masks=True,
    compute_mAP=True,
)
results = valid_dataset_processed.evaluate_detections("predictions", **eval_options)
results.print_report()

Evaluating detections...
 100% |███████████████████| 58/58 [4.3s elapsed, 0s remaining, 17.7 samples/s]      
Performing IoU sweep...
 100% |███████████████████| 58/58 [3.3s elapsed, 0s remaining, 23.5 samples/s]      
                  precision    recall  f1-score   support

           apple       0.85      0.85      0.85        13
          banana       1.00      0.91      0.95        11
        baseball       0.58      0.70      0.64        10
         cereals       0.73      0.79      0.76        34
         cheezit       1.00      0.70      0.82        10
 chocolate_jello       0.80      0.92      0.86        13
        cleanser       0.91      0.83      0.87        12
  coffee_grounds       0.80      1.00      0.89         8
            cola       0.79      1.00      0.88        15
     couch_table       1.00      0.60      0.75         5
            dice       1.00      1.00      1.00         7
            fork       0.64      0.70      0.67        10
        iced_tea       1.0