In [None]:
from autodistill_grounded_sam import GroundedSAM
from autodistill_grounding_dino import GroundingDINO
from autodistill.detection import CaptionOntology
import cv2, os
import supervision as sv
import random


# Which kind of labels to produce. The model-construction cell selects
# GroundedSAM when this is "segmentation" and GroundingDINO otherwise.
TASK = "detection"
# Alternative value: TASK = "segmentation"

# Legacy, misspelled name that the model-construction cell reads; kept as an
# alias so existing cells keep working.
TAKS = TASK

# Names the model is prompted with. Order matters: the model-construction cell
# derives class ids from the position of each entry.
# NOTE(review): this looks like the COCO class list plus a custom
# "extinguisher" entry -- confirm against the dataset's intended classes.
_ONTOLOGY_LABELS = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train',
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign',
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
    'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag',
    'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite',
    'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
    'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon',
    'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
    'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
    'hair drier', 'toothbrush',
    'extinguisher',
)

# Every entry maps a name to itself (key and value are the same string).
CAPTION_ONTOLOGY = {label: label for label in _ONTOLOGY_LABELS}


# Thresholds passed to the model constructor as box_threshold / text_threshold.
BOX_THRESHOLD = 0.5
TEXT_THRESHOLD = 0.5


# Base name of the dataset; used to build both the archive path and the
# output folder name below.
dataset_name = "Padel Rackets.v3-renamed.yolov8"


# Folder (on the mounted Drive) that holds the dataset archive and receives the
# labeled output.
_DRIVE_DATASET_DIR = "/content/drive/MyDrive/RoboTO/Dataset"

# Archive location and the directory it is meant to be unpacked into.
# NOTE(review): DATASET_SRC_PATH is not read by any cell visible here --
# presumably an unzip step elsewhere uses it; confirm.
DATASET_SRC_PATH = os.path.join(_DRIVE_DATASET_DIR, f"{dataset_name}.zip")
DATASET_DST_PATH = "/content"

# Input images for labeling, and the folder the labeled dataset is written to.
UNLABELED_IMAGES_PATH = DATASET_DST_PATH + "/train/images"
LABELED_IMAGES_PATH = _DRIVE_DATASET_DIR + "/" + dataset_name + "_labeled"

In [None]:
# Class id -> label name, numbered by position in the ontology. The preview
# cell uses this to turn predicted class ids into readable labels.
# NOTE(review): assumes the model numbers classes in ontology order -- confirm.
classes = dict(enumerate(CAPTION_ONTOLOGY.values()))

# Base model per task. An unknown task value is rejected explicitly: a plain
# "else" fallback would silently run detection when the setting is mistyped.
MODEL_CLASS_BY_TASK = {
    "segmentation": GroundedSAM,
    "detection": GroundingDINO,
}
if TAKS not in MODEL_CLASS_BY_TASK:
    raise ValueError(
        f"Unknown task {TAKS!r}; expected one of {sorted(MODEL_CLASS_BY_TASK)}"
    )

mode_f = MODEL_CLASS_BY_TASK[TAKS]
model = mode_f(
    ontology=CaptionOntology(CAPTION_ONTOLOGY),
    box_threshold=BOX_THRESHOLD,
    text_threshold=TEXT_THRESHOLD,
)

## Test that the model (Grounding DINO or Grounded SAM) is working

In [None]:
# Sanity check: run the model on a small random sample of the unlabeled images
# and display each one with its predicted boxes and labels.
PREVIEW_SAMPLE_SIZE = 10

# Sorted so the candidate order does not depend on os.listdir's arbitrary order.
candidate_names = sorted(
    im_name
    for im_name in os.listdir(UNLABELED_IMAGES_PATH)
    if os.path.isfile(os.path.join(UNLABELED_IMAGES_PATH, im_name))
)

# random.sample draws without replacement, so no image is previewed twice, and
# capping k lets folders with fewer files than the sample size work
# (random.choices repeats entries and raises IndexError on an empty list).
IMAGE_NAMES = random.sample(
    candidate_names, k=min(PREVIEW_SAMPLE_SIZE, len(candidate_names))
)
if not IMAGE_NAMES:
  print(f"No files found in {UNLABELED_IMAGES_PATH}")

# Built once: the constructor call does not depend on the image being drawn.
annotator = sv.BoxAnnotator()

for IMAGE_NAME in IMAGE_NAMES:
  image_path = os.path.join(UNLABELED_IMAGES_PATH, IMAGE_NAME)

  image = cv2.imread(image_path)
  if image is None:
    # cv2.imread returns None (it does not raise) when a file cannot be decoded.
    print(f"Skipping unreadable image: {image_path}")
    continue

  predictions = model.predict(image_path)

  print(f"Prediction struct length: {len(predictions)}")
  # Use the named attributes instead of unpacking each detection tuple, whose
  # length differs between supervision releases.
  labels = [
    f"{classes[class_id]} {confidence:0.2f}"
    for class_id, confidence in zip(predictions.class_id, predictions.confidence)
  ]

  print(labels)

  annotated_image = annotator.annotate(scene=image, detections=predictions, labels=labels)

  sv.plot_image(annotated_image)

In [None]:
# Run the model's label() step over the unlabeled image folder, with the
# output directed to the labeled-dataset folder on Drive.
model.label(
    input_folder=UNLABELED_IMAGES_PATH,
    output_folder=LABELED_IMAGES_PATH,
)