<a href="https://colab.research.google.com/github/DavidePanza/ml-jaguar-identification/blob/main/notebooks/02_dataset_creation/Gdino_SAM2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Standard-library and plotting imports.
from pathlib import Path
import os
import sys
import matplotlib.pyplot as plt

# Install and import FiftyOne (used below for the dataset and the model zoo).
!pip install fiftyone -q
import fiftyone as fo
import fiftyone.zoo as foz
from fiftyone import ViewField as F

# Mount Google Drive at /gdrive and make it the working directory.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

# Read the Hugging Face token from the Colab secret named 'HF_token'.
from google.colab import userdata
HF_TOKEN = userdata.get('HF_token')

# Install huggingface_hub and log in with the token read above.
!pip install huggingface_hub -q
from huggingface_hub import login
login(token=HF_TOKEN)

# import from gdino_utils
# The `src` directory is looked up two levels above the current working directory
# and appended to sys.path so that `gdino_utils` can be imported.
# NOTE(review): the working directory was changed to /gdrive above, so this path is
# resolved relative to /gdrive, not to the notebook's location in the repository.
# Confirm that `src/gdino_utils.py` is actually reachable from there.
notebook_dir = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
src_path = os.path.join(notebook_dir, 'src')
sys.path.append(src_path)
from gdino_utils import select_best_detection_head, select_best_detection_body

# Load the Dataset
This cell loads a FiftyOne dataset from the specified directory and links it to the raw image files, which are stored in a separate folder. This gives access to both the dataset annotations and the corresponding image files for visualization or processing.

In [None]:
# Folder holding the raw image files, and folder holding the FiftyOne dataset export
image_dir = Path('path/to/your/images')
input_dir = Path('path/to/your/fo_dataset')

# Import options: `rel_dir` tells FiftyOne where the image files referenced by
# the dataset's (relative) file paths live.
import_options = {
    "dataset_dir": str(input_dir),
    "dataset_type": fo.types.FiftyOneDataset,
    "rel_dir": image_dir,
}

# Build the in-memory dataset from the files on disk
dataset = fo.Dataset.from_dir(**import_options)

# Run Grounding-Dino
This script loads a Grounding DINO zero-shot object detection model from the FiftyOne model zoo, configured to detect either "jaguar's whole body" or "Close-up of a jaguar's head" based on the DETECTION_TYPE flag.   
It runs the model on the dataset, saving predictions in the appropriate raw_bboxes_body or raw_bboxes_head field, using a confidence threshold of 0.2 and a text similarity threshold of 0.6.  
It then selects the best detection depending on the chosen detection type and removes the raw bounding box field after processing.

In [None]:
# Configuration
DETECTION_TYPE = "body"  # or "head" - set this flag to choose processing type

# Detection thresholds passed to the model
CONFIDENCE_THRESH = 0.2  # minimum confidence for a detection to be kept
TEXT_THRESHOLD = 0.6     # text similarity threshold

# Text prompt and best-detection selector for each supported detection type
DETECTION_SETTINGS = {
    "body": ("jaguar's whole body", select_best_detection_body),
    "head": ("Close-up of a jaguar's head", select_best_detection_head),
}

# Fail fast on an unsupported flag. Without this check a typo (e.g. "Body") would
# silently run the head pipeline here, while field names derived from the flag
# elsewhere in the notebook fall back to the body variants.
if DETECTION_TYPE not in DETECTION_SETTINGS:
    raise ValueError(
        f"DETECTION_TYPE must be one of {sorted(DETECTION_SETTINGS)}, "
        f"got {DETECTION_TYPE!r}"
    )

detection_prompt, select_best_detection = DETECTION_SETTINGS[DETECTION_TYPE]

# Load the zero-shot detector with the prompt matching the detection type
model = foz.load_zoo_model(
    "zero-shot-detection-transformer-torch",
    name_or_path="IDEA-Research/grounding-dino-tiny",
    classes=[detection_prompt],
)

# Define the name of the (temporary) field that receives the raw detections
raw_bboxes_name = f"raw_bboxes_{DETECTION_TYPE}"

# Run the model over the whole dataset
dataset.apply_model(
    model,
    label_field=raw_bboxes_name,
    confidence_thresh=CONFIDENCE_THRESH,
    text_threshold=TEXT_THRESHOLD,
)

# Keep only the best detection per sample (body or head variant of the helper)
select_best_detection(dataset, raw_bboxes_field_name=raw_bboxes_name)

# Remove raw bboxes now that the best detection has been selected
dataset.delete_sample_field(raw_bboxes_name)

# Run SAM2
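This cell loads the `segment-anything-vitb-torch` model (Segment Anything, ViT-B) from the FiftyOne model zoo and applies it to the dataset, using the selected bounding boxes (`bboxes_head` or `bboxes_body`) as prompts. Note that the checkpoint loaded here is the original SAM ViT-B, not a SAM 2 checkpoint.  
It stores the results in either `segmentations_head` or `segmentations_body`, depending on the detection type.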

In [None]:
# Choose the prompt (input boxes) and output (masks) fields for the detection type.
# Only an explicit "head" selects the head fields; anything else uses the body fields.
if DETECTION_TYPE == "head":
    prompt_field, label_field = "bboxes_head", "segmentations_head"
else:
    prompt_field, label_field = "bboxes_body", "segmentations_body"

# Load the segmentation model from the FiftyOne model zoo
model = foz.load_zoo_model("segment-anything-vitb-torch")

# Segment every sample, prompting the model with the boxes stored in `prompt_field`
dataset.apply_model(model, label_field=label_field, prompt_field=prompt_field)

# Store metadata locally

In [None]:
# Destination folder for the exported dataset; created if it does not exist yet
storage_dir = Path('path/to/your/fo_dataset')
storage_dir.mkdir(parents=True, exist_ok=True)

# Export in FiftyOne's native format. `export_media=False` leaves the image files
# where they are, and `rel_dir` is the image folder the stored file paths relate to.
export_options = {
    "export_dir": str(storage_dir),
    "dataset_type": fo.types.FiftyOneDataset,
    "export_media": False,
    "rel_dir": image_dir,
}
dataset.export(**export_options)