In [None]:
# Cell 1: Setup (Colab auto-installs, local adds to path)
import sys

IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    %cd /content
    !git clone https://github.com/AntoFratta/DVARF.git
    %cd /content/DVARF
    !grep -v "triton-windows" requirements.txt > requirements_colab.txt
    !pip install -q -r requirements_colab.txt
    print("✅ Setup complete!")
else:
    from pathlib import Path
    project_root = Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd()
    if str(project_root) not in sys.path:
        sys.path.insert(0, str(project_root))
    print(f"✅ Local: {project_root}")

## 1. Environment Setup

In [None]:
%cd /content

# Clone the DVARF repository
!git clone https://github.com/AntoFratta/DVARF.git

%cd /content/DVARF

## 2. Install Dependencies

In [None]:
# Create a Colab-compatible requirements file by removing Windows-specific packages
# (grep -v keeps every line that does NOT mention "triton-windows").
# Both commands use paths relative to the kernel's current directory.
!grep -v "triton-windows" requirements.txt > requirements_colab.txt

# Install all dependencies from the filtered requirements file
!pip install -r requirements_colab.txt

## 3. Install SAM 3

In [None]:
%cd /content

# Clone the official SAM 3 repository from Meta
!git clone https://github.com/facebookresearch/sam3.git

In [None]:
# Switch the kernel's working directory to the SAM 3 checkout
%cd /content/sam3

# Install SAM 3 in editable mode from the current directory (".")
!pip install -e .

## 4. Hugging Face Authentication

In [None]:
# Authenticate this session with Hugging Face.
from huggingface_hub import login

# Login to Hugging Face (widget will prompt for your token)
# The token is entered interactively, so it is never written into the notebook.
login()

## 5. Test Feature Extraction

In [None]:
# Switch the kernel's working directory to the Colab checkout of DVARF
%cd /content/DVARF

import sys
# Put the checkout at the front of sys.path (only once) ahead of the
# `src.*` imports further down.
if "/content/DVARF" not in sys.path:
    sys.path.insert(0, "/content/DVARF")

import os
from pathlib import Path
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Project modules used by the test cells below
from src.sam3_wrapper import Sam3ImageModel
from src.prompts import CLASS_PROMPTS
from src.yolo_export import sam3_boxes_to_yolo

# Quick environment report: library version, GPU availability, prompt table
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
print(f"\nClasses: {CLASS_PROMPTS}")

In [None]:
# Load SAM3 model (enable debug to see internal structure)
# The flag is set before the model is constructed.
# NOTE(review): presumably the wrapper reads SAM3_DEBUG from the environment — confirm in src/sam3_wrapper.
os.environ["SAM3_DEBUG"] = "1"

print("Loading SAM3...")
# Use HuggingFace checkpoint explicitly
model = Sam3ImageModel(checkpoint_path="facebook/sam3-image-large")
print("✅ Model loaded")

In [None]:
# Get test images
# Collects the first three .jpg files (sorted by path) from the test split of
# the Colab checkout.
project_root = Path("/content/DVARF")
test_images_dir = project_root / "data" / "raw" / "images" / "test"
test_image_files = sorted(test_images_dir.glob("*.jpg"))[:3]

# The following cells index test_image_files[0]; stop here with a clear message
# instead of an IndexError later when the directory is missing or empty.
if not test_image_files:
    raise FileNotFoundError(f"No .jpg images found in {test_images_dir}")

print(f"Test images ({len(test_image_files)}):")
for img in test_image_files:
    print(f"  {img.name}")

In [None]:
# Test single image + single prompt
# Runs one text-prompted prediction on the first test image and reports the
# shape of each output; when something was detected, also checks that the
# feature vectors are 256 wide.
test_prompt = CLASS_PROMPTS[0]
test_img = test_image_files[0]

print(f"Image: {test_img.name}")
print(f"Prompt: '{test_prompt}'\n")

prediction = model.predict_with_text(test_img, test_prompt)

print(f"\n✅ Boxes: {prediction.boxes.shape}")
print(f"✅ Scores: {prediction.scores.shape}")
print(f"✅ Masks: {prediction.masks.shape}")
print(f"✅ Features: {prediction.features.shape}")

detection_count = prediction.boxes.shape[0]
if detection_count > 0:
    print(f"\nScores: {prediction.scores.cpu().numpy()}")
    # Only the first ten values of the first detection's features are shown
    print(f"Features sample: {prediction.features[0, :10].cpu().numpy()}")
    feature_width = prediction.features.shape[1]
    assert feature_width == 256, "Expected 256-d features"
    print("\n✅ Feature dimensions correct (256-d)")

In [None]:
# Test all classes on one image
# Queries the model once per entry of CLASS_PROMPTS against the first test
# image and prints how many detections each prompt produced.
test_img = test_image_files[0]

print(f"Testing all classes on: {test_img.name}\n")

for class_id in CLASS_PROMPTS:
    prompt = CLASS_PROMPTS[class_id]
    prediction = model.predict_with_text(test_img, prompt)
    detection_count = prediction.boxes.shape[0]

    print(f"Class {class_id} ('{prompt}'): {detection_count} detections")
    if detection_count <= 0:
        # Nothing detected for this prompt: no scores or features to show
        continue
    print(f"  Scores: {prediction.scores.cpu().numpy()}")
    print(f"  Features: {prediction.features.shape}")

print("\n✅ All classes tested")

In [None]:
# Test complete pipeline (simulate run_sam3_on_split)
# For every test image: run the model once per class prompt, convert the
# detections to YOLO boxes, then verify that each box carries features that
# pack into a (num_boxes, 257) array.
print(f"Testing pipeline on {len(test_image_files)} images\n")

# Number of images for which at least one box came back without features. It
# drives the final status line so a failed run is not reported as a success.
images_with_missing_features = 0

for idx, img_path in enumerate(test_image_files, 1):
    print(f"[{idx}/{len(test_image_files)}] {img_path.name}")

    # Only the pixel dimensions are needed here, so read them from the opened
    # file instead of decoding and converting the whole image; the context
    # manager closes the file as soon as the size is known.
    with Image.open(img_path) as opened_image:
        width, height = opened_image.size

    all_boxes = []

    # Query all classes
    for class_id, prompt in CLASS_PROMPTS.items():
        prediction = model.predict_with_text(img_path, prompt)

        yolo_boxes = sam3_boxes_to_yolo(
            prediction=prediction,
            class_id=class_id,
            image_width=width,
            image_height=height,
            score_threshold=0.26,
        )

        all_boxes.extend(yolo_boxes)
        print(f"  Class {class_id}: {len(yolo_boxes)} boxes")

    # Check all boxes have features
    boxes_with_features = sum(1 for box in all_boxes if box.features is not None)
    boxes_without = len(all_boxes) - boxes_with_features

    print(f"  Total: {len(all_boxes)} boxes, {boxes_with_features} with features")

    if boxes_without > 0:
        images_with_missing_features += 1
        print(f"  ❌ ERROR: {boxes_without} boxes missing features!")
        continue

    # Build 257-d features (256 + score); a missing score is stored as 0.0
    all_features = []
    for box in all_boxes:
        score_val = box.score if box.score is not None else 0.0
        feat_257 = np.concatenate([box.features, [score_val]]).astype(np.float32)
        all_features.append(feat_257)

    # An image with no boxes at all has nothing to stack or check
    if all_features:
        features_arr = np.array(all_features, dtype=np.float16)
        print(f"  ✅ Features array: {features_arr.shape} (expected: {len(all_boxes)}, 257)")
        assert features_arr.shape == (len(all_boxes), 257)

if images_with_missing_features:
    print(f"\n❌ Pipeline test finished with errors: {images_with_missing_features} image(s) had boxes missing features")
else:
    print("\n✅ Pipeline test complete!")

In [None]:
# Visualize detections
# Draws the first test image and overlays one red rectangle plus a score label
# for every detection returned for the first class prompt.
test_img = test_image_files[0]
prompt = CLASS_PROMPTS[0]
image = Image.open(test_img).convert("RGB")
prediction = model.predict_with_text(test_img, prompt)

fig, ax = plt.subplots(1, 1, figsize=(12, 8))
ax.imshow(image)
ax.set_title(f"{test_img.name} - '{prompt}'")
ax.axis('off')

detection_count = prediction.boxes.shape[0]
if detection_count > 0:
    detections = zip(prediction.boxes.cpu().numpy(), prediction.scores.cpu().numpy())

    for (left, top, right, bottom), confidence in detections:
        # Boxes are unpacked as two corner points; Rectangle takes one corner
        # together with a width and a height.
        outline = patches.Rectangle(
            (left, top), right - left, bottom - top,
            linewidth=2, edgecolor='red', facecolor='none'
        )
        ax.add_patch(outline)

        # Score label placed 5 units above the box's first corner
        ax.text(
            left, top - 5, f'{confidence:.2f}',
            color='white', fontsize=10,
            bbox=dict(facecolor='red', alpha=0.7, edgecolor='none', pad=2)
        )

plt.tight_layout()
plt.show()

print(f"Detections: {prediction.boxes.shape[0]}")
print(f"Features: {prediction.features.shape}")

In [None]:
# Cleanup
# Reset the debug flag that the model-loading cell set to "1".
os.environ["SAM3_DEBUG"] = "0"
print("✅ Test complete!")

In [None]:
# Cell 2: Imports
# Relies on `sys` and `IN_COLAB` from the setup cell (Cell 1).
import os
from pathlib import Path
import numpy as np
import torch
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# Repository root: the fixed Colab checkout, or locally the parent of a
# "notebooks" working directory, falling back to the working directory itself.
project_root = (
    Path("/content/DVARF")
    if IN_COLAB
    else (Path.cwd().parent if Path.cwd().name == "notebooks" else Path.cwd())
)

# Register the root on sys.path once, ahead of the `src.*` imports below
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from src.sam3_wrapper import Sam3ImageModel
from src.prompts import CLASS_PROMPTS
from src.config import get_images_dir
from src.yolo_export import sam3_boxes_to_yolo, YoloBox

# Environment report
print(f"Project root: {project_root}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA: {torch.cuda.is_available()}")
print(f"\nClasses: {CLASS_PROMPTS}")

In [None]:
# Cell 3: Login to Hugging Face to download SAM3 model
# The token is requested interactively by login(), so nothing secret is stored
# in the notebook source.
from huggingface_hub import login

print("Please login to Hugging Face to download SAM3:")
login()

In [None]:
# Cell 3: Load SAM3 model (enable debug to see internal structure)
# The flag is set before the model is constructed.
# NOTE(review): presumably the wrapper reads SAM3_DEBUG from the environment — confirm in src/sam3_wrapper.
os.environ["SAM3_DEBUG"] = "1"

print("Loading SAM3...")
# No checkpoint is passed here, so the wrapper's own default is used.
model = Sam3ImageModel()
print("✅ Model loaded")

In [None]:
# Cell 4: Get test images
# Collects the first three .jpg files (sorted by path) from the test split
# under project_root, which the imports cell defines.
test_images_dir = project_root / "data" / "raw" / "images" / "test"
test_image_files = sorted(test_images_dir.glob("*.jpg"))[:3]

# The following cells index test_image_files[0]; stop here with a clear message
# instead of an IndexError later when the directory is missing or empty.
if not test_image_files:
    raise FileNotFoundError(f"No .jpg images found in {test_images_dir}")

print(f"Test images ({len(test_image_files)}):")
for img in test_image_files:
    print(f"  {img.name}")

In [None]:
# Cell 5: Test single image + single prompt
# Runs one text-prompted prediction on the first test image and reports the
# shape of each output; when something was detected, also checks that the
# feature vectors are 256 wide.
test_prompt = CLASS_PROMPTS[0]
test_img = test_image_files[0]

print(f"Image: {test_img.name}")
print(f"Prompt: '{test_prompt}'\n")

prediction = model.predict_with_text(test_img, test_prompt)

print(f"\n✅ Boxes: {prediction.boxes.shape}")
print(f"✅ Scores: {prediction.scores.shape}")
print(f"✅ Masks: {prediction.masks.shape}")
print(f"✅ Features: {prediction.features.shape}")

detection_count = prediction.boxes.shape[0]
if detection_count > 0:
    print(f"\nScores: {prediction.scores.cpu().numpy()}")
    # Only the first ten values of the first detection's features are shown
    print(f"Features sample: {prediction.features[0, :10].cpu().numpy()}")
    feature_width = prediction.features.shape[1]
    assert feature_width == 256, "Expected 256-d features"
    print("\n✅ Feature dimensions correct (256-d)")

In [None]:
# Cell 6: Test all classes on one image
# Queries the model once per entry of CLASS_PROMPTS against the first test
# image and prints how many detections each prompt produced.
test_img = test_image_files[0]

print(f"Testing all classes on: {test_img.name}\n")

for class_id in CLASS_PROMPTS:
    prompt = CLASS_PROMPTS[class_id]
    prediction = model.predict_with_text(test_img, prompt)
    detection_count = prediction.boxes.shape[0]

    print(f"Class {class_id} ('{prompt}'): {detection_count} detections")
    if detection_count <= 0:
        # Nothing detected for this prompt: no scores or features to show
        continue
    print(f"  Scores: {prediction.scores.cpu().numpy()}")
    print(f"  Features: {prediction.features.shape}")

print("\n✅ All classes tested")

In [None]:
# Cell 7: Test complete pipeline (simulate run_sam3_on_split)
# For every test image: run the model once per class prompt, convert the
# detections to YOLO boxes, then verify that each box carries features that
# pack into a (num_boxes, 257) array.
print(f"Testing pipeline on {len(test_image_files)} images\n")

# Number of images for which at least one box came back without features. It
# drives the final status line so a failed run is not reported as a success.
images_with_missing_features = 0

for idx, img_path in enumerate(test_image_files, 1):
    print(f"[{idx}/{len(test_image_files)}] {img_path.name}")

    # Only the pixel dimensions are needed here, so read them from the opened
    # file instead of decoding and converting the whole image; the context
    # manager closes the file as soon as the size is known.
    with Image.open(img_path) as opened_image:
        width, height = opened_image.size

    all_boxes = []

    # Query all classes
    for class_id, prompt in CLASS_PROMPTS.items():
        prediction = model.predict_with_text(img_path, prompt)

        yolo_boxes = sam3_boxes_to_yolo(
            prediction=prediction,
            class_id=class_id,
            image_width=width,
            image_height=height,
            score_threshold=0.26,
        )

        all_boxes.extend(yolo_boxes)
        print(f"  Class {class_id}: {len(yolo_boxes)} boxes")

    # Check all boxes have features
    boxes_with_features = sum(1 for box in all_boxes if box.features is not None)
    boxes_without = len(all_boxes) - boxes_with_features

    print(f"  Total: {len(all_boxes)} boxes, {boxes_with_features} with features")

    if boxes_without > 0:
        images_with_missing_features += 1
        print(f"  ❌ ERROR: {boxes_without} boxes missing features!")
        continue

    # Build 257-d features (256 + score); a missing score is stored as 0.0
    all_features = []
    for box in all_boxes:
        score_val = box.score if box.score is not None else 0.0
        feat_257 = np.concatenate([box.features, [score_val]]).astype(np.float32)
        all_features.append(feat_257)

    # An image with no boxes at all has nothing to stack or check
    if all_features:
        features_arr = np.array(all_features, dtype=np.float16)
        print(f"  ✅ Features array: {features_arr.shape} (expected: {len(all_boxes)}, 257)")
        assert features_arr.shape == (len(all_boxes), 257)

if images_with_missing_features:
    print(f"\n❌ Pipeline test finished with errors: {images_with_missing_features} image(s) had boxes missing features")
else:
    print("\n✅ Pipeline test complete!")

In [None]:
# Cell 8: Visualize detections
# Draws the first test image and overlays one red rectangle plus a score label
# for every detection returned for the first class prompt.
test_img = test_image_files[0]
prompt = CLASS_PROMPTS[0]
image = Image.open(test_img).convert("RGB")
prediction = model.predict_with_text(test_img, prompt)

fig, ax = plt.subplots(1, 1, figsize=(12, 8))
ax.imshow(image)
ax.set_title(f"{test_img.name} - '{prompt}'")
ax.axis('off')

detection_count = prediction.boxes.shape[0]
if detection_count > 0:
    detections = zip(prediction.boxes.cpu().numpy(), prediction.scores.cpu().numpy())

    for (left, top, right, bottom), confidence in detections:
        # Boxes are unpacked as two corner points; Rectangle takes one corner
        # together with a width and a height.
        outline = patches.Rectangle(
            (left, top), right - left, bottom - top,
            linewidth=2, edgecolor='red', facecolor='none'
        )
        ax.add_patch(outline)

        # Score label placed 5 units above the box's first corner
        ax.text(
            left, top - 5, f'{confidence:.2f}',
            color='white', fontsize=10,
            bbox=dict(facecolor='red', alpha=0.7, edgecolor='none', pad=2)
        )

plt.tight_layout()
plt.show()

print(f"Detections: {prediction.boxes.shape[0]}")
print(f"Features: {prediction.features.shape}")

In [None]:
# Cell 9: Cleanup
# Reset the debug flag that the model-loading cell set to "1".
os.environ["SAM3_DEBUG"] = "0"
print("✅ Test complete!")