This notebook is for the first task: evaluate models on SPair-71k using PCK as metric.\
This file is intended to be run on Colab, not locally.

In [1]:
# repositories
!git clone https://github.com/Luffy65/Semantic-Correspondence.git # Clone repo
!git clone https://github.com/facebookresearch/dinov3.git # DINOv3
!pip install git+https://github.com/facebookresearch/segment-anything.git # SAM

# Install requirements (requirements.txt)
!pip install -r Semantic-Correspondence/requirements.txt
!pip install -r dinov3/requirements.txt

Cloning into 'Semantic-Correspondence'...
remote: Enumerating objects: 84, done.[K
remote: Counting objects: 100% (84/84), done.[K
remote: Compressing objects: 100% (61/61), done.[K
remote: Total 84 (delta 24), reused 71 (delta 13), pack-reused 0 (from 0)[K
Receiving objects: 100% (84/84), 4.65 MiB | 44.91 MiB/s, done.
Resolving deltas: 100% (24/24), done.
Collecting git+https://github.com/facebookresearch/segment-anything.git
  Cloning https://github.com/facebookresearch/segment-anything.git to /tmp/pip-req-build-858hc5sy
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/segment-anything.git /tmp/pip-req-build-858hc5sy
  Resolved https://github.com/facebookresearch/segment-anything.git to commit dca509fe793f601edb92606367a655c15ac00fdf
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: segment_anything
  Building wheel for segment_anything (setup.py) ... [?25l[?25hdone
  Created wheel for segment_

In [2]:
# Dependencies
import torch
import os
import shutil
import gzip
import cv2
from google.colab import drive

Mounted at /content/drive
Extracting /content/drive/MyDrive/AML-PROJECT-DATA/dataset/SPair-71k.tar.gz to local VM...
Done! Data is ready at: /content/data


In [None]:
# Connect google drive, load and unzip data
# 1. Mount Drive
drive.mount('/content/drive')

# 2. Define Paths
DRIVE_ROOT = '/content/drive/MyDrive/AML-PROJECT-DATA/'
DATASET_ROOT = os.path.join(DRIVE_ROOT, 'dataset/')
DATASET_ARCHIVE = os.path.join(DATASET_ROOT, 'SPair-71k.tar.gz')
LOCAL_DATA_DIR = '/content/data'

# 3. Copy and Extract
if not os.path.exists(LOCAL_DATA_DIR):
    print(f"Extracting {DATASET_ARCHIVE} to local VM...")
    os.makedirs(LOCAL_DATA_DIR, exist_ok=True)

    # shutil works for .zip, .tar, .tar.gz, etc.
    # format='gztar' explicitly tells it to handle gzip compression
    shutil.unpack_archive(DATASET_ARCHIVE, LOCAL_DATA_DIR, format='gztar')

    print("Done! Data is ready at:", LOCAL_DATA_DIR)
else:
    print("Data already loaded.")

In [3]:
# Instantiate models
from segment_anything import SamPredictor, sam_model_registry

DINOV3_REPO_DIR = "dinov3"
CHECKPOINTS_ROOT = os.path.join(DRIVE_ROOT, 'checkpoints/')
SAM_WEIGHTS_PATH = os.path.join(CHECKPOINTS_ROOT, 'sam_vit_h_4b8939.pth')
DINOV3_WEIGHTS_PATH = os.path.join(CHECKPOINTS_ROOT, 'dinov3_vitb16_pretrain_lvd1689m-73cec8be.pth')

sam = sam_model_registry["default"](checkpoint=SAM_WEIGHTS_PATH)
sampredictor = SamPredictor(sam)

dinov2_vitb14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitb14')
dinov3_vitb16 = torch.hub.load(DINOV3_REPO_DIR, 'dinov3_vitb16', source='local', weights=DINOV3_WEIGHTS_PATH) # DINOv3 ViT model pretrained on web images

Downloading: "https://github.com/facebookresearch/dinov2/zipball/main" to /root/.cache/torch/hub/main.zip




Downloading: "https://dl.fbaipublicfiles.com/dinov2/dinov2_vitb14/dinov2_vitb14_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dinov2_vitb14_pretrain.pth


100%|██████████| 330M/330M [00:00<00:00, 361MB/s]


Downloading: "file:///content/drive/MyDrive/AML-PROJECT-DATA/checkpoints/dinov3_vitb16_pretrain_lvd1689m-73cec8be.pth" to /root/.cache/torch/hub/checkpoints/dinov3_vitb16_pretrain_lvd1689m-73cec8be.pth


100%|██████████| 327M/327M [00:03<00:00, 92.3MB/s]


In [5]:
# See if the models work
# SAM (it works)
# print(sam) # prints the layers
AEROPLANES_DIR = os.path.join(LOCAL_DATA_DIR, 'SPair-71k/JPEGImages/aeroplane')
for image_name in os.listdir(AEROPLANES_DIR):
    image_path = os.path.join(AEROPLANES_DIR, image_name)
    image = cv2.imread(image_path)
    sampredictor.set_image(image)
    # masks, _, _ = predictor.predict(<input_prompts>)
    print(sampredictor.predict())

(array([[[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [False, False, False, ..., False, Fa

KeyboardInterrupt: 

In [None]:
# Define evaluation metric
def computePCKatT(thresholds=[0.05, 0.1, 0.2]):
    ... #TODO

In [None]:
# Access Dataset
# ... TODO

In [None]:
# Evaluate models on SPair-71k
# computePCKatT(sam(Dataset)) TODO