In [3]:
import numpy as np
import torch
from transformers import AutoImageProcessor, AutoModel
import matplotlib.pyplot as plt
from PIL import Image
from tifffile import imread

In [4]:

model_name = "facebook/dinov3-vitl16-pretrain-sat493m"  # smaller + sat-trained

processor = AutoImageProcessor.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained(model_name).eval().to(device)

print("device:", device)

device: cuda


In [5]:
labels = imread("/run/media/mak/Partition of 1TB disk/SH_dataset/planet_labels_2022.tif")


In [6]:
images_folder = "/home/mak/PycharmProjects/SegEdge/experiments/get_data_from_api/patches_mt/"
labels = imread("/run/media/mak/Partition of 1TB disk/SH_dataset/planet_labels_2022.tif")


In [9]:
import rasterio
from rasterio.warp import reproject, Resampling
from rasterio.mask import mask
from shapely.geometry import box
import glob as glob


def subset_label_to_image(img_path, lab_path):
    # --- 1. Open the image (this defines the reference grid) ---
    with rasterio.open(img_path) as src_img:
        img_bounds = src_img.bounds
        img_meta   = src_img.meta.copy()
        img_crs    = src_img.crs

    # --- 2. Open the label file ---
    with rasterio.open(lab_path) as src_lab:

        # If CRS mismatch → reproject labels onto image grid
        if src_lab.crs != img_crs:
            # prepare an in-memory raster with same grid as img
            new_meta = img_meta.copy()
            new_meta.update(
                dtype = src_lab.dtypes[0],
                count = src_lab.count,
            )
            with rasterio.io.MemoryFile() as mem:
                with mem.open(**new_meta) as dst:
                    for i in range(1, src_lab.count + 1):
                        reproject(
                            source=rasterio.band(src_lab, i),
                            destination=rasterio.band(dst, i),
                            src_transform=src_lab.transform,
                            src_crs=src_lab.crs,
                            dst_transform=img_meta["transform"],
                            dst_crs=img_crs,
                            dst_width=img_meta["width"],
                            dst_height=img_meta["height"],
                            resampling=Resampling.nearest,
                        )
                    labels_aligned = dst.read()  # (bands, H, W)
        else:
            # CRS is already same → just mask/crop
            geom = [box(*img_bounds).__geo_interface__]
            labels_aligned, _ = mask(src_lab, geom, crop=True)

    return labels_aligned



#Create image,label pairs
image_paths = glob.glob(images_folder + "*.tif")
print(f"Found {len(image_paths)} images")

image_label_pairs = []
for img_path in image_paths:
    img_labels = subset_label_to_image(img_path, "/run/media/mak/Partition of 1TB disk/SH_dataset/planet_labels_2022.tif")
    image_label_pairs.append((img_path, img_labels))

print(f"Created {len(image_label_pairs)} image-label pairs")



Found 96 images


In [10]:
def extract_patch_features_single_scale(
    image_hw3,
    model,
    processor,
    device,
    ps=16,
    aggregate_layers=None,
):
    """
    Extract DINOv3 patch features for a single scale.
    We disable internal resizing/cropping in the processor and rely
    on external cropping to multiples of ps.
    Returns:
        feats: (Hp, Wp, C) numpy, L2-normalized
        Hp, Wp: patch-grid size
    """
    # processor: keep resolution
    inputs = processor(
        images=image_hw3,
        return_tensors="pt",
        do_resize=False,
        do_center_crop=False
    ).to(device)

    px = inputs["pixel_values"]
    _, _, Hproc, Wproc = px.shape

    with torch.no_grad():
        if aggregate_layers is None:
            out = model(**inputs)
            tokens = out.last_hidden_state
        else:
            out = model(**inputs, output_hidden_states=True)
            hs = out.hidden_states
            layers = [hs[i] for i in aggregate_layers]
            tokens = torch.stack(layers, dim=0).mean(0)

    R = getattr(model.config, "num_register_tokens", 0)
    patch_tokens = tokens[:, 1 + R:, :]  # drop CLS + registers

    N, C = patch_tokens.shape[1], patch_tokens.shape[2]
    Hp = Hproc // ps
    Wp = Wproc // ps
    assert Hp * Wp == N, f"Patch-grid mismatch: {Hp}*{Wp} != {N}"

    feats = patch_tokens[0].cpu().numpy().reshape(Hp, Wp, C)
    feats = l2_normalize(feats)
    return feats, Hp, Wp

[('/home/mak/PycharmProjects/SegEdge/experiments/get_data_from_api/patches_mt/dop20_592000_5977000_1km_20cm.tif',
  array([[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]]], shape=(1, 5000, 5000), dtype=uint8)),
 ('/home/mak/PycharmProjects/SegEdge/experiments/get_data_from_api/patches_mt/dop20_592000_5976000_1km_20cm.tif',
  array([[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0]]], shape=(1, 5000, 5000), dtype=uint8)),
 ('/home/mak/PycharmProjects/SegEdge/experiments/get_data_from_api/patches_mt/dop20_592000_5973000_1km_20cm.tif',
  array([[[0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          [0, 0, 0, ..., 0, 0, 0],
          ...,
        