In [1]:
import torch
import os
from PIL import Image
import torchvision.transforms as T
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA

In [2]:
# Load the pretrained DINOv2 ViT-S/14 backbone through torch.hub
# (fetched on first use, then served from the local hub cache).
dinov2 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')
# nn.Module instances start in training mode; this notebook only extracts
# features, so switch to eval mode explicitly after moving the model to the GPU.
dinov2 = dinov2.cuda().eval()

Using cache found in /home/roger/.cache/torch/hub/facebookresearch_dinov2_main
xFormers not available
xFormers not available


In [3]:
def interpolate_to_patch_size(img_bchw, patch_size, mode='nearest'):
    """Resize a batch of images so that H and W are multiples of ``patch_size``.

    Each spatial dimension is rounded *down* to the nearest multiple of
    ``patch_size`` and the batch is resampled to that size.

    Args:
        img_bchw: 4-D tensor laid out as (batch, channels, height, width).
        patch_size: Positive integer edge length of one patch, in pixels.
        mode: Resampling mode forwarded to ``torch.nn.functional.interpolate``.
            Defaults to ``'nearest'``, which is that function's own default,
            so existing two-argument calls behave exactly as before.

    Returns:
        A tuple ``(resized, target_H, target_W)`` where ``resized`` has spatial
        size ``(target_H, target_W)``.

    Raises:
        ValueError: If ``patch_size`` is not positive, or if the image is
            smaller than one patch along either spatial dimension (the
            rounded-down target size would be zero).
    """
    if patch_size <= 0:
        raise ValueError(f"patch_size must be a positive integer, got {patch_size}")

    _, _, H, W = img_bchw.shape
    target_H = H // patch_size * patch_size
    target_W = W // patch_size * patch_size

    # Fail with an actionable message instead of letting interpolate() reject
    # a zero-sized output with a low-level backend error.
    if target_H == 0 or target_W == 0:
        raise ValueError(
            f"image of size {H}x{W} is smaller than one {patch_size}x{patch_size} patch"
        )

    img_bchw = torch.nn.functional.interpolate(
        img_bchw, size=(target_H, target_W), mode=mode
    )
    return img_bchw, target_H, target_W

# Preprocessing applied to each PIL image before it is passed to DINOv2.
#  1. Force 3-channel RGB so grayscale / palette / RGBA files all yield a
#     (3, H, W) tensor.
#  2. Convert to a float tensor in [0, 1].
#  3. Normalise with the ImageNet per-channel mean/std, which is the
#     normalisation DINOv2 uses in its own data transforms (a flat 0.5/0.5
#     normalisation feeds the backbone a shifted input distribution).
transform = T.Compose([
    T.Lambda(lambda img: img.convert('RGB')),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [4]:
# Root directory of the scene to process. Set the FEAT_SCENE_DIR environment
# variable to point the notebook at another scene or machine; the default
# keeps the original location.
src_base_dir = os.environ.get(
    'FEAT_SCENE_DIR',
    '/home/roger/gaussian_feature/feat_data/scene0050_00',
)
# Input frames are read from <scene>/images; features and their PCA previews
# are written to <scene>/dinov2_vits14.
src_image_dir = os.path.join(src_base_dir, 'images')
target_feat_dir = os.path.join(src_base_dir, 'dinov2_vits14')

# Fail early, before creating any output directory, if the input is missing.
if not os.path.isdir(src_image_dir):
    raise FileNotFoundError(f"image directory not found: {src_image_dir}")

os.makedirs(target_feat_dir, exist_ok=True)

In [5]:
# Only pick up files with a known image extension so that stray entries in the
# directory (hidden files, sidecar data, ...) do not crash Image.open.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')
image_fn_list = sorted(
    fn for fn in os.listdir(src_image_dir)
    if fn.lower().endswith(IMAGE_EXTENSIONS)
)

# The PCA basis is fitted once, on the first frame, and reused for every
# later frame so all previews are projected onto the same three components.
pca = None

# Loop-invariant values, looked up once.
patch_size = dinov2.patch_size
device = next(dinov2.parameters()).device  # run inputs wherever the model lives

for image_fn in tqdm(image_fn_list):
    image_path = os.path.join(src_image_dir, image_fn)

    # Open inside a context manager so the file handle is released each
    # iteration; convert to RGB so every input yields exactly 3 channels.
    with Image.open(image_path) as pil_image:
        image = transform(pil_image.convert('RGB')).unsqueeze(0)

    image, target_H, target_W = interpolate_to_patch_size(image, patch_size)
    image = image.to(device)
    with torch.no_grad():
        # Patch tokens of the single image in the batch: (num_patches, C).
        features = dinov2.forward_features(image)["x_norm_patchtokens"][0]

    features = features.cpu().numpy()

    # Lay the flat patch sequence back out on the patch grid, then move the
    # channel axis first for the saved array.
    grid_H = target_H // patch_size
    grid_W = target_W // patch_size
    features_hwc = features.reshape((grid_H, grid_W, -1))
    features_chw = features_hwc.transpose((2, 0, 1))

    if pca is None:
        pca = PCA(n_components=3)
        pca.fit(features)

    # Project to 3 components and rescale this frame to [0, 255] for viewing.
    # The denominator is floored so a constant projection cannot divide by 0.
    pca_features = pca.transform(features)
    pca_min = pca_features.min()
    pca_range = max(pca_features.max() - pca_min, 1e-8)
    pca_features = (pca_features - pca_min) / pca_range
    pca_features = pca_features * 255

    pca_features_viz = pca_features.reshape(grid_H, grid_W, 3).astype(np.uint8)

    # Derive output names from the file stem so any input extension (and any
    # letter case) maps to <stem>.npy / <stem>.png.
    stem = os.path.splitext(image_fn)[0]
    feat_save_path = os.path.join(target_feat_dir, stem + '.npy')
    feat_viz_save_path = os.path.join(target_feat_dir, stem + '.png')

    np.save(feat_save_path, features_chw)
    Image.fromarray(pca_features_viz).save(feat_viz_save_path)

100%|██████████| 145/145 [00:29<00:00,  4.88it/s]
