In [None]:
import pandas as pd
from pathlib import Path


In [None]:
# Dataset root folders, given relative to the notebook's working directory.
dataroot_img = Path("./../data/raw/nuimages-v1.0-mini")  # nuImages v1.0 mini
dataroot_lidar = Path("./../data/raw/nuScenes-panoptic-v1.0-mini")  # nuScenes panoptic v1.0 mini

In [None]:
import torch
from torch import nn # nn contains all of PyTorch's building blocks for neural networks
import matplotlib.pyplot as plt

In [None]:
# Pick the compute device: the GPU when CUDA is usable, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"


In [None]:
# Load the 'dinov2_vitg14_reg' entry point from the facebookresearch/dinov2 hub repo.
dinov2_vitg14_reg = torch.hub.load(
    repo_or_dir='facebookresearch/dinov2',
    model='dinov2_vitg14_reg',
)
# Switch to evaluation mode; as the cell's last expression this also displays the module.
dinov2_vitg14_reg.eval()

In [None]:
from torchvision import transforms

# Preprocessing applied to every input image before it reaches the backbone.
_preprocess_steps = [
    # Scale the shorter side to 256 px using bicubic interpolation.
    transforms.Resize(256, interpolation=transforms.InterpolationMode.BICUBIC),
    # Keep the central 224 x 224 window.
    transforms.CenterCrop(224),
    # PIL image -> float tensor.
    transforms.ToTensor(),
    # Per-channel standardisation (the commonly used ImageNet statistics).
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
image_transforms = transforms.Compose(_preprocess_steps)

In [None]:
import torchinfo 
from torchinfo import summary

# Disabled variant that additionally passes an explicit input size (kept for reference):
# summary(dinov2_vitg14_reg,
#         input_size = image_transforms(img).size(),
#         col_names=["num_params", "trainable"],
#         )

# Summarise the backbone, reporting the parameter count and trainable flag columns.
summary_list = summary(
    dinov2_vitg14_reg,
    col_names=["num_params", "trainable"],
)
# Display the summary as the cell output.
summary_list

In [None]:
# Show the first parameter tensor the model yields (rendered as the cell output).
next(iter(dinov2_vitg14_reg.parameters()))

In [None]:
from PIL import Image

# 2. Open one CAM_BACK sample and turn it into a model-ready batch of size 1.
img_path = dataroot_img/'samples/CAM_BACK/n003-2018-01-03-12-03-23+0800__CAM_BACK__1514952316316487.jpg'
# Image.open is lazy and keeps the file handle open; the context manager releases
# it once the transform pipeline has read the pixels.
with Image.open(img_path) as pil_img:
    # Force 3-channel RGB so Normalize (3 means / 3 stds) always matches the
    # channel count, whatever mode the file was stored in.
    img = image_transforms(pil_img.convert("RGB")).unsqueeze(0)  # Add batch dimension

In [None]:
# Forward pass with gradient tracking switched off; only the outputs are needed.
with torch.set_grad_enabled(False):
    outputs = dinov2_vitg14_reg(img)

In [None]:
# Since DINOv2 is primarily for feature extraction, it doesn't provide direct segmentation
# You might need additional steps to derive segmentation from the features
# Take the first element of `outputs` as the feature tensor.
# NOTE(review): if `outputs` is a single batched tensor rather than a tuple, this
# indexing selects sample 0 and drops the batch dimension — confirm the rank that
# downstream code expects.
features = outputs[0]

In [None]:
# For classification, put a linear layer on top of the extracted features.
# Replace this with your classification head if you have a different setup.
# NOTE: the head is created with freshly initialised weights, so the scores
# below are not meaningful predictions until it has been trained.
num_classes = 6  # num_classes is the number of classes in your dataset

# Read the feature width from the last axis so this works whether `features`
# is a single vector (dim,) or a batch (batch, dim); `shape[1]` raises an
# IndexError on a 1-D tensor.
classifier = torch.nn.Linear(features.shape[-1], num_classes)
classification_logits = classifier(features)

# Post-process the classification output: softmax over the class axis, which
# is always the last one regardless of whether a batch axis is present.
classification_scores = torch.nn.functional.softmax(classification_logits, dim=-1)

# Convert to numpy arrays or other formats as needed.
# The scores depend on the head's trainable weights, so they carry autograd
# history; detach (and move to host memory) first, otherwise .numpy() raises.
classification_scores = classification_scores.detach().cpu().numpy()

# Process these outputs as needed for your application

In [None]:
# Embed the preprocessed image and export the result as a NumPy array.
# The forward pass runs under no_grad so no autograd graph is built for the
# backbone just to be discarded by the conversion.
with torch.no_grad():
    res_img = dinov2_vitg14_reg(img).detach().cpu().numpy()

In [None]:
# `img` already holds the preprocessed, batched tensor, so feed it to the model
# directly: running it through image_transforms again fails because ToTensor
# accepts only PIL images / ndarrays, and a second unsqueeze would add a 5th axis.
with torch.no_grad():
    embedding_size = dinov2_vitg14_reg(img).size()
# Display the output size as the cell result.
embedding_size

In [None]:
# For a NumPy array `.size` is the total number of elements (an int), not the
# per-axis dimensions; use `.shape` if the dimensions are wanted.
res_img.size

In [None]:
# `img` is already the preprocessed, batched tensor, so it goes to the model
# as-is (re-applying image_transforms to a tensor fails inside ToTensor).
with torch.no_grad():
    embedding = dinov2_vitg14_reg(img).detach().cpu().numpy()
# Image.open only accepts a file name or file object; an in-memory array has to
# be wrapped with Image.fromarray instead.
# NOTE(review): a 2-D float32 array becomes a single-channel mode "F" image —
# confirm this is the visualisation that was intended.
new = Image.fromarray(embedding)

In [None]:
import math
# Square root of 1536; the result is not a whole number (about 39.19).
# NOTE(review): 1536 is presumably the length of the embedding produced above — confirm.
math.sqrt(1536)

In [None]:
import torch
import requests
from pathlib import Path

# Import dinov2 backbone
dinov2_vitl14_reg = torch.hub.load('facebookresearch/dinov2', 'dinov2_vitl14_reg')

# Import pretrained head for depth estimation (downloaded once, then reused from disk)
head_path = './../models/dinov2/dinov2_vitl14_kitti_dpt_head.pth'
head_url = 'https://dl.fbaipublicfiles.com/dinov2/dinov2_vitl14/dinov2_vitl14_kitti_dpt_head.pth'
if not Path(head_path).exists():
    # The target folder may be missing on a fresh checkout; open() would fail without it.
    Path(head_path).parent.mkdir(parents=True, exist_ok=True)
    # Bound the wait and fail loudly on HTTP errors, so an error page is never
    # written to disk and later mistaken for the checkpoint by the exists() check.
    request = requests.get(head_url, timeout=60)
    request.raise_for_status()
    # Reuse head_path instead of repeating the literal, so the existence check
    # and the write can never point at different files.
    with open(head_path, "wb") as f:
        f.write(request.content)