Remember to install the required system packages before running this notebook:

```sh
sudo apt install ffmpeg libavcodec-extra
```

In [None]:
# Download model checkpoints: fetch the `dinov2_vits14_reg_lc` entry point from the
# facebookresearch/dinov2 hub repository via torch.hub.
import torch

dinov2_vits14_reg_lc = torch.hub.load(
    repo_or_dir='facebookresearch/dinov2',
    model='dinov2_vits14_reg_lc',
)

In [None]:
import sys
import os
import torch
import numpy as np
from tqdm import tqdm

# Forget every cached `dinov2` module -- the package itself AND its submodules -- so the
# imports below are resolved from scratch. Deleting only the top-level 'dinov2' entry
# would leave 'dinov2.*' submodules in sys.modules, and `from dinov2.eval.setup import ...`
# would then silently reuse those stale copies.
for _cached_name in [name for name in sys.modules
                     if name == 'dinov2' or name.startswith('dinov2.')]:
    del sys.modules[_cached_name]

# Absolute path of the project root (one directory above the current working directory).
# Adjust the relative path if your notebook is deeper in directories.
project_root = os.path.abspath("..")

# Make the project root the first sys.path entry. Removing an existing entry first keeps
# the cell idempotent: re-running it does not pile up duplicate entries.
if project_root in sys.path:
    sys.path.remove(project_root)
sys.path.insert(0, project_root)

# Now dinov2 can be imported; print its location to confirm which copy is in use.
import dinov2
print("Currently using dinov2 from:", dinov2.__file__)

from dinov2.eval.setup import build_model_for_eval
from dinov2.configs import load_and_merge_config
# Star import kept so every helper stays available to the rest of the notebook
# (presumably this is where load_preprocess_video, print_video_model_stats,
# get_patch_embeddings, two_stage_pca and save_triple_video come from -- TODO confirm
# and switch to explicit imports).
from dinov2.utils.visualize import *

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
def main(video_path, model, output_path, threshold=0.6, device='cuda'):
    """Embed each frame of a video, reduce the embeddings with two-stage PCA, and save a video.

    Args:
        video_path: Path of the input video, handed to ``load_preprocess_video``.
        model: Model object; its ``patch_size`` attribute is read here and the model is
            passed to ``print_video_model_stats`` and ``get_patch_embeddings``.
        output_path: Destination path forwarded to ``save_triple_video``.
        threshold: Value forwarded to ``two_stage_pca``.
        device: Kept for backward compatibility with existing callers. This function
            does not use it and never moves the model or the frames itself.

    Raises:
        ValueError: If the loaded video yields no frames to embed.
    """
    # 448 is a multiple of patch_size (14)
    raw_tensor, input_tensor, fps = load_preprocess_video(
        video_path, target_size=448, patch_size=model.patch_size
    )
    # Only the frame count, patch size and patch count are needed below.
    B, _, _, _, patch_size, _, patch_num = print_video_model_stats(input_tensor, model)

    patch_embed_list = []
    with torch.no_grad():
        for i in tqdm(range(B), desc="Processing Frames", unit="frame"):
            frame = input_tensor[i].unsqueeze(0)  # add a leading batch dim: (1, C, H, W)
            patch_embed_list.append(get_patch_embeddings(model, frame))

    # np.vstack fails with an unrelated "need at least one array" message on an empty
    # list, so report the real problem (no frames) explicitly instead.
    if not patch_embed_list:
        raise ValueError(f"No frames to process in video: {video_path!r}")

    # Stack the per-frame results along the first axis
    # (assumes each one is (1, num_patches, embedding_dim) -- TODO confirm).
    patch_embeds = np.vstack(patch_embed_list)

    # Show progress for PCA processing
    print("Performing Two-Stage PCA...")
    reduced_embeds, reduced_fg_embeds, nums_of_fg_patches, masks = two_stage_pca(
        patch_embeds, threshold=threshold
    )

    # Saving video with progress
    print("Saving output video...")
    save_triple_video(
        raw_tensor,
        reduced_embeds,
        reduced_fg_embeds,
        nums_of_fg_patches,
        masks,
        patch_num,
        patch_size,
        output_path=output_path,
        fps=fps,
    )

    print("Processing completed! ✅")
    


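For crane, use model size `b` and threshold 0.7.
For dog, use model size `b` and threshold 0.7.
For pong, use model size `b` and threshold 0.8 (the values set in the cell below).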

In [None]:
# Run settings: model size letter and the threshold handed to main().
model_size = "b"
threshold = 0.8

# Input video and output file (the output name encodes the model size and threshold).
# NOTE(review): the input name has a doubled ".mp4" extension -- confirm it matches the file on disk.
video_path = "./data/pong/pong.mp4.mp4"
output_path = f"./data/embed_pong_{model_size}_{threshold}.mp4"

# Use `dinov2_vitb14_pretrain`: both the eval config name and the checkpoint file are
# derived from model_size.
# NOTE(review): the config is "..._pretrain" while the checkpoint is "..._reg4_pretrain" --
# confirm this pairing is intended.
config_name = f'eval/vit{model_size}14_pretrain'
checkpoint_path = f'../dinov2/checkpoints/dinov2_vit{model_size}14_reg4_pretrain.pth'

conf = load_and_merge_config(config_name)
model = build_model_for_eval(conf, checkpoint_path)

main(video_path, model, output_path, threshold=threshold, device=device)