# 🦕 DINOv2 特征可视化演示
# DINOv2 Feature Visualization Demo

这个notebook展示了如何使用DINOv2模型进行特征可视化，类似于原论文中的效果。

This notebook demonstrates how to use DINOv2 models for feature visualization, similar to the figures shown in the original paper.

In [None]:
# 安装必要的库
# Install required packages
!pip install torch torchvision torchaudio
!pip install opencv-python-headless
!pip install matplotlib
!pip install Pillow
!pip install requests

In [None]:
# Import the libraries used throughout the notebook
import torch
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import requests
from io import BytesIO
import cv2
import warnings
# NOTE: this silences every Python warning raised after this point, for the
# whole session, not just the ones produced by this notebook's own code
warnings.filterwarnings('ignore')

# Report the environment up front so version / GPU problems are visible early
print("📚 Libraries imported successfully!")
print(f"🔥 PyTorch version: {torch.__version__}")
print(f"🖥️  CUDA available: {torch.cuda.is_available()}")

In [None]:
def load_dinov2_model(model_name='dinov2_vits14'):
    """
    Fetch a pretrained DINOv2 backbone through ``torch.hub`` in eval mode.

    Args:
        model_name: Entry point in the ``facebookresearch/dinov2`` hub repo,
            e.g. 'dinov2_vits14', 'dinov2_vitb14', 'dinov2_vitl14' or
            'dinov2_vitg14'.

    Returns:
        The loaded model, already switched to evaluation mode.
    """
    hub_repo = 'facebookresearch/dinov2'

    print(f"Loading {model_name}...")
    backbone = torch.hub.load(hub_repo, model_name, pretrained=True)
    # Evaluation mode: the model is only used for inference in this notebook
    backbone.eval()
    print(f"✅ {model_name} loaded successfully!")

    return backbone

def preprocess_image(image_path_or_url, size=(224, 224), timeout=30):
    """
    Load an image from a URL or local path and turn it into a model input.

    Args:
        image_path_or_url: An ``http://`` / ``https://`` URL, or anything
            ``PIL.Image.open`` accepts (path string, ``pathlib.Path``, ...).
        size: Target ``(width, height)`` handed to ``Image.resize``.
        timeout: Seconds to wait for the HTTP download before giving up.

    Returns:
        ``(image_tensor, image)`` where ``image_tensor`` is a float tensor of
        shape ``[1, 3, height, width]`` normalized with the ImageNet mean/std
        and ``image`` is the resized RGB PIL image. On any failure the error
        is printed and ``(None, None)`` is returned instead of raising.
    """
    try:
        # Match a real URL scheme so a local file such as "http_cat.jpg" is not
        # mistaken for a URL; str() lets pathlib.Path inputs through the check.
        if str(image_path_or_url).startswith(('http://', 'https://')):
            # The timeout keeps a stalled server from hanging the notebook, and
            # raise_for_status() reports 4xx/5xx responses as HTTP errors rather
            # than letting PIL fail on an HTML error page.
            response = requests.get(image_path_or_url, timeout=timeout)
            response.raise_for_status()
            image = Image.open(BytesIO(response.content))
        else:
            image = Image.open(image_path_or_url)

        # The normalization below assumes exactly three channels
        if image.mode != 'RGB':
            image = image.convert('RGB')

        image = image.resize(size, Image.Resampling.LANCZOS)

        # HWC uint8 -> CHW float in [0, 1]
        image_tensor = torch.tensor(np.array(image)).permute(2, 0, 1).float() / 255.0

        # Per-channel ImageNet normalization
        mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
        std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
        image_tensor = (image_tensor - mean) / std

        # Add the batch dimension expected by the model
        return image_tensor.unsqueeze(0), image
    except Exception as e:
        # Deliberate best-effort boundary: callers test for None instead of
        # handling exceptions themselves.
        print(f"Error processing image: {e}")
        return None, None

In [None]:
def extract_features(model, image_tensor):
    """
    Run the backbone on an image batch and return its patch-token features.

    Args:
        model: Object exposing ``forward_features(image_tensor)``.
        image_tensor: Input batch, forwarded to the model unchanged.

    Returns:
        ``output['x_norm_patchtokens']`` when the model returns a dict;
        otherwise the raw output with index 0 along dim 1 dropped (treated as
        the CLS token -- TODO confirm for models that emit extra tokens).
    """
    # Inference only, so no autograd graph is recorded
    with torch.no_grad():
        output = model.forward_features(image_tensor)

        if not isinstance(output, dict):
            # Plain tensor output: discard position 0 and keep the rest
            return output[:, 1:]

        return output['x_norm_patchtokens']

def visualize_attention_map(features, original_image, patch_size=14, image_size=224):
    """
    Turn patch features into a heatmap at image resolution.

    The per-patch score is the mean absolute feature value. It is a simple
    activation-magnitude proxy, not a transformer attention weight.

    Args:
        features: Patch features shaped ``[1, grid * grid, feature_dim]`` with
            ``grid = image_size // patch_size``. May require grad or live on
            an accelerator; it is detached and moved to the CPU here.
        original_image: Unused; kept so existing callers keep working.
        patch_size: Side length of one patch in pixels.
        image_size: Side length of the (square) input image in pixels.

    Returns:
        A ``[image_size, image_size]`` numpy array min-max scaled to [0, 1].

    Raises:
        RuntimeError: If the number of patch tokens is not ``grid * grid``.
    """
    grid = image_size // patch_size

    # .numpy() below rejects tensors that require grad or sit on an
    # accelerator, so detach and move to the CPU first.
    tokens = features.detach().cpu().squeeze(0)  # [grid*grid, feature_dim]
    if tokens.dtype in (torch.float16, torch.bfloat16):
        # Upcast reduced-precision features before interpolation / numpy export
        tokens = tokens.float()

    # One scalar score per patch
    scores = tokens.abs().mean(dim=1)  # [grid*grid]

    # Lay the scores out on the patch grid, with batch and channel dims for interpolate
    heat = scores.reshape(1, 1, grid, grid)
    heat = F.interpolate(
        heat,
        size=(image_size, image_size),
        mode='bilinear',
        align_corners=False,
    )
    # Index the two leading dims explicitly so a 1x1 map stays 2-D
    heat = heat[0, 0].numpy()

    # Min-max normalize; the epsilon avoids division by zero on a constant map
    lowest, highest = heat.min(), heat.max()
    return (heat - lowest) / (highest - lowest + 1e-8)

def create_colored_attention_map(attention_map):
    """
    Render a heatmap as an RGB image using OpenCV's JET colormap.

    Args:
        attention_map: Numpy array that is scaled by 255 and cast to uint8,
            so values are expected to lie in [0, 1].

    Returns:
        The color-mapped image converted from BGR to RGB channel order.
    """
    gray_levels = (attention_map * 255).astype(np.uint8)
    bgr_heatmap = cv2.applyColorMap(gray_levels, cv2.COLORMAP_JET)
    # Swap channel order from BGR to RGB before handing the image to the caller
    return cv2.cvtColor(bgr_heatmap, cv2.COLOR_BGR2RGB)

In [None]:
def demo_dinov2():
    """
    Run the end-to-end demo: load two models, process one image, plot heatmaps.

    Loads 'dinov2_vits14' and 'dinov2_vitb14', preprocesses a fixed example
    image URL, extracts patch features from both models and shows the original
    image next to the two colored heatmaps in a 2x2 figure. Prints progress
    along the way and returns early (after printing an error) when the image
    cannot be processed.

    NOTE(review): the panel titled "DINO" is produced by the 'dinov2_vits14'
    checkpoint, so both panels come from DINOv2 models of different sizes.
    """
    for banner_line in (
        "🦕 DINOv2 特征可视化演示",
        "🦕 DINOv2 Feature Visualization Demo",
        "=" * 50,
    ):
        print(banner_line)

    # Two checkpoints of different size are compared side by side
    print("📥 Loading DINOv2 models...")
    small_model = load_dinov2_model('dinov2_vits14')  # shown in the "DINO" panel
    base_model = load_dinov2_model('dinov2_vitb14')  # shown in the "DINOv2" panel

    # Example image; replace with your own URL or local path
    image_url = "https://images.unsplash.com/photo-1552053831-71594a27632d?w=400&h=300&fit=crop"

    print("🖼️  Processing image...")
    image_tensor, original_image = preprocess_image(image_url)
    if image_tensor is None:
        print("❌ Failed to process image. Please check the URL or path.")
        return

    print("🔍 Extracting features from DINO model...")
    small_features = extract_features(small_model, image_tensor)

    print("🔍 Extracting features from DINOv2 model...")
    base_features = extract_features(base_model, image_tensor)

    print("🎨 Creating attention maps...")
    small_heatmap = create_colored_attention_map(
        visualize_attention_map(small_features, original_image)
    )
    base_heatmap = create_colored_attention_map(
        visualize_attention_map(base_features, original_image)
    )

    fig, axes = plt.subplots(2, 2, figsize=(12, 10))
    fig.patch.set_facecolor('white')

    # (axis, picture, title, title-badge color); None means a plain title
    panels = (
        (axes[0, 0], original_image, "Original Image", None),
        (axes[0, 1], small_heatmap, "DINO", 'lightblue'),
        (axes[1, 1], base_heatmap, "DINOv2", 'lightgreen'),
    )
    for ax, picture, title, badge_color in panels:
        title_style = dict(fontsize=14, fontweight='bold', pad=20)
        if badge_color is not None:
            title_style['bbox'] = dict(boxstyle="round,pad=0.3", facecolor=badge_color)
        ax.imshow(picture)
        ax.set_title(title, **title_style)
        ax.axis('off')

    # The bottom-left cell stays empty
    axes[1, 0].axis('off')

    plt.tight_layout()
    plt.suptitle("🦕 DINOv2 Feature Visualization Demo", fontsize=16, fontweight='bold', y=0.95)
    plt.show()

    closing_lines = (
        "\n✅ Demo completed!",
        "\n📝 说明 / Explanation:",
        "- 原图显示了输入的图像 / Original image shows the input",
        "- DINO和DINOv2显示了不同版本模型提取的特征热力图 / DINO and DINOv2 show feature heatmaps from different model versions",
        "- 颜色越亮的区域表示模型认为越重要的特征 / Brighter colors indicate more important features",
        "- DINOv2相比DINO通常能捕获更丰富和准确的语义信息 / DINOv2 typically captures richer and more accurate semantic information than DINO",
    )
    for line in closing_lines:
        print(line)

In [None]:
# 🚀 Run the main demo: loads both models, fetches the example image and
# shows the comparison figure
demo_dinov2()

## 🎯 自定义图片处理
## Custom Image Processing

你可以使用下面的函数来处理你自己的图片：

You can use the function below to process your own images:

In [None]:
def process_custom_image(image_path):
    """
    Run the two-model heatmap comparison on a user-supplied image.

    Args:
        image_path: URL or local path, passed straight to ``preprocess_image``.

    Loads 'dinov2_vits14' and 'dinov2_vitb14' on every call, then shows the
    original image and both colored heatmaps in a single row. Prints an error
    and returns early when the image cannot be processed.
    """
    print(f"🖼️  Processing custom image: {image_path}")

    # Both models are loaded before the image is touched
    backbones = [
        load_dinov2_model(checkpoint)
        for checkpoint in ('dinov2_vits14', 'dinov2_vitb14')
    ]

    image_tensor, original_image = preprocess_image(image_path)
    if image_tensor is None:
        print("❌ Failed to process image. Please check the URL or path.")
        return

    # features -> score map -> colored heatmap, once per model
    heatmaps = [
        create_colored_attention_map(
            visualize_attention_map(
                extract_features(backbone, image_tensor), original_image
            )
        )
        for backbone in backbones
    ]

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    fig.patch.set_facecolor('white')

    panels = (
        ("Original Image", original_image),
        ("DINO Features", heatmaps[0]),
        ("DINOv2 Features", heatmaps[1]),
    )
    for ax, (title, picture) in zip(axes, panels):
        ax.imshow(picture)
        ax.set_title(title, fontsize=12, fontweight='bold')
        ax.axis('off')

    plt.tight_layout()
    plt.show()

    print("✅ Custom image processing completed!")

In [None]:
# 🎨 Usage examples:
#
# Process an online image:
# process_custom_image("https://your-image-url.com/image.jpg")
#
# Process a local image:
# process_custom_image("/path/to/your/image.jpg")

# Example: process another dog image
example_url = "https://images.unsplash.com/photo-1534361960057-19889db9621e?w=400&h=300&fit=crop"
process_custom_image(example_url)

## 🎓 总结 / Summary

这个demo展示了：
This demo shows:

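1. **模型加载** / **Model Loading**: 如何加载预训练的DINOv2模型 / How to load pretrained DINOv2 models
2. **特征提取** / **Feature Extraction**: 如何从图像中提取深度特征 / How to extract deep features from an image
3. **可视化** / **Visualization**: 如何将特征转换为直观的热力图 / How to turn features into an intuitive heatmap
4. **对比分析** / **Comparative Analysis**: 不同模型版本的特征差异 / How features differ between model versions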

🔬 **教学要点** / **Teaching Points**:
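- DINOv2是自监督学习的重要突破 / DINOv2 is an important breakthrough in self-supervised learning
- 特征可视化帮助理解模型"看到"什么 / Feature visualization helps us understand what the model "sees"
- 不同模型架构产生不同的特征表示 / Different model architectures produce different feature representations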

📚 **进一步学习** / **Further Learning**:
- [DINOv2 Paper](https://arxiv.org/abs/2304.07193)
- [Facebook Research DINOv2](https://github.com/facebookresearch/dinov2)
- [Vision Transformer原理](https://arxiv.org/abs/2010.11929)