# SwellSight Wave Analysis - DINOv2 Backbone Integration

This notebook integrates the DINOv2 self-supervised vision transformer as the feature-extraction backbone for wave analysis.

## Overview
This notebook provides:
- DINOv2 ViT-L/14 model loading and configuration
- 4-channel input adaptation (RGB + Depth)
- Feature extraction with frozen backbone
- Feature quality validation and visualization
- Integration tests with beach cam images

## DINOv2 Architecture
- **Model**: Vision Transformer Large (ViT-L/14)
- **Input**: 4 channels (RGB + Depth), 518x518 resolution
- **Output**: 1024-dimensional feature vectors
- **Training**: Frozen backbone (preserves pre-trained knowledge)

## Prerequisites
- Complete execution of notebooks 01-05
- Depth maps available from notebook 03
- Beach cam images available

---

## 1. Setup and Configuration

In [None]:
import sys
import os
from pathlib import Path
import json
import logging
import warnings
# Suppress every Python warning for the rest of the session. This keeps cell
# output readable, but it also hides deprecation notices from dependencies.
warnings.filterwarnings('ignore')

# Configure root logging: INFO level, "timestamp - level - message" lines
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Put the current working directory first on sys.path so that the
# `src.swellsight` imports below can be resolved.
# NOTE(review): this presumably requires the notebook to be launched from the
# repository root (the directory that contains `src/`) — confirm.
sys.path.insert(0, str(Path.cwd()))

print("📦 Importing SwellSight production modules...")

# Project-local modules: the backbone wrapper plus hardware, error-handling,
# performance and configuration utilities
from src.swellsight.models.backbone import DINOv2Backbone
from src.swellsight.utils.hardware import HardwareManager
from src.swellsight.utils.error_handler import error_handler, retry_with_backoff
from src.swellsight.utils.performance import PerformanceOptimizer
from src.swellsight.utils.config import load_config

print("✅ Production modules loaded successfully")

In [None]:
import numpy as np
import torch
import matplotlib.pyplot as plt
from PIL import Image
from tqdm.auto import tqdm

print("\n🔧 Loading configuration...")

# Pipeline settings are read from config.json through the project's loader.
config = load_config("config.json")

print(f"✅ Configuration loaded: {config['pipeline']['name']}")
print(f"   Version: {config['pipeline']['version']}")

# Resolve the input/output locations declared under the "paths" section.
paths_cfg = config['paths']
DATA_DIR = Path(paths_cfg['data_dir'])
OUTPUT_DIR = Path(paths_cfg['output_dir'])

# Depth maps are read from, and features written to, sub-folders of the
# output directory.
DEPTH_MAPS_DIR = OUTPUT_DIR.joinpath("depth_maps")
FEATURES_DIR = OUTPUT_DIR.joinpath("dinov2_features")

# Only the features folder is created here; missing parents are created too
# and an already existing folder is not an error.
FEATURES_DIR.mkdir(parents=True, exist_ok=True)

print(f"\n📁 Working directories:")
print(f"   Data: {DATA_DIR}")
print(f"   Depth maps: {DEPTH_MAPS_DIR}")
print(f"   Features output: {FEATURES_DIR}")

## 2. Hardware Detection and Configuration

In [None]:
print("🔍 Detecting hardware configuration...")

# Ask the project's HardwareManager for its hardware_info record.
hardware_manager = HardwareManager()
hw_info = hardware_manager.hardware_info

print(f"\n🚀 Hardware Configuration:")
print(f"   Device: {hw_info.device_type}")
print(f"   Name: {hw_info.device_name}")
print(f"   Memory: {hw_info.memory_total_gb:.1f} GB total")

if hw_info.device_type != "cuda":
    # Anything other than CUDA is reported as the slow CPU path.
    print("\n⚠️  Running on CPU - processing will be slower")
else:
    print(f"   CUDA Version: {hw_info.cuda_version}")
    print(f"   Compute Capability: {hw_info.compute_capability}")

    # This notebook treats 8 GB as the minimum comfortable memory for
    # DINOv2 ViT-L/14; below that only a warning is printed.
    if hw_info.memory_total_gb < 8:
        print("\n⚠️  Warning: Less than 8GB GPU memory")
        print("   Consider using smaller model variant or CPU")
    else:
        print("\n✅ Sufficient GPU memory for DINOv2 ViT-L/14")

# The detected device type is reused by later cells for `.to(device)` calls.
device = hw_info.device_type
print(f"\n✅ Using device: {device}")

## 3. Sub-task 7.1: DINOv2 Backbone Loading and Adaptation

In [None]:
print("🧠 Sub-task 7.1: Loading DINOv2 backbone...")
print("\nInitializing DINOv2 ViT-L/14 with:")
print("   - Model: dinov2_vitl14")
print("   - Feature dimension: 1024")
print("   - Frozen backbone: True")
print("   - Input channels: 4 (RGB + Depth)")

try:
    # Build the frozen backbone wrapper, then place it on the selected device
    # and switch it to evaluation mode.
    backbone = DINOv2Backbone(
        model_name="dinov2_vitl14",
        freeze=True
    )
    backbone = backbone.to(device)
    backbone.eval()

    print("\n✅ DINOv2 backbone loaded successfully!")
    print(f"   Feature dimension: {backbone.get_feature_dim()}")
    print(f"   Input channels: {backbone.input_channels}")
    print(f"   Target resolution: {backbone.target_size}")
    print(f"   Frozen: {backbone.freeze}")

    # Walk the wrapped model's parameters once, recording each parameter's
    # size together with whether it would receive gradients.
    param_info = [
        (param.numel(), param.requires_grad)
        for param in backbone._backbone.parameters()
    ]
    total_params = sum(size for size, _ in param_info)
    trainable_params = sum(size for size, needs_grad in param_info if needs_grad)

    print(f"\n📊 Parameter Status:")
    print(f"   Total parameters: {total_params:,}")
    print(f"   Trainable parameters: {trainable_params:,}")
    print(f"   Frozen parameters: {total_params - trainable_params:,}")

    # A frozen backbone must expose no trainable parameters at all.
    if trainable_params:
        print(f"   ⚠️  Warning: {trainable_params} parameters are trainable")
    else:
        print("   ✅ Backbone is properly frozen")

except Exception as e:
    # Report the failure with some hints, then let the error propagate so the
    # notebook stops here.
    print(f"\n❌ Failed to load DINOv2 backbone: {e}")
    print("\nTroubleshooting:")
    for hint in (
        "   1. Check internet connection for model download",
        "   2. Verify torch hub cache directory",
        "   3. Try clearing torch hub cache",
    ):
        print(hint)
    raise

## 4. Sub-task 7.2: 4-Channel Input Adaptation

In [None]:
print("🔧 Sub-task 7.2: Testing 4-channel input adaptation...")

# Synthetic RGB + Depth batch (random values) at 518x518
batch_size = 2
test_input = torch.randn(batch_size, 4, 518, 518).to(device)

print(f"\nTest input shape: {test_input.shape}")
print(f"   Batch size: {test_input.shape[0]}")
print(f"   Channels: {test_input.shape[1]} (RGB + Depth)")
print(f"   Resolution: {test_input.shape[2]}x{test_input.shape[3]}")

try:
    # Forward pass without gradient tracking (the backbone is frozen)
    with torch.no_grad():
        features = backbone(test_input)

    print(f"\n✅ 4-channel input adaptation successful!")
    print(f"   Output shape: {features.shape}")
    print(f"   Feature dimension: {features.shape[1]}")

    # Verify feature dimension against the value stated in the notebook spec
    expected_dim = 1024
    if features.shape[1] == expected_dim:
        print(f"   ✅ Feature dimension matches expected: {expected_dim}")
    else:
        print(f"   ⚠️  Feature dimension mismatch: got {features.shape[1]}, expected {expected_dim}")

    # Feed inputs of other resolutions and actually check the output
    # dimension for each, instead of reporting success unconditionally.
    print("\n🔍 Testing automatic resizing...")
    test_resolutions = [(256, 256), (512, 512), (1024, 1024)]
    failed_resolutions = []

    for h, w in test_resolutions:
        test_input_resized = torch.randn(1, 4, h, w).to(device)
        with torch.no_grad():
            features_resized = backbone(test_input_resized)

        output_dim = features_resized.shape[1]
        if output_dim == expected_dim:
            print(f"   Input {h}x{w} -> Output {output_dim}-dim: ✅")
        else:
            failed_resolutions.append((h, w))
            print(f"   Input {h}x{w} -> Output {output_dim}-dim: ❌ (expected {expected_dim})")

    if failed_resolutions:
        print(
            f"\n⚠️  {len(failed_resolutions)} of {len(test_resolutions)} resolution tests "
            f"produced an unexpected feature dimension: {failed_resolutions}"
        )
    else:
        print("\n✅ All resolution tests passed!")

except Exception as e:
    # Report and re-raise so a broken adaptation stops the notebook here
    print(f"\n❌ 4-channel input adaptation failed: {e}")
    raise

## 5. Sub-task 7.3: Feature Extraction and Validation

In [None]:
print("🔍 Sub-task 7.3: Feature extraction with real beach cam images...")

# Result containers are created before the branch below so that the
# validation, visualization and metadata cells can always test them, even
# when no images are found (previously that path raised NameError later).
extraction_results = []
feature_vectors = []

# Find available beach cam images. Path.glob yields files in arbitrary order,
# so sort to make the selected subset reproducible between runs.
max_images = 10
image_dir = DATA_DIR / "real" / "images"
available_images = sorted(image_dir.glob("*.jpg"))
image_files = available_images[:max_images]

if not image_files:
    print("❌ No beach cam images found")
    print(f"   Please ensure images are in: {image_dir}")
else:
    # Report the real number of images found, not the truncated count
    print(f"\n📊 Found {len(available_images)} beach cam images")
    print(f"   Processing first {len(image_files)} images...")

    print("\n🚀 Extracting features...")

    for img_path in tqdm(image_files, desc="Extracting features"):
        try:
            # The depth map is looked up by image stem; images without one
            # are skipped before any image decoding work is done.
            depth_path = DEPTH_MAPS_DIR / f"{img_path.stem}_depth.npy"

            if not depth_path.exists():
                print(f"\n⚠️  Depth map not found for {img_path.name}, skipping...")
                continue

            depth_map = np.load(depth_path)

            # The stacking below needs a single-channel (H, W) depth map;
            # fail with a clear message rather than an obscure concatenate error.
            if depth_map.ndim != 2:
                raise ValueError(
                    f"expected a 2-D depth map, got shape {depth_map.shape}"
                )

            # Load the image; the context manager releases the file handle
            # as soon as the RGB copy has been made.
            with Image.open(img_path) as image_file:
                image = image_file.convert('RGB')

            # Resize the image to the depth map's size if they differ.
            # PIL sizes are (width, height); array shapes are (height, width).
            depth_h, depth_w = depth_map.shape
            if image.size != (depth_w, depth_h):
                image = image.resize((depth_w, depth_h), Image.BILINEAR)
            image_np = np.array(image)

            # Normalize image to [0, 1]
            image_normalized = image_np.astype(np.float32) / 255.0

            # Min-max normalize depth to [0, 1]; the epsilon avoids a
            # division by zero for a constant depth map.
            depth_normalized = (depth_map - depth_map.min()) / (depth_map.max() - depth_map.min() + 1e-8)
            depth_normalized = depth_normalized.astype(np.float32)

            # Stack RGB + Depth along the channel axis -> (H, W, 4)
            rgbd_input = np.concatenate([
                image_normalized,
                depth_normalized[..., np.newaxis]
            ], axis=-1)

            # Convert to tensor [1, 4, H, W]
            rgbd_tensor = torch.from_numpy(rgbd_input).permute(2, 0, 1).unsqueeze(0).to(device)

            # Extract features without gradient tracking
            with torch.no_grad():
                features = backbone(rgbd_tensor)

            # Keep the single sample of the batch as a NumPy vector
            features_np = features.cpu().numpy()[0]
            feature_vectors.append(features_np)

            extraction_results.append({
                'image_path': str(img_path),
                'depth_path': str(depth_path),
                # Plain list so the record is JSON-serializable by construction
                'feature_shape': list(features.shape),
                'feature_mean': float(features_np.mean()),
                'feature_std': float(features_np.std()),
                'feature_min': float(features_np.min()),
                'feature_max': float(features_np.max())
            })

            # Save features next to the other outputs, one file per image
            feature_save_path = FEATURES_DIR / f"{img_path.stem}_features.npy"
            np.save(feature_save_path, features_np)

        except Exception as e:
            # Best effort: log the failure and move on to the next image
            logger.error(f"Failed to process {img_path.name}: {e}")
            continue

    print(f"\n✅ Feature extraction completed!")
    print(f"   Processed: {len(extraction_results)} images")
    not_processed = len(image_files) - len(extraction_results)
    if not_processed:
        print(f"   Skipped or failed: {not_processed} images")
    print(f"   Features saved to: {FEATURES_DIR}")

## 6. Feature Quality Validation

In [None]:
if not extraction_results:
    print("⚠️  No features extracted for validation")
else:
    print("📊 Feature Quality Validation...")

    # Per-image summary statistics recorded during extraction
    feature_means = [entry['feature_mean'] for entry in extraction_results]
    feature_stds = [entry['feature_std'] for entry in extraction_results]

    print(f"\n📈 Feature Statistics:")
    print(f"   Mean across images: {np.mean(feature_means):.4f} ± {np.std(feature_means):.4f}")
    print(f"   Std across images: {np.mean(feature_stds):.4f} ± {np.std(feature_stds):.4f}")

    # Diversity can only be judged when at least two vectors exist
    if len(feature_vectors) > 1:
        feature_matrix = np.stack(feature_vectors)

        # Pairwise cosine similarity between all extracted vectors
        from sklearn.metrics.pairwise import cosine_similarity
        similarities = cosine_similarity(feature_matrix)

        # Keep each unordered pair once: strictly above the diagonal
        pair_rows, pair_cols = np.triu_indices_from(similarities, k=1)
        upper_tri = similarities[pair_rows, pair_cols]

        print(f"\n🔍 Feature Similarity Analysis:")
        print(f"   Mean similarity: {upper_tri.mean():.4f}")
        print(f"   Std similarity: {upper_tri.std():.4f}")
        print(f"   Min similarity: {upper_tri.min():.4f}")
        print(f"   Max similarity: {upper_tri.max():.4f}")

        # A mean similarity of 0.95 or more is flagged as suspiciously uniform
        print(
            "   ✅ Features show good diversity"
            if upper_tri.mean() < 0.95
            else "   ⚠️  Features may be too similar"
        )

    # Compare the first vector's length with the specified dimension
    expected_dim = 1024
    actual_dim = feature_vectors[0].shape[0]

    print(f"\n✅ Feature Dimension Validation:")
    print(f"   Expected: {expected_dim}")
    print(f"   Actual: {actual_dim}")

    if actual_dim != expected_dim:
        print(f"   ❌ Feature dimension mismatch!")
    else:
        print("   ✅ Feature dimension matches specification")

## 7. Feature Visualization

In [None]:
if not feature_vectors:
    print("⚠️  No features available for visualization")
else:
    print("📊 Visualizing feature distributions...")

    # 2x2 grid: histogram, per-image statistics, similarity heatmap, ranges
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))
    (ax_hist, ax_stats), (ax_similarity, ax_range) = axes

    # Panel 1: value histogram of the first image's feature vector
    ax_hist.hist(feature_vectors[0], bins=50, alpha=0.7, edgecolor='black')
    ax_hist.set(
        title='Feature Distribution (First Image)',
        xlabel='Feature Value',
        ylabel='Frequency',
    )
    ax_hist.grid(True, alpha=0.3)

    # Panel 2: one point per image, mean against standard deviation
    feature_means = [entry['feature_mean'] for entry in extraction_results]
    feature_stds = [entry['feature_std'] for entry in extraction_results]

    ax_stats.scatter(feature_means, feature_stds, alpha=0.6)
    ax_stats.set(
        title='Feature Statistics Across Images',
        xlabel='Mean Feature Value',
        ylabel='Std Feature Value',
    )
    ax_stats.grid(True, alpha=0.3)

    # One row per image; shared by the two remaining panels
    feature_matrix = np.stack(feature_vectors)

    # Panel 3: cosine-similarity heatmap, drawn only for two or more images
    if len(feature_vectors) > 1:
        from sklearn.metrics.pairwise import cosine_similarity
        similarities = cosine_similarity(feature_matrix)

        im = ax_similarity.imshow(similarities, cmap='viridis', aspect='auto')
        ax_similarity.set(
            title='Feature Similarity Matrix',
            xlabel='Image Index',
            ylabel='Image Index',
        )
        fig.colorbar(im, ax=ax_similarity)

    # Panel 4: spread (max - min) of every feature dimension across images
    feature_ranges = feature_matrix.max(axis=0) - feature_matrix.min(axis=0)

    ax_range.plot(feature_ranges, alpha=0.7)
    ax_range.set(
        title='Feature Range Across Dimensions',
        xlabel='Feature Dimension',
        ylabel='Range (Max - Min)',
    )
    ax_range.grid(True, alpha=0.3)

    # Write the figure next to the saved features, then display it inline
    fig.tight_layout()
    fig.savefig(FEATURES_DIR / 'feature_analysis.png', dpi=150, bbox_inches='tight')
    plt.show()

    print(f"\n✅ Visualization saved to: {FEATURES_DIR / 'feature_analysis.png'}")

## 8. Save Results and Metadata

In [None]:
print("💾 Saving results and metadata...")

# Per-image statistics gathered during extraction (empty if nothing succeeded)
result_means = [r['feature_mean'] for r in extraction_results]
result_stds = [r['feature_std'] for r in extraction_results]

# Prepare metadata
metadata = {
    'notebook': '09_DINOv2_Backbone_Integration',
    'model': {
        'name': 'dinov2_vitl14',
        'feature_dim': 1024,
        'input_channels': 4,
        'frozen': True
    },
    'processing': {
        # Images attempted, as opposed to the subset that produced features
        # (previously both fields reported the number of successes).
        'total_images': len(image_files),
        'successful_extractions': len(feature_vectors),
        'device': device
    },
    'feature_statistics': {
        # Fall back to 0 when there is nothing to average
        'mean_feature_mean': float(np.mean(result_means)) if result_means else 0,
        'mean_feature_std': float(np.mean(result_stds)) if result_stds else 0
    },
    'extraction_results': extraction_results
}

# Save metadata alongside the extracted features
metadata_path = FEATURES_DIR / 'dinov2_metadata.json'
with open(metadata_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, indent=2)

print(f"✅ Metadata saved to: {metadata_path}")

# Display summary
print(f"\n{'='*60}")
print("DINOV2 BACKBONE INTEGRATION SUMMARY")
print(f"{'='*60}")
print(f"Model: dinov2_vitl14")
print(f"Feature Dimension: 1024")
print(f"Input Channels: 4 (RGB + Depth)")
print(f"Backbone Frozen: True")
print(f"\nProcessing Results:")
print(f"   Images Processed: {len(extraction_results)}")
print(f"   Features Extracted: {len(feature_vectors)}")
print(f"   Output Directory: {FEATURES_DIR}")

# Only claim overall success when sub-task 7.3 actually produced features;
# reaching this cell already implies the 7.1 and 7.2 cells did not raise.
if feature_vectors:
    print(f"\n✅ All sub-tasks completed successfully!")
    print(f"   ✅ 7.1: DINOv2 backbone loaded and adapted")
    print(f"   ✅ 7.2: 4-channel input adaptation verified")
    print(f"   ✅ 7.3: Feature extraction and validation completed")
else:
    print(f"\n⚠️  Sub-tasks completed with problems:")
    print(f"   ✅ 7.1: DINOv2 backbone loaded and adapted")
    print(f"   ✅ 7.2: 4-channel input adaptation verified")
    print(f"   ❌ 7.3: No features were extracted - check images and depth maps")
print(f"{'='*60}")