In [20]:
import glob
import os
import re
import warnings

import numpy as np
import torch

from notebooks_config import setup_logging, CustomLogger

from src.BiomassImprovedCNN import BiomassImprovedCNN
from src.BiomassTransformer import BiomassTransformer
from src.BiomassDINOv3 import BiomassDINOv3

from src.DINOv3Wrapper import DINOv3InferenceWrapper
from src.TransformerWrapper import TransformerInferenceWrapper
from src.CNNWrapper import InferenceWrapper

# Silence FutureWarning messages, then report the PyTorch version and the
# device that will be used (first CUDA device when available, otherwise CPU).
warnings.simplefilter('ignore', FutureWarning)

print(f"PyTorch: {torch.__version__}")
if torch.cuda.is_available():
    print(f"Device: {torch.cuda.get_device_name(0)}")
else:
    print("Device: CPU")

PyTorch: 2.9.1+cu128
Device: NVIDIA GeForce RTX 5050 Laptop GPU


In [21]:
# Folder searched for "*.ckpt" files when listing and selecting a checkpoint below.
CKPT_FOLDER = "./kaggle/checkpoints/improved_cnn/fold1/"

In [22]:
# Competition metric definition (required for checkpoint loading)
labels = ["Dry_Clover_g", "Dry_Dead_g", "Dry_Green_g", "Dry_Total_g", "GDM_g"]

# Weight of each target inside the metric, keyed by label name
# (listed in the same order as `labels`).
weights = dict(zip(labels, [0.1, 0.1, 0.1, 0.5, 0.2]))


def competition_metric(y_true, y_pred) -> float:
    """Compute the competition's weighted R2 score.

    Both inputs are indexed as (row, target), with one column per entry of
    ``labels``. Squared errors are averaged across targets using ``weights``
    and then averaged over rows; the baseline is the mean over rows of the
    weighted per-row average of ``y_true``.

    Args:
        y_true: Array of reference values.
        y_pred: Array of predictions, same shape as ``y_true``.

    Returns:
        ``1 - ss_res / ss_tot``.
    """
    target_weights = np.array([weights[name] for name in labels])

    def weighted_row_mean(values):
        # Weighted average across the target axis, then a plain mean over rows.
        return np.average(values, weights=target_weights, axis=1).mean()

    baseline = weighted_row_mean(y_true)
    residual = weighted_row_mean((y_true - y_pred) ** 2)
    total = weighted_row_mean((y_true - baseline) ** 2)

    return 1 - residual / total

In [23]:
def get_comp_metric(ckpt_path: str) -> float:
    """Extract the validation R2 score encoded in a checkpoint filename.

    The score is the number that follows the last ``val_r2_score=`` marker,
    e.g. ``...-val_r2_score=0.8214.ckpt`` -> ``0.8214``. Whatever comes after
    the number is ignored, so a trailing ``.ckpt`` extension or a suffix such
    as ``-v1`` before the extension does not break parsing.

    Args:
        ckpt_path: Checkpoint path or file name containing ``val_r2_score=``.

    Returns:
        The parsed score as a float.

    Raises:
        ValueError: If the marker is missing or is not followed by a number.
    """
    marker = 'val_r2_score='
    _, found, tail = ckpt_path.rpartition(marker)
    if not found:
        raise ValueError(f"No '{marker}' marker in checkpoint name: {ckpt_path!r}")

    # Leading float literal only (sign, digits, optional fraction/exponent,
    # or nan/inf) so that trailing text after the number is not fed to float().
    number = re.match(
        r'[-+]?(?:nan|inf(?:inity)?|(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)',
        tail,
        flags=re.IGNORECASE,
    )
    if number is None:
        raise ValueError(f"No numeric value after '{marker}' in checkpoint name: {ckpt_path!r}")

    return float(number.group(0))

In [24]:
# List the "*.ckpt" files found in CKPT_FOLDER (shown as this cell's output).
glob.glob(os.path.join(CKPT_FOLDER, "*.ckpt"))

['./kaggle/checkpoints/improved_cnn/fold1\\local_vit_large_patch16_dinov3_train[5]Folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=04-val_r2_score=0.8214.ckpt',
 './kaggle/checkpoints/improved_cnn/fold1\\local_vit_large_patch16_dinov3_train[5]Folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=10-val_r2_score=0.8189.ckpt']

In [25]:
def choose_best_checkpoint(ckpt_folder: str) -> str:
    """Pick a checkpoint from a folder, defaulting to the highest validation R2.

    Every ``*.ckpt`` file in ``ckpt_folder`` is listed with the R2 score parsed
    from its filename. The user is then prompted for an index; pressing Enter
    keeps the best-scoring checkpoint.

    Args:
        ckpt_folder: Directory that contains the ``*.ckpt`` files.

    Returns:
        Path of the chosen checkpoint.

    Raises:
        FileNotFoundError: If the folder contains no ``*.ckpt`` file.
        ValueError: If the typed index is not an integer (or a filename has
            no parsable score).
        IndexError: If the typed index is out of range.
    """
    # Sorted so the printed indices are stable: glob's ordering depends on the
    # file system.
    ckpt_files = sorted(glob.glob(os.path.join(ckpt_folder, "*.ckpt")))
    if not ckpt_files:
        raise FileNotFoundError(f"No '*.ckpt' files found in {ckpt_folder!r}")

    # Parse each filename once and reuse the score for ranking and display.
    scores = [get_comp_metric(ckpt) for ckpt in ckpt_files]

    # Strict '>' keeps the first checkpoint on ties; best_ckpt is pre-bound so
    # it is always defined, even if no score compares greater than -inf.
    best_ckpt, best_r2 = ckpt_files[0], -np.inf
    for ckpt, r2 in zip(ckpt_files, scores):
        if r2 > best_r2:
            best_ckpt, best_r2 = ckpt, r2

    for i, (ckpt, r2) in enumerate(zip(ckpt_files, scores)):
        print(f"[{i}] {ckpt} --> R2: {r2:.6f}")

    print(f"Best checkpoint: {best_ckpt} with R2: {best_r2:.6f}")
    best_ckpt_id = input("Choose the index of the best checkpoint to use ('Enter' for default): ").strip()
    if best_ckpt_id != '':
        try:
            best_ckpt = ckpt_files[int(best_ckpt_id)]
        except (ValueError, IndexError) as exc:
            # Same exception type as before, with an actionable message.
            raise type(exc)(
                f"Invalid checkpoint index {best_ckpt_id!r}; "
                f"expected an integer between 0 and {len(ckpt_files) - 1}"
            ) from exc
        print(f"Selected checkpoint: {best_ckpt}")
    print(f"Using checkpoint: {best_ckpt}")
    return best_ckpt

In [26]:
def get_output_path(ckpt_path: str) -> str:
    """Build the export path that sits next to a checkpoint file.

    The directory part of ``ckpt_path`` is kept; in the file name every
    ``.ckpt`` occurrence is replaced by ``.pt``.

    Args:
        ckpt_path: Path of the source checkpoint.

    Returns:
        The directory of ``ckpt_path`` joined with the renamed file.
    """
    folder, filename = os.path.split(ckpt_path)
    return os.path.join(folder, filename.replace('.ckpt', '.pt'))

In [27]:
def get_model_class(ckpt_path: str) -> type:
    """Choose the model class to load, based on the checkpoint filename.

    The lower-cased path is printed and then searched for backbone markers:
    any of ``dinov3``, ``patch`` or ``vit`` selects ``BiomassTransformer``;
    every other name falls back to ``BiomassImprovedCNN``.

    Args:
        ckpt_path: Checkpoint path; matching is case-insensitive.

    Returns:
        The model class (not an instance).
    """
    lowered = ckpt_path.lower()
    print(lowered)

    # NOTE(review): 'dinov3' names resolve to BiomassTransformer here, not to
    # BiomassDINOv3 (which this notebook also imports) — confirm this is intended.
    transformer_markers = ('dinov3', 'patch', 'vit')
    if any(marker in lowered for marker in transformer_markers):
        return BiomassTransformer

    return BiomassImprovedCNN

In [28]:
def get_size_mean_std(ckpt_path: str) -> tuple[int, list[float], list[float]]:
    """Return the input size and normalisation constants for a checkpoint.

    The image size is selected from backbone names found in the lower-cased
    path: 384 for ``convnextv2_tiny``, 518 for the two DINOv2 patch14
    variants listed below, and 224 for anything else. Mean and std are the
    same fixed three-channel values in every case.

    Args:
        ckpt_path: Checkpoint path; matching is case-insensitive.

    Returns:
        ``(img_size, mean, std)``.
    """
    name = ckpt_path.lower()
    dinov2_tags = ('vit_large_patch14_dinov2.lvd142m', 'vit_giant_patch14_dinov2.lvd142m')

    if 'convnextv2_tiny' in name:
        side = 384
        print("✅ Config hardcoded for ConvNeXtV2 Tiny")
    elif any(tag in name for tag in dinov2_tags):
        side = 518
        print("✅ Config hardcoded for DINOv2 ViT Large/Giant")
    else:
        side = 224  # Default

    return side, [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]

In [29]:
def get_model_wrapper(ckpt_path: str, model: type) -> tuple[InferenceWrapper | TransformerInferenceWrapper | DINOv3InferenceWrapper, int]:
    """Wrap a loaded model in the inference wrapper that matches its class.

    ``BiomassDINOv3`` instances get ``DINOv3InferenceWrapper``,
    ``BiomassTransformer`` instances get ``TransformerInferenceWrapper`` and
    anything else gets ``InferenceWrapper``. The image size, mean and std
    handed to the wrapper come from ``get_size_mean_std(ckpt_path)``.

    Args:
        ckpt_path: Checkpoint path, used only to look up size/mean/std.
        model: The loaded model. Despite the ``type`` annotation it is used
            as an instance: it is tested with ``isinstance`` and passed to
            the wrapper as ``lightning_model``.

    Returns:
        ``(wrapper, input_size)``.
    """
    # DINOv3 is tested before the generic transformer class.
    if isinstance(model, BiomassDINOv3):
        wrapper_cls, label = DINOv3InferenceWrapper, "DINOv3InferenceWrapper"
    elif isinstance(model, BiomassTransformer):
        wrapper_cls, label = TransformerInferenceWrapper, "TransformerInferenceWrapper"
    else:
        wrapper_cls, label = InferenceWrapper, "InferenceWrapper"

    input_size, mean, std = get_size_mean_std(ckpt_path)

    print(f"Using {label} for export...")
    wrapper = wrapper_cls(
        lightning_model=model,
        img_size=input_size,
        mean=mean,
        std=std,
    )
    return wrapper, input_size

In [30]:
def export_to_torchscript(checkpoint_path: str, output_path: str):
    """Trace a checkpointed model to TorchScript, save it and sanity-check it.

    Steps:
      1. Pick the device: CUDA when available, otherwise CPU (with a warning).
      2. Load the checkpoint with the class chosen by ``get_model_class`` and
         wrap it with ``get_model_wrapper``.
      3. Trace the wrapper with two random ``(1, 3, size, size)`` inputs and
         save the traced module to ``output_path``.
      4. Compare eager vs. traced outputs on the tracing inputs.
      5. Reload the saved file and, when CUDA is available, run it on CUDA
         and compare its output with the eager output as well.

    Args:
        checkpoint_path: Path of the ``.ckpt`` file to export.
        output_path: Destination path of the TorchScript file.

    Returns:
        None. Progress and validation results are printed.
    """

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if device.type == 'cpu':
        print("⚠️ WARNING: CUDA not available. Exporting on CPU might cause issues on GPU later.")

    ModelClass = get_model_class(checkpoint_path)
    print(f"Using model class: {ModelClass.__name__}")

    # Load the checkpoint straight onto the export device.
    # NOTE(security): weights_only=False lets the loader unpickle arbitrary
    # objects — only export checkpoints that come from a trusted source.
    model = ModelClass.load_from_checkpoint(
        checkpoint_path,
        weights_only=False,
        map_location=device
    )
    model.eval()
    model.to(device)

    # Wrap model for export
    wrapped_model, input_size = get_model_wrapper(checkpoint_path, model)

    # Attach the input size as a buffer when the wrapper does not already
    # expose an `_img_size` attribute.
    if not hasattr(wrapped_model, '_img_size'):
        wrapped_model.register_buffer('_img_size', torch.tensor(input_size, device=device))

    print(f"Input size for tracing: {input_size}x{input_size}")

    # IMPORTANT: keep the wrapper and the dummy inputs on the same device
    # (GPU when available) during tracing.
    wrapped_model.to(device)
    wrapped_model.eval()

    # Random dummy inputs created directly on the export device
    dummy_left = torch.randn(1, 3, input_size, input_size, device=device)
    dummy_right = torch.randn(1, 3, input_size, input_size, device=device)

    # Test wrapper first
    print("Testing wrapper before tracing...")
    with torch.no_grad():
        test_output = wrapped_model(dummy_left, dummy_right)
        print(f"Wrapper output shape: {test_output.shape}")
        print(f"Sample prediction: {test_output[0]}")

    # Trace model on the export device
    print(f"\nTracing model on {device.type.upper()}...")
    with torch.no_grad():
        traced_model = torch.jit.trace(
            wrapped_model,
            (dummy_left, dummy_right),
            check_trace=True,
            strict=False  # Allow some flexibility for DINOv3
        )

    # Save
    traced_model.save(output_path)
    print(f"\nModel exported to: {output_path}")

    # Validate the in-memory trace against the eager wrapper
    print(f"\nValidating export on {device.type.upper()}...")
    with torch.no_grad():
        original_output = wrapped_model(dummy_left, dummy_right)
        traced_output = traced_model(dummy_left, dummy_right)
        max_diff = (original_output - traced_output).abs().max().item()
        print(f"Max difference between original and traced: {max_diff:.8f}")

        if max_diff < 1e-5:
            print("✅ Export successful!")
        else:
            print(f"⚠️ Export may have issues (difference: {max_diff})")

    # Test device compatibility of the file that was actually written
    print("\nTesting device compatibility...")
    loaded_model = torch.jit.load(output_path, map_location=device)

    if torch.cuda.is_available():
        try:
            # Move to CUDA and test
            loaded_model = loaded_model.to('cuda')

            with torch.no_grad():
                cuda_output = loaded_model(dummy_left.to('cuda'), dummy_right.to('cuda'))

            # Check the reloaded file's output too, not only that it runs.
            reload_diff = (original_output.to('cuda') - cuda_output).abs().max().item()
            print(f"Max difference between original and reloaded: {reload_diff:.8f}")

            print("✅ Model works on CUDA")

        except Exception as e:
            print(f"⚠️ CUDA test failed: {e}")
            print("Model saved for CPU only")
    else:
        print("⚠️ CUDA not available, skipping CUDA test")

In [31]:
# Interactively pick the checkpoint to export (pressing Enter keeps the best-scoring one).
input_path = choose_best_checkpoint(CKPT_FOLDER)

[0] ./kaggle/checkpoints/improved_cnn/fold1\local_vit_large_patch16_dinov3_train[5]Folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=04-val_r2_score=0.8214.ckpt --> R2: 0.821400
[1] ./kaggle/checkpoints/improved_cnn/fold1\local_vit_large_patch16_dinov3_train[5]Folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=10-val_r2_score=0.8189.ckpt --> R2: 0.818900
Best checkpoint: ./kaggle/checkpoints/improved_cnn/fold1\local_vit_large_patch16_dinov3_train[5]Folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=04-val_r2_score=0.8214.ckpt with R2: 0.821400
Using checkpoint: ./kaggle/checkpoints/improved_cnn/fold1\local_vit_large_patch16_dinov3_train[5]Folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=04-val_r2_score=0.8214.ckpt


In [32]:
# Export the selected checkpoint; the output path is derived from the
# checkpoint path by get_output_path (same folder, ".ckpt" replaced by ".pt").
export_to_torchscript(
    checkpoint_path=input_path,
    output_path=get_output_path(input_path)
)

./kaggle/checkpoints/improved_cnn/fold1\local_vit_large_patch16_dinov3_train[5]folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=04-val_r2_score=0.8214.ckpt
Using model class: BiomassTransformer
DEBUG: Backbone output shape = torch.Size([1, 261, 1024])
DEBUG: Detected [B, N, C] format, C = 1024
Backbone output dimension: 1024
Using TransformerInferenceWrapper for export...
Input size for tracing: 224x224
Testing wrapper before tracing...
Wrapper output shape: torch.Size([1, 3])
Sample prediction: tensor([ 5.6901, 11.6275,  6.5422], device='cuda:0')

Tracing model on CUDA...


  assert condition, message
  h_denom = float(height)
  w_denom = float(width)
  assert self.periods.numel() == dim



Model exported to: ./kaggle/checkpoints/improved_cnn/fold1\local_vit_large_patch16_dinov3_train[5]Folds_log_fusion-gated_spatial_cross_epochs15_bs4_gradacc4_lr0.0001_wd0.05_dr0.2_hr0.5-fold1-epoch=04-val_r2_score=0.8214.pt

Validating export on CUDA...
Max difference between original and traced: 0.00000286
✅ Export successful!

Testing device compatibility...
✅ Model works on CUDA
