# CSIRO Image2Biomass - Advanced Swin-V2 (V3 FINAL STABLE)

**STATUS: VERIFIED STABLE**
1. **Backbone**: Swin-V2 Base (`swinv2_base_window12_192.ms_in22k`), instantiated at the training resolution (flexible windows).
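2. **Loss**: Huber (robust to outliers) on the five biomass targets, plus two auxiliary terms: MSE on NDVI/height (weight 0.1) and species cross-entropy with label smoothing 0.1 (weight 0.2).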
3. **Optimization**: 5-epoch Warmup + Cosine Decay + Gradient Clipping.
4. **Resolution**: 384x768 (1:2 ratio).

In [None]:
!pip install -q -U albumentations timm opencv-python-headless kagglehub

In [None]:
import os, sys, functools
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import timm
import albumentations as A
from albumentations.pytorch import ToTensorV2
from sklearn.model_selection import GroupKFold
import kagglehub

# Rebind print so every call flushes immediately.
print = functools.partial(print, flush=True)

# Dataset root, and the local directory that receives saved weights
# (created up front if it does not exist).
DATA_DIR = "/kaggle/input/csiro-biomass"
CHECKPOINT_DIR = "./models_checkpoints"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

# Regression targets and their metric weights; the two lists are paired by position.
TARGET_COLUMNS = [
    'Dry_Clover_g',
    'Dry_Dead_g',
    'Dry_Green_g',
    'GDM_g',
    'Dry_Total_g',
]
TARGET_WEIGHTS = [
    0.1,  # Dry_Clover_g
    0.1,  # Dry_Dead_g
    0.1,  # Dry_Green_g
    0.2,  # GDM_g
    0.5,  # Dry_Total_g
]

# Run-wide hyperparameters read by the model and the training loop.
CONFIG = dict(
    model_name="swinv2_base_window12_192.ms_in22k",
    img_h=384,
    img_w=768,
    batch_size=8,
    lr=5e-5,
    epochs=40,
    n_splits=5,
    device="cuda",
)

In [None]:
class AdvancedSwinHydra(nn.Module):
    """Image backbone with two auxiliary metadata heads and five regression heads.

    The backbone features feed:
      * ``meta_reg``  - a linear layer predicting 2 auxiliary regression values,
      * ``meta_cls``  - a linear layer producing ``num_species`` class logits.

    The predicted class (argmax of the logits) is looked up in a 32-dimensional
    embedding table. Backbone features, the 2 auxiliary predictions and the
    embedding are concatenated and passed to five independent MLP heads, each
    emitting one value.

    Args:
        model_name: timm model identifier; defaults to ``CONFIG['model_name']``.
        num_species: number of species classes for ``meta_cls`` / ``species_emb``.
    """

    def __init__(self, model_name=CONFIG['model_name'], num_species=15):
        super().__init__()
        img_size = (CONFIG['img_h'], CONFIG['img_w'])
        try:
            self.backbone = timm.create_model(
                model_name,
                pretrained=True,
                num_classes=0,
                img_size=img_size,
            )
        except Exception as e:
            # Best-effort fallback: retry without the pretrained tag (the part
            # after the first '.'). Report the original failure so the switch
            # to a different checkpoint is never silent.
            base_name = model_name.split('.')[0]
            print(
                f"Could not create backbone '{model_name}' "
                f"({type(e).__name__}: {e}); retrying with '{base_name}'."
            )
            self.backbone = timm.create_model(
                base_name,
                pretrained=True,
                num_classes=0,
                img_size=img_size,
            )

        embed_dim = self.backbone.num_features
        self.meta_reg = nn.Linear(embed_dim, 2)
        self.meta_cls = nn.Linear(embed_dim, num_species)
        self.species_emb = nn.Embedding(num_species, 32)

        # Fused vector = backbone features + 2 auxiliary values + 32-d embedding.
        fusion_dim = embed_dim + 2 + 32
        # One small MLP per output column (five outputs in total).
        self.heads = nn.ModuleList([
            nn.Sequential(nn.Linear(fusion_dim, 256), nn.GELU(), nn.Linear(256, 1))
            for _ in range(5)
        ])

    def forward(self, x, return_meta=False):
        """Run the network on an image batch.

        Args:
            x: image batch passed straight to the backbone.
            return_meta: when true, also return the auxiliary head outputs.

        Returns:
            ``out`` of shape (batch, 5), or the tuple ``(out, p_reg, p_cls)``
            when ``return_meta`` is true, where ``p_reg`` is (batch, 2) and
            ``p_cls`` is (batch, num_species).
        """
        feat = self.backbone(x)
        p_reg = self.meta_reg(feat)
        p_cls = self.meta_cls(feat)

        # The embedding is selected from the *predicted* class. argmax is not
        # differentiable, so no gradient reaches meta_cls through this path.
        spec_idx = torch.argmax(p_cls, dim=1)
        s_emb = self.species_emb(spec_idx)

        fusion = torch.cat([feat, p_reg, s_emb], dim=1)
        out = torch.cat([h(fusion) for h in self.heads], dim=1)

        if return_meta:
            return out, p_reg, p_cls
        return out

In [None]:
def competition_metric(y_true, y_pred, weights=None):
    """Return the globally weighted R^2 between targets and predictions.

    Every element is weighted by its column weight. The residual and total
    sums of squares are taken over all elements at once, with the total sum
    measured around the single weighted mean of ``y_true``.

    Args:
        y_true: array-like of shape (N, n_targets) with ground-truth values.
        y_pred: array-like of the same shape with predictions.
        weights: optional per-column weights; defaults to ``TARGET_WEIGHTS``.

    Returns:
        ``1 - SS_res / SS_tot`` as a float, or ``0.0`` when the score is
        undefined (no weighted variation in ``y_true``, or zero total weight).

    Raises:
        ValueError: if the shapes of ``y_true`` and ``y_pred`` differ, or the
            weights cannot be broadcast across the columns.
    """
    y_t = np.asarray(y_true, dtype=float)
    y_p = np.asarray(y_pred, dtype=float)
    if y_t.shape != y_p.shape:
        raise ValueError(f"shape mismatch: y_true {y_t.shape} vs y_pred {y_p.shape}")

    col_w = np.asarray(TARGET_WEIGHTS if weights is None else weights, dtype=float)
    # Broadcasting the per-column weights over the rows gives the same
    # element weights as tiling and flattening.
    w = np.broadcast_to(col_w, y_t.shape)

    w_sum = w.sum()
    if w_sum == 0:
        # Empty input or all-zero weights: the weighted mean is undefined.
        return 0.0

    avg = (w * y_t).sum() / w_sum
    res = (w * (y_t - y_p) ** 2).sum()
    tot = (w * (y_t - avg) ** 2).sum()
    if tot == 0:
        return 0.0
    return float(1.0 - res / tot)

class AdvancedDataset(Dataset):
    """Dataset yielding an image together with its biomass and metadata labels.

    Each item is the tuple ``(image, biomass, aux_regression, species_id)``:
      * ``image``          - the RGB image, after ``transform`` if one is given
                             (otherwise the raw numpy array),
      * ``biomass``        - float32 tensor of the ``TARGET_COLUMNS`` values,
      * ``aux_regression`` - float32 tensor ``[Pre_GSHH_NDVI, Height_Ave_cm]``,
      * ``species_id``     - long tensor holding ``species_map[row['Species']]``.

    Args:
        df: table with one row per image (indexed positionally via ``.iloc``).
        img_dir: directory that ``row['image_path']`` is joined onto.
        transform: optional callable invoked as ``transform(image=arr)['image']``.
        species_map: mapping from species name to integer class id.
    """

    def __init__(self, df, img_dir, transform=None, species_map=None):
        self.df = df
        self.img_dir = img_dir
        self.transform = transform
        self.species_map = species_map

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        record = self.df.iloc[idx]

        # Load the image and force three RGB channels.
        image_file = os.path.join(self.img_dir, record['image_path'])
        pixels = np.array(Image.open(image_file).convert('RGB'))
        if self.transform:
            pixels = self.transform(image=pixels)['image']

        biomass = torch.tensor(record[TARGET_COLUMNS].values.astype(np.float32))
        aux_values = [record['Pre_GSHH_NDVI'], record['Height_Ave_cm']]
        aux_regression = torch.tensor(aux_values, dtype=torch.float32)
        species_id = torch.tensor(self.species_map[record['Species']], dtype=torch.long)
        return pixels, biomass, aux_regression, species_id

In [None]:
def train_fold(fold, train_df, val_df, species_map):
    """Train one cross-validation fold and return its best score and weights.

    Builds augmented training and plain validation datasets, trains an
    ``AdvancedSwinHydra`` with mixed precision, and evaluates
    ``competition_metric`` on the validation set after every epoch.

    Args:
        fold: fold index, used only in the progress log.
        train_df: rows used for training.
        val_df: rows used for validation.
        species_map: mapping from species name to class id; its length sets
            the number of species classes in the model.

    Returns:
        ``(best_r2, best_sd)``: the highest validation score seen and a CPU
        copy of the model ``state_dict`` from that epoch (``None`` if no epoch
        ever improved on ``-inf``, e.g. when every score was NaN).
    """
    device = CONFIG['device']
    img_h, img_w = CONFIG['img_h'], CONFIG['img_w']

    train_tf = A.Compose([
        A.Resize(img_h, img_w),
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.ShiftScaleRotate(shift_limit=0.05, rotate_limit=15, p=0.3),
        A.Normalize(),
        ToTensorV2()
    ])
    val_tf = A.Compose([A.Resize(img_h, img_w), A.Normalize(), ToTensorV2()])
    train_ds = AdvancedDataset(train_df, DATA_DIR, train_tf, species_map)
    val_ds = AdvancedDataset(val_df, DATA_DIR, val_tf, species_map)

    loader_t = DataLoader(train_ds, batch_size=CONFIG['batch_size'], shuffle=True, num_workers=4, drop_last=True)
    loader_v = DataLoader(val_ds, batch_size=CONFIG['batch_size'], shuffle=False)

    model = AdvancedSwinHydra(num_species=len(species_map)).to(device)
    if torch.cuda.device_count() > 1:
        model = nn.DataParallel(model)

    optimizer = optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=0.05)

    # Warmup + Cosine Scheduler: linear ramp over the first epochs, then a
    # half-cosine decay over the remaining ones (stepped once per epoch).
    warmup_epochs = 5
    def lr_lambda(epoch):
        if epoch < warmup_epochs:
            return (epoch + 1) / warmup_epochs
        return 0.5 * (1 + np.cos(np.pi * (epoch - warmup_epochs) / (CONFIG['epochs'] - warmup_epochs)))
    scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda)

    crit_bio = nn.HuberLoss()
    crit_reg = nn.MSELoss()
    crit_cls = nn.CrossEntropyLoss(label_smoothing=0.1)
    scaler = torch.amp.GradScaler('cuda')

    best_r2 = -float('inf')
    best_sd = None

    # drop_last=True can leave zero batches for a tiny training split; avoid
    # dividing by zero when averaging the epoch loss.
    num_batches = max(len(loader_t), 1)

    for epoch in range(CONFIG['epochs']):
        model.train()
        epoch_loss = 0
        for imgs, bios, regs, clss in loader_t:
            optimizer.zero_grad()
            with torch.amp.autocast('cuda'):
                p_bio, p_reg, p_cls = model(imgs.to(device), return_meta=True)
                # Main biomass loss plus down-weighted auxiliary losses.
                loss = (
                    crit_bio(p_bio, bios.to(device))
                    + 0.1 * crit_reg(p_reg, regs.to(device))
                    + 0.2 * crit_cls(p_cls, clss.to(device))
                )

            scaler.scale(loss).backward()
            # Unscale before clipping so max_norm applies to the true gradients.
            scaler.unscale_(optimizer)
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            scaler.step(optimizer)
            scaler.update()
            epoch_loss += loss.item()

        model.eval()
        all_p, all_t = [], []
        with torch.no_grad():
            for imgs, bios, _, _ in loader_v:
                all_p.append(model(imgs.to(device)).cpu().numpy())
                all_t.append(bios.numpy())

        r2 = competition_metric(np.vstack(all_t), np.vstack(all_p))
        print(f"Fold {fold} | Ep {epoch+1} | Loss: {epoch_loss/num_batches:.4f} | R2: {r2:.4f}")

        if r2 > best_r2:
            best_r2 = r2
            live_sd = model.module.state_dict() if isinstance(model, nn.DataParallel) else model.state_dict()
            # state_dict() returns references to the live parameters, which
            # later optimizer steps overwrite in place. Snapshot a detached
            # CPU copy so the returned weights really are the best epoch's
            # (and do not pin this fold's model in GPU memory).
            best_sd = {name: tensor.detach().cpu().clone() for name, tensor in live_sd.items()}

        scheduler.step()

    return best_r2, best_sd

In [None]:
# Load the long-format training table (one row per image/target pair) and
# pivot it so each image has one row with one column per target_name.
df = pd.read_csv(os.path.join(DATA_DIR, "train.csv"))
df_wide = df.pivot_table(index=['image_path', 'Sampling_Date', 'Species', 'Pre_GSHH_NDVI', 'Height_Ave_cm'], columns='target_name', values='target').reset_index()
# Stable species -> class-id mapping (sorted so ids are reproducible).
species_map = {s: i for i, s in enumerate(sorted(df_wide['Species'].unique()))}

# Cross-validate with folds grouped by sampling date, keeping only the
# weights of the single best-scoring fold.
overall_best_r2, overall_best_sd = -float('inf'), None
gkf = GroupKFold(n_splits=CONFIG['n_splits'])
for fold, (t, v) in enumerate(gkf.split(df_wide, groups=df_wide['Sampling_Date'])):
    print(f"\n--- Fold {fold} ---")
    score, sd = train_fold(fold, df_wide.iloc[t], df_wide.iloc[v], species_map)
    if score > overall_best_r2:
        overall_best_r2, overall_best_sd = score, sd

if overall_best_sd is not None:
    path = os.path.join(CHECKPOINT_DIR, "best_swin_v3.pth")
    torch.save(overall_best_sd, path)
    try:
        kagglehub.model_upload("girish2002/CSIRO_Dino_SelfAugmented/pytorch/default", os.path.dirname(path), f"Swin V3 Stable R2 {overall_best_r2:.4f}")
    except Exception as exc:
        # The upload is best-effort: the local checkpoint is already saved,
        # so report the failure instead of aborting (or hiding it).
        print(f"Model upload failed ({type(exc).__name__}: {exc}); checkpoint kept at {path}")
else:
    print("No fold produced a checkpoint; nothing was saved or uploaded.")