In [1]:
import os
import sys
import gc
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from PIL import Image
from pathlib import Path
from sklearn.model_selection import StratifiedKFold

# ==========================================
# 1. CONFIGURATION
# ==========================================
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
IMG_SIZE = 320     # The winning resolution
BATCH_SIZE = 12    # Reduced slightly to fit 2 heavy models
EPOCHS = 15        # MixUp needs time to converge
LEARNING_RATE = 2e-4
N_FOLDS = 5
IMAGE_DIR = Path("/kaggle/input/csiro-biomass")
TARGET_COLS = ['Dry_Green_g', 'Dry_Dead_g', 'Dry_Clover_g', 'GDM_g', 'Dry_Total_g']

print(f"Running DUAL-MIXUP ENSEMBLE (ResNet34 + DenseNet121) on {DEVICE}...")

# ==========================================
# 2. 4-CHANNEL DATASET
# ==========================================
class Biomass4ChannelDataset(Dataset):
    def __init__(self, df, target_cols=None, is_test=False):
        self.df = df.reset_index(drop=True)
        self.target_cols = target_cols
        self.is_test = is_test
        self.root_dir = IMAGE_DIR
        self.mean = torch.tensor([0.485, 0.456, 0.406, 0.5]).view(4,1,1)
        self.std = torch.tensor([0.229, 0.224, 0.225, 0.5]).view(4,1,1)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        rel_path = self.df.loc[idx, "image_path"]
        img_path = self.root_dir / rel_path
        try:
            pil_img = Image.open(img_path).convert("RGB").resize((IMG_SIZE, IMG_SIZE))
            img = np.array(pil_img).astype(np.float32) / 255.0
        except:
            img = np.zeros((IMG_SIZE, IMG_SIZE, 3), dtype=np.float32)

        # Standard ExG
        r, g, b = img[:,:,0], img[:,:,1], img[:,:,2]
        exg = (2 * g) - r - b
        exg = (exg - exg.min()) / (exg.max() - exg.min() + 1e-6)
        
        img_4c = np.dstack((img, exg))
        image = torch.tensor(img_4c.transpose(2, 0, 1), dtype=torch.float32)
        
        if not self.is_test:
            if np.random.random() > 0.5: image = torch.flip(image, dims=[2])
            if np.random.random() > 0.5: image = torch.flip(image, dims=[1])

        image = (image - self.mean) / self.std

        if self.is_test:
            img_id = Path(rel_path).stem 
            return image, img_id
        else:
            targets = self.df.loc[idx, self.target_cols].values.astype(float)
            targets = np.log1p(targets) 
            return image, torch.tensor(targets, dtype=torch.float32)

# ==========================================
# 3. MODEL FACTORY
# ==========================================
def load_weights_safely(model, alias):
    weights_path = None
    for dirname, _, filenames in os.walk('/kaggle/input'):
        for filename in filenames:
            if alias in filename and '.pth' in filename:
                weights_path = os.path.join(dirname, filename)
                break
        if weights_path: break
    
    if weights_path:
        try: model.load_state_dict(torch.load(weights_path, weights_only=False))
        except: pass
        print(f"Loaded {alias} weights.")
    else:
        print(f"Warning: {alias} weights not found. Training from scratch (Performance Risk).")

def get_resnet34():
    model = models.resnet34(weights=None)
    load_weights_safely(model, 'resnet34')
    original_conv1 = model.conv1
    model.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
    with torch.no_grad():
        model.conv1.weight[:, :3, :, :] = original_conv1.weight
        model.conv1.weight[:, 3:4, :, :] = torch.mean(original_conv1.weight, dim=1, keepdim=True)
    model.fc = nn.Linear(model.fc.in_features, 5)
    return model.to(DEVICE)

def get_densenet121():
    model = models.densenet121(weights=None)
    load_weights_safely(model, 'densenet121')
    original_conv0 = model.features.conv0
    model.features.conv0 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
    with torch.no_grad():
        model.features.conv0.weight[:, :3, :, :] = original_conv0.weight
        model.features.conv0.weight[:, 3:4, :, :] = torch.mean(original_conv0.weight, dim=1, keepdim=True)
    model.classifier = nn.Linear(model.classifier.in_features, 5)
    return model.to(DEVICE)

# ==========================================
# 4. TRAINING ENGINE (With MixUp)
# ==========================================
class WeightedHuberLoss(nn.Module):
    def __init__(self, delta=1.0):
        super().__init__()
        self.huber = nn.HuberLoss(reduction='none', delta=delta)
        self.weights = torch.tensor([0.1, 0.1, 0.1, 0.2, 0.5]).to(DEVICE)
    def forward(self, preds, targets):
        return (self.huber(preds, targets) * self.weights).mean()

def train_model(model_name, splits, train_pivot):
    print(f"\n--- Training {model_name} with MixUp ---")
    
    for fold, (train_idx, val_idx) in enumerate(splits):
        print(f"Fold {fold+1}/{N_FOLDS}")
        
        train_loader = DataLoader(
            Biomass4ChannelDataset(train_pivot.iloc[train_idx], TARGET_COLS),
            batch_size=BATCH_SIZE, shuffle=True, num_workers=2
        )
        valid_loader = DataLoader(
            Biomass4ChannelDataset(train_pivot.iloc[val_idx], TARGET_COLS),
            batch_size=BATCH_SIZE, shuffle=False, num_workers=2
        )
        
        if model_name == 'resnet34': model = get_resnet34()
        else: model = get_densenet121()
            
        criterion = WeightedHuberLoss(delta=1.0)
        optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
        
        best_loss = float('inf')
        
        for epoch in range(EPOCHS):
            model.train()
            for x, y in train_loader:
                x, y = x.to(DEVICE), y.to(DEVICE)
                optimizer.zero_grad()
                
                # --- MIXUP LOGIC (The Winning Formula) ---
                if np.random.random() < 0.5:
                    lam = np.random.beta(1.0, 1.0)
                    index = torch.randperm(x.size(0)).to(DEVICE)
                    mixed_x = lam * x + (1 - lam) * x[index]
                    
                    # Unlog -> Mix -> Relog
                    y_lin_a = torch.expm1(y)
                    y_lin_b = torch.expm1(y[index])
                    mixed_y = torch.log1p(lam * y_lin_a + (1 - lam) * y_lin_b)
                    
                    preds = model(mixed_x)
                    loss = criterion(preds, mixed_y)
                else:
                    preds = model(x)
                    loss = criterion(preds, y)
                
                loss.backward()
                optimizer.step()
                
            model.eval()
            val_loss = 0
            with torch.no_grad():
                for x, y in valid_loader:
                    x, y = x.to(DEVICE), y.to(DEVICE)
                    val_loss += criterion(model(x), y).item()
            
            avg_val = val_loss / len(valid_loader)
            scheduler.step()
            
            if avg_val < best_loss:
                best_loss = avg_val
                torch.save(model.state_dict(), f"{model_name}_fold{fold}.pth")
        
        print(f"  Best Loss: {best_loss:.4f}")
        del model, optimizer, train_loader, valid_loader
        torch.cuda.empty_cache()
        gc.collect()

# ==========================================
# 5. EXECUTION
# ==========================================
raw_df = pd.read_csv("/kaggle/input/csiro-biomass/train.csv")
train_pivot = raw_df.pivot(index='image_path', columns='target_name', values='target').reset_index().fillna(0.0)
train_pivot['bin'] = pd.qcut(train_pivot['Dry_Total_g'], q=10, labels=False, duplicates='drop')

skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)
splits = list(skf.split(train_pivot, train_pivot['bin']))

# TRAIN BOTH (This will take ~4-5 hours)
train_model('resnet34', splits, train_pivot)
train_model('densenet121', splits, train_pivot)

# ==========================================
# 6. ENSEMBLE INFERENCE
# ==========================================
print("\nStarting Ensemble Inference...")
test_df_raw = pd.read_csv("/kaggle/input/csiro-biomass/test.csv")
test_unique = test_df_raw[['image_path']].drop_duplicates()
test_ds = Biomass4ChannelDataset(test_unique, is_test=True)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

final_preds_accum = {}
models_to_run = ['resnet34', 'densenet121']

for model_name in models_to_run:
    for fold in range(N_FOLDS):
        print(f"Predicting {model_name} Fold {fold+1}...")
        
        if model_name == 'resnet34': model = get_resnet34()
        else: model = get_densenet121()
            
        model.load_state_dict(torch.load(f"{model_name}_fold{fold}.pth", weights_only=True))
        model.eval()
        
        with torch.no_grad():
            for images, img_ids in test_loader:
                images = images.to(DEVICE)
                
                # TTA: 3 Views (Normal + Flip H + Flip V)
                # This ensures maximum coverage
                p1 = model(images)
                p2 = model(torch.flip(images, dims=[3]))
                p3 = model(torch.flip(images, dims=[2]))
                
                avg_log = (p1 + p2 + p3) / 3.0
                preds = np.expm1(avg_log.cpu().numpy())
                
                for i, img_id in enumerate(img_ids):
                    if img_id not in final_preds_accum: final_preds_accum[img_id] = np.zeros(5)
                    final_preds_accum[img_id] += preds[i]
        
        del model
        torch.cuda.empty_cache()
        gc.collect()

# Average across (2 models * 5 folds * 3 TTA = 30 votes per image!)
results = []
divisor = len(models_to_run) * N_FOLDS

for img_id, total_preds in final_preds_accum.items():
    avg_pred = total_preds / divisor
    for j, col in enumerate(TARGET_COLS):
        results.append({'sample_id': f"{img_id}__{col}", 'target': float(avg_pred[j])})

submission_df = pd.DataFrame(results)
submission_df['target'] = submission_df['target'].clip(lower=0.0)
submission_df.to_csv("submission.csv", index=False)
print("Dual-MixUp Ensemble Submission Ready.")

Running DUAL-MIXUP ENSEMBLE (ResNet34 + DenseNet121) on cuda...

--- Training resnet34 with MixUp ---
Fold 1/5
Loaded resnet34 weights.
  Best Loss: 0.0270
Fold 2/5
Loaded resnet34 weights.
  Best Loss: 0.0173
Fold 3/5
Loaded resnet34 weights.
  Best Loss: 0.0183
Fold 4/5
Loaded resnet34 weights.
  Best Loss: 0.0197
Fold 5/5
Loaded resnet34 weights.
  Best Loss: 0.0230

--- Training densenet121 with MixUp ---
Fold 1/5
Loaded densenet121 weights.
  Best Loss: 0.0422
Fold 2/5
Loaded densenet121 weights.
  Best Loss: 0.0337
Fold 3/5
Loaded densenet121 weights.
  Best Loss: 0.0322
Fold 4/5
Loaded densenet121 weights.
  Best Loss: 0.0278
Fold 5/5
Loaded densenet121 weights.
  Best Loss: 0.0333

Starting Ensemble Inference...
Predicting resnet34 Fold 1...
Loaded resnet34 weights.
Predicting resnet34 Fold 2...
Loaded resnet34 weights.
Predicting resnet34 Fold 3...
Loaded resnet34 weights.
Predicting resnet34 Fold 4...
Loaded resnet34 weights.
Predicting resnet34 Fold 5...
Loaded resnet34 weig

In [2]:
submission_df.head()

Unnamed: 0,sample_id,target
0,ID1001187975__Dry_Green_g,17.583729
1,ID1001187975__Dry_Dead_g,27.12185
2,ID1001187975__Dry_Clover_g,0.351697
3,ID1001187975__GDM_g,17.067521
4,ID1001187975__Dry_Total_g,43.925161
