## CSIRO simple baseline

**Predict pasture biomass components** from field images.  
For each image, we must estimate five target variables (in grams):

- **Dry_Green_g** → Dry green vegetation (non-clover)  
- **Dry_Dead_g** → Dry dead material  
- **Dry_Clover_g** → Dry clover biomass  
- **GDM_g** → Green dry matter  
- **Dry_Total_g** → Total dry biomass  

**Evaluation metric:** Weighted average of the **R² score** across the five targets.

---

## Baseline Strategy

A **simple but competitive multi-output regression model** that predicts all biomass components jointly.


## Setup and Configuration


In [None]:
# =========================
# 0) Imports & config
# =========================
import os, gc, math, random, time, json
from pathlib import Path

import numpy as np
import pandas as pd

from sklearn.model_selection import GroupKFold
from sklearn.metrics import r2_score
from sklearn.preprocessing import OneHotEncoder

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

import timm
from safetensors.torch import load_file 
import albumentations as A
from albumentations.pytorch import ToTensorV2
import cv2

# ---- Run configuration ----
SEED = 42              # seed applied to random, numpy and torch below
IMG_SIZE = 448         # side length used by the resize/pad transforms
BATCH_SIZE = 16
EPOCHS = 10
LR = 2e-4
BACKBONE = "tf_efficientnetv2_s"   # try "convnext_base" if you have more GPU
USE_META = True                     # toggle meta features on/off
META_OHE_TOP_N = 30                 # top N categories kept per column, rest grouped as "Other"

if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Column order here fixes the order of the model outputs and of the metric.
TARGETS = ["Dry_Green_g", "Dry_Dead_g", "Dry_Clover_g", "GDM_g", "Dry_Total_g"]

# Equal weights, normalised to sum to 1.
# NOTE(review): confirm against the competition's official per-target weights.
TARGET_WEIGHTS = np.array([1, 1, 1, 1, 1], dtype=np.float32)
TARGET_WEIGHTS = TARGET_WEIGHTS / TARGET_WEIGHTS.sum()

# ---- Seeding ----
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)

# ---- Paths ----
INPUT_DIR = Path("/kaggle/input/csiro-biomass")
WORK_DIR = Path("/kaggle/working")

# If the input directory wraps exactly one sub-directory, read the data from
# there; otherwise fall back to the input directory itself.
subdirs = [entry for entry in INPUT_DIR.iterdir() if entry.is_dir()]
if len(subdirs) == 1:
    COMP_DIR = subdirs[0]
else:
    COMP_DIR = INPUT_DIR

print("Using data from:", COMP_DIR)

## Load and Prepare the Data

In [None]:
# =========================
# 1) Load data & pivot wide
# =========================
# Both CSVs are read from the resolved competition directory.
train_csv_path = COMP_DIR / "train.csv"
test_csv_path = COMP_DIR / "test.csv"
train = pd.read_csv(train_csv_path)
test = pd.read_csv(test_csv_path)
print(train.shape, test.shape)

# Extract image_id (basename without extension) for grouping and for sample_id construction
def path_to_id(p):
    """Return the final path component of ``p`` with its last extension removed."""
    image_file = Path(p)
    return image_file.stem

# Attach the image identifier to both long-format tables.
for frame in (train, test):
    frame["image_id"] = frame["image_path"].apply(path_to_id)

# Long -> wide: one row per image_id, one column per target_name.
# aggfunc="first" keeps the first value if an (image, target) pair repeats.
pivot_targets = (
    train.pivot_table(
        index="image_id",
        columns="target_name",
        values="target",
        aggfunc="first",
    )
    .reset_index()
)

# Metadata is taken from the first row seen for each image
# (presumably identical across that image's target rows; verify if in doubt).
meta_cols = ["image_path","Sampling_Date","State","Species","Pre_GSHH_NDVI","Height_Ave_cm"]
first_row_per_img = train.drop_duplicates(subset=["image_id"])
meta_per_img = first_row_per_img[["image_id", *meta_cols]]

df = pivot_targets.merge(meta_per_img, on="image_id", how="left")

# Every expected target must have become a column of the wide table.
missing_targets = [t for t in TARGETS if t not in df.columns]
assert not missing_targets

print("Wide train shape:", df.shape)
df.head()


## Create Meta Features

In [None]:
# =========================
# 2) Meta preprocessing
# =========================
# We’ll one-hot 'State' and the Top-N frequent categories of 'Species' (rest -> 'Other').
def topn_ohe_fit(series, top_n):
    """Return the ``top_n`` most frequent values of ``series`` as a list.

    Values are ordered by descending frequency, as produced by
    ``Series.value_counts()``.
    """
    most_frequent = series.value_counts().head(top_n)
    return list(most_frequent.index)

def topn_ohe_transform(series, cats, prefix):
    """One-hot encode ``series`` against the fixed category list ``cats``.

    Produces one 0/1 column ``{prefix}_{cat}`` per category plus a trailing
    ``{prefix}_Other`` column flagging values not contained in ``cats``.
    With an empty ``cats`` the result has no columns at all (only the index
    of ``series``).
    """
    # No categories: nothing to encode, and no "Other" column either.
    if len(cats) == 0:
        return pd.DataFrame(index=series.index)

    indicator_cols = {}
    for cat in cats:
        indicator_cols[f"{prefix}_{cat}"] = (series == cat).astype(int)
    # Catch-all bucket for everything outside the kept categories.
    indicator_cols[f"{prefix}_Other"] = (~series.isin(cats)).astype(int)
    return pd.DataFrame(indicator_cols)

# Missing categorical values become the explicit label "Unknown".
state_filled = df["State"].fillna("Unknown")
species_filled = df["Species"].fillna("Unknown")

# Category vocabularies are learned on the training table only.
state_cats = topn_ohe_fit(state_filled, top_n=10)
species_cats = topn_ohe_fit(species_filled, top_n=META_OHE_TOP_N)

state_ohe = topn_ohe_transform(state_filled, state_cats, "State")
species_ohe = topn_ohe_transform(species_filled, species_cats, "Species")

# Numeric meta: gaps are filled with each column's median.
numeric_raw = df[["Pre_GSHH_NDVI","Height_Ave_cm"]]
meta_num = numeric_raw.fillna(numeric_raw.median())

# Column order: numeric features, then State indicators, then Species indicators.
META_FEATS = pd.concat([meta_num, state_ohe, species_ohe], axis=1).astype(np.float32)
if USE_META:
    META_DIM = META_FEATS.shape[1]
else:
    META_DIM = 0
print("META_DIM:", META_DIM)

# Keep the fitted vocabularies so the same encoding can be applied to test data.
meta_map = {
    "state_cats": state_cats,
    "species_cats": species_cats,
    "meta_num_cols": ["Pre_GSHH_NDVI","Height_Ave_cm"],
}

## Dataset and Augmentations
Note: albumentations may require internet access; attaching a pre-downloaded copy of the library should resolve this.

In [None]:
# =========================
# 3) Albumentations & Dataset
# =========================
def _square_resize_steps(size):
    """Return fresh resize + reflect-pad transforms targeting a ``size`` x ``size`` canvas."""
    return [
        A.LongestMaxSize(max_size=size),
        A.PadIfNeeded(size, size, border_mode=cv2.BORDER_REFLECT_101),
    ]


# Training pipeline: resize/pad, random flips and colour jitter, then
# normalisation and conversion to a tensor.
train_tfms = A.Compose(
    _square_resize_steps(IMG_SIZE)
    + [
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.2),
        A.ColorJitter(p=0.2),
        A.Normalize(),
        ToTensorV2(),
    ]
)

# Validation/test pipeline: no random augmentation.
valid_tfms = A.Compose(
    _square_resize_steps(IMG_SIZE)
    + [
        A.CenterCrop(IMG_SIZE, IMG_SIZE, p=1.0),
        A.Normalize(),
        ToTensorV2(),
    ]
)

class PastureDataset(Dataset):
    """Dataset yielding ``(image, meta, target)`` for one image per row of ``df_img``.

    Args:
        df_img: DataFrame with an ``image_path`` column; its index is reset so
            positional indices line up with ``targets`` and ``meta``.
        targets: array indexed by row position (one target vector per image),
            or ``None`` when no labels are available.
        img_root: directory against which ``image_path`` values are resolved.
        tfms: callable invoked as ``tfms(image=img)["image"]``; a falsy value
            skips the transform step.
        meta: optional array indexed by row position; cast to float32.

    NOTE(review): ``meta`` and ``target`` are returned as ``None`` when not
    supplied. Confirm that the collate function used by the DataLoader accepts
    ``None`` entries before running with ``meta=None`` or ``targets=None``.
    """

    def __init__(self, df_img, targets, img_root, tfms, meta=None):
        self.df = df_img.reset_index(drop=True)
        self.targets = targets  # np array [N, 5] or None (for test)
        self.img_root = Path(img_root)
        self.tfms = tfms
        self.meta = meta.astype(np.float32) if meta is not None else None
        self.image_paths = self.df["image_path"].values

    def __len__(self):
        return len(self.df)

    def _read_rgb(self, img_path_rel):
        """Load the image as RGB, trying ``<root>/<path>`` then ``<root>/train/<file name>``.

        Raises:
            FileNotFoundError: if neither candidate can be read. Without this
                check the ``None`` returned by ``cv2.imread`` would only fail
                later inside ``cv2.cvtColor`` with an unrelated-looking error.
        """
        candidates = (
            self.img_root / img_path_rel,
            self.img_root / "train" / Path(img_path_rel).name,
        )
        for candidate in candidates:
            img = cv2.imread(str(candidate))
            if img is not None:
                # OpenCV decodes to BGR channel order; convert to RGB.
                return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        tried = ", ".join(str(c) for c in candidates)
        raise FileNotFoundError(f"Could not read image '{img_path_rel}' (tried: {tried})")

    def __getitem__(self, idx):
        img = self._read_rgb(self.image_paths[idx])
        if self.tfms:
            img = self.tfms(image=img)["image"]

        x_meta = None
        if self.meta is not None:
            x_meta = torch.from_numpy(self.meta[idx])

        y = None
        if self.targets is not None:
            y = torch.tensor(self.targets[idx], dtype=torch.float32)

        return img, x_meta, y

## Model Architecture
- **timm** backbone (EfficientNetV2-S by default) for image feature extraction.
- A small **MLP** embeds the meta features; its output is concatenated with the image features.
- Five separate regression heads (one per biomass component) each output a single value, giving 5 regression values per image.

We can also experiment with convnextv2, swinv2, or efficientformer.

In [None]:
# =========================
# 4) Model (Enhanced Meta + Multi-Head)
# =========================
import torch
import torch.nn as nn
import timm
from safetensors.torch import load_file

class MultiHeadRegressor(nn.Module):
    """Shared image backbone, optional metadata MLP, and one regression head per target.

    The output of ``forward`` has one column per entry of the module-level
    ``TARGETS`` list, in that order.
    """

    def __init__(self, 
                 backbone="tf_efficientnetv2_s", 
                 meta_dim=0,
                 pretrained=True, 
                 pretrained_path=None):
        """
        Args:
            backbone: timm model name for the image feature extractor.
            meta_dim: width of the metadata vector; ``0`` disables the
                metadata branch.
            pretrained: load weights from ``pretrained_path`` when a path is
                given. Without a path this flag has no effect: the backbone is
                always created with ``pretrained=False`` and therefore keeps
                its random initialisation.
            pretrained_path: ``str`` or path-like pointing to a
                ``.safetensors`` file or a ``torch.save`` checkpoint
                (optionally wrapped in a ``"state_dict"`` key).
        """
        super().__init__()

        # ---------------------
        # Backbone
        # ---------------------
        self.backbone = timm.create_model(
            backbone,
            pretrained=False,   # We'll load manually if path is provided
            num_classes=0,
            global_pool='avg'
        )

        # Optional manual pretrained weights
        if pretrained and pretrained_path is not None:
            # Normalise to str so pathlib.Path arguments work with endswith().
            weights_file = str(pretrained_path)
            print(f"Loading pretrained weights from: {pretrained_path}")
            if weights_file.endswith(".safetensors"):
                state_dict = load_file(weights_file)
            else:
                # NOTE: torch.load unpickles the file; only use trusted checkpoints.
                state_dict = torch.load(weights_file, map_location="cpu")
                if "state_dict" in state_dict:
                    state_dict = state_dict["state_dict"]
            # strict=False: tolerate key mismatches; the counts are printed so
            # a badly matching checkpoint is visible in the log.
            missing, unexpected = self.backbone.load_state_dict(state_dict, strict=False)
            print(f"Loaded weights — missing: {len(missing)}, unexpected: {len(unexpected)}")

        in_dim = self.backbone.num_features
        self.meta_dim = meta_dim

        # ---------------------
        # Stronger meta MLP
        # ---------------------
        if meta_dim > 0:
            self.meta_mlp = nn.Sequential(
                nn.Linear(meta_dim, 128),
                nn.ReLU(),
                nn.BatchNorm1d(128),
                nn.Dropout(0.2),
                nn.Linear(128, 64),
                nn.ReLU(),
                nn.BatchNorm1d(64),
            )
            in_dim = in_dim + 64  # concatenate image + meta embeddings

        # ---------------------
        # Multi-head regression
        # ---------------------
        hidden = 256
        self.heads = nn.ModuleDict({
            t: nn.Sequential(
                nn.Linear(in_dim, hidden),
                nn.ReLU(),
                nn.Dropout(0.3),
                nn.Linear(hidden, 1)
            ) for t in TARGETS
        })

    def forward(self, x, meta=None):
        """Return predictions of shape ``[B, len(TARGETS)]``.

        Args:
            x: image batch passed to the backbone.
            meta: metadata batch; required when the model was built with
                ``meta_dim > 0`` and ignored otherwise.

        Raises:
            ValueError: if the model has a metadata branch but ``meta`` is
                ``None``. The heads were sized for the concatenated features,
                so continuing would only fail later with a shape mismatch.
        """
        feats = self.backbone(x)
        if self.meta_dim > 0:
            if meta is None:
                raise ValueError(
                    f"Model was built with meta_dim={self.meta_dim} but forward() received meta=None"
                )
            feats = torch.cat([feats, self.meta_mlp(meta)], dim=1)

        # Predict each target independently, in TARGETS order.
        return torch.cat([self.heads[t](feats) for t in TARGETS], dim=1)


## Training Functions and Evaluation Metric

In [None]:
# =========================
# 5) Metric & training loop
# =========================
def weighted_r2(y_true, y_pred):
    """Compute per-target R² and their weighted sum.

    Args:
        y_true, y_pred: numpy arrays with one column per entry of ``TARGETS``.

    Returns:
        ``(weighted, per_target)`` where ``per_target`` is an array of R²
        scores in ``TARGETS`` order and ``weighted`` is their sum weighted by
        ``TARGET_WEIGHTS``, as a Python float.
    """
    per_target = np.array(
        [r2_score(y_true[:, col], y_pred[:, col]) for col, _ in enumerate(TARGETS)]
    )
    combined = (per_target * TARGET_WEIGHTS).sum()
    return float(combined), per_target

def train_one_epoch(model, loader, optimizer, scaler, criterion, lam_total_consistency=0.0):
    """Run one training pass over ``loader`` and return the mean per-sample loss.

    Args:
        model: network called as ``model(imgs, metas)``.
        loader: iterable of ``(imgs, metas, ys)`` batches; ``metas`` may be ``None``.
        optimizer: optimizer stepped once per batch through ``scaler``.
        scaler: ``GradScaler`` used for loss scaling. Autocast follows
            ``scaler.is_enabled()`` so both halves of mixed precision are on
            or off together (previously autocast was requested even when the
            scaler was disabled, e.g. on CPU).
        criterion: loss called as ``criterion(preds, ys)``.
        lam_total_consistency: weight of the optional L1 penalty tying the
            predicted total to the sum of the three component predictions;
            ``0`` disables it.

    Returns:
        Loss summed over samples divided by the number of samples actually
        processed. Counting processed samples (instead of using
        ``len(loader.dataset)``) keeps the mean correct when the loader drops
        the last incomplete batch. Returns ``0.0`` for an empty loader.
    """
    model.train()
    use_amp = scaler.is_enabled()
    total_loss = 0.0
    n_seen = 0
    for imgs, metas, ys in loader:
        imgs = imgs.to(DEVICE)
        ys = ys.to(DEVICE)
        metas = metas.to(DEVICE) if metas is not None else None

        optimizer.zero_grad()
        with torch.cuda.amp.autocast(enabled=use_amp):
            preds = model(imgs, metas)
            loss = criterion(preds, ys)
            # Optional soft constraint: Dry_Total ≈ Dry_Green + Dry_Dead + Dry_Clover
            if lam_total_consistency > 0:
                # Column indices 0, 1, 2 and 4 assume the TARGETS order
                # [Dry_Green_g, Dry_Dead_g, Dry_Clover_g, GDM_g, Dry_Total_g].
                total_est = preds[:, 0] + preds[:, 1] + preds[:, 2]
                loss_cons = nn.functional.l1_loss(total_est, preds[:, 4])
                loss = loss + lam_total_consistency * loss_cons

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        batch_size = imgs.size(0)
        total_loss += loss.item() * batch_size
        n_seen += batch_size
    return total_loss / n_seen if n_seen else 0.0

@torch.no_grad()
def validate(model, loader, criterion):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Returns:
        ``(mean_loss, weighted_r2, per_target_r2, predictions, targets)``
        where ``mean_loss`` is the sample-weighted loss divided by
        ``len(loader.dataset)`` and the last two items are numpy arrays
        concatenated over all batches.
    """
    model.eval()
    loss_sum = 0.0
    target_chunks = []
    pred_chunks = []
    for batch_imgs, batch_meta, batch_y in loader:
        batch_imgs = batch_imgs.to(DEVICE)
        if batch_meta is not None:
            batch_meta = batch_meta.to(DEVICE)
        batch_y = batch_y.to(DEVICE)

        batch_pred = model(batch_imgs, batch_meta)
        batch_loss = criterion(batch_pred, batch_y)
        # Weight by batch size so a smaller final batch is averaged correctly.
        loss_sum += batch_loss.item() * batch_imgs.size(0)

        target_chunks.append(batch_y.cpu().numpy())
        pred_chunks.append(batch_pred.cpu().numpy())

    y_true = np.concatenate(target_chunks)
    y_pred = np.concatenate(pred_chunks)
    wr2, r2s = weighted_r2(y_true, y_pred)
    mean_loss = loss_sum / len(loader.dataset)
    return mean_loss, wr2, r2s, y_pred, y_true

## Cross-Validation Strategy

**GroupKFold (5 folds)**:
- Groups are based on image_id to prevent data leakage between multiple target rows from the same image.
- Each fold is trained and validated independently.


In [None]:
# =========================
# 6) Build CV folds
# =========================
# Target matrix, one row per row of df and one column per entry of TARGETS.
Y = df[TARGETS].values.astype(np.float32)

# Meta feature matrix aligned row-for-row with df (None when meta is disabled).
if USE_META:
    META_MAT = META_FEATS.values
else:
    META_MAT = None

# Fold assignment is grouped by image_id so rows sharing an image never end up
# on both sides of a split.
groups = df["image_id"].values
gkf = GroupKFold(n_splits=5)

# Root directory for image files; the relative paths stored in
# df["image_path"] (presumably of the form "train/IDxxx.jpg") are resolved
# against it.
IMG_ROOT = COMP_DIR

## Model Training Across Folds

In [None]:
# =========================
# 7) Train CV and out-of-fold predictions
# =========================
# Out-of-fold predictions: each row is filled by the fold in which it was held out.
oof_preds = np.zeros_like(Y, dtype=np.float32)
fold_scores = []
for fold, (tr_idx, va_idx) in enumerate(gkf.split(df, groups=groups)):
    print(f"\n===== FOLD {fold} =====")

    tr_df, va_df = df.iloc[tr_idx], df.iloc[va_idx]
    tr_Y, va_Y = Y[tr_idx], Y[va_idx]
    tr_meta = META_MAT[tr_idx] if USE_META else None
    va_meta = META_MAT[va_idx] if USE_META else None

    tr_ds = PastureDataset(tr_df, tr_Y, IMG_ROOT, train_tfms, tr_meta)
    va_ds = PastureDataset(va_df, va_Y, IMG_ROOT, valid_tfms, va_meta)

    tr_loader = DataLoader(tr_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, pin_memory=True, drop_last=True)
    va_loader = DataLoader(va_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

    # A fresh model, optimizer, scheduler and scaler per fold.
    model = MultiHeadRegressor(
        backbone=BACKBONE, 
        meta_dim=(META_DIM if USE_META else 0), 
        pretrained=True,
        pretrained_path="/kaggle/input/timm-weights/timm_weights/tf_efficientnetv2_s_21ft1k-d7dafa41.safetensors"  # or specify custom weights
    ).to(DEVICE)
    optimizer = torch.optim.AdamW(model.parameters(), lr=LR, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
    scaler = torch.cuda.amp.GradScaler(enabled=(DEVICE=="cuda"))
    criterion = nn.MSELoss()

    best_wr2 = -1e9
    best_state = None

    for epoch in range(1, EPOCHS+1):
        tr_loss = train_one_epoch(model, tr_loader, optimizer, scaler, criterion, lam_total_consistency=0.02)
        va_loss, va_wr2, va_r2s, va_p, va_y = validate(model, va_loader, criterion)
        scheduler.step()

        print(f"Epoch {epoch:02d} | tr_loss {tr_loss:.4f} | va_loss {va_loss:.4f} | WR2 {va_wr2:.4f} | per-target R2 {np.round(va_r2s,3)}")
        if va_wr2 > best_wr2:
            best_wr2 = va_wr2
            # copy=True forces an independent snapshot. A plain .cpu() returns
            # the same tensor when the model already lives on the CPU, so the
            # "best" weights would silently follow later optimizer updates.
            best_state = {k: v.detach().to("cpu", copy=True) for k, v in model.state_dict().items()}

    print(f"Fold {fold} best WR2: {best_wr2:.4f}")
    fold_scores.append(best_wr2)

    # Save OOF predictions with the best model
    model.load_state_dict({k:v.to(DEVICE) for k,v in best_state.items()})
    va_loss, va_wr2, va_r2s, va_p, va_y = validate(model, va_loader, criterion)
    oof_preds[va_idx] = va_p

    # Save fold weights for inference ensembling later (optional)
    torch.save(best_state, WORK_DIR / f"model_fold{fold}.pt")
    # Release the model before the next fold allocates its own.
    del model; gc.collect(); torch.cuda.empty_cache()

cv_wr2, cv_r2s = weighted_r2(Y, oof_preds)
print("\nCV weighted R2:", cv_wr2, " | per-target:", np.round(cv_r2s, 4))
np.save(WORK_DIR / "oof_preds.npy", oof_preds)

## Inference and Submission File

In [None]:
# =========================
# 8) Inference on test & submission
# =========================
# Build test meta with the same OHE mapping
def build_test_meta(test_df, comp_dir):
    """Collapse the long-format test table to one row per image and build its meta features.

    Metadata columns that exist in ``test_df`` are used. Any that are absent
    are replaced by neutral values (training median for the numeric columns,
    ``"Unknown"`` for State/Species), which is what this function previously
    did unconditionally. The result is encoded with the category
    vocabularies fitted on the training data (``state_cats``,
    ``species_cats``), and its column order matches the training feature
    matrix: numeric columns, State indicators, Species indicators.

    Args:
        test_df: long-format test table with ``image_id`` and ``image_path``.
        comp_dir: unused; kept for backward compatibility with existing callers.

    Returns:
        ``(test_imgs, meta_mat)`` where ``test_imgs`` holds ``image_id`` and
        ``image_path`` for each distinct image and ``meta_mat`` is a float32
        DataFrame with one row per image in the same order.
    """
    # test.csv has one row per (image, target); the model runs once per image.
    per_img = test_df.drop_duplicates(subset=["image_id"])
    test_imgs = per_img[["image_id", "image_path"]].copy()
    n_imgs = len(test_imgs)

    # Numeric meta: real values when present, gaps filled with the training
    # median (the same fill value used when building the training features).
    numeric = {}
    for col in ("Pre_GSHH_NDVI", "Height_Ave_cm"):
        train_median = df[col].median()
        if col in per_img.columns:
            numeric[col] = (
                pd.to_numeric(per_img[col], errors="coerce").fillna(train_median).to_numpy()
            )
        else:
            numeric[col] = np.full(n_imgs, train_median)
    meta_num = pd.DataFrame(numeric)

    def _categorical(col):
        # Positional (0..n-1) index so the concat below aligns row by row.
        if col in per_img.columns:
            return per_img[col].fillna("Unknown").reset_index(drop=True)
        return pd.Series(["Unknown"] * n_imgs)

    state_ohe = topn_ohe_transform(_categorical("State"), state_cats, "State")
    species_ohe = topn_ohe_transform(_categorical("Species"), species_cats, "Species")
    meta_mat = pd.concat([meta_num, state_ohe, species_ohe], axis=1).astype(np.float32)
    return test_imgs, meta_mat

# test_imgs: one row per distinct test image; test_meta: the meta feature
# frame returned alongside it by build_test_meta.
test_imgs, test_meta = build_test_meta(test, COMP_DIR)

# Dataloader for test images
class TestImageDataset(Dataset):
    """Label-free dataset yielding ``(image, meta)`` for one image per row of ``df_img``.

    Args:
        df_img: DataFrame with an ``image_path`` column; its index is reset so
            positional indices line up with ``meta``.
        img_root: directory against which ``image_path`` values are resolved.
        tfms: callable invoked as ``tfms(image=img)["image"]``.
        meta: optional array indexed by row position; cast to float32.

    NOTE(review): ``meta`` is returned as ``None`` when not supplied. Confirm
    that the collate function used by the DataLoader accepts ``None`` entries
    before running with ``meta=None``.
    """

    def __init__(self, df_img, img_root, tfms, meta=None):
        self.df = df_img.reset_index(drop=True)
        self.img_root = Path(img_root)
        self.tfms = tfms
        self.meta = meta.astype(np.float32) if meta is not None else None
        self.image_paths = self.df["image_path"].values

    def __len__(self):
        return len(self.df)

    def _read_rgb(self, img_path_rel):
        """Load the image as RGB, trying ``<root>/<path>`` then ``<root>/test/<file name>``.

        Raises:
            FileNotFoundError: if neither candidate can be read. Without this
                check the ``None`` returned by ``cv2.imread`` would only fail
                later inside ``cv2.cvtColor`` with an unrelated-looking error.
        """
        candidates = (
            self.img_root / img_path_rel,
            self.img_root / "test" / Path(img_path_rel).name,
        )
        for candidate in candidates:
            img = cv2.imread(str(candidate))
            if img is not None:
                # OpenCV decodes to BGR channel order; convert to RGB.
                return cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        tried = ", ".join(str(c) for c in candidates)
        raise FileNotFoundError(f"Could not read image '{img_path_rel}' (tried: {tried})")

    def __getitem__(self, idx):
        img = self._read_rgb(self.image_paths[idx])
        img = self.tfms(image=img)["image"]
        x_meta = torch.from_numpy(self.meta[idx]) if self.meta is not None else None
        return img, x_meta

test_ds = TestImageDataset(test_imgs, COMP_DIR, valid_tfms, test_meta.values if USE_META else None)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=2, pin_memory=True)

# Load fold models and ensemble (mean).
# The fold count is taken from the CV splitter so it cannot drift from the
# number of checkpoints written during training.
n_folds = gkf.get_n_splits()
fold_preds = []
for fold in range(n_folds):
    # No pretrained_path is passed: the backbone starts from its default
    # initialisation and every weight is then overwritten by the checkpoint.
    model = MultiHeadRegressor(backbone=BACKBONE, meta_dim=(META_DIM if USE_META else 0)).to(DEVICE)
    state_dict = torch.load(WORK_DIR / f"model_fold{fold}.pt", map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.eval()
    preds_all = []
    with torch.no_grad():
        for imgs, metas in test_loader:
            imgs = imgs.to(DEVICE)
            metas = metas.to(DEVICE) if USE_META else None
            p = model(imgs, metas).cpu().numpy()
            preds_all.append(p)
    preds_all = np.vstack(preds_all)  # [num_test_images, 5]
    fold_preds.append(preds_all)
    del model; gc.collect(); torch.cuda.empty_cache()

preds_mean = np.mean(fold_preds, axis=0)  # [N_test_images, 5]
preds_mean = np.clip(preds_mean, a_min=0.0, a_max=None)  # biomass can’t be negative

# Map back to long format using test.csv order:
# test has multiple rows per image (one per target_name). Each row is resolved
# to a (row, column) position in preds_mean with vectorised lookups instead of
# a Python-level loop over test.iterrows().
imgid_to_row = {iid: i for i, iid in enumerate(test_imgs["image_id"].values)}
target_to_col = {name: j for j, name in enumerate(TARGETS)}

row_idx = test["image_id"].map(imgid_to_row)
col_idx = test["target_name"].map(target_to_col)
if row_idx.isna().any() or col_idx.isna().any():
    raise ValueError("test.csv contains an image_id or target_name with no matching prediction")

sub_df = pd.DataFrame({
    "sample_id": test["sample_id"].to_numpy(),
    # Widen to float64 so the CSV carries the same values the per-row
    # float(pred) conversion produced.
    "target": preds_mean[
        row_idx.to_numpy(dtype=np.int64), col_idx.to_numpy(dtype=np.int64)
    ].astype(np.float64),
})
sub_df.to_csv("submission.csv", index=False)
sub_df.head(10)


Thanks for reading! Made with <3