In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os

In [None]:
import os

# Root folder of the competition files
path = '/kaggle/input/csiro-biomass'

# Read the three CSVs in one pass: sample submission, train, test
sample, train_df, test_df = (
    pd.read_csv(os.path.join(path, csv_name))
    for csv_name in ('sample_submission.csv', 'train.csv', 'test.csv')
)

# Prefix every image path with the data root so it can be opened directly
for frame in (train_df, test_df):
    frame["image_path"] = frame["image_path"].map(
        lambda rel_path: os.path.join(path, str(rel_path))
    )

# Columns removed from the training frame; errors='ignore' tolerates
# any of them being absent from the CSV
cols_to_drop = ["Sampling_Date", "State", "Species", "Pre_GSHH_NDVI", "Height_Ave_cm"]
train_df = train_df.drop(columns=cols_to_drop, errors='ignore')


In [None]:
import torch
from torch.utils.data import Dataset
from PIL import Image

class CloverDataset(Dataset):
    """Dataset that serves ``(image, target)`` pairs described by a DataFrame.

    Every row is read positionally and must expose an ``image_path`` entry
    (opened with PIL and converted to RGB) and a ``target`` entry (wrapped in
    a float32 tensor).

    Args:
        df: frame with ``image_path`` and ``target`` columns.
        transforms: optional callable applied to the RGB image.
    """

    def __init__(self, df, transforms=None):
        self.transforms = transforms
        self.df = df

    def __len__(self):
        # One sample per DataFrame row
        return len(self.df)

    def __getitem__(self, idx):
        # Positional lookup, so the frame's index labels are irrelevant
        record = self.df.iloc[idx]

        image = Image.open(record["image_path"]).convert("RGB")
        image = self.transforms(image) if self.transforms else image

        label = torch.tensor(record["target"], dtype=torch.float32)
        return image, label

import torch.nn as nn
import torchvision.models as models

class SimpleCNN(nn.Module):
    """ResNet-18 regressor emitting one scalar per input image.

    The network is created with ``weights=None`` and its final fully
    connected layer is swapped for a single-output linear layer.
    """

    def __init__(self):
        super().__init__()
        net = models.resnet18(weights=None)
        feature_dim = net.fc.in_features
        net.fc = nn.Linear(feature_dim, 1)
        self.backbone = net

    def forward(self, x):
        # Drop the trailing singleton dimension: (batch, 1) -> (batch,)
        out = self.backbone(x)
        return out.squeeze(1)

from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import torch.optim as optim
import torchvision.transforms as T
from tqdm import tqdm
from sklearn.metrics import r2_score

# Both pipelines resize to the same fixed size so images can be batched
IMG_SIZE = (224, 224)

# Training pipeline: resize, random horizontal flip (augmentation), to tensor
train_tfms = T.Compose([
    T.Resize(IMG_SIZE),
    T.RandomHorizontalFlip(),
    T.ToTensor(),
])

# Validation / test pipeline: same preprocessing without random augmentation
valid_tfms = T.Compose([
    T.Resize(IMG_SIZE),
    T.ToTensor(),
])

# Run on the GPU when one is visible, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# NOTE: no model / criterion / optimizer is built here. The per-target loop
# later in this cell creates fresh ones for every target before using them,
# so instantiating a network at this point only wasted time and device memory.
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as T
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from tqdm import tqdm
import numpy as np

# Number of passes over the training split for each model
EPOCHS = 1

# The five biomass target names found in the `target_name` column
target_names = [
    "Dry_Clover_g",
    "Dry_Dead_g",
    "Dry_Green_g",
    "Dry_Total_g",
    "GDM_g",
]

# Test rows get a NaN "target" placeholder so the dataset class,
# which reads that column for every row, can also serve them
test_df["target"] = np.nan

import pandas as pd

SEED = 42            # same value the train/valid split uses
BATCH_SIZE = 16
LEARNING_RATE = 1e-4


def _train_one_epoch(model, loader, criterion, optimizer, device, desc):
    """Run one optimisation pass over ``loader`` and return the mean loss per sample.

    Each batch loss is weighted by its batch size, so a shorter final batch
    does not skew the average and the number is directly comparable with the
    per-sample validation loss. Assumes ``criterion`` returns a batch mean
    (true for the default ``nn.MSELoss()`` used below).
    """
    model.train()
    loss_sum, n_seen = 0.0, 0
    for imgs, targets in tqdm(loader, desc=desc):
        imgs, targets = imgs.to(device), targets.to(device)
        loss = criterion(model(imgs), targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        batch_n = targets.size(0)
        loss_sum += loss.item() * batch_n
        n_seen += batch_n
    return loss_sum / max(n_seen, 1)  # max() guards an empty loader


def _predict(model, loader, device):
    """Return ``(predictions, targets)`` for every row of ``loader`` as 1-D float32 arrays."""
    model.eval()
    pred_chunks, target_chunks = [], []
    with torch.no_grad():
        for imgs, targets in loader:
            pred_chunks.append(model(imgs.to(device)).cpu().numpy())
            target_chunks.append(targets.numpy())
    if not pred_chunks:
        # Empty subset: np.concatenate would raise on an empty list
        return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.float32)
    return np.concatenate(pred_chunks), np.concatenate(target_chunks)


# Seed torch's RNG (weight init, loader shuffling, random flips) so reruns
# start from the same state; GPU kernels may still be non-deterministic.
torch.manual_seed(SEED)

all_test_preds = []  # one predicted test subset per target

# One independent model per target name
for target_name in target_names:
    print(f"\n==================== {target_name} ====================")

    # 1. Rows of train / test that belong to this target
    df_sub = train_df[train_df["target_name"] == target_name].reset_index(drop=True)
    test_sub = test_df[test_df["target_name"] == target_name].reset_index(drop=True)

    # 2. 80/20 train/validation split with a fixed seed
    train_df_c, valid_df_c = train_test_split(df_sub, test_size=0.2, random_state=SEED)

    # 3. Datasets & loaders (augmentation on the training split only)
    train_ds = CloverDataset(train_df_c, transforms=train_tfms)
    valid_ds = CloverDataset(valid_df_c, transforms=valid_tfms)
    train_dl = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    valid_dl = DataLoader(valid_ds, batch_size=BATCH_SIZE, shuffle=False)

    # 4. Fresh model, loss and optimizer for this target
    model = SimpleCNN().to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

    # 5. Train, then score on the validation split after every epoch
    for epoch in range(EPOCHS):
        train_loss = _train_one_epoch(
            model, train_dl, criterion, optimizer, device,
            desc=f"{target_name} Epoch {epoch+1}/{EPOCHS}",
        )

        valid_preds, valid_targets = _predict(model, valid_dl, device)
        valid_loss = criterion(
            torch.from_numpy(valid_preds), torch.from_numpy(valid_targets)
        ).item()
        r2 = r2_score(valid_targets, valid_preds)
        print(f"Train Loss: {train_loss:.4f} | Valid Loss: {valid_loss:.4f} | R²: {r2:.4f}")

    # 6. Predict the test rows of this target (their NaN targets are ignored)
    test_ds = CloverDataset(test_sub, transforms=valid_tfms)
    test_dl = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False)
    preds, _ = _predict(model, test_dl, device)

    # 7. Store a copy of the test subset with the placeholder replaced by predictions
    test_sub_preds = test_sub.copy()
    test_sub_preds["target"] = preds.reshape(-1)
    all_test_preds.append(test_sub_preds)

# 8. Stack the per-target subsets into a single frame
test_preds_df = pd.concat(all_test_preds, axis=0).reset_index(drop=True)
print("\n✅ All 5 models done and combined into test_preds_df!")


In [None]:
# Lookup table: sample_id -> predicted value, built from the combined predictions
predicted_ids = test_preds_df["sample_id"]
predicted_values = test_preds_df["target"]
pred_map = dict(zip(predicted_ids, predicted_values))

# Overwrite the placeholder column in test_df, keeping test_df's own row order
test_df["target"] = test_df["sample_id"].map(pred_map)

print("✅ test_df['target'] updated from test_preds_df!")


In [None]:
# Keep only the two columns written to the submission file
submission_columns = ["sample_id", "target"]
submission = test_df.loc[:, submission_columns].copy()

# Write without the DataFrame index
submission.to_csv("submission.csv", index=False)
print("\n✅ submission.csv created!")

In [None]:
# Bare expression: renders the submission frame as this cell's output for a visual check
submission