In [21]:
import os
import numpy as np
import pandas as pd
from PIL import Image

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

from torchvision.models import efficientnet_b0
from torchvision import transforms

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
print("Device:", device)

Device: cpu


In [22]:
# --- Input locations (Kaggle dataset mounts) ---
BASE_DIR = "/kaggle/input/csiro-biomass"
FOLDS_DIR = "/kaggle/input/csiro-effnet-baseline-weights1"

# --- Model outputs: one regression target per entry, in head-output order ---
target_cols = [
    "Dry_Clover_g",
    "Dry_Dead_g",
    "Dry_Green_g",
    "Dry_Total_g",
    "GDM_g",
]

# --- Tabular inputs expected by the model, in feature order ---
tab_cols = [
    "Pre_GSHH_NDVI",
    "Height_log",
]

# Constant stand-ins used for the tabular inputs at inference time
# (presumably the training-set means of the two tab_cols -- confirm against the
# training notebook).
NDVI_MEAN = 0.657423
HEIGHT_LOG_MEAN = 2.151295

In [23]:
IMG_SIZE = 224

# Per-channel RGB normalization statistics (these match the commonly used
# ImageNet values).
NORM_MEAN = [0.485, 0.456, 0.406]
NORM_STD = [0.229, 0.224, 0.225]

# Deterministic preprocessing for inference: resize to 256x256, take the
# central IMG_SIZE x IMG_SIZE crop, convert to a tensor and normalize.
_val_steps = [
    transforms.Resize((256, 256)),
    transforms.CenterCrop(IMG_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=NORM_MEAN, std=NORM_STD),
]
val_transform = transforms.Compose(_val_steps)

In [24]:
class EffnetWithTabular(nn.Module):
    """EfficientNet-B0 image encoder fused with a small MLP over tabular features.

    The image embedding and a 16-dimensional tabular embedding are concatenated
    and passed through a two-layer head that outputs ``num_targets`` values.

    Args:
        num_tab_features: Number of tabular input features per sample.
        num_targets: Number of regression outputs.
    """

    def __init__(self, num_tab_features: int, num_targets: int):
        super().__init__()

        # IMPORTANT: no pretrained weights in the submission (there is no
        # internet access); trained weights are loaded from a checkpoint instead.
        backbone = efficientnet_b0(weights=None)
        img_emb_dim = backbone.classifier[1].in_features

        # Remove the standard classifier so the backbone outputs its embedding.
        backbone.classifier = nn.Identity()
        self.backbone = backbone

        tab_emb_dim = 16
        self.tab_mlp = nn.Sequential(
            nn.Linear(num_tab_features, 32),
            nn.ReLU(),
            nn.Linear(32, tab_emb_dim),
            nn.ReLU(),
        )

        # Regression head over the concatenated [image | tabular] embedding.
        self.head = nn.Sequential(
            nn.Linear(img_emb_dim + tab_emb_dim, 256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_targets),
        )

    def forward(self, images, tab_feats):
        """Return head outputs for a batch of images and their tabular features."""
        fused = torch.cat(
            (self.backbone(images), self.tab_mlp(tab_feats)),
            dim=1,
        )
        return self.head(fused)

In [25]:
def load_fold_model(path):
    """Build an ``EffnetWithTabular`` and restore one fold's trained weights.

    Args:
        path: Filesystem path of a checkpoint file containing the model's
            ``state_dict``.

    Returns:
        The model, placed on ``device`` and switched to evaluation mode.
    """
    model = EffnetWithTabular(
        num_tab_features=len(tab_cols),
        num_targets=len(target_cols),
    ).to(device)
    # weights_only=True limits unpickling to tensors and plain containers, so a
    # tampered checkpoint cannot run arbitrary code while it is being loaded.
    # The file is consumed as a state_dict below, so nothing else is needed.
    state = torch.load(path, map_location=device, weights_only=True)
    model.load_state_dict(state)
    # Evaluation mode disables train-only behavior such as the head's Dropout.
    model.eval()
    return model

In [26]:
# One checkpoint per cross-validation fold (folds 0..4); all are kept in memory
# so predictions can be averaged across them.
fold_paths = [
    os.path.join(FOLDS_DIR, f"effnet_tab_fold{fold_idx}.pth")
    for fold_idx in range(5)
]

fold_models = []
for full_path in fold_paths:
    print("Loading:", full_path)
    fold_models.append(load_fold_model(full_path))


Loading: /kaggle/input/csiro-effnet-baseline-weights1/effnet_tab_fold0.pth
Loading: /kaggle/input/csiro-effnet-baseline-weights1/effnet_tab_fold1.pth
Loading: /kaggle/input/csiro-effnet-baseline-weights1/effnet_tab_fold2.pth
Loading: /kaggle/input/csiro-effnet-baseline-weights1/effnet_tab_fold3.pth
Loading: /kaggle/input/csiro-effnet-baseline-weights1/effnet_tab_fold4.pth


In [27]:
class TestDataset(Dataset):
    """
    Inference dataset yielding ``(image, tabular_features)`` pairs.

    test.csv does NOT contain Pre_GSHH_NDVI and Height_Ave_cm, so the tabular
    features are supplied here as CONSTANTS (train means):
        NDVI_MEAN, HEIGHT_LOG_MEAN

    Args:
        df: DataFrame with an ``image_path`` column (paths relative to
            ``base_dir``). Its index is reset so rows are addressed 0..len-1.
        base_dir: Directory that the ``image_path`` values are joined onto.
        transform: Optional callable applied to the loaded RGB PIL image.
    """
    def __init__(self, df, base_dir=BASE_DIR, transform=None):
        self.df = df.reset_index(drop=True)
        self.base_dir = base_dir
        self.transform = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx`` and return it with the constant tabular vector."""
        row = self.df.loc[idx]

        img_path = os.path.join(self.base_dir, row["image_path"])
        # Image.open is lazy and keeps the underlying file open. convert()
        # returns an independent in-memory image, so the file handle can be
        # closed right away instead of being left to the garbage collector.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        # Tabular features as constants (same vector for every sample)
        tab = torch.tensor(
            [NDVI_MEAN, HEIGHT_LOG_MEAN],
            dtype=torch.float32
        )

        return image, tab

In [28]:
test_csv_path = os.path.join(BASE_DIR, "test.csv")
test = pd.read_csv(test_csv_path)

# Image paths may repeat in test.csv (the submission step merges predictions
# back on image_path + target_name), so each distinct image is predicted once.
test_images = test["image_path"].unique()
test_df = pd.DataFrame({"image_path": test_images})
test_df = test_df.reset_index(drop=True)

test_ds = TestDataset(df=test_df, transform=val_transform)
# shuffle=False keeps prediction order aligned with the rows of test_df.
test_loader = DataLoader(dataset=test_ds, batch_size=32, shuffle=False)

In [29]:
def get_tta_images(images):
    """Return a list of test-time-augmentation (TTA) variants of a batch.

    The list holds, in order: the input batch itself, a flip along dim 3 and a
    flip along dim 2 (horizontal and vertical flips, assuming an
    (N, C, H, W) layout).
    """
    horizontal = torch.flip(images, dims=[3])  # horizontal flip
    vertical = torch.flip(images, dims=[2])    # vertical flip
    return [images, horizontal, vertical]      # the original comes first

In [30]:
# Ensemble inference: for every batch, average the model outputs over all fold
# models and then over all TTA views, and map the result back with expm1.
batch_preds = []

with torch.no_grad():
    for images, tabs in test_loader:
        images, tabs = images.to(device), tabs.to(device)

        # One entry per TTA view, each already averaged across the fold models.
        per_view_means = []
        for view in get_tta_images(images):
            per_fold = np.stack(
                [model(view, tabs).cpu().numpy() for model in fold_models],
                axis=0,
            )
            per_view_means.append(per_fold.mean(axis=0))

        mean_log = np.stack(per_view_means, axis=0).mean(axis=0)

        # expm1 undoes a log1p transform (presumably applied to the targets at
        # training time -- confirm); negative results are clipped to zero.
        preds = np.maximum(np.expm1(mean_log), 0.0)
        batch_preds.append(preds)

all_preds = np.concatenate(batch_preds, axis=0)
print("Predictions shape:", all_preds.shape)

Predictions shape: (1, 5)


In [31]:
# Build the submission: wide predictions (one row per image) -> long format
# (one row per image/target) -> joined onto the rows requested in test.csv.

# The image_path column is attached by index alignment, so a length mismatch
# would silently misalign or drop predictions; fail loudly instead.
if len(all_preds) != len(test_df):
    raise ValueError(
        f"Got {len(all_preds)} prediction rows for {len(test_df)} test images"
    )

pred_wide = pd.DataFrame(all_preds, columns=target_cols)
pred_wide["image_path"] = test_df["image_path"]

pred_long = pred_wide.melt(
    id_vars=["image_path"],
    value_vars=target_cols,
    var_name="target_name",
    value_name="target"
)

# validate="many_to_one" makes pandas raise if pred_long ever holds duplicate
# (image_path, target_name) keys, which would duplicate submission rows.
submission = test.merge(pred_long,
                        on=["image_path", "target_name"],
                        how="left",
                        validate="many_to_one")

# A left merge leaves NaN wherever a requested row has no matching prediction
# (e.g. a target_name outside target_cols); NaN model outputs land here too.
n_missing = int(submission["target"].isna().sum())
if n_missing:
    raise ValueError(f"{n_missing} submission rows have no valid prediction")

submission = submission[["sample_id", "target"]].sort_values("sample_id")

submission.to_csv("submission.csv", index=False)
print("Saved submission.csv")

submission.head()


Saved submission.csv


Unnamed: 0,sample_id,target
0,ID1001187975__Dry_Clover_g,0.419846
1,ID1001187975__Dry_Dead_g,22.164917
2,ID1001187975__Dry_Green_g,23.180721
3,ID1001187975__Dry_Total_g,51.718681
4,ID1001187975__GDM_g,22.557177
