# Distillation. Student Inference


## Imports


In [1]:
import os
import gc
import numpy as np
import pandas as pd
from PIL import Image

import cv2
import timm
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from tqdm import tqdm
from torch.optim import AdamW
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR

import warnings

# Suppress FutureWarning messages for the remainder of the session.
warnings.simplefilter(action='ignore', category=FutureWarning)
# Record the runtime environment next to the results.
print(f"PyTorch: {torch.__version__}")
# Prints the name of CUDA device 0, or the literal 'CPU' when CUDA is unavailable.
print(f"Device: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU'}")



PyTorch: 2.6.0+cu124
Device: Tesla T4


In [2]:
# Traverse the attached Kaggle inputs. The per-file listing is disabled, so
# this walk currently produces no output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        continue

In [3]:
# --- Runtime ------------------------------------------------------------------
cpu_count = os.cpu_count()
NUM_WORKERS = 0  # passed (capped at 4) to the test DataLoader

# --- Hyperparameters (all of these are embedded in DESCRIPTION_FULL) ----------
LR = 1e-4
EPOCHS = 25
N_FOLDS = 5
GRAD_ACCUM = 1
BATCH_SIZE = 16
DROPOUT_RATE = 0.3
# Weight for distillation loss
# Loss = DISTILL_ALPHA * Distillation_Loss + (1 - DISTILL_ALPHA) * Hard_Loss
DISTILL_ALPHA = 0.5
WEIGHT_DECAY = 0.05
HIDDEN_RATIO = 0.5
TRAIN_SPLIT_RATIO = 0.02  # Used if N_FOLDS = 0

# --- Model / artefact locations ------------------------------------------------
MODEL = "swinv2_tiny_window8_256"
WEIGHTS_PATH = f"/kaggle/input/distillation-models/backbone/{MODEL}.pth"
MODEL_STAGE = "student"  # 'teacher' or 'student'
PROJECT_NAME = "csiro-image2biomass-prediction"
CHECKPOINTS_DIR = f"./kaggle/checkpoints/{MODEL_STAGE}/"
# Whether to use OOF soft targets or 100% ensemble soft targets
USE_OOF_SOFT_TARGETS = False

# Initial image side length. NOTE: this value is only a placeholder — SIZE is
# rebound further down from the backbone's default_cfg['input_size'] once the
# backbone has been loaded, and that later value is what the transforms use.
SIZE = 768
USE_LOG_TARGET = True   # Whether to use log1p transformation on target variable
FUSION_METHOD = 'mean'  # ('concat', 'mean', 'max')

# --- Run description / output names -------------------------------------------
DESCRIPTION = (
    "kaggle"
    + (f"_train{TRAIN_SPLIT_RATIO}" if N_FOLDS == 0 else f"_train[{N_FOLDS}]Folds")
    + ("_log" if USE_LOG_TARGET else "")
    + f"_fusion-{FUSION_METHOD}"
)
DESCRIPTION_FULL = (
    f"{MODEL}-{DESCRIPTION}"
    f"_epochs{EPOCHS}_bs{BATCH_SIZE}_gradacc{GRAD_ACCUM}_lr{LR}"
    f"_wd{WEIGHT_DECAY}_dr{DROPOUT_RATE}_hr{HIDDEN_RATIO}"
)
# The submission is always written under this fixed file name; the descriptive
# run name is available through DESCRIPTION_FULL / SUBMISSION_ENSEMBLE_NAME.
SUBMISSION_NAME = "submission.csv"
SUBMISSION_ENSEMBLE_NAME = f"{DESCRIPTION_FULL}_ensemble_submission.csv"
SUBMISSION_MSG = DESCRIPTION_FULL.replace("_", " ")

# --- Reproducibility ----------------------------------------------------------
SEED = 1488
# seed_everything seeds Python's `random`, NumPy and PyTorch in one call, so
# separate torch.manual_seed / np.random.seed calls are not needed.
pl.seed_everything(SEED)

print("DESCRIPTION_FULL:", DESCRIPTION_FULL)
print(f"Effective batch size: {BATCH_SIZE * GRAD_ACCUM}")

Seed set to 1488


DESCRIPTION_FULL: swinv2_tiny_window8_256-kaggle_train[5]Folds_log_fusion-mean_epochs25_bs16_gradacc1_lr0.0001_wd0.05_dr0.3_hr0.5
Effective batch size: 16


In [4]:
# Select the compute device: CUDA when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print('Using device:', DEVICE)
print('NUM_WORKERS:', NUM_WORKERS)
print()

# The remaining diagnostics only apply to a CUDA device.
if DEVICE.type == 'cuda':
    # Release cached GPU blocks and run the Python garbage collector.
    torch.cuda.empty_cache()
    gc.collect()

    # torch.set_float32_matmul_precision('high')

    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    # Byte counts are reported in GiB (2**30 bytes), rounded to one decimal.
    print('Allocated:', round(torch.cuda.memory_allocated(0) / 2**30, 1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0) / 2**30, 1), 'GB')

Using device: cuda
NUM_WORKERS: 0

Tesla T4
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


## Model Architecture

In [5]:
# Column order used for every 5-target prediction/target matrix in this notebook.
labels = [
    "Dry_Clover_g",
    "Dry_Dead_g",
    "Dry_Green_g",
    "Dry_Total_g",
    "GDM_g"
]

# Per-target weights of the evaluation metric, keyed by target name.
weights = {
    'Dry_Green_g': 0.1,
    'Dry_Dead_g': 0.1,
    'Dry_Clover_g': 0.1,
    'GDM_g': 0.2,
    'Dry_Total_g': 0.5,
}


def competition_metric(y_true, y_pred) -> float:
    """Weighted R2 score used as the competition's evaluation metric.

    Args:
        y_true: array of shape [N, len(labels)] with ground-truth values,
            columns ordered as in ``labels``.
        y_pred: array of the same shape with predictions.

    Returns:
        ``1 - ss_res / ss_tot`` where both sums of squares are weighted per
        column by ``weights`` and then averaged over rows.
    """
    column_weights = np.array([weights[name] for name in labels])

    def _weighted_row_mean(values):
        # Weighted average across the target columns, then plain mean over rows.
        return np.average(values, weights=column_weights, axis=1).mean()

    # Single global reference value: the mean of the per-row weighted averages.
    baseline = _weighted_row_mean(y_true)

    residual_ss = _weighted_row_mean((y_true - y_pred) ** 2)
    total_ss = _weighted_row_mean((y_true - baseline) ** 2)

    return 1 - residual_ss / total_ss

In [6]:
class StudentDistillationLoss(nn.Module):
    """
    Loss for the Student model, blending two terms:
    1. Distillation loss: MSE between Student predictions and Teacher soft targets
    2. Hard loss: competition-weighted MSE against the ground-truth targets
    """

    def __init__(self, alpha: float = 0.5, use_log_space: bool = True):
        """
        Args:
            alpha: Share of the distillation term in the total loss
                (the hard term gets ``1 - alpha``).
            use_log_space: Flag recorded on the module.
                NOTE(review): ``forward`` does not read this flag; inputs are
                used exactly as given.
        """
        super().__init__()
        self.alpha = alpha
        self.use_log_space = use_log_space

        # Competition weights for the five evaluated targets
        self.w_green = 0.1
        self.w_clover = 0.1
        self.w_dead = 0.1
        self.w_gdm = 0.2
        self.w_total = 0.5

    def forward(
        self,
        student_preds: torch.Tensor,  # [B, 3] predictions, columns [clover, dead, green]
        hard_targets: torch.Tensor,   # [B, 3] ground truth, same column order
        soft_targets: torch.Tensor    # [B, 3] Teacher predictions, same column order
    ) -> tuple[torch.Tensor, dict]:
        """
        Returns:
            total_loss: ``alpha * distillation + (1 - alpha) * hard`` as a tensor
            loss_dict: Python floats for each individual loss component
        """
        # Distillation term: match the Teacher on all predicted components at once.
        loss_distill = F.mse_loss(student_preds, soft_targets)

        # Column views: 0 = Dry_Clover_g, 1 = Dry_Dead_g, 2 = Dry_Green_g
        pred_clover, pred_dead, pred_green = (
            student_preds[:, 0], student_preds[:, 1], student_preds[:, 2])
        true_clover, true_dead, true_green = (
            hard_targets[:, 0], hard_targets[:, 1], hard_targets[:, 2])

        # Per-component hard losses
        loss_clover = F.mse_loss(pred_clover, true_clover)
        loss_dead = F.mse_loss(pred_dead, true_dead)
        loss_green = F.mse_loss(pred_green, true_green)

        # Derived targets are formed by summing the given columns directly:
        # total = sum over all columns, GDM = clover + green.
        loss_total = F.mse_loss(student_preds.sum(dim=1), hard_targets.sum(dim=1))
        loss_gdm = F.mse_loss(pred_clover + pred_green, true_clover + true_green)

        # Hard term, weighted like the competition metric
        loss_hard = (
            self.w_green * loss_green +
            self.w_clover * loss_clover +
            self.w_dead * loss_dead +
            self.w_gdm * loss_gdm +
            self.w_total * loss_total
        )

        total_loss = self.alpha * loss_distill + (1 - self.alpha) * loss_hard

        # Detached scalar copies for logging
        components = {
            'loss_distill': loss_distill,
            'loss_hard': loss_hard,
            'loss_green': loss_green,
            'loss_clover': loss_clover,
            'loss_dead': loss_dead,
            'loss_total': loss_total,
            'loss_gdm': loss_gdm,
        }
        loss_dict = {name: value.item() for name, value in components.items()}

        return total_loss, loss_dict

In [7]:
class BiomassStudentModel(pl.LightningModule):
    """
    Student model for biomass prediction.
    Uses ONLY images (dual-patch), NO tabular features.
    Learns from Teacher's soft targets + ground truth.

    The same backbone encodes the left and right image patch; the two feature
    vectors are fused ('concat', 'mean' or 'max') and fed to three independent
    regression heads (green, clover, dead). Dry_Total_g and GDM_g are derived
    from those three outputs in ``compute_all_targets``.
    """

    def __init__(
        self,
        backbone_name: str = 'swinv2_tiny_window8_256',
        num_targets: int = 3,
        lr: float = 1e-4,
        weight_decay: float = 1e-5,
        hidden_ratio: float = 0.5,
        dropout: float = 0.3,
        fusion_method: str = 'mean',
        distill_alpha: float = 0.5,
        use_log_target: bool = True,
        pretrained_backbone: bool = True
    ):
        """
        Args:
            backbone_name: timm model name for the image encoder.
            num_targets: Stored with the hyperparameters; the number of heads
                is fixed at three and does not depend on this value.
            lr: Learning rate for AdamW.
            weight_decay: Weight decay for AdamW.
            hidden_ratio: Hidden width of each head as a fraction of the fused
                feature width (minimum 32 units).
            dropout: Dropout probability inside each head.
            fusion_method: How left/right features are combined:
                'concat', 'mean' or 'max'.
            distill_alpha: Weight of the distillation term in the loss.
            use_log_target: Whether the model outputs are treated as log1p-scale
                values (inverted with expm1 for metrics/predictions).
            pretrained_backbone: Passed to ``timm.create_model(pretrained=...)``;
                set to False when all weights come from a checkpoint.
        """
        super().__init__()
        self.save_hyperparameters()

        # Image backbone - load pretrained only if specified
        self.backbone = timm.create_model(
            backbone_name,
            pretrained=pretrained_backbone,
            num_classes=0,
            global_pool='avg'
        )

        self.lr = lr
        self.weight_decay = weight_decay
        self.fusion_method = fusion_method
        self.use_log_target = use_log_target

        # Get backbone output dimension
        feat_dim = self._probe_feature_dim()
        self.feat_dim = feat_dim

        # NO tabular features - only image features!
        if self.fusion_method == 'concat':
            self.combined_dim = feat_dim * 2
        else:  # mean or max
            self.combined_dim = feat_dim

        # Regression heads (simpler than Teacher)
        hidden_size = max(32, int(self.combined_dim * hidden_ratio))

        def make_head():
            return nn.Sequential(
                nn.Linear(self.combined_dim, hidden_size),
                nn.ReLU(inplace=True),
                nn.Dropout(dropout),
                nn.Linear(hidden_size, 1)
            )

        # Attribute names are part of the checkpoint state_dict keys - keep them.
        self.head_green = make_head()
        self.head_clover = make_head()
        self.head_dead = make_head()

        # Custom distillation loss
        self.criterion = StudentDistillationLoss(
            alpha=distill_alpha,
            use_log_space=use_log_target
        )

        # Storage for validation
        self.validation_step_outputs = []

        print(f"Student model initialized: backbone={backbone_name}, feat_dim={feat_dim}, "
              f"combined_dim={self.combined_dim}, fusion={fusion_method}, "
              f"distill_alpha={distill_alpha}, pretrained_backbone={pretrained_backbone}")

    def _probe_feature_dim(self) -> int:
        """Return the backbone's pooled feature width using one dummy forward pass.

        The probe resolution is taken from the backbone's own
        ``default_cfg['input_size']`` when it is available, so the class no
        longer relies on the notebook-level ``SIZE`` having been rebound to a
        resolution the backbone accepts before the model is instantiated.
        ``SIZE`` is used only as a fallback.
        """
        cfg = getattr(self.backbone, 'default_cfg', None) or {}
        input_size = cfg.get('input_size')
        if input_size is not None:
            height, width = int(input_size[-2]), int(input_size[-1])
        else:
            height = width = SIZE

        with torch.no_grad():
            dummy = torch.randn(1, 3, height, width)
            return self.backbone(dummy).shape[1]

    def forward(self, batch: dict) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """
        Args:
            batch: dict with 'left_image', 'right_image'

        Returns:
            (green, clover, dead) predictions, each of shape [B]

        Raises:
            ValueError: if ``fusion_method`` is not 'concat', 'mean' or 'max'.
        """
        # Extract features from each patch
        left_feat = self.backbone(batch['left_image'])
        right_feat = self.backbone(batch['right_image'])

        # Fuse image features
        if self.fusion_method == 'concat':
            img_feat = torch.cat([left_feat, right_feat], dim=1)
        elif self.fusion_method == 'mean':
            img_feat = (left_feat + right_feat) / 2
        elif self.fusion_method == 'max':
            img_feat = torch.maximum(left_feat, right_feat)
        else:
            raise ValueError(f"Unknown fusion method: {self.fusion_method}")

        # Predict each target
        green = self.head_green(img_feat).squeeze(1)
        clover = self.head_clover(img_feat).squeeze(1)
        dead = self.head_dead(img_feat).squeeze(1)

        return green, clover, dead

    def compute_all_targets(self, green: torch.Tensor, clover: torch.Tensor, dead: torch.Tensor) -> torch.Tensor:
        """Compute all 5 targets from 3 predicted ones.

        Inputs are clamped to be non-negative first. The result has shape
        [B, 5] with columns [clover, dead, green, total, gdm], where
        total = green + dead + clover and gdm = clover + green.
        """
        green = torch.clamp(green, min=0.0)
        clover = torch.clamp(clover, min=0.0)
        dead = torch.clamp(dead, min=0.0)

        total = green + dead + clover
        gdm = clover + green

        all_targets = torch.stack([clover, dead, green, total, gdm], dim=1)
        return all_targets

    def training_step(self, batch: dict, batch_idx: int) -> torch.Tensor:
        """Compute and log the distillation loss for one training batch."""
        green, clover, dead = self(batch)

        # Stack predictions [B, 3] in order: [clover, dead, green]
        preds = torch.stack([clover, dead, green], dim=1)

        # Compute distillation loss
        loss, loss_dict = self.criterion(
            preds,
            batch['hard_targets'],
            batch['soft_targets']
        )

        # Pass the batch size explicitly to every log call so epoch-level
        # averages are weighted consistently across all logged losses.
        batch_size = batch['hard_targets'].size(0)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True,
                 batch_size=batch_size)
        self.log('train_loss_distill', loss_dict['loss_distill'],
                 on_step=True, on_epoch=True, batch_size=batch_size)
        self.log('train_loss_hard', loss_dict['loss_hard'],
                 on_step=True, on_epoch=True, batch_size=batch_size)

        return loss

    def validation_step(self, batch: dict, batch_idx: int) -> torch.Tensor:
        """Log validation losses and stash original-scale predictions/targets
        for the epoch-level competition metric."""
        green_pred, clover_pred, dead_pred = self(batch)

        preds = torch.stack([clover_pred, dead_pred, green_pred], dim=1)

        # Compute loss
        loss, loss_dict = self.criterion(
            preds,
            batch['hard_targets'],
            batch['soft_targets']
        )

        batch_size = batch['hard_targets'].size(0)
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True,
                 batch_size=batch_size)
        self.log('val_loss_distill', loss_dict['loss_distill'],
                 on_step=False, on_epoch=True, batch_size=batch_size)
        self.log('val_loss_hard', loss_dict['loss_hard'],
                 on_step=False, on_epoch=True, batch_size=batch_size)

        # Convert to original scale for metric
        if self.use_log_target:
            green_pred = torch.expm1(green_pred)
            clover_pred = torch.expm1(clover_pred)
            dead_pred = torch.expm1(dead_pred)

            hard_targets_original = torch.expm1(batch['hard_targets'])
        else:
            hard_targets_original = batch['hard_targets']

        # Compute all 5 targets
        preds_all = self.compute_all_targets(
            green_pred, clover_pred, dead_pred)

        # hard_targets columns are [clover, dead, green]
        clover_true = hard_targets_original[:, 0]
        dead_true = hard_targets_original[:, 1]
        green_true = hard_targets_original[:, 2]
        targets_all = self.compute_all_targets(
            green_true, clover_true, dead_true)

        self.validation_step_outputs.append({
            'preds': preds_all.detach().cpu(),
            'targets': targets_all.detach().cpu()
        })

        return loss

    def on_validation_epoch_end(self):
        """Compute the competition metric over everything collected this epoch."""
        if len(self.validation_step_outputs) == 0:
            return

        all_preds = torch.cat(
            [x['preds'] for x in self.validation_step_outputs], dim=0).numpy()
        all_targets = torch.cat(
            [x['targets'] for x in self.validation_step_outputs], dim=0).numpy()

        comp_metric = competition_metric(all_targets, all_preds)
        self.log('val_comp_metric', comp_metric, on_epoch=True, prog_bar=True)

        # Reset the buffer so the next epoch starts empty
        self.validation_step_outputs.clear()

    def predict_step(self, batch: dict, batch_idx: int) -> torch.Tensor:
        """Return non-negative [B, 3] predictions ordered [clover, dead, green],
        on the original scale when ``use_log_target`` is set."""
        green, clover, dead = self(batch)
        preds = torch.stack([clover, dead, green], dim=1)

        if self.use_log_target:
            preds = torch.expm1(preds)

        preds = torch.clamp(preds, min=0.0)
        return preds

    def configure_optimizers(self):
        """AdamW with a per-epoch cosine schedule decaying to 1% of the base LR."""
        optimizer = AdamW(
            self.parameters(),
            lr=self.lr,
            weight_decay=self.weight_decay
        )

        scheduler = CosineAnnealingLR(
            optimizer,
            # Falls back to 20 when the trainer reports a falsy max_epochs
            T_max=self.trainer.max_epochs or 20,
            eta_min=self.lr * 0.01
        )

        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'interval': 'epoch'
            }
        }

In [8]:
# Image backbone (processes each patch independently).
# Read the locally stored weights first, then build the bare architecture
# without downloading anything and restore those weights into it.
checkpoint = torch.load(WEIGHTS_PATH, map_location='cpu')  # FIXME: 'cuda'

temp_backbone = timm.create_model(MODEL, pretrained=False)
temp_backbone.load_state_dict(checkpoint)

# Drop the classification head; keep average pooling so the model emits features.
temp_backbone.reset_classifier(0, global_pool='avg')

print(temp_backbone.default_cfg)

{'url': 'https://github.com/SwinTransformer/storage/releases/download/v2.0.0/swinv2_tiny_patch4_window8_256.pth', 'hf_hub_id': 'timm/swinv2_tiny_window8_256.ms_in1k', 'architecture': 'swinv2_tiny_window8_256', 'tag': 'ms_in1k', 'custom_load': False, 'input_size': (3, 256, 256), 'fixed_input_size': True, 'interpolation': 'bicubic', 'crop_pct': 0.9, 'crop_mode': 'center', 'mean': (0.485, 0.456, 0.406), 'std': (0.229, 0.224, 0.225), 'num_classes': 1000, 'pool_size': (8, 8), 'first_conv': 'patch_embed.proj', 'classifier': 'head.fc', 'license': 'mit'}


In [9]:
# Pull the preprocessing settings the backbone was configured with.
inputs_size, mean, std = (
    temp_backbone.default_cfg[key] for key in ('input_size', 'mean', 'std')
)

# Use the backbone's native side length when it is square; otherwise fall back to 256.
if inputs_size is None or inputs_size[1] != inputs_size[2]:
    SIZE = 256
else:
    SIZE = int(inputs_size[1])
print(f"Backbone expected input size: {inputs_size}, using SIZE={SIZE}")
print(f"Backbone expected mean: {mean}, std: {std}")

# Get backbone output dimension with a single dummy forward pass.
try:
    with torch.no_grad():
        dummy = torch.randn(1, 3, SIZE, SIZE)
        feat_dim = temp_backbone(dummy).shape[1]
except Exception as e:
    # Report the failure, then propagate it unchanged.
    print(f"Error getting backbone feature dimension: {e}")
    raise e

Backbone expected input size: (3, 256, 256), using SIZE=256
Backbone expected mean: (0.485, 0.456, 0.406), std: (0.229, 0.224, 0.225)


In [10]:
# Deterministic preprocessing applied to each image patch at inference time:
# resize to SIZE x SIZE, convert to a tensor, then normalise with the
# backbone's mean/std read from its default_cfg.
student_val_transform = transforms.Compose([
    transforms.Resize((SIZE, SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
])

## Student Model Inference on Test Set


In [11]:
# Root of the competition data.
PATH_DATA = '/kaggle/input/csiro-biomass'
# Directory that is scanned for trained student *.ckpt files.
STUDENT_MODELS_DIR = '/kaggle/input/distillation-models/student'
# Test manifest and test image directory, both under PATH_DATA.
PATH_TEST_CSV = os.path.join(PATH_DATA, 'test.csv')
PATH_TEST_IMG = os.path.join(PATH_DATA, 'test')

In [12]:
# Read the test manifest and discard the rows of the two derived targets;
# those values are computed later from the three predicted components.
test_df = pd.read_csv(PATH_TEST_CSV)
test_df = test_df.loc[~test_df['target_name'].isin(['Dry_Total_g', 'GDM_g'])]

# Collapse to one row per image, keeping the first value of every other column.
test_pivot = test_df.pivot_table(index='image_path', aggfunc='first')
test_pivot = test_pivot.reset_index()

print(f"Test set size: {len(test_pivot)}")
print(test_pivot.head())

Test set size: 1
              image_path                   sample_id   target_name
0  test/ID1001187975.jpg  ID1001187975__Dry_Clover_g  Dry_Clover_g


In [13]:
# Find all ckpt files
ckpt_files = sorted([f for f in os.listdir(
    STUDENT_MODELS_DIR) if f.endswith('.ckpt')])
print(f"Found {len(ckpt_files)} student checkpoints:")
for f in ckpt_files:
    print(f"  - {f}")

# Fail fast: with no checkpoints the ensemble would be empty and inference
# would only break later with an unrelated NumPy error.
if not ckpt_files:
    raise FileNotFoundError(
        f"No .ckpt files found in {STUDENT_MODELS_DIR}; cannot build the student ensemble")

# Load models WITHOUT internet (offline inference on Kaggle)
student_models = []
for ckpt_file in ckpt_files:
    ckpt_path = os.path.join(STUDENT_MODELS_DIR, ckpt_file)
    print(f"\nLoading: {ckpt_file}")

    # Load checkpoint onto the CPU; the model is moved to DEVICE afterwards
    checkpoint = torch.load(ckpt_path, map_location='cpu')
    hparams = checkpoint['hyper_parameters']

    # Create model WITHOUT pretrained backbone (all weights in checkpoint).
    # Hyperparameters are passed one by one so a checkpoint's stored
    # 'pretrained_backbone' value (if any) can never trigger a download.
    model = BiomassStudentModel(
        backbone_name=hparams['backbone_name'],
        num_targets=hparams['num_targets'],
        lr=hparams['lr'],
        weight_decay=hparams['weight_decay'],
        hidden_ratio=hparams['hidden_ratio'],
        dropout=hparams['dropout'],
        fusion_method=hparams['fusion_method'],
        distill_alpha=hparams['distill_alpha'],
        use_log_target=hparams['use_log_target'],
        pretrained_backbone=False  # IMPORTANT: No internet needed!
    )

    # Load all trained weights from checkpoint
    model.load_state_dict(checkpoint['state_dict'])

    # Inference mode: disables dropout in the heads
    model.eval()
    model.to(DEVICE)
    student_models.append(model)

    print(f"Loaded (backbone: {hparams['backbone_name']}, no internet required)")

print(f"\nSuccessfully loaded {len(student_models)} student models")
print("Ready for offline inference on Kaggle!")

Found 2 student checkpoints:
  - student_best_fold1.ckpt
  - student_best_fold4.ckpt

Loading: student_best_fold1.ckpt
Student model initialized: backbone=swinv2_tiny_window8_256, feat_dim=768, combined_dim=768, fusion=mean, distill_alpha=0.5, pretrained_backbone=False
Loaded (backbone: swinv2_tiny_window8_256, no internet required)

Loading: student_best_fold4.ckpt
Student model initialized: backbone=swinv2_tiny_window8_256, feat_dim=768, combined_dim=768, fusion=mean, distill_alpha=0.5, pretrained_backbone=False
Loaded (backbone: swinv2_tiny_window8_256, no internet required)

Successfully loaded 2 student models
Ready for offline inference on Kaggle!


In [14]:
# Create test dataset
class BiomassTestDataset(Dataset):
    """Inference-only dataset: yields the left/right halves of each test image.

    No targets are returned. Each item is a dict with 'left_image',
    'right_image' and the 'image_id' derived from the file name.
    """

    def __init__(self, df: pd.DataFrame, img_dir: str, transform=None):
        """
        Args:
            df: frame with an 'image_path' column, one row per image.
            img_dir: directory that contains the image files.
            transform: optional callable applied to each PIL patch; when
                omitted, patches are converted with ``transforms.ToTensor()``.
        """
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        rel_path = self.df.iloc[idx]['image_path']

        # The manifest paths carry a 'test/' prefix that img_dir already covers.
        img_path = os.path.join(self.img_dir, rel_path.replace('test/', ''))

        bgr = cv2.imread(img_path)
        if bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Cannot load image: {img_path}")
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        # Split down the vertical midline into left and right patches.
        half_width = rgb.shape[1] // 2
        patches = (rgb[:, :half_width, :], rgb[:, half_width:, :])

        # Convert each patch to PIL, then to a tensor (left first, then right).
        to_tensor = self.transform if self.transform else transforms.ToTensor()
        left_tensor, right_tensor = (
            to_tensor(Image.fromarray(patch)) for patch in patches
        )

        return {
            'left_image': left_tensor,
            'right_image': right_tensor,
            'image_id': rel_path.split('/')[-1].replace('.jpg', '')
        }


# Wrap the per-image manifest in the inference dataset, using the same
# preprocessing pipeline defined for validation.
test_dataset = BiomassTestDataset(test_pivot, PATH_TEST_IMG, student_val_transform)

# Sequential loader: order must stay fixed so predictions line up with image ids.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=2 * BATCH_SIZE,
    shuffle=False,
    num_workers=min(NUM_WORKERS, 4),
    pin_memory=torch.cuda.is_available(),
)

print(f"Test loader created: {len(test_loader)} batches")

Test loader created: 1 batches


In [15]:
# Run inference on test set
print("Running inference on test set...")

# An empty ensemble cannot be averaged; stop with a clear message instead of
# failing later inside NumPy.
if not student_models:
    raise RuntimeError("No student models loaded; nothing to run inference with")

all_predictions = []
all_image_ids = []

with torch.no_grad():
    for batch_idx, batch in enumerate(tqdm(test_loader, desc="Inference")):
        # Move to device
        batch['left_image'] = batch['left_image'].to(DEVICE)
        batch['right_image'] = batch['right_image'].to(DEVICE)

        # Ensemble predictions from all models
        batch_preds_list = []

        for model in student_models:
            green_pred, clover_pred, dead_pred = model(batch)

            # Undo the log1p transform according to the flag stored with THIS
            # checkpoint, not the notebook-level default, so a checkpoint
            # trained with a different setting is still decoded correctly.
            if model.use_log_target:
                green_pred = torch.expm1(green_pred)
                clover_pred = torch.expm1(clover_pred)
                dead_pred = torch.expm1(dead_pred)

            # Clamp to non-negative, derive total and GDM, and stack as
            # [clover, dead, green, total, gdm] - same helper as validation.
            preds_all = model.compute_all_targets(
                green_pred, clover_pred, dead_pred)
            batch_preds_list.append(preds_all.cpu().numpy())

        # Average predictions across models
        batch_preds_avg = np.mean(batch_preds_list, axis=0)  # [B, 5]

        all_predictions.append(batch_preds_avg)
        all_image_ids.extend(batch['image_id'])

# Concatenate all predictions
all_predictions_array = np.concatenate(all_predictions, axis=0)
print(f"Predictions shape: {all_predictions_array.shape}")
print(f"Image IDs count: {len(all_image_ids)}")

Running inference on test set...


Inference: 100%|██████████| 1/1 [00:00<00:00,  1.04it/s]

Predictions shape: (1, 5)
Image IDs count: 1





In [16]:
# Format submission CSV: one row per (image, target) pair.
# Column order of the prediction matrix: Dry_Clover_g, Dry_Dead_g, Dry_Green_g, Dry_Total_g, GDM_g
target_names = ['Dry_Clover_g', 'Dry_Dead_g',
                'Dry_Green_g', 'Dry_Total_g', 'GDM_g']

# Rows are emitted image by image, and within an image in target_names order.
submission_rows = [
    {
        'sample_id': f"{image_id}__{target_name}",
        'target': float(all_predictions_array[img_idx][target_idx])
    }
    for img_idx, image_id in enumerate(all_image_ids)
    for target_idx, target_name in enumerate(target_names)
]

# Create submission dataframe
submission_df = pd.DataFrame(submission_rows)

print(f"Submission shape: {submission_df.shape}")
print(f"Expected shape: ({len(test_pivot) * 5}, 2)")
print(submission_df.head(10))

Submission shape: (5, 2)
Expected shape: (5, 2)
                    sample_id     target
0  ID1001187975__Dry_Clover_g   0.055007
1    ID1001187975__Dry_Dead_g  30.748314
2   ID1001187975__Dry_Green_g  21.311224
3   ID1001187975__Dry_Total_g  52.114544
4         ID1001187975__GDM_g  21.366230


In [17]:
# Save submission: write the two-column frame (sample_id, target) to
# SUBMISSION_NAME without the DataFrame index.
submission_df.to_csv(SUBMISSION_NAME, index=False)

print(f"Submission saved to: {SUBMISSION_NAME}")

Submission saved to: submission.csv
