- Notebook modified from https://www.kaggle.com/code/markwijkhuizen/planttraits2024-eda-training-pub.
- Training only, EDA part not included.
- Image model only, tabular data not used.

## Import Libraries 

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import imageio.v3 as imageio
import albumentations as A

from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from torch import nn
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold, cross_val_score

import torch
import timm
import glob
import torchmetrics
import time
import psutil
import os
import time
import pickle

tqdm.pandas()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [2]:
class Config():
    IMAGE_SIZE = 384
#     BACKBONE = 'swin_large_patch4_window12_384.ms_in22k_ft_in1k'
    BACKBONE = 'tf_efficientnet_b0'
    TARGET_COLUMNS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
    
    N_TARGETS = len(TARGET_COLUMNS)
    BATCH_SIZE = 32
    LR_MAX = 1e-4
    WEIGHT_DECAY = 0.01
    N_EPOCHS = 8
    TRAIN_MODEL = False
    IS_INTERACTIVE = os.environ['KAGGLE_KERNEL_RUN_TYPE'] == 'Interactive'
    
#     MODEL_PATH = '/kaggle/input/plainttraits2024-swintransformer/model.pth'
    MODEL_PATH = '/kaggle/input/planttraits2024-swintransformer-tabular-full/model_full.pth'
#     MODEL_PATH = '/kaggle/input/planttraits2024-efficientnet-tabular/model.pth'
        
CONFIG = Config()

## Load Data

In [3]:
%%time
read_images = False

if not read_images:
    train = pd.read_pickle('/kaggle/input/plainttraits2024-swintransformer/train.pkl')
    test = pd.read_pickle('/kaggle/input/plainttraits2024-swintransformer/test.pkl')
else: 
    # if CONFIG.TRAIN_MODEL:
    train = pd.read_csv('/kaggle/input/planttraits2024/train.csv')
    train['file_path'] = train['id'].apply(lambda s: f'/kaggle/input/planttraits2024/train_images/{s}.jpeg')
    train['jpeg_bytes'] = train['file_path'].progress_apply(lambda fp: open(fp, 'rb').read())
    train.to_pickle('train.pkl')

    test = pd.read_csv('/kaggle/input/planttraits2024/test.csv')
    test['file_path'] = test['id'].apply(lambda s: f'/kaggle/input/planttraits2024/test_images/{s}.jpeg')
    test['jpeg_bytes'] = test['file_path'].progress_apply(lambda fp: open(fp, 'rb').read())
    test.to_pickle('test.pkl')

for column in CONFIG.TARGET_COLUMNS:
    lower_quantile = train[column].quantile(0.005)
    upper_quantile = train[column].quantile(0.985)  
    train = train[(train[column] >= lower_quantile) & (train[column] <= upper_quantile)]    
    
sd_columns = [col for col in train.columns if col.endswith('_sd')]
train[sd_columns] = train[sd_columns].fillna(0)
train.reset_index(inplace=True)
    
CONFIG.N_TRAIN_SAMPLES = len(train)
CONFIG.N_STEPS_PER_EPOCH = (CONFIG.N_TRAIN_SAMPLES // CONFIG.BATCH_SIZE)
CONFIG.N_STEPS = CONFIG.N_STEPS_PER_EPOCH * CONFIG.N_EPOCHS + 1    
CONFIG.TABULAR_COLUMNS = train.filter(regex='^(WORLDCLIM_BIO|SOIL|MODIS_2000|VOD)').columns
    
if CONFIG.TRAIN_MODEL:
    print('N_TRAIN_SAMPLES:', len(train), 'N_TEST_SAMPLES:', len(test))
else:
    print('N_TEST_SAMPLES:', len(test))

N_TEST_SAMPLES: 6545
CPU times: user 1.58 s, sys: 2.85 s, total: 4.44 s
Wall time: 31.2 s


In [4]:
# if CONFIG.TRAIN_MODEL:
LOG_FEATURES = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']

y_df = np.zeros_like(train[CONFIG.TARGET_COLUMNS], dtype=np.float32)
for target_idx, target in enumerate(CONFIG.TARGET_COLUMNS):
    v = train[target].values
    if target in LOG_FEATURES:
        v = np.log10(v)
        
    y_df[:, target_idx] = v

SCALER = StandardScaler()
y_df = SCALER.fit_transform(y_df)

In [5]:
# Splitting the data into training and validation sets
train_df, val_df, y_train, y_val = train_test_split(train, y_df, test_size=0.2, random_state=42)

In [6]:
SCALER_tabular = StandardScaler()
tabular_df_train = SCALER_tabular.fit_transform(train_df[CONFIG.TABULAR_COLUMNS])
tabular_df_val = SCALER_tabular.transform(val_df[CONFIG.TABULAR_COLUMNS])
tabular_df_test = SCALER_tabular.fit_transform(test[CONFIG.TABULAR_COLUMNS])

In [7]:
MEAN = np.array([0.485, 0.456, 0.406])
STD = np.array([0.229, 0.224, 0.225])

TRAIN_TRANSFORMS = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.2, rotate_limit=45, p=0.4),
#         A.RandomSizedCrop(
#             [int(0.85*CONFIG.IMAGE_SIZE), CONFIG.IMAGE_SIZE],
#             CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE, w2h_ratio=1.0, p=0.75),
#         A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
        A.RandomCrop(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
        A.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.1, p=0.25),
        A.HueSaturationValue(p=0.3),
        A.ImageCompression(quality_lower=85, quality_upper=100, p=0.25),
        A.ToFloat(),
        A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        ToTensorV2(),
    ])

TEST_TRANSFORMS = A.Compose([
#         A.Resize(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),
        A.ToFloat(),
#         A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
        ToTensorV2(),
    ])

class Dataset(Dataset):
    def __init__(self, X_jpeg_bytes, X_tabular, y, transforms=None):
        self.X_jpeg_bytes = X_jpeg_bytes
        self.X_tabular = X_tabular
        self.y = y
        self.transforms = transforms

    def __len__(self):
        return len(self.X_jpeg_bytes)

    def __getitem__(self, index):
        X_sample = self.transforms(
            image=imageio.imread(self.X_jpeg_bytes[index]),
        )
        X_sample['tabular'] = self.X_tabular[index].astype('float32')
        y_sample = self.y[index]
        
        return X_sample, y_sample

if CONFIG.TRAIN_MODEL:
    # Creating datasets for training and validation
    train_dataset = Dataset(
        train_df['jpeg_bytes'].values,
        tabular_df_train,
        y_train,
        TRAIN_TRANSFORMS,
    )

    val_dataset = Dataset(
        val_df['jpeg_bytes'].values,
        tabular_df_val,
        y_val,
        TEST_TRANSFORMS,
    )

    # Creating dataloaders for training and validation
    train_dataloader = DataLoader(
        train_dataset,
        batch_size=CONFIG.BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=psutil.cpu_count(),
    )

    val_dataloader = DataLoader(
        val_dataset,
        batch_size=CONFIG.BATCH_SIZE,
        shuffle=False,  # No need to shuffle validation data
        num_workers=psutil.cpu_count(),
    )



test_dataset = Dataset(
    test['jpeg_bytes'].values,
    tabular_df_test,
    test['id'].values,
    TEST_TRANSFORMS,
)

In [8]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.backbone = timm.create_model(
                CONFIG.BACKBONE,
                pretrained=True,
                num_classes=0,
        )
        
        # EfficientNet = 1280, SwinTrans = 1536, Tabular = 163
        self.custom_layers = nn.Sequential(
            nn.Linear(1280+163, 512),
            nn.ReLU(), 
            nn.Linear(512, 6)  
        )
        
    def forward(self, inputs):
        image = inputs['image']
        tabular = inputs['tabular']

        x = self.backbone(image)
        x = torch.cat((tabular, x), dim=1)
        x = self.custom_layers(x)
        
        return x


model = Model()
model = model.to(device)
# print(model.backbone.head)

model.safetensors:   0%|          | 0.00/21.4M [00:00<?, ?B/s]

In [9]:
def get_lr_scheduler(optimizer):
    return torch.optim.lr_scheduler.OneCycleLR(
        optimizer=optimizer,
        max_lr=CONFIG.LR_MAX,
        total_steps=CONFIG.N_STEPS,
        pct_start=0.1,
        anneal_strategy='cos',
        div_factor=1e1,
        final_div_factor=1e1,
    )

class AverageMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val):
        self.sum += val.sum()
        self.count += val.numel()
        self.avg = self.sum / self.count

if CONFIG.TRAIN_MODEL:
    MAE = torchmetrics.regression.MeanAbsoluteError().to(device)
    R2 = torchmetrics.regression.R2Score(num_outputs=CONFIG.N_TARGETS, multioutput='uniform_average').to(device)
    LOSS = AverageMeter()

    Y_MEAN = torch.tensor(y_df).mean(dim=0).to(device)
    EPS = torch.tensor([1e-6]).to(device)

    def r2_loss(y_pred, y_true):
        ss_res = torch.sum((y_true - y_pred)**2, dim=0)
        ss_total = torch.sum((y_true - Y_MEAN)**2, dim=0)
        ss_total = torch.maximum(ss_total, EPS)
        r2 = torch.mean(ss_res / ss_total)
        return r2

    LOSS_FN = r2_loss

    optimizer = torch.optim.AdamW(
        params=model.parameters(),
        lr=CONFIG.LR_MAX,
        weight_decay=CONFIG.WEIGHT_DECAY,
    )

    LR_SCHEDULER = get_lr_scheduler(optimizer)

In [10]:
if CONFIG.TRAIN_MODEL:
    print("Start Training:")
    
    best = float('inf')
    for epoch in range(CONFIG.N_EPOCHS):
        MAE.reset()
        R2.reset()
        LOSS.reset()
        model.train()

        for step, (X_batch, y_true) in enumerate(train_dataloader):
            X_batch['image'] = X_batch['image'].to(device)
            X_batch['tabular'] = X_batch['tabular'].to(device)
            y_true = y_true.to(device)
            t_start = time.perf_counter_ns()
            y_pred = model(X_batch)
            loss = LOSS_FN(y_pred, y_true)
            LOSS.update(loss)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            LR_SCHEDULER.step()
            MAE.update(y_pred, y_true)
            R2.update(y_pred, y_true)

            if not CONFIG.IS_INTERACTIVE and (step+1) == len(train_dataloader):
                print(
                    f'EPOCH {epoch+1:02d}, {step+1:04d}/{len(train_dataloader)} | ' + 
                    f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
                )
            elif CONFIG.IS_INTERACTIVE:
                print(
                    f'\rEPOCH {epoch+1:02d}, {step+1:04d}/{len(train_dataloader)} | ' + 
                    f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
                    end='\n' if (step + 1) == CONFIG.N_STEPS_PER_EPOCH else '', flush=True,
                )
        
        print()
        MAE.reset()
        R2.reset()
        LOSS.reset()
        model.eval()
        
        for step, (X_batch, y_true) in enumerate(val_dataloader):
            X_batch['image'] = X_batch['image'].to(device)
            X_batch['tabular'] = X_batch['tabular'].to(device)
            y_true = y_true.to(device)
            t_start = time.perf_counter_ns()
            with torch.no_grad():
                y_pred = model(X_batch)
                loss = LOSS_FN(y_pred, y_true)
                
            LOSS.update(loss)
            MAE.update(y_pred, y_true)
            R2.update(y_pred, y_true)
            if not CONFIG.IS_INTERACTIVE and (step+1) == len(val_dataloader):
                print(
                    f'EPOCH VAL, {epoch+1:02d}, {step+1:04d}/{len(val_dataloader)} | ' + 
                    f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
                )
            elif CONFIG.IS_INTERACTIVE:
                print(
                    f'\rEPOCH VAL {epoch+1:02d}, {step+1:04d}/{len(val_dataloader)} | ' + 
                    f'loss: {LOSS.avg:.4f}, mae: {MAE.compute().item():.4f}, r2: {R2.compute().item():.4f}, ' +
                    f'step: {(time.perf_counter_ns()-t_start)*1e-9:.3f}s, lr: {LR_SCHEDULER.get_last_lr()[0]:.2e}',
                    end='\n' if (step + 1) == CONFIG.N_STEPS_PER_EPOCH else '', flush=True,
                )
        print()
        if LOSS.avg < best:
            best = LOSS.avg
            torch.save(model, 'model.pth')
            
            
else:
    model = torch.load(CONFIG.MODEL_PATH)
    model.to(device)

In [11]:
if CONFIG.TRAIN_MODEL:
    model = torch.load('/kaggle/working/model.pth')
    model.to(device);

In [12]:
# Define the number of random crops
NUM_CROPS = 8  # You can adjust this number as needed

# Define the transformations
TEST_TRANSFORMS = A.Compose([
    A.RandomCrop(CONFIG.IMAGE_SIZE, CONFIG.IMAGE_SIZE),  # Add random crop here
    A.ToFloat(),
    A.Normalize(mean=MEAN, std=STD, max_pixel_value=1),
    ToTensorV2(),
])

SUBMISSION_ROWS = []
model.eval()

# Iterate over each image in the test dataset
for X_sample_test, test_id in tqdm(test_dataset):
    y_pred_sum = np.zeros(len(CONFIG.TARGET_COLUMNS))  # Initialize sum of predictions
    
    # Apply random crop and make predictions multiple times
    for _ in range(NUM_CROPS):
        # Apply transformations
        transformed = TEST_TRANSFORMS(image=X_sample_test['image'].numpy().transpose(1, 2, 0))
        
        # Prepare data for prediction
        image = torch.Tensor(transformed['image']).unsqueeze(0).to(device)
        tabular = torch.Tensor(X_sample_test['tabular']).unsqueeze(0).to(device)
        
        # Make predictions
        with torch.no_grad():
            y_pred = model({'image': image, 'tabular': tabular}).detach().cpu().numpy()
        
        # Inverse transform predictions
        y_pred = SCALER.inverse_transform(y_pred).squeeze()
        
        # Logarithmic transformation for log features
        for idx, k in enumerate(CONFIG.TARGET_COLUMNS):
            if k in LOG_FEATURES:
                y_pred[idx] = 10 ** y_pred[idx]
        
        # Sum the predictions for averaging later
        y_pred_sum += y_pred
    
    # Average the predictions
    y_pred_avg = y_pred_sum / NUM_CROPS
    
    # Prepare submission row
    row = {'id': test_id}
    for k, v in zip(CONFIG.TARGET_COLUMNS, y_pred_avg):
        row[k.replace('_mean', '')] = v
        
    SUBMISSION_ROWS.append(row)

# Create submission DataFrame
submission_df = pd.DataFrame(SUBMISSION_ROWS)
submission_df.to_csv('submission.csv', index=False)
print("Submit!")

  0%|          | 0/6545 [00:00<?, ?it/s]

Submit!


In [13]:
submission_df

Unnamed: 0,id,X4,X11,X18,X50,X26,X3112
0,201238668,0.607543,8.406076,2.023334,1.883451,1.680902,466.856895
1,202310319,0.531942,14.973386,0.318883,1.287831,0.192930,675.525566
2,202604412,0.561591,15.408670,0.956610,1.255313,2.622868,738.316200
3,201353439,0.410455,18.299898,0.124738,1.503358,0.404792,1912.121231
4,195351745,0.478589,7.187330,0.107517,1.799594,0.828794,287.522835
...,...,...,...,...,...,...,...
6540,195548469,0.669277,11.500955,0.833545,1.957218,4.155447,128.479075
6541,199261251,0.556049,18.650699,4.730811,1.171644,3.627272,7820.018677
6542,203031744,0.426482,31.662518,0.392394,1.144586,3.701209,2808.235229
6543,197736382,0.502657,19.201098,0.330422,1.482899,1.220370,557.850903
