In [67]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import imageio.v3 as imageio
import albumentations as A

from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from torch import nn
from tqdm.notebook import tqdm
from sklearn.preprocessing import StandardScaler

import torch
import timm
import glob
import torchmetrics
import time
import psutil
import os

tqdm.pandas()

In [68]:
class Config():
    sub = "/kaggle/input/planttraits2024/sample_submission.csv"
    trgts = "/kaggle/input/planttraits2024/target_name_meta.tsv"
    train_path = "/kaggle/input/planttraits2024/train.csv"
    test_path = "/kaggle/input/planttraits2024/test.csv"
    train_image_path = "/kaggle/input/planttraits2024/train_images/"
    test_image_path = "/kaggle/input/planttraits2024/test_images/"
    IMAGE_SIZE = 384
    BACKBONE = 'swin_large_patch4_window12_384.ms_in22k_ft_in1k'
    TARGET_COLUMNS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
    TARGET_COLS = ['X4_mean', 'X11_mean', 'X18_mean', 'X50_mean', 'X26_mean', 'X3112_mean']
    N_TARGETS = len(TARGET_COLUMNS)
    BATCH_SIZE = 10
    LR_MAX = 1e-4
    WEIGHT_DECAY = 0.01
    N_EPOCHS = 4
    TRAIN_MODEL = True
    IS_INTERACTIVE = os.environ['KAGGLE_KERNEL_RUN_TYPE'] == 'Interactive'
        
CONFIG = Config()

In [69]:
import os
import random 

train = pd.read_csv(CONFIG.train_path)
#train

image_path = '/kaggle/input/planttraits2024/train_images'

image_files = [f for f in os.listdir(image_path) if f.endswith('.jpeg')]

random_images = random.sample(image_files, 5)


train["image_path"] = CONFIG.train_image_path + train['id'].astype(str) + '.jpeg'

train= train[['id', 'image_path', 'X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']].copy()
# Drop duplicates and nans
train = train.drop_duplicates().dropna()
train

Unnamed: 0,id,image_path,X4_mean,X11_mean,X18_mean,X26_mean,X50_mean,X3112_mean
0,192027691,/kaggle/input/planttraits2024/train_images/192...,0.401753,11.758108,0.117484,1.243779,1.849375,50.216034
1,195542235,/kaggle/input/planttraits2024/train_images/195...,0.480334,15.748846,0.389315,0.642940,1.353468,574.098472
2,196639184,/kaggle/input/planttraits2024/train_images/196...,0.796917,5.291251,8.552908,0.395241,2.343153,1130.096731
3,195728812,/kaggle/input/planttraits2024/train_images/195...,0.525236,9.568305,1.083629,0.154200,1.155308,1042.686546
4,195251545,/kaggle/input/planttraits2024/train_images/195...,0.411821,14.528877,0.657585,10.919966,2.246226,2386.467180
...,...,...,...,...,...,...,...,...
55484,190558785,/kaggle/input/planttraits2024/train_images/190...,0.337243,11.572778,0.233690,1.783193,1.608341,969.547831
55485,194523231,/kaggle/input/planttraits2024/train_images/194...,0.424371,6.114448,1.017099,12.713048,2.418300,1630.015480
55486,195888987,/kaggle/input/planttraits2024/train_images/195...,0.639659,5.549596,2.717395,10.206478,2.722599,602.229880
55487,135487319,/kaggle/input/planttraits2024/train_images/135...,0.774642,7.024218,4.429659,9.372170,3.251739,244.387170


In [70]:
train["image_path"] =CONFIG.train_image_path + train['id'].astype(str) + '.jpeg'

train= train[['id', 'image_path', 'X4_mean', 'X11_mean', 'X18_mean', 'X26_mean', 'X50_mean', 'X3112_mean']].copy()
# Drop duplicates and nans
train = train.drop_duplicates().dropna()

In [71]:
train[CONFIG.TARGET_COLS] = np.log1p(train[CONFIG.TARGET_COLS])
train = train.dropna()

split_index = int(0.7 * len(train))
# Split the DataFrame into train and validation sets
train_data = train.iloc[:split_index].reset_index(drop=True)
val_data = train.iloc[split_index:].reset_index(drop=True)
train_data.shape, val_data.shape


  result = func(self.values, **kwargs)


((38819, 8), (16637, 8))

In [72]:
split_index = int(0.5 * len(val_data))
# Split the DataFrame into test and validation sets
valid_data = val_data.iloc[:split_index].reset_index(drop=True)
test_data = val_data.iloc[split_index:].reset_index(drop=True)

In [73]:
paths = train_data.image_path.tolist()

class CustomDataset(Dataset):
    def __init__(self, paths, labels, transform=None):
        self.paths = paths
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        image = Image.open(self.paths[idx]).convert('RGB')
        
        # Use normalized labels
        label = torch.tensor(self.labels[idx], dtype=torch.float32)

        if self.transform:
            image = self.transform(image)

        return image, label

    
from torchvision import transforms
# Define any image transformations you want to apply, here we also add augmentation. 
transform = transforms.Compose([
    transforms.Resize((384, 384)),
    transforms.RandomResizedCrop(384),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

## Creating Train, Test and Val dataset 

In [74]:

train_paths  = train_data.image_path.tolist()
train_labels = train_data[CONFIG.TARGET_COLS].values

val_paths  = val_data.image_path.tolist()
val_labels = val_data[CONFIG.TARGET_COLS].values


#torch dataset
batch_size = CONFIG.BATCH_SIZE

# Create the datasets

dataset_train = CustomDataset(train_paths, train_labels, transform=transform)
train_dataloader = DataLoader(dataset_train, batch_size=batch_size, shuffle=True)


dataset_val = CustomDataset(val_paths, val_labels, transform=transform)
val_dataloader = DataLoader(dataset_val, batch_size=batch_size, shuffle=True)


# Define your dataset size and other configuration parameters
dataset_size = len(dataset_train)  # Assuming you have defined 'dataset' earlier
total_epochs = 5  # Total number of epochs

# Calculate total train steps
total_train_steps = dataset_size * batch_size * total_epochs

# Define warmup steps as 10% of total train steps
warmup_steps = int(total_train_steps * 0.10)

# Define decay steps as the remaining steps after warmup
decay_steps = total_train_steps - warmup_steps

print(f"Total Train Steps: {total_train_steps}")
print(f"Warmup Steps: {warmup_steps}")
print(f"Decay Steps: {decay_steps}")


Total Train Steps: 1940950
Warmup Steps: 194095
Decay Steps: 1746855


In [None]:
test_paths  = test_data.image_path.tolist()

dataset_test = CustomDataset(val_paths, test_labels, transform=transform)
test_dataloader = DataLoader(dataset_test, batch_size=batch_size, shuffle=True)

In [75]:
class MAELoss(nn.Module):
    def __init__(self):
        super(MAELoss, self).__init__()

    def forward(self, y_pred, y_true):
        mae = torch.mean(torch.abs(y_true - y_pred))
        return mae


class MSELoss(nn.Module):
    def __init__(self):
        super(MSELoss, self).__init__()

    def forward(self, y_pred, y_true):
        mse = torch.mean((y_true - y_pred) ** 2)
        return mse

In [76]:
class R2Loss(nn.Module):#causes nans
    def __init__(self):
        super(R2Loss, self).__init__()

    def forward(self, y_pred, y_true):

        SS_res = torch.sum((y_true - y_pred)**2)
        SS_tot = torch.sum((y_true - torch.mean(y_true))**2)

        epsilon = 1e-6  # Small epsilon to avoid division by zero
        r2 = 1 - (SS_res / (SS_tot + epsilon))
        mean_r2 = torch.mean(r2)

        return mean_r2

In [77]:
model = torch.load('/kaggle/input/modified-planttraits2024-eda-training/model.pth')

In [78]:
r2_loss =R2Loss()
test_mse = MSELoss()
test_mae = MAELoss()

In [61]:
model.eval()  
test_loss = 0.0
with torch.no_grad():
    for images, targets in tqdm(test_dataloader, desc=f"Test Epoch {epoch + 1}/{total_epochs}"):
        images = images.to('cuda')
        targets = targets.to('cuda')
        
        outputs = model(images)
        test_loss += criterion(outputs, targets)#.item()
            
        r2_value = r2_loss(outputs, targets)
        test_mse = MSELoss(outputs, targets)
        test_mae = MAELoss(outputs, targets)
                
    test_loss /= len(test_dataloader)
    # Append loss to lists
    train_losses.append(loss.item())
    test_losses.append(test_loss)


Val Epoch 1/1:   0%|          | 0/1664 [00:00<?, ?it/s]

In [63]:
print(f"Test R2: {r2_value:.4f} - Test MSE: {test_mse:.4f} - Test MAE: {test_mae:.4f}")

Test R2: 0.9747 - Test MSE: 0.1534  - Test MAE:0.2792
