In [None]:
!pip install richprint
!pip install torch
!pip install torchvision
!pip install timm

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import random

import PIL
from PIL import Image
import torch.nn as nn
import torch

import torchvision
from torch.utils.data import Dataset
from torchvision import transforms, datasets, models
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Root folder of the competition data. It is used as a plain string prefix
# (DATA_PATH + relative path), so the trailing slash is required.
DATA_PATH = '/kaggle/input/csiro-biomass/'
# Number of regression outputs of the model head (one per biomass target).
N_CLASSES = 5
# Samples per batch for every DataLoader built in this notebook.
BATCH_SIZE = 5
# Worker processes per DataLoader; 0 keeps data loading in the main process.
NUM_WORKERS = 0

# Raw competition tables, read straight from CSV.
train_df = pd.read_csv(DATA_PATH + 'train.csv')
test_df = pd.read_csv(DATA_PATH + 'test.csv')
# Last expression of the cell: preview the first five training rows.
train_df.head(5)

In [None]:
# Target columns are always emitted in this fixed order: [Dry_Clover_g, Dry_Dead_g, Dry_Green_g, Dry_Total_g, GDM_g]

def get_unique_dataframe(df, target_parameter):
    """Return the sorted, de-duplicated values of one column as a NumPy array.

    Args:
        df: DataFrame holding the column.
        target_parameter: Name of the column to de-duplicate.

    Returns:
        The result of ``np.unique`` over the column's values.
    """
    column_values = df[target_parameter].tolist()
    distinct_values = np.unique(column_values)
    return distinct_values

def mf_dataframe(df, target_parameter, inference = False):
    """Reshape the long-format table into one row per unique image.

    The input holds one row per (image, target_name) pair. The output holds one
    row per distinct value of ``df[target_parameter]`` (in the sorted order
    returned by ``get_unique_dataframe``), with the image path stored in a
    ``path`` column.

    Args:
        df: Long-format DataFrame. When ``inference`` is False it must also
            contain the ``target_name`` and ``target`` columns.
        target_parameter: Name of the column that identifies an image.
        inference: If True, only the ``path`` column is produced; otherwise the
            five target columns are added in the fixed order Dry_Clover_g,
            Dry_Dead_g, Dry_Green_g, Dry_Total_g, GDM_g.

    Returns:
        An object-dtype DataFrame with one row per image.

    Raises:
        IndexError: If an image has no row for one of the five targets.
    """
    target_names = ("Dry_Clover_g", "Dry_Dead_g", "Dry_Green_g", "Dry_Total_g", "GDM_g")
    columns = ['path'] if inference else ['path', *target_names]

    # .tolist() turns NumPy scalars into plain Python values so they compare
    # and hash exactly like the keys collected from the DataFrame below.
    image_paths_unique = get_unique_dataframe(df, target_parameter).tolist()

    # Single pass over the table: remember the FIRST value seen for every
    # (image, target_name) pair instead of re-filtering the frame per image.
    first_target = {}
    if not inference:
        triples = zip(
            df[target_parameter].tolist(),
            df["target_name"].tolist(),
            df["target"].tolist(),
        )
        for image_path, target_name, value in triples:
            first_target.setdefault((image_path, target_name), value)

    records = []
    for image_path in tqdm(image_paths_unique, desc = "Processing Dataframe"):
        record = {'path': image_path}
        if not inference:
            for target_name in target_names:
                key = (image_path, target_name)
                if key not in first_target:
                    raise IndexError(
                        f"no '{target_name}' row found for image {image_path!r}"
                    )
                record[target_name] = first_target[key]
        records.append(record)

    # Build the frame once (row-by-row concat is quadratic). dtype=object keeps
    # the column dtypes the same as the previous concat-based implementation.
    return pd.DataFrame(records, columns = columns, dtype = object)

# Collapse the long-format training table into one row per image
# (a 'path' column followed by the five target columns).
unique_train_df = mf_dataframe(df = train_df, target_parameter = 'image_path')

In [None]:
def _tensor_and_normalize():
    """Return fresh tensor-conversion + channel-normalisation steps shared by every split."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean = [0.485, 0.456, 0.406],
                             std = [0.229, 0.224, 0.225]),
    ]


def _deterministic_pipeline():
    """Return the augmentation-free pipeline: resize to 256, centre-crop to 224, normalise."""
    return transforms.Compose([
        transforms.Resize(size = 256),
        transforms.CenterCrop(size = 224),
        *_tensor_and_normalize(),
    ])


# Per-split preprocessing, keyed by the `task_type` passed to the dataset.
# Only 'train' applies random augmentation; 'valid' and 'test' get separate
# but identical deterministic pipelines.
img_transform = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(size = 224, scale = (0.8, 1.0), ratio = (0.9, 1.1)),
        transforms.RandomHorizontalFlip(p = 0.5),
        transforms.RandomRotation(degrees = 15),
        transforms.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2, hue = 0.1),
        *_tensor_and_normalize(),
    ]),
    'valid': _deterministic_pipeline(),
    'test': _deterministic_pipeline(),
}

In [None]:
from torch.utils.data import Subset, random_split, DataLoader

class Configure(torch.utils.data.Dataset):
    """Dataset that loads images listed in a one-row-per-image DataFrame.

    Each row must have a ``path`` column (relative to the module-level
    ``DATA_PATH``); every other column is treated as a regression target.
    The preprocessing pipeline is looked up in the module-level
    ``img_transform`` dict using ``task_type``.

    Args:
        dataframe: Frame with a ``path`` column plus the target columns.
        task_type: Key into ``img_transform``. With ``'test'`` only the image
            tensor is returned; otherwise ``(image, targets)`` is returned.
    """

    def __init__(self, dataframe, task_type = 'train'):
        self.dataframe = dataframe
        self.task_type = task_type

    def __len__(self):
        """Return the number of images (rows) in the frame."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Load, transform and return the sample at positional ``index``."""
        row = self.dataframe.iloc[index]
        # The context manager releases the file handle; convert() loads the
        # pixels before the file closes and guarantees three channels, which
        # the 3-channel Normalize step requires (grayscale/RGBA would fail).
        with Image.open(DATA_PATH + row["path"]) as raw_image:
            pil = raw_image.convert("RGB")
        pil = img_transform[self.task_type](pil)
        if self.task_type == "test":
            return pil
        # Every column except 'path' is a target; cast explicitly so the
        # label tensor is float32 regardless of the frame's column dtypes.
        labels = [float(value) for value in row.drop(labels = "path").tolist()]
        return pil, torch.tensor(labels, dtype = torch.float32)


# Fixed seed so the 80/20 train/validation split is identical on every run;
# without it the early-stopping decisions and the saved checkpoint are not
# comparable between runs.
SPLIT_SEED = 42

dataset = Configure(dataframe = unique_train_df, task_type = 'train')
train_size = int(0.8 * len(dataset))
valid_size = len(dataset) - train_size

# random_split returns two Subset objects over `dataset`; the second one is
# only used for its `.indices` (the name `valid_indices` is kept for
# compatibility with later cells).
train_dataset, valid_indices = random_split(
    dataset,
    [train_size, valid_size],
    generator = torch.Generator().manual_seed(SPLIT_SEED),
)

# Re-wrap the held-out indices around a dataset built with task_type='valid'
# so validation images get the deterministic pipeline, not the augmentations.
valid_dataset = Subset(Configure(dataframe = unique_train_df, task_type = 'valid'), valid_indices.indices)
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle = True)
valid_dataloader = DataLoader(valid_dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle = False)

# Lookup consumed by `train`, which reads the 'train' and 'valid' keys.
dataloaders = {
    'train': train_dataloader,
    'valid': valid_dataloader
}

In [None]:
# Use the GPU when one is visible, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = "cuda"
else:
    device = "cpu"

# Pretrained ResNet-152 backbone with every existing weight frozen.
model_resnet = torchvision.models.resnet152(pretrained = True)
for p in model_resnet.parameters():
    p.requires_grad = False

# Swap the final fully-connected layer for a new linear head with one output
# per target. The head is created after the freeze loop, so it is the only
# part of the network that still requires gradients.
in_features = model_resnet.fc.in_features
regression_head = nn.Linear(in_features, N_CLASSES)
model_resnet.fc = regression_head
model_resnet.to(device)

In [None]:
# Only parameters that still require gradients are handed to the optimizer.
# NOTE: `filter` returns a one-shot iterator; it is consumed by the SGD
# constructor on the next line and is empty afterwards.
grad_parameters_resnet = filter(lambda p: p.requires_grad, model_resnet.parameters())
# Plain SGD (no momentum argument is passed) with weight decay 1e-4.
optimizer_resnet = torch.optim.SGD(grad_parameters_resnet, lr = 1e-2, weight_decay = 1e-4)

def r2_loss(output, target, eps = 1e-8):
    """Return ``1 - R^2`` between predictions and targets as a scalar tensor.

    ``1 - R^2`` equals ``SS_res / SS_tot``, so the ratio is returned directly.
    Computing it as ``1 - (1 - ratio)`` rounds small losses to exactly zero in
    float32.

    Both sums run over ALL elements of the tensors, and ``SS_tot`` uses the
    mean of every element of ``target``.

    Args:
        output: Model predictions.
        target: Ground-truth values, broadcast-compatible with ``output``.
        eps: Small constant added to ``SS_tot`` to avoid division by zero when
            every target value is identical.

    Returns:
        Scalar tensor; 0 for a perfect fit, larger is worse.
    """
    ss_res = torch.sum((target - output) ** 2)
    ss_tot = torch.sum((target - torch.mean(target)) ** 2)
    return ss_res / (ss_tot + eps)

criterion_resnet = r2_loss

In [None]:
# The model is trained with a simple early-stopping (ES) loop; patience = 4 is used for the baseline.

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path = 'safetensor.pth', patience = 4):
    """Train `model` with early stopping on the validation loss.

    Args:
        n_epochs: Maximum number of epochs to run.
        loaders: Mapping with 'train' and 'valid' iterables that yield
            ``(data, target)`` batches.
        model: Module to optimise (updated in place).
        optimizer: Optimizer bound to the model's trainable parameters.
        criterion: Callable ``criterion(output, target)`` returning a scalar
            loss tensor.
        use_cuda: If True, every batch is moved to the GPU with ``.cuda()``.
        save_path: Where the state dict is written each time the validation
            loss is less than or equal to the best value seen so far.
        patience: Number of consecutive non-improving epochs after which
            training stops.

    Returns:
        The same `model` object, holding the weights of the LAST epoch run.
        The best-scoring weights are only in the file at `save_path`.
    """
    # float('inf') instead of np.Inf: the np.Inf alias was removed in NumPy 2.0.
    valid_loss_min = float('inf')
    epochs_without_improvement = 0
    for epoch in range(1, n_epochs + 1):
        if epochs_without_improvement >= patience:
            break
        train_loss = 0.0
        valid_loss = 0.0

        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)

            loss.backward()
            optimizer.step()
            # Incremental running mean of the per-batch loss; .item() yields a
            # plain float so no tensor is kept alive between batches.
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.item() - train_loss))
            if batch_idx % 100 == 0:
                print('Epoch: %d \tBatch: %d \tTraining Loss: %.6f' %(epoch, batch_idx + 1, train_loss))

        model.eval()
        # No gradients are needed for validation; skipping graph construction
        # saves memory and time.
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                if use_cuda:
                    data, target = data.cuda(), target.cuda()

                output = model(data)
                loss = criterion(output, target)
                valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.item() - valid_loss))

        print('Epoch: {} \tTraining Loss: {:.4f} \tValidation Loss: {:.4f}'.format(epoch, train_loss, valid_loss))
        if valid_loss <= valid_loss_min:
            # Improvement (or a tie): reset the counter and checkpoint.
            epochs_without_improvement = 0
            torch.save(model.state_dict(), save_path)
            print('BOOM! Validation loss decreased/const ({:.4f} --> {:.4f}).  Saving model...'.format(valid_loss_min, valid_loss))
            valid_loss_min = valid_loss
        else:
            epochs_without_improvement += 1

    return model

In [None]:
# Upper bound on the number of epochs; the early stopping inside `train` may end the run sooner.
n_epochs = 100
# Boolean flag telling `train` whether batches must be moved to the GPU.
use_cuda = str(device) == "cuda"
model_resnet = train(
    n_epochs,
    dataloaders,
    model_resnet,
    optimizer_resnet,
    criterion_resnet,
    use_cuda,
)

In [None]:
# One row per test image; with inference=True only the 'path' column is produced.
unique_test_df = mf_dataframe(df = test_df, target_parameter = 'image_path', inference = True)
# NOTE(review): this rebinds the name `dataset`, which an earlier cell used for the training dataset.
dataset = Configure(dataframe = unique_test_df, task_type = 'test')
# shuffle=False keeps batches in the same row order as `unique_test_df`.
test_dataloader = DataLoader(dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle = False)

def inference(model, dataloader = None, dataframe = None):
    """Run `model` over the test images and build the submission table.

    Each image yields five rows, one per target, with
    ``sample_id = '<image name>__<target name>'`` and predictions clipped at 0.

    Every image in a batch is handled individually. The previous version
    indexed the frame by batch number and compared a whole output row against
    0.0, which fails for any batch holding more than one image.

    Args:
        model: Trained module returning one value per target for each image.
        dataloader: Iterable of image batches in the same order as the rows
            of `dataframe` (i.e. not shuffled). Defaults to the module-level
            ``test_dataloader``.
        dataframe: Frame whose ``path`` column names the images. Defaults to
            the module-level ``unique_test_df``.

    Returns:
        DataFrame with columns ``sample_id`` and ``target``.
    """
    if dataloader is None:
        dataloader = test_dataloader
    if dataframe is None:
        dataframe = unique_test_df

    suffixes = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']

    model.eval()
    submission_rows = []
    row_offset = 0  # position in `dataframe` of the first image of the current batch
    with torch.no_grad():
        for images in tqdm(dataloader):
            images = images.to(device)
            # reshape (not squeeze) keeps the batch axis even for a batch of one.
            outputs = model(images).reshape(len(images), -1).cpu().numpy()

            for sample_idx, sample_outputs in enumerate(outputs):
                path = dataframe.iloc[row_offset + sample_idx]["path"]
                # 'test/<name>.jpg' -> '<name>'
                name = path.split('test/')[1].split('.jpg')[0]
                for suffix, value in zip(suffixes, sample_outputs):
                    submission_rows.append({
                        'sample_id': f'{name}__{suffix}'.strip(),
                        'target': float(max(0.0, value))
                    })
            row_offset += len(outputs)

    submission = pd.DataFrame(submission_rows, columns = ['sample_id', 'target'])
    return submission

In [None]:
# Build the submission table (sample_id, target) from the trained ResNet.
submission_resnet = inference(model = model_resnet)

In [None]:
# Display the submission table as the cell output.
# NOTE(review): none of the visible cells writes this frame to disk — confirm a CSV export exists elsewhere.
submission_resnet