# ResNet152 Inference Notebook

- I am using a ResNet152 pretrained on ImageNet for image-to-biomass prediction.  
- Do not expect a top score on the test set from this model. [LB 0.24]  
- A Vision Transformer would be a better choice for this kind of task.  

# Why this notebook?
- Anyone having trouble with data processing or inference can refer to this notebook.  
- Anyone who wants to practice fine-tuning a transfer-learning model for a real competition may find this notebook helpful.  

This notebook loads an already-trained model and only runs inference. You can also check out the public [Training Notebook](https://www.kaggle.com/code/shivsatyam/baseline-resnet152-tl-simple-tuning-example).  

References for those who want to learn more:  
- Transfer Learning: https://www.ibm.com/think/topics/transfer-learning  
- ResNet152: https://arxiv.org/pdf/1512.03385


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import glob
import random

import PIL
from PIL import Image
import torch.nn as nn
import torch

import torchvision
from torch.utils.data import Dataset
from torch.utils.data import DataLoader, random_split
from torchvision import transforms, datasets, models
from tqdm.auto import tqdm

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Dataset root; every CSV and image path below is built by appending to it.
DATA_PATH = '/kaggle/input/csiro-biomass/'
# Presumably the number of biomass targets per image (five target columns
# are built further down) -- TODO confirm.
N_CLASSES = 5
# Batch size passed to the train/valid DataLoaders.
BATCH_SIZE = 8
# Passed as `num_workers` to every DataLoader; 0 loads data in the main process.
NUM_WORKERS = 0

# Raw competition tables; both are reshaped later via their 'image_path' column.
train_df = pd.read_csv(DATA_PATH + 'train.csv')
test_df = pd.read_csv(DATA_PATH + 'test.csv')
# Last expression of the cell: shows the first five training rows.
train_df.head(5)

In [None]:
# The target columns are produced in this order: [Dry_Clover_g, Dry_Dead_g, Dry_Green_g, Dry_Total_g, GDM_g]

def get_unique_dataframe(df, target_parameter):
    """Return the sorted, de-duplicated values of one dataframe column.

    Args:
        df: Dataframe to read from.
        target_parameter: Name of the column whose distinct values are wanted.

    Returns:
        The array produced by ``np.unique`` over the column's values.
    """
    column_values = df[target_parameter].tolist()
    return np.unique(column_values)

def mf_dataframe(df, target_parameter, inference = False):
    """Reshape the long-format table into one row per image.

    Args:
        df: Long-format dataframe. It must contain the column named by
            ``target_parameter``; unless ``inference`` is set it must also
            contain ``target_name`` and ``target`` columns.
        target_parameter: Name of the column that identifies an image.
        inference: When true, only the ``path`` column is produced and the
            target columns are not looked up.

    Returns:
        A dataframe with a ``path`` column and, unless ``inference`` is set,
        the columns Dry_Clover_g, Dry_Dead_g, Dry_Green_g, Dry_Total_g and
        GDM_g (in that order). Rows follow the order returned by
        ``get_unique_dataframe``. An empty dataframe is returned when there
        are no images.

    Raises:
        IndexError: If an image has no row for one of the targets.
    """
    target_names = ('Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g')
    row_frames = []
    image_paths_unique = get_unique_dataframe(df, target_parameter)
    for image_path in tqdm(image_paths_unique, desc = "Processing Dataframe"):
        record = {'path': image_path}
        if not inference:
            selective = df[df[target_parameter] == image_path]
            for target_name in target_names:
                values = selective.loc[selective["target_name"] == target_name, "target"].tolist()
                if not values:
                    raise IndexError(
                        f"no '{target_name}' row found for image '{image_path}'"
                    )
                # When a target is listed more than once, the first row wins.
                record[target_name] = values[0]
        # Rows are still materialised through a Series so the resulting
        # column dtypes match what a per-row concat produces.
        row_frames.append(pd.Series(record).to_frame().T)

    # pd.concat rejects an empty list, so handle the no-image case explicitly.
    if not row_frames:
        return pd.DataFrame()
    # Concatenate once at the end instead of re-copying the accumulated
    # frame on every iteration.
    return pd.concat(row_frames, ignore_index = True)

# One row per training image, keyed by 'image_path', with the target columns attached.
unique_train_df = mf_dataframe(df = train_df, target_parameter = 'image_path')

In [None]:
# Per-channel statistics used to normalise every image (the values torchvision
# documents for its ImageNet-pretrained models).
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]


def _tensor_and_normalize():
    """Return the tensor-conversion + normalisation tail shared by all pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(mean = _NORMALIZE_MEAN, std = _NORMALIZE_STD),
    ]


def _deterministic_pipeline():
    """Build the augmentation-free pipeline: resize to 256, centre-crop to 224."""
    steps = [
        transforms.Resize(size = 256),
        transforms.CenterCrop(size = 224),
    ]
    return transforms.Compose(steps + _tensor_and_normalize())


def _augmenting_pipeline():
    """Build the training pipeline: random crop/flip/rotation/colour jitter."""
    steps = [
        transforms.RandomResizedCrop(size = 224, scale = (0.8, 1.0), ratio = (0.9, 1.1)),
        transforms.RandomHorizontalFlip(p = 0.5),
        transforms.RandomRotation(degrees = 15),
        transforms.ColorJitter(brightness = 0.2, contrast = 0.2, saturation = 0.2, hue = 0.1),
    ]
    return transforms.Compose(steps + _tensor_and_normalize())


# Transform pipeline per task type; 'valid' and 'test' use the same steps.
img_transform = {
    'train': _augmenting_pipeline(),
    'valid': _deterministic_pipeline(),
    'test': _deterministic_pipeline(),
}

In [None]:
from torch.utils.data import Subset, random_split, DataLoader

class Configure(Dataset):
    """Map-style dataset that yields one transformed image per dataframe row.

    The class is a dataset, not a network layer, so it derives from
    ``torch.utils.data.Dataset`` rather than ``nn.Module``.

    Args:
        dataframe: Table with a ``path`` column holding image paths relative
            to ``DATA_PATH``. For non-test tasks every column after the first
            is read as a target value.
        task_type: Key into ``img_transform`` selecting the transform
            pipeline ('train', 'valid' or 'test').
    """

    def __init__(self, dataframe, task_type = 'train'):
        super().__init__()
        self.dataframe = dataframe
        self.task_type = task_type

    def __len__(self):
        """Return the number of rows (images) in the dataframe."""
        return len(self.dataframe)

    def __getitem__(self, index):
        """Load and transform the image at ``index``.

        Returns:
            The transformed image alone when ``task_type`` is 'test',
            otherwise a ``(image, targets)`` pair where ``targets`` is a
            float32 tensor built from every column after the first.
        """
        row = self.dataframe.iloc[index]
        # The context manager closes the file handle; convert() loads the
        # pixel data first and forces three channels, which the 3-channel
        # Normalize step requires even for grayscale or RGBA files.
        with Image.open(DATA_PATH + row["path"]) as source:
            rgb_image = source.convert("RGB")
        pil = img_transform[self.task_type](rgb_image)
        if self.task_type == "test":
            return pil
        labels = row[1:].tolist()
        # Pin the dtype so the targets do not depend on how the dataframe
        # happens to store its numbers.
        return pil, torch.tensor(labels, dtype = torch.float32)


# Every training row, wrapped with the 'train' transform pipeline.
dataset = Configure(dataframe = unique_train_df, task_type = 'train')
# 80 % of the rows go to training; the remainder is held out for validation.
train_size = int(0.8 * len(dataset))
valid_size = len(dataset) - train_size

# NOTE: despite its name, `valid_indices` is the second object returned by
# random_split; only its `.indices` attribute is reused below. No generator
# is passed, so the split depends on the global RNG state.
train_dataset, valid_indices = random_split(dataset, [train_size, valid_size])

# Rebuild the held-out rows on a second Configure instance so they go through
# the 'valid' transforms instead of the 'train' ones.
valid_dataset = Subset(Configure(dataframe = unique_train_df, task_type = 'valid'), valid_indices.indices)
# Only the training loader shuffles.
train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle = True)
valid_dataloader = DataLoader(valid_dataset, batch_size = BATCH_SIZE, num_workers = NUM_WORKERS, shuffle = False)

In [None]:
# Train/valid loaders keyed by phase name.
dataloaders = {
    'train': train_dataloader,
    'valid': valid_dataloader
}

# Run on the GPU when one is available, otherwise on the CPU.
use_cuda = torch.cuda.is_available()
device = "cuda" if use_cuda else "cpu"
# weights = None: the architecture is built without downloading weights; all
# parameters come from the checkpoint loaded below.
model = models.resnet152(weights = None)
# Replace the final layer with one output per target. N_CLASSES is used
# instead of a repeated literal so the head and the config cannot drift apart.
model.fc = torch.nn.Linear(model.fc.in_features, N_CLASSES)
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source (consider weights_only = True where the torch version allows).
# map_location = 'cpu' keeps loading independent of the device the checkpoint
# was saved from; the model is moved to `device` afterwards.
model.load_state_dict(torch.load('/kaggle/input/img2biomass-hypertuned-resnet152/pytorch/v1/1/safetensor.pth', map_location = 'cpu'))
model.to(device)
# Switch to evaluation mode; as the last expression this also displays the model.
model.eval()

In [None]:
# One row per test image; inference = True keeps only the 'path' column.
unique_test_df = mf_dataframe(df = test_df, target_parameter = 'image_path', inference = True)
# NOTE: rebinds `dataset`, which previously held the training dataset.
dataset = Configure(dataframe = unique_test_df, task_type = 'test')
# shuffle = False keeps the loader order aligned with the rows of unique_test_df.
test_dataloader = DataLoader(dataset, batch_size = 1, num_workers = NUM_WORKERS, shuffle = False)

In [None]:
def inference(model):
    """Run ``model`` over ``test_dataloader`` and build the submission table.

    Reads the module-level ``test_dataloader``, ``unique_test_df`` and
    ``device``. The loader must yield images in the same order as the rows of
    ``unique_test_df`` (i.e. it must not shuffle); any batch size is accepted.

    Args:
        model: Network returning one value per target for each input image,
            in the order Dry_Clover_g, Dry_Dead_g, Dry_Green_g, Dry_Total_g,
            GDM_g.

    Returns:
        A dataframe with columns ``sample_id`` (``<image stem>__<target>``)
        and ``target`` (float), holding five rows per test image.
    """
    from pathlib import PurePosixPath

    suffixes = ['Dry_Clover_g', 'Dry_Dead_g', 'Dry_Green_g', 'Dry_Total_g', 'GDM_g']
    model.eval()
    submission_rows = []
    # Running position in unique_test_df; counts images rather than batches so
    # the lookup stays correct when a batch holds more than one image.
    row_idx = 0
    with torch.no_grad():
        for images in tqdm(test_dataloader):
            images = images.to(device)
            outputs = model(images)
            # Keep the batch axis explicit: squeeze() would only be right for
            # a batch of exactly one image.
            outputs = outputs.reshape(outputs.shape[0], -1).cpu().numpy()
            for prediction in outputs:
                image_path = unique_test_df.iloc[row_idx]["path"]
                row_idx += 1
                # File name without directory or extension, e.g.
                # 'test/ID123.jpg' -> 'ID123'.
                name = PurePosixPath(image_path).stem.strip()
                for i, suffix in enumerate(suffixes):
                    submission_rows.append({
                        'sample_id': f'{name}__{suffix}',
                        'target': float(prediction[i])
                    })

    submission = pd.DataFrame(submission_rows, columns = ['sample_id', 'target'])
    # Guarantees a float column even when no rows were produced.
    submission['target'] = submission['target'].astype(float)
    return submission


In [None]:
# Predict every test image, then preview the first five submission rows.
submission_resnet = inference(model)
submission_resnet.head(5)

In [None]:
# Write the predictions to submission.csv without the dataframe index column.
submission_resnet.to_csv('submission.csv', index = False)

In [None]:
# Preview the first ten submission rows.
submission_resnet.head(10)