# PyTorch starter - FasterRCNN Inference

- You can find the [train notebook here](https://www.kaggle.com/pestipeti/pytorch-starter-fasterrcnn-train)
- The weights are [available here](https://www.kaggle.com/dataset/7d5f1ed9454c848ecb909c109c6fa8e573ea4de299e249c79edc6f47660bf4c5)

In [None]:
import pandas as pd
import numpy as np
import cv2
import os, sys
import re

from PIL import Image

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

import torch
import torchvision

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator

from torch.utils.data import DataLoader, Dataset
from torch.utils.data.sampler import SequentialSampler

from matplotlib import pyplot as plt

sys.path.append('/kaggle/input/my-wbf')
from ensemble_boxes_wbf import *

# Root of the competition data and its train/test sub-folders.
DIR_INPUT = '/kaggle/input/global-wheat-detection'
DIR_TRAIN = f'{DIR_INPUT}/train'
DIR_TEST = f'{DIR_INPUT}/test'

# Directory holding the trained checkpoint, and the checkpoint file that is
# passed to torch.load() further down in this notebook.
DIR_WEIGHTS = '/kaggle/input/weight-baseline'

WEIGHTS_FILE = f'{DIR_WEIGHTS}/fasterrcnn_resnet50_fpn_best.pth'

In [None]:
# Show the installed OpenCV version as the cell output.
cv2.__version__

In [None]:
# Show the installed PyTorch version as the cell output.
torch.__version__

In [None]:
# Load the sample submission from DIR_INPUT; its `image_id` column is what
# WheatTestDataset later uses to enumerate the test images.
test_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
# Cell output: (rows, columns) of the loaded frame.
test_df.shape

In [None]:

class TTA_Model:
    """Flip-based test-time-augmentation (TTA) wrapper around a detection model.

    The image is run through the model four times (identity, horizontal flip,
    vertical flip, both flips). Every set of predicted boxes is mapped back to
    the original orientation and the pooled predictions are post-processed.

    ``soft_nms`` and ``weighted_boxes_fusion`` are looked up in the enclosing
    namespace when the corresponding ``postprocess`` mode is used.
    """

    # Bounding-box settings shared by every albumentations pipeline.
    _BBOX_PARAMS = {'format': 'pascal_voc', 'label_fields': ['labels']}

    def __init__(self, model, device, cpu_device):
        """
        :param model: detection model; called with a list of image tensors and
            expected to return one dict with 'boxes' and 'scores' per image
        :param device: device the augmented images are moved to before inference
        :param cpu_device: device the raw model outputs are moved to afterwards
        """
        self.model = model
        self.device = device
        self.cpu_device = cpu_device
        # Flips are their own inverse, so the inverse pipelines mirror the
        # forward ones one-to-one (same order).
        self.transforms = self._build_flip_transforms()
        self.transforms_inv = self._build_flip_transforms()

    @classmethod
    def _build_flip_transforms(cls):
        """Return the four flip pipelines: identity, H-flip, V-flip, H+V-flip."""
        flip_sets = (
            [A.HorizontalFlip(p=0)],
            [A.HorizontalFlip(p=1)],
            [A.VerticalFlip(p=1)],
            [A.HorizontalFlip(p=1), A.VerticalFlip(p=1)],
        )
        return [
            A.Compose(flips + [ToTensorV2(p=1)], bbox_params=dict(cls._BBOX_PARAMS))
            for flips in flip_sets
        ]

    def __call__(self, image, postprocess=2):
        """Predict boxes for one image using flip TTA.

        :param image: image tensor laid out as (channels, height, width)
        :param postprocess: 0 - return all pooled boxes unchanged,
            1 - gaussian Soft-NMS, 2 - weighted boxes fusion
        :return: dict with 'boxes' (N x 4, x1, y1, x2, y2 in pixels),
            'scores' and 'labels'
        :raises ValueError: if ``postprocess`` is not 0, 1 or 2
        """
        if postprocess not in (0, 1, 2):
            raise ValueError('postprocess must be 0, 1 or 2, got {!r}'.format(postprocess))

        image_np = image.permute(1, 2, 0).data.cpu().numpy()
        height, width = image_np.shape[:2]

        # The forward pipelines are declared with bbox_params, so a box is
        # supplied with the image. A 1x1 box at the origin fits any image;
        # the transformed placeholder box is never used.
        placeholder = {
            'image': image_np,
            'bboxes': [[0.0, 0.0, 1.0, 1.0]],
            'labels': [1],
        }
        augmented = [transform(**placeholder) for transform in self.transforms]
        batch = [aug['image'].to(self.device) for aug in augmented]

        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            raw_outputs = self.model(batch)
        raw_outputs = [{k: v.to(self.cpu_device) for k, v in out.items()} for out in raw_outputs]

        boxes = []
        scores = []
        for inverse, out in zip(self.transforms_inv, raw_outputs):
            flipped_boxes = out['boxes'].detach().cpu().numpy().tolist()
            flip_scores = out['scores'].detach().cpu().numpy().tolist()
            # The label slot carries each box's index so that, should the
            # inverse transform drop a box, the matching score is dropped too
            # and boxes/scores stay aligned.
            restored = inverse(image=image_np,
                               bboxes=flipped_boxes,
                               labels=list(range(len(flipped_boxes))))
            boxes += [list(box) for box in restored['bboxes']]
            scores += [flip_scores[int(i)] for i in restored['labels']]
        # Single foreground class.
        labels = [1] * len(scores)

        if not boxes:
            # Nothing detected in any view: return well-shaped empty arrays.
            return {'boxes': np.zeros((0, 4)), 'scores': np.zeros((0,)), 'labels': np.zeros((0,))}

        if postprocess == 0:
            final_boxes, final_scores, final_labels = np.array(boxes), np.array(scores), np.array(labels)
        elif postprocess == 1:
            # soft_nms expects one list of predictions per model.
            final_boxes, final_scores, final_labels = soft_nms([boxes], [scores], [labels],
                                                               method=2, iou_thr=0.3, sigma=1.0,
                                                               thresh=0.001, weights=None)
        else:
            # WBF works on normalised coordinates; scale x by (width - 1) and
            # y by (height - 1). For square images this matches a single
            # (side - 1) factor.
            scale = np.array([width - 1, height - 1, width - 1, height - 1], dtype=float)
            normalised = [np.array(box) / scale for box in boxes]
            final_boxes, final_scores, final_labels = weighted_boxes_fusion([normalised],
                                                                            [scores],
                                                                            [labels],
                                                                            weights=None,
                                                                            iou_thr=0.7,
                                                                            skip_box_thr=0.7)
            final_boxes = np.array(final_boxes) * scale
        return {'boxes': final_boxes, 'scores': final_scores, 'labels': final_labels}

In [None]:
import numpy as np

def cpu_soft_nms_float(dets, sc, Nt, sigma, thresh, method):
    """
    Soft-NMS for float box coordinates.
    Based on: https://github.com/DocF/Soft-NMS/blob/master/soft_nms.py

    Works like a selection sort: at step i the best-scoring remaining box is
    moved to position i, then the scores of all later boxes are decayed
    according to their IoU with it.

    :param dets:   boxes array, one row per box: [x1, y1, x2, y2]
    :param sc:     numpy array of scores; it is reordered and decayed IN PLACE
    :param Nt:     IoU threshold (used by methods 1 and 3)
    :param sigma:  divisor inside the gaussian decay (method 2)
    :param thresh: boxes whose final score is not above this value are dropped
    :param method: 1 - linear soft-NMS, 2 - gaussian soft-NMS, anything else - standard NMS
    :return:       original indexes of the boxes to keep
    """
    count = dets.shape[0]

    # Tag every box with its original index (5th column) so it survives the swaps.
    tagged = np.concatenate((dets, np.arange(count).reshape(-1, 1)), axis=1)

    # Alias, not a copy: callers see the modified scores.
    scores = sc
    areas = (tagged[:, 2] - tagged[:, 0]) * (tagged[:, 3] - tagged[:, 1])

    for i in range(count):
        rest = i + 1

        # Bring the highest-scoring remaining box to position i.
        if rest < count:
            best = rest + int(np.argmax(scores[rest:], axis=0))
            if scores[i] < scores[best]:
                tagged[[i, best]] = tagged[[best, i]]
                scores[[i, best]] = scores[[best, i]]
                areas[[i, best]] = areas[[best, i]]

        # IoU between box i and every later box.
        head, tail = tagged[i], tagged[rest:]
        overlap_w = np.maximum(0.0, np.minimum(head[2], tail[:, 2]) - np.maximum(head[0], tail[:, 0]))
        overlap_h = np.maximum(0.0, np.minimum(head[3], tail[:, 3]) - np.maximum(head[1], tail[:, 1]))
        inter = overlap_w * overlap_h
        ovr = inter / (areas[i] + areas[rest:] - inter)

        # Score decay factor per later box.
        if method == 1:
            # linear: 1 - IoU above the threshold, untouched otherwise
            weight = np.ones(ovr.shape)
            above = ovr > Nt
            weight[above] = weight[above] - ovr[above]
        elif method == 2:
            # gaussian
            weight = np.exp(-(ovr * ovr) / sigma)
        else:
            # original NMS: zero out everything above the threshold
            weight = np.where(ovr > Nt, 0.0, 1.0)

        scores[rest:] = weight * scores[rest:]

    # Original indexes of the boxes whose score survived.
    return tagged[:, 4][scores > thresh].astype(int)


def nms_float_fast(dets, scores, thresh):
    """
    Standard (hard) NMS for float box coordinates.

    :param dets: numpy array of boxes, one row per box; the first four columns are x1, y1, x2, y2
    :param scores: numpy array with one score per box
    :param thresh: IoU value above which a lower-scored box is suppressed
    :return: list of indexes of the kept boxes, best score first
    """
    left, top, right, bottom = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    box_area = (right - left) * (bottom - top)

    # Candidate indexes, highest score first.
    remaining = scores.argsort()[::-1]

    selected = []
    while remaining.size > 0:
        current, others = remaining[0], remaining[1:]
        selected.append(current)

        # Intersection of the current box with every other candidate.
        overlap_w = np.maximum(0.0, np.minimum(right[current], right[others]) - np.maximum(left[current], left[others]))
        overlap_h = np.maximum(0.0, np.minimum(bottom[current], bottom[others]) - np.maximum(top[current], top[others]))
        inter = overlap_w * overlap_h
        iou = inter / (box_area[current] + box_area[others] - inter)

        # Only candidates that do not overlap too much stay in the race.
        remaining = others[iou <= thresh]

    return selected


def nms_method(boxes, scores, labels, method=3, iou_thr=0.5, sigma=0.5, thresh=0.001, weights=None):
    """
    Pool the predictions of several models and run (Soft-)NMS per label.

    :param boxes: list of boxes predictions from each model, each box is 4 numbers.
    It has 3 dimensions (models_number, model_preds, 4)
    Order of boxes: x1, y1, x2, y2. We expect float normalized coordinates [0; 1]
    :param scores: list of scores for each model (left unmodified)
    :param labels: list of labels for each model
    :param method: 1 - linear soft-NMS, 2 - gaussian soft-NMS, 3 - standard NMS
    :param iou_thr: IoU value for boxes to be a match
    :param sigma: Sigma value for SoftNMS
    :param thresh: threshold for boxes to keep (important for SoftNMS)
    :param weights: list of weights for each model. Default: None, which means weight == 1 for each model
    :return: boxes: boxes coordinates (Order of boxes: x1, y1, x2, y2).
    :return: scores: confidence scores
    :return: labels: boxes labels
    If there are no predictions at all, three empty arrays are returned
    (boxes with shape (0, 4)).
    """

    # No models at all: nothing to concatenate.
    if len(boxes) == 0:
        return np.zeros((0, 4)), np.zeros((0,)), np.zeros((0,))

    # If weights are specified, scale each model's scores by its relative weight.
    if weights is not None:
        if len(boxes) != len(weights):
            print('Incorrect number of weights: {}. Must be: {}. Skip it'.format(len(weights), len(boxes)))
        else:
            weights = np.array(weights)
            weight_total = weights.sum()
            # Work on a shallow copy so the caller's list is not overwritten.
            scaled = list(scores)
            for i, w in enumerate(weights):
                scaled[i] = (np.array(scaled[i]) * w) / weight_total
            scores = scaled

    # We concatenate everything. The explicit reshape lets a model with zero
    # predictions be combined with models that do have predictions.
    boxes = np.concatenate([np.asarray(b).reshape(-1, 4) for b in boxes])
    scores = np.concatenate([np.asarray(s).reshape(-1) for s in scores])
    labels = np.concatenate([np.asarray(lab).reshape(-1) for lab in labels])

    if boxes.shape[0] == 0:
        return np.zeros((0, 4)), np.zeros((0,)), np.zeros((0,))

    # Run NMS independently for each label
    final_boxes = []
    final_scores = []
    final_labels = []
    for label in np.unique(labels):
        condition = (labels == label)
        boxes_by_label = boxes[condition]
        scores_by_label = scores[condition]
        labels_by_label = np.array([label] * len(boxes_by_label))

        if method != 3:
            # Copies are passed because the soft-NMS routine modifies its score input.
            keep = cpu_soft_nms_float(boxes_by_label.copy(), scores_by_label.copy(), Nt=iou_thr, sigma=sigma, thresh=thresh, method=method)
        else:
            # Use faster function
            keep = nms_float_fast(boxes_by_label, scores_by_label, thresh=iou_thr)

        final_boxes.append(boxes_by_label[keep])
        final_scores.append(scores_by_label[keep])
        final_labels.append(labels_by_label[keep])

    final_boxes = np.concatenate(final_boxes)
    final_scores = np.concatenate(final_scores)
    final_labels = np.concatenate(final_labels)

    return final_boxes, final_scores, final_labels


def nms(boxes, scores, labels, iou_thr=0.5, weights=None):
    """
    Convenience wrapper: standard NMS through nms_method (method 3).

    :param boxes: list of boxes predictions from each model
    :param scores: list of scores for each model
    :param labels: list of labels for each model
    :param iou_thr: IoU value for boxes to be a match
    :param weights: list of weights for each model, or None
    :return: whatever nms_method returns (boxes, scores, labels)
    """
    standard_nms = 3
    return nms_method(boxes, scores, labels,
                      method=standard_nms,
                      iou_thr=iou_thr,
                      weights=weights)


def soft_nms(boxes, scores, labels, method=2, iou_thr=0.5, sigma=0.5, thresh=0.001, weights=None):
    """
    Convenience wrapper: Soft-NMS through nms_method (gaussian by default).

    :param boxes: list of boxes predictions from each model
    :param scores: list of scores for each model
    :param labels: list of labels for each model
    :param method: forwarded to nms_method (1 - linear, 2 - gaussian, 3 - standard NMS)
    :param iou_thr: IoU value for boxes to be a match
    :param sigma: Sigma value for SoftNMS
    :param thresh: threshold for boxes to keep
    :param weights: list of weights for each model, or None
    :return: whatever nms_method returns (boxes, scores, labels)
    """
    options = {
        'method': method,
        'iou_thr': iou_thr,
        'sigma': sigma,
        'thresh': thresh,
        'weights': weights,
    }
    return nms_method(boxes, scores, labels, **options)

In [None]:
class WheatTestDataset(Dataset):
    """Dataset yielding ``(image, image_id)`` pairs for inference.

    One item is produced per unique value of the ``image_id`` column; the
    image is read from ``<image_dir>/<image_id>.jpg``.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        """
        :param dataframe: frame with an ``image_id`` column
        :param image_dir: directory containing the ``.jpg`` files
        :param transforms: optional callable invoked as ``transforms(image=...)``
            that returns a dict with an ``'image'`` entry
        """
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        """Return the RGB float32 image (values scaled to [0, 1]) and its id.

        :raises FileNotFoundError: if the image file cannot be read
        """
        image_id = self.image_ids[index]
        image_path = f'{self.image_dir}/{image_id}.jpg'

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {image_path}')

        # OpenCV loads BGR; convert to RGB and scale to [0, 1].
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        if self.transforms:
            image = self.transforms(image=image)['image']

        return image, image_id

    def __len__(self) -> int:
        """Number of unique image ids."""
        return self.image_ids.shape[0]

In [None]:
# Albumentations
def get_test_transform():
    """Return the albumentations pipeline applied to every test image."""
    # A.Resize(512, 512) is left disabled, so only the tensor conversion runs.
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps)


In [None]:
# Build the Faster R-CNN (ResNet-50 FPN) architecture only: both `pretrained`
# flags are False, so no pre-trained weights are requested here. The trained
# weights are loaded from WEIGHTS_FILE further down.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
cpu_device = torch.device('cpu')

num_classes = 2  # 1 class (wheat) + background

# get number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features

# replace the box-predictor head with one sized for `num_classes`
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Load the trained weights. map_location remaps the stored tensors onto the
# selected device, so a checkpoint saved from a GPU also loads on a CPU-only machine.
model.load_state_dict(torch.load(WEIGHTS_FILE, map_location=device))
model.eval()

# Assigning the result keeps the notebook from printing the whole model.
x = model.to(device)

In [None]:
def collate_fn(batch):
    """Regroup a list of per-sample tuples into one tuple per field."""
    per_field = zip(*batch)
    return tuple(per_field)

# Test dataset: ids come from test_df, images from DIR_TEST.
test_dataset = WheatTestDataset(
    dataframe=test_df,
    image_dir=DIR_TEST,
    transforms=get_test_transform(),
)

# One image per batch, in dataset order, nothing dropped.
test_data_loader = DataLoader(
    dataset=test_dataset,
    collate_fn=collate_fn,
    batch_size=1,
    num_workers=4,
    shuffle=False,
    drop_last=False,
)

In [None]:
def format_prediction_string(boxes, scores):
    """Serialise detections as space-separated "score v0 v1 v2 v3" groups.

    Scores are written with four decimals; the four box values are written
    as-is. Scores and boxes are paired positionally.
    """
    chunks = [
        "{0:.4f} {1} {2} {3} {4}".format(score, *box[:4])
        for score, box in zip(scores, boxes)
    ]
    return " ".join(chunks)

In [None]:
# Not applied in this loop; the score filtering is left to the post-processing step.
detection_threshold = 0.5
results = []
# Post-processing mode handed to TTA_Model (2 is also TTA_Model's default).
postprocess = 2
tta_model = TTA_Model(model, device, cpu_device)
for images, image_ids in test_data_loader:
    # The loader uses batch_size=1, so every batch holds exactly one image/id.
    output = tta_model(images[0], postprocess=postprocess)
    # Work on a float copy with a guaranteed (N, 4) shape, even when N == 0.
    final_boxes = np.array(output['boxes'], dtype=float).reshape(-1, 4)
    final_scores = output['scores']
    image_id = image_ids[0]

    # Convert [x_min, y_min, x_max, y_max] to [x_min, y_min, width, height].
    final_boxes[:, 2] = final_boxes[:, 2] - final_boxes[:, 0]
    final_boxes[:, 3] = final_boxes[:, 3] - final_boxes[:, 1]

    result = {
        'image_id': image_id,
        'PredictionString': format_prediction_string(final_boxes, final_scores)
    }

    results.append(result)

In [None]:
# Preview the first two prediction records as the cell output.
results[0:2]

In [None]:
# Rebind test_df to the predictions: one row per image with its id and
# serialised prediction string (this replaces the sample submission frame).
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
test_df.head()

In [None]:
# sample = images[1].permute(1,2,0).cpu().numpy()
# boxes = outputs[1]['boxes'].data.cpu().numpy()
# scores = outputs[1]['scores'].data.cpu().numpy()

# boxes = boxes[scores >= detection_threshold].astype(np.int32)

In [None]:
# fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# for box in boxes:
#     cv2.rectangle(sample,
#                   (box[0], box[1]),
#                   (box[2], box[3]),
#                   (220, 0, 0), 2)
    
# ax.set_axis_off()
# ax.imshow(sample)

In [None]:
# Write the predictions to submission.csv, without the DataFrame index column.
test_df.to_csv('submission.csv', index=False)