Hello everyone! This is my first object detection competition, and I chose to use the Faster R-CNN model.

The competition is Global Wheat Detection.

I used model ensembling and test-time augmentation (TTA) to improve the predictions.
Without ensembling or TTA, the model scored 0.67.
With TTA alone the score rose to 0.695, and with ensembling + TTA it rose to 0.713.

In [1]:
import sys
sys.path.insert(0, "/kaggle/input/weightedboxesfusion")

import ensemble_boxes

import pandas as pd
import numpy as np
import cv2
import os, re
import gc
import random

import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, FasterRCNN
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

import albumentations as A
from albumentations.pytorch.transforms import ToTensorV2

from torch.utils.data import DataLoader, Dataset

from matplotlib import pyplot as plt

In [2]:
# Root directory of the competition data on Kaggle.
DATA_DIR = "/kaggle/input/global-wheat-detection"

# Run on the first CUDA GPU when one is available; otherwise fall back to CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

In [3]:
# The sample submission supplies the image ids that must be predicted.
sample_submission_path = os.path.join(DATA_DIR, "sample_submission.csv")
test_df = pd.read_csv(sample_submission_path)
test_df.shape

(10, 2)

# Data Loader For Test

In [4]:
class WheatDataset(Dataset):
    """Inference dataset that yields one image per unique ``image_id``.

    Args:
        dataframe: Table with an ``image_id`` column. Each distinct id becomes
            one dataset item.
        image_dir: Directory holding the images as ``<image_id>.jpg``.
        transforms: Optional callable invoked as ``transforms(image=...)`` that
            returns a mapping with an ``"image"`` entry.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()

        self.image_ids = dataframe['image_id'].unique()
        # Kept as an attribute for callers; item loading does not read it.
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __len__(self) -> int:
        """Return the number of unique image ids."""
        return len(self.image_ids)

    def __getitem__(self, idx: int):
        """Load the image at position ``idx``.

        Returns:
            ``(image, image_id)`` where ``image`` is the RGB image as float32
            scaled by 1/255, after ``transforms`` if one was given.

        Raises:
            FileNotFoundError: If the image file cannot be read.
        """
        image_id = self.image_ids[idx]
        image_path = f'{self.image_dir}/{image_id}.jpg'

        image = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the offending path instead of inside cvtColor.
            raise FileNotFoundError(f"Could not read image: {image_path}")

        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0

        if self.transforms:
            image = self.transforms(image=image)['image']

        return image, image_id


# Load All Models

In [5]:
def fasterrcnn_resnet50_fpn(path,pretrained_backbone=False):
    """Build a Faster R-CNN with a ResNet-50 FPN backbone and load its weights.

    Args:
        path: Location of the saved ``state_dict`` checkpoint.
        pretrained_backbone: Forwarded to ``resnet_fpn_backbone``; the
            checkpoint overwrites the weights either way.

    Returns:
        The model on ``DEVICE`` in evaluation mode.
    """
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    # Two output classes (presumably background + wheat -- confirm against training).
    model = FasterRCNN(backbone, 2)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine,
    # which matters because DEVICE falls back to CPU when CUDA is unavailable.
    state_dict = torch.load(path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.to(DEVICE)
    model.eval()
    return model

In [6]:
def get_model_101( path,pretrained=False):
    """Build a Faster R-CNN with a ResNet-101 FPN backbone and load its weights.

    Args:
        path: Location of the saved ``state_dict`` checkpoint.
        pretrained: Forwarded to ``resnet_fpn_backbone``; the checkpoint
            overwrites the weights either way.

    Returns:
        The model on ``DEVICE`` in evaluation mode.
    """
    backbone = resnet_fpn_backbone('resnet101', pretrained=pretrained)
    model = FasterRCNN(backbone, num_classes=2)
    # Replace the box head with a fresh 2-class predictor sized to the existing
    # head's input features (presumably mirrors the training setup -- confirm).
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
    # map_location lets a checkpoint saved on a GPU load on a CPU-only machine,
    # which matters because DEVICE falls back to CPU when CUDA is unavailable.
    state_dict = torch.load(path, map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.to(DEVICE)
    model.eval()
    return model

In [7]:
# Load every checkpoint that takes part in the ensemble.
# Author's note: the 101_145 model gets the best single-model score with TTA (0.695).
# The 101_145 and 50_fix checkpoints are each loaded twice, so each of them
# contributes two entries to `models`.
model = get_model_101("../input/faster-cnn-final-model/fastercnn_resnet_101_145.pth")
model2 = get_model_101("../input/faster-cnn-final-model/fastercnn_resnet_101_145.pth")
model3 = get_model_101("../input/faster-cnn-101-155-fix/fastercnn_resnet_101_fix_155.pth")
model4 = get_model_101("../input/fastercnn-155/fastercnn_101_155.pth")
model5 = get_model_101("../input/faster-cnn-125/fastercnn_resnet_101_125.pth")
faster_CNN_50_90 = fasterrcnn_resnet50_fpn("../input/isfix-model/fastercnn_50_fix.pth")
faster_CNN_50_90_2 = fasterrcnn_resnet50_fpn("../input/isfix-model/fastercnn_50_fix.pth")
faster_CNN_50_105 = fasterrcnn_resnet50_fpn("../input/faster-50-fix-115/fastercnn_50_fix_115.pth")

# Ensemble members, in the order their predictions are handed to the fusion step.
models = [
    model,
    model3,
    model4,
    model5,
    faster_CNN_50_105,
    faster_CNN_50_90,
    faster_CNN_50_90_2,
    model2,
]

# Ensemble Prediction

In [8]:
from ensemble_boxes import *

def make_ensemble_predictions(images):
    """Run every model in ``models`` on the same batch of images.

    Args:
        images: Iterable of image tensors; each one is moved to ``DEVICE``.

    Returns:
        A list with one entry per model, each entry being that model's raw
        output for the whole batch.
    """
    batch = [img.to(DEVICE) for img in images]
    per_model_outputs = []
    with torch.no_grad():
        for detector in models:
            per_model_outputs.append(detector(batch))
            # Drop the loop reference and release cached memory between models.
            del detector
            gc.collect()
            torch.cuda.empty_cache()
    return per_model_outputs

def run_wbf_ensemble(predictions, image_index, image_size=1024, iou_thr=0.45, skip_box_thr=0.43, weights=None):
    """Fuse the boxes that several models predicted for one image.

    Args:
        predictions: One entry per model; ``predictions[m][image_index]`` is a
            mapping with tensor entries ``'boxes'`` and ``'scores'``.
        image_index: Position of the image inside the batch.
        image_size: Image side length in pixels; boxes are divided by
            ``image_size - 1`` before fusion and multiplied back afterwards.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: Score threshold forwarded to ``weighted_boxes_fusion``.
        weights: Optional per-model weights forwarded to
            ``weighted_boxes_fusion`` (``None`` leaves the library default).

    Returns:
        ``(boxes, scores, labels)`` as returned by the fusion, with boxes
        rescaled to pixel coordinates.
    """
    scale = image_size - 1
    boxes, scores, labels = [], [], []
    for prediction in predictions:
        detection = prediction[image_index]
        model_scores = detection['scores'].data.cpu().numpy()
        boxes.append(detection['boxes'].data.cpu().numpy() / scale)
        scores.append(model_scores)
        # Single-class problem: every detection gets label 1.
        labels.append(np.ones(model_scores.shape[0]))

    # Forward the caller's weights; previously None was hard-coded here, so the
    # `weights` argument was silently ignored.
    boxes, scores, labels = weighted_boxes_fusion(
        boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr
    )
    boxes = boxes * scale
    return boxes, scores, labels

In [9]:
def get_test_transforms():
    """Return the test-time pipeline, which only converts the image to a tensor."""
    pipeline_steps = [ToTensorV2(p=1.0)]
    return A.Compose(pipeline_steps, p=1.0)

In [10]:
def collate_fn(batch):
    """Transpose a list of samples into one tuple per sample field.

    For ``(image, image_id)`` samples this yields ``(images, image_ids)``.
    """
    fields = zip(*batch)
    return tuple(fields)

# Dataset over the competition's test images, with tensor conversion only.
test_image_dir = os.path.join(DATA_DIR, "test")
test_dataset = WheatDataset(test_df, test_image_dir, get_test_transforms())

# Deterministic loader: fixed order, every image kept, samples grouped by collate_fn.
test_data_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    drop_last=False,
    num_workers=1,
    collate_fn=collate_fn,
)

# Format For CSV File

In [11]:
def format_prediction_string(boxes, scores):
    """Serialise detections as ``"score v0 v1 v2 v3"`` groups joined by spaces.

    Scores are written with four decimals; the four box values are written
    as-is. Pairing stops at the shorter of ``scores`` and ``boxes``.
    """
    return " ".join(
        f"{score:.4f} {box[0]} {box[1]} {box[2]} {box[3]}"
        for score, box in zip(scores, boxes)
    )

# TTA

In [12]:
class BaseWheatTTA:
    """Interface for a test-time augmentation. author: @shonenkov

    Subclasses transform a single image, transform a batch of images, and map
    predicted boxes back to the un-augmented frame.
    """

    # Side length used by subclasses when mirroring box coordinates.
    image_size = 1024

    def augment(self, image):
        """Transform a single image."""
        raise NotImplementedError()

    def batch_augment(self, images):
        """Transform a batch of images."""
        raise NotImplementedError()

    def deaugment_boxes(self, boxes):
        """Map boxes predicted on augmented input back to the original frame."""
        raise NotImplementedError()

class TTAHorizontalFlip(BaseWheatTTA):
    """Flip along dimension 1 of an image (dimension 2 of a batch). author: @shonenkov"""

    def augment(self, image):
        """Return ``image`` flipped along dimension 1."""
        return image.flip(1)

    def batch_augment(self, images):
        """Return ``images`` flipped along dimension 2."""
        return images.flip(2)

    def deaugment_boxes(self, boxes):
        """Mirror box columns 1 and 3 in place and return the same array.

        Assumes boxes are ``[x1, y1, x2, y2]`` rows -- confirm against caller.
        """
        # Columns are swapped while mirroring; fancy indexing copies the
        # right-hand side first, so the in-place write is safe.
        mirrored = self.image_size - boxes[:, [3, 1]]
        boxes[:, [1, 3]] = mirrored
        return boxes

class TTAVerticalFlip(BaseWheatTTA):
    """Flip along dimension 2 of an image (dimension 3 of a batch). author: @shonenkov"""

    def augment(self, image):
        """Return ``image`` flipped along dimension 2."""
        return image.flip(2)

    def batch_augment(self, images):
        """Return ``images`` flipped along dimension 3."""
        return images.flip(3)

    def deaugment_boxes(self, boxes):
        """Mirror box columns 0 and 2 in place and return the same array.

        Assumes boxes are ``[x1, y1, x2, y2]`` rows -- confirm against caller.
        """
        # Columns are swapped while mirroring; fancy indexing copies the
        # right-hand side first, so the in-place write is safe.
        mirrored = self.image_size - boxes[:, [2, 0]]
        boxes[:, [0, 2]] = mirrored
        return boxes
    
class TTARotate90(BaseWheatTTA):
    """Rotate by one quarter turn with ``torch.rot90``. author: @shonenkov"""

    def augment(self, image):
        """Rotate a single image once in the plane of dimensions (1, 2)."""
        return torch.rot90(image, 1, (1, 2))

    def batch_augment(self, images):
        """Rotate a batch once in the plane of dimensions (2, 3)."""
        return torch.rot90(images, 1, (2, 3))

    def deaugment_boxes(self, boxes):
        """Return a new array with the rotation undone; ``boxes`` is not modified."""
        restored = boxes.copy()
        # New columns 0/2 are the mirrored old columns 3/1.
        restored[:, 0] = self.image_size - boxes[:, 3]
        restored[:, 2] = self.image_size - boxes[:, 1]
        # New columns 1/3 are the old columns 0/2 unchanged.
        restored[:, 1] = boxes[:, 0]
        restored[:, 3] = boxes[:, 2]
        return restored

class TTACompose(BaseWheatTTA):
    """Chain several TTA transforms into one. author: @shonenkov

    Augmentation applies the transforms in order; de-augmentation undoes them
    in reverse order.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def augment(self, image):
        """Apply every transform's ``augment`` in sequence."""
        augmented = image
        for step in self.transforms:
            augmented = step.augment(augmented)
        return augmented

    def batch_augment(self, images):
        """Apply every transform's ``batch_augment`` in sequence."""
        augmented = images
        for step in self.transforms:
            augmented = step.batch_augment(augmented)
        return augmented

    def prepare_boxes(self, boxes):
        """Return a copy where columns 0/1 hold the smaller and 2/3 the larger coordinate."""
        ordered = boxes.copy()
        # De-augmentation can leave the two coordinates of an axis swapped.
        ordered[:, 0] = np.minimum(boxes[:, 0], boxes[:, 2])
        ordered[:, 1] = np.minimum(boxes[:, 1], boxes[:, 3])
        ordered[:, 2] = np.maximum(boxes[:, 0], boxes[:, 2])
        ordered[:, 3] = np.maximum(boxes[:, 1], boxes[:, 3])
        return ordered

    def deaugment_boxes(self, boxes):
        """Undo the transforms last-to-first, then restore coordinate ordering."""
        restored = boxes
        for step in reversed(self.transforms):
            restored = step.deaugment_boxes(restored)
        return self.prepare_boxes(restored)

In [13]:
from itertools import product

# Every on/off combination of the three base transforms: 8 composed pipelines,
# including the empty one that leaves the input unchanged.
tta_transforms = [
    TTACompose([step for step in combination if step is not None])
    for combination in product(
        [TTAHorizontalFlip(), None],
        [TTAVerticalFlip(), None],
        [TTARotate90(), None],
    )
]

In [14]:
def make_tta_predictions(images, score_threshold=0.57):
    """Run the model ensemble on every TTA variant of a batch.

    Args:
        images: Sequence of image tensors that ``torch.stack`` can combine.
        score_threshold: Fused detections scoring at or below this value are
            dropped.

    Returns:
        A list with one entry per transform in ``tta_transforms``. Each entry
        is a list with one ``{'boxes', 'scores'}`` mapping per image, with the
        boxes mapped back to the un-augmented frame.
    """
    predictions = []
    with torch.no_grad():
        batch = torch.stack(images).float().to(DEVICE)
        for tta_transform in tta_transforms:
            # Every ensemble member predicts on the augmented batch.
            augmented = tta_transform.batch_augment(batch.clone())
            ensemble_outputs = make_ensemble_predictions(augmented)

            per_image = []
            for image_index in range(len(batch)):
                # Fuse the per-model boxes for this image, then filter by score.
                boxes, scores, _ = run_wbf_ensemble(ensemble_outputs, image_index=image_index)
                keep = np.where(scores > score_threshold)[0]
                restored = tta_transform.deaugment_boxes(boxes[keep].copy())
                per_image.append({
                    'boxes': restored,
                    'scores': scores[keep],
                })
            predictions.append(per_image)
    return predictions

In [15]:
def run_wbf(predictions, image_index, image_size=1024, iou_thr=0.4, skip_box_thr=0.43, weights=None):
    """Fuse the boxes that the TTA variants produced for one image.

    Args:
        predictions: One entry per TTA variant; ``predictions[t][image_index]``
            is a mapping with array entries ``'boxes'`` and ``'scores'``.
        image_index: Position of the image inside the batch.
        image_size: Image side length in pixels; boxes are divided by
            ``image_size - 1`` before fusion and multiplied back afterwards.
        iou_thr: IoU threshold forwarded to ``weighted_boxes_fusion``.
        skip_box_thr: Score threshold forwarded to ``weighted_boxes_fusion``.
        weights: Optional per-variant weights forwarded to
            ``weighted_boxes_fusion`` (``None`` leaves the library default).

    Returns:
        ``(boxes, scores, labels)`` as returned by the fusion, with boxes
        rescaled to pixel coordinates.
    """
    scale = image_size - 1
    boxes, scores, labels = [], [], []
    for prediction in predictions:
        detection = prediction[image_index]
        boxes.append((detection['boxes'] / scale).tolist())
        scores.append(detection['scores'].tolist())
        # Single-class problem: every detection gets label 1.
        labels.append(np.ones(detection['scores'].shape[0]).astype(int).tolist())

    # Forward the caller's weights; previously None was hard-coded here, so the
    # `weights` argument was silently ignored.
    boxes, scores, labels = ensemble_boxes.ensemble_boxes_wbf.weighted_boxes_fusion(
        boxes, scores, labels, weights=weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr
    )
    boxes = boxes * scale
    return boxes, scores, labels

In [16]:
# One submission row per test image.
results = []

for images, image_ids in test_data_loader:
    # Ensemble + TTA predictions for the whole batch, one entry per TTA variant.
    predictions = make_tta_predictions(images)

    for i, image_id in enumerate(image_ids):
        # Merge the TTA variants for this image into a final set of boxes.
        boxes, scores, _ = run_wbf(predictions, image_index=i)
        boxes = boxes.round().astype(np.int32).clip(min=0, max=1023)

        # Replace columns 2 and 3 with their differences from columns 0 and 1
        # (width and height if the boxes are [x1, y1, x2, y2]).
        boxes[:, 2:] -= boxes[:, :2]

        results.append({
            'image_id': image_id,
            'PredictionString': format_prediction_string(boxes, scores),
        })

	nonzero(Tensor input, *, Tensor out)
Consider using one of the following signatures instead:
	nonzero(Tensor input, *, bool as_tuple)


In [17]:
# Collect the per-image rows into the two-column submission table.
submission_columns = ['image_id', 'PredictionString']
test_df = pd.DataFrame(results, columns=submission_columns)
test_df

Unnamed: 0,image_id,PredictionString
0,aac893a91,0.9980 69 2 105 160 0.9969 249 90 129 141 0.99...
1,51f1be19e,0.9967 611 91 149 166 0.9938 810 762 101 94 0....
2,f5a1f0358,0.9982 944 435 79 186 0.9981 136 749 163 123 0...
3,796707dd7,0.9976 711 826 108 98 0.9972 897 330 109 94 0....
4,51b3e36ab,0.9988 546 29 248 133 0.9982 838 448 185 148 0...
5,348a992bb,0.9977 734 222 140 88 0.9970 542 34 72 95 0.99...
6,cc3532ff6,0.9993 771 830 164 161 0.9987 264 642 99 165 0...
7,2fd875eaa,0.9992 108 587 140 84 0.9992 467 351 123 99 0....
8,cb8d261a3,0.9982 21 558 182 106 0.9981 265 775 112 69 0....
9,53f253011,0.9986 621 103 120 143 0.9986 14 34 144 109 0....


In [18]:
# Write the submission file without the DataFrame index column.
test_df.to_csv('submission.csv', index=False)