# Ensemble of YOLOv5 s6 and n6 models
To improve the performance of COTS (crown-of-thorns starfish) prediction, we create an ensemble of the YOLOv5 s6 and n6 models using the Weighted Boxes Fusion (WBF) technique.

## Install Libraries

In [1]:
# !pip download bbox-utility
# !pip download ensemble-boxes

In [2]:
# Offline installs: --no-index stops pip from contacting PyPI, and --find-links
# points it at wheel files that were attached to the notebook as input datasets.
!pip install --no-index --find-links '/kaggle/input/' '/kaggle/input/icevision-essentials/icevision-repos/repos/loguru-0.6.0-py3-none-any.whl'
# bbox_utility provides the `bbox.utils` helpers imported further down.
!pip install --no-index --find-links '/kaggle/input/modules/' '/kaggle/input/modules/bbox_utility-1.0.13-py3-none-any.whl'
# ensemble_boxes provides `weighted_boxes_fusion`, used by `wbf` below.
!pip install --no-index --find-links '/kaggle/input/modules/' '/kaggle/input/modules/ensemble_boxes-1.0.9-py3-none-any.whl'

## Import Libraries

In [3]:
import ast
import glob
import os
import random
import shutil
import sys

sys.path.append('../input/tensorflow-great-barrier-reef')
sys.path.append('/kaggle/input/weightedboxesfusion/')

import numpy as np
from tqdm.notebook import tqdm
tqdm.pandas()
import pandas as pd
import cv2
import matplotlib.pyplot as plt

from IPython.display import display
from ensemble_boxes import *
from PIL import Image
import torch

In [4]:
# Root folder of the competition dataset; train.csv and train_images/ are read from here.
ROOT_DIR  = '/kaggle/input/tensorflow-great-barrier-reef/'

In [5]:
# Read the per-frame metadata. os.path.join is used because ROOT_DIR already
# ends with '/', so plain string concatenation would produce '//' in every path.
df = pd.read_csv(os.path.join(ROOT_DIR, 'train.csv'))

# Frames are stored as <ROOT_DIR>/train_images/video_<video_id>/<video_frame>.jpg
df['image_path'] = (
    os.path.join(ROOT_DIR, 'train_images', 'video_')
    + df.video_id.astype(str) + '/' + df.video_frame.astype(str) + '.jpg'
)

# The 'annotations' column holds the text of a Python literal (a list of dicts).
# ast.literal_eval parses literals only, so unlike eval() it cannot execute
# arbitrary code that might be embedded in the CSV.
df['annotations'] = df['annotations'].progress_apply(ast.literal_eval)
display(df.head(2))

## Clean Data
> Since ~80% (5k) of the images have no bounding box, we drop them. The models have already been trained, so this is done only to get some visualizations of the boxes predicted by the s6 model, the n6 model, and the ensemble of the two.

In [6]:
# Number of annotated boxes per frame.
df['num_bbox'] = df['annotations'].progress_apply(len)

# Percentage of frames without / with at least one box. The Series is indexed
# by the booleans False / True, so look the values up by label (with a default)
# instead of by position: that stays correct whichever class is more frequent
# and does not raise when one of the two classes is absent.
data = (df.num_bbox>0).value_counts(normalize=True)*100
print(f"No BBox: {data.get(False, 0.0):0.2f}% | With BBox: {data.get(True, 0.0):0.2f}%")

In [7]:
# Keep only frames that have at least one annotated box. The explicit copy
# makes the result an independent frame, so adding columns to it later does
# not trigger pandas' SettingWithCopyWarning.
df = df.query("num_bbox>0").copy()

## Create BBox
Since an image can have multiple bounding boxes, we convert the annotation dictionaries stored in the dataframe into a list of boxes per image.

In [9]:
def get_bbox(annots):
    """Turn a sequence of annotation dicts into a list of value lists.

    Each dict contributes one inner list holding its values in the dict's own
    insertion order; an empty input gives an empty list.
    """
    boxes = []
    for annotation in annots:
        boxes.append([*annotation.values()])
    return boxes

In [10]:
# Store, for every frame, its boxes as a plain list of value lists.
df['bboxes'] = df['annotations'].progress_apply(get_bbox)
# Quick visual check of the new column.
df.head(2)

## Ensembling

In [11]:
# Box-format converters from the bbox_utility wheel installed above.
from bbox.utils import coco2yolo, coco2voc, voc2yolo, voc2coco
# NOTE: `draw_bboxes` and `load_image` are redefined later in this same cell,
# so the local versions replace these imported ones.
from bbox.utils import draw_bboxes, load_image
from bbox.utils import clip_bbox, str2annot, annot2str


def load_image(image_path):
    """Read an image from disk and return it in RGB channel order.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The image converted from OpenCV's BGR order to RGB.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file. ``cv2.imread``
            signals that by returning ``None`` instead of raising, which
            would otherwise surface as an opaque error inside ``cvtColor``.
    """
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise FileNotFoundError(f'could not read image: {image_path}')
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def plot_one_box(x, img, color=None, label=None, line_thickness=None,score=None):
    """Draw one rectangle, and optionally a caption, onto ``img`` in place.

    Args:
        x: Box corners as ``(x1, y1, x2, y2)``; each value is cast to ``int``.
        img: Image array to draw on (modified in place). Its first two
            ``shape`` entries are used to derive a default line thickness.
        color: Colour passed to OpenCV. A random colour is used when falsy.
        label: Optional caption text.
        score: Optional number appended to the caption as ``"{:.2f}%"``.
        line_thickness: Line thickness; derived from the image size when falsy.

    The caption is drawn only when there is something to show, so calling the
    function with its default ``label=None`` / ``score=None`` just draws the box.
    """
    tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)

    # Build the caption from whichever parts were supplied.
    caption = label or ''
    if score is not None:
        caption += "{:.2f}%".format(score)

    if caption:
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(caption, 0, fontScale=tl / 3, thickness=tf)[0]
        # Filled background sized to the text, placed just above the box's top-left corner.
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(img, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(img, caption, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)

def _bbox_to_corners(bbox, bbox_format, image_shape):
    """Convert one box in ``bbox_format`` to integer ``(x1, y1, x2, y2)`` pixel corners."""
    if bbox_format == 'coco':
        # [x_min, y_min, width, height] in pixels; each value is rounded on its own.
        x1 = int(round(bbox[0]))
        y1 = int(round(bbox[1]))
        w  = int(round(bbox[2]))
        h  = int(round(bbox[3]))
        return (x1, y1, x1 + w, y1 + h)
    if bbox_format == 'voc_pascal':
        # [x_min, y_min, x_max, y_max] in pixels.
        return (int(round(bbox[0])), int(round(bbox[1])),
                int(round(bbox[2])), int(round(bbox[3])))
    # 'yolo': [x_center, y_center, width, height], all relative to the image size.
    img_h, img_w = image_shape[0], image_shape[1]
    xc, yc, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
    return (int(round((xc - w / 2) * img_w)), int(round((yc - h / 2) * img_h)),
            int(round((xc + w / 2) * img_w)), int(round((yc + h / 2) * img_h)))


def draw_bboxes(img, bboxes, classes, class_ids, colors = None, show_classes = None, bbox_format = 'yolo',\
                class_name = False, line_thickness = 2,scores=None):
    """Return a copy of ``img`` with the given boxes drawn on it.

    Args:
        img: Image array; it is copied, the input is left untouched.
        bboxes: Sequence of boxes in ``bbox_format``.
        classes: Class name of each box.
        class_ids: Integer class id of each box.
        colors: One colour for all boxes, or a list indexed by class id.
            Defaults to green ``(0, 255, 0)``.
        show_classes: Class names to draw; defaults to every name in ``classes``.
        bbox_format: ``'coco'`` (x, y, w, h in pixels), ``'voc_pascal'``
            (x1, y1, x2, y2 in pixels) or ``'yolo'`` (normalised centre x/y,
            width, height).
        class_name: Caption boxes with the class name instead of the class id.
        line_thickness: Box line thickness forwarded to ``plot_one_box``.
        scores: Per-box score; it is multiplied by 100 before being passed to
            ``plot_one_box``. Required whenever ``bboxes`` is non-empty.

    Returns:
        The annotated copy of ``img``.

    Raises:
        ValueError: For an unknown ``bbox_format``, or when boxes are given
            without ``scores``.
    """
    image = img.copy()
    show_classes = classes if show_classes is None else show_classes
    colors = (0, 255 ,0) if colors is None else colors

    if bbox_format not in ('coco', 'voc_pascal', 'yolo'):
        raise ValueError('wrong bbox format')
    if len(bboxes) > 0 and scores is None:
        # Fail with a clear message rather than a TypeError from indexing None.
        raise ValueError('scores must be provided when bboxes is not empty')

    for idx in range(len(bboxes)):
        cls    = classes[idx]
        cls_id = class_ids[idx]
        score  = scores[idx]*100
        color  = colors[cls_id] if isinstance(colors, list) else colors

        if cls in show_classes:
            voc_bbox = _bbox_to_corners(bboxes[idx], bbox_format, image.shape)
            plot_one_box(voc_bbox,
                         image,
                         color = color,
                         label = cls if class_name else str(cls_id),
                         line_thickness = line_thickness,score=score)

    return image

def show_img(img, bboxes, bbox_format='yolo',scores=None):
    """Draw ``bboxes`` on ``img`` as 'starfish' boxes and return a 600x300 PIL image.

    Every box gets class name 'starfish' and class id 0; the colour list is the
    module-level ``colors``. Drawing is delegated to ``draw_bboxes``.
    """
    n_boxes = len(bboxes)
    annotated = draw_bboxes(
        img = img,
        bboxes = bboxes,
        classes = ['starfish'] * n_boxes,
        class_ids = [0] * n_boxes,
        class_name = True,
        colors = colors,
        bbox_format = bbox_format,
        line_thickness = 2,
        scores = scores,
    )
    preview = Image.fromarray(annotated)
    return preview.resize((600, 300))

# Fix NumPy's global RNG so the box colour is identical on every run.
np.random.seed(32)
# One random RGB triple per class (a single class here); each channel is
# drawn from 0..254 because randint's upper bound is exclusive.
colors = [tuple(np.random.randint(255) for _channel in range(3)) for _cls in range(1)]

In [12]:
def load_model(ckpt_path):
    """Load a custom checkpoint through ``torch.hub`` and configure it for inference.

    The model code is taken from the local directory
    '/kaggle/input/yolov5-lib-ds/' (``source='local'``) and the weights from
    ``ckpt_path``. Returns the loaded model with its inference attributes set.
    """
    model = torch.hub.load(
        '/kaggle/input/yolov5-lib-ds/',
        'custom',
        path=ckpt_path,
        source='local',
        force_reload=True,
    )
    # Inference settings applied as attributes on the hub model.
    # NOTE(review): names presumably follow YOLOv5's hub wrapper (confidence /
    # IoU thresholds, class filter, multi-label flag, detection cap) - confirm
    # against the bundled yolov5 version.
    inference_settings = {
        'conf': 0.25,
        'iou': 0.4,
        'classes': None,
        'multi_label': False,
        'max_det': 1000,
    }
    for attribute, value in inference_settings.items():
        setattr(model, attribute, value)
    return model

def get_prediction(model, img, size=3000, augment=True):
    """Run ``model`` on one image and return its boxes and confidences.

    Args:
        model: Callable invoked as ``model(img, size=..., augment=...)`` whose
            result exposes ``.pandas().xyxy[0]`` with the columns
            ``xmin, ymin, xmax, ymax, confidence``.
        img: Image passed straight to the model.
        size: Inference size forwarded to the model (default 3000, the value
            that used to be hard-coded).
        augment: Augmentation flag forwarded to the model (default True).

    Returns:
        ``(bboxes, confidences)``: an ``(N, 4)`` array of
        ``[xmin, ymin, xmax, ymax]`` rows and a length-``N`` array of
        confidences, or ``([], [])`` when nothing was detected.
    """
    results = model(img, size=size, augment=augment)
    preds   = results.pandas().xyxy[0]
    if len(preds) == 0:
        return [], []
    return preds[['xmin','ymin','xmax','ymax']].values, preds['confidence'].values

def wbf(bboxes, confs, norm_size=1280):
    """Fuse the detections of several models with Weighted Boxes Fusion.

    Args:
        bboxes: One ``(N_i, 4)`` array of pixel ``[x1, y1, x2, y2]`` boxes per model.
        confs: One length-``N_i`` array of confidences per model, in the same order.
        norm_size: Divisor that maps pixel coordinates into the [0, 1] range
            ``weighted_boxes_fusion`` works on. The same value is applied to
            both axes, so it should be the larger image dimension (1280 here).

    Returns:
        ``(boxes, scores, labels)`` as returned by ``weighted_boxes_fusion``,
        with ``boxes`` scaled back to pixels.

    The result is scaled back with the same factor used for normalisation;
    using ``norm_size - 1`` there (as before) shrinks every fused box slightly.
    """
    boxes  = [np.asarray(bbox) / norm_size for bbox in bboxes]
    scores = list(confs)
    # Single-class problem: every box gets the same label.
    labels = [np.ones(len(conf)) for conf in confs]

    # Equal weight per model, however many models are passed in.
    weights = [1] * len(boxes)
    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=weights, iou_thr=0.2, skip_box_thr=0.001)

    boxes = boxes * norm_size
    return boxes, scores, labels

In [13]:
# Make the font available offline in the Ultralytics config folder.
# -p creates missing parent directories and does not fail when the folder
# already exists, so this cell can be re-run safely.
!mkdir -p /root/.config/Ultralytics
!cp /kaggle/input/yolov5-font/Arial.ttf /root/.config/Ultralytics/Arial.ttf

In [14]:
# Load the two trained checkpoints that make up the ensemble (m1 first, then m2).
m1, m2 = (
    load_model(ckpt)
    for ckpt in ('/kaggle/input/yolowts/best.pt', '/kaggle/input/yolowts/best-2.pt')
)

In [15]:
# Image paths of the frames that contain more than one annotated box.
paths = df.loc[df['num_bbox'] > 1, 'image_path'].tolist()

## Visualization of predictions made by the WBF ensemble model

For visualization purposes only, we take our training data and compare the COTS predictions of three models: YOLOv5s6, YOLOv5n6 and the WBF ensemble of the two.

In [16]:
NUM_PREVIEW_IMAGES = 3  # how many training frames to visualise

for p in paths[:NUM_PREVIEW_IMAGES]:
    bgr = cv2.imread(p)
    if bgr is None:
        # cv2.imread returns None instead of raising when the file cannot be read.
        raise FileNotFoundError(f'could not read image: {p}')
    img = bgr[...,::-1]  # reverse the channel axis: BGR -> RGB

    b1, c1 = get_prediction(m1, img)
    b2, c2 = get_prediction(m2, img)
    if len(b1) > 0:
        b1 = np.array(b1)
    if len(b2) > 0:
        b2 = np.array(b2)

    # voc  => [x1, y1, x2, y2]
    # coco => [xmin, ymin, w, h]
    # model predictions are in voc format
    # for testing we need predictions in coco format
    if len(b1) > 0 and len(b2) > 0:
        # Both models found something: fuse their boxes.
        b, c, labels = wbf([b1, b2], [c1, c2])
        b = voc2coco(b).astype(int)
    elif len(b1) > 0:
        # Only one model found something: use its boxes as they are.
        b, c = voc2coco(b1, image_height=720, image_width=1280), c1
    elif len(b2) > 0:
        b, c = voc2coco(b2, image_height=720, image_width=1280), c2
    else:
        b, c = [], []

    # show_img draws nothing for an empty box list, so the empty and non-empty
    # cases need no separate branches.
    print('\n\nYOLOV5s6 Predictions ')
    display(show_img(img, b1, bbox_format='voc_pascal',scores=c1))

    print('\n\nYoloV5n6 Predictions ')
    display(show_img(img, b2, bbox_format='voc_pascal',scores=c2))

    print('\n\nEnsemble (WBF) Predictions ')
    display(show_img(img, b, bbox_format='coco',scores=c))

# Submission
The test dataset comprises about 13000 images and is completely hidden. We have to follow the instructions here https://www.kaggle.com/competitions/tensorflow-great-barrier-reef/overview/evaluation to create an environment and get the iterator over this image set. We then predict the bounding box coordinates in COCO format together with their confidences and add them to a dataframe.

To get the final F2 score for our model, we submit this notebook to the competition, which runs the notebook and evaluates the predictions in the dataframe (written to a submission.csv file in the /kaggle/working directory).

In [17]:
# Switch to the working directory; per the notebook text this is where submission.csv is written.
%cd /kaggle/working/

In [18]:
# Competition-provided module; presumably importable because the dataset
# folder was appended to sys.path in the import cell - verify if this fails.
import greatbarrierreef
# Create the competition environment used to receive frames and send predictions.
env = greatbarrierreef.make_env()
# Iterator over the hidden test set; it is consumed by the submission loop below.
iter_test = env.iter_test()  

In [19]:
MIN_SUBMIT_SCORE = 0.1  # boxes at or below this score are left out of the submission

for (img, prediction_df) in iter_test:
    b1, c1 = get_prediction(m1, img)
    b2, c2 = get_prediction(m2, img)

    if len(b1) > 0:
        b1 = np.array(b1)
    if len(b2) > 0:
        b2 = np.array(b2)

    # Model output is voc ([x1, y1, x2, y2]); the submission needs coco ([x, y, w, h]).
    if len(b1) > 0 and len(b2) > 0:
        # Both models found something: fuse their boxes.
        b, c, labels = wbf([b1, b2], [c1, c2])
        b = voc2coco(b).astype(int)
    elif len(b1) > 0:
        b, c = voc2coco(b1, 720, 1280), c1
    elif len(b2) > 0:
        b, c = voc2coco(b2, 720, 1280), c2
    else:
        # Reset the scores too, so no value from a previous frame lingers.
        b, c = [], []

    # One "score x y width height" entry per kept box.
    predictions = [
        '{:.2f} {} {} {} {}'.format(score, int(box[0]), int(box[1]), int(box[2]), int(box[3]))
        for box, score in zip(b, c)
        if score > MIN_SUBMIT_SCORE
    ]
    print(predictions)
    prediction_df['annotations'] = ' '.join(predictions)
    env.predict(prediction_df)

Reference:
https://www.kaggle.com/code/mahipalsingh/gbr-yolox-yolov5-ensemble-2-o