In [None]:
!pip install ensemble-boxes

In [None]:
import os
from pathlib import Path
from pprint import pprint

import numpy as np
import pandas as pd
from ensemble_boxes import nms, non_maximum_weighted, soft_nms, weighted_boxes_fusion
from tqdm.auto import tqdm

In [2]:
# Disease label -> integer class id, used to encode the `class` column of prediction files.
class_map = dict(anthracnose=0, cssvd=1, healthy=2)

# Reverse lookup (integer class id -> disease label) used to decode fused predictions.
id_map = dict(zip(class_map.values(), class_map.keys()))

In [None]:
# Test-set table; run_wbf reads its Image_ID, width and height columns.
test = pd.read_csv(filepath_or_buffer=r'data/test.csv')

In [4]:
# Preview the first five rows of the test table.
test.head(n=5)

Unnamed: 0.1,Unnamed: 0,Image_ID,class,confidence,ymin,xmin,ymax,xmax,class_id,ImagePath,width,height
0,0,ID_Genxyu.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,/kaggle/input/amini-cocoa-contamination-challe...,2448,3264
1,1,ID_svY6TG.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,/kaggle/input/amini-cocoa-contamination-challe...,416,416
2,2,ID_d0gpda.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,/kaggle/input/amini-cocoa-contamination-challe...,2448,3264
3,3,ID_frWmBT.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,/kaggle/input/amini-cocoa-contamination-challe...,416,416
4,4,ID_TaRW6o.jpg,0.0,0.0,0.0,0.0,0.0,0.0,0.0,/kaggle/input/amini-cocoa-contamination-challe...,2448,3264


In [12]:
# Column layout shared by every row of the fused prediction table.
_BOX_COLUMNS = ['xmin', 'ymin', 'xmax', 'ymax']
_OUTPUT_COLUMNS = ['Image_ID', 'class', 'confidence'] + _BOX_COLUMNS

# Legacy weighting, applied only when exactly two prediction files are fused.
_DEFAULT_PAIR_WEIGHTS = (2, 1)

# Class id written for images that end up without any fused box (2 is 'healthy' in class_map).
_PLACEHOLDER_CLASS_ID = 2

# soft_nms `method` argument: 1 - linear soft-NMS, 2 - gaussian soft-NMS, 3 - standard NMS.
_SOFT_NMS_MODES = {'soft-1': 1, 'soft-2': 2, 'soft-3': 3}

# Accepted method name -> `conf_type` passed to weighted_boxes_fusion.
# 'wbf' (the default of run_wbf) is an alias for the library's default 'avg' mode, and
# 'absent_model_aware_avg' is accepted with or without the 'wbf-' prefix.
_WBF_CONF_TYPES = {
    'wbf': 'avg',
    'wbf-avg': 'avg',
    'wbf-max': 'max',
    'wbf-box_and_model_avg': 'box_and_model_avg',
    'wbf-absent_model_aware_avg': 'absent_model_aware_avg',
    'absent_model_aware_avg': 'absent_model_aware_avg',
}

_VALID_METHODS = ('nms', 'nmw', *_SOFT_NMS_MODES, *_WBF_CONF_TYPES)


def _load_predictions(directory, files_to_exclude, image_sizes):
    """Read each prediction CSV in `directory` and rescale its boxes to the [0, 1] range."""
    excluded = set(files_to_exclude)
    frames = []
    # Sorted so that the model order (and therefore the weight assignment) is
    # reproducible; os.listdir returns entries in arbitrary order.
    for file in tqdm(sorted(os.listdir(directory))):
        file_path = directory / file
        # Skip excluded names, sub-directories (e.g. .ipynb_checkpoints) and non-CSV files.
        if file in excluded or not file_path.is_file() or file_path.suffix.lower() != '.csv':
            continue
        df = pd.read_csv(str(file_path))
        # Inner join: predictions for images that are not listed in `test` are dropped.
        df = pd.merge(df, image_sizes, on='Image_ID')
        df = df.assign(
            xmin=(df['xmin'] / df['width']).clip(0, 1),
            ymin=(df['ymin'] / df['height']).clip(0, 1),
            xmax=(df['xmax'] / df['width']).clip(0, 1),
            ymax=(df['ymax'] / df['height']).clip(0, 1),
        )
        df['class'] = df['class'].map(class_map)
        frames.append(df)
    return frames


def _resolve_weights(weights, n_models):
    """Return the per-model weights to pass to the fusion call, or None for equal weights."""
    if weights is None:
        if n_models == len(_DEFAULT_PAIR_WEIGHTS):
            return list(_DEFAULT_PAIR_WEIGHTS)
        # The legacy pair weights cannot apply to another number of models.
        return None
    weights = list(weights)
    if len(weights) != n_models:
        raise ValueError(
            f'got {len(weights)} weights for {n_models} prediction file(s); '
            'pass one weight per file'
        )
    return weights


def _fuse(method, boxes, scores, classes, weights, iou_thr, skip_box_thr):
    """Dispatch one image's per-model predictions to the selected ensemble_boxes routine."""
    if method == 'nms':
        return nms(boxes, scores, classes, iou_thr, weights)
    if method in _SOFT_NMS_MODES:
        return soft_nms(boxes, scores, classes, weights=weights, iou_thr=iou_thr,
                        method=_SOFT_NMS_MODES[method])
    if method == 'nmw':
        return non_maximum_weighted(boxes, scores, classes, weights=weights,
                                    iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    if method in _WBF_CONF_TYPES:
        return weighted_boxes_fusion(boxes, scores, classes, weights=weights,
                                     iou_thr=iou_thr, skip_box_thr=skip_box_thr,
                                     conf_type=_WBF_CONF_TYPES[method],
                                     allows_overflow=False)
    raise ValueError(f'wrong method. Got {method!r}, expected one of {_VALID_METHODS}')


def run_wbf(directory : Path, files_to_exclude: list, test: pd.DataFrame, method = 'wbf', filename = 'sub',
            weights = None, iou_thr = 0.6, skip_box_thr = 0.0001):
    """Fuse the per-model prediction CSVs found in `directory` into one prediction table.

    Every CSV in `directory` (except names in `files_to_exclude`) is treated as one
    model's predictions. Boxes are normalised by the image size taken from `test`,
    fused image by image with the selected method, scaled back to pixels, and the
    result is written to ``f'{filename}_{method}.csv'``.

    Parameters
    ----------
    directory : Path
        Folder containing the prediction CSV files. Each file needs the columns
        Image_ID, class, confidence, xmin, ymin, xmax, ymax.
    files_to_exclude : list
        File names inside `directory` to ignore.
    test : pd.DataFrame
        Table with the columns Image_ID, width and height. One output entry is
        produced for every distinct Image_ID, in order of first appearance.
    method : str
        One of 'nms', 'soft-1', 'soft-2', 'soft-3', 'nmw', 'wbf' (alias of
        'wbf-avg'), 'wbf-avg', 'wbf-max', 'wbf-box_and_model_avg',
        'absent_model_aware_avg' (also accepted as 'wbf-absent_model_aware_avg').
    filename : str
        Prefix of the CSV file that is written.
    weights : sequence of numbers, optional
        One weight per prediction file, in alphabetical file order. When omitted,
        [2, 1] is used if exactly two files are fused and equal weights otherwise.
    iou_thr : float
        IoU threshold forwarded to the fusion routine.
    skip_box_thr : float
        Minimum confidence forwarded to the 'nmw' and WBF routines.

    Returns
    -------
    pd.DataFrame
        Columns Image_ID, class (label name), confidence, xmin, ymin, xmax, ymax in
        pixel units. Images without any fused box get a single placeholder row
        ('healthy', confidence 0, all-zero box).

    Raises
    ------
    ValueError
        If `method` is unknown, no prediction file is found, or the number of
        `weights` does not match the number of prediction files.
    """
    # Fail before any file is read when the method name is wrong.
    if method not in _VALID_METHODS:
        raise ValueError(f'wrong method. Got {method!r}, expected one of {_VALID_METHODS}')

    directory = Path(directory)
    # One size per image; duplicated Image_ID rows in `test` would otherwise
    # multiply the predictions in the merge below.
    image_sizes = test[['Image_ID', 'width', 'height']].drop_duplicates('Image_ID')

    dfs = _load_predictions(directory, files_to_exclude, image_sizes)
    if not dfs:
        raise ValueError(f'no prediction CSV files found in {directory}')
    model_weights = _resolve_weights(weights, len(dfs))

    # Split every model's table once instead of re-filtering it for each image.
    rows_by_model = [dict(iter(df.groupby('Image_ID', sort=False))) for df in dfs]

    frames = []
    for image_id, width, height in tqdm(image_sizes.itertuples(index=False, name=None),
                                        total=len(image_sizes)):
        boxes, scores, classes = [], [], []
        for rows_by_image in rows_by_model:
            rows = rows_by_image.get(image_id)
            if rows is None:
                # This model predicted nothing for the image.
                boxes.append([])
                scores.append([])
                classes.append([])
            else:
                boxes.append(rows[_BOX_COLUMNS].values.tolist())
                scores.append(rows['confidence'].values.tolist())
                classes.append(rows['class'].values.tolist())

        if any(boxes):
            fused_boxes, fused_scores, fused_classes = _fuse(
                method, boxes, scores, classes, model_weights, iou_thr, skip_box_thr)
        else:
            # No model has a box for this image, so there is nothing to fuse.
            fused_boxes, fused_scores, fused_classes = [], [], []

        # A single surviving box is a valid detection; only an empty result
        # falls back to the placeholder row.
        if len(fused_boxes) > 0:
            fused_boxes = np.asarray(fused_boxes, dtype=float).reshape(-1, 4)
            frames.append(pd.DataFrame({
                'Image_ID': [image_id] * len(fused_boxes),
                'class': fused_classes,
                'confidence': fused_scores,
                # Back from normalised coordinates to pixels.
                'xmin': fused_boxes[:, 0] * width,
                'ymin': fused_boxes[:, 1] * height,
                'xmax': fused_boxes[:, 2] * width,
                'ymax': fused_boxes[:, 3] * height,
            }))
        else:
            frames.append(pd.DataFrame({
                'Image_ID': [image_id],
                'class': [_PLACEHOLDER_CLASS_ID],
                'confidence': [0],
                'xmin': [0],
                'ymin': [0],
                'xmax': [0],
                'ymax': [0],
            }))

    # Concatenate once; growing a frame inside the loop is quadratic.
    if frames:
        result_df = pd.concat(frames, ignore_index=True)
    else:
        result_df = pd.DataFrame(columns=_OUTPUT_COLUMNS)

    result_df['class'] = result_df['class'].map(id_map)
    result_df.to_csv(f'{filename}_{method}.csv', index = False)
    return result_df


In [None]:
# Folder that holds the prediction CSV file(s) to fuse; point it at the best submission.
directory = Path("full linear")
# File names inside `directory` to leave out of the fusion.
files_to_exclude = list()

# One run per listed method; run_wbf writes '<filename>_<method>.csv' for each.
for m in tqdm(['wbf-max']):
    _ = run_wbf(
        directory,
        files_to_exclude,
        test,
        method=m,
        filename='rfdetr_9_epochs_full_linear_best_ema',
    )

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1 [00:00<?, ?it/s]

  0%|          | 0/1626 [00:00<?, ?it/s]

