## **Libraries**

In [None]:
!pip install ensemble_boxes

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting ensemble_boxes
  Downloading ensemble_boxes-1.0.9-py3-none-any.whl (23 kB)
Installing collected packages: ensemble_boxes
Successfully installed ensemble_boxes-1.0.9


In [None]:
# Libraries
import pandas as pd
from ensemble_boxes import weighted_boxes_fusion

## **Data**

In [None]:
# Sample submission extended with image sizes. The cells below read its `image_id`,
# `width` and `height` columns to rescale bounding boxes, and its `PredictionString`
# column is replaced when the final submission is built.
# NOTE(review): the path lives under /content/drive, so Google Drive presumably has to
# be mounted first — no mount cell is visible in this notebook; confirm.
submission_data_path = "/content/drive/MyDrive/AI/sample_submission_with_sizes.csv"
submission_data = pd.read_csv(submission_data_path)

## **Ensemble Method with WBF**

In [None]:
def ensemble(subm_list, iou_same=0.5, out_path=None, skip_box_thr=0.00000001):
    """Fuse several detection submissions into one CSV with Weighted Boxes Fusion (WBF).

    Each submission CSV must provide the columns ``image_id`` and
    ``PredictionString`` and must cover exactly the same image ids. A
    ``PredictionString`` is a whitespace-separated sequence of
    ``class score x1 y1 x2 y2`` groups. Box coordinates are divided by the image
    width/height taken from the notebook-level ``submission_data`` frame before
    fusion and multiplied back (then truncated to int) afterwards.

    Args:
        subm_list: Iterable of ``(csv_path, weight)`` pairs, one per submission.
        iou_same: Passed to ``weighted_boxes_fusion`` as ``iou_thr``.
        out_path: Path of the CSV file to write (required).
        skip_box_thr: Passed to ``weighted_boxes_fusion`` as ``skip_box_thr``.

    Returns:
        None. The fused predictions are written to ``out_path`` with the header
        ``image_id,PredictionString``; an image without any fused box gets the
        "No Finding" row ``14 1 0 0 1 1``.

    Raises:
        ValueError: If ``out_path`` is missing, ``subm_list`` is empty, the
            submissions do not share the same image ids, or a prediction string
            is not made of complete 6-value groups.
        KeyError: If an image with predictions has no row in ``submission_data``.
    """
    if out_path is None:
        raise ValueError('out_path is required: it is the CSV file the ensemble is written to')

    preds = []
    weights = []
    checker = None  # sorted image ids of the first submission; every other one must match

    # Load every submission in a common (sorted) row order
    for path, weight in subm_list:
        s = pd.read_csv(path).sort_values('image_id').reset_index(drop=True)
        ids = tuple(s['image_id'])
        if checker is None:
            checker = ids
        elif ids != checker:
            # Raise instead of exit(): exit() would shut down the notebook kernel
            raise ValueError(
                'Different IDS! {} has {} rows; reference ids missing from it: {}'.format(
                    path, len(ids), set(checker) - set(ids)
                )
            )
        preds.append(s['PredictionString'].values)
        weights.append(weight)

    if checker is None:
        raise ValueError('subm_list is empty: there is nothing to ensemble')

    # Build the id -> (width, height) lookup once instead of filtering the frame for
    # every coordinate; on duplicated ids the first row wins
    sizes = submission_data.drop_duplicates(subset='image_id')
    size_by_id = dict(zip(sizes['image_id'], zip(sizes['width'], sizes['height'])))

    # The context manager closes the file even if a row fails half-way through
    with open(out_path, 'w') as out:
        out.write('image_id,PredictionString\n')

        for j, image_id in enumerate(checker):
            size = size_by_id.get(image_id)  # None when the id has no size record

            boxes_list = []
            scores_list = []
            labels_list = []

            # One (possibly empty) list of boxes/scores/labels per submission
            for pred in preds:
                boxes = []
                scores = []
                labels = []
                p1 = pred[j]
                if not pd.isna(p1):
                    if size is None:
                        raise KeyError(
                            'image_id {!r} has predictions but no width/height in submission_data'.format(image_id)
                        )
                    width, height = size
                    arr = p1.split()
                    if len(arr) % 6 != 0:
                        raise ValueError(
                            'PredictionString of image {!r} is not made of 6-value groups'.format(image_id)
                        )
                    for k in range(0, len(arr), 6):
                        # Scale the corners by the image size before fusion
                        boxes.append([
                            float(arr[k + 2]) / width,
                            float(arr[k + 3]) / height,
                            float(arr[k + 4]) / width,
                            float(arr[k + 5]) / height,
                        ])
                        scores.append(float(arr[k + 1]))
                        labels.append(int(arr[k]))

                boxes_list.append(boxes)
                scores_list.append(scores)
                labels_list.append(labels)

            # Apply the "WBF" method
            boxes, scores, labels = weighted_boxes_fusion(
                boxes_list,
                scores_list,
                labels_list,
                iou_thr=iou_same,
                skip_box_thr=skip_box_thr,
                weights=weights,
                allows_overflow=True,
            )

            # If no bounding boxes, then it's classified as "No Finding"
            if len(boxes) == 0:
                out.write('{},14 1 0 0 1 1\n'.format(image_id))
                continue

            # Fused boxes exist only if something was parsed above, so `size` is set here
            width, height = size
            parts = []
            for box, prob, label in zip(boxes, scores, labels):
                cls = int(label)
                if cls == 14:
                    # Class 14 is written with the fixed 0 0 1 1 box
                    parts.append('{} {} {} {} {} {}'.format(cls, prob, 0, 0, 1, 1))
                else:
                    # float() keeps the product in double precision whatever dtype WBF returns
                    x1 = int(float(box[0]) * width)
                    y1 = int(float(box[1]) * height)
                    x2 = int(float(box[2]) * width)
                    y2 = int(float(box[3]) * height)
                    parts.append('{} {} {} {} {} {}'.format(cls, prob, x1, y1, x2, y2))
            out.write('{},{}\n'.format(image_id, ' '.join(parts)))

In [None]:
def ensemble_experiment_v5_yolo(sp='/content/', best_iou=0.3):
    """Run the WBF ensemble over the 5 YOLOv5x fold submissions.

    Args:
        sp: Root folder holding ``submission_v5_60_640_wbf_<fold>.csv`` for folds
            0-4; it must end with a path separator because file names are
            appended to it directly. The result is written to the same folder.
        best_iou: IoU threshold handed to ``ensemble`` for the "WBF" technique.

    Returns:
        None. The fused predictions are written to
        ``<sp>ensemble_yolo_standard.csv``.
    """
    # One (path, weight) pair per fold, every fold weighted equally
    subm_list = [
        (sp + 'submission_v5_60_640_wbf_{}.csv'.format(fold), 1)
        for fold in range(5)
    ]

    # The file name is fixed on purpose: a later cell reads it back by this exact name
    out_path = sp + 'ensemble_yolo_standard.csv'

    ensemble(subm_list, best_iou, out_path)

In [None]:
# Fuse the predictions of the 5 YOLOv5x folds with WBF; the result is written to
# /content/ensemble_yolo_standard.csv (nothing is returned).
ensemble_experiment_v5_yolo()

In [None]:
# Load the fused YOLOv5x predictions written by the ensemble cell and display them
yolov5 = pd.read_csv("/content/ensemble_yolo_standard.csv")
yolov5

Unnamed: 0,image_id,PredictionString
0,002a34c58c5b758217ed1f584ccbcfe9,3 0.2200000047683716 833 1228 1901 1623 11 0.1...
1,004f33259ee4aef671c2b95d54e4be68,0 0.7799999713897705 1268 589 1530 901
2,008bdde2af2462e86fd373a445d0f4cd,0 0.8399999737739563 1427 826 1734 1178 3 0.77...
3,009bc039326338823ca3aa84381f17f1,3 0.7599999904632568 662 1062 1559 1348 0 0.54...
4,00a2145de1886cb9eb88869c85d74080,3 0.8199999928474426 770 1294 1861 1644 0 0.72...
...,...,...
2995,ff91fb82429a27521bbec8569b041f02,0 0.8600000143051147 1622 629 1885 917 3 0.800...
2996,ff9fcc4087ed5e941209aa3fa948e364,0 0.800000011920929 1150 695 1423 1041 3 0.240...
2997,ffaa288c8abca300974f043b57d81521,10 0.6199999809265136 1849 1526 2336 2107 11 0...
2998,ffc441e0c8b7153844047483a577e7c3,0 0.3599999904632568 991 640 1227 919 14 0.2 0...


In [None]:
# Import the dataframe from ResNet18 results (one row per image: class_name, class_id, score)
# NOTE(review): in the preview rendered below, rows labelled "No Finding" (class_id 0)
# carry low scores while the "Finding" row (class_id 1) scores ~0.997, so `score` looks
# like the probability of "Finding" rather than of the predicted class — confirm.
resnet = pd.read_csv("/content/2_submission_2_cls_resnet18.csv")
resnet

Unnamed: 0,image_id,class_name,class_id,score
0,002a34c58c5b758217ed1f584ccbcfe9,No Finding,0,0.088070
1,004f33259ee4aef671c2b95d54e4be68,No Finding,0,0.005598
2,008bdde2af2462e86fd373a445d0f4cd,No Finding,0,0.001754
3,009bc039326338823ca3aa84381f17f1,No Finding,0,0.016275
4,00a2145de1886cb9eb88869c85d74080,No Finding,0,0.007829
...,...,...,...,...
2995,ff91fb82429a27521bbec8569b041f02,No Finding,0,0.053834
2996,ff9fcc4087ed5e941209aa3fa948e364,No Finding,0,0.002410
2997,ffaa288c8abca300974f043b57d81521,No Finding,0,0.275810
2998,ffc441e0c8b7153844047483a577e7c3,Finding,1,0.997017


In [None]:
def submission(yolov5, resnet, submission_data,
               out_path='/content/submission_ensemble_yolov5_resnet18.csv'):
    """Combine the YOLOv5x ensemble boxes with the ResNet18 image-level classifier.

    Rows are matched on ``image_id`` (not on row position), and the output keeps
    the row order of ``submission_data``. For every image:

    * ``class_id == 0`` ("No Finding") and ``score > 0.9``: the prediction is
      replaced by ``14 1 0 0 1 1``;
    * ``class_id == 0`` otherwise: ``14 1 0 0 1 1`` is appended to the YOLOv5x
      prediction;
    * any other ``class_id``: the YOLOv5x prediction is kept as is.

    A missing (NaN) YOLOv5x prediction is written as ``14 1 0 0 1 1``.

    NOTE(review): the ResNet18 preview in this notebook suggests ``score`` is the
    probability of "Finding"; if so, the ``score > 0.9`` branch can never fire
    for ``class_id == 0``. Confirm the meaning of ``score`` before relying on it.

    Args:
        yolov5: DataFrame with columns ``image_id`` and ``PredictionString``.
        resnet: DataFrame with columns ``image_id``, ``class_id`` and ``score``.
        submission_data: DataFrame with columns ``image_id``,
            ``PredictionString``, ``width`` and ``height``. It is not modified.
        out_path: Where the final submission CSV is written.

    Returns:
        The last rows (``.tail()``) of the saved submission frame, so that a
        notebook cell calling this function displays them.

    Raises:
        ValueError: If an image of ``submission_data`` is absent from ``yolov5``
            or ``resnet``.
        pandas.errors.MergeError: If ``yolov5`` or ``resnet`` holds duplicated
            ``image_id`` values.
    """
    no_finding = "14 1 0 0 1 1"

    # Fail loudly rather than silently pairing predictions with the wrong image
    wanted_ids = set(submission_data["image_id"])
    for frame_name, frame in (("yolov5", yolov5), ("resnet", resnet)):
        missing = wanted_ids - set(frame["image_id"])
        if missing:
            raise ValueError(
                "{} image_id(s) of submission_data are missing from {}".format(len(missing), frame_name)
            )

    # A left merge keeps the row order of submission_data
    merged = (
        submission_data[["image_id"]]
        .merge(yolov5[["image_id", "PredictionString"]], on="image_id", how="left", validate="many_to_one")
        .merge(resnet[["image_id", "class_id", "score"]], on="image_id", how="left", validate="many_to_one")
    )

    final_PredictionString = []
    for boxes, class_id, score in zip(merged["PredictionString"], merged["class_id"], merged["score"]):
        if class_id == 0 and score > 0.9:
            # Confident "No Finding": discard the detector output
            final_PredictionString.append(no_finding)
        elif pd.isna(boxes):
            # No detector output at all for this image
            final_PredictionString.append(no_finding)
        elif class_id == 0:
            # Unsure "No Finding": keep the boxes and add the "No Finding" entry
            final_PredictionString.append(boxes + " " + no_finding)
        else:
            # "Finding": keep the YOLOv5x ensemble prediction
            final_PredictionString.append(boxes)

    # Work on a new frame so the caller's data (and its width/height columns,
    # which the ensemble step needs) stays intact and the cell can be re-run
    result = submission_data.drop(columns=["PredictionString", "width", "height"])
    result["PredictionString"] = final_PredictionString

    # Save to csv
    result.to_csv(out_path, index=False)

    return result.tail()

In [None]:
# Build the final submission from the YOLOv5x ensemble and the ResNet18 classifier,
# then save it to /content/submission_ensemble_yolov5_resnet18.csv
submission(yolov5, resnet, submission_data)

Unnamed: 0,image_id,PredictionString
2995,ff91fb82429a27521bbec8569b041f02,0 0.8600000143051147 1622 629 1885 917 3 0.800...
2996,ff9fcc4087ed5e941209aa3fa948e364,0 0.800000011920929 1150 695 1423 1041 3 0.240...
2997,ffaa288c8abca300974f043b57d81521,10 0.6199999809265136 1849 1526 2336 2107 11 0...
2998,ffc441e0c8b7153844047483a577e7c3,0 0.3599999904632568 991 640 1227 919 14 0.2 0...
2999,ffccf1709d0081d122a1d1f9edbefdf1,0 0.6800000071525574 1379 877 1731 1320 13 0.4...
