In [1]:
# Mount Google Drive into the Colab runtime; the prediction CSVs read later
# in this notebook live under /content/drive.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [2]:
!pip install ensemble_boxes

Collecting ensemble_boxes
  Downloading https://files.pythonhosted.org/packages/5c/b8/ecf44eaf796feee16a32dab193fcbbb625c61a188be47c7616aa020bc97c/ensemble_boxes-1.0.4-py3-none-any.whl
Installing collected packages: ensemble-boxes
Successfully installed ensemble-boxes-1.0.4


In [32]:
import numpy as np
import pandas as pd
import time
from multiprocessing import Pool, Process, cpu_count, Manager
from ensemble_boxes import *




def process_single_id(id, res_boxes, weights, params):
    """Fuse the boxes that every model predicted for a single image.

    Parameters
    ----------
    id : hashable
        Key into ``res_boxes`` identifying the image.
    res_boxes : dict
        Maps an image id to a list holding one entry per model. Each entry is
        a sequence of rows indexed as
        ``[xmin, ymin, xmax, ymax, score, class]``.
    weights : sequence of float
        Per-model weights, forwarded unchanged to the fusion routine.
    params : dict
        ``'run_type'`` (required) selects the fusion routine: ``'wbf'``,
        ``'nms'``, ``'soft-nms'`` or ``'nmw'``. The remaining keys are the
        routine-specific thresholds read below. ``'verbose'`` (optional,
        default ``False``) prints every skipped box. ``'limit_boxes'``
        (optional) keeps only the first N fused boxes.

    Returns
    -------
    tuple
        ``(boxes, scores, labels, ids_list)``: fused boxes in the same
        1024-divided coordinates used as input, their scores, the original
        class labels as a string array, and a list repeating ``id`` once per
        fused box.

    Raises
    ------
    ValueError
        If ``params['run_type']`` is not one of the supported values.
    """
    run_type = params['run_type']
    # 'verbose' is optional, matching how ensemble_predictions treats it.
    verbose = params.get('verbose', False)

    boxes_list = []
    scores_list = []
    labels_list = []
    # The fusion routines receive integer labels, so class names are mapped to
    # consecutive integers and translated back after fusion.
    labels_to_use_forward = dict()
    labels_to_use_backward = dict()

    for dt in res_boxes[id]:
        boxes = []
        scores = []
        labels = []

        for row in dt:
            lbl = row[5]
            scr = float(row[4])
            # Coordinates are divided by 1024 (assumes 1024x1024 images --
            # the caller multiplies by the same constant afterwards).
            box_x1 = float(row[0] / 1024)
            box_y1 = float(row[1] / 1024)
            box_x2 = float(row[2] / 1024)
            box_y2 = float(row[3] / 1024)

            if box_x1 >= box_x2:
                if verbose:
                    print('Problem with box x1 and x2: {}. Skip it'.format(row))
                continue
            if box_y1 >= box_y2:
                if verbose:
                    print('Problem with box y1 and y2: {}. Skip it'.format(row))
                continue
            if scr <= 0:
                if verbose:
                    print('Problem with box score: {}. Skip it'.format(row))
                continue

            boxes.append([box_x1, box_y1, box_x2, box_y2])
            scores.append(scr)
            if lbl not in labels_to_use_forward:
                cur_point = len(labels_to_use_forward)
                labels_to_use_forward[lbl] = cur_point
                labels_to_use_backward[cur_point] = lbl
            labels.append(labels_to_use_forward[lbl])

        # reshape keeps a (0, 4) shape when every box of this model was
        # skipped, so the array stays stackable with the other models' boxes.
        boxes_list.append(np.array(boxes, dtype=np.float32).reshape(-1, 4))
        scores_list.append(np.array(scores, dtype=np.float32))
        labels_list.append(np.array(labels, dtype=np.int32))

    # Empty predictions for all models: return the same 4-tuple shape as the
    # normal path so callers can always unpack four values.
    if len(boxes_list) == 0:
        return (
            np.zeros((0, 4), dtype=np.float32),
            np.zeros((0,), dtype=np.float32),
            np.array([], dtype=str),
            [],
        )

    if run_type == 'wbf':
        merged_boxes, merged_scores, merged_labels = weighted_boxes_fusion(
            boxes_list, scores_list, labels_list,
            weights=weights, iou_thr=params['intersection_thr'],
            skip_box_thr=params['skip_box_thr'],
            conf_type=params['conf_type'])
    elif run_type == 'nms':
        merged_boxes, merged_scores, merged_labels = nms(
            boxes_list, scores_list, labels_list,
            weights=weights, iou_thr=params['iou_thr'])
    elif run_type == 'soft-nms':
        merged_boxes, merged_scores, merged_labels = soft_nms(
            boxes_list, scores_list, labels_list,
            weights=weights, iou_thr=params['iou_thr'],
            sigma=params['sigma'], thresh=params['thresh'])
    elif run_type == 'nmw':
        merged_boxes, merged_scores, merged_labels = non_maximum_weighted(
            boxes_list, scores_list, labels_list,
            weights=weights, iou_thr=params['intersection_thr'],
            skip_box_thr=params['skip_box_thr'])
    else:
        raise ValueError(
            "Unknown run_type: {!r}. Expected one of 'wbf', 'nms', 'soft-nms', 'nmw'".format(run_type))

    if 'limit_boxes' in params:
        limit_boxes = params['limit_boxes']
        if len(merged_boxes) > limit_boxes:
            merged_boxes = merged_boxes[:limit_boxes]
            merged_scores = merged_scores[:limit_boxes]
            merged_labels = merged_labels[:limit_boxes]

    # Rename labels back; int() makes the lookup independent of whether the
    # fusion routine returned integer or floating-point label values.
    merged_labels = np.array(
        [labels_to_use_backward[int(m)] for m in merged_labels], dtype=str)

    # Create IDs array
    ids_list = [id] * len(merged_labels)

    return merged_boxes.copy(), merged_scores.copy(), merged_labels.copy(), ids_list


def process_part_of_data(proc_number, return_dict, ids_to_use, res_boxes, weights, params):
    """Worker body: fuse every image in ``ids_to_use`` and publish the results.

    One ``(boxes, scores, labels, ids_list)`` tuple is produced per image by
    ``process_single_id``; the collected list is stored in ``return_dict``
    under the key ``proc_number``.
    """
    print('Start process: {} IDs to proc: {}'.format(proc_number, len(ids_to_use)))
    fused = []
    for image_id in ids_to_use:
        # Unpack explicitly so a malformed return value fails here, inside
        # the worker, rather than later in the parent.
        boxes, scores, labels, image_ids = process_single_id(image_id, res_boxes, weights, params)
        fused.append((boxes, scores, labels, image_ids))
    return_dict[proc_number] = list(fused)


def ensemble_predictions(pred_filenames, weights, params):
    """Read per-model prediction CSVs and fuse them image by image.

    Parameters
    ----------
    pred_filenames : sequence of str
        CSV files with at least the columns ``image_id``, ``xmin``, ``ymin``,
        ``xmax``, ``ymax``, ``score`` and ``class``.
    weights : sequence of number
        One weight per file. Files whose weight is 0 are not read.
    params : dict
        Fusion settings passed on to ``process_single_id``; the optional
        ``'verbose'`` key also enables a shape printout here.

    Returns
    -------
    pandas.DataFrame
        Columns ``image_id, class, score, xmin, ymin, xmax, ymax, width,
        height``. The frame is empty (same columns) when there is nothing to
        fuse.

    Raises
    ------
    ValueError
        If ``pred_filenames`` and ``weights`` differ in length.
    RuntimeError
        If a worker process ended without storing its results.

    Notes
    -----
    The image ids of the first file that is read act as the reference: rows
    of later files whose id is not in that set are dropped.

    NOTE(review): an image that is absent from one model's CSV ends up with
    fewer box lists than there are weights, so the weights no longer line up
    with the models for that image -- confirm how the fusion routines treat a
    length mismatch.
    """
    verbose = params.get('verbose', False)
    output_columns = ['image_id', 'class', 'score', 'xmin', 'ymin', 'xmax', 'ymax', 'width', 'height']
    # Must match the divisor process_single_id applies to the input boxes.
    image_size = 1024

    start_time = time.time()
    procs_to_use = max(cpu_count() // 2, 1)
    print('Use processes: {}'.format(procs_to_use))
    weights = np.array(weights)
    if len(weights) != len(pred_filenames):
        raise ValueError('Number of weights ({}) must match number of prediction files ({})'.format(
            len(weights), len(pred_filenames)))

    res_boxes = dict()
    ref_ids = None
    for filename, weight in zip(pred_filenames, weights):
        if weight == 0:
            continue
        print('Read {}...'.format(filename))
        # Built-in str instead of the np.str alias, which newer NumPy
        # releases no longer provide.
        s = pd.read_csv(filename, dtype={'image_id': str, 'class': str})
        unique_ids = tuple(sorted(s['image_id'].unique()))
        if ref_ids is None:
            ref_ids = unique_ids
        elif ref_ids != unique_ids:
            print('Different IDs in ensembled CSVs! {} != {}'.format(len(ref_ids), len(unique_ids)))
            s = s[s['image_id'].isin(ref_ids)]
        # Stable sort: rows of one image keep their order from the file.
        s = s.sort_values('image_id', kind='mergesort').reset_index(drop=True)
        ids = s['image_id'].values
        preds = s[['xmin', 'ymin', 'xmax', 'ymax', 'score', 'class']].values

        # Group this model's rows by image, then append one list per image.
        single_res = dict()
        for image_id, pred in zip(ids, preds):
            single_res.setdefault(image_id, []).append(pred)
        for image_id, rows in single_res.items():
            res_boxes.setdefault(image_id, []).append(rows)

    # Reduce weights if needed
    weights = weights[weights != 0]

    ids_to_use = sorted(res_boxes)
    if not ids_to_use:
        # Nothing was read (e.g. every weight is 0 or the CSVs are empty).
        print('Run time: {:.2f}'.format(time.time() - start_time))
        return pd.DataFrame(columns=output_columns)

    manager = Manager()
    return_dict = manager.dict()
    jobs = []
    for i in range(procs_to_use):
        start = i * len(ids_to_use) // procs_to_use
        end = (i + 1) * len(ids_to_use) // procs_to_use
        if i == procs_to_use - 1:
            end = len(ids_to_use)
        p = Process(target=process_part_of_data,
                    args=(i, return_dict, ids_to_use[start:end], res_boxes, weights, params))
        jobs.append(p)
        p.start()

    for job in jobs:
        job.join()

    results = []
    for i, job in enumerate(jobs):
        if i not in return_dict:
            # A worker that raised never writes its slot; report that
            # directly instead of failing with a bare KeyError.
            raise RuntimeError('Worker process {} exited without returning results (exit code {})'.format(
                i, job.exitcode))
        results += return_dict[i]

    all_ids = []
    all_boxes = []
    all_scores = []
    all_labels = []
    for boxes, scores, labels, ids_list in results:
        if boxes is None:
            continue
        all_boxes.append(boxes)
        all_scores.append(scores)
        all_labels.append(labels)
        all_ids.append(ids_list)

    if not all_boxes:
        # np.concatenate rejects an empty list, so return early.
        print('Run time: {:.2f}'.format(time.time() - start_time))
        return pd.DataFrame(columns=output_columns)

    all_ids = np.concatenate(all_ids)
    all_boxes = np.concatenate(all_boxes)
    all_scores = np.concatenate(all_scores)
    all_labels = np.concatenate(all_labels)
    if verbose:
        print(all_ids.shape, all_boxes.shape, all_scores.shape, all_labels.shape)

    res = pd.DataFrame(all_ids, columns=['image_id'])
    res['class'] = all_labels
    res['score'] = all_scores
    # Scale the fused boxes back by the same constant they were divided by.
    res['xmin'] = all_boxes[:, 0] * image_size
    res['ymin'] = all_boxes[:, 1] * image_size
    res['xmax'] = all_boxes[:, 2] * image_size
    res['ymax'] = all_boxes[:, 3] * image_size
    res['width'] = np.full(len(all_boxes), image_size, dtype=all_boxes.dtype)
    res['height'] = np.full(len(all_boxes), image_size, dtype=all_boxes.dtype)
    print('Run time: {:.2f}'.format(time.time() - start_time))
    return res


def ensemble(benchmark_csv, weights, params):
    """Fuse the given prediction CSVs and write the result to ``ensemble.csv``.

    The file is written to the current working directory without the index
    column. Nothing is returned.
    """
    fused = ensemble_predictions(benchmark_csv, weights, params)
    fused.to_csv("ensemble.csv", index=False)


if __name__ == '__main__':
    # One ready-made parameter set per fusion method; the key at the end of
    # this expression picks the set that is actually used.
    params = {
        'nms': {
            'run_type': 'nms',
            'iou_thr': 0.5,
            'verbose': True,
        },
        'soft-nms': {
            'run_type': 'soft-nms',
            'iou_thr': 0.5,
            'thresh': 0.0001,
            'sigma': 0.1,
            'verbose': True,
        },
        'nmw': {
            'run_type': 'nmw',
            'skip_box_thr': 0.000000001,
            'intersection_thr': 0.5,
            'limit_boxes': 30000,
            'verbose': True,
        },
        'wbf': {
            'run_type': 'wbf',
            'skip_box_thr': 0.001,
            'intersection_thr': 0.5,
            'conf_type': 'avg',
            'limit_boxes': 30000,
            'verbose': False,
        },
    }['wbf']

    in_dir = './'
    # Prediction files to fuse; weights[i] below belongs to benchmark_csv[i].
    benchmark_csv = [
        '/content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet_5reshuffled_new_epoch54.csv',
        '/content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet5_epoch37.csv',
        '/content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet5new_epoch47iou22.csv',
        '/content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet7_epoch33.csv',  # LOSS 0.46301
        '/content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet6_epoch31.csv',  # LOSS 0.49701
        '/content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/yolov5_output_latest.csv',
    ]
    weights = [10, 9, 8, 8, 7, 7]
    print(len(benchmark_csv))
    print(len(weights))
    assert len(benchmark_csv) == len(weights)
    ensemble(benchmark_csv, weights, params)

6
6
Use processes: 1
Read /content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet_5reshuffled_new_epoch54.csv...
Read /content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet5_epoch37.csv...
Different IDs in ensembled CSVs! 494 != 497
Read /content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet5new_epoch47iou22.csv...
Different IDs in ensembled CSVs! 494 != 496
Read /content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet7_epoch33.csv...
Different IDs in ensembled CSVs! 494 != 483
Read /content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/test_result_effdet6_epoch31.csv...
Different IDs in ensembled CSVs! 494 != 496
Read /content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/yolov5_output_latest.csv...
Different IDs in ensembled CSVs! 494 != 495
Start process: 0 IDs to proc: 494




Run time: 3.24


In [33]:
# Reload the fused predictions from disk. ensemble() writes "ensemble.csv"
# relative to the working directory, so this absolute path assumes the
# working directory is /content.
df = pd.read_csv("/content/ensemble.csv")
# Bare expression: displays the frame as the cell output.
df

Unnamed: 0,image_id,class,score,xmin,ymin,xmax,ymax,width,height
0,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,car,0.764835,589.925110,682.124512,640.815491,755.910645,1024.0,1024.0
1,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,motorbike,0.693201,277.216492,691.708191,362.716522,894.874207,1024.0,1024.0
2,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,pickup,0.658696,639.704895,561.003906,777.335754,807.286560,1024.0,1024.0
3,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,pickup,0.625430,778.966553,530.397705,1021.253601,892.857178,1024.0,1024.0
4,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,minivan,0.290136,533.340820,671.381287,574.791992,739.023010,1024.0,1024.0
...,...,...,...,...,...,...,...,...,...
9569,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,pickup,0.056187,582.000549,760.000305,618.000488,814.000305,1024.0,1024.0
9570,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,pickup,0.053471,872.000000,704.000000,1024.000000,874.000000,1024.0,1024.0
9571,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,ambulance,0.040248,54.000130,676.000244,233.000443,976.000488,1024.0,1024.0
9572,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,minibus,0.030971,59.000320,674.999817,231.000580,978.999817,1024.0,1024.0


In [34]:
# Work on a copy: the column casts in the following cells assign into
# test_result, and a plain alias would silently rewrite df as well.
test_result = df.copy()

In [35]:
# Cast to integer pixel coordinates; astype(int) drops the fractional part
# (e.g. 589.925 is shown as 589 afterwards) rather than rounding.
test_result['xmin'] = test_result['xmin'].astype(int)

In [36]:
# Integer pixel coordinate for the top edge (fractional part is dropped).
test_result['ymin'] = test_result['ymin'].astype(int)

In [37]:
# Integer pixel coordinate for the right edge (fractional part is dropped).
test_result['xmax'] = test_result['xmax'].astype(int)

In [38]:
# Integer pixel coordinate for the bottom edge, plus the image size columns
# (stored as 1024.0 in the CSV) as plain integers.
test_result['ymax'] = test_result['ymax'].astype(int)
test_result['width'] = test_result['width'].astype(int)
test_result['height'] = test_result['height'].astype(int)

In [39]:
# Sanity check: show any box whose left edge lies before the image origin.
test_result.loc[test_result['xmin'].lt(0)]

Unnamed: 0,image_id,class,score,xmin,ymin,xmax,ymax,width,height


In [40]:
# Sanity check: show any box whose right edge lies beyond 1024.
test_result.loc[test_result['xmax'].gt(1024)]

Unnamed: 0,image_id,class,score,xmin,ymin,xmax,ymax,width,height


In [41]:
# Sanity check: show any box whose top edge lies before the image origin.
test_result.loc[test_result['ymin'].lt(0)]

Unnamed: 0,image_id,class,score,xmin,ymin,xmax,ymax,width,height


In [42]:
# Sanity check: show any box whose bottom edge lies beyond 1024.
test_result.loc[test_result['ymax'].gt(1024)]

Unnamed: 0,image_id,class,score,xmin,ymin,xmax,ymax,width,height


In [43]:
# Rows with the same image and identical integer corners count as duplicates
# (the class column is not part of the key). Sorting by score first makes the
# surviving row of each group the highest-scoring one regardless of the
# incoming row order; sort_index() then restores the original ordering.
test_result = (
    test_result
    .sort_values('score', ascending=False)
    .drop_duplicates(subset=['image_id', 'xmin', 'ymin', 'xmax', 'ymax'])
    .sort_index()
)
test_result

Unnamed: 0,image_id,class,score,xmin,ymin,xmax,ymax,width,height
0,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,car,0.764835,589,682,640,755,1024,1024
1,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,motorbike,0.693201,277,691,362,894,1024,1024
2,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,pickup,0.658696,639,561,777,807,1024,1024
3,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,pickup,0.625430,778,530,1021,892,1024,1024
4,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,minivan,0.290136,533,671,574,739,1024,1024
...,...,...,...,...,...,...,...,...,...
9568,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,truck,0.062444,872,704,1024,874,1024,1024
9569,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,pickup,0.056187,582,760,618,814,1024,1024
9571,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,ambulance,0.040248,54,676,233,976,1024,1024
9572,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,minibus,0.030971,59,674,231,978,1024,1024


In [44]:
# Keep only detections scoring above 0.15 and renumber the rows from 0.
test_result = test_result.loc[test_result['score'].gt(0.15)].reset_index(drop=True)
test_result

Unnamed: 0,image_id,class,score,xmin,ymin,xmax,ymax,width,height
0,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,car,0.764835,589,682,640,755,1024,1024
1,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,motorbike,0.693201,277,691,362,894,1024,1024
2,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,pickup,0.658696,639,561,777,807,1024,1024
3,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,pickup,0.625430,778,530,1021,892,1024,1024
4,Asraf_50_jpg.rf.7026694f0b9f37a6790982295c7e8663,minivan,0.290136,533,671,574,739,1024,1024
...,...,...,...,...,...,...,...,...,...
3411,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,bus,0.684419,868,707,1023,878,1024,1024
3412,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,suv,0.528665,49,671,230,974,1024,1024
3413,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,pickup,0.276453,726,715,822,854,1024,1024
3414,sabiha(309)_jpg.rf.93e77c106e2f415529533bacade...,truck,0.193893,582,760,618,817,1024,1024


In [45]:
# Write the filtered, de-duplicated predictions to Drive, omitting the index column.
test_result.to_csv("/content/drive/My Drive/Dhaka-AI 2020/Code/sadia_effdet_test_result/ensemble_res.csv",index=False)