In [7]:
import pandas as pd
import numpy as np

# IoU threshold handed to py_cpu_nms: a box is dropped when its overlap (IoU)
# with an already-kept, higher-scoring box is strictly greater than this value.
thresh = 0.8
 
def py_cpu_nms(dets, thresh):
    """Greedy non-maximum suppression (pure NumPy baseline).

    Repeatedly picks the highest-scoring remaining box, keeps it, and discards
    every other remaining box whose IoU with it is greater than ``thresh``.

    Parameters
    ----------
    dets : array-like of shape (N, 5)
        One row per box: ``xmin, ymin, xmax, ymax, confidence``.
        Anything ``np.asarray`` can turn into a float array is accepted.
        An empty input yields an empty result.
    thresh : float
        IoU threshold. Boxes with ``IoU <= thresh`` relative to the currently
        selected box survive to the next round.

    Returns
    -------
    list
        Row indices into ``dets`` of the boxes that are kept, ordered by
        decreasing confidence.
    """
    dets = np.asarray(dets, dtype=float)
    if dets.size == 0:
        # Nothing to suppress; also avoids indexing a 1-D empty array below.
        return []

    x1 = dets[:, 0]      # xmin
    y1 = dets[:, 1]      # ymin
    x2 = dets[:, 2]      # xmax
    y2 = dets[:, 3]      # ymax
    scores = dets[:, 4]  # confidence

    # Box areas; widths/heights are computed as (max - min + 1).
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    # Candidate indices, highest confidence first.
    order = scores.argsort()[::-1]

    keep = []  # indices of the boxes that survive
    while order.size > 0:
        # The best remaining box is always kept.
        i = order[0]
        keep.append(i)
        rest = order[1:]

        # Intersection rectangle between box i and every remaining box:
        # top-left corner is the element-wise maximum, bottom-right the minimum.
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # Clamp at zero so non-overlapping boxes get an empty intersection.
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h

        # IoU = intersection / (area_i + area_other - intersection)
        ovr = inter / (areas[i] + areas[rest] - inter)

        # Continue only with the boxes that do not overlap box i too much.
        order = rest[ovr <= thresh]

    return keep

# Read the raw detections (GBK-encoded CSV whose first row is the header) and
# keep only the columns used below, in this order: file name, insect id,
# top-left x, top-left y, bottom-right x, bottom-right y, confidence.
table_main = pd.read_csv('./result_two.csv', encoding='GBK', header=0).loc[
    :,
    ['文件名', '虫子编号', '左上角x坐标', '左上角y坐标', '右下角x坐标', '右下角y坐标', 'conf'],
]

In [9]:
# Run NMS independently for every (image, insect id) pair and gather the rows
# that survive. The pieces are collected in a list and concatenated once:
# growing a DataFrame with pd.concat inside the loop is quadratic, and seeding
# it with an empty object-dtype frame would turn every result column into
# object dtype.
box_columns = ['左上角x坐标', '左上角y坐标', '右下角x坐标', '右下角y坐标', 'conf']
kept_frames = []

# sort=False makes groupby yield images (and, within an image, insect ids) in
# order of first appearance, so the output row order does not depend on key
# sorting.
for img, table_tmp in table_main.groupby('文件名', sort=False):
    for id_insects, table_tmp_in in table_tmp.groupby('虫子编号', sort=False):
        # (N, 5) array: xmin, ymin, xmax, ymax, confidence
        det = table_tmp_in[box_columns].to_numpy()
        # Positions (within this group) of the boxes that NMS keeps
        index = py_cpu_nms(det, thresh)
        kept_frames.append(table_tmp_in.iloc[index])

# pd.concat raises on an empty list, so fall back to an empty frame that has
# the same columns and dtypes as the input.
result = pd.concat(kept_frames, axis=0) if kept_frames else table_main.iloc[0:0]

print(result)

result.to_csv('./out.csv', encoding="utf_8_sig")

            文件名 虫子编号  左上角x坐标  左上角y坐标  右下角x坐标  右下角y坐标      conf
0     00004.jpg  115  3612.0   681.0  3756.0   951.0  0.133291
2     00004.jpg   10  2740.0   495.0  2814.0   583.0  0.859443
1     00004.jpg   10  2952.0   989.0  3030.0  1111.0   0.80573
5     00004.jpg   10  2115.0   250.0  2192.0   372.0  0.397118
3     00004.jpg    9  2114.0   249.0  2194.0   372.0  0.414722
...         ...  ...     ...     ...     ...     ...       ...
4045  03812.jpg  280  3335.0  2631.0  3694.0  3006.0  0.925843
4046  03812.jpg  280  1752.0  1794.0  2347.0  2155.0  0.136557
4047  03813.jpg  280  2889.0    74.0  3260.0   353.0   0.92336
4048  03813.jpg   10  2281.0   568.0  2344.0   675.0  0.964337
4049  03813.jpg  256  1715.0  2911.0  1997.0  3291.0  0.945507

[4049 rows x 7 columns]
