In [1]:
import re
import os
import cv2
import glob
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from ensemble_boxes import weighted_boxes_fusion,nms


In [2]:

def class_to_color(class_id):
    colors = {"abw":(255,0,0),"pbw":(0,255,0),"comp":(0,0,255)}
    return colors[class_id]

def draw_bounding_box(img, annotation,filter_bbox=False):

    img_h ,img_w,_ = img.shape
    x_center,y_center,w,h = [annotation[1]*img_w,annotation[2]*img_h,annotation[3]*img_w,annotation[4]*img_h,]
    x_min, y_min = int(x_center-w/2),int(y_center-h/2)
    x_max, y_max = int(x_center+w/2),int(y_center+h/2)
    class_id = "abw" if annotation[0]==0 else "pbw" if annotation[0]==1 else "comp"
    color = class_to_color(class_id)

    cv2.rectangle(img,(x_min,y_min),(x_max,y_max), color, 2)
    if len(annotation)==6:
        if filter_bbox:
            if annotation[0]!=2:
                cv2.putText(img = img,
                  text = str(annotation[5]),
                  org = (x_min, y_min),
                  fontFace = cv2.FONT_HERSHEY_DUPLEX,
                  fontScale = 0.7,
                  color = (255,0,0),
                  thickness = 1
                )
        else:
            cv2.putText(img = img,
                  text = str(annotation[5]),
                  org = (x_min, y_min),
                  fontFace = cv2.FONT_HERSHEY_DUPLEX,
                  fontScale = 0.7,
                  color = (255,0,0),
                  thickness = 1
                )

## WBF

In [78]:

def get_wbf_data(img_id, annotations,dim ):
    boxes = []
    scores = []
    labels = []
    img_h,img_w =  dim
    for annotation in annotations:
        x_center,y_center,h,w = annotation[1:5]
        x_center,w = x_center*img_w, w*img_w
        y_center,h = y_center*img_h, h*img_h
        
        x1,y1 = x_center-w/2,y_center-h/2
        x2,y2 = x_center+w/2,y_center+h/2
        
        x1,x2 = max(0,x1/img_w),min(1,x2/img_w)
        y1,y2 = max(0,y1/img_h),min(1,y2/img_h)
#         print([x1,y1,x2,y2])
        boxes.append([x1,y1,x2,y2])
        scores.append(annotation[5])     
        labels.append(annotation[0])   
    return boxes,scores,labels

def get_yolo_data( boxes,scores,labels,dim):
    annotations = []
    img_h,img_w = dim
    for i in range(len(boxes)):
        x1,y1,x2,y2 = boxes[i]
        x1,x2 = x1*img_w,x2*img_w
        y1,y2 = y1*img_h,y2*img_h
        W = x2-x1
        H= y2-y1
        x_center =x1+W/2
        y_center = y1+H/2 
        x_center /= img_w 
        y_center /=img_h
        W /=img_w
        H /=img_h
        annotation = f"{labels[i]} {' '.join(map(str,[x_center,y_center,W,H]))} {scores[i]}"
        annotations.append(annotation)
    
    return annotations

def get_wbf_bbox(img_id):
    all_boxes,all_scores,all_labels = [],[],[]

    img_id  = img_id.split(".")[0]
    img_h,img_w,_ = cv2.imread(f"{raw_data}/jpg/{img_id}.jpg").shape
    for experiment in experiments:
        path =f"{experiment}/labels/{img_id}.txt"
#         print(path)
        try:
            with open(path) as fp:
                annotations =np.array([i.split()[:] for i in fp.read().split("\n")[:-1]],dtype=float)
                boxes,scores,labels = get_wbf_data(img_id,annotations, dim = (img_h,img_w))
                all_boxes.append(boxes)
                all_scores.append(scores)
                all_labels.append(labels)
        except FileNotFoundError: 
            all_boxes.append([])
            all_scores.append([])
            all_labels.append([]) 
    try:
#         print(iou_thr,skip_box_thr)
        boxes,scores,labels = weighted_boxes_fusion(all_boxes, all_scores, all_labels, weights=exp_weights, iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    
    except Exception as e: 
#         print(img_id,all_boxes)
        raise e
    out_dict =  {"img_id":img_id,"annotations":get_yolo_data(boxes,scores,labels,dim =(img_h,img_w))}
    return out_dict

## Preds

In [4]:
def get_counts(img_id,annotations):
    pred_count = {}
#     if post_process_check: 
#         preds = post_process(preds)
    abw_count = 0
    pbw_count =0
    for pred in annotations: 
        if int(pred[0])==0: abw_count+=1
        else: pbw_count+=1        
    return {"img_id":img_id,"abw":abw_count,"pbw":pbw_count}


In [101]:
def create_df(wbf_processed_data):
    wbf_count_df =pd.DataFrame(columns=["image_id","worm_type","pred_count"])
    for wbf_img in tqdm(wbf_processed_data,desc= "Creating DF"):
        img_id = wbf_img["img_id"]
        annotations = wbf_img["annotations"]
        counts = get_counts(img_id,annotations)
        wbf_count_df.loc[len(wbf_count_df)] = [img_id+".txt","abw",counts["abw"]]
        wbf_count_df.loc[len(wbf_count_df)] = [img_id+".txt","pbw",counts["pbw"]] 
    return wbf_count_df
def run_wbf(img_ids):
    print(exp_weights)
    wbf_processed_data  = [get_wbf_bbox(img_id) for img_id in tqdm(img_ids,desc= "Running WBF")]
    wbf_count_df  = create_df(wbf_processed_data)
    return wbf_count_df,wbf_processed_data

## Compare

In [18]:
def infer(data,plot_img=False,post_process_check=False):
    img_id= data["img_id"].split(".")[0]
    annotations =data["annotations"]
    if post_process_check: 
            annotations = post_process(annotations)
    img = cv2.imread(f"{raw_data}/jpg/{img_id}.jpg")
    img =cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
    if plot_img:
        plt.imshow(img)
        plt.show()
    for annotation in annotations:
        annotation = list(map(float,annotation.split()))
#         print(annotation)
        draw_bounding_box(img,annotation)
    print(img.shape)

    plt.imshow(img)
    plt.title(len(annotations))
    plt.show()

In [19]:
def txt2df(path,post_process_check=True):
    files = glob.glob(path+"/*.txt")
    df = pd.DataFrame(columns=["image_id","worm_type","count"])
    if post_process_check: print("post_processing")
    for file in files:
        with open(file) as fp:
            preds =np.array([i.split()[:] for i in fp.read().split("\n")[:-1]],dtype=float)
            if post_process_check: 
                preds = post_process(preds)
            abw = 0
            pbw =0
            for pred in preds: 
                if int(pred[0])==0: abw+=1
                else: pbw+=1

            df.loc[len(df)] = [os.path.basename(file),"abw",abw]
            df.loc[len(df)] = [os.path.basename(file),"pbw",pbw]
    return df


In [22]:
!ls  data/raw

SampleSubmission.csv  Train.csv  images.zip
Test.csv	      images	 images_bboxes.csv


In [25]:
# og_df = txt2df("detection/yolov5/detection/yolo_data/labels/validation",post_process_check=False)
# og_df 

In [28]:
!ls detection/yolov5/runs/detect/


initial_last_256_150		   val_best_1536_100_nms0.3_conf0.3
raw_test_best_1536_100		   val_best_1536_100_nms0.3_conf0.32
raw_val_best_1024_100		   val_best_1536_100_nms0.3_conf0.4
raw_val_best_1024_1002		   val_best_1536_100_nms0.3_conf0.42
test_best_1536_100_default	   val_ensemble_best_1536_100_nms0.25_conf0.3
test_best_1536_100_nms0.3_conf0.3  val_ensemble_best_1536_1024_nms0.3_conf0.3
test_initial_last_1024_100	   val_ensemble_best_1536_256_nms0.25_conf0.3
test_initial_last_256_150	   val_last_640_50
test_last_640_50		   val_last_highres++_100
val_best_1536_100_default	   val_lowsz_best_1536_100_nms0.3_conf0.3
val_best_1536_100_nms0.25_conf0.3


In [102]:
iou_thr = 0.3
skip_box_thr = 0.3
exp_weights = [10,1]
experiments = ["detection/yolov5/runs/detect/raw_test_best_1536_100","detection/detectron2/output1024"]
for i in experiments: print(len(os.listdir(i+"/labels")))
img_ids = os.listdir("detection/yolov5/detection/yolo_data/images/test/")
raw_data = "data/preproc/"
wbf_count_df,wbf_processed_data  = run_wbf(img_ids)

1960
1993
[10, 1]


Running WBF: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 1993/1993 [00:32<00:00, 61.52it/s]
Creating DF: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 1993/1993 [00:04<00:00, 443.48it/s]


In [103]:
def post_process(wbf_processed_data,thr):
    new_wbf = []
    
    for data in wbf_processed_data:
        new_annotations =[]
        for annotation in data["annotations"]:
            if float(annotation.split()[-1])>thr:
                new_annotations.append(annotation)
        new_wbf.append({"img_id":data["img_id"],"annotations":new_annotations})
    return new_wbf


In [104]:
wbf_count_df

Unnamed: 0,image_id,worm_type,pred_count
0,id_f0363dbf53755c5900c66b77.txt,abw,0
1,id_f0363dbf53755c5900c66b77.txt,pbw,61
2,id_6e52c2f722c67f5b4b2589cc.txt,abw,0
3,id_6e52c2f722c67f5b4b2589cc.txt,pbw,6
4,id_716206e74f543068344bc1f7.txt,abw,0
...,...,...,...
3981,id_b653f60c63ba7208b8eef848.txt,pbw,32
3982,id_306566ec9a376d62e5ceb273.txt,abw,4
3983,id_306566ec9a376d62e5ceb273.txt,pbw,0
3984,id_bfb6d7e393c07c3e6de3feb4.txt,abw,5


In [105]:
wbf_count_df["image_id_worm"] = wbf_count_df.apply(lambda x: f"{x['image_id'].split('.')[0]}_{x['worm_type']}",axis=1)
wbf_count_df = wbf_count_df[["image_id_worm","pred_count"]]
wbf_count_df.columns = ["image_id_worm","number_of_worms"]

In [106]:
sub_df = pd.read_csv("data/raw/SampleSubmission.csv")
# sub_df

In [107]:
sub_df = sub_df[["image_id_worm"]].merge(wbf_count_df, on=["image_id_worm"],how='left').fillna(0)
sub_df

Unnamed: 0,image_id_worm,number_of_worms
0,id_00332970f80fa9a47a39516d_abw,4.0
1,id_00332970f80fa9a47a39516d_pbw,0.0
2,id_0035981bc3ae42eb5b57a317_abw,0.0
3,id_0035981bc3ae42eb5b57a317_pbw,21.0
4,id_005102f664b820f778291dee_abw,9.0
...,...,...
5601,id_ffbcb27fa549278f47505515_pbw,0.0
5602,id_ffc0e41e10b0c964d4a02811_abw,0.0
5603,id_ffc0e41e10b0c964d4a02811_pbw,0.0
5604,id_fff8c253115aacded09ad7ed_abw,0.0


In [108]:
# merged_df1 = wbf_count_df.merge(sub_df,on=["image_id","worm_type"],how="left")
# merged_df1.columns = ["image_id","worm_type","pred_count","count"]
# # merged_df1["diff"] = merged_df1.apply(lambda x: abs(x["count"]-x["pred_count"]), axis=1)
# merged_df1

In [109]:
# !ls data/submission

In [110]:
sub_df[sub_df["image_id_worm"]=="id_a93adfa0157cd5ac1ca7484f_pbw"]

Unnamed: 0,image_id_worm,number_of_worms
3699,id_a93adfa0157cd5ac1ca7484f_pbw,0.0


In [114]:
len(sub_df.image_id_worm.unique())

5606

In [112]:
sub_df.to_csv("data/submission/weighted10:1_ensemble_d2_yolo.csv",index=False)

In [113]:
# import random
# for i in random.sample(wbf_post_process,3):
#     infer(i)

In [127]:
wbf_post_process = post_process(wbf_processed_data,0.2)
wbf_post_process_df = create_df(wbf_post_process)

Creating DF: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 648/648 [00:01<00:00, 356.99it/s]


In [128]:
merged_df2 = wbf_post_process_df.merge(og_df,on=["image_id","worm_type"])
merged_df2.columns = ["image_id","worm_type","pred_count","count"]
merged_df2["diff"] = merged_df2.apply(lambda x: abs(x["count"]-x["pred_count"]), axis=1)
merged_df2.describe()

Unnamed: 0,pred_count,count,diff
count,1296.0,1296.0,1296.0
mean,9.089506,10.167438,2.108796
std,24.279289,30.802581,8.848779
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,1.0,1.0,0.0
75%,7.0,7.0,1.0
max,313.0,453.0,140.0


In [129]:
merged_df2

Unnamed: 0,image_id,worm_type,pred_count,count,diff
0,id_095efeae209e071b85c680ce.txt,abw,0,0,0
1,id_095efeae209e071b85c680ce.txt,pbw,45,55,10
2,id_062e434219b5a3612a414996.txt,abw,0,0,0
3,id_062e434219b5a3612a414996.txt,pbw,4,5,1
4,id_07741c21ea0cf05171f394ac.txt,abw,0,0,0
...,...,...,...,...,...
1291,id_22a36dae9a130ff5d7751e93.txt,pbw,33,35,2
1292,id_c6af05f4d28a2f31f2735e7d.txt,abw,0,0,0
1293,id_c6af05f4d28a2f31f2735e7d.txt,pbw,0,2,2
1294,id_bddefe14f422e0ab79aec5b2.txt,abw,4,4,0


In [130]:
thr=5
len(merged_df1[merged_df1["diff"]>thr]),len(merged_df2[merged_df2["diff"]>thr])

(97, 97)

## Test

In [105]:
!ls detection/yolov5/runs/detect

initial_last_256_150	    test_initial_last_256_150  val_last_640_50
test_initial_last_1024_100  test_last_640_50	       val_last_highres++_100


In [107]:
iou_thr = 0.4
skip_box_thr = 0.4
experiments = ["test_initial_last_256_150","test_initial_last_1024_100"]
test_data = pd.read_csv("data/preds/test_droupout0.3_filter_preds.csv")
img_ids =test_data[test_data["preds"]==1].image_id_worm.values
raw_data = "data/preproc/"
wbf_count_df  = run_wbf(img_ids)



Creating DF:   0%|                                                                                                                                         | 0/1993 [00:00<?, ?it/s][A[A

Creating DF:   2%|███                                                                                                                            | 48/1993 [00:00<00:04, 470.59it/s][A[A

Creating DF:   5%|██████                                                                                                                         | 96/1993 [00:00<00:04, 462.50it/s][A[A

Creating DF:   7%|█████████                                                                                                                     | 143/1993 [00:00<00:04, 457.98it/s][A[A

Creating DF:   9%|███████████▉                                                                                                                  | 189/1993 [00:00<00:03, 455.00it/s][A[A

Creating DF:  12%|██████████████▊                         

In [110]:
wbf_count_df

Unnamed: 0,image_id,worm_type,pred_count
0,id_00332970f80fa9a47a39516d.txt,abw,4
1,id_00332970f80fa9a47a39516d.txt,pbw,0
2,id_0035981bc3ae42eb5b57a317.txt,abw,0
3,id_0035981bc3ae42eb5b57a317.txt,pbw,23
4,id_005102f664b820f778291dee.txt,abw,9
...,...,...,...
3981,id_ffad8f3773a4222f8fe5ba1a.txt,pbw,8
3982,id_ffb65e6de900c49d8f2ef95a.txt,abw,0
3983,id_ffb65e6de900c49d8f2ef95a.txt,pbw,124
3984,id_fff8c253115aacded09ad7ed.txt,abw,0


In [118]:
sub_df =pd.read_csv("data/raw/SampleSubmission.csv")
sub_df

Unnamed: 0,image_id_worm,number_of_worms
0,id_00332970f80fa9a47a39516d_abw,0
1,id_00332970f80fa9a47a39516d_pbw,0
2,id_0035981bc3ae42eb5b57a317_abw,0
3,id_0035981bc3ae42eb5b57a317_pbw,0
4,id_005102f664b820f778291dee_abw,0
...,...,...
5601,id_ffbcb27fa549278f47505515_pbw,0
5602,id_ffc0e41e10b0c964d4a02811_abw,0
5603,id_ffc0e41e10b0c964d4a02811_pbw,0
5604,id_fff8c253115aacded09ad7ed_abw,0


In [115]:
wbf_count_df["image_id_worm"] = wbf_count_df.apply(lambda x: f"{x['image_id'][:-4]}_{x['worm_type']}",axis=1)
wbf_count_df = wbf_count_df[["image_id_worm","pred_count"]]
wbf_count_df.columns  = ["image_id_worm","number_of_worms"]

wbf_count_df

Unnamed: 0,image_id_worm,number_of_worms
0,id_00332970f80fa9a47a39516d_abw,4
1,id_00332970f80fa9a47a39516d_pbw,0
2,id_0035981bc3ae42eb5b57a317_abw,0
3,id_0035981bc3ae42eb5b57a317_pbw,23
4,id_005102f664b820f778291dee_abw,9
...,...,...
3981,id_ffad8f3773a4222f8fe5ba1a_pbw,8
3982,id_ffb65e6de900c49d8f2ef95a_abw,0
3983,id_ffb65e6de900c49d8f2ef95a_pbw,124
3984,id_fff8c253115aacded09ad7ed_abw,0


In [119]:
sub_df = sub_df[["image_id_worm"]].merge(wbf_count_df, on=["image_id_worm"],how='left').fillna(0)
sub_df

Unnamed: 0,image_id_worm,number_of_worms
0,id_00332970f80fa9a47a39516d_abw,4.0
1,id_00332970f80fa9a47a39516d_pbw,0.0
2,id_0035981bc3ae42eb5b57a317_abw,0.0
3,id_0035981bc3ae42eb5b57a317_pbw,23.0
4,id_005102f664b820f778291dee_abw,9.0
...,...,...
5601,id_ffbcb27fa549278f47505515_pbw,0.0
5602,id_ffc0e41e10b0c964d4a02811_abw,0.0
5603,id_ffc0e41e10b0c964d4a02811_pbw,0.0
5604,id_fff8c253115aacded09ad7ed_abw,0.0


In [120]:
sub_df.to_csv("data/submission/ensemble_256_1024_150.csv",index=False)