In [1]:
import os
import numpy as np
import pandas as pd
import glob
from tqdm import tqdm

In [2]:
def txt2df(path,post_process_check=True):
    files = glob.glob(path+"/*.txt")
    df = pd.DataFrame(columns=["image_id","worm_type","count"])
    if post_process_check: print("post_processing")
    for file in files:
        with open(file) as fp:
            preds =np.array([i.split()[:] for i in fp.read().split("\n")[:-1]],dtype=float)
            if post_process_check: 
                preds = post_process(preds)
            abw = 0
            pbw =0
            for pred in preds: 
                if int(pred[0])==0: abw+=1
                else: pbw+=1

            df.loc[len(df)] = [os.path.basename(file),"abw",abw]
            df.loc[len(df)] = [os.path.basename(file),"pbw",pbw]
    return df


def get_df(path):
    files = glob.glob(path)
    post_process_check = False
    if post_process_check: print("post processing")

    preds_df = pd.DataFrame(columns=["image_id","worm_type","count"])
    for file in tqdm(files):
        with open(file) as fp:
            preds =np.array([i.split()[:] for i in fp.read().split("\n")[:-1]],dtype=float)
            if post_process_check: 
                preds = post_process(preds)
            abw = 0
            pbw =0
            for pred in preds: 
                if int(pred[0])==0: abw+=1
                else: pbw+=1

            preds_df.loc[len(preds_df)] = [os.path.basename(file),"abw",abw]
            preds_df.loc[len(preds_df)] = [os.path.basename(file),"pbw",pbw]        
    return preds_df

In [18]:
!ls detection/yolov5/runs/detect/.old

bw_150
dummy
ensemble_150_300
ensemble_150_3002
ensemble_150_3003
ensemble_pre_50_300
ensemble_pre__last_50_300
exp
last__bw_150
temp
test2.0_last_highres++_aug_100
test_all_last_highres++_100
test_best_highres++_100
test_best_highres++_1002
test_best_highres_50
test_best_highres_multires_50
test_conf0.3_nms0.3_best_highres++_100
test_conf0.3_nms0.3_best_highres++_iw_150
test_conf0.3_nms0.3_best_highres++_iw_20
test_last_highres++_100
test_last_highres++_aug_100
test_last_highres_50
test_last_pre_300
test_nms0.3_best_highres++_200
test_nms0.3_last_highres++_100
test_nms2__last_highres++_100
test_nms3__last_highres++_100
test_nms3__last_highres++_1002
test_nms__last_highres++_100
test_pretrained_last__bw_150
test_pretrained_last__bw_1502
test_pretrained_last__bw_300
val_best_highres++_100
val_best_highres_50
val_best_highres_obj365_50
val_conf0.3_best_highres++_100
val_conf0.3_nms0.3_best_highres++_100
val_conf0.3_nms0.5_best_highres++_100
val_conf0

In [19]:
df1 = get_df("detection/yolov5/runs/detect/val_best_1536_100_default/labels/*.txt")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 645/645 [00:01<00:00, 434.48it/s]


In [20]:
df2 = get_df("detection/yolov5/runs/detect/.old/val_best_highres_50/labels/*.txt")

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 643/643 [00:01<00:00, 329.97it/s]


In [21]:
og_df = txt2df("detection/yolov5/detection/yolo_data/labels/validation",post_process_check=False)
og_df 

Unnamed: 0,image_id,worm_type,count
0,id_095efeae209e071b85c680ce.txt,abw,0
1,id_095efeae209e071b85c680ce.txt,pbw,55
2,id_062e434219b5a3612a414996.txt,abw,0
3,id_062e434219b5a3612a414996.txt,pbw,5
4,id_07741c21ea0cf05171f394ac.txt,abw,0
...,...,...,...
1291,id_22a36dae9a130ff5d7751e93.txt,pbw,35
1292,id_c6af05f4d28a2f31f2735e7d.txt,abw,0
1293,id_c6af05f4d28a2f31f2735e7d.txt,pbw,2
1294,id_bddefe14f422e0ab79aec5b2.txt,abw,4


In [22]:
def merge(df,og_df):
    merged_df1 = df.merge(og_df,on=["image_id","worm_type"])
    merged_df1.columns = ["image_id","worm_type","pred_count","count"]
    merged_df1["diff"] = merged_df1.apply(lambda x: abs(x["count"]-x["pred_count"]), axis=1)
    return merged_df1


merge1 = merge(df1,og_df)
merge2 = merge(df2,og_df)

In [23]:
merge1.describe()

Unnamed: 0,pred_count,count,diff
count,1290.0,1290.0,1290.0
mean,11.175194,10.211628,1.437984
std,33.555974,30.867308,6.62866
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,1.0,1.0,0.0
75%,7.0,7.0,1.0
max,427.0,453.0,130.0


In [24]:
merge2.describe()

Unnamed: 0,pred_count,count,diff
count,1286.0,1286.0,1286.0
mean,11.089425,10.241058,1.848367
std,32.374856,30.910757,8.634085
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,1.0,1.0,0.0
75%,7.0,7.0,1.0
max,435.0,453.0,125.0


In [25]:
count_merge = df1.merge(df2,on=["image_id","worm_type"],how='left').fillna(0)

count_merge

Unnamed: 0,image_id,worm_type,count_x,count_y
0,id_095efeae209e071b85c680ce.txt,abw,0,0.0
1,id_095efeae209e071b85c680ce.txt,pbw,62,66.0
2,id_062e434219b5a3612a414996.txt,abw,0,0.0
3,id_062e434219b5a3612a414996.txt,pbw,4,4.0
4,id_07741c21ea0cf05171f394ac.txt,abw,0,0.0
...,...,...,...,...
1285,id_22a36dae9a130ff5d7751e93.txt,pbw,32,34.0
1286,id_c6af05f4d28a2f31f2735e7d.txt,abw,0,0.0
1287,id_c6af05f4d28a2f31f2735e7d.txt,pbw,1,0.0
1288,id_bddefe14f422e0ab79aec5b2.txt,abw,2,4.0


In [26]:
count_merge["mean"] = count_merge.apply(lambda x: x["count_x"] if x["count_y"]==0 else int(0.5*x["count_x"]+0.5*x["count_y"]),axis=1)
count_merge


Unnamed: 0,image_id,worm_type,count_x,count_y,mean
0,id_095efeae209e071b85c680ce.txt,abw,0,0.0,0
1,id_095efeae209e071b85c680ce.txt,pbw,62,66.0,64
2,id_062e434219b5a3612a414996.txt,abw,0,0.0,0
3,id_062e434219b5a3612a414996.txt,pbw,4,4.0,4
4,id_07741c21ea0cf05171f394ac.txt,abw,0,0.0,0
...,...,...,...,...,...
1285,id_22a36dae9a130ff5d7751e93.txt,pbw,32,34.0,33
1286,id_c6af05f4d28a2f31f2735e7d.txt,abw,0,0.0,0
1287,id_c6af05f4d28a2f31f2735e7d.txt,pbw,1,0.0,1
1288,id_bddefe14f422e0ab79aec5b2.txt,abw,2,4.0,3


In [14]:
count_merge_df = count_merge[["image_id","worm_type","mean"]]

In [15]:
merge2 = merge(count_merge_df,og_df)
merge2

Unnamed: 0,image_id,worm_type,pred_count,count,diff
0,id_095efeae209e071b85c680ce.txt,abw,0,0,0
1,id_095efeae209e071b85c680ce.txt,pbw,58,55,3
2,id_062e434219b5a3612a414996.txt,abw,0,0,0
3,id_062e434219b5a3612a414996.txt,pbw,4,5,1
4,id_07741c21ea0cf05171f394ac.txt,abw,0,0,0
...,...,...,...,...,...
1281,id_02d739e1c393e30c003b7c41.txt,pbw,89,94,5
1282,id_22a36dae9a130ff5d7751e93.txt,abw,0,0,0
1283,id_22a36dae9a130ff5d7751e93.txt,pbw,31,35,4
1284,id_bddefe14f422e0ab79aec5b2.txt,abw,3,4,1


In [16]:
merge2.describe()

Unnamed: 0,pred_count,count,diff
count,1286.0,1286.0,1286.0
mean,10.31804,10.241058,1.182737
std,30.370538,30.910757,5.777324
min,0.0,0.0,0.0
25%,0.0,0.0,0.0
50%,1.0,1.0,0.0
75%,7.0,7.0,0.0
max,384.0,453.0,94.0
