In [24]:
import pandas as pd
import numpy as np
import os

In [25]:
def make_object_detection(path):
    """Load a detection-label text file into a DataFrame of strings.

    Every non-blank line is split on ``,``: the first three fields become
    ``file``, ``label`` and ``confidence`` (surrounding whitespace removed).
    The text after the last ``[`` on the line, without its closing ``]``,
    is kept verbatim as ``box``.

    Parameters
    ----------
    path : str or path-like
        Location of the label file.

    Returns
    -------
    pandas.DataFrame
        Columns ``file``, ``label``, ``confidence`` and ``box``, one row per
        record. The columns exist even when the file holds no records, so
        downstream ``df["file"]`` lookups do not fail on an empty file.

    Raises
    ------
    IndexError
        If a non-blank line has fewer than three comma-separated fields.
    """
    columns = ["file", "label", "confidence", "box"]
    with open(path) as f:
        text = f.read()

    records = []
    for raw_line in text.split("\n"):
        # Strip first so trailing whitespace cannot hide the closing bracket
        # and whitespace-only lines are skipped like empty ones.
        line = raw_line.strip()
        if not line:
            continue
        fields = line.split(",")
        box = line.split("[")[-1]
        if box.endswith("]"):
            box = box[:-1]
        records.append(
            {
                "file": fields[0].strip(),
                "label": fields[1].strip(),
                "confidence": fields[2].strip(),
                "box": box,
            }
        )
    return pd.DataFrame(records, columns=columns)

In [26]:
# DETR detections for each image variant, parsed into one DataFrame apiece.
cropped_df, original_df, ns_df, telea_df = (
    make_object_detection(label_path)
    for label_path in (
        "../commons/detr_labels_cropped.txt",
        "../commons/detr_labels_original.txt",
        "../commons/detr_labels_ns.txt",
        "../commons/detr_labels_telea.txt",
    )
)

In [27]:
# YOLO detections for each image variant, parsed into one DataFrame apiece.
cropped_df_yolo, original_df_yolo, ns_df_yolo, telea_df_yolo = (
    make_object_detection(label_path)
    for label_path in (
        "../commons/yolo_labels_cropped.txt",
        "../commons/yolo_labels_original.txt",
        "../commons/yolo_labels_ns.txt",
        "../commons/yolo_labels_telea.txt",
    )
)

In [28]:
# Read the crop-offset log: each non-empty line is split on ", " into a path
# (reduced to its base name) and a numeric offset.
with open("../commons/crop_offset.txt") as f:
    text = f.read()

offset_rows = [
    {
        "file": os.path.basename(entry.split(", ")[0]),
        "offset": float(entry.split(", ")[1]),
    }
    for entry in text.split("\n")
    if entry != ""
]
tempdf = pd.DataFrame(offset_rows)

# merge() with no `on` joins on the columns the two frames have in common.
tdf = cropped_df.merge(tempdf)

In [29]:
# Total number of detections per variant, in the order: cropped, original, ns, telea.
# Unsurprisingly, cropping yields noticeably fewer detections than the original.
# With telea, however, the number of detected objects goes up, which does not look right.
# The differences between the variants need to be inspected.
print(*(len(frame) for frame in (cropped_df, original_df, ns_df, telea_df)))

107914 112302 118405 117258


In [30]:
# Number of distinct files with at least one detection, one line per variant.
print(
    f"original_df: {len(original_df['file'].value_counts())}",
    f"cropped_df: {len(cropped_df['file'].value_counts())}",
    f"ns_df: {len(ns_df['file'].value_counts())}",
    f"telea_df: {len(telea_df['file'].value_counts())}",
    sep="\n",
)

original_df: 11815
cropped_df: 11819
ns_df: 11875
telea_df: 11871


In [31]:
# Number of distinct files with at least one detection, one line per variant.
# These counts come from the *_yolo frames; the printed labels omit that suffix.
print(
    f"original_df: {len(original_df_yolo['file'].value_counts())}",
    f"cropped_df: {len(cropped_df_yolo['file'].value_counts())}",
    f"ns_df: {len(ns_df_yolo['file'].value_counts())}",
    f"telea_df: {len(telea_df_yolo['file'].value_counts())}",
    sep="\n",
)

original_df: 11299
cropped_df: 11379
ns_df: 11455
telea_df: 11468


In [32]:
# Number of detections with confidence >= 0.8 in each *_yolo frame
# (order: original, cropped, ns, telea). Confidence is stored as text, hence float().
for detections in (original_df_yolo, cropped_df_yolo, ns_df_yolo, telea_df_yolo):
    print(sum(float(score) >= 0.8 for score in detections["confidence"]))

9964
11366
10688
10685


In [33]:
# Number of detections with confidence >= 0.8 in each frame
# (order: original, cropped, ns, telea). Confidence is stored as text, hence float().
for detections in (original_df, cropped_df, ns_df, telea_df):
    print(sum(float(score) >= 0.8 for score in detections["confidence"]))

47841
45041
48370
48884


In [34]:
def find_area(arr):
    """Return the area of a box given as ``[min_a, min_b, max_a, max_b]``.

    The layout is presumably ``[x1, y1, x2, y2]`` (TODO confirm the axis
    order). A box whose minimum exceeds its maximum on either axis is
    treated as empty and has area 0.
    """
    min_a, min_b, max_a, max_b = arr[0], arr[1], arr[2], arr[3]
    if min_a > max_a or min_b > max_b:
        return 0
    return (max_a - min_a) * (max_b - min_b)


def find_iou(str1, str2, offset):
    """Intersection-over-union of two boxes given as comma-separated strings.

    Parameters
    ----------
    str1, str2 : str
        Four comma-separated numbers each, in the layout ``find_area`` expects.
    offset : float or None
        When not ``None``, added to the second and fourth coordinate of the
        first box before comparing, i.e. the first box is shifted along its
        second axis.

    Returns
    -------
    float or int
        ``area(A ∩ B) / area(A ∪ B)``, or 0 when the union has no area.
    """
    box1 = [float(val) for val in str1.split(",")]
    if offset is not None:
        box1[1] += offset
        box1[3] += offset
    box2 = [float(val) for val in str2.split(",")]

    intersection = [
        max(box1[0], box2[0]),
        max(box1[1], box2[1]),
        min(box1[2], box2[2]),
        min(box1[3], box2[3]),
    ]
    intersection_area = find_area(intersection)

    # The union of two boxes is generally not a box, so its area comes from
    # inclusion-exclusion. The enclosing bounding box would overstate the
    # union and therefore understate the IoU.
    union_area = find_area(box1) + find_area(box2) - intersection_area

    if union_area > 0:
        return intersection_area / union_area
    return 0

def myfunc(e):
    """Sort key: return the element at index 1 of ``e``."""
    second_item = e[1]
    return second_item

def mean_val(arr):
    """Return ``np.mean(arr)``, or 0 when ``arr`` has no elements."""
    return np.mean(arr) if len(arr) != 0 else 0

In [35]:
def calculate_metrics(dfo, dft, iou_threshold=0.5):
    """Compare two sets of detections and summarise how they differ.

    Every row of ``dft`` is paired with the row of ``dfo`` whose box overlaps
    it most, as measured by ``find_iou``. The pair counts as the same object
    when that IoU reaches ``iou_threshold``. If several ``dft`` rows pick the
    same ``dfo`` row, only the one with the highest IoU keeps the match.

    Parameters
    ----------
    dfo : pandas.DataFrame
        The *new* detections. Needs ``box``, ``label`` and ``confidence``
        columns.
    dft : pandas.DataFrame
        The *original* detections, with the same columns.
    iou_threshold : float, default 0.5
        Minimum IoU for two boxes to be treated as the same object.

    Returns
    -------
    tuple
        Six averages, each 0 when its category is empty, in this order:

        * appearance - confidence of ``dfo`` rows that no ``dft`` row matched
        * disappearance - confidence of ``dft`` rows left without a match
        * confidence_inc - ``dfo`` minus ``dft`` confidence over matched
          same-label pairs where it is positive
        * confidence_dec - magnitude of that difference where it is negative
        * iou - IoU of the matched pairs
        * label_change - sum of both confidences over matched pairs whose
          labels differ
    """
    def _column(frame, name):
        # An empty frame contributes no rows, so do not require the column.
        return frame[name].tolist() if len(frame) else []

    # Pull the columns out once instead of building a row Series with .iloc
    # on every iteration of the nested loop below.
    new_boxes = _column(dfo, "box")
    new_labels = _column(dfo, "label")
    new_confidences = _column(dfo, "confidence")
    orig_boxes = _column(dft, "box")
    orig_labels = _column(dft, "label")
    orig_confidences = _column(dft, "confidence")

    matches = {}    # dfo position -> [(dft position, IoU), ...]
    unmatched = []  # dft positions whose best IoU stayed below the threshold

    for orig_pos, orig_box in enumerate(orig_boxes):
        best_pos = 0
        best_iou = 0
        for new_pos, new_box in enumerate(new_boxes):
            iou = find_iou(orig_box, new_box, offset=None)
            # ">=" means that among equal IoUs the last dfo row wins.
            if iou >= best_iou:
                best_iou = iou
                best_pos = new_pos
        # "new_boxes and" guards a threshold <= 0 with nothing to match against.
        if new_boxes and best_iou >= iou_threshold:
            matches.setdefault(best_pos, []).append((orig_pos, best_iou))
        else:
            unmatched.append(orig_pos)

    # Best candidate first. The sort is stable, so equal IoUs keep dft order.
    for candidates in matches.values():
        candidates.sort(reverse=True, key=myfunc)

    # Disappeared = never matched, plus those that lost a shared match.
    lost = list(unmatched)
    for candidates in matches.values():
        lost.extend(orig_pos for orig_pos, _ in candidates[1:])
    disappearance = [float(orig_confidences[orig_pos]) for orig_pos in lost]

    appearance = [
        float(new_confidences[new_pos])
        for new_pos in range(len(new_boxes))
        if new_pos not in matches
    ]

    confidence_inc = []
    confidence_dec = []
    iou_arr = []
    label_change = []
    for new_pos, candidates in matches.items():
        orig_pos, pair_iou = candidates[0]
        iou_arr.append(pair_iou)
        new_conf = float(new_confidences[new_pos])
        orig_conf = float(orig_confidences[orig_pos])
        if new_labels[new_pos] == orig_labels[orig_pos]:
            conf = new_conf - orig_conf
            # Equal confidence for the same object is not counted either way.
            if conf > 0:
                confidence_inc.append(conf)
            elif conf < 0:
                confidence_dec.append(-conf)
        else:
            label_change.append(new_conf + orig_conf)

    return (
        mean_val(appearance),
        mean_val(disappearance),
        mean_val(confidence_inc),
        mean_val(confidence_dec),
        mean_val(iou_arr),
        mean_val(label_change),
    )

In [36]:
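# TEMPORARILY DISABLED: crop-vs-original comparison.
# The code below is kept for reference only. It calls calculate_metrics with a
# third (offset) argument, which calculate_metrics no longer accepts, so the
# offset-aware branch must be restored there before this cell can run again.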
# if(len(offset)!=0):
#     offset_ind= offset[j]
#     new_area= find_iou(dft.iloc[j].box, dfo.iloc[i].box, offset=offset_ind)
# else:
#     new_area= find_iou(dft.iloc[j].box, dfo.iloc[i].box, offset=None)

# main_dict_crop=[]
# for file in original_df['file'].value_counts().keys():
#     dfo= original_df[original_df['file']==file]
#     dft= tdf[tdf['file']==file]
#     tempdict={}
#     tempdict["file"]=file
#     offset_arr= tdf["offset"]
#     tempdict["appearance"], tempdict["disappearance"], tempdict["confidence_inc"], tempdict["confidence_dec"], tempdict["iou_arr"], tempdict["label_change"]=calculate_metrics(dfo, dft, offset_arr)
#     main_dict_crop.append(tempdict)

In [38]:
# Per-file comparison of the YOLO detections: telea variant against the original.
# calculate_metrics takes the *new* detections first and the original ones
# second. Passing them the other way round swaps appearance/disappearance
# and confidence_inc/confidence_dec.
metric_names = (
    "appearance",
    "disappearance",
    "confidence_inc",
    "confidence_dec",
    "iou_arr",
    "label_change",
)
main_dict_telea = []
for file in original_df_yolo['file'].value_counts().keys():
    original_rows = original_df_yolo[original_df_yolo['file'] == file]
    telea_rows = telea_df_yolo[telea_df_yolo['file'] == file]
    tempdict = {"file": file}
    tempdict.update(zip(metric_names, calculate_metrics(telea_rows, original_rows)))
    main_dict_telea.append(tempdict)

In [None]:
# Per-file comparison of the YOLO detections: ns variant against the original.
# calculate_metrics takes the *new* detections first and the original ones
# second. Passing them the other way round swaps appearance/disappearance
# and confidence_inc/confidence_dec.
metric_names = (
    "appearance",
    "disappearance",
    "confidence_inc",
    "confidence_dec",
    "iou_arr",
    "label_change",
)
main_dict_ns = []
for file in original_df_yolo['file'].value_counts().keys():
    original_rows = original_df_yolo[original_df_yolo['file'] == file]
    ns_rows = ns_df_yolo[ns_df_yolo['file'] == file]
    tempdict = {"file": file}
    tempdict.update(zip(metric_names, calculate_metrics(ns_rows, original_rows)))
    main_dict_ns.append(tempdict)

In [None]:
# Turn the per-file metric records into DataFrames, one row per file.
df_telea_origin_yolo, df_ns_origin_yolo = (
    pd.DataFrame(per_file_records)
    for per_file_records in (main_dict_telea, main_dict_ns)
)
# df_crop_origin= pd.DataFrame(main_dict_crop)