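This notebook calculates the sizes of predicted and target bounding boxes, expressed as the percentage of the image area that each box covers, and summarises them per dataset split.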

In [None]:
import pickle
import torch
from PIL import Image
import statistics
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

In [5]:
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``."""
    # NOTE(review): pickle can execute arbitrary code while loading; only use
    # this on files produced by this project.
    with open(path, "rb") as f:
        return pickle.load(f)


def load_data(model, mode, dataset, split, project_root="/home/users/fschreiber/project"):
    """Load predicted and target bounding boxes plus the matching metadata.

    Parameters
    ----------
    model : str
        Model name used in the directory names (e.g. "TVG"). For "TVG" the
        boxes are additionally rescaled to pixel coordinates.
    mode : str
        "inc" reads the targets from ``<split>_target_bbox_list.p``;
        "non_inc" reads ``<split>_pred_bbox_list.p`` from the ``bboxes_noninc_``
        directory and repeats every entry according to ``inc_len``.
    dataset, split : str
        Dataset and split names used to build the file names.
    project_root : str, optional
        Directory that contains all input files. Defaults to the location the
        notebook was originally written for.

    Returns
    -------
    tuple
        ``(pred_bbox_list, target_bbox_list, inc_len, data_model, binary_grouped)``.
        If ``mode`` is invalid or a required file does not exist, the sentinel
        ``(-1, -1, -1, -1, -1)`` is returned instead.
    """
    missing = (-1, -1, -1, -1, -1)

    # Validate the mode before touching the disk so that a typo is always
    # reported, independent of which files happen to exist.
    if mode not in ("non_inc", "inc"):
        print("The mode can only be non_inc or inc")
        return missing

    try:
        # the predicted bounding box
        pred_bbox_list = list(_load_pickle(
            f"{project_root}/bboxes_{model}/{dataset}/{split}_pred_bbox_list.p"))

        # the target bounding box
        if mode == "non_inc":
            target_path = f"{project_root}/bboxes_noninc_{model}/{dataset}/{split}_pred_bbox_list.p"
        else:
            target_path = f"{project_root}/bboxes_{model}/{dataset}/{split}_target_bbox_list.p"
        target_bbox_list = list(_load_pickle(target_path))

        # the number of one sentence split up incrementally
        # ("the left zebra" would have length 3)
        inc_len = _load_pickle(
            f"{project_root}/incremental_pickles/length_incremental_units/{dataset}_{split}_length_unit.p")

        # the original model data split up incrementally
        data_model = torch.load(f"{project_root}/ready_inc_data/{dataset}/{dataset}_{split}.pth")

        binary_grouped = _load_pickle(
            f"{project_root}/binary_grouped/{model}/{mode}/{dataset}{split}.p")

        if mode == "non_inc":
            # Repeat every entry once per incremental step of its sentence
            # (presumably so it lines up with the incremental predictions).
            target_bbox_list = [x for x, y in zip(target_bbox_list, inc_len) for _ in range(y)]

        if model == "TVG":
            image_dir = f"{project_root}/TransVG/ln_data/other/images/mscoco/images/train2014/"

            for i in range(len(data_model)):
                # Only the image size is needed; close the file right away so
                # the loop does not accumulate open file handles.
                with Image.open(image_dir + data_model[i][0]) as image:
                    image_width, image_height = image.size

                pred_bbox_list[i] = transform_coordinates(pred_bbox_list[i], image_width, image_height)
                target_bbox_list[i] = transform_coordinates(target_bbox_list[i], image_width, image_height)

        return pred_bbox_list, target_bbox_list, inc_len, data_model, binary_grouped

    except FileNotFoundError:
        # Not every dataset/split combination exists on disk; callers detect
        # the sentinel and skip the combination.
        return missing
    


#TVG needs some extra adjustments to fit the same data format as Resc
def TVG_prep(pred_bbox_list,target_bbox_list):
    """Convert paired TVG boxes to corner format, updating both lists in place.

    Every prediction is viewed as a single row, converted with ``xywh2xyxy``
    and clamped to the range [0, 1]. Every target is viewed as a single row
    and converted, but not clamped. The two (mutated) input lists are returned.
    """
    for idx, (pred_box, target_box) in enumerate(zip(pred_bbox_list, target_bbox_list)):
        # prediction: reshape -> corner format -> clamp
        pred_bbox_list[idx] = xywh2xyxy(pred_box.view(1, -1)).clamp(min=0, max=1)

        # target: reshape -> corner format
        target_bbox_list[idx] = xywh2xyxy(target_box.view(1, -1))

    return pred_bbox_list, target_bbox_list

#copied from TransVG needed to transform the bounding box vectors
def xywh2xyxy(x):
    """Convert boxes from (center_x, center_y, width, height) to corner format.

    The last dimension of ``x`` is split into its four components; the result
    holds (x_min, y_min, x_max, y_max) along that same last dimension.
    """
    center_x, center_y, width, height = x.unbind(-1)
    half_w = 0.5 * width
    half_h = 0.5 * height
    corners = (
        center_x - half_w,
        center_y - half_h,
        center_x + half_w,
        center_y + half_h,
    )
    return torch.stack(corners, dim=-1)



#TVG coordinates are normalized between 0 and 1 reshape them to fit the image
def transform_coordinates(normalized_coords, image_width, image_height):
    """Scale box coordinates by the image size.

    ``normalized_coords`` is multiplied element-wise with a 1x4 tensor
    ``[[image_width, image_height, image_width, image_height]]``, and the
    product is returned.
    """
    # width/height pattern repeated twice: one pair per box corner
    scale = torch.tensor([[image_width, image_height] * 2])
    return normalized_coords * scale

In [6]:
# Load one example configuration (TVG, incremental mode, unc / testB).
# Note: the fourth value, bound to `model`, is the loaded model data, not a model name.
(
    pred_bbox_list,
    target_bbox_list,
    inc_len,
    model,
    binary_grouped,
) = load_data("TVG", "inc", "unc", "testB")


In [27]:
#group sentences that belong to one incremental unit
def group_by_increment(bbox_list,inc_len):
    """Cut ``bbox_list`` into consecutive slices whose lengths are given by ``inc_len``.

    Returns a list with one slice per entry of ``inc_len``; slice ``k`` starts
    where slice ``k-1`` ended.
    """
    groups = []
    start = 0
    for size in inc_len:
        stop = start + size
        groups.append(bbox_list[start:stop])
        start = stop
    return groups


# Group predictions, targets and model data by incremental unit, all with the same lengths.
pred_group, targ_group, model_group = [
    group_by_increment(sequence, inc_len)
    for sequence in (pred_bbox_list, target_bbox_list, model)
]

In [28]:
#split up the list into entries where each guess is correct,wrong or mixed save the indices
#with one being correct guesses and zero incorrect guesses

def split_by_correct(binary_grouped):
    """Sort the indices of ``binary_grouped`` into three lists.

    Returns ``(only_one, only_zero, mixed)``: indices of entries whose values
    are all 1, all 0, or neither. The all-1 check runs first, so an empty
    entry lands in ``only_one``.
    """
    def _uniform(flags, value):
        # True when every flag equals `value` (vacuously true for no flags)
        return all(flag == value for flag in flags)

    only_one, only_zero, mixed = [], [], []

    for index, entry in enumerate(binary_grouped):
        if _uniform(entry, 1):
            bucket = only_one
        elif _uniform(entry, 0):
            bucket = only_zero
        else:
            bucket = mixed
        bucket.append(index)

    return only_one, only_zero, mixed


# Indices of the groups that are entirely correct, entirely wrong, or mixed.
(only_one, only_zero, mixed) = split_by_correct(binary_grouped)


In [29]:
def covered_area(pred_group,ind_list,model_group):
    """Return, per selected group, the percentage of the image each box covers.

    Parameters
    ----------
    pred_group : sequence
        Grouped boxes; every box is indexed as ``box[0][0..3]`` and its width
        and height are taken as ``|box[0][0] - box[0][2]|`` and
        ``|box[0][1] - box[0][3]|``.
    ind_list : iterable of int
        Indices of the groups to evaluate.
    model_group : sequence
        Grouped model data; ``model_group[i][0][0]`` is the image file name
        of group ``i``.

    Returns
    -------
    list of list
        One inner list per index in ``ind_list`` (same order) holding
        ``box_area / image_area * 100`` for every box of that group.
    """
    image_dir = "/home/users/fschreiber/project/TransVG/ln_data/other/images/mscoco/images/train2014/"

    coverage = []

    for i in ind_list:
        # Open only the images that are actually needed and close them right
        # away; only the size is read.
        with Image.open(image_dir + model_group[i][0][0]) as image:
            image_width, image_height = image.size
        image_area = image_width * image_height

        group_cov = []
        for entry in pred_group[i]:
            box = entry[0]
            width = abs(box[0] - box[2])
            height = abs(box[1] - box[3])

            area_bb = width * height
            group_cov.append((area_bb / image_area) * 100)

        coverage.append(group_cov)

    return coverage

# Coverage percentages of the predicted boxes, one list per correctness category.
cov_area_one, cov_area_zero, cov_area_mixed = [
    covered_area(pred_group, indices, model_group)
    for indices in (only_one, only_zero, mixed)
]

# All categories together.
cov_area_all = cov_area_one + cov_area_zero + cov_area_mixed


In [8]:
"""The function calculates the area of an image covered by the bounding box to determine its size
pred_group is a list of the coordinates of prediction bounding boxes
ind_list holds the indices of the groups that belong to the selected condition (right, wrong or mixed)
model_group holds model information
"""
def covered_area(pred_group, ind_list, model_group):
    """Compute how much of its image every bounding box covers, in percent.

    Parameters
    ----------
    pred_group : sequence
        Grouped bounding boxes. A box is read as ``box[0][0..3]``; width is
        ``|box[0][0] - box[0][2]|`` and height is ``|box[0][1] - box[0][3]|``.
    ind_list : iterable of int
        Indices of the groups to process (e.g. the right, wrong or mixed ones).
    model_group : sequence
        Grouped model information; ``model_group[i][0][0]`` is the file name
        of the image that belongs to group ``i``.

    Returns
    -------
    list of list
        For each index in ``ind_list``, in order, the list of
        ``box_area / image_area * 100`` values of that group.
    """
    image_dir = "/home/users/fschreiber/project/TransVG/ln_data/other/images/mscoco/images/train2014/"

    coverage_per_group = []

    # Iterate through each entry in ind_list.
    for i in ind_list:
        # Read the image size for this group only; the context manager closes
        # the file as soon as the size is known.
        with Image.open(image_dir + model_group[i][0][0]) as image:
            image_width, image_height = image.size
        img_area = image_width * image_height

        hold_cov = []

        # Iterate through the coordinates of this group.
        for entry in pred_group[i]:
            # Width and height of the bounding box.
            width = abs(entry[0][0] - entry[0][2])
            height = abs(entry[0][1] - entry[0][3])

            # Coverage of the bounding box relative to the image, in percent.
            area_bb = width * height
            hold_cov.append((area_bb / img_area) * 100)

        coverage_per_group.append(hold_cov)

    # Coverage percentages for all requested indices.
    return coverage_per_group


# Coverage percentages of the predicted boxes for each correctness category.
cov_area_one = covered_area(pred_group=pred_group, ind_list=only_one, model_group=model_group)
cov_area_zero = covered_area(pred_group=pred_group, ind_list=only_zero, model_group=model_group)
cov_area_mixed = covered_area(pred_group=pred_group, ind_list=mixed, model_group=model_group)

# One combined list: right first, then wrong, then mixed.
cov_area_all = [*cov_area_one, *cov_area_zero, *cov_area_mixed]


In [30]:
def plot_dist(cov_area_all,dataset,split):
    """Show a 15-bin histogram of bounding-box coverage percentages.

    Parameters
    ----------
    cov_area_all : list of list
        Coverage percentages grouped per incremental unit; the groups are
        flattened before plotting.
    dataset, split : str
        Only used in the figure title.
    """
    cov_area_all_flat = [item for sublist in cov_area_all for item in sublist]

    fig, ax = plt.subplots()
    # Only the bin edges are needed afterwards: they become the x ticks.
    _, bins, _ = ax.hist(cov_area_all_flat, bins=15, edgecolor='black')

    ax.set_xlabel("Percentage of image covered by bounding box")
    ax.set_ylabel("Count")
    ax.set_title(f"Distribution of bounding box size for the {dataset} {split} dataset")

    # One tick per bin edge, printed without decimals.
    ax.set_xticks(bins)
    ax.xaxis.set_major_formatter(FormatStrFormatter('%0.0f'))

    fig.tight_layout()
    plt.show()

In [None]:
#Plot a histogram of bounding box sizes
def plot_dist(cov_area_all, dataset, split):
    """Draw and show a histogram (15 bins) of the coverage percentages.

    ``cov_area_all`` is a list of lists and is flattened first; ``dataset``
    and ``split`` only appear in the title.
    """
    # Flatten the nested coverage lists into one sequence of values.
    flat_values = []
    for group in cov_area_all:
        flat_values.extend(group)

    fig, ax = plt.subplots()
    counts, bin_edges, patches = ax.hist(flat_values, bins=15, edgecolor='black')

    # Axis labels and title.
    ax.set(
        xlabel="Percentage of image covered by bounding box",
        ylabel="Count",
        title=f"Distribution of bounding box size for the {dataset} {split} dataset",
    )

    # Put a tick on every bin edge and print it as a whole number.
    ax.set_xticks(bin_edges)
    ax.xaxis.set_major_formatter(FormatStrFormatter('%0.0f'))

    plt.tight_layout()
    plt.show()


In [31]:

def _mean_and_std(values):
    """Return (mean, sample standard deviation) of ``values``, rounded to 2 decimals.

    NaN is returned where the statistic is undefined (mean of no values,
    standard deviation of fewer than two values), so that every dataset still
    contributes exactly one entry per result list.
    """
    nan = float("nan")
    mean = round(sum(values) / len(values), 2) if values else nan
    std = round(statistics.stdev(values), 2) if len(values) > 1 else nan
    return mean, std


# Result columns: one entry per dataset/split that was found on disk.
one_sum, one_std, zero_sum, zero_std, mixed_sum, mixed_std, all_sum, all_std = ([] for _ in range(8))

box_mode = "targ"   # "pred": predicted boxes, "targ": target boxes
mode = "inc"
model_input = "TVG"
split_list = ["testB", "testA", "val", "test"]
dataset_list = ["unc", "unc+", "gref_umd", "gref"]
#dataset_list=["unc"]
found_sets_list = []

# Fail before any data is loaded: a wrong box_mode would otherwise leave
# found_sets_list longer than the result columns.
if box_mode not in ("pred", "targ"):
    raise ValueError(f"Box Mode must be either pred or targ. It is: {box_mode}")

# Where the statistics of every category are collected.
stat_columns = {
    "Right": (one_sum, one_std),
    "Wrong": (zero_sum, zero_std),
    "Mixed": (mixed_sum, mixed_std),
    "All": (all_sum, all_std),
}

for file in dataset_list:
    for split in split_list:

        pred_bbox_list, target_bbox_list, inc_len, model, binary_grouped = load_data(model_input, mode, file, split)

        # load_data returns -1 sentinels when a file is not found: skip the combination
        if any(isinstance(value, int) and value == -1
               for value in (pred_bbox_list, target_bbox_list, inc_len)):
            continue

        print(file + split)
        found_sets_list.append(file + split)

        pred_group = group_by_increment(pred_bbox_list, inc_len)
        targ_group = group_by_increment(target_bbox_list, inc_len)
        model_group = group_by_increment(model, inc_len)

        only_one, only_zero, mixed = split_by_correct(binary_grouped)

        box_group = pred_group if box_mode == "pred" else targ_group
        cov_area_one = covered_area(box_group, only_one, model_group)
        cov_area_zero = covered_area(box_group, only_zero, model_group)
        cov_area_mixed = covered_area(box_group, mixed, model_group)

        cov_area_all = cov_area_one + cov_area_zero + cov_area_mixed

        #plot_dist(cov_area_all,file,split)

        cov_list = [cov_area_one, cov_area_zero, cov_area_mixed, cov_area_all]
        cov_type = ["Right", "Wrong", "Mixed", "All"]

        for nested, cov_name in zip(cov_list, cov_type):
            # Flatten and convert every value to a plain float
            # (works for scalar tensors as well as for Python numbers).
            values = [float(item) for sublist in nested for item in sublist]

            mean, std = _mean_and_std(values)
            mean_column, std_column = stat_columns[cov_name]
            mean_column.append(mean)
            std_column.append(std)


# Summary table: one row per dataset/split that was found, one column per category.
# The *_std lists are collected as well but are currently not part of the table.
index_name = model_input + " " + box_mode

data = {
    index_name: found_sets_list,
    "Right Avg Size": one_sum,
    "Wrong Avg Size": zero_sum,
    "Mixed Avg Size": mixed_sum,
    "All Avg Size": all_sum,
}

df = pd.DataFrame(data).set_index(index_name)

df

unctestB
unctestA
uncval
unc+testB
unc+testA
unc+val
gref_umdval
gref_umdtest
grefval


Unnamed: 0_level_0,Right Avg Size,Wrong Avg Size,Mixed Avg Size,All Avg Size
TVG targ,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
unctestB,0.0,0.0,0.0,0.0
unctestA,0.0,0.0,0.0,0.0
uncval,0.0,0.0,0.0,0.0
unc+testB,0.0,0.0,0.0,0.0
unc+testA,0.0,0.0,0.0,0.0
unc+val,0.0,0.0,0.0,0.0
gref_umdval,0.0,0.0,0.0,0.0
gref_umdtest,0.0,0.0,0.0,0.0
grefval,0.0,0.0,0.0,0.0


In [None]:

def _mean_and_std(values):
    """Return (mean, sample standard deviation) of ``values``, rounded to 2 decimals.

    NaN stands in for a statistic that is undefined: the mean of an empty
    list, or the standard deviation of fewer than two values. This keeps the
    result lists aligned with ``found_sets_list``.
    """
    nan = float("nan")
    mean = round(sum(values) / len(values), 2) if values else nan
    std = round(statistics.stdev(values), 2) if len(values) > 1 else nan
    return mean, std


# One list per statistic; each gets one entry per dataset/split found on disk.
one_sum, one_std, zero_sum, zero_std, mixed_sum, mixed_std, all_sum, all_std = ([] for _ in range(8))

# Define configuration parameters.
box_mode = "targ"  # 'targ' mode indicates using target bounding boxes, 'pred' the predicted ones
mode = "inc"
model_input = "TVG"
split_list = ["testB", "testA", "val", "test"]
dataset_list = ["unc", "unc+", "gref_umd", "gref"]
#dataset_list = ["unc"]

found_sets_list = []

# Reject an invalid box_mode up front; discovering it inside the loop would
# leave found_sets_list and the result lists with different lengths.
if box_mode not in ("pred", "targ"):
    raise ValueError(f"Box Mode must be either pred or targ. It is: {box_mode}")

# Target lists for the statistics of every category.
stat_columns = {
    "Right": (one_sum, one_std),
    "Wrong": (zero_sum, zero_std),
    "Mixed": (mixed_sum, mixed_std),
    "All": (all_sum, all_std),
}

# Iterate over datasets and splits.
for file in dataset_list:
    for split in split_list:

        # Load data from specified sources.
        pred_bbox_list, target_bbox_list, inc_len, model, binary_grouped = load_data(model_input, mode, file, split)

        # load_data returns -1 sentinels if a file is not found: skip this iteration.
        if any(isinstance(value, int) and value == -1
               for value in (pred_bbox_list, target_bbox_list, inc_len)):
            continue

        print(file + split)
        found_sets_list.append(file + split)

        # Group the predicted coordinates, target coordinates and model information by sentence.
        pred_group = group_by_increment(pred_bbox_list, inc_len)
        targ_group = group_by_increment(target_bbox_list, inc_len)
        model_group = group_by_increment(model, inc_len)

        # Split the groups into categories (Right, Wrong, Mixed).
        only_one, only_zero, mixed = split_by_correct(binary_grouped)

        # Calculate the covered area for the selected kind of box.
        box_group = pred_group if box_mode == "pred" else targ_group
        cov_area_one = covered_area(box_group, only_one, model_group)
        cov_area_zero = covered_area(box_group, only_zero, model_group)
        cov_area_mixed = covered_area(box_group, mixed, model_group)

        cov_area_all = cov_area_one + cov_area_zero + cov_area_mixed

        # Uncomment the following line to plot the distribution of coverage percentages.
        # plot_dist(cov_area_all, file, split)

        # Coverage lists and the category each of them belongs to.
        cov_list = [cov_area_one, cov_area_zero, cov_area_mixed, cov_area_all]
        cov_type = ["Right", "Wrong", "Mixed", "All"]

        # Calculate average sizes and standard deviations for the categories.
        for nested, cov_name in zip(cov_list, cov_type):
            # Flatten and convert to plain floats (scalar tensors or numbers).
            values = [float(item) for sublist in nested for item in sublist]

            mean, std = _mean_and_std(values)
            mean_column, std_column = stat_columns[cov_name]
            mean_column.append(mean)
            std_column.append(std)


# Assemble the summary table; the first column names the dataset/split and becomes the index.
# The standard deviations (the *_std lists) are currently left out of the table.
index_name = model_input + " " + box_mode

data = {
    index_name: found_sets_list,
    "Right Avg Size": one_sum,
    "Wrong Avg Size": zero_sum,
    "Mixed Avg Size": mixed_sum,
    "All Avg Size": all_sum,
}

# Create the DataFrame from the data dictionary and set the index.
df = pd.DataFrame(data).set_index(index_name)

# Display the DataFrame.
df
