In [15]:
import nltk
import pickle
import torch
import pandas as pd

from nltk.corpus import stopwords

#lists taken from the attention parser also used to evaluate model performance
position_words =['right', 'left', 'top', 'bottom', 'middle', 'mid', 'front', 'closest', 'nearest', 'center', 'central',
                               'corner', 'upper', 'back', 'far', 'leftmost', 'lower', 'low', 'rightmost',
                               'farthest', 'furthest', 'next', 'last', 'up', 'above', 'below', 'down', 'side']


color_words = ['white', 'green', 'blue', 'red', 'yellow', 'black', 'brown', 'pink', 'dark', 'darker', 'orange',
                            'gray', 'purple', 'beige', 'bright']

size_words = ["big", "bigger", "small", "smaller", "tall", "taller", "large", "larger", "little", "short", "shorter",
                           'tiny', "long", "longer", 'huge']

rel_pos_words=['above', 'about', 'below',"behind" 'beneath', 'beside', 'between', 'by','against', 
               'from', 'through', 'under', 'underneath', 'with','near', 'inside', 'from']

In [16]:
def load_data(model,mode, dataset, split):
#load predicted and gold bounding boxes

    try:

        #the predicted bounding box
        with open(r"/home/users/fschreiber/project/bboxes_"+model+"/"+dataset+"/"+split+"_pred_bbox_list.p","rb") as f:
            pred_bbox_list=list(pickle.load(f))

        if mode=="non_inc":
            #the target bounding box
            with open(r"/home/users/fschreiber/project/bboxes_noninc_"+model+"/"+dataset+"/"+split+"_pred_bbox_list.p","rb") as f:
                target_bbox_list=list(pickle.load(f))

        elif mode == "inc":
            #the target bounding box
            with open(r"/home/users/fschreiber/project/bboxes_"+model+"/"+dataset+"/"+split+"_target_bbox_list.p","rb") as f:
                target_bbox_list=list(pickle.load(f))
        else:
            print("The mode can only be non_inc or inc")
            return -1,-1,-1,-1,-1

        #the number of one sentence split up incrementally ("the left zebra" would have length 3)
        with open(r"/home/users/fschreiber/project/incremental_pickles/length_incremental_units/"+dataset+"_"+split+"_length_unit.p","rb") as f:
            inc_len=pickle.load(f)

        #the original model data split up incrementally
        data_model=torch.load("/home/users/fschreiber/project/ready_inc_data/"+dataset+"/"+dataset+"_"+split+".pth")

        with open(r"/home/users/fschreiber/project/binary_grouped/"+model+"/"+mode+"/"+dataset+split+".p","rb") as f:
            binary_grouped=pickle.load(f)

        
        if mode=="non_inc":
            target_bbox_list=[x for x,y in zip(target_bbox_list,inc_len) for _ in range(y)]
            
        if model=="TVG":
            pred_bbox_list,target_bbox_list=TVG_prep(pred_bbox_list,target_bbox_list)
                    

        return pred_bbox_list,target_bbox_list,inc_len,data_model,binary_grouped
    
    except FileNotFoundError as e:
        #print(e)
        
        return  -1,-1,-1,-1,-1
    


#TVG needs some extra adjustments to fit the same data format as Resc
def TVG_prep(pred_bbox_list,target_bbox_list):
    #print("TVG")
    for ind,(pred,targ) in enumerate (zip (pred_bbox_list,target_bbox_list)):

        pred=pred.view(1,-1)

        pred=xywh2xyxy(pred)
        pred=torch.clamp(pred,0,1)

        pred_bbox_list[ind]=pred

        targ=targ.view(1,-1)
        targ=xywh2xyxy(targ)

        target_bbox_list[ind]=targ
    return pred_bbox_list,target_bbox_list

#copied from TransVG needed to transform the bounding box vectors
def xywh2xyxy(x):
    x_c, y_c, w, h = x.unbind(-1)
    b = [(x_c - 0.5 * w), (y_c - 0.5 * h),
         (x_c + 0.5 * w), (y_c + 0.5 * h)]
    return torch.stack(b, dim=-1)


In [17]:
pred_bbox_list,target_bbox_list,inc_len,model,binary_grouped=load_data("ReSc","inc","unc","testB")


In [18]:
#group sentences that belong to one incremental unit
def group_by_increment(bbox_list,inc_len):
    counter=0
    group_list=[]
    for i in inc_len:
        
        group_list.append(bbox_list[counter:counter+i])
        counter=counter+i
    return group_list

model_group=group_by_increment(model,inc_len)

In [19]:
#helper function to make code more readable 
#calculates the ratio of stop,position,color and size words 
#to all words in the first position

def calculate_ratios(stop, position,color,size, rel_pos, rest, first_words):
                
                #print(stop)
                #print(first_words)
                all_words=len(stop)+len(position)+len(color)+len(size)+len(rel_pos)+len(rest)
                
                #print(len(first_words))
                #print(all_words)

                #stop_ratio = len(stop) / all_words
                #pos_ratio = len(position) / all_words
                #color_ratio = len(color) / all_words
                #size_ratio = len(size) / all_words
                #rel_pos_ratio = len(rel_pos) / all_words
                #rest_ratio = len(rest) / all_words

                stop_ratio = round(len(stop) / all_words, 2)
                pos_ratio = round(len(position) / all_words, 2)
                color_ratio = round(len(color) / all_words, 2)
                size_ratio = round(len(size) / all_words, 2)
                rel_pos_ratio = round(len(rel_pos) / all_words, 2)
                rest_ratio = round(len(rest) / all_words, 2)
                
                return stop_ratio, pos_ratio,color_ratio,size_ratio,rel_pos_ratio, rest_ratio

In [7]:
"""
def get_change_words(model_group,mixed_values,mixed):
    
    change_right=[]
    change_wrong=[]
    
    for entry,indi in zip(mixed_values,mixed):
        for i in range(len(entry)-1):
            
            m=model_group[indi]
            
            if entry[i] == entry[i + 1]:
                pass
            
            elif entry[i]==0 and entry[i+1]==1:
                change_right.append(m[i+1][3].split()[-1])

            elif entry[i]==1 and entry[i+1]==0:
                change_wrong.append(m[i+1][3].split()[-1])
                
            
            else:
                print("Something is very wrong.")

    return change_right,change_wrong

"""

        
    

'\ndef get_change_words(model_group,mixed_values,mixed):\n    \n    change_right=[]\n    change_wrong=[]\n    \n    for entry,indi in zip(mixed_values,mixed):\n        for i in range(len(entry)-1):\n            \n            m=model_group[indi]\n            \n            if entry[i] == entry[i + 1]:\n                pass\n            \n            elif entry[i]==0 and entry[i+1]==1:\n                change_right.append(m[i+1][3].split()[-1])\n\n            elif entry[i]==1 and entry[i+1]==0:\n                change_wrong.append(m[i+1][3].split()[-1])\n                \n            \n            else:\n                print("Something is very wrong.")\n\n    return change_right,change_wrong\n\n'

In [20]:
def get_change_words(model_group,mixed_values,mixed):
    
    change_right=[]
    change_wrong=[]
    print("mix_val",mixed_values)
    print("mix",mixed)
    for entry,indi in zip(mixed_values,mixed):
        for i in range(len(entry)-1):
            
            m=model_group[indi]
            
            if entry[i] == entry[i + 1]:
                pass
            elif entry[i]==0 and entry[i+1]==1:
                change_right.append(m[i+1][3].split()[-1])

            elif entry[i]==1 and entry[i+1]==0:
                change_wrong.append(m[i+1][3].split()[-1])        
            else:
                print("Something is very wrong.")

    return change_right,change_wrong

def split_change_words(word_list):
    

    stopwords_english = stopwords.words("english")
    #stopwords_english=stopwords.words()
    
    position = [i for i in word_list if i in position_words]
    color = [i for i in word_list if i in color_words]
    size = [i for i in word_list if i in size_words]
    rel_pos=[i for i in word_list if i in rel_pos_words]
    stop = [i for i in word_list if i in stopwords_english]
    rest = [i for i in word_list if i not in stopwords_english and i not in position_words and i not in color_words and i not in size_words and i not in rel_pos_words]


    return stop, position, color, size,rel_pos, rest
        



def change_words(binary_grouped,model_group):
    stop_wrong,pos_wrong,rest_wrong,color_wrong,size_wrong,rel_pos_wrong=([] for i in range(6))
    stop_right,pos_right,rest_right,color_right,size_right,rel_pos_right=([] for i in range(6))


    # Split up the list into entries where each guess is correct, wrong, or mixed
    right_only = []
    wrong_only = []
    mixed_only = []
    mixed_values=[]
    for index, entry in enumerate(binary_grouped):
        if all(p == 1 for p in entry):
            right_only.append(index)
        elif all(p == 0 for p in entry):
            wrong_only.append(index)
        else:
            mixed_only.append(index)
            mixed_values.append(entry)

    model_group=group_by_increment(model,inc_len)
    change_right,change_wrong=get_change_words(model_group,mixed_values,mixed_only)
    
    
    all_entries = [change_right,change_wrong]
    types = ["change_right","change_wrong"]


    #loop through the right,wrong,mixed lists and make lists for each  word condition
    for lst, typ in zip(all_entries, types):
        stop, position,color,size,rel_pos, rest = split_change_words(lst)
        stop_ratio, pos_ratio,color_ratio,size_ratio, rel_pos_ratio,rest_ratio = calculate_ratios(stop, position,color,size,rel_pos,rest, lst)

        if typ == "change_wrong":
            stop_wrong.append(stop_ratio)
            pos_wrong.append(pos_ratio)
            color_wrong.append(color_ratio)
            size_wrong.append(size_ratio)
            rel_pos_wrong.append(rel_pos_ratio)
            rest_wrong.append(rest_ratio)
        elif typ == "change_right":
            stop_right.append(stop_ratio)
            pos_right.append(pos_ratio)
            color_right.append(color_ratio)
            size_right.append(size_ratio)
            rel_pos_right.append(rel_pos_ratio)
            rest_right.append(rest_ratio)


    pos_all=[pos_right,pos_wrong]
    color_all=[color_right,color_wrong]
    size_all=[size_right,size_wrong]
    rel_pos_all=[rel_pos_right,rel_pos_wrong]
    stop_all=[stop_right,stop_wrong]
    rest_all=[rest_right,rest_wrong]
    
    
    return pos_all,color_all,size_all,rel_pos_all,stop_all,rest_all

change_words(binary_grouped,model_group)




mix_val [[1, 1, 1, 0, 1, 1, 1, 1], [0, 0, 1, 1], [1, 0, 1, 1], [0, 1, 1], [0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1], [0, 0, 0, 1], [0, 0, 0, 1, 1, 1], [0, 0, 1, 0], [0, 0, 0, 1, 1], [0, 1, 1, 1], [1, 0, 0, 1], [1, 0, 1], [0, 0, 1, 1], [0, 0, 1, 1], [0, 0, 0, 1], [0, 0, 1, 1, 1, 1], [0, 1], [0, 0, 0, 1], [0, 1, 1], [0, 0, 0, 1, 1], [0, 1, 1], [0, 0, 0, 1], [0, 0, 1, 1, 0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], [1, 0, 0, 1], [1, 0, 1], [0, 0, 1, 1, 1, 1], [1, 1, 1, 0, 0, 1, 1], [0, 1, 1], [0, 1, 1], [0, 0, 0, 1], [0, 1, 1], [0, 1, 1], [0, 0, 1, 1, 1], [0, 1, 1], [1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1, 1, 1], [0, 0, 0, 1], [0, 0, 0, 1, 1], [0, 1, 1, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0], [0, 0, 1, 1, 1], [0, 0, 1], [0, 1, 0, 0, 1], [0, 0, 1], [0, 0, 1], [0, 0, 0, 1], [0, 0, 0, 1], [0, 0, 1], [1, 0, 1, 1], [0, 1, 1], [0, 1, 1, 1], [0, 1, 1], [0, 0, 0, 0, 1], [0, 0, 0, 1, 1, 1, 1, 1], [0, 0, 1, 1, 1, 1], [0, 0, 0, 1, 1], [0, 0, 1], [0, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 0], [0, 0, 1, 1, 1, 1], [0, 1,

([[0.62], [0.24]],
 [[0.02], [0.04]],
 [[0.0], [0.01]],
 [[0.02], [0.05]],
 [[0.12], [0.31]],
 [[0.22], [0.37]])

In [9]:


#this method takes the first word in a sentence and categorises it into one of five lists
#a lists with all those words is returned
def split_first_words(model_group, index_list):
    first_word = [model_group[i][0][3] for i in index_list]

    
    #print(stop)
    position = [i for i in first_word if i in position_words]
    color = [i for i in first_word if i in color_words]
    size = [i for i in first_word if i in size_words]
    rel_pos=[i for i in first_word if i in rel_pos_words]
    stopwords_english = stopwords.words("english")
    stop = [i for i in first_word if i in stopwords_english]

    rest = [i for i in first_word if i not in stopwords_english and i not in position_words and i not in color_words and i not in size_words and i not in rel_pos_words]
    
    return stop, position, color, size, rel_pos, rest, first_word

#calculates lists showing with the ratio of position,colour,size or stopwords being 
#the first word in a sentence

def first_words(binary_grouped,model_group):
    
    stop_wrong,pos_wrong,rest_wrong,color_wrong,size_wrong,rel_pos_wrong=([] for i in range(6))
    stop_right,pos_right,rest_right,color_right,size_right,rel_pos_right=([] for i in range(6))

    #split each example into examples that guess correctly on the first word or not
    right_only = []
    wrong_only = []
    for index, entry in enumerate(binary_grouped):
        
        
        if entry[0]==1:
            right_only.append(index)
        elif entry[0]==0:
            wrong_only.append(index)
        else:
            print("something is very wrong")
        
    all_entries = [right_only, wrong_only]
    types = ["right", "wrong"]


    #loop through the right,wrong lists and make lists for each  word condition
    #and calculate the ratio
    for lst, typ in zip(all_entries, types):
        stop, position,color,size,rel_pos, rest, first_word = split_first_words(model_group, lst)
        stop_ratio, pos_ratio,color_ratio,size_ratio,rel_pos_ratio,rest_ratio = calculate_ratios(stop, position,color,size,rel_pos,rest, first_word)

        if typ == "wrong":
            stop_wrong.append(stop_ratio)
            pos_wrong.append(pos_ratio)
            color_wrong.append(color_ratio)
            size_wrong.append(size_ratio)
            rel_pos_wrong.append(rel_pos_ratio)
            rest_wrong.append(rest_ratio)
        
        elif typ == "right":
            stop_right.append(stop_ratio)
            pos_right.append(pos_ratio)
            color_right.append(color_ratio)
            size_right.append(size_ratio)
            rel_pos_right.append(rel_pos_ratio)
            rest_right.append(rest_ratio)
       
        else:
            print("Something went very wrong")

    pos_all=[pos_right,pos_wrong]
    color_all=[color_right,color_wrong]
    size_all=[size_right,size_wrong]
    rel_pos_all=[rel_pos_right,rel_pos_wrong]
    stop_all=[stop_right,stop_wrong]
    rest_all=[rest_right,rest_wrong]
    
    
    return pos_all,color_all,size_all,rel_pos_all,stop_all,rest_all



In [10]:

#this method takes the first word in a sentence and categorises it into one of five lists
#a lists with all those words is returned
def split_all_words(model_group, index_list):

    stop_c,pos_c,rest_c,color_c,size_c,rel_pos_c=(0 for i in range(6))

    
    
    #sent_list = [model_group[i][-1][3].split() for i in index_list]
    #print("sent_list",len(sent_list))
   
    #stopwords_english = stopwords.words("english")
    
    stopwords_english=stopwords.words()
    stop,position,rest,color,size,rel_pos=([] for j in range(6))
    
    sent_list=[]
    for i in range(len(model_group)):
        if i in index_list:
           
            sent=model_group[i][-1][3].split()
            
            sent_list.append(sent)
    #for sent in sent_list:
        #print(sent)
            for word in sent:
                #print(word)
                
                if word in position_words:
                    position.append(word)
                    pos_c+=1
                
                elif word in color_words:
                    color.append(word)
                    color_c+=1

                elif word in size_words:
                    size.append(word)
                    size_c+=1
                    
                elif word in rel_pos_words:
                    rel_pos.append(word) 
                    rel_pos_c+=1

                elif word in stopwords_english:
                    stop.append(word)
                    stop_c+=1
                else:
                    rest.append(word) 
                    rest_c+=1      
    
    print()
    print("rest",rest_c)
    print()

    print("stop",stop_c)
    print()

    print("pos",pos_c)
    print()

    print("size",size_c)
    return stop, position, color, size, rel_pos, rest, sent_list


#calculates lists showing the ratio of position, colour, size or stopwords being 
#included in entries that are either completly right, wrong or mixed


def words(binary_grouped,model_group):
    stop_wrong,pos_wrong,rest_wrong,color_wrong,size_wrong,rel_pos_wrong=([] for i in range(6))
    stop_right,pos_right,rest_right,color_right,size_right,rel_pos_right=([] for i in range(6))
    stop_mixed,pos_mixed,rest_mixed,color_mixed,size_mixed,rel_pos_mixed=([] for i in range(6))


    # Split up the list into entries where each guess is correct, wrong, or mixed
    right_only = []
    wrong_only = []
    mixed_only = []

    

    for index, entry in enumerate(binary_grouped):
        
        if all(p == 1 for p in entry):
            right_only.append(index)
        elif all(p == 0 for p in entry):
            wrong_only.append(index)
        else:
            mixed_only.append(index)
        
    all_entries = [right_only, wrong_only, mixed_only]
    #print(len(right_only),len(wrong_only),len(mixed_only))
    types = ["right", "wrong", "mixed"]


    #loop through the right,wrong,mixed lists and make lists for each  word condition
    for lst, typ in zip(all_entries, types):
        stop, position,color,size,rel_pos, rest, first_word = split_all_words(model_group, lst)
        print("all_len",len(position)+len(color)+len(size)+len(rel_pos)+len(rest))
        print()
        stop_ratio, pos_ratio,color_ratio,size_ratio,rel_pos_ratio,rest_ratio = calculate_ratios(stop, position,color,size,rel_pos,rest, first_word)

        if typ == "wrong":
            stop_wrong.append(stop_ratio)
            pos_wrong.append(pos_ratio)
            color_wrong.append(color_ratio)
            size_wrong.append(size_ratio)
            rel_pos_wrong.append(rel_pos_ratio)
            rest_wrong.append(rest_ratio)
        
        elif typ == "right":
            stop_right.append(stop_ratio)
            pos_right.append(pos_ratio)
            color_right.append(color_ratio)
            size_right.append(size_ratio)
            rel_pos_right.append(rel_pos_ratio)
            rest_right.append(rest_ratio)
        elif typ == "mixed":
            stop_mixed.append(stop_ratio)
            pos_mixed.append(pos_ratio)
            color_mixed.append(color_ratio)
            size_mixed.append(size_ratio)
            rel_pos_mixed.append(rel_pos_ratio)
            rest_mixed.append(rest_ratio)
        else:
            print("Something went very wrong")

    pos_all=[pos_right,pos_wrong,pos_mixed]
    color_all=[color_right,color_wrong,color_mixed]
    size_all=[size_right,size_wrong,size_mixed]
    rel_pos_all=[rel_pos_right,rel_pos_wrong,rel_pos_mixed]
    stop_all=[stop_right,stop_wrong,stop_mixed]
    rest_all=[rest_right,rest_wrong,rest_mixed]

    

    return pos_all,color_all,size_all,rel_pos_all,stop_all,rest_all


words(binary_grouped,model_group)



rest 2551

stop 1235

pos 2085

size 77
all_len 5212


rest 1879

stop 1252

pos 1074

size 19
all_len 3434


rest 2395

stop 2174

pos 2196

size 45
all_len 5177



([[0.32], [0.23], [0.3]],
 [[0.06], [0.06], [0.05]],
 [[0.01], [0.0], [0.01]],
 [[0.02], [0.04], [0.03]],
 [[0.19], [0.27], [0.3]],
 [[0.4], [0.4], [0.33]])

In [11]:
len(model_group)

5095

In [12]:
def make_wordtype_frame(type_list,typ,setting):

    data_typ={}

    dataset_index=["unc testB", "unc testA", "unc val","unc+ testB", "unc+ testA", "unc+ val", "gref_umd test", "gref_umd val", "gref val"]
    #dataset_index=["unc testB","unc testA","unc val"]
   

   
    data_typ[typ]=dataset_index
    data_typ["Right"]=type_list[0]
    data_typ["Wrong"]=type_list[1]

    if setting=="all":
        data_typ["Mixed"]=type_list[2]


    df_typ=pd.DataFrame(data_typ)

    df_typ[df_typ.select_dtypes(include=['number']).columns] *= 100

    df_typ.set_index(typ,inplace=True)
    
    
    #with open(r"/home/users/fschreiber/project/data_frames/all words/"+typ+"_tvg.p","wb") as f:
    #    pickle.dump(df_typ,f)
   


    
    df_typ=df_typ.style.format(precision=1).background_gradient(cmap="Blues",axis=1)
    
   
    return df_typ

In [13]:
setting="first"
mode="inc"
model_input="TVG"
split_list=["testB","testA","val","test"]
dataset_list=["unc","unc+","gref_umd","gref"]
#dataset_list=["unc"]

pos_all,color_all,size_all,rel_pos_all,stop_all,rest_all=([] for i in range(6))


#load the data
for file in dataset_list:
    for split in split_list:
    
        #if the data set does not exist ignore
        pred_bbox_list,target_bbox_list,inc_len,model,binary_grouped= load_data(model_input,mode,file,split)
        if pred_bbox_list==-1 or target_bbox_list==-1:
             
             pass
        
        else:
        
            print(file+split)

            model_group=group_by_increment(model,inc_len)

            if setting=="first":
                pos_hold,color_hold,size_hold,rel_pos_hold,stop_hold,rest_hold=first_words(binary_grouped,model_group)
            
            elif setting =="all":

                pos_hold,color_hold,size_hold,rel_pos_hold,stop_hold,rest_hold=words(binary_grouped,model_group)

            elif setting=="change":
                pos_hold,color_hold,size_hold,rel_pos_hold,stop_hold,rest_hold=change_words(binary_grouped,model_group)
            
            else:
                print("setting can be either first or all you choose:",setting)
                break

            pos_all.append(pos_hold)
            color_all.append(color_hold)
            size_all.append(size_hold)
            rel_pos_all.append(rel_pos_hold)
            stop_all.append(stop_hold)
            rest_all.append(rest_hold)



all_lists=[pos_all,color_all,size_all,rel_pos_all,stop_all,rest_all]
all_typ=["Position","Color","Size","Relative Position","Stopwords","Rest"]


#transform data into more readable format
for i in range(len(all_lists)):

    flattened_list = [item for sublist1 in all_lists[i] for sublist2 in sublist1 for item in sublist2]

    if setting=="first" or setting=="change":
        list1, list2 = flattened_list[::2], flattened_list[1::2]
        all_lists[i]=[list1,list2]

    elif setting=="all":
        list1, list2, list3 = flattened_list[::3], flattened_list[1::3], flattened_list[2::3]

        all_lists[i] = [list1, list2, list3]
    

#show dataframes
for i,j in zip(all_lists,all_typ):

    display(make_wordtype_frame(i,j,setting))
    #df=make_wordtype_frame(i,j,setting)

    #display(df)


    


unctestB
unctestA
uncval
unc+testB
unc+testA
unc+val
gref_umdval
gref_umdtest
grefval


Unnamed: 0_level_0,Right,Wrong
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,48.0,23.0
unc testA,22.0,9.0
unc val,33.0,16.0
unc+ testB,8.0,3.0
unc+ testA,1.0,2.0
unc+ val,4.0,3.0
gref_umd test,1.0,1.0
gref_umd val,1.0,1.0
gref val,1.0,1.0


Unnamed: 0_level_0,Right,Wrong
Color,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,9.0,13.0
unc testA,9.0,9.0
unc val,11.0,10.0
unc+ testB,21.0,15.0
unc+ testA,23.0,13.0
unc+ val,23.0,14.0
gref_umd test,3.0,4.0
gref_umd val,4.0,4.0
gref val,4.0,4.0


Unnamed: 0_level_0,Right,Wrong
Size,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,2.0,1.0
unc testA,1.0,1.0
unc val,2.0,1.0
unc+ testB,8.0,3.0
unc+ testA,2.0,2.0
unc+ val,5.0,3.0
gref_umd test,1.0,1.0
gref_umd val,2.0,1.0
gref val,1.0,1.0


Unnamed: 0_level_0,Right,Wrong
Relative Position,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,0.0,1.0
unc testA,0.0,0.0
unc val,0.0,0.0
unc+ testB,0.0,1.0
unc+ testA,0.0,0.0
unc+ val,0.0,0.0
gref_umd test,0.0,0.0
gref_umd val,0.0,0.0
gref val,0.0,0.0


Unnamed: 0_level_0,Right,Wrong
Stopwords,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,4.0,7.0
unc testA,3.0,7.0
unc val,3.0,7.0
unc+ testB,5.0,9.0
unc+ testA,3.0,8.0
unc+ val,4.0,8.0
gref_umd test,68.0,79.0
gref_umd val,68.0,79.0
gref val,64.0,80.0


Unnamed: 0_level_0,Right,Wrong
Rest,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,35.0,55.0
unc testA,64.0,74.0
unc val,50.0,66.0
unc+ testB,57.0,69.0
unc+ testA,70.0,76.0
unc+ val,64.0,73.0
gref_umd test,26.0,15.0
gref_umd val,26.0,15.0
gref val,29.0,15.0


In [13]:
import dataframe_image as dfi



In [12]:
with open(r"/home/users/fschreiber/project/data_frames/first words/Color_resc.p","rb") as f:
    color_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Color_tvg.p","rb") as f:
    color_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Position_resc.p","rb") as f:
    pos_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Position_tvg.p","rb") as f:
    pos_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Relative Position_resc.p","rb") as f:
    rel_pos_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Relative Position_tvg.p","rb") as f:
    rel_pos_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Size_resc.p","rb") as f:
    size_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Size_tvg.p","rb") as f:
    size_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Stopwords_resc.p","rb") as f:
    stopwords_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Stopwords_tvg.p","rb") as f:
    stopwords_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Rest_resc.p","rb") as f:
    rest_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/first words/Rest_tvg.p","rb") as f:
    rest_tvg_df=pickle.load(f)


resc_all_first=[pos_resc_df,rel_pos_resc_df,color_resc_df,size_resc_df,stopwords_resc_df,rest_resc_df]
tvg_all_first=[pos_tvg_df,rel_pos_tvg_df,color_tvg_df,size_tvg_df,stopwords_tvg_df,rest_tvg_df]



In [13]:
with open(r"/home/users/fschreiber/project/data_frames/all words/Color_resc.p","rb") as f:
    color_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Color_tvg.p","rb") as f:
    color_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Position_resc.p","rb") as f:
    pos_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Position_tvg.p","rb") as f:
    pos_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Relative Position_resc.p","rb") as f:
    rel_pos_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Relative Position_tvg.p","rb") as f:
    rel_pos_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Size_resc.p","rb") as f:
    size_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Size_tvg.p","rb") as f:
    size_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Stopwords_resc.p","rb") as f:
    stopwords_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Stopwords_tvg.p","rb") as f:
    stopwords_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Rest_resc.p","rb") as f:
    rest_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/all words/Rest_tvg.p","rb") as f:
    rest_tvg_df=pickle.load(f)


resc_all_all=[pos_resc_df,rel_pos_resc_df,color_resc_df,size_resc_df,stopwords_resc_df,rest_resc_df]
tvg_all_all=[pos_tvg_df,rel_pos_tvg_df,color_tvg_df,size_tvg_df,stopwords_tvg_df,rest_tvg_df]



In [2]:
with open(r"/home/users/fschreiber/project/data_frames/change words/Color_resc.p","rb") as f:
    color_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Color_tvg.p","rb") as f:
    color_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Position_resc.p","rb") as f:
    pos_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Position_tvg.p","rb") as f:
    pos_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Relative Position_resc.p","rb") as f:
    rel_pos_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Relative Position_tvg.p","rb") as f:
    rel_pos_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Size_resc.p","rb") as f:
    size_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Size_tvg.p","rb") as f:
    size_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Stopwords_resc.p","rb") as f:
    stopwords_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Stopwords_tvg.p","rb") as f:
    stopwords_tvg_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Rest_resc.p","rb") as f:
    rest_resc_df=pickle.load(f)

with open(r"/home/users/fschreiber/project/data_frames/change words/Rest_tvg.p","rb") as f:
    rest_tvg_df=pickle.load(f)


resc_all_change=[pos_resc_df,rel_pos_resc_df,color_resc_df,size_resc_df,stopwords_resc_df,rest_resc_df]
tvg_all_change=[pos_tvg_df,rel_pos_tvg_df,color_tvg_df,size_tvg_df,stopwords_tvg_df,rest_tvg_df]



Unnamed: 0_level_0,Right,Wrong
Position,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,64.0,26.0
unc testA,50.0,19.0
unc val,55.0,21.0
unc+ testB,9.0,7.0
unc+ testA,6.0,3.0
unc+ val,8.0,6.0
gref_umd test,11.0,7.0
gref_umd val,12.0,6.0
gref val,12.0,5.0


In [37]:
for resc,tvg in zip(resc_all_all,tvg_all_all):
    print("All")
    #display((resc+tvg)/2)
    print(((resc+tvg)/2).to_latex())
    print()




for resc,tvg in zip(resc_all_first,tvg_all_first):
    print("First")
    #display((resc+tvg)/2)
    print(((resc+tvg)/2).to_latex())
    print()


for resc,tvg in zip(resc_all_change,tvg_all_change):
    print("Change")
    #display((resc+tvg)/2)
    print(((resc+tvg)/2).to_latex())
    print()



All
\begin{tabular}{lrrr}
\toprule
{} &  Right &  Wrong &  Mixed \\
Position      &        &        &        \\
\midrule
unc testB     &   54.5 &   24.5 &   20.5 \\
unc testA     &   24.5 &    9.5 &    9.0 \\
unc val       &   36.5 &   18.0 &   14.5 \\
unc+ testB    &    8.5 &    4.0 &    3.0 \\
unc+ testA    &    1.5 &    1.5 &    1.5 \\
unc+ val      &    4.0 &    3.0 &    2.5 \\
gref\_umd test &    2.0 &    1.0 &    1.0 \\
gref\_umd val  &    1.0 &    1.0 &    1.0 \\
gref val      &    2.0 &    1.0 &    1.0 \\
\bottomrule
\end{tabular}


All
\begin{tabular}{lrrr}
\toprule
{} &  Right &  Wrong &  Mixed \\
Relative Position &        &        &        \\
\midrule
unc testB         &    0.0 &    1.0 &    0.5 \\
unc testA         &    0.0 &    0.0 &    0.0 \\
unc val           &    0.0 &    0.0 &    0.0 \\
unc+ testB        &    0.0 &    0.5 &    0.0 \\
unc+ testA        &    0.0 &    0.0 &    0.0 \\
unc+ val          &    0.0 &    0.0 &    0.0 \\
gref\_umd test     &    0.0 &    0.0 &  

In [29]:
typ="Size"
types=["Position","Color","Size","Stopwords","Rest"]

everything=[resc_all_first,tvg_all_first,resc_all_all,tvg_all_all,resc_all_change,tvg_all_change]

for i in range(len(types)):

    if types[i]==typ:
        
        for j in everything:

            print(j[i].mean())
            
            display(j[i].style.format(precision=1).background_gradient(cmap="Blues",axis=1))
           

Right    3.000000
Wrong    1.666667
dtype: float64


Unnamed: 0_level_0,Right,Wrong
Size,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,3.0,1.0
unc testA,1.0,1.0
unc val,2.0,1.0
unc+ testB,8.0,4.0
unc+ testA,2.0,2.0
unc+ val,6.0,3.0
gref_umd test,1.0,1.0
gref_umd val,2.0,1.0
gref val,2.0,1.0


Right    2.666667
Wrong    1.555556
dtype: float64


Unnamed: 0_level_0,Right,Wrong
Size,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,2.0,1.0
unc testA,1.0,1.0
unc val,2.0,1.0
unc+ testB,8.0,3.0
unc+ testA,2.0,2.0
unc+ val,5.0,3.0
gref_umd test,1.0,1.0
gref_umd val,2.0,1.0
gref val,1.0,1.0


Right    3.333333
Wrong    1.666667
Mixed    1.333333
dtype: float64


Unnamed: 0_level_0,Right,Wrong,Mixed
Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,3.0,1.0,1.0
unc testA,2.0,1.0,0.0
unc val,2.0,2.0,1.0
unc+ testB,9.0,4.0,3.0
unc+ testA,2.0,1.0,2.0
unc+ val,6.0,3.0,3.0
gref_umd test,2.0,1.0,1.0
gref_umd val,2.0,1.0,1.0
gref val,2.0,1.0,0.0


Right    3.333333
Wrong    1.666667
Mixed    1.666667
dtype: float64


Unnamed: 0_level_0,Right,Wrong,Mixed
Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,3.0,2.0,1.0
unc testA,1.0,1.0,1.0
unc val,2.0,1.0,1.0
unc+ testB,10.0,3.0,4.0
unc+ testA,2.0,2.0,2.0
unc+ val,6.0,3.0,3.0
gref_umd test,2.0,1.0,1.0
gref_umd val,2.0,1.0,1.0
gref val,2.0,1.0,1.0


Right    0.888889
Wrong    0.666667
dtype: float64


Unnamed: 0_level_0,Right,Wrong
Size,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,0.0,1.0
unc testA,0.0,0.0
unc val,0.0,0.0
unc+ testB,1.0,0.0
unc+ testA,0.0,1.0
unc+ val,1.0,1.0
gref_umd test,2.0,1.0
gref_umd val,2.0,1.0
gref val,2.0,1.0


Right    0.888889
Wrong    0.555556
dtype: float64


Unnamed: 0_level_0,Right,Wrong
Size,Unnamed: 1_level_1,Unnamed: 2_level_1
unc testB,0.0,1.0
unc testA,0.0,0.0
unc val,0.0,0.0
unc+ testB,1.0,0.0
unc+ testA,0.0,0.0
unc+ val,1.0,1.0
gref_umd test,2.0,1.0
gref_umd val,2.0,1.0
gref val,2.0,1.0


In [14]:
for resc,tvg in zip(resc_all_all,tvg_all_all):
    
    #hold=(resc+tvg)/2

    #with open(r"/home/users/fschreiber/project/data_frames/first words/"+typ+"_avg.p","wb") as f:
    #    pickle.dump(hold,f)
    
    #df=hold.style.format(precision=3).background_gradient(cmap="Blues",axis=1)
    #resc[resc.select_dtypes(include=['number']).columns] *= 100
    #tvg[tvg.select_dtypes(include=['number']).columns] *= 100

    df_resc=resc.style.format(precision=1).background_gradient(cmap="Blues",axis=1)
    df_tvg=tvg.style.format(precision=1).background_gradient(cmap="Blues",axis=1)
    
    display(df_resc)
    display(df_tvg)

#color_avg=(color_resc_df+color_tvg_df)/2

#df_typ=color_avg.style.format(precision=3).background_gradient(cmap="Blues",axis=1)
   
#df_typ

Unnamed: 0_level_0,Right,Wrong,Mixed
Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,54.0,25.0,20.0
unc testA,25.0,10.0,9.0
unc val,36.0,18.0,15.0
unc+ testB,8.0,5.0,3.0
unc+ testA,2.0,1.0,1.0
unc+ val,4.0,3.0,2.0
gref_umd test,2.0,1.0,1.0
gref_umd val,1.0,1.0,1.0
gref val,2.0,1.0,1.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,55.0,24.0,21.0
unc testA,24.0,9.0,9.0
unc val,37.0,18.0,14.0
unc+ testB,9.0,3.0,3.0
unc+ testA,1.0,2.0,2.0
unc+ val,4.0,3.0,3.0
gref_umd test,2.0,1.0,1.0
gref_umd val,1.0,1.0,1.0
gref val,2.0,1.0,1.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Relative Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,0.0,1.0,0.0
unc testA,0.0,0.0,0.0
unc val,0.0,0.0,0.0
unc+ testB,0.0,0.0,0.0
unc+ testA,0.0,0.0,0.0
unc+ val,0.0,0.0,0.0
gref_umd test,0.0,0.0,0.0
gref_umd val,0.0,0.0,0.0
gref val,0.0,0.0,0.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Relative Position,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,0.0,1.0,1.0
unc testA,0.0,0.0,0.0
unc val,0.0,0.0,0.0
unc+ testB,0.0,1.0,0.0
unc+ testA,0.0,0.0,0.0
unc+ val,0.0,0.0,0.0
gref_umd test,0.0,0.0,0.0
gref_umd val,0.0,0.0,0.0
gref val,0.0,0.0,0.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,11.0,14.0,10.0
unc testA,9.0,13.0,7.0
unc val,11.0,12.0,8.0
unc+ testB,24.0,18.0,11.0
unc+ testA,24.0,18.0,13.0
unc+ val,25.0,16.0,13.0
gref_umd test,4.0,5.0,4.0
gref_umd val,4.0,5.0,4.0
gref val,4.0,5.0,3.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,9.0,15.0,13.0
unc testA,9.0,13.0,7.0
unc val,11.0,13.0,9.0
unc+ testB,23.0,14.0,16.0
unc+ testA,26.0,14.0,12.0
unc+ val,25.0,14.0,14.0
gref_umd test,3.0,5.0,4.0
gref_umd val,3.0,5.0,4.0
gref val,4.0,5.0,4.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,3.0,1.0,1.0
unc testA,2.0,1.0,0.0
unc val,2.0,2.0,1.0
unc+ testB,9.0,4.0,3.0
unc+ testA,2.0,1.0,2.0
unc+ val,6.0,3.0,3.0
gref_umd test,2.0,1.0,1.0
gref_umd val,2.0,1.0,1.0
gref val,2.0,1.0,0.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,3.0,2.0,1.0
unc testA,1.0,1.0,1.0
unc val,2.0,1.0,1.0
unc+ testB,10.0,3.0,4.0
unc+ testA,2.0,2.0,2.0
unc+ val,6.0,3.0,3.0
gref_umd test,2.0,1.0,1.0
gref_umd val,2.0,1.0,1.0
gref val,2.0,1.0,1.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Stopwords,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,3.0,7.0,8.0
unc testA,2.0,5.0,7.0
unc val,2.0,7.0,7.0
unc+ testB,4.0,6.0,12.0
unc+ testA,3.0,5.0,8.0
unc+ val,3.0,6.0,9.0
gref_umd test,66.0,70.0,81.0
gref_umd val,66.0,72.0,80.0
gref val,58.0,71.0,82.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Stopwords,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,3.0,6.0,8.0
unc testA,2.0,5.0,7.0
unc val,2.0,7.0,7.0
unc+ testB,4.0,7.0,10.0
unc+ testA,3.0,5.0,8.0
unc+ val,3.0,6.0,9.0
gref_umd test,62.0,71.0,80.0
gref_umd val,61.0,71.0,81.0
gref val,55.0,70.0,82.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Rest,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,29.0,53.0,60.0
unc testA,62.0,71.0,77.0
unc val,48.0,61.0,68.0
unc+ testB,55.0,67.0,71.0
unc+ testA,69.0,75.0,75.0
unc+ val,62.0,72.0,73.0
gref_umd test,27.0,24.0,14.0
gref_umd val,26.0,22.0,15.0
gref val,33.0,22.0,14.0


Unnamed: 0_level_0,Right,Wrong,Mixed
Rest,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
unc testB,31.0,53.0,57.0
unc testA,63.0,72.0,76.0
unc val,48.0,61.0,68.0
unc+ testB,53.0,73.0,67.0
unc+ testA,68.0,77.0,76.0
unc+ val,62.0,74.0,72.0
gref_umd test,31.0,23.0,15.0
gref_umd val,32.0,22.0,14.0
gref val,37.0,23.0,14.0
