In [1]:
import os

# General packages
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import PIL.Image
from tqdm.notebook import tqdm, trange
import glob
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet

from IPython.display import Image as IImage
from IPython.display import display, HTML
import warnings
warnings.filterwarnings("ignore")
tqdm.pandas()

In [2]:
import torch
import clip
from PIL import Image
import base64

import os

In [3]:
from sentence_transformers import SentenceTransformer, util
embedder = SentenceTransformer('all-MiniLM-L6-v2')


In [4]:
def find_gpus(nums=6):
    os.system('nvidia-smi -q -d Memory |grep -A4 GPU|grep Free >tmp_free_gpus')
    with open('tmp_free_gpus', 'r') as lines_txt:
        frees = lines_txt.readlines()
        idx_freeMemory_pair = [ (idx,int(x.split()[2]))
                              for idx,x in enumerate(frees) ]
    idx_freeMemory_pair.sort(key=lambda my_tuple:my_tuple[1],reverse=True)
    usingGPUs = [str(idx_memory_pair[0])
                    for idx_memory_pair in idx_freeMemory_pair[:nums] ]
    usingGPUs =  ','.join(usingGPUs)
    print('using GPU idx: #', usingGPUs)
    return usingGPUs


In [5]:
os.environ['CUDA_VISIBLE_DEVICES'] = find_gpus(nums=2)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
print()

using GPU idx: # 1,0
Using device: cuda



## get topics

In [6]:
from bertopic import BERTopic
def get_wordnet_pos(word):
    """Map POS tag to first character lemmatize() accepts"""
    tag = nltk.pos_tag([word])[0][1][0].upper()
    tag_dict = {"J": wordnet.ADJ,
                "N": wordnet.NOUN,
                "V": wordnet.VERB,
                "R": wordnet.ADV}

    return tag_dict.get(tag, wordnet.NOUN)

In [7]:
lemmatizer = WordNetLemmatizer()

In [8]:
topics = {196: {'cross',
  'crossing',
  'crosswalk',
  'pedestrian',
  'signal',
  'stripe',
  'traffic'},
 412: {'arrest',
  'pedestrian',
  'perp',
  'pers',
  'walk',
  'walkability',
  'walkable'},
 172: {'car',
  'lane',
  'pedestrian',
  'road',
  'street',
  'traffic',
  'transportation',
  'vehicle'},
 363: {'assault',
  'bystander',
  'intervene',
  'intervention',
  'pedestrian',
  'stalk',
  'stalker',
  'victim',
  'witness'},
 198: {'car',
  'garage',
  'park',
  'parking',
  'parkjockey',
  'pedestrian',
  'tow',
  'vehicle'},
 114: {'athlete',
  'disability',
  'paralympic',
  'paralympics',
  'pedestrian',
  'sport',
  'sportspeople',
  'wheelchair'},
 419: {'avenue',
  'boulevard',
  'intersection',
  'manhattan',
  'pedestrian',
  'road',
  'street',
  'suffix'},
 294: {'drive',
  'fine',
  'licence',
  'offence',
  'pedestrian',
  'penalty',
  'reckless',
  'speed',
  'ticket',
  'traffic',
  'violation'},
 388: {'brownian',
  'diffusion',
  'distribution',
  'markov',
  'pedestrian',
  'percolation',
  'step',
  'stochastic',
  'walk',
  'wiener'},
 314: {'circle',
  'interchange',
  'intersection',
  'junction',
  'lane',
  'pedestrian',
  'road',
  'roundabout',
  'traffic'},
 98: {'cross',
  'direction',
  'lane',
  'marker',
  'parking',
  'pedestrian',
  'road',
  'sign',
  'signage',
  'traffic'},
 80: {'accident',
  'brake',
  'collision',
  'drive',
  'fatality',
  'momentum',
  'nhtsa',
  'pedestrian',
  'vehicle',
  'velocity'},
 134: {'avenue',
  'edmonton',
  'expressway',
  'freeway',
  'gardiner',
  'highway',
  'lane',
  'ontario',
  'pedestrian',
  'road',
  'toronto'},
 432: {'alice',
  'book',
  'fiction',
  'novel',
  'pedestrian',
  'poem',
  'poetry',
  'pulitzer',
  'walker'},
 394: {'alley',
  'boulevard',
  'bridleway',
  'bridleways',
  'footpath',
  'path',
  'pedestrian'},
 53: {'asphalt',
  'cement',
  'concrete',
  'gravel',
  'pave',
  'pavement',
  'pedestrian',
  'pothole',
  'road',
  'sand'},
 56: {'bus',
  'pedestrian',
  'streetcar',
  'subway',
  'train',
  'tram',
  'transit',
  'transportation',
  'trolley'},
 150: {'freeway',
  'highway',
  'intersection',
  'lane',
  'pedestrian',
  'ramp',
  'road',
  'traffic'},
 278: {'deck',
  'park',
  'pedestrian',
  'promenade',
  'promenadef',
  'promenaden',
  'promener',
  'walk'},
 -1: {'american',
  'century',
  'example',
  'film',
  'her',
  'pedestrian',
  'public',
  'state',
  'street',
  'water'},
 43: {'black',
  'civil',
  'negro',
  'pedestrian',
  'protest',
  'protester',
  'race',
  'racial',
  'segregation',
  'white'},
 265: {'baby',
  'carriage',
  'child',
  'diaper',
  'harness',
  'hatch',
  'infant',
  'pedestrian',
  'stroller'},
 345: {'barrier',
  'bumper',
  'guardrail',
  'hazard',
  'pedestrian',
  'protect',
  'rail',
  'safety'},
 378: {'architect',
  'architecture',
  'building',
  'chicago',
  'city',
  'frederick',
  'park',
  'pedestrian'},
 11: {'freeway',
  'highway',
  'intersection',
  'interstate',
  'lane',
  'pedestrian',
  'road',
  'route',
  'terminus',
  'turnpike'},
 331: {'byway',
  'highway',
  'navajo',
  'parkway',
  'pedestrian',
  'road',
  'roadway',
  'route',
  'scenic'},
 202: {'bottleneck',
  'congest',
  'congestion',
  'delay',
  'downstream',
  'pedestrian',
  'road',
  'speed',
  'traffic',
  'vehicle'},
 169: {'apartment',
  'condominium',
  'home',
  'homeless',
  'homelessness',
  'house',
  'household',
  'housing',
  'pedestrian',
  'shelter'},
 327: {'bartholdi',
  'lazarus',
  'liberty',
  'monument',
  'pedestal',
  'pedestrian',
  'sculptor',
  'sculpture',
  'statue',
  'torch'},
 309: {'adelaide',
  'carriageworks',
  'close',
  'geelong',
  'line',
  'melbourne',
  'pedestrian',
  'rail',
  'railway',
  'train'},
 68: {'axle',
  'carriage',
  'chariot',
  'passenger',
  'pedestrian',
  'vehicle',
  'wagon',
  'wheel'},
 216: {'carriageway',
  'lane',
  'm1',
  'm11',
  'm25',
  'm32',
  'motorway',
  'pedestrian',
  'road',
  'roundabout'},
 40: {'arrest',
  'constable',
  'constabulary',
  'enforcement',
  'officer',
  'patrol',
  'pedestrian',
  'police'},
 99: {'building',
  'concrete',
  'construction',
  'insulation',
  'masonry',
  'pedestrian',
  'roof',
  'structure',
  'wall'},
 426: {'80211p',
  'cv2x',
  'pedestrian',
  'unlock',
  'v2v',
  'v2x',
  'vanet',
  'vanets',
  'vehicle',
  'vehicular'},
 344: {'adriatic',
  'censor',
  'corniche',
  'itinerary',
  'pave',
  'pedestrian',
  'road',
  'roman',
  'rome'},
 47: {'avenue',
  'bmt',
  'broadway',
  'broadwayseventh',
  'brooklyn',
  'manhattan',
  'pedestrian',
  'station',
  'subway',
  'train'},
 8: {'bicycle',
  'bike',
  'cycle',
  'cycling',
  'cyclist',
  'pedestrian',
  'rid',
  'rider',
  'tire'},
 416: {'descriptor',
  'detection',
  'detector',
  'keypoints',
  'ocr',
  'pedestrian',
  'pixel',
  'recognition',
  'sift',
  'vision'},
 112: {'bus',
  'congestion',
  'freight',
  'passenger',
  'pedestrian',
  'route',
  'traffic',
  'transit',
  'transport',
  'transportation'},
 137: {'expressway',
  'freeway',
  'highway',
  'lane',
  'motorway',
  'pedestrian',
  'road'},
 441: {'actor',
  'character',
  'drama',
  'hamlet',
  'novel',
  'pedestrian',
  'protagonist',
  'romeo',
  'shakespeare'},
 192: {'ataxia',
  'cerebellar',
  'cerebellum',
  'cerebral',
  'diplegia',
  'disorder',
  'gait',
  'palsy',
  'pedestrian',
  'symptom',
  'syndrome'},
 20: {'ankle', 'dance', 'dancer', 'foot', 'leg', 'pedestrian', 'toe'},
 296: {'area',
  'monument',
  'park',
  'pedestrian',
  'playground',
  'recreation',
  'scenic',
  'wilderness',
  'yellowstone'},
 144: {'',
  'address',
  'comma',
  'image',
  'more',
  'pedestrian',
  'person',
  'separate',
  'share',
  'video'},
 12: {'appalachian',
  'footpath',
  'hike',
  'hiker',
  'mountain',
  'path',
  'pedestrian',
  'trail'},
 224: {'bus',
  'depot',
  'fleet',
  'goahead',
  'pedestrian',
  'sb',
  'singapore',
  'smrt',
  'tram',
  'transit'},
 250: {'driver',
  'graphic',
  'kernel',
  'kernelmode',
  'kmdf',
  'linux',
  'pedestrian',
  'printer',
  'umdf',
  'usb'},
 376: {'bear',
  'cartoon',
  'charlie',
  'disney',
  'lucy',
  'mickey',
  'peanut',
  'pedestrian',
  'snoopy',
  'trolley'}}

In [9]:
# topics = get_topics(concept='aircraft')
q_id = [el for el in topics]

In [10]:
for el in topics:
    print(el, topics[el])

196 {'crosswalk', 'traffic', 'stripe', 'cross', 'crossing', 'signal', 'pedestrian'}
412 {'walkability', 'arrest', 'perp', 'pers', 'walk', 'pedestrian', 'walkable'}
172 {'street', 'traffic', 'car', 'transportation', 'vehicle', 'lane', 'pedestrian', 'road'}
363 {'stalk', 'intervene', 'victim', 'intervention', 'stalker', 'bystander', 'assault', 'pedestrian', 'witness'}
198 {'parkjockey', 'parking', 'car', 'park', 'tow', 'garage', 'pedestrian', 'vehicle'}
114 {'paralympics', 'sport', 'disability', 'wheelchair', 'sportspeople', 'paralympic', 'athlete', 'pedestrian'}
419 {'avenue', 'street', 'suffix', 'road', 'intersection', 'manhattan', 'pedestrian', 'boulevard'}
294 {'licence', 'speed', 'fine', 'ticket', 'violation', 'penalty', 'traffic', 'drive', 'reckless', 'offence', 'pedestrian'}
388 {'diffusion', 'percolation', 'distribution', 'walk', 'wiener', 'step', 'stochastic', 'markov', 'pedestrian', 'brownian'}
314 {'junction', 'intersection', 'roundabout', 'circle', 'traffic', 'interchange', '

In [11]:
topics[80]

{'accident',
 'brake',
 'collision',
 'drive',
 'fatality',
 'momentum',
 'nhtsa',
 'pedestrian',
 'vehicle',
 'velocity'}

In [12]:
pg_t = dict()
pg_t['a'] = [68, 112, 56, 224]
pg_t['b'] = [309, 344, 47, -1, 53]
pg_t['c'] = [314, 331, 419, 394]
pg_t['d'] = [98, 196, 202, 294]
pg_t['e'] = [172, 137, 11, 134, 150, 216]
pg_t['f'] = [363, 40, 43]
pg_t['g'] = [441, 376]
pg_t['h'] = [144, 416, 327]
pg_t['i'] = [192, 20, 114]
pg_t['j'] = [345]
pg_t['k'] = [8, 265]
pg_t['l'] = [250, 80, 426]
pg_t['m'] = [169, 99, 378]
pg_t['n'] = [432, 388, 412]
pg_t['o'] = [12, 278, 198, 296] 

In [13]:
for el in pg_t:
    group_words = []
    for t in pg_t[el]:
        group_words.extend(list(topics[t]))
#         break
    pg_t[el]= list(set(group_words))
    

In [14]:
print(pg_t[el])

['area', 'mountain', 'car', 'tow', 'pedestrian', 'path', 'deck', 'hike', 'hiker', 'parkjockey', 'park', 'garage', 'promenaden', 'wilderness', 'vehicle', 'promener', 'trail', 'scenic', 'promenadef', 'playground', 'walk', 'parking', 'yellowstone', 'promenade', 'monument', 'appalachian', 'footpath', 'recreation']


In [15]:
for g in pg_t:
    pg_t[g] = ' '.join(pg_t[g])

In [16]:
pg_t[g]

'area mountain car tow pedestrian path deck hike hiker parkjockey park garage promenaden wilderness vehicle promener trail scenic promenadef playground walk parking yellowstone promenade monument appalachian footpath recreation'

In [17]:
toopic_dict = dict()
for el in topics:
    toopic_dict[el] = ' '.join(topics[el])
toopic_dict[el]

'lucy trolley charlie bear cartoon disney peanut snoopy pedestrian mickey'

## Load Predictions

In [18]:
# pd.set_option('display.max_rows', None)

# d_path = '/raid/AISSEL/htest/datasets/ped_data/ped_ftest/all'
# before
base_path = '.'
df_b = pd.read_pickle(f'{base_path}/pd_base.pk')

# after q50_over_avg
d_path = '/raid/AISSEL/htest/datasets/ped_data/ped_ftest/missed_q50_over_avg'
df_f = pd.read_pickle(f'{d_path}/pd_mq50_over_avg.pk')


# wit
d_path = '/raid/AISSEL/htest/datasets/ped_data/wit/missed_q50_over_avg'
df_w = pd.read_pickle(f'{d_path}/ped_mq50_over_avg.pk')
# df

# random
d_path = '/raid/AISSEL/htest/datasets/ped_data/wit/random_missed'
df_r = pd.read_pickle(f'{d_path}/ped_mq50_over_avg.pk')
# df

In [19]:
pedestrian_list = ['people', 'someone', 'individual', 'worker', 'child', 'human', 
                   'somebody', 'man', 'boy', 'human', 'person', 'mankind', 
                   'humankind', 'guy', 'gentleman', 'someone', 'girl', 'men', 
                   'woman', 'women', 'lady', 'madam', 'pedestrian', 'kid']

In [20]:
def create_gt(tag):
    context = ""
    for el in tag:
        if tag[el]:
            context = context + " " + el
    if not context:
        context = None
    return context

In [21]:
# pedestrian_list
def flag_search_gt(tag, t):
    context = ""
    for el in tag:
        if tag[el]:
            context = context + " " + el
        
    for w in t:
        if w in context:
            return float(1)
    return float(0)

In [22]:
def get_percentage(tag):
    sum_val = 0
    for el in tag:
        sum_val = sum_val + tag[el]
    return sum_val / len(tag)

In [23]:
def get_sim(u,v):

    emb1 = embedder.encode(u)
    if isinstance(v, list):
        v = ' '.join(v)
    emb2 = embedder.encode(v)
    
    return float(util.cos_sim(emb1, emb2)[0][0])

In [24]:
def simple_search(context, t):
    percentage = 0
    for w in t:
        if w in context:
            percentage = percentage + 1
    return float(percentage/len(t))

In [25]:
def calc_sim(df):
    df = df[df['tag'].notna()]
    df = df.reset_index(drop=True)
    df = df[df['gt'].notna()]
    df = df.reset_index(drop=True)
    
    df['gt_coverage'] = df.apply(lambda x: get_percentage(x.tag), axis=1)
    df['org_top_cov'] = df.apply(lambda x: simple_search(x.caption, list(topics[x.topic_id])), axis=1)
    df['ofa_top_cov'] = df.apply(lambda x: simple_search(x.ofa_caption, list(topics[x.topic_id])), axis=1)

    df['gt_ped_flag'] = df.apply(lambda x: flag_search_gt(x.tag, pedestrian_list), axis=1)
    df['org_top_sim'] = df.apply(lambda x: get_sim(x.caption, list(topics[x.topic_id])), axis=1)
    df['ofa_top_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, list(topics[x.topic_id])), axis=1)
    df['ofa_con_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, ['pedestrian']), axis=1)
    df['org_con_sim'] = df.apply(lambda x: get_sim(x.caption, ['pedestrian']), axis=1)
    # df['ofa_gt_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, x.gt), axis=1)
    df['ofa_gt_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, create_gt(x.tag)), axis=1)
    df['org_gt_sim'] = df.apply(lambda x: get_sim(x.caption, create_gt(x.tag)), axis=1)
    df['human_con_sim'] = df.apply(lambda x: get_sim(x.h_caption, ['pedestrian']), axis=1)
    df['web_ofa_sim'] = df.apply(lambda x: get_sim(x.caption, x.ofa_caption), axis=1)
    gk = df.groupby('topics')
    pg_dict = dict()
    for el in gk.groups.keys():
        pg_dict[el] = gk.get_group(el)
        pg_dict[el] = pg_dict[el].reset_index(drop=True)
    return df, pg_dict

In [26]:
df_b, pg_dict_b = calc_sim(df_b)

In [27]:
df_f, pg_dict_f = calc_sim(df_f)

In [28]:
df_w, pg_dict_w = calc_sim(df_w)

In [29]:
df_r, pg_dict_r = calc_sim(df_r)

In [30]:
data = []
for el in pg_dict_b:
#     print(el)
    data.append([el, pg_dict_w[el].describe().round(2).at['mean','web_ofa_sim'], pg_dict_f[el].describe().round(2).at['mean','web_ofa_sim'], pg_dict_b[el].describe().round(2).at['mean','web_ofa_sim'], pg_dict_r[el].describe().round(2).at['mean','web_ofa_sim']])
#     print(f"{pg_dict_b[el].describe().round(2).at['mean','web_ofa_sim']}")
#     print()

In [31]:
df_cap2cap = pd.DataFrame(data, columns = ['Topic', 'App_1', 'App_2', 'Base', 'Random'])
df_cap2cap

Unnamed: 0,Topic,App_1,App_2,Base,Random
0,a,0.35,0.39,0.34,0.31
1,b,0.43,0.49,0.42,0.4
2,c,0.48,0.52,0.44,0.43
3,d,0.58,0.59,0.52,0.5
4,e,0.43,0.47,0.41,0.34
5,f,0.29,0.24,0.22,0.2
6,g,0.3,0.36,0.31,0.31
7,h,0.4,0.46,0.42,0.37
8,i,0.45,0.49,0.42,0.41
9,j,0.32,0.38,0.27,0.19


In [32]:
data = []
for el in pg_dict_b:
    data.append([el, pg_dict_w[el].describe().round(2).at['mean','ofa_con_sim'], pg_dict_f[el].describe().round(2).at['mean','ofa_con_sim'], pg_dict_b[el].describe().round(2).at['mean','ofa_con_sim'], pg_dict_r[el].describe().round(2).at['mean','ofa_con_sim']])


In [33]:
df_cap2con = pd.DataFrame(data, columns = ['Topic', 'App_1', 'App_2', 'Base', 'Random'])
df_cap2con

Unnamed: 0,Topic,App_1,App_2,Base,Random
0,a,0.34,0.31,0.27,0.25
1,b,0.39,0.38,0.3,0.29
2,c,0.42,0.41,0.33,0.32
3,d,0.51,0.46,0.4,0.38
4,e,0.46,0.38,0.29,0.3
5,f,0.29,0.21,0.19,0.25
6,g,0.25,0.24,0.2,0.14
7,h,0.38,0.38,0.34,0.29
8,i,0.33,0.29,0.26,0.25
9,j,0.34,0.33,0.26,0.22


In [34]:
data = []
for el in pg_dict_b:
    data.append([el, pg_dict_w[el].describe().round(2).at['mean','ofa_gt_sim'], pg_dict_f[el].describe().round(2).at['mean','ofa_gt_sim'], pg_dict_b[el].describe().round(2).at['mean','ofa_gt_sim'], pg_dict_r[el].describe().round(2).at['mean','ofa_gt_sim']])


In [35]:
df_cap2top = pd.DataFrame(data, columns = ['Topic', 'App_1', 'App_2', 'Base', 'Random'])
df_cap2top

Unnamed: 0,Topic,App_1,App_2,Base,Random
0,a,0.39,0.39,0.36,0.31
1,b,0.4,0.43,0.37,0.35
2,c,0.43,0.43,0.36,0.34
3,d,0.53,0.51,0.5,0.47
4,e,0.4,0.42,0.41,0.34
5,f,0.32,0.2,0.22,0.2
6,g,0.29,0.31,0.26,0.24
7,h,0.4,0.41,0.4,0.34
8,i,0.4,0.39,0.36,0.35
9,j,0.31,0.36,0.21,0.18


In [36]:
df_cap2cap.to_csv(f'ped_cap2cap.csv', index=False)
df_cap2con.to_csv(f'ped_cap2con.csv', index=False)
df_cap2top.to_csv(f'ped_cap2top.csv', index=False)

In [None]:
# df['gt_coverage'] = df.apply(lambda x: get_percentage(x.tag), axis=1)

In [486]:
# df['org_top_cov'] = df.apply(lambda x: simple_search(x.caption, list(topics[x.topic_id])), axis=1)
# df['ofa_top_cov'] = df.apply(lambda x: simple_search(x.ofa_caption, list(topics[x.topic_id])), axis=1)

# df['gt_ped_flag'] = df.apply(lambda x: flag_search_gt(x.tag, pedestrian_list), axis=1)
# df['org_top_sim'] = df.apply(lambda x: get_sim(x.caption, list(topics[x.topic_id])), axis=1)
# df['ofa_top_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, list(topics[x.topic_id])), axis=1)


In [487]:
# df['ofa_con_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, ['pedestrian']), axis=1)
# df['org_con_sim'] = df.apply(lambda x: get_sim(x.caption, ['pedestrian']), axis=1)
# # df['ofa_gt_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, x.gt), axis=1)
# df['ofa_gt_sim'] = df.apply(lambda x: get_sim(x.ofa_caption, create_gt(x.tag)), axis=1)
# df['org_gt_sim'] = df.apply(lambda x: get_sim(x.caption, create_gt(x.tag)), axis=1)

In [488]:
# df['human_con_sim'] = df.apply(lambda x: get_sim(x.h_caption, ['pedestrian']), axis=1)
# df['web_ofa_sim'] = df.apply(lambda x: get_sim(x.caption, x.ofa_caption), axis=1)

In [490]:
# # av_groups = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'k']
# gk = df.groupby('topics')
# pg_dict = dict()


In [492]:
# for el in gk.groups.keys():
#     pg_dict[el] = gk.get_group(el)
#     pg_dict[el] = pg_dict[el].reset_index(drop=True)

In [31]:
pg_dict_b['a']

Unnamed: 0,uniq_id,image_id,caption,topic_id,labels,image,topic2caption_sim,concept2caption_sim,topics,tag,...,ofa_top_cov,gt_ped_flag,org_top_sim,ofa_top_sim,ofa_con_sim,org_con_sim,ofa_gt_sim,org_gt_sim,human_con_sim,web_ofa_sim
0,167,167,Car on rural road vector Stock Photo,68,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.376758,0.212455,a,"{'passenger': 0, 'wheel': 1, 'vehicle': 1, 'wa...",...,0.0,0.0,0.347474,0.174121,0.291389,0.212455,0.339229,0.406319,0.269255,0.505929
1,184,184,Bus Only Lane Rendering Kuhio and Walina,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.392783,0.160691,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.1,1.0,0.369608,0.367089,0.306415,0.160691,0.465906,0.398409,0.299443,0.405279
2,187,187,Passenger boarding a bus on Bayshore,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.438588,0.197647,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.1,1.0,0.406568,0.447455,0.315319,0.197647,0.544593,0.44638,0.270336,0.465499
3,192,192,Passenger boarding a bus on Bayshore,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.438588,0.197647,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.1,1.0,0.406568,0.447455,0.315319,0.197647,0.544593,0.44638,0.270336,0.465499
4,209,209,Pedestrian Connections Congestion Strategy,112,,/9j/7gAOQWRvYmUAZAAAAAAA/9sAQwAIBgYHBgUIBwcHCQ...,0.612697,0.493012,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.1,1.0,0.64239,0.34668,0.26763,0.493012,0.370596,0.635256,0.593294,0.172662
5,213,213,Commercial Vehicle Accommodations Congestion S...,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.437218,0.220831,a,"{'freight': 0, 'transportation': 1, 'traffic':...",...,0.0,0.0,0.467462,0.252557,0.148062,0.220831,0.254973,0.339962,0.359241,0.32245
6,226,226,LA Metro Traffic Reduction Study Congestion Pr...,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.552017,0.181466,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.1,0.0,0.571539,0.371986,0.227163,0.181466,0.375075,0.567769,0.312877,0.404182
7,237,237,examples of universal design which include bus...,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.402614,0.31551,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.0,1.0,0.382267,0.302053,0.224148,0.311081,0.320912,0.375126,0.317293,0.278122
8,260,260,Traffic and pedestrians in Mumbai,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.453251,0.519047,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.2,1.0,0.489579,0.445729,0.357262,0.519047,0.436709,0.512142,0.501388,0.524769
9,264,264,People cross crowded Mission Street as a bus w...,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.488813,0.31295,a,"{'freight': 1, 'transportation': 1, 'traffic':...",...,0.1,1.0,0.493534,0.381377,0.384256,0.31295,0.370342,0.506553,0.524679,0.672055


In [32]:
pg_dict_b['a'].describe().round(2)

Unnamed: 0,uniq_id,image_id,topic_id,topic2caption_sim,concept2caption_sim,gt_coverage,org_top_cov,ofa_top_cov,gt_ped_flag,org_top_sim,ofa_top_sim,ofa_con_sim,org_con_sim,ofa_gt_sim,org_gt_sim,human_con_sim,web_ofa_sim
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,481.39,481.39,136.39,0.47,0.24,0.53,0.06,0.06,0.76,0.48,0.3,0.27,0.23,0.36,0.49,0.33,0.34
std,192.26,192.26,76.76,0.06,0.08,0.2,0.07,0.06,0.43,0.11,0.11,0.07,0.08,0.15,0.12,0.09,0.14
min,167.0,167.0,56.0,0.38,0.16,0.2,0.0,0.0,0.0,0.1,0.04,0.01,0.13,0.04,0.13,0.2,0.01
25%,378.0,378.0,56.0,0.44,0.18,0.4,0.0,0.0,1.0,0.42,0.24,0.23,0.18,0.24,0.43,0.26,0.27
50%,443.0,443.0,112.0,0.47,0.22,0.5,0.0,0.1,1.0,0.49,0.29,0.27,0.2,0.35,0.48,0.32,0.35
75%,643.5,643.5,224.0,0.51,0.27,0.56,0.11,0.11,1.0,0.55,0.38,0.3,0.26,0.44,0.58,0.38,0.44
max,814.0,814.0,224.0,0.61,0.52,1.0,0.22,0.22,1.0,0.64,0.51,0.4,0.52,0.69,0.69,0.59,0.67


In [33]:
# Test
df_b[['gt_coverage', 'gt_ped_flag', 'org_gt_sim']].describe().round(2)

Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,620.0,620.0,620.0
mean,0.49,0.65,0.5
std,0.23,0.48,0.14
min,0.09,0.0,0.07
25%,0.33,0.0,0.41
50%,0.45,1.0,0.52
75%,0.62,1.0,0.6
max,1.0,1.0,0.88


In [34]:
# Test
for el in pg_dict_b:
    print(el)
    display(pg_dict_b[el][['gt_coverage', 'gt_ped_flag', 'org_gt_sim']].describe().round(2))

a


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,51.0,51.0,51.0
mean,0.53,0.76,0.49
std,0.2,0.43,0.12
min,0.2,0.0,0.13
25%,0.4,1.0,0.43
50%,0.5,1.0,0.48
75%,0.56,1.0,0.58
max,1.0,1.0,0.69


b


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,103.0,103.0,103.0
mean,0.4,0.82,0.49
std,0.16,0.39,0.12
min,0.1,0.0,0.14
25%,0.3,1.0,0.41
50%,0.4,1.0,0.51
75%,0.5,1.0,0.58
max,0.7,1.0,0.77


c


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,57.0,57.0,57.0
mean,0.43,0.63,0.49
std,0.18,0.49,0.09
min,0.11,0.0,0.25
25%,0.29,0.0,0.44
50%,0.44,1.0,0.49
75%,0.56,1.0,0.55
max,0.78,1.0,0.65


d


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,62.0,62.0,62.0
mean,0.45,0.16,0.59
std,0.13,0.37,0.11
min,0.1,0.0,0.29
25%,0.4,0.0,0.55
50%,0.41,0.0,0.61
75%,0.5,0.0,0.66
max,0.71,1.0,0.77


e


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,46.0,46.0,46.0
mean,0.5,0.59,0.54
std,0.24,0.5,0.13
min,0.09,0.0,0.33
25%,0.36,0.0,0.45
50%,0.45,1.0,0.55
75%,0.6,1.0,0.61
max,1.0,1.0,0.83


f


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,16.0,16.0,16.0
mean,0.67,0.62,0.5
std,0.33,0.5,0.11
min,0.12,0.0,0.3
25%,0.38,0.0,0.46
50%,0.72,1.0,0.5
75%,1.0,1.0,0.57
max,1.0,1.0,0.65


g


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,25.0,25.0,25.0
mean,0.24,0.56,0.45
std,0.11,0.51,0.17
min,0.1,0.0,0.13
25%,0.2,0.0,0.38
50%,0.2,1.0,0.5
75%,0.3,1.0,0.59
max,0.56,1.0,0.69


h


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,28.0,28.0,28.0
mean,0.27,1.0,0.49
std,0.12,0.0,0.12
min,0.2,1.0,0.25
25%,0.2,1.0,0.42
50%,0.2,1.0,0.46
75%,0.3,1.0,0.55
max,0.7,1.0,0.74


i


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,70.0,70.0,70.0
mean,0.58,0.67,0.54
std,0.23,0.47,0.16
min,0.12,0.0,0.17
25%,0.38,0.0,0.42
50%,0.55,1.0,0.61
75%,0.86,1.0,0.65
max,1.0,1.0,0.78


j


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,7.0,7.0,7.0
mean,0.93,0.43,0.66
std,0.07,0.53,0.07
min,0.88,0.0,0.56
25%,0.88,0.0,0.62
50%,0.88,0.0,0.66
75%,1.0,1.0,0.69
max,1.0,1.0,0.79


k


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,41.0,41.0,41.0
mean,0.83,0.98,0.5
std,0.12,0.16,0.12
min,0.44,0.0,0.11
25%,0.78,1.0,0.43
50%,0.78,1.0,0.52
75%,0.89,1.0,0.58
max,1.0,1.0,0.77


l


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,11.0,11.0,11.0
mean,0.44,0.64,0.34
std,0.27,0.5,0.14
min,0.1,0.0,0.18
25%,0.2,0.0,0.26
50%,0.5,1.0,0.28
75%,0.6,1.0,0.39
max,0.8,1.0,0.68


m


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,25.0,25.0,25.0
mean,0.63,0.68,0.5
std,0.18,0.48,0.12
min,0.11,0.0,0.19
25%,0.6,0.0,0.46
50%,0.62,1.0,0.52
75%,0.75,1.0,0.56
max,0.88,1.0,0.74


n


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,29.0,29.0,29.0
mean,0.34,0.66,0.26
std,0.24,0.48,0.13
min,0.11,0.0,0.07
25%,0.11,0.0,0.13
50%,0.3,1.0,0.24
75%,0.57,1.0,0.4
max,0.71,1.0,0.49


o


Unnamed: 0,gt_coverage,gt_ped_flag,org_gt_sim
count,49.0,49.0,49.0
mean,0.54,0.51,0.51
std,0.15,0.51,0.14
min,0.12,0.0,0.24
25%,0.5,0.0,0.42
50%,0.5,1.0,0.51
75%,0.62,1.0,0.59
max,0.88,1.0,0.88


# Random

In [408]:
df[['gt_ped_flag', 'gt_coverage', 'org_top_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2)

Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,620.0,620.0,620.0,620.0,620.0,620.0,620.0
mean,0.65,0.49,0.46,0.26,0.32,0.27,0.37
std,0.48,0.23,0.13,0.16,0.18,0.12,0.19
min,0.0,0.09,0.08,-0.09,-0.1,-0.06,-0.08
25%,0.0,0.33,0.39,0.15,0.18,0.2,0.23
50%,1.0,0.45,0.47,0.25,0.33,0.27,0.37
75%,1.0,0.62,0.55,0.38,0.45,0.34,0.5
max,1.0,1.0,0.86,0.74,0.84,0.68,0.87


In [409]:
# random
for el in pg_dict:
    print(el)
    display(pg_dict[el][['gt_ped_flag', 'gt_coverage', 'org_top_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2))

a


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,0.76,0.53,0.48,0.28,0.31,0.25,0.31
std,0.43,0.2,0.1,0.13,0.15,0.08,0.13
min,0.0,0.2,0.09,-0.05,0.07,0.07,0.07
25%,1.0,0.4,0.44,0.19,0.18,0.2,0.21
50%,1.0,0.5,0.49,0.28,0.31,0.23,0.29
75%,1.0,0.56,0.54,0.37,0.42,0.28,0.4
max,1.0,1.0,0.61,0.62,0.61,0.52,0.58


b


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,103.0,103.0,103.0,103.0,103.0,103.0,103.0
mean,0.82,0.4,0.41,0.24,0.35,0.29,0.4
std,0.39,0.16,0.11,0.1,0.13,0.1,0.15
min,0.0,0.1,0.08,0.02,0.08,0.09,0.02
25%,1.0,0.3,0.33,0.18,0.24,0.22,0.29
50%,1.0,0.4,0.4,0.22,0.35,0.29,0.41
75%,1.0,0.5,0.49,0.3,0.45,0.35,0.5
max,1.0,0.7,0.69,0.49,0.66,0.68,0.75


c


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,57.0,57.0,57.0,57.0,57.0,57.0,57.0
mean,0.63,0.43,0.43,0.28,0.34,0.32,0.43
std,0.49,0.18,0.06,0.11,0.14,0.12,0.17
min,0.0,0.11,0.31,0.01,0.03,0.06,0.01
25%,0.0,0.29,0.39,0.22,0.24,0.26,0.33
50%,1.0,0.44,0.42,0.27,0.36,0.32,0.43
75%,1.0,0.56,0.47,0.35,0.43,0.37,0.54
max,1.0,0.78,0.58,0.51,0.61,0.61,0.86


d


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,62.0,62.0,62.0,62.0,62.0,62.0,62.0
mean,0.16,0.45,0.63,0.45,0.47,0.38,0.5
std,0.37,0.13,0.1,0.21,0.2,0.14,0.19
min,0.0,0.1,0.37,-0.05,-0.06,0.06,0.07
25%,0.0,0.4,0.61,0.31,0.3,0.28,0.37
50%,0.0,0.41,0.64,0.5,0.55,0.36,0.49
75%,0.0,0.5,0.68,0.62,0.61,0.48,0.63
max,1.0,0.71,0.86,0.74,0.84,0.62,0.87


e


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,46.0,46.0,46.0,46.0,46.0,46.0,46.0
mean,0.59,0.5,0.53,0.27,0.34,0.3,0.34
std,0.5,0.24,0.09,0.14,0.15,0.09,0.13
min,0.0,0.09,0.36,-0.05,0.03,0.12,0.12
25%,0.0,0.36,0.46,0.16,0.24,0.25,0.23
50%,1.0,0.45,0.52,0.3,0.38,0.32,0.34
75%,1.0,0.6,0.58,0.38,0.45,0.33,0.45
max,1.0,1.0,0.8,0.56,0.55,0.56,0.66


f


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,0.62,0.67,0.49,0.18,0.2,0.25,0.2
std,0.5,0.33,0.09,0.2,0.17,0.09,0.2
min,0.0,0.12,0.31,-0.09,-0.1,0.05,-0.04
25%,0.0,0.38,0.45,0.02,0.1,0.23,0.03
50%,1.0,0.72,0.51,0.16,0.15,0.28,0.22
75%,1.0,1.0,0.55,0.29,0.31,0.3,0.29
max,1.0,1.0,0.62,0.49,0.49,0.37,0.72


g


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.56,0.24,0.36,0.2,0.24,0.14,0.31
std,0.51,0.11,0.17,0.14,0.17,0.11,0.17
min,0.0,0.1,0.11,-0.02,-0.04,-0.06,0.06
25%,0.0,0.2,0.16,0.12,0.11,0.07,0.19
50%,1.0,0.2,0.42,0.17,0.19,0.12,0.34
75%,1.0,0.3,0.47,0.3,0.37,0.19,0.46
max,1.0,0.56,0.6,0.45,0.54,0.4,0.61


h


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,28.0,28.0,28.0,28.0,28.0,28.0,28.0
mean,1.0,0.27,0.4,0.23,0.34,0.29,0.37
std,0.0,0.12,0.07,0.12,0.13,0.08,0.15
min,1.0,0.2,0.29,0.04,0.12,0.14,0.06
25%,1.0,0.2,0.35,0.16,0.25,0.26,0.25
50%,1.0,0.2,0.4,0.2,0.31,0.28,0.41
75%,1.0,0.3,0.44,0.29,0.42,0.35,0.47
max,1.0,0.7,0.54,0.47,0.6,0.44,0.68


i


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,70.0,70.0,70.0,70.0,70.0,70.0,70.0
mean,0.67,0.58,0.48,0.28,0.35,0.25,0.41
std,0.47,0.23,0.12,0.16,0.22,0.11,0.2
min,0.0,0.12,0.19,-0.08,-0.06,0.05,0.03
25%,0.0,0.38,0.45,0.13,0.15,0.16,0.24
50%,1.0,0.55,0.49,0.34,0.39,0.27,0.46
75%,1.0,0.86,0.55,0.39,0.58,0.34,0.58
max,1.0,1.0,0.74,0.58,0.65,0.43,0.75


j


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,0.43,0.93,0.69,0.19,0.18,0.22,0.19
std,0.53,0.07,0.08,0.13,0.14,0.09,0.11
min,0.0,0.88,0.61,0.02,0.0,0.1,0.05
25%,0.0,0.88,0.64,0.08,0.06,0.15,0.1
50%,0.0,0.88,0.67,0.24,0.18,0.22,0.22
75%,1.0,1.0,0.74,0.27,0.29,0.28,0.27
max,1.0,1.0,0.8,0.36,0.35,0.33,0.34


k


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,41.0,41.0,41.0,41.0,41.0,41.0,41.0
mean,0.98,0.83,0.51,0.35,0.34,0.25,0.44
std,0.16,0.12,0.07,0.17,0.18,0.06,0.17
min,0.0,0.44,0.29,0.0,-0.03,0.14,0.1
25%,1.0,0.78,0.49,0.3,0.25,0.21,0.35
50%,1.0,0.78,0.51,0.39,0.37,0.25,0.46
75%,1.0,0.89,0.54,0.52,0.49,0.28,0.54
max,1.0,1.0,0.68,0.57,0.66,0.43,0.84


l


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,11.0,11.0,11.0,11.0,11.0,11.0,11.0
mean,0.64,0.44,0.31,0.11,0.19,0.26,0.28
std,0.5,0.27,0.12,0.09,0.12,0.13,0.18
min,0.0,0.1,0.12,-0.03,0.04,0.03,0.0
25%,0.0,0.2,0.24,0.05,0.13,0.19,0.17
50%,1.0,0.5,0.28,0.12,0.15,0.27,0.27
75%,1.0,0.6,0.37,0.16,0.2,0.35,0.37
max,1.0,0.8,0.51,0.26,0.44,0.44,0.65


m


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.68,0.63,0.46,0.16,0.18,0.18,0.2
std,0.48,0.18,0.08,0.11,0.14,0.09,0.13
min,0.0,0.11,0.26,-0.08,-0.06,0.06,-0.03
25%,0.0,0.6,0.42,0.1,0.08,0.1,0.14
50%,1.0,0.62,0.47,0.17,0.16,0.16,0.2
75%,1.0,0.75,0.5,0.25,0.32,0.23,0.25
max,1.0,0.88,0.62,0.29,0.46,0.42,0.5


n


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,29.0,29.0,29.0,29.0,29.0,29.0,29.0
mean,0.66,0.34,0.27,0.12,0.17,0.24,0.25
std,0.48,0.24,0.06,0.1,0.14,0.14,0.17
min,0.0,0.11,0.17,-0.03,-0.09,-0.03,-0.01
25%,0.0,0.11,0.23,0.03,0.06,0.15,0.1
50%,1.0,0.3,0.27,0.1,0.15,0.23,0.21
75%,1.0,0.57,0.3,0.19,0.32,0.33,0.33
max,1.0,0.71,0.45,0.36,0.42,0.58,0.65


o


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,0.51,0.54,0.48,0.2,0.27,0.22,0.33
std,0.51,0.15,0.11,0.14,0.17,0.1,0.21
min,0.0,0.12,0.27,-0.07,-0.05,0.03,-0.08
25%,0.0,0.5,0.4,0.11,0.13,0.14,0.17
50%,1.0,0.5,0.48,0.18,0.25,0.21,0.29
75%,1.0,0.62,0.55,0.31,0.4,0.27,0.44
max,1.0,0.88,0.8,0.44,0.65,0.58,0.8


In [410]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','web_ofa_sim']}")
    print()

a
0.31

b
0.4

c
0.43

d
0.5

e
0.34

f
0.2

g
0.31

h
0.37

i
0.41

j
0.19

k
0.44

l
0.28

m
0.2

n
0.25

o
0.33



In [411]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_con_sim']}")
    print()

a
0.25

b
0.29

c
0.32

d
0.38

e
0.3

f
0.25

g
0.14

h
0.29

i
0.25

j
0.22

k
0.25

l
0.26

m
0.18

n
0.24

o
0.22



In [412]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_gt_sim']}")
    print()

a
0.31

b
0.35

c
0.34

d
0.47

e
0.34

f
0.2

g
0.24

h
0.34

i
0.35

j
0.18

k
0.34

l
0.19

m
0.18

n
0.17

o
0.27



# base

### keep all

In [361]:
df[['gt_ped_flag', 'gt_coverage', 'org_top_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2)

Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,620.0,620.0,620.0,620.0,620.0,620.0,620.0
mean,0.65,0.49,0.46,0.29,0.35,0.29,0.4
std,0.48,0.23,0.13,0.15,0.18,0.11,0.18
min,0.0,0.09,0.08,-0.12,-0.08,-0.04,-0.06
25%,0.0,0.33,0.39,0.18,0.22,0.21,0.27
50%,1.0,0.45,0.47,0.28,0.36,0.28,0.39
75%,1.0,0.62,0.55,0.4,0.48,0.36,0.53
max,1.0,1.0,0.86,0.75,0.79,0.63,0.85


In [362]:
# before
for el in pg_dict:
    print(el)
    display(pg_dict[el][['gt_ped_flag', 'gt_coverage', 'org_top_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2))

a


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,0.76,0.53,0.48,0.3,0.36,0.27,0.34
std,0.43,0.2,0.1,0.13,0.15,0.07,0.14
min,0.0,0.2,0.09,0.04,0.04,0.01,0.01
25%,1.0,0.4,0.44,0.21,0.24,0.23,0.27
50%,1.0,0.5,0.49,0.31,0.35,0.27,0.35
75%,1.0,0.56,0.54,0.41,0.44,0.3,0.44
max,1.0,1.0,0.61,0.52,0.69,0.4,0.67


b


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,103.0,103.0,103.0,103.0,103.0,103.0,103.0
mean,0.82,0.4,0.41,0.25,0.37,0.3,0.42
std,0.39,0.16,0.11,0.11,0.15,0.09,0.16
min,0.0,0.1,0.08,-0.06,0.02,0.07,0.07
25%,1.0,0.3,0.33,0.18,0.26,0.24,0.32
50%,1.0,0.4,0.4,0.26,0.37,0.3,0.42
75%,1.0,0.5,0.49,0.33,0.46,0.37,0.52
max,1.0,0.7,0.69,0.54,0.76,0.61,0.84


c


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,57.0,57.0,57.0,57.0,57.0,57.0,57.0
mean,0.63,0.43,0.43,0.29,0.36,0.33,0.44
std,0.49,0.18,0.06,0.1,0.12,0.11,0.18
min,0.0,0.11,0.31,0.08,0.06,0.06,0.05
25%,0.0,0.29,0.39,0.24,0.31,0.27,0.33
50%,1.0,0.44,0.42,0.3,0.37,0.32,0.45
75%,1.0,0.56,0.47,0.36,0.43,0.39,0.53
max,1.0,0.78,0.58,0.49,0.66,0.63,0.85


d


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,62.0,62.0,62.0,62.0,62.0,62.0,62.0
mean,0.16,0.45,0.63,0.48,0.5,0.4,0.52
std,0.37,0.13,0.1,0.16,0.17,0.1,0.15
min,0.0,0.1,0.37,0.09,0.09,0.18,0.15
25%,0.0,0.4,0.61,0.36,0.36,0.33,0.44
50%,0.0,0.41,0.64,0.51,0.55,0.39,0.53
75%,0.0,0.5,0.68,0.61,0.61,0.45,0.61
max,1.0,0.71,0.86,0.75,0.78,0.63,0.82


e


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,46.0,46.0,46.0,46.0,46.0,46.0,46.0
mean,0.59,0.5,0.53,0.35,0.41,0.29,0.41
std,0.5,0.24,0.09,0.12,0.12,0.09,0.13
min,0.0,0.09,0.36,-0.01,0.03,0.08,0.15
25%,0.0,0.36,0.46,0.29,0.34,0.23,0.28
50%,1.0,0.45,0.52,0.38,0.43,0.3,0.42
75%,1.0,0.6,0.58,0.44,0.5,0.33,0.52
max,1.0,1.0,0.8,0.58,0.6,0.48,0.64


f


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,0.62,0.67,0.49,0.22,0.22,0.19,0.22
std,0.5,0.33,0.09,0.13,0.14,0.08,0.13
min,0.0,0.12,0.31,0.04,0.05,0.04,-0.06
25%,0.0,0.38,0.45,0.12,0.11,0.16,0.15
50%,1.0,0.72,0.51,0.2,0.21,0.18,0.25
75%,1.0,1.0,0.55,0.28,0.26,0.26,0.28
max,1.0,1.0,0.62,0.46,0.47,0.31,0.46


g


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.56,0.24,0.36,0.21,0.26,0.2,0.31
std,0.51,0.11,0.17,0.15,0.18,0.12,0.18
min,0.0,0.1,0.11,0.03,-0.02,-0.04,0.05
25%,0.0,0.2,0.16,0.11,0.14,0.11,0.19
50%,1.0,0.2,0.42,0.17,0.25,0.21,0.29
75%,1.0,0.3,0.47,0.28,0.34,0.25,0.42
max,1.0,0.56,0.6,0.61,0.73,0.53,0.74


h


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,28.0,28.0,28.0,28.0,28.0,28.0,28.0
mean,1.0,0.27,0.4,0.23,0.4,0.34,0.42
std,0.0,0.12,0.07,0.11,0.15,0.1,0.14
min,1.0,0.2,0.29,0.11,0.11,0.12,0.19
25%,1.0,0.2,0.35,0.14,0.26,0.27,0.32
50%,1.0,0.2,0.4,0.21,0.4,0.35,0.39
75%,1.0,0.3,0.44,0.3,0.51,0.42,0.49
max,1.0,0.7,0.54,0.45,0.62,0.54,0.65


i


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,70.0,70.0,70.0,70.0,70.0,70.0,70.0
mean,0.67,0.58,0.48,0.29,0.36,0.26,0.42
std,0.47,0.23,0.12,0.16,0.22,0.13,0.2
min,0.0,0.12,0.19,-0.01,-0.01,0.06,0.05
25%,0.0,0.38,0.45,0.17,0.17,0.14,0.3
50%,1.0,0.55,0.49,0.32,0.35,0.28,0.4
75%,1.0,0.86,0.55,0.43,0.56,0.36,0.58
max,1.0,1.0,0.74,0.54,0.79,0.56,0.78


j


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,0.43,0.93,0.69,0.24,0.21,0.26,0.27
std,0.53,0.07,0.08,0.14,0.15,0.08,0.13
min,0.0,0.88,0.61,0.02,-0.03,0.19,0.1
25%,0.0,0.88,0.64,0.14,0.1,0.21,0.18
50%,0.0,0.88,0.67,0.26,0.26,0.24,0.31
75%,1.0,1.0,0.74,0.34,0.34,0.29,0.32
max,1.0,1.0,0.8,0.43,0.36,0.41,0.49


k


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,41.0,41.0,41.0,41.0,41.0,41.0,41.0
mean,0.98,0.83,0.51,0.39,0.38,0.26,0.46
std,0.16,0.12,0.07,0.17,0.18,0.08,0.17
min,0.0,0.44,0.29,-0.05,-0.05,0.09,0.09
25%,1.0,0.78,0.49,0.34,0.28,0.21,0.34
50%,1.0,0.78,0.51,0.42,0.4,0.26,0.48
75%,1.0,0.89,0.54,0.53,0.52,0.32,0.57
max,1.0,1.0,0.68,0.61,0.66,0.44,0.75


l


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,11.0,11.0,11.0,11.0,11.0,11.0,11.0
mean,0.64,0.44,0.31,0.15,0.21,0.27,0.27
std,0.5,0.27,0.12,0.11,0.12,0.15,0.19
min,0.0,0.1,0.12,-0.02,0.07,-0.0,0.01
25%,0.0,0.2,0.24,0.06,0.12,0.23,0.16
50%,1.0,0.5,0.28,0.18,0.23,0.26,0.28
75%,1.0,0.6,0.37,0.24,0.27,0.37,0.34
max,1.0,0.8,0.51,0.29,0.47,0.5,0.65


m


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.68,0.63,0.46,0.25,0.28,0.26,0.29
std,0.48,0.18,0.08,0.15,0.19,0.11,0.18
min,0.0,0.11,0.26,-0.04,-0.02,-0.02,-0.01
25%,0.0,0.6,0.42,0.18,0.14,0.21,0.14
50%,1.0,0.62,0.47,0.24,0.27,0.25,0.3
75%,1.0,0.75,0.5,0.34,0.42,0.31,0.36
max,1.0,0.88,0.62,0.58,0.75,0.44,0.7


n


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,29.0,29.0,29.0,29.0,29.0,29.0,29.0
mean,0.66,0.34,0.27,0.14,0.17,0.26,0.23
std,0.48,0.24,0.06,0.11,0.13,0.12,0.15
min,0.0,0.11,0.17,-0.12,-0.08,0.01,-0.02
25%,0.0,0.11,0.23,0.07,0.08,0.18,0.13
50%,1.0,0.3,0.27,0.12,0.18,0.25,0.21
75%,1.0,0.57,0.3,0.21,0.24,0.35,0.32
max,1.0,0.71,0.45,0.34,0.48,0.48,0.53


o


Unnamed: 0,gt_ped_flag,gt_coverage,org_top_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,0.51,0.54,0.48,0.25,0.31,0.22,0.36
std,0.51,0.15,0.11,0.12,0.16,0.11,0.22
min,0.0,0.12,0.27,-0.0,0.06,-0.01,-0.0
25%,0.0,0.5,0.4,0.17,0.18,0.16,0.19
50%,1.0,0.5,0.48,0.22,0.28,0.22,0.31
75%,1.0,0.62,0.55,0.34,0.4,0.29,0.54
max,1.0,0.88,0.8,0.5,0.77,0.59,0.79


In [363]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','web_ofa_sim']}")
    print()

a
0.34

b
0.42

c
0.44

d
0.52

e
0.41

f
0.22

g
0.31

h
0.42

i
0.42

j
0.27

k
0.46

l
0.27

m
0.29

n
0.23

o
0.36



In [364]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_con_sim']}")
    print()

a
0.27

b
0.3

c
0.33

d
0.4

e
0.29

f
0.19

g
0.2

h
0.34

i
0.26

j
0.26

k
0.26

l
0.27

m
0.26

n
0.26

o
0.22



In [365]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_gt_sim']}")
    print()

a
0.36

b
0.37

c
0.36

d
0.5

e
0.41

f
0.22

g
0.26

h
0.4

i
0.36

j
0.21

k
0.38

l
0.21

m
0.28

n
0.17

o
0.31



# After Fatemeh


### keep all

In [252]:
df[['gt_ped_flag', 'gt_coverage', 'org_gt_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2)

Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,620.0,620.0,620.0,620.0,620.0,620.0,620.0
mean,0.65,0.49,0.5,0.34,0.4,0.35,0.45
std,0.48,0.23,0.14,0.16,0.17,0.15,0.2
min,0.0,0.09,0.07,-0.09,-0.09,-0.07,-0.04
25%,0.0,0.33,0.41,0.23,0.27,0.24,0.31
50%,1.0,0.45,0.52,0.35,0.41,0.32,0.47
75%,1.0,0.62,0.6,0.43,0.54,0.45,0.59
max,1.0,1.0,0.88,0.79,0.81,0.76,0.94


In [253]:
#after
for el in pg_dict:
    print(el)
    display(pg_dict[el][['gt_ped_flag', 'gt_coverage', 'org_gt_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2))

a


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,0.76,0.53,0.49,0.33,0.39,0.31,0.39
std,0.43,0.2,0.12,0.13,0.16,0.12,0.16
min,0.0,0.2,0.13,-0.0,0.07,0.14,0.19
25%,1.0,0.4,0.43,0.22,0.26,0.23,0.26
50%,1.0,0.5,0.48,0.35,0.41,0.29,0.38
75%,1.0,0.56,0.58,0.45,0.5,0.36,0.49
max,1.0,1.0,0.69,0.55,0.73,0.63,0.79


b


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,103.0,103.0,103.0,103.0,103.0,103.0,103.0
mean,0.82,0.4,0.49,0.31,0.43,0.38,0.49
std,0.39,0.16,0.12,0.1,0.15,0.15,0.17
min,0.0,0.1,0.14,0.03,0.07,0.08,0.09
25%,1.0,0.3,0.41,0.24,0.31,0.26,0.37
50%,1.0,0.4,0.51,0.3,0.46,0.33,0.49
75%,1.0,0.5,0.58,0.37,0.55,0.49,0.61
max,1.0,0.7,0.77,0.58,0.8,0.76,0.87


c


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,57.0,57.0,57.0,57.0,57.0,57.0,57.0
mean,0.63,0.43,0.49,0.36,0.43,0.41,0.52
std,0.49,0.18,0.09,0.11,0.14,0.15,0.17
min,0.0,0.11,0.25,0.02,0.03,0.08,0.08
25%,0.0,0.29,0.44,0.29,0.36,0.29,0.42
50%,1.0,0.44,0.49,0.38,0.46,0.38,0.53
75%,1.0,0.56,0.55,0.43,0.5,0.54,0.58
max,1.0,0.78,0.65,0.61,0.66,0.69,0.92


d


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,62.0,62.0,62.0,62.0,62.0,62.0,62.0
mean,0.16,0.45,0.59,0.53,0.51,0.46,0.59
std,0.37,0.13,0.11,0.18,0.18,0.15,0.17
min,0.0,0.1,0.29,0.08,0.08,0.19,0.01
25%,0.0,0.4,0.55,0.43,0.39,0.31,0.5
50%,0.0,0.41,0.61,0.58,0.56,0.5,0.61
75%,0.0,0.5,0.66,0.65,0.64,0.57,0.73
max,1.0,0.71,0.77,0.79,0.81,0.71,0.91


e


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,46.0,46.0,46.0,46.0,46.0,46.0,46.0
mean,0.59,0.5,0.54,0.38,0.42,0.38,0.47
std,0.5,0.24,0.13,0.11,0.13,0.15,0.16
min,0.0,0.09,0.33,0.07,0.04,0.08,0.15
25%,0.0,0.36,0.45,0.34,0.37,0.27,0.37
50%,1.0,0.45,0.55,0.39,0.43,0.34,0.53
75%,1.0,0.6,0.61,0.47,0.48,0.49,0.57
max,1.0,1.0,0.83,0.58,0.66,0.71,0.83


f


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,0.62,0.67,0.5,0.2,0.2,0.21,0.24
std,0.5,0.33,0.11,0.16,0.16,0.11,0.15
min,0.0,0.12,0.3,-0.09,-0.09,-0.01,0.0
25%,0.0,0.38,0.46,0.09,0.08,0.19,0.12
50%,1.0,0.72,0.5,0.23,0.19,0.23,0.26
75%,1.0,1.0,0.57,0.27,0.32,0.28,0.35
max,1.0,1.0,0.65,0.44,0.44,0.34,0.52


g


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.56,0.24,0.45,0.24,0.31,0.24,0.36
std,0.51,0.11,0.17,0.15,0.17,0.13,0.17
min,0.0,0.1,0.13,-0.08,-0.04,0.09,0.06
25%,0.0,0.2,0.38,0.15,0.2,0.15,0.25
50%,1.0,0.2,0.5,0.2,0.33,0.23,0.37
75%,1.0,0.3,0.59,0.34,0.45,0.29,0.47
max,1.0,0.56,0.69,0.53,0.56,0.53,0.76


h


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,28.0,28.0,28.0,28.0,28.0,28.0,28.0
mean,1.0,0.27,0.49,0.27,0.41,0.38,0.46
std,0.0,0.12,0.12,0.09,0.17,0.14,0.15
min,1.0,0.2,0.25,0.06,0.08,0.12,0.15
25%,1.0,0.2,0.42,0.22,0.29,0.29,0.35
50%,1.0,0.2,0.46,0.28,0.4,0.34,0.48
75%,1.0,0.3,0.55,0.32,0.59,0.48,0.57
max,1.0,0.7,0.74,0.41,0.63,0.62,0.74


i


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,70.0,70.0,70.0,70.0,70.0,70.0,70.0
mean,0.67,0.58,0.54,0.32,0.39,0.29,0.49
std,0.47,0.23,0.16,0.15,0.2,0.13,0.24
min,0.0,0.12,0.17,-0.04,-0.02,-0.01,-0.03
25%,0.0,0.38,0.42,0.21,0.23,0.2,0.32
50%,1.0,0.55,0.61,0.37,0.44,0.32,0.5
75%,1.0,0.86,0.65,0.43,0.59,0.41,0.69
max,1.0,1.0,0.78,0.6,0.68,0.48,0.94


j


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,0.43,0.93,0.66,0.38,0.36,0.33,0.38
std,0.53,0.07,0.07,0.07,0.09,0.11,0.04
min,0.0,0.88,0.56,0.3,0.27,0.23,0.31
25%,0.0,0.88,0.62,0.32,0.3,0.26,0.36
50%,0.0,0.88,0.66,0.36,0.33,0.29,0.37
75%,1.0,1.0,0.69,0.4,0.38,0.34,0.41
max,1.0,1.0,0.79,0.52,0.53,0.56,0.43


k


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,41.0,41.0,41.0,41.0,41.0,41.0,41.0
mean,0.98,0.83,0.5,0.4,0.38,0.29,0.5
std,0.16,0.12,0.12,0.12,0.14,0.1,0.15
min,0.0,0.44,0.11,0.05,0.02,0.07,0.04
25%,1.0,0.78,0.43,0.37,0.32,0.23,0.43
50%,1.0,0.78,0.52,0.4,0.39,0.27,0.51
75%,1.0,0.89,0.58,0.46,0.45,0.37,0.59
max,1.0,1.0,0.77,0.62,0.62,0.56,0.8


l


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,11.0,11.0,11.0,11.0,11.0,11.0,11.0
mean,0.64,0.44,0.34,0.24,0.29,0.39,0.38
std,0.5,0.27,0.14,0.14,0.16,0.19,0.18
min,0.0,0.1,0.18,0.06,0.1,0.08,0.09
25%,0.0,0.2,0.26,0.12,0.15,0.3,0.24
50%,1.0,0.5,0.28,0.21,0.29,0.38,0.44
75%,1.0,0.6,0.39,0.35,0.37,0.54,0.52
max,1.0,0.8,0.68,0.44,0.62,0.63,0.55


m


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.68,0.63,0.5,0.31,0.34,0.34,0.36
std,0.48,0.18,0.12,0.14,0.18,0.17,0.22
min,0.0,0.11,0.19,-0.01,0.02,0.09,-0.04
25%,0.0,0.6,0.46,0.24,0.21,0.2,0.19
50%,1.0,0.62,0.52,0.31,0.34,0.34,0.32
75%,1.0,0.75,0.56,0.4,0.47,0.48,0.54
max,1.0,0.88,0.74,0.57,0.72,0.66,0.76


n


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,29.0,29.0,29.0,29.0,29.0,29.0,29.0
mean,0.66,0.34,0.26,0.18,0.25,0.32,0.28
std,0.48,0.24,0.13,0.13,0.18,0.17,0.17
min,0.0,0.11,0.07,-0.03,-0.05,-0.05,0.03
25%,0.0,0.11,0.13,0.09,0.14,0.22,0.16
50%,1.0,0.3,0.24,0.15,0.24,0.29,0.24
75%,1.0,0.57,0.4,0.29,0.33,0.38,0.39
max,1.0,0.71,0.49,0.44,0.7,0.7,0.64


o


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,0.51,0.54,0.51,0.31,0.37,0.32,0.4
std,0.51,0.15,0.14,0.15,0.17,0.17,0.24
min,0.0,0.12,0.24,-0.07,0.01,-0.07,-0.02
25%,0.0,0.5,0.42,0.21,0.25,0.21,0.19
50%,1.0,0.5,0.51,0.3,0.39,0.31,0.35
75%,1.0,0.62,0.59,0.39,0.5,0.42,0.57
max,1.0,0.88,0.88,0.58,0.71,0.69,0.92


In [467]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','web_ofa_sim']}")
    print()

a
0.39

b
0.49

c
0.52

d
0.59

e
0.47

f
0.24

g
0.36

h
0.46

i
0.49

j
0.38

k
0.5

l
0.38

m
0.36

n
0.28

o
0.4



In [468]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_con_sim']}")
    print()

a
0.31

b
0.38

c
0.41

d
0.46

e
0.38

f
0.21

g
0.24

h
0.38

i
0.29

j
0.33

k
0.29

l
0.39

m
0.34

n
0.32

o
0.32



In [469]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_gt_sim']}")
    print()

a
0.39

b
0.43

c
0.43

d
0.51

e
0.42

f
0.2

g
0.31

h
0.41

i
0.39

j
0.36

k
0.38

l
0.29

m
0.34

n
0.25

o
0.37



# Wit

In [315]:
df[['gt_ped_flag', 'gt_coverage', 'org_gt_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2)

Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,620.0,620.0,620.0,620.0,620.0,620.0,620.0
mean,0.65,0.49,0.5,0.34,0.39,0.38,0.42
std,0.48,0.23,0.14,0.16,0.17,0.16,0.18
min,0.0,0.09,0.07,-0.1,-0.05,-0.01,-0.05
25%,0.0,0.33,0.41,0.23,0.26,0.27,0.29
50%,1.0,0.45,0.52,0.34,0.39,0.35,0.42
75%,1.0,0.62,0.6,0.43,0.53,0.52,0.56
max,1.0,1.0,0.88,0.76,0.84,0.71,0.94


In [316]:
#after
for el in pg_dict:
    print(el)
    display(pg_dict[el][['gt_ped_flag', 'gt_coverage', 'org_gt_sim', 'ofa_top_sim', 'ofa_gt_sim', 'ofa_con_sim', 'web_ofa_sim']].describe().round(2))

a


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,51.0,51.0,51.0,51.0,51.0,51.0,51.0
mean,0.76,0.53,0.49,0.36,0.39,0.34,0.35
std,0.43,0.2,0.12,0.12,0.15,0.13,0.13
min,0.0,0.2,0.13,0.11,0.11,0.16,0.13
25%,1.0,0.4,0.43,0.28,0.25,0.25,0.26
50%,1.0,0.5,0.48,0.36,0.38,0.31,0.33
75%,1.0,0.56,0.58,0.44,0.47,0.38,0.44
max,1.0,1.0,0.69,0.65,0.73,0.61,0.61


b


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,103.0,103.0,103.0,103.0,103.0,103.0,103.0
mean,0.82,0.4,0.49,0.28,0.4,0.39,0.43
std,0.39,0.16,0.12,0.1,0.13,0.13,0.14
min,0.0,0.1,0.14,0.01,0.1,-0.0,0.08
25%,1.0,0.3,0.41,0.24,0.31,0.29,0.33
50%,1.0,0.4,0.51,0.29,0.41,0.36,0.41
75%,1.0,0.5,0.58,0.34,0.5,0.51,0.54
max,1.0,0.7,0.77,0.55,0.67,0.7,0.8


c


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,57.0,57.0,57.0,57.0,57.0,57.0,57.0
mean,0.63,0.43,0.49,0.36,0.43,0.42,0.48
std,0.49,0.18,0.09,0.12,0.15,0.16,0.17
min,0.0,0.11,0.25,-0.05,-0.05,0.08,0.1
25%,0.0,0.29,0.44,0.29,0.34,0.31,0.37
50%,1.0,0.44,0.49,0.38,0.46,0.4,0.47
75%,1.0,0.56,0.55,0.45,0.55,0.55,0.6
max,1.0,0.78,0.65,0.52,0.69,0.71,0.87


d


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,62.0,62.0,62.0,62.0,62.0,62.0,62.0
mean,0.16,0.45,0.59,0.56,0.53,0.51,0.58
std,0.37,0.13,0.11,0.16,0.16,0.14,0.16
min,0.0,0.1,0.29,0.17,0.17,0.21,0.21
25%,0.0,0.4,0.55,0.44,0.43,0.37,0.45
50%,0.0,0.41,0.61,0.6,0.56,0.54,0.59
75%,0.0,0.5,0.66,0.72,0.66,0.64,0.68
max,1.0,0.71,0.77,0.76,0.84,0.68,0.94


e


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,46.0,46.0,46.0,46.0,46.0,46.0,46.0
mean,0.59,0.5,0.54,0.38,0.4,0.46,0.43
std,0.5,0.24,0.13,0.11,0.12,0.11,0.11
min,0.0,0.09,0.33,0.04,0.1,0.22,0.17
25%,0.0,0.36,0.45,0.36,0.32,0.37,0.37
50%,1.0,0.45,0.55,0.4,0.4,0.51,0.42
75%,1.0,0.6,0.61,0.45,0.49,0.54,0.51
max,1.0,1.0,0.83,0.53,0.64,0.6,0.61


f


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,16.0,16.0,16.0,16.0,16.0,16.0,16.0
mean,0.62,0.67,0.5,0.31,0.32,0.29,0.29
std,0.5,0.33,0.11,0.17,0.18,0.13,0.21
min,0.0,0.12,0.3,0.03,0.05,0.09,-0.05
25%,0.0,0.38,0.46,0.2,0.19,0.24,0.17
50%,1.0,0.72,0.5,0.36,0.34,0.31,0.32
75%,1.0,1.0,0.57,0.45,0.47,0.34,0.4
max,1.0,1.0,0.65,0.52,0.57,0.65,0.72


g


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.56,0.24,0.45,0.22,0.29,0.25,0.3
std,0.51,0.11,0.17,0.13,0.19,0.18,0.18
min,0.0,0.1,0.13,0.01,-0.01,-0.01,-0.0
25%,0.0,0.2,0.38,0.14,0.14,0.12,0.19
50%,1.0,0.2,0.5,0.19,0.24,0.22,0.31
75%,1.0,0.3,0.59,0.31,0.43,0.28,0.37
max,1.0,0.56,0.69,0.53,0.62,0.62,0.76


h


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,28.0,28.0,28.0,28.0,28.0,28.0,28.0
mean,1.0,0.27,0.49,0.25,0.4,0.38,0.4
std,0.0,0.12,0.12,0.1,0.15,0.13,0.17
min,1.0,0.2,0.25,0.07,0.15,0.14,0.06
25%,1.0,0.2,0.42,0.18,0.3,0.3,0.29
50%,1.0,0.2,0.46,0.24,0.39,0.34,0.37
75%,1.0,0.3,0.55,0.32,0.54,0.43,0.5
max,1.0,0.7,0.74,0.41,0.65,0.67,0.72


i


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,70.0,70.0,70.0,70.0,70.0,70.0,70.0
mean,0.67,0.58,0.54,0.33,0.4,0.33,0.45
std,0.47,0.23,0.16,0.16,0.22,0.15,0.21
min,0.0,0.12,0.17,-0.1,-0.04,0.03,-0.03
25%,0.0,0.38,0.42,0.21,0.23,0.22,0.29
50%,1.0,0.55,0.61,0.36,0.36,0.33,0.5
75%,1.0,0.86,0.65,0.43,0.62,0.43,0.61
max,1.0,1.0,0.78,0.69,0.75,0.69,0.81


j


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,7.0,7.0,7.0,7.0,7.0,7.0,7.0
mean,0.43,0.93,0.66,0.31,0.31,0.34,0.32
std,0.53,0.07,0.07,0.09,0.09,0.12,0.09
min,0.0,0.88,0.56,0.17,0.15,0.19,0.17
25%,0.0,0.88,0.62,0.26,0.28,0.29,0.26
50%,0.0,0.88,0.66,0.35,0.34,0.3,0.35
75%,1.0,1.0,0.69,0.37,0.36,0.37,0.38
max,1.0,1.0,0.79,0.41,0.39,0.57,0.42


k


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,41.0,41.0,41.0,41.0,41.0,41.0,41.0
mean,0.98,0.83,0.5,0.38,0.36,0.29,0.45
std,0.16,0.12,0.12,0.16,0.17,0.09,0.15
min,0.0,0.44,0.11,-0.04,-0.04,0.13,0.1
25%,1.0,0.78,0.43,0.35,0.32,0.23,0.32
50%,1.0,0.78,0.52,0.41,0.38,0.27,0.49
75%,1.0,0.89,0.58,0.49,0.48,0.33,0.56
max,1.0,1.0,0.77,0.6,0.64,0.61,0.75


l


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,11.0,11.0,11.0,11.0,11.0,11.0,11.0
mean,0.64,0.44,0.34,0.21,0.25,0.49,0.39
std,0.5,0.27,0.14,0.09,0.15,0.16,0.21
min,0.0,0.1,0.18,0.05,0.11,0.21,0.04
25%,0.0,0.2,0.26,0.17,0.13,0.39,0.26
50%,1.0,0.5,0.28,0.21,0.2,0.49,0.39
75%,1.0,0.6,0.39,0.25,0.3,0.6,0.49
max,1.0,0.8,0.68,0.4,0.54,0.71,0.76


m


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,25.0,25.0,25.0,25.0,25.0,25.0,25.0
mean,0.68,0.63,0.5,0.31,0.32,0.37,0.31
std,0.48,0.18,0.12,0.15,0.19,0.16,0.18
min,0.0,0.11,0.19,-0.04,-0.03,0.1,-0.04
25%,0.0,0.6,0.46,0.18,0.16,0.26,0.23
50%,1.0,0.62,0.52,0.32,0.32,0.3,0.3
75%,1.0,0.75,0.56,0.4,0.44,0.54,0.43
max,1.0,0.88,0.74,0.58,0.7,0.61,0.67


n


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,29.0,29.0,29.0,29.0,29.0,29.0,29.0
mean,0.66,0.34,0.26,0.19,0.24,0.32,0.28
std,0.48,0.24,0.13,0.13,0.17,0.18,0.17
min,0.0,0.11,0.07,-0.05,-0.04,-0.01,-0.0
25%,0.0,0.11,0.13,0.1,0.12,0.19,0.15
50%,1.0,0.3,0.24,0.17,0.22,0.28,0.27
75%,1.0,0.57,0.4,0.29,0.38,0.49,0.38
max,1.0,0.71,0.49,0.46,0.66,0.66,0.62


o


Unnamed: 0,gt_ped_flag,gt_coverage,org_gt_sim,ofa_top_sim,ofa_gt_sim,ofa_con_sim,web_ofa_sim
count,49.0,49.0,49.0,49.0,49.0,49.0,49.0
mean,0.51,0.54,0.51,0.31,0.39,0.36,0.4
std,0.51,0.15,0.14,0.14,0.17,0.17,0.2
min,0.0,0.12,0.24,0.04,0.08,-0.0,0.0
25%,0.0,0.5,0.42,0.21,0.22,0.22,0.25
50%,1.0,0.5,0.51,0.32,0.39,0.33,0.36
75%,1.0,0.62,0.59,0.41,0.51,0.5,0.6
max,1.0,0.88,0.88,0.66,0.69,0.67,0.79


In [512]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','web_ofa_sim']}")
    print()

a
0.35

b
0.43

c
0.48

d
0.58

e
0.43

f
0.29

g
0.3

h
0.4

i
0.45

j
0.32

k
0.45

l
0.39

m
0.31

n
0.28

o
0.4



In [513]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_con_sim']}")
    print()

a
0.34

b
0.39

c
0.42

d
0.51

e
0.46

f
0.29

g
0.25

h
0.38

i
0.33

j
0.34

k
0.29

l
0.49

m
0.37

n
0.32

o
0.36



In [514]:
for el in pg_dict:
    print(el)
    print(f"{pg_dict[el].describe().round(2).at['mean','ofa_gt_sim']}")
    print()

a
0.39

b
0.4

c
0.43

d
0.53

e
0.4

f
0.32

g
0.29

h
0.4

i
0.4

j
0.31

k
0.36

l
0.25

m
0.32

n
0.24

o
0.39



# CLIP

In [18]:
model, preprocess = clip.load("ViT-B/32", device=device, jit=False)

In [19]:
d_path = '.'
#before
df_b = pd.read_pickle(f'{d_path}/pd_base.pk')


# after fat
d_path = '/raid/AISSEL/htest/datasets/ped_data/ped_ftest/missed_q50_over_avg'
df_f = pd.read_pickle(f'{d_path}/pd_mq50_over_avg.pk')

# wit
d_path = '/raid/AISSEL/htest/datasets/ped_data/wit/missed_q50_over_avg'
df_w = pd.read_pickle(f'{d_path}/ped_mq50_over_avg.pk')

# random
d_path = '/raid/AISSEL/htest/datasets/ped_data/wit/random_missed'
df_r = pd.read_pickle(f'{d_path}/ped_mq50_over_avg.pk')

In [20]:
df_b = df_b.dropna()
df_b = df_b.reset_index(drop=True)
df_f = df_f.dropna()
df_f = df_f.reset_index(drop=True)
df_w = df_w.dropna()
df_w = df_w.reset_index(drop=True)

df_r = df_r.dropna()
df_r = df_r.reset_index(drop=True)

In [21]:
df_b

Unnamed: 0,uniq_id,image_id,caption,topic_id,labels,image,topic2caption_sim,concept2caption_sim,topics,tag,gt,h_caption,ofa_caption
0,167,167,Car on rural road vector Stock Photo,68,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.376758,0.212455,a,"{'passenger': 0, 'wheel': 1, 'vehicle': 1, 'wa...",wheel vehicle,A parked car in a remote area,a silhouette of a car parked in a field
1,184,184,Bus Only Lane Rendering Kuhio and Walina,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.392783,0.160691,a,"{'freight': 1, 'transportation': 1, 'traffic':...",freight transportation traffic transport tran...,Two individuals ride bikes on the bus-only rou...,a street with a bus and a white car on the road
2,187,187,Passenger boarding a bus on Bayshore,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.438588,0.197647,a,"{'freight': 1, 'transportation': 1, 'traffic':...",freight transportation traffic transport tran...,A bus stopped in the bus stop when a passenger...,a person is getting on a bus at a bus stop
3,192,192,Passenger boarding a bus on Bayshore,112,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.438588,0.197647,a,"{'freight': 1, 'transportation': 1, 'traffic':...",freight transportation traffic transport tran...,A bus stopped in the bus stop when a passenger...,a person is getting on a bus at a bus stop
4,209,209,Pedestrian Connections Congestion Strategy,112,,/9j/7gAOQWRvYmUAZAAAAAAA/9sAQwAIBgYHBgUIBwcHCQ...,0.612697,0.493012,a,"{'freight': 1, 'transportation': 1, 'traffic':...",freight transportation traffic transport cong...,A pedestrian with wheelchair waiting in the road,a woman and a child in a wheelchair waiting fo...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
615,1228,1228,Westown Commons picnic shelter front view,296,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.395052,0.217862,o,"{'scenic': 0, 'recreation': 1, 'wilderness': 0...",recreation park area,Shed in a park,a gazebo with picnic tables in a park
616,1241,1241,Humboldt Alexander Von Park,296,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.415312,0.211162,o,"{'scenic': 1, 'recreation': 1, 'wilderness': 0...",scenic recreation park area pedestrian,A bridge over a lake,a bridge over a river with a body of water
617,1259,1259,photo of trees and the fenceline at Mason Hill...,296,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.446961,0.234170,o,"{'scenic': 1, 'recreation': 1, 'wilderness': 0...",scenic recreation park area,A park,a tree in a field next to a fence
618,1296,1296,Hogback Ridge Park,296,,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,0.579372,0.235827,o,"{'scenic': 1, 'recreation': 1, 'wilderness': 0...",scenic recreation park area,Fallen leaves with food,a person holding a camera on a pile of leaves


In [22]:
print(len(df_b))
print(len(df_f))
print(len(df_w))
print(len(df_r))

620
620
620
620


In [23]:
tmp_df = pd.DataFrame()
tmp_df['image'] = df_b['image'].to_list()
# tmp_df['or_caption'] = df_b['caption'].to_list()
tmp_df['bf_caption'] = df_b['ofa_caption'].to_list()
tmp_df['aw_caption'] = df_w['ofa_caption'].to_list()
tmp_df['af_caption'] = df_f['ofa_caption'].to_list()
# tmp_df['hu_caption'] = df_b['h_caption'].to_list()
tmp_df['topics'] = df_b['topics'].to_list()

tmp_df['ar_caption'] = df_f['ofa_caption'].to_list()

In [24]:
tmp_df

Unnamed: 0,image,bf_caption,aw_caption,af_caption,topics,ar_caption
0,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a silhouette of a car parked in a field,A car parked on the side of the road,a silhouette of a car parked in a field,a,a silhouette of a car parked in a field
1,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a street with a bus and a white car on the road,English A pedestrian and cyclists crossing the...,pedestrians and cars on a pedestrian crossing ...,a,pedestrians and cars on a pedestrian crossing ...
2,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person is getting on a bus at a bus stop,English A woman boarding a bus at a bus stop,a woman is getting on a bus at a bus stop,a,a woman is getting on a bus at a bus stop
3,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person is getting on a bus at a bus stop,English A woman boarding a bus at a bus stop,a woman is getting on a bus at a bus stop,a,a woman is getting on a bus at a bus stop
4,/9j/7gAOQWRvYmUAZAAAAAAA/9sAQwAIBgYHBgUIBwcHCQ...,a woman and a child in a wheelchair waiting fo...,English A woman in a wheelchair waiting for a ...,a woman in a wheelchair crossing the street in...,a,a woman in a wheelchair crossing the street in...
...,...,...,...,...,...,...
615,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a gazebo with picnic tables in a park,English A picnic shelter in the park,a pavilion with picnic tables and benches in a...,o,a pavilion with picnic tables and benches in a...
616,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a bridge over a river with a body of water,English A pedestrian bridge over the river in ...,pedestrian bridge over the river,o,pedestrian bridge over the river
617,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a tree in a field next to a fence,English A view from the parking lot of the house,a tree in a field with a fence,o,a tree in a field with a fence
618,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person holding a camera on a pile of leaves,English woman holding a camera on the ground w...,a woman holding a camera in her hand with autu...,o,a woman holding a camera in her hand with autu...


In [25]:
def clip_3(row):
    image_64_decode = base64.b64decode(row['image']) 
    image_result = open('test.jpg', 'wb')
    image_result.write(image_64_decode)
    try:
        image = Image.open('test.jpg')
        image = preprocess(image).unsqueeze(0).to(device)

        with torch.no_grad():
            image_features = model.encode_image(image)

#         text_snippets = [row['bf_caption'], row['aw_caption'], row['af_caption']]
        text_snippets = [row['bf_caption'], row['aw_caption'], row['af_caption'], row['ar_caption']]
        text = clip.tokenize(text_snippets).to(device)

        with torch.no_grad():
            text_features = model.encode_text(text)
        
        with torch.no_grad():
            logits_per_image, logits_per_text = model(image, text)
            probs = list(logits_per_image.softmax(dim=-1).cpu().numpy()[0])
        
        return probs[0], probs[1], probs[2] , probs[3]
    except:
        return None

In [26]:
clip_scores = tmp_df.apply(clip_3, axis=1)

In [27]:
bf_score = []
aw_score = []
af_score = []
ar_score = []
for el in clip_scores:
#     print(el)
    if el:
        bf_score.append(el[0])
        aw_score.append(el[1])
        af_score.append(el[2])
        ar_score.append(el[3])
    else:
        bf_score.append(None)
        aw_score.append(None)
        af_score.append(None)
        ar_score.append(None)

In [29]:
tmp_df['bf_score'] = bf_score
tmp_df['aw_score'] = aw_score
tmp_df['af_score'] = af_score
tmp_df['ar_score'] = ar_score

In [30]:
tmp_df

Unnamed: 0,image,bf_caption,aw_caption,af_caption,topics,ar_caption,bf_score,aw_score,af_score,ar_score
0,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a silhouette of a car parked in a field,A car parked on the side of the road,a silhouette of a car parked in a field,a,a silhouette of a car parked in a field,0.333252,0.000018,0.333252,0.333252
1,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a street with a bus and a white car on the road,English A pedestrian and cyclists crossing the...,pedestrians and cars on a pedestrian crossing ...,a,pedestrians and cars on a pedestrian crossing ...,0.630371,0.109558,0.130127,0.130127
2,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person is getting on a bus at a bus stop,English A woman boarding a bus at a bus stop,a woman is getting on a bus at a bus stop,a,a woman is getting on a bus at a bus stop,0.089478,0.029510,0.440430,0.440430
3,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person is getting on a bus at a bus stop,English A woman boarding a bus at a bus stop,a woman is getting on a bus at a bus stop,a,a woman is getting on a bus at a bus stop,0.089478,0.029510,0.440430,0.440430
4,/9j/7gAOQWRvYmUAZAAAAAAA/9sAQwAIBgYHBgUIBwcHCQ...,a woman and a child in a wheelchair waiting fo...,English A woman in a wheelchair waiting for a ...,a woman in a wheelchair crossing the street in...,a,a woman in a wheelchair crossing the street in...,0.093323,0.002230,0.452148,0.452148
...,...,...,...,...,...,...,...,...,...,...
615,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a gazebo with picnic tables in a park,English A picnic shelter in the park,a pavilion with picnic tables and benches in a...,o,a pavilion with picnic tables and benches in a...,0.795898,0.025589,0.089294,0.089294
616,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a bridge over a river with a body of water,English A pedestrian bridge over the river in ...,pedestrian bridge over the river,o,pedestrian bridge over the river,0.070740,0.211060,0.359131,0.359131
617,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a tree in a field next to a fence,English A view from the parking lot of the house,a tree in a field with a fence,o,a tree in a field with a fence,0.150879,0.410156,0.219482,0.219482
618,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person holding a camera on a pile of leaves,English woman holding a camera on the ground w...,a woman holding a camera in her hand with autu...,o,a woman holding a camera in her hand with autu...,0.018616,0.351318,0.314941,0.314941


In [31]:
tmp_df = tmp_df.dropna()
tmp_df = tmp_df.reset_index(drop=True)
tmp_df

Unnamed: 0,image,bf_caption,aw_caption,af_caption,topics,ar_caption,bf_score,aw_score,af_score,ar_score
0,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a silhouette of a car parked in a field,A car parked on the side of the road,a silhouette of a car parked in a field,a,a silhouette of a car parked in a field,0.333252,0.000018,0.333252,0.333252
1,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a street with a bus and a white car on the road,English A pedestrian and cyclists crossing the...,pedestrians and cars on a pedestrian crossing ...,a,pedestrians and cars on a pedestrian crossing ...,0.630371,0.109558,0.130127,0.130127
2,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person is getting on a bus at a bus stop,English A woman boarding a bus at a bus stop,a woman is getting on a bus at a bus stop,a,a woman is getting on a bus at a bus stop,0.089478,0.029510,0.440430,0.440430
3,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person is getting on a bus at a bus stop,English A woman boarding a bus at a bus stop,a woman is getting on a bus at a bus stop,a,a woman is getting on a bus at a bus stop,0.089478,0.029510,0.440430,0.440430
4,/9j/7gAOQWRvYmUAZAAAAAAA/9sAQwAIBgYHBgUIBwcHCQ...,a woman and a child in a wheelchair waiting fo...,English A woman in a wheelchair waiting for a ...,a woman in a wheelchair crossing the street in...,a,a woman in a wheelchair crossing the street in...,0.093323,0.002230,0.452148,0.452148
...,...,...,...,...,...,...,...,...,...,...
614,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a gazebo with picnic tables in a park,English A picnic shelter in the park,a pavilion with picnic tables and benches in a...,o,a pavilion with picnic tables and benches in a...,0.795898,0.025589,0.089294,0.089294
615,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a bridge over a river with a body of water,English A pedestrian bridge over the river in ...,pedestrian bridge over the river,o,pedestrian bridge over the river,0.070740,0.211060,0.359131,0.359131
616,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a tree in a field next to a fence,English A view from the parking lot of the house,a tree in a field with a fence,o,a tree in a field with a fence,0.150879,0.410156,0.219482,0.219482
617,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBw...,a person holding a camera on a pile of leaves,English woman holding a camera on the ground w...,a woman holding a camera in her hand with autu...,o,a woman holding a camera in her hand with autu...,0.018616,0.351318,0.314941,0.314941


In [32]:
tmp_df.describe().round(2)

Unnamed: 0,bf_score,aw_score,af_score,ar_score
count,619.0,619.0,619.0,619.0
mean,0.32,0.16,0.26,0.26
std,0.31,0.25,0.17,0.17
min,0.0,0.0,0.0,0.0
25%,0.05,0.0,0.1,0.1
50%,0.23,0.04,0.27,0.27
75%,0.52,0.2,0.42,0.42
max,1.0,1.0,0.5,0.5


In [35]:
gk = tmp_df.groupby('topics')
pg_dict = dict()
for el in gk.groups.keys():
    pg_dict[el] = gk.get_group(el)
    pg_dict[el] = pg_dict[el].reset_index(drop=True)
# # before
# for el in pg_dict:
#     print(el)
#     print(f"{pg_dict[el].describe().round(2).at['mean','aw_score']}, {pg_dict[el].describe().round(2).at['mean','af_score']}, {pg_dict[el].describe().round(2).at['mean','bf_score']}")
#     print()

In [36]:
data = []
for el in pg_dict:
    data.append([el, pg_dict[el].describe().round(2).at['mean','aw_score'], pg_dict[el].describe().round(2).at['mean','af_score'], pg_dict[el].describe().round(2).at['mean','bf_score'], pg_dict[el].describe().round(2).at['mean','ar_score']])

In [37]:
df_clip = pd.DataFrame(data, columns = ['Topic', 'App_1', 'App_2', 'Base', 'random'])
df_clip

Unnamed: 0,Topic,App_1,App_2,Base,random
0,a,0.13,0.27,0.34,0.27
1,b,0.12,0.27,0.35,0.27
2,c,0.18,0.27,0.28,0.27
3,d,0.22,0.26,0.26,0.26
4,e,0.09,0.24,0.42,0.24
5,f,0.12,0.33,0.22,0.33
6,g,0.14,0.27,0.32,0.27
7,h,0.18,0.23,0.35,0.23
8,i,0.26,0.2,0.34,0.2
9,j,0.01,0.25,0.49,0.25


In [37]:
df_clip.to_csv(f'ped_clip.csv', index=False)

In [33]:
gk = tmp_df.groupby('topics')
pg_dict = dict()
for el in gk.groups.keys():
    pg_dict[el] = gk.get_group(el)
    pg_dict[el] = pg_dict[el].reset_index(drop=True)
# before
for el in pg_dict:
    print(el)
    display(pg_dict[el].describe().round(2))

a


Unnamed: 0,bf_score,aw_score,af_score
count,51.0,51.0,51.0
mean,0.4,0.16,0.44
std,0.36,0.26,0.34
min,0.0,0.0,0.01
25%,0.06,0.0,0.11
50%,0.32,0.05,0.4
75%,0.81,0.17,0.75
max,0.99,0.98,0.98


b


Unnamed: 0,bf_score,aw_score,af_score
count,103.0,103.0,103.0
mean,0.42,0.14,0.44
std,0.31,0.22,0.33
min,0.0,0.0,0.0
25%,0.16,0.0,0.12
50%,0.37,0.03,0.4
75%,0.65,0.2,0.75
max,1.0,1.0,1.0


c


Unnamed: 0,bf_score,aw_score,af_score
count,57.0,57.0,57.0
mean,0.35,0.22,0.43
std,0.29,0.26,0.31
min,0.0,0.0,0.01
25%,0.08,0.01,0.19
50%,0.25,0.11,0.38
75%,0.62,0.33,0.66
max,0.99,0.97,0.98


d


Unnamed: 0,bf_score,aw_score,af_score
count,61.0,61.0,61.0
mean,0.31,0.25,0.44
std,0.31,0.31,0.35
min,0.0,0.0,0.0
25%,0.03,0.01,0.1
50%,0.22,0.09,0.35
75%,0.55,0.45,0.72
max,1.0,0.98,1.0


e


Unnamed: 0,bf_score,aw_score,af_score
count,46.0,46.0,46.0
mean,0.5,0.1,0.4
std,0.35,0.23,0.36
min,0.0,0.0,0.0
25%,0.18,0.0,0.09
50%,0.46,0.01,0.31
75%,0.85,0.06,0.71
max,1.0,0.92,1.0


f


Unnamed: 0,bf_score,aw_score,af_score
count,16.0,16.0,16.0
mean,0.29,0.15,0.56
std,0.28,0.24,0.34
min,0.01,0.0,0.0
25%,0.05,0.01,0.31
50%,0.27,0.03,0.54
75%,0.45,0.17,0.95
max,0.97,0.73,0.98


g


Unnamed: 0,bf_score,aw_score,af_score
count,25.0,25.0,25.0
mean,0.39,0.16,0.44
std,0.33,0.3,0.35
min,0.01,0.0,0.0
25%,0.08,0.0,0.04
50%,0.28,0.01,0.5
75%,0.58,0.16,0.75
max,0.99,0.98,0.96


h


Unnamed: 0,bf_score,aw_score,af_score
count,28.0,28.0,28.0
mean,0.41,0.21,0.38
std,0.33,0.28,0.33
min,0.0,0.0,0.0
25%,0.14,0.0,0.07
50%,0.3,0.07,0.32
75%,0.66,0.35,0.69
max,1.0,1.0,0.98


i


Unnamed: 0,bf_score,aw_score,af_score
count,70.0,70.0,70.0
mean,0.39,0.29,0.31
std,0.33,0.3,0.32
min,0.0,0.0,0.0
25%,0.07,0.05,0.06
50%,0.32,0.19,0.19
75%,0.71,0.44,0.53
max,1.0,0.95,0.99


j


Unnamed: 0,bf_score,aw_score,af_score
count,7.0,7.0,7.0
mean,0.54,0.01,0.45
std,0.45,0.01,0.45
min,0.02,0.0,0.0
25%,0.09,0.0,0.09
50%,0.81,0.0,0.19
75%,0.89,0.0,0.91
max,1.0,0.04,0.98


k


Unnamed: 0,bf_score,aw_score,af_score
count,41.0,41.0,41.0
mean,0.29,0.25,0.45
std,0.27,0.27,0.33
min,0.0,0.0,0.0
25%,0.04,0.05,0.2
50%,0.28,0.17,0.37
75%,0.48,0.32,0.79
max,1.0,1.0,0.99


l


Unnamed: 0,bf_score,aw_score,af_score
count,11.0,11.0,11.0
mean,0.3,0.18,0.52
std,0.32,0.32,0.4
min,0.0,0.0,0.0
25%,0.05,0.01,0.14
50%,0.17,0.02,0.58
75%,0.51,0.17,0.88
max,0.99,1.0,0.95


m


Unnamed: 0,bf_score,aw_score,af_score
count,25.0,25.0,25.0
mean,0.47,0.06,0.47
std,0.35,0.13,0.32
min,0.0,0.0,0.0
25%,0.16,0.0,0.25
50%,0.37,0.01,0.5
75%,0.75,0.04,0.73
max,1.0,0.55,0.99


n


Unnamed: 0,bf_score,aw_score,af_score
count,29.0,29.0,29.0
mean,0.31,0.22,0.47
std,0.3,0.3,0.35
min,0.0,0.0,0.0
25%,0.04,0.01,0.16
50%,0.14,0.08,0.37
75%,0.53,0.18,0.85
max,0.89,0.95,0.99


o


Unnamed: 0,bf_score,aw_score,af_score
count,49.0,49.0,49.0
mean,0.43,0.13,0.43
std,0.31,0.22,0.29
min,0.0,0.0,0.0
25%,0.12,0.0,0.27
50%,0.42,0.03,0.39
75%,0.63,0.19,0.57
max,1.0,0.98,1.0
