In [1]:
import os
import torch
import pandas as pd
from skimage import io, transform
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils, models
import PIL.Image as Image
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm
import sys

sys.path.insert(0, '../src')
from bird_dataset import *
from XAI_birds_dataloader import *


In [2]:
# Build the dataset wrapper; later cells read bd.images, bd.attributes and bd.data_dir from it.
bd = BirdDataset()

In [3]:
def _sorted_present(row):
    """Return the non-None entries of ``row`` as a sorted NumPy array."""
    kept = [value for value in row if value is not None]
    kept.sort()
    return np.array(kept)


# Attribute lists of every image, in the iteration order of bd.images.
attr_list = [record['attributes'] for record in bd.images.values()]

# Keep only the attribute names that mention 'wing_color'.
attr_filt_list = []
for attrs in attr_list:
    attr_filt_list.append([name for name in attrs if 'wing_color' in name])

# Framing the ragged lists leaves missing slots in shorter rows; strip them and sort each row.
filt_df = pd.DataFrame(attr_filt_list).apply(_sorted_present, axis=1)

In [139]:
def _color_combo(attrs):
    """Join the part after '::' of each attribute name, sorted, with underscores."""
    colors = [attr.split('::')[1] for attr in attrs]
    colors.sort()
    return '_'.join(colors)


# Keep the entries holding more than one wing-color attribute and label each by its combination.
multi_color_mask = filt_df.apply(len) > 1
filt_df[multi_color_mask].apply(_color_combo)

2         black_brown_buff
4              black_brown
5         brown_buff_white
7              black_brown
12               blue_grey
               ...        
11782    brown_buff_yellow
11784         brown_yellow
11785         olive_yellow
11786           brown_buff
11787      brown_buff_grey
Length: 6667, dtype: object

In [17]:
# try and see if certain colors are 'primary' versus 'secondary' wing colors based on their certainty scores

In [4]:
# Peek at the first record of the per-image attribute labels to check the file layout.
# Expected fields: <image_id> <attribute_id> <is_present> <certainty_id> <time>
with open(bd.data_dir+'attributes/image_attribute_labels.txt') as f:
    # readline() fetches just the first record; readlines() would load the whole file
    # only to discard everything after the first line.
    line_lst = f.readline().split()
print(line_lst)


['1', '1', '0', '3', '27.7080']


In [5]:
# Load the continuous class-level attribute table as lists of whitespace-split string fields,
# one list per line of the file.
# NOTE(review): presumably one line per class and one field per attribute - confirm against the dataset docs.
class_attr_lst = []
with open(bd.data_dir+'attributes/class_attribute_labels_continuous.txt') as f:
    class_attr_lst.extend(raw_line.split() for raw_line in f)


In [6]:
# Build {class key -> class name} from classes-subset.txt.
# Each line is split on whitespace: field 0 is an integer id (stored minus one as the key),
# field 1 is the class name.
classes = {}
with open(bd.data_dir+'classes-subset.txt') as f:
    for fields in map(str.split, f):
        class_id, class_name = fields[0], fields[1]
        classes[int(class_id) - 1] = class_name

In [10]:
# Read the continuous class-level attribute table, one whitespace-split record per line.
class_attr_lst = []
with open(bd.data_dir+'attributes/class_attribute_labels_continuous.txt') as f:
    class_attr_lst.extend(map(str.split, f))

# Columns are labelled with the attribute names held in bd.attributes; values are still strings here.
class_attr_df = pd.DataFrame(class_attr_lst, columns=bd.attributes.values())

# Keep the rows whose label is a key of `classes`, relabel them with the class names,
# then convert the text values to floats.
subset_rows = class_attr_df.loc[classes.keys()]
subset_rows.index = classes.values()
class_attr_df_filt = subset_rows.astype(float)

In [138]:
# len(class_attr_df)

In [11]:
# First 11 rows of the columns whose name contains 'has_wing_color'.
class_attr_df.filter(like='has_wing_color').head(11)

Unnamed: 0,has_wing_color::blue,has_wing_color::brown,has_wing_color::iridescent,has_wing_color::purple,has_wing_color::rufous,has_wing_color::grey,has_wing_color::yellow,has_wing_color::olive,has_wing_color::green,has_wing_color::pink,has_wing_color::orange,has_wing_color::black,has_wing_color::white,has_wing_color::red,has_wing_color::buff
0,1.6393442623,45.9016393443,3.27868852459,0.0,2.45901639344,24.5901639344,0.0,0.0,0.0,0.0,0.0,31.9672131148,3.27868852459,2.45901639344,28.6885245902
1,0.0,37.2727272727,0.0,0.0,0.0,20.9090909091,0.0,0.0,0.0,0.0,0.0,52.7272727273,9.09090909091,0.0,7.27272727273
2,0.0,26.6666666667,0.0,0.0,0.0,47.5,0.0,0.0,1.66666666667,2.5,0.0,26.6666666667,11.6666666667,2.5,11.6666666667
3,5.83941605839,8.02919708029,4.3795620438,0.0,0.0,4.3795620438,0.0,0.0,0.0,0.0,0.0,96.3503649635,1.4598540146,0.0,2.9197080292
4,0.0,15.0,7.5,2.5,0.0,28.75,0.0,0.0,0.0,0.0,2.5,87.5,0.0,0.0,3.75
5,0.0,11.6883116883,0.0,0.0,0.0,42.8571428571,0.0,2.5974025974,0.0,0.0,0.0,64.9350649351,63.6363636364,0.0,0.0
6,1.72413793103,4.31034482759,0.0,1.72413793103,0.0,37.0689655172,0.0,0.0,0.0,0.0,0.0,81.0344827586,15.5172413793,0.0,0.0
7,0.0,41.1111111111,0.0,2.22222222222,0.0,18.8888888889,2.22222222222,0.0,0.0,0.0,0.0,70.0,0.0,0.0,4.44444444444
8,9.02777777778,4.16666666667,12.5,0.0,0.0,6.25,2.08333333333,2.08333333333,0.0,0.0,0.0,76.3888888889,0.0,0.0,4.86111111111
9,0.0,1.91082802548,0.0,0.0,1.91082802548,0.0,27.3885350318,0.0,0.0,0.0,39.4904458599,91.0828025478,21.6560509554,49.0445859873,3.82165605096


In [12]:
def _top_two(scores):
    """Return the two largest entries of a row of wing-color scores."""
    return scores.nlargest(2)


# Wing-color columns only, then the two highest-scoring colors per row
# (the other columns come back as NaN in the resulting frame).
wing_color_certainties = class_attr_df_filt.filter(like='has_wing_color')
filt_wing_colors = wing_color_certainties.apply(_top_two, axis=1)

In [173]:
# wing_color_certainties

In [140]:
# filt_wing_colors = wing_color_certainties.apply(lambda x: x[x>30], axis=1)

In [299]:
# Wing-color attribute names per image, in the iteration order of bd.images.
attr_list = [record['attributes'] for record in bd.images.values()]
attr_filt_list = [list(filter(lambda name: 'wing_color' in name, attrs)) for attrs in attr_list]

# Framing the ragged lists leaves missing slots in shorter rows; drop them and sort each row.
padded = pd.DataFrame(attr_filt_list)
sorted_rows = padded.apply(lambda row: np.array(sorted(v for v in row if v is not None)), axis=1)

# One-column frame (column label 0) with every index value shifted up by one.
filt_df = sorted_rows.to_frame()
filt_df.index = filt_df.index + 1

In [300]:
# filt_df[filt_df.apply(len)>1].apply(lambda x: '_'.join(sorted([i.split('::')[1] for i in x]))).value_counts().head(50)

In [301]:
### IMPORTANT LINE: # filt_df[filt_df.apply(len)>1].apply(lambda x: '_'.join(sorted([i.split('::')[1] for i in x])))

In [302]:
# For each column of the transposed frame, list the row labels whose value is not null.
filt_wing_color_dict = filt_wing_colors.T.apply(lambda col: list(col.dropna().index))

In [303]:
# Per column of the transposed frame: the row labels that hold a non-null value.
tmp_df = filt_wing_colors.T.apply(lambda col: list(col[col.notnull()].index))

# {column label -> array of the labels kept for that column}
filt_wing_color_dict = {label: tmp_df[label].values for label in tmp_df.columns}

In [304]:
# filt_wing_color_dict

In [305]:
def _species_of(img_id):
    """Return the leading directory component of the image's filepath."""
    return bd.images[img_id]['filepath'].split('/')[0]


# Drop images without any wing-color attribute and expose the old index as an 'img_id' column.
has_wing_color = filt_df[0].apply(len) > 0
filt_df_color = filt_df.loc[has_wing_color].reset_index().rename(columns={'index': 'img_id'})

# Attach the species folder name and keep only species present in filt_wing_color_dict.
filt_df_color['species'] = filt_df_color['img_id'].apply(_species_of)
known_species = filt_df_color['species'].isin(filt_wing_color_dict.keys())
filt_df_color = filt_df_color[known_species]

In [306]:
def _top_colors_label(row):
    """Join, sorted and underscore-separated, the row's colors that are listed for its species."""
    allowed = filt_wing_color_dict[row['species']]
    kept = sorted([attr.split('::')[1] for attr in row[0] if attr in allowed])
    return '_'.join(kept)


# One label per image; the label is an empty string when none of the image's colors are listed.
filt_colors = filt_df_color.apply(_top_colors_label, axis=1)

In [307]:
# Preview the first rows of the per-image wing-color table.
filt_df_color.head()

Unnamed: 0,img_id,0,species
403,484,"[has_wing_color::black, has_wing_color::red, h...",010.Red_winged_Blackbird
404,485,"[has_wing_color::black, has_wing_color::orange...",010.Red_winged_Blackbird
405,486,"[has_wing_color::orange, has_wing_color::red, ...",010.Red_winged_Blackbird
406,487,"[has_wing_color::black, has_wing_color::orange...",010.Red_winged_Blackbird
407,489,[has_wing_color::black],010.Red_winged_Blackbird


In [308]:
# wing_color_certainties.T.to_dict()

In [309]:
# alt_filt_colors = filt_df_color.apply(lambda x: '_'.join(sorted([i.split('::')[1] for i in x[0] if i not in filt_wing_color_dict[x['species']]])), axis=1)

In [310]:
# Colors tagged on each image that are NOT in filt_wing_color_dict for its species,
# joined into a sorted, underscore-separated label (empty string when there are none).
# Defined here so the cell runs on a fresh kernel; the only other definition is commented out.
alt_filt_colors = filt_df_color.apply(
    lambda x: '_'.join(sorted([i.split('::')[1] for i in x[0] if i not in filt_wing_color_dict[x['species']]])),
    axis=1,
)
alt_filt_colors

403             white_yellow
404             orange_white
405      orange_white_yellow
406            orange_yellow
407                         
                ...         
10394                       
10395                       
10396                       
10397                       
10398                       
Length: 1703, dtype: object

In [312]:
# filt_colors_tot.loc[filt_colors==''] = filt_colors.loc[filt_colors==''].replace('',np.nan).fillna(alt_filt_colors.apply(lambda x: '_'.join(x.split('_')[:1])))

In [313]:
# sorted(wing_color_certainties.to_dict()['has_wing_color::blue'])

In [314]:
# filt_df_color

In [352]:
# Remove filt_colors from the kernel namespace; the consolidated cell that follows assigns it again.
del filt_colors

In [387]:
# ---------------------------------------------------------------------------
# Consolidated pipeline: assign one wing-color label to every image of the
# species listed in classes-subset.txt.
# ---------------------------------------------------------------------------

# classes-subset.txt: field 0 is an integer id (stored minus one as the key), field 1 the class name.
classes = {}
with open(bd.data_dir+'classes-subset.txt') as f:
    for line in f:
        line_lst = line.split()
        classes[int(line_lst[0])-1] = line_lst[1]

# Continuous class-level attribute table, one whitespace-split record per line.
class_attr_lst = []
with open(bd.data_dir+'attributes/class_attribute_labels_continuous.txt') as f:
    for line in f:
        class_attr_lst.append(line.split())

class_attr_df = pd.DataFrame(class_attr_lst, columns=bd.attributes.values())

# Keep the rows of the selected classes, relabel them with the class names, convert text to floats.
class_attr_df_filt = class_attr_df.loc[list(classes.keys())]
class_attr_df_filt.index = list(classes.values())
class_attr_df_filt = class_attr_df_filt.astype(float)


# Per-image sorted arrays of wing-color attribute names, as a one-column frame (column 0)
# whose index is shifted up by one.
attr_list = [bd.images[i]['attributes'] for i in bd.images]
attr_filt_list = [[attr for attr in attrs if 'wing_color' in attr] for attrs in attr_list]
filt_df = pd.DataFrame(attr_filt_list).apply(lambda x: np.array(sorted([i for i in x if i is not None])), axis=1)
filt_df = filt_df.to_frame()
filt_df.index += 1


# Three highest-scoring wing colors per class (other columns become NaN).
wing_color_certainties = class_attr_df_filt.loc[:,class_attr_df_filt.columns.str.contains('has_wing_color')]
filt_wing_colors = wing_color_certainties.apply(lambda x: x.nlargest(3), axis=1)

# {class name -> array of its top wing-color attribute names}
tmp_df = filt_wing_colors.T.apply(lambda x: list(x.loc[x.isnull()==False].index))
filt_wing_color_dict = dict(zip(tmp_df.columns, [tmp_df[col].values for col in tmp_df.columns]))

# Images with at least one wing-color attribute, restricted to the selected species.
filt_df_color = filt_df.loc[filt_df[0].apply(lambda x: len(x)>0)].reset_index().rename(columns={'index':'img_id'})
filt_df_color['species'] = filt_df_color['img_id'].apply(lambda x: bd.images[x]['filepath'].split('/')[0])
filt_df_color = filt_df_color[filt_df_color['species'].isin(filt_wing_color_dict.keys())]


# Label = the image's colors that are among its species' top colors ('' when none are).
filt_colors = filt_df_color.apply(lambda x: '_'.join(sorted([i.split('::')[1] for i in x[0] if i in filt_wing_color_dict[x['species']]])), axis=1)


def _rank_by_score(scores):
    """Map each attribute name to its rank (0 = highest score) within one class row."""
    ranked = list(scores.sort_values(ascending=False).index)
    return dict(zip(ranked, range(len(ranked))))


# {class name -> {attribute name -> rank}} and the inverse {class name -> {rank -> attribute name}}
wing_dict_full = wing_color_certainties.apply(_rank_by_score, axis=1).to_dict()
inv_wing_dict = {i : {v: k for k, v in wing_dict_full[i].items()} for i in wing_dict_full}

# Rows with an empty label. This must be selected BEFORE filt_colors is re-indexed below,
# while filt_colors still shares filt_df_color's row labels.
alt_color_df = filt_df_color.loc[filt_colors.loc[filt_colors==''].index].copy()

# From here on filt_colors is indexed by image id.
filt_colors.index = filt_df_color['img_id'].values

# Fallback for empty labels: the image's best-ranked color for its species.
alt_color_df['color_idx'] = alt_color_df.apply(lambda x: min([wing_dict_full[x['species']][i] for i in x[0]]), axis=1)
alt_color_df['wing_color'] = alt_color_df.apply(lambda x: inv_wing_dict[x['species']][x['color_idx']].split('::')[1], axis=1)

filt_colors_tot = filt_colors.copy()
# .loc assignment aligns the right-hand Series on index labels. filt_colors_tot is indexed by
# image id while alt_color_df keeps filt_df_color's row labels, so the fallback is re-keyed by
# img_id first; otherwise the values land as NaN or on the wrong image.
fallback_colors = alt_color_df.set_index('img_id')['wing_color']
filt_colors_tot.loc[filt_colors==''] = fallback_colors
filt_colors_tot=filt_colors_tot.str.replace('red_rufous', 'rufous')

In [342]:
# class_attr_df_filt

In [388]:
# Collect the image records that have a wing-color label, keyed by the label's index value.
# NOTE: each record is the same dict object stored in bd.images (not a copy), so the
# 'wing_color' key added here is visible through bd.images as well.
filt_images = {}
for img_id, wing_color in filt_colors_tot.items():
    record = bd.images[img_id]
    record['wing_color'] = wing_color
    filt_images[img_id] = record

In [389]:
# Inspect the image records collected in filt_images.
filt_images

{484: {'filepath': '010.Red_winged_Blackbird/Red_Winged_Blackbird_0091_4096.jpg',
  'class_label': 10,
  'bounding_box': [187.0, 129.0, 131.0, 226.0],
  'parts': {'back': [256.0, 197.0],
   'beak': [203.0, 137.0],
   'breast': [213.0, 196.0],
   'crown': [230.0, 142.0],
   'forehead': [223.0, 137.0],
   'left eye': [218.0, 142.0],
   'left leg': [247.0, 287.0],
   'left wing': [231.0, 215.0],
   'nape': [244.0, 172.0],
   'right leg': [239.0, 268.0],
   'tail': [296.0, 319.0],
   'throat': [214.0, 154.0]},
  'attributes': ['has_bill_shape::dagger',
   'has_wing_color::yellow',
   'has_wing_color::black',
   'has_wing_color::white',
   'has_wing_color::red',
   'has_upperparts_color::black',
   'has_underparts_color::black',
   'has_breast_pattern::solid',
   'has_back_color::black',
   'has_tail_shape::fan-shaped_tail',
   'has_upper_tail_color::black',
   'has_head_pattern::plain',
   'has_breast_color::black',
   'has_throat_color::black',
   'has_eye_color::black',
   'has_bill_leng

In [377]:
# Display the per-image wing-color table.
# (The original `filt_df_color[]` had an empty subscript, which is a syntax error.)
filt_df_color

Unnamed: 0,img_id,0,species
403,484,"[has_wing_color::black, has_wing_color::red, h...",010.Red_winged_Blackbird
404,485,"[has_wing_color::black, has_wing_color::orange...",010.Red_winged_Blackbird
405,486,"[has_wing_color::orange, has_wing_color::red, ...",010.Red_winged_Blackbird
406,487,"[has_wing_color::black, has_wing_color::orange...",010.Red_winged_Blackbird
407,489,[has_wing_color::black],010.Red_winged_Blackbird
...,...,...,...
10394,11305,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker
10395,11306,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker
10396,11307,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker
10397,11308,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker


In [375]:
# Display filt_images again.
# NOTE(review): the stored output appears to come from an earlier execution (lower execution count).
filt_images

{403: {'filepath': '008.Rhinoceros_Auklet/Rhinoceros_Auklet_0011_797530.jpg',
  'class_label': 8,
  'bounding_box': [8.0, 18.0, 289.0, 260.0],
  'parts': {'beak': [50.0, 57.0],
   'crown': [247.0, 122.0],
   'forehead': [158.0, 78.0],
   'left eye': [191.0, 124.0],
   'nape': [273.0, 222.0],
   'throat': [131.0, 188.0]},
  'attributes': ['has_bill_shape::dagger',
   'has_wing_color::grey',
   'has_wing_color::black',
   'has_upperparts_color::grey',
   'has_upperparts_color::black',
   'has_underparts_color::grey',
   'has_underparts_color::black',
   'has_breast_pattern::solid',
   'has_back_color::grey',
   'has_back_color::black',
   'has_tail_shape::notched_tail',
   'has_upper_tail_color::grey',
   'has_upper_tail_color::black',
   'has_head_pattern::crested',
   'has_breast_color::grey',
   'has_breast_color::black',
   'has_throat_color::black',
   'has_eye_color::orange',
   'has_eye_color::buff',
   'has_bill_length::about_the_same_as_head',
   'has_forehead_color::black',
   

In [340]:
# Show the per-image wing-color table (img_id, attribute array in column 0, species).
filt_df_color

Unnamed: 0,img_id,0,species
403,484,"[has_wing_color::black, has_wing_color::red, h...",010.Red_winged_Blackbird
404,485,"[has_wing_color::black, has_wing_color::orange...",010.Red_winged_Blackbird
405,486,"[has_wing_color::orange, has_wing_color::red, ...",010.Red_winged_Blackbird
406,487,"[has_wing_color::black, has_wing_color::orange...",010.Red_winged_Blackbird
407,489,[has_wing_color::black],010.Red_winged_Blackbird
...,...,...,...
10394,11305,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker
10395,11306,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker
10396,11307,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker
10397,11308,"[has_wing_color::black, has_wing_color::white]",192.Downy_Woodpecker


In [338]:
# Display the image records held by the dataset object.
# (The original `bd.images[]` had an empty subscript, which is a syntax error.)
bd.images

{1: {'filepath': '001.Black_footed_Albatross/Black_Footed_Albatross_0046_18.jpg',
  'class_label': 1,
  'bounding_box': [60.0, 27.0, 325.0, 304.0],
  'parts': {'beak': [312.0, 182.0],
   'crown': [186.0, 45.0],
   'forehead': [247.0, 79.0],
   'nape': [100.0, 221.0],
   'right eye': [183.0, 101.0],
   'throat': [215.0, 194.0]},
  'attributes': ['has_bill_shape::hooked_seabird',
   'has_head_pattern::masked',
   'has_throat_color::buff',
   'has_eye_color::brown',
   'has_bill_length::longer_than_head',
   'has_forehead_color::white',
   'has_nape_color::buff',
   'has_size::large_(16_-_32_in)',
   'has_shape::long-legged-like',
   'has_primary_color::buff',
   'has_bill_color::buff',
   'has_crown_color::buff']},
 2: {'filepath': '001.Black_footed_Albatross/Black_Footed_Albatross_0009_34.jpg',
  'class_label': 1,
  'bounding_box': [139.0, 30.0, 153.0, 264.0],
  'parts': {'back': [228.0, 138.0],
   'beak': [282.0, 154.0],
   'breast': [248.0, 158.0],
   'crown': [266.0, 141.0],
   'fore

In [324]:
# Frequency of each label in filt_colors; the empty-string label counts images with
# none of their colors among the ones listed for their species.
filt_colors.value_counts()

black_white     444
black           433
                227
brown            78
red              66
white            64
blue             61
black_blue       59
grey             58
black_red        34
grey_white       23
black_brown      22
buff             20
green            17
black_buff       17
rufous           14
grey_red         12
black_grey       12
yellow           10
brown_buff       10
brown_grey        9
green_yellow      8
red_rufous        5
dtype: int64

In [317]:
# Rows whose label in filt_colors was empty, with the fallback rank (color_idx) and color (wing_color).
alt_color_df

Unnamed: 0,img_id,0,species,color_idx,wing_color
457,542,[has_wing_color::orange],010.Red_winged_Blackbird,2,orange
458,543,"[has_wing_color::brown, has_wing_color::buff, ...",010.Red_winged_Blackbird,2,orange
461,546,[has_wing_color::red],011.Rusty_Blackbird,6,red
466,551,[has_wing_color::grey],011.Rusty_Blackbird,2,grey
481,566,[has_wing_color::buff],011.Rusty_Blackbird,4,buff
...,...,...,...,...,...
10119,11025,[has_wing_color::brown],188.Pileated_Woodpecker,3,brown
10162,11068,[has_wing_color::grey],188.Pileated_Woodpecker,2,grey
10163,11069,[has_wing_color::grey],188.Pileated_Woodpecker,2,grey
10237,11144,"[has_wing_color::brown, has_wing_color::buff]",190.Red_cockaded_Woodpecker,2,brown


In [325]:
# filt_df_color['color_idx'] = 

In [326]:
# filt_df_color

In [53]:
# filt_df[filt_df.apply(len)>1].apply(lambda x: [i for i in x if i in ])

In [41]:
# Display bd.attributes (attribute id -> attribute name, as shown in the output).
bd.attributes

{1: 'has_bill_shape::curved_(up_or_down)',
 2: 'has_bill_shape::dagger',
 3: 'has_bill_shape::hooked',
 4: 'has_bill_shape::needle',
 5: 'has_bill_shape::hooked_seabird',
 6: 'has_bill_shape::spatulate',
 7: 'has_bill_shape::all-purpose',
 8: 'has_bill_shape::cone',
 9: 'has_bill_shape::specialized',
 10: 'has_wing_color::blue',
 11: 'has_wing_color::brown',
 12: 'has_wing_color::iridescent',
 13: 'has_wing_color::purple',
 14: 'has_wing_color::rufous',
 15: 'has_wing_color::grey',
 16: 'has_wing_color::yellow',
 17: 'has_wing_color::olive',
 18: 'has_wing_color::green',
 19: 'has_wing_color::pink',
 20: 'has_wing_color::orange',
 21: 'has_wing_color::black',
 22: 'has_wing_color::white',
 23: 'has_wing_color::red',
 24: 'has_wing_color::buff',
 25: 'has_upperparts_color::blue',
 26: 'has_upperparts_color::brown',
 27: 'has_upperparts_color::iridescent',
 28: 'has_upperparts_color::purple',
 29: 'has_upperparts_color::rufous',
 30: 'has_upperparts_color::grey',
 31: 'has_upperparts_col

In [23]:
# Attribute names stored for the image with key 1.
bd.images[1]['attributes']

['has_bill_shape::hooked_seabird',
 'has_head_pattern::masked',
 'has_throat_color::buff',
 'has_eye_color::brown',
 'has_bill_length::longer_than_head',
 'has_forehead_color::white',
 'has_nape_color::buff',
 'has_size::large_(16_-_32_in)',
 'has_shape::long-legged-like',
 'has_primary_color::buff',
 'has_bill_color::buff',
 'has_crown_color::buff']

In [20]:
# filt_df is a Series of arrays in the early cells but a one-column frame (column 0) once
# to_frame() has run; pick the Series either way so the cell works in both states.
wing_attr_series = filt_df[0] if isinstance(filt_df, pd.DataFrame) else filt_df

# Images with more than one wing-color attribute, counted by sorted color combination.
multi_color = wing_attr_series[wing_attr_series.apply(len) > 1]
multi_color.apply(lambda attrs: '_'.join(sorted([attr.split('::')[1] for attr in attrs]))).value_counts()

black_white               795
brown_buff                529
black_grey                409
grey_white                377
black_grey_white          355
                         ... 
black_olive_orange          1
orange_pink_red_rufous      1
black_brown_buff_olive      1
black_blue_grey_purple      1
grey_pink                   1
Length: 355, dtype: int64

In [153]:
# Display filt_df: one array of wing-color attribute names per image.
filt_df

0                                                       []
1                                   [has_wing_color::grey]
2        [has_wing_color::black, has_wing_color::brown,...
3                                  [has_wing_color::brown]
4           [has_wing_color::black, has_wing_color::brown]
                               ...                        
11783                               [has_wing_color::grey]
11784      [has_wing_color::brown, has_wing_color::yellow]
11785      [has_wing_color::olive, has_wing_color::yellow]
11786        [has_wing_color::brown, has_wing_color::buff]
11787    [has_wing_color::brown, has_wing_color::buff, ...
Length: 11788, dtype: object

In [152]:
# TODO: unfinished experiment. The original `filt_df.apply(lambda x: [i in ])` was an incomplete
# expression and a syntax error. The stored output is simply filt_df, so display that until the
# intended per-row filter is written.
filt_df

0                                                       []
1                                   [has_wing_color::grey]
2        [has_wing_color::black, has_wing_color::brown,...
3                                  [has_wing_color::brown]
4           [has_wing_color::black, has_wing_color::brown]
                               ...                        
11783                               [has_wing_color::grey]
11784      [has_wing_color::brown, has_wing_color::yellow]
11785      [has_wing_color::olive, has_wing_color::yellow]
11786        [has_wing_color::brown, has_wing_color::buff]
11787    [has_wing_color::brown, has_wing_color::buff, ...
Length: 11788, dtype: object

In [67]:
# Highest wing-color score per row, over the columns whose name contains 'has_wing_color'.
wing_color_scores = class_attr_df_filt.filter(like='has_wing_color')
wing_color_scores.apply(max, axis=1)

010.Red_winged_Blackbird               70.886076
011.Rusty_Blackbird                    96.875000
012.Yellow_headed_Blackbird            89.208633
014.Indigo_Bunting                     69.503546
015.Lazuli_Bunting                     46.268657
016.Painted_Bunting                    73.188406
017.Cardinal                           61.363636
036.Northern_Flicker                   47.967480
037.Acadian_Flycatcher                 64.615385
038.Great_Crested_Flycatcher           49.565217
039.Least_Flycatcher                   50.442478
040.Olive_sided_Flycatcher             59.183673
041.Scissor_tailed_Flycatcher          51.655629
042.Vermilion_Flycatcher               42.857143
043.Yellow_bellied_Flycatcher          83.544304
054.Blue_Grosbeak                      83.916084
055.Evening_Grosbeak                   53.571429
056.Pine_Grosbeak                      94.160584
057.Rose_breasted_Grosbeak             78.620690
095.Baltimore_Oriole                   90.604027
096.Hooded_Oriole   

**Certainty Values**

In [95]:
# Read every per-image attribute record as a list of whitespace-split string fields.
# Leading fields: <image_id> <attribute_id> <is_present> <certainty_id>, followed by timing field(s).
img_label_lst = []
with open(bd.data_dir+'attributes/image_attribute_labels.txt') as f:
    for line in f:
        img_label_lst.append(line.split())

# certainty_id is the fourth field of each record. The original left this list empty (its append
# was commented out), so the value_counts summary of `certainties` had nothing to count.
certainties = [int(fields[3]) for fields in img_label_lst]

0          27.708
1          27.708
2          27.708
3          27.708
4          27.708
            ...  
3677851     4.989
3677852     8.309
3677853     8.309
3677854     8.309
3677855     8.309
Name: 5, Length: 3677856, dtype: float64

In [111]:
# Frame the raw records. Six column names are supplied, so shorter records leave 'time' missing.
# NOTE(review): presumably five-field records carry their timing in the 'time2' slot - confirm.
img_label_df = pd.DataFrame(img_label_lst, columns=['image_id', 'attribute_id', 'is_present', 'certainty_id', 'time2', 'time'])

# Cast the id/flag columns by name. astype returns new integer columns, whereas assigning through
# an iloc slice may write the integers into the existing object-dtype columns without changing dtype.
int_cols = ['image_id', 'attribute_id', 'is_present', 'certainty_id']
img_label_df = img_label_df.astype({col: int for col in int_cols})

# Use 'time' where present, otherwise fall back to 'time2'; then drop the helper column.
img_label_df['time'] = img_label_df['time'].fillna(img_label_df['time2']).astype(float)
img_label_df = img_label_df.drop('time2', axis=1)

In [117]:
# Keep only the records whose is_present flag equals 1.
is_present_mask = img_label_df['is_present'].eq(1)
label_pres_df = img_label_df.loc[is_present_mask]

In [127]:
# Share of present labels with certainty_id >= 3.
# Computed directly from label_pres_df, so this cell does not need certain_labels,
# which is only assigned in a cell further down the notebook.
(label_pres_df['certainty_id'] >= 3).mean()

0.9264082447776157

In [125]:
# Present labels whose certainty_id is at least 3.
is_certain = label_pres_df['certainty_id'].ge(3)
certain_labels = label_pres_df[is_certain]

In [93]:
# Relative frequency of each value in `certainties`.
pd.Series(certainties).value_counts(normalize=True)

4    0.552978
3    0.277650
1    0.107374
2    0.061998
dtype: float64

[3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 3,
 3,
