In [1]:
import os
import math
import pandas as pd
import numpy as np
# Silence NumPy's divide-by-zero warning: later cells take np.log of
# probabilities that can be exactly 0, which intentionally yields -inf.
np.seterr(divide='ignore')

# Root directory of the extracted CUB_200_2011 dataset.  Set the CUB_DATA_DIR
# environment variable to point at another location; the original hard-coded
# path remains the default so existing setups keep working unchanged.
data_dir = os.environ.get('CUB_DATA_DIR', 'D:\\data\\caltecBirds\\CUB_200_2011')

In [2]:
# Load class and attribute data from the supplied text files
#
# NOTE(review): the 'trainset' flag is read straight from train_test_split.txt.
# This notebook evaluates on rows with trainset == 1 and trains on rows with
# trainset == 0; confirm against the dataset README that this polarity is the
# intended one.

attributes_dir = os.path.join(data_dir, 'attributes')

# One row per image, indexed by image_id.
train_test_split = pd.read_csv(os.path.join(data_dir, 'train_test_split.txt'),   sep=" ", index_col=[0], names=['image_id', 'trainset'])
image_file_names = pd.read_csv(os.path.join(data_dir, 'images.txt'),             sep=" ", index_col=[0], names=['image_id', 'file_name'])
class_labels     = pd.read_csv(os.path.join(data_dir, 'image_class_labels.txt'), sep=" ", index_col=[0], names=['image_id', 'class_id'])

# One row per (image, attribute) annotation.  'd1' and 'd2' only absorb extra
# fields so that every line parses; they are not used afterwards.
image_attribute_labels = pd.read_csv(os.path.join(attributes_dir, 'image_attribute_labels.txt'), sep=" ",
                                     names=['image_id', 'attribute_id', 'present', 'certainty_id', 'time', 'd1', 'd2'])
# Attach the split flag and the class label to every annotation row.
image_attribute_labels = image_attribute_labels.merge(train_test_split, on='image_id', how='left')
image_attribute_labels = image_attribute_labels.merge(class_labels, on='image_id', how='left')

# The file is read with attributes as columns 1..312 and one row per class;
# transpose to attribute rows x class columns, scale percentages to [0, 1] and
# shift the 0-based class positions to 1-based class ids.
expert_probabilities = (
    pd.read_csv(os.path.join(attributes_dir, 'class_attribute_labels_continuous.txt'), sep=" ", names=list(range(1, 313)))
    .transpose()
    .div(100)
    .rename(columns=lambda class_position: class_position + 1)
)

# Log of the mean 'present' value per (attribute, class) over rows with trainset == 1.
# Only 'present' is aggregated; averaging the other columns would be wasted work.
# Entries where an attribute was never marked present for a class stay at -inf.
test_attribute_labels = image_attribute_labels.loc[image_attribute_labels['trainset'] == 1].copy()
test_attribute_probabilities = np.log(
    test_attribute_labels.pivot_table(index='attribute_id', columns='class_id', values='present', aggfunc='mean')
)



In [3]:
# Class attribute probabilities based on human knowledge, moved to log space.
# A probability of exactly 0 becomes -inf.
# NOTE: this cell rebinds expert_probabilities to its own logarithm, so it must
# be run exactly once after the loading cell; re-running it takes the log twice.

expert_probabilities = expert_probabilities.apply(np.log)
expert_probabilities


Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,191,192,193,194,195,196,197,198,199,200
1,-inf,-3.120895,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,...,-inf,-inf,-1.533930,-1.195239,-1.152680,-3.610918,-1.698233,-2.229382,-inf,-inf
2,-3.533687,-3.120895,-3.225520,-4.226834,-inf,-inf,-inf,-3.526361,-2.810908,-1.778336,...,-0.431782,-0.767255,-2.322388,-1.132060,-3.077970,-2.052773,-2.140066,-1.002152,-3.878121,-2.491296
3,-4.226834,-3.526361,-3.407842,-2.280924,-3.433987,-inf,-4.127134,-2.227078,-inf,-inf,...,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf
4,-inf,-4.219508,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,...,-inf,-inf,-5.030438,-4.330733,-inf,-inf,-3.421000,-2.577688,-3.878121,-inf
5,-0.513262,-0.555946,-0.344451,-inf,-4.532599,-inf,-4.127134,-2.545531,-inf,-inf,...,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-5.062595,-inf,-inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,-1.142097,-inf,-1.821612,-3.843030,-3.091042,-3.015535,-inf,-inf,-3.339322,-inf,...,-5.123964,-inf,-1.803188,-3.084528,-2.227622,-1.217218,-1.479076,-0.870481,-1.649984,-0.966616
309,-0.624154,-0.259511,-0.342095,-0.172843,-0.122602,-1.819158,-0.449268,-0.443205,-0.282567,-1.881786,...,-2.664700,-inf,-0.622831,-3.944813,-1.173733,-1.939395,-1.784791,-1.671742,-2.086362,-0.571132
310,-2.926739,-3.267666,-4.672829,-3.737670,-inf,-2.106841,-3.555348,-inf,-3.768922,-4.324133,...,-3.453157,-0.261215,-3.071370,-0.418452,-2.625985,-1.475090,-1.379326,-0.814292,-1.064711,-3.488903
311,-1.540445,-2.256065,-2.593387,-3.449988,-3.258097,-0.720546,-1.881372,-1.621860,-2.382628,-2.619385,...,-3.270836,-1.826851,-1.188638,-1.577689,-0.733421,-0.576091,-1.134203,-1.671742,-1.105533,-2.236140


In [4]:
# Train a Naive Bayes classifier based on the attributes contained in the training dataset
#
# Each entry is log(mean of 'present') for one (attribute, class) pair, computed
# over the rows with trainset == 0.  Only the 'present' column is aggregated;
# averaging every other numeric column just to discard it is wasted work.
# Entries where an attribute was never marked present for a class stay at -inf,
# i.e. no smoothing is applied.

train_attribute_labels = image_attribute_labels.loc[image_attribute_labels['trainset'] == 0].copy()
class_attribute_probabilities = np.log(
    train_attribute_labels.pivot_table(index='attribute_id', columns='class_id', values='present', aggfunc='mean')
)
class_attribute_probabilities

class_id,1,2,3,4,5,6,7,8,9,10,...,191,192,193,194,195,196,197,198,199,200
attribute_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-inf,-2.708050,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,...,-inf,-inf,-1.791759,-1.455287,-1.455287,-3.401197,-1.321756,-2.302585,-inf,-inf
2,-3.401197,-2.708050,-3.332205,-inf,-inf,-inf,-inf,-inf,-2.674149,-2.302585,...,-0.510826,-0.693147,-2.708050,-1.203973,-2.708050,-2.014903,-1.609438,-1.455287,-3.401197,-2.014903
3,-inf,-3.401197,-3.332205,-2.014903,-2.639057,-inf,-inf,-2.890372,-inf,-inf,...,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf
4,-inf,-3.401197,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,...,-inf,-inf,-inf,-inf,-inf,-inf,-3.401197,-2.014903,-inf,-inf
5,-0.628609,-0.693147,-0.387766,-inf,-inf,-inf,-inf,-1.791759,-inf,-inf,...,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-3.401197,-inf,-inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,-1.203973,-inf,-1.722767,-3.401197,-inf,-inf,-inf,-inf,-2.674149,-inf,...,-3.401197,-inf,-1.791759,-2.708050,-2.014903,-1.203973,-1.203973,-0.693147,-1.791759,-0.762140
309,-1.003302,-0.510826,-0.693147,-0.356675,-0.693147,-2.397895,-0.737599,-0.587787,-0.422857,-1.791759,...,-2.302585,-inf,-1.098612,-2.708050,-1.455287,-1.791759,-1.321756,-1.321756,-2.014903,-0.916291
310,-2.708050,-3.401197,-3.332205,-inf,-inf,-1.704748,-3.135494,-inf,-inf,-3.401197,...,-2.708050,-0.223144,-2.708050,-0.693147,-3.401197,-1.455287,-1.609438,-1.203973,-0.916291,-2.708050
311,-2.014903,-2.708050,-1.945910,-3.401197,-2.639057,-1.704748,-2.036882,-1.791759,-2.268684,-2.302585,...,-3.401197,-2.014903,-0.916291,-1.098612,-0.762140,-0.628609,-1.321756,-2.014903,-1.321756,-2.014903


In [5]:
# Show the log of the mean 'present' value per (attribute, class), computed in
# the loading cell from the rows with trainset == 1.
test_attribute_probabilities

class_id,1,2,3,4,5,6,7,8,9,10,...,191,192,193,194,195,196,197,198,199,200
attribute_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,-inf,-3.401197,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,...,-inf,-inf,-1.321756,-1.003302,-1.003302,-3.367296,-2.014903,-2.302585,-inf,-inf
2,-3.401197,-3.401197,-2.708050,-3.401197,-inf,-inf,-inf,-2.708050,-3.401197,-1.321756,...,-0.405465,-0.693147,-2.014903,-1.098612,-3.401197,-1.981001,-2.708050,-0.693147,-inf,-2.708050
3,-3.401197,-2.302585,-3.401197,-2.708050,-inf,-inf,-3.401197,-2.302585,-inf,-inf,...,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf
4,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,...,-inf,-inf,-3.401197,-3.401197,-inf,-inf,-3.401197,-inf,-3.401197,-inf
5,-0.567984,-0.567984,-0.356675,-inf,-3.401197,-inf,-3.401197,-3.401197,-inf,-inf,...,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308,-1.203973,-inf,-2.302585,-inf,-2.708050,-2.708050,-inf,-inf,-inf,-inf,...,-inf,-inf,-1.791759,-3.401197,-2.302585,-1.170071,-1.791759,-1.098612,-1.321756,-1.203973
309,-1.003302,-0.628609,-0.510826,-0.356675,-0.356675,-2.302585,-0.762140,-0.916291,-0.510826,-2.014903,...,-2.708050,-inf,-0.405465,-inf,-1.098612,-2.268684,-2.708050,-2.302585,-2.014903,-0.510826
310,-inf,-3.401197,-inf,-3.401197,-inf,-2.708050,-inf,-inf,-3.401197,-inf,...,-inf,-0.310155,-3.401197,-0.310155,-2.014903,-1.421386,-1.455287,-0.628609,-1.455287,-inf
311,-1.455287,-2.302585,-inf,-3.401197,-inf,-0.836248,-1.791759,-2.014903,-2.708050,-3.401197,...,-3.401197,-1.609438,-1.321756,-2.014903,-0.762140,-0.594707,-1.003302,-1.321756,-0.836248,-2.302585


In [6]:
# Evaluate each log-probability table on the images flagged trainset == 1.
classifiers = {'Expert': expert_probabilities, 'Trained': class_attribute_probabilities, 'TrainedOnTest': test_attribute_probabilities}


def predict_class(log_probabilities, present_attribute_ids):
    """Return the class with the highest summed log-probability, or None.

    Parameters
    ----------
    log_probabilities : pd.DataFrame
        Log-probabilities indexed by attribute_id with one column per class_id.
    present_attribute_ids : iterable
        Attribute ids marked as present for the image being classified.

    Returns
    -------
    The column label of the best-scoring class, or None when every class
    scores -inf (each class has probability 0 for at least one present
    attribute), in which case no prediction is made.
    """
    present_rows = log_probabilities.loc[log_probabilities.index.isin(list(present_attribute_ids))]
    class_scores = present_rows.sum()
    # The argmax of the log scores is the argmax of the normalised
    # probabilities, and staying in log space avoids exp() underflowing to 0.
    if not (class_scores.max() > -np.inf):
        return None
    return class_scores.idxmax(skipna=True)


# Everything below is independent of the classifier, so compute it once
# instead of re-scanning the full annotation table for every image.
eval_image_ids = train_test_split.index[train_test_split['trainset'] == 1]

is_present = image_attribute_labels['present'] == 1
is_evaluated = image_attribute_labels['image_id'].isin(eval_image_ids)
present_attributes_by_image = (
    image_attribute_labels.loc[is_present & is_evaluated]
    .groupby('image_id')['attribute_id']
    .apply(list)
    .to_dict()
)

# First class label recorded for each image id.
true_class_by_image = class_labels.loc[~class_labels.index.duplicated(), 'class_id'].to_dict()

for name, classifier in classifiers.items():
    count, correct = 0, 0
    for image_id in eval_image_ids:
        # Images without any present attribute get an empty list.
        predicted_class = predict_class(classifier, present_attributes_by_image.get(image_id, []))
        # An image with no prediction still counts towards the total.
        if predicted_class is not None and predicted_class == true_class_by_image[image_id]:
            correct += 1
        count += 1

    # Guard against an empty evaluation set instead of raising ZeroDivisionError.
    accuracy = correct / count if count else float('nan')
    print(name, accuracy)
    
        
    

Expert 0.6339673006339673
Trained 0.29796463129796463
TrainedOnTest 0.7057057057057057
