In [2]:
import os
import pandas as pd
from bs4 import BeautifulSoup
import voc_utils
from more_itertools import unique_everseen

In [3]:
root_dir = '/home/shang/data/VOC2012/VOCdevkit/VOC2012/'
img_dir = os.path.join(root_dir, 'JPEGImages')
ann_dir = os.path.join(root_dir, 'Annotations')
set_dir = os.path.join(root_dir, 'ImageSets', 'Main')

In [4]:
# list image sets
all_files = !ls {set_dir}
image_sets = sorted(list(set([filename.replace('.txt', '').strip().split('_')[0] for filename in all_files])))
print image_sets, len(image_sets)

['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'test', 'train', 'trainval', 'tvmonitor', 'val'] 23


In [5]:
# category name is from above, dataset is either "train" or
# "val" or "train_val"
def imgs_from_category(cat_name, dataset):
    filename = os.path.join(set_dir, cat_name + "_" + dataset + ".txt")
    df = pd.read_csv(
        filename,
        delim_whitespace=True,
        header=None,
        names=['filename', 'true'])
    return df

def imgs_from_category_as_list(cat_name, dataset):
    df = imgs_from_category(cat_name, dataset)
    df = df[df['true'] == 1]
    return df['filename'].values

def annotation_file_from_img(img_name):
    return os.path.join(ann_dir, img_name) + '.xml'

In [6]:
# annotation operations
def load_annotation(img_filename):
    xml = ""
    with open(annotation_file_from_img(img_filename)) as f:
        xml = f.readlines()
    xml = ''.join([line.strip('\t') for line in xml])
    return BeautifulSoup(xml, 'xml')

def get_all_obj_and_box(objname, img_set):
    img_list = imgs_from_category_as_list(objname, img_set)
    
    for img in img_list:
        annotation = load_annotation(img)
        

In [7]:
# image operations
def load_img(img_filename):
    return io.load_image(os.path.join(img_dir, img_filename + '.jpg'))

In [31]:
train_img_list = imgs_from_category_as_list('bicycle', 'train')

In [32]:
a = load_annotation(train_img_list[0])

In [33]:
def load_train_data(category):
    to_find = category
    # train_filename = '/Users/mprat/personal/VOCdevkit/VOC2012/csvs/train_' + category + '.csv'
    train_filename = os.path.join(root_dir, 'csvs')
    if not os.path.exists(train_filename):
        os.makedirs(train_filename)
    train_filename = os.path.join(train_filename, category+'.csv')
    
    if os.path.isfile(train_filename):
        return pd.read_csv(train_filename)
    else:
        train_img_list = imgs_from_category_as_list(to_find, 'train')
        data = []
        for item in train_img_list:
            anno = load_annotation(item)
            objs = anno.findAll('object')
            for obj in objs:
                obj_names = obj.findChildren('name')
                for name_tag in obj_names:
                    if str(name_tag.contents[0]) == 'bicycle':
                        fname = anno.findChild('filename').contents[0]
                        bbox = obj.findChildren('bndbox')[0]
                        xmin = int(bbox.findChildren('xmin')[0].contents[0])
                        ymin = int(bbox.findChildren('ymin')[0].contents[0])
                        xmax = int(bbox.findChildren('xmax')[0].contents[0])
                        ymax = int(bbox.findChildren('ymax')[0].contents[0])
                        data.append([fname, xmin, ymin, xmax, ymax])
        df = pd.DataFrame(data, columns=['fname', 'xmin', 'ymin', 'xmax', 'ymax'])
        df.to_csv(train_filename, index=False)
        return df

In [34]:
df = load_train_data('bicycle')
print list(unique_everseen(list(root_dir + df['fname'])))

['/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_000036.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_000191.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_000196.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_000531.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_000615.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_000764.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001336.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001375.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001402.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001523.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001566.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001626.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001791.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001813.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001860.jpg', '/home/shang/data/VOC2012/VOCdevkit/VOC2012/2008_001986.jpg', '/home/

In [38]:
for cat in image_sets:
    if cat != 'train' and cat != 'val' and cat != 'trainval' and cat != 'test':
        print(cat)
        load_train_data(cat)

aeroplane
bicycle
bird
boat
bottle
bus
car
cat
chair
cow
diningtable
dog
horse
motorbike
person
pottedplant
sheep
sofa
tvmonitor


In [43]:
df = load_train_data('bicycle')
for row_num, entry in df.iterrows():
    print entry
#print df


fname    2008_000036.jpg
xmin                 120
ymin                   1
xmax                 203
ymax                  35
Name: 0, dtype: object
fname    2008_000036.jpg
xmin                 117
ymin                  38
xmax                 273
ymax                 121
Name: 1, dtype: object
fname    2008_000191.jpg
xmin                  57
ymin                 194
xmax                 151
ymax                 255
Name: 2, dtype: object
fname    2008_000196.jpg
xmin                 348
ymin                   7
xmax                 500
ymax                 309
Name: 3, dtype: object
fname    2008_000196.jpg
xmin                   1
ymin                   1
xmax                 206
ymax                 310
Name: 4, dtype: object
fname    2008_000531.jpg
xmin                  99
ymin                 183
xmax                 203
ymax                 263
Name: 5, dtype: object
fname    2008_000615.jpg
xmin                  50
ymin                 179
xmax                 311
ymax        