In [2]:
import os
import pandas as pd
from glob import glob
from functools import reduce
from xml.etree import ElementTree as et

In [3]:
xml_list = glob('./data_labels/*.xml')
xml_list

['./data_labels/2008_004518.xml',
 './data_labels/2010_004201.xml',
 './data_labels/2012_001778.xml',
 './data_labels/2008_000007.xml',
 './data_labels/2007_008446.xml',
 './data_labels/2010_005512.xml',
 './data_labels/2009_002487.xml',
 './data_labels/2010_003248.xml',
 './data_labels/2009_001139.xml',
 './data_labels/2008_004754.xml',
 './data_labels/2009_002607.xml',
 './data_labels/2008_007227.xml',
 './data_labels/2011_005574.xml',
 './data_labels/2012_000217.xml',
 './data_labels/2010_001537.xml',
 './data_labels/2012_004247.xml',
 './data_labels/2008_005369.xml',
 './data_labels/2008_001773.xml',
 './data_labels/2008_006424.xml',
 './data_labels/2012_000931.xml',
 './data_labels/2011_005619.xml',
 './data_labels/2011_000623.xml',
 './data_labels/2010_006699.xml',
 './data_labels/2011_000771.xml',
 './data_labels/2008_004636.xml',
 './data_labels/2010_002577.xml',
 './data_labels/2008_002767.xml',
 './data_labels/2008_002495.xml',
 './data_labels/2012_002449.xml',
 './data_label

In [15]:
# read xml files, extract filename, object (name, xmin, xmax, ymin, ymax), size (width, height)
def extract_info(filename):
    tree = et.parse(filename)
    root = tree.getroot()
    
    img_name = root.find('filename').text
    width = root.find('size').find('width').text
    height = root.find('size').find('height').text
    
    objs = root.findall('object')
    parser = []
    for obj in objs:
        name = obj.find('name').text
        xmin = obj.find('bndbox').find('xmin').text
        xmax = obj.find('bndbox').find('xmax').text
        ymin = obj.find('bndbox').find('ymin').text
        ymax = obj.find('bndbox').find('ymax').text
        parser.append([img_name, width, height, name, xmin, xmax, ymin, ymax])

    return parser

In [17]:
extract_info('./data_labels/2008_007227.xml')

[['2008_007227.jpg', '470', '500', 'car', '201', '262', '210', '249']]

In [18]:
parser_all = list(map(extract_info, xml_list))

In [24]:
data = reduce(lambda x,y : x+y, parser_all)

In [26]:
df = pd.DataFrame(data, columns=['filename', 'width', 'height', 'name', 'xmin', 'xmax', 'ymin', 'ymax'])

In [27]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,2008_004518.jpg,500,324,person,110,423,66,324
1,2010_004201.jpg,500,333,bird,172,265,218,271
2,2010_004201.jpg,500,333,bird,229,321,238,288
3,2012_001778.jpg,375,500,person,219,323,225,440
4,2008_000007.jpg,500,375,boat,1,428,230,293


In [28]:
df.shape

(40138, 8)

In [29]:
df.name.value_counts()

name
person         17401
chair           3056
car             2492
dog             1598
bottle          1561
cat             1277
bird            1271
pottedplant     1202
sheep           1084
boat            1059
aeroplane       1002
tvmonitor        893
sofa             841
bicycle          837
horse            803
motorbike        801
diningtable      800
cow              771
train            704
bus              685
Name: count, dtype: int64

In [30]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40138 entries, 0 to 40137
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  40138 non-null  object
 1   width     40138 non-null  object
 2   height    40138 non-null  object
 3   name      40138 non-null  object
 4   xmin      40138 non-null  object
 5   xmax      40138 non-null  object
 6   ymin      40138 non-null  object
 7   ymax      40138 non-null  object
dtypes: object(8)
memory usage: 2.4+ MB


In [51]:
type_conv_cols = ['width', 'height', 'xmin', 'xmax', 'ymax']
# column 'ymin' has some float integer, so 
df['ymin'] = df['ymin'].apply(lambda x: int(float(x)))
df[type_conv_cols] = df[type_conv_cols].round().astype(int)

In [53]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40138 entries, 0 to 40137
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  40138 non-null  object
 1   width     40138 non-null  int64 
 2   height    40138 non-null  int64 
 3   name      40138 non-null  object
 4   xmin      40138 non-null  int64 
 5   xmax      40138 non-null  int64 
 6   ymin      40138 non-null  int64 
 7   ymax      40138 non-null  int64 
dtypes: int64(6), object(2)
memory usage: 2.4+ MB


In [54]:
df['center_x'] = ((df['xmin']+df['xmax'])/2)/df['width']
df['center_y'] = ((df['ymin']+df['ymax'])/2)/df['height']
df['w'] = (df['xmax']-df['xmin'])/df['width']
df['h'] = (df['ymax']-df['ymin'])/df['height']

In [56]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,2008_004518.jpg,500,324,person,110,423,66,324,0.533,0.601852,0.626,0.796296
1,2010_004201.jpg,500,333,bird,172,265,218,271,0.437,0.734234,0.186,0.159159
2,2010_004201.jpg,500,333,bird,229,321,238,288,0.55,0.78979,0.184,0.15015
3,2012_001778.jpg,375,500,person,219,323,225,440,0.722667,0.665,0.277333,0.43
4,2008_000007.jpg,500,375,boat,1,428,230,293,0.429,0.697333,0.854,0.168


In [75]:
# Train-Test Split

images = df['filename'].unique()
len(images)

17125

In [66]:
img_df = pd.DataFrame(images, columns=['filename'])
img_train = tuple(img_df.sample(frac=0.8)['filename'])
img_test = tuple(img_df.query(f'filename not in {img_train}')['filename'])

In [69]:
len(img_train), len(img_test)

(13700, 3425)

In [70]:
train_df = df.query(f'filename in {img_train}')
test_df = df.query(f'filename in {img_test}')

In [72]:
train_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,2008_004518.jpg,500,324,person,110,423,66,324,0.533,0.601852,0.626,0.796296
1,2010_004201.jpg,500,333,bird,172,265,218,271,0.437,0.734234,0.186,0.159159
2,2010_004201.jpg,500,333,bird,229,321,238,288,0.55,0.78979,0.184,0.15015
3,2012_001778.jpg,375,500,person,219,323,225,440,0.722667,0.665,0.277333,0.43
7,2010_005512.jpg,500,375,chair,288,399,67,219,0.687,0.381333,0.222,0.405333


In [73]:
test_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
4,2008_000007.jpg,500,375,boat,1,428,230,293,0.429,0.697333,0.854,0.168
5,2007_008446.jpg,375,500,person,96,281,43,334,0.502667,0.377,0.493333,0.582
6,2007_008446.jpg,375,500,person,112,287,205,465,0.532,0.67,0.466667,0.52
15,2009_002487.jpg,500,333,diningtable,157,333,179,333,0.49,0.768769,0.352,0.462462
16,2009_002487.jpg,500,333,chair,172,242,150,284,0.414,0.651652,0.14,0.402402


In [77]:
#label-encoding
def label_encoding(x):
    labels = {'person':0, 'car':1, 'chair':2, 'bottle':3, 'pottedplant':4, 'bird':5, 'dog':6, 'sofa':7, 'bicycle':8, 'horse':9, 'boat':10, 
              'motorbike':11, 'cat':12, 'tvmonitor':13, 'cow':14, 'sheep':15, 'aeroplane':16, 'train':17, 'diningtable':18, 'bus':19}
    return labels[x]

In [78]:
train_df['id'] = train_df['name'].apply(label_encoding)
test_df['id'] = test_df['name'].apply(label_encoding)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['id'] = train_df['name'].apply(label_encoding)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['id'] = test_df['name'].apply(label_encoding)


In [79]:
 train_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
0,2008_004518.jpg,500,324,person,110,423,66,324,0.533,0.601852,0.626,0.796296,0
1,2010_004201.jpg,500,333,bird,172,265,218,271,0.437,0.734234,0.186,0.159159,5
2,2010_004201.jpg,500,333,bird,229,321,238,288,0.55,0.78979,0.184,0.15015,5
3,2012_001778.jpg,375,500,person,219,323,225,440,0.722667,0.665,0.277333,0.43,0
7,2010_005512.jpg,500,375,chair,288,399,67,219,0.687,0.381333,0.222,0.405333,2


In [80]:
test_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
4,2008_000007.jpg,500,375,boat,1,428,230,293,0.429,0.697333,0.854,0.168,10
5,2007_008446.jpg,375,500,person,96,281,43,334,0.502667,0.377,0.493333,0.582,0
6,2007_008446.jpg,375,500,person,112,287,205,465,0.532,0.67,0.466667,0.52,0
15,2009_002487.jpg,500,333,diningtable,157,333,179,333,0.49,0.768769,0.352,0.462462,18
16,2009_002487.jpg,500,333,chair,172,242,150,284,0.414,0.651652,0.14,0.402402,2


In [81]:
#folder structure

from shutil import move

train_folder = 'data_images/train'
test_folder = 'data_images/test'

os.mkdir(train_folder)
os.mkdir(test_folder)

In [86]:
cols = ['filename', 'id', 'center_x', 'center_y', 'w', 'h']

groupby_obj_train = train_df[cols].groupby('filename')
groupby_obj_test = test_df[cols].groupby('filename')

In [87]:
# save each image in train/test folders along with respective .txt files
def data_prep(filename, folder_path, group_obj):
    src = os.path.join('data_images', filename)
    dst = os.path.join(folder_path, filename)
    move(src, dst)

    #save labels
    text_filename = os.path.join(folder_path, os.path.splitext(filename)[0]+'.txt')
    group_obj.get_group(filename).set_index('filename').to_csv(text_filename, sep=' ', index=False, header=False)

In [90]:
train_filename_series = pd.Series(groupby_obj_train.groups.keys())
train_filename_series.apply(data_prep, args=(train_folder, groupby_obj_train))

0        None
1        None
2        None
3        None
4        None
         ... 
13695    None
13696    None
13697    None
13698    None
13699    None
Length: 13700, dtype: object

In [91]:
test_filename_series = pd.Series(groupby_obj_test.groups.keys())
test_filename_series.apply(data_prep, args=(test_folder, groupby_obj_test))

0       None
1       None
2       None
3       None
4       None
        ... 
3420    None
3421    None
3422    None
3423    None
3424    None
Length: 3425, dtype: object

In [92]:
labels = {'person':0, 'car':1, 'chair':2, 'bottle':3, 'pottedplant':4, 'bird':5, 'dog':6, 'sofa':7, 'bicycle':8, 'horse':9, 'boat':10, 
              'motorbike':11, 'cat':12, 'tvmonitor':13, 'cow':14, 'sheep':15, 'aeroplane':16, 'train':17, 'diningtable':18, 'bus':19}

In [93]:
labels.keys()

dict_keys(['person', 'car', 'chair', 'bottle', 'pottedplant', 'bird', 'dog', 'sofa', 'bicycle', 'horse', 'boat', 'motorbike', 'cat', 'tvmonitor', 'cow', 'sheep', 'aeroplane', 'train', 'diningtable', 'bus'])