In [1]:
import os
from glob import glob
import pandas as pd
from functools import reduce
from xml.etree import ElementTree as et

In [10]:
#load all xml files and store in a list 
xml_list = glob('./data_images/archive/VOC2012/Annotations/*.xml')
#data cleaning replace \\ with /
xmlfiles = list(map(lambda x:x.replace('\\','/'),xml_list))

In [11]:
xmlfiles

['./data_images/archive/VOC2012/Annotations/2007_000027.xml',
 './data_images/archive/VOC2012/Annotations/2007_000032.xml',
 './data_images/archive/VOC2012/Annotations/2007_000033.xml',
 './data_images/archive/VOC2012/Annotations/2007_000039.xml',
 './data_images/archive/VOC2012/Annotations/2007_000042.xml',
 './data_images/archive/VOC2012/Annotations/2007_000061.xml',
 './data_images/archive/VOC2012/Annotations/2007_000063.xml',
 './data_images/archive/VOC2012/Annotations/2007_000068.xml',
 './data_images/archive/VOC2012/Annotations/2007_000121.xml',
 './data_images/archive/VOC2012/Annotations/2007_000123.xml',
 './data_images/archive/VOC2012/Annotations/2007_000129.xml',
 './data_images/archive/VOC2012/Annotations/2007_000170.xml',
 './data_images/archive/VOC2012/Annotations/2007_000175.xml',
 './data_images/archive/VOC2012/Annotations/2007_000187.xml',
 './data_images/archive/VOC2012/Annotations/2007_000241.xml',
 './data_images/archive/VOC2012/Annotations/2007_000243.xml',
 './data

In [9]:
# step-2
# from each xml file we need to extract 
# filename, size(width,height),object(name,xmin,xmax,ymin,ymax)
def extract_text(filename):
    tree = et.parse(filename)
    root = tree.getroot()
    
    # extract filename
    image_name = root.find('filename').text
    # width and height of the image
    width = root.find('size').find('width').text
    height = root.find('size').find('height').text
    objs = root.findall('object')
    parser = []
    for obj in objs:
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = bndbox.find('xmin').text
        xmax = bndbox.find('xmax').text
        ymin= bndbox.find('ymin').text
        ymax= bndbox.find('ymax').text
        parser.append([image_name,width,height,name,xmin,xmax,ymin,ymax])
    
    return parser

In [12]:
parser_all = list(map(extract_text,xmlfiles))

In [16]:
data =  reduce(lambda x,y : x+y,parser_all)

In [21]:
df = pd.DataFrame(data,columns = ['filename','width','height','name','xmin','xmax','ymin','ymax'])

In [23]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,2007_000027.jpg,486,500,person,174,349,101,351
1,2007_000032.jpg,500,281,aeroplane,104,375,78,183
2,2007_000032.jpg,500,281,aeroplane,133,197,88,123
3,2007_000032.jpg,500,281,person,195,213,180,229
4,2007_000032.jpg,500,281,person,26,44,189,238


In [24]:
df.shape

(40138, 8)

In [25]:
df['name'].value_counts()

name
person         17401
chair           3056
car             2492
dog             1598
bottle          1561
cat             1277
bird            1271
pottedplant     1202
sheep           1084
boat            1059
aeroplane       1002
tvmonitor        893
sofa             841
bicycle          837
horse            803
motorbike        801
diningtable      800
cow              771
train            704
bus              685
Name: count, dtype: int64

In [27]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40138 entries, 0 to 40137
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  40138 non-null  object
 1   width     40138 non-null  object
 2   height    40138 non-null  object
 3   name      40138 non-null  object
 4   xmin      40138 non-null  object
 5   xmax      40138 non-null  object
 6   ymin      40138 non-null  object
 7   ymax      40138 non-null  object
dtypes: object(8)
memory usage: 2.4+ MB


In [32]:
#type conversion
cols = ['width','height','xmin','xmax','ymin','ymax']

# Convert columns to floating-point numbers
df[cols] = df[cols].astype(float)

# Convert floating-point numbers to integers
df[cols] = df[cols].astype(int)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40138 entries, 0 to 40137
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  40138 non-null  object
 1   width     40138 non-null  int32 
 2   height    40138 non-null  int32 
 3   name      40138 non-null  object
 4   xmin      40138 non-null  int32 
 5   xmax      40138 non-null  int32 
 6   ymin      40138 non-null  int32 
 7   ymax      40138 non-null  int32 
dtypes: int32(6), object(2)
memory usage: 1.5+ MB


In [33]:

# center x, center y
df['center_x'] =((df['xmax']+df['xmin'])/2)/df['width']
df['center_y'] =((df['ymax']+df['ymin'])/2)/df['height']
# W
df['w'] = (df['xmax']-df['xmin'])/df['width']
# h
df['h'] = (df['ymax']-df['ymin'])/df['height']

In [34]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,2007_000027.jpg,486,500,person,174,349,101,351,0.538066,0.452,0.360082,0.5
1,2007_000032.jpg,500,281,aeroplane,104,375,78,183,0.479,0.464413,0.542,0.373665
2,2007_000032.jpg,500,281,aeroplane,133,197,88,123,0.33,0.375445,0.128,0.124555
3,2007_000032.jpg,500,281,person,195,213,180,229,0.408,0.727758,0.036,0.174377
4,2007_000032.jpg,500,281,person,26,44,189,238,0.07,0.759786,0.036,0.174377


**Split data into train and test**

In [35]:
image = df['filename'].unique()

In [37]:
len(image)

17125

In [38]:
# 80 % train and 20 % test

img_df = pd.DataFrame(image,columns=['filename'])
img_train = tuple(img_df.sample(frac=0.8)['filename'])

In [39]:
img_test = tuple(img_df.query(f'filename not in {img_train}')['filename'])

In [43]:
len(img_train),len(img_test)

(13700, 3425)

In [44]:
train_df = df.query(f'filename in {img_train}')
test_df = df.query(f'filename in {img_test}')

In [45]:
train_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
0,2007_000027.jpg,486,500,person,174,349,101,351,0.538066,0.452,0.360082,0.5
1,2007_000032.jpg,500,281,aeroplane,104,375,78,183,0.479,0.464413,0.542,0.373665
2,2007_000032.jpg,500,281,aeroplane,133,197,88,123,0.33,0.375445,0.128,0.124555
3,2007_000032.jpg,500,281,person,195,213,180,229,0.408,0.727758,0.036,0.174377
4,2007_000032.jpg,500,281,person,26,44,189,238,0.07,0.759786,0.036,0.174377


In [46]:
test_df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h
9,2007_000042.jpg,500,335,train,263,500,32,295,0.763,0.48806,0.474,0.785075
10,2007_000042.jpg,500,335,train,1,235,36,299,0.236,0.5,0.468,0.785075
19,2007_000129.jpg,334,500,bicycle,70,255,202,500,0.486527,0.702,0.553892,0.596
20,2007_000129.jpg,334,500,bicycle,251,334,242,500,0.875749,0.742,0.248503,0.516
21,2007_000129.jpg,334,500,bicycle,1,67,144,436,0.101796,0.58,0.197605,0.584


**Assign id number to object names**

In [47]:
#label encoding

In [49]:
def label_encoding(x):
    labels = {'person':0,'chair':1, 'car':2, 'dog':3 ,'bottle':4 ,'cat':5, 'bird':6,
              'pottedplant':7,  'sheep':8, 'boat':9, 'aeroplane':10 ,'tvmonitor':11, 
              'sofa':12, 'bicycle':13, 'horse':14 ,'motorbike':15, 'diningtable':16,'cow':17,
              'train':18 ,'bus': 19}
    return labels[x]

In [51]:
train_df.loc[:, 'id'] = train_df['name'].apply(label_encoding)
test_df.loc[:, 'id'] = test_df['name'].apply(label_encoding)

In [53]:
train_df.head(10)

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,center_x,center_y,w,h,id
0,2007_000027.jpg,486,500,person,174,349,101,351,0.538066,0.452,0.360082,0.5,0
1,2007_000032.jpg,500,281,aeroplane,104,375,78,183,0.479,0.464413,0.542,0.373665,10
2,2007_000032.jpg,500,281,aeroplane,133,197,88,123,0.33,0.375445,0.128,0.124555,10
3,2007_000032.jpg,500,281,person,195,213,180,229,0.408,0.727758,0.036,0.174377,0
4,2007_000032.jpg,500,281,person,26,44,189,238,0.07,0.759786,0.036,0.174377,0
5,2007_000033.jpg,500,366,aeroplane,9,499,107,263,0.508,0.505464,0.98,0.42623,10
6,2007_000033.jpg,500,366,aeroplane,421,482,200,226,0.903,0.581967,0.122,0.071038,10
7,2007_000033.jpg,500,366,aeroplane,325,411,188,223,0.736,0.561475,0.172,0.095628,10
8,2007_000039.jpg,500,375,tvmonitor,156,344,89,279,0.5,0.490667,0.376,0.506667,11
11,2007_000061.jpg,500,333,boat,274,437,11,279,0.711,0.435435,0.326,0.804805,9


**Save Images and Labels in Text**

In [54]:
import os 
from shutil import move

In [56]:
train_folder = 'data_images/train'
test_folder = 'data_images/test'


os.mkdir(test_folder)

FileExistsError: [WinError 183] Cannot create a file when that file already exists: 'data_images/train'