In [86]:
import os
from glob import glob
import pandas as pd
from functools import reduce
from xml.etree import ElementTree as et

In [87]:
#load all xml file and store in a list
xml_list = glob('./data_images/*.xml')
#data cleaning replace \\ with /
xml_list = list(map(lambda x: x.replace('\\','/'),xml_list))

In [88]:
# step-2: read xml files
# from each xml file we need to extract
# filename, size(width, height), object(name, xmin, xmax, ymin, ymax)
def extract_text(filename):
    tree = et.parse(filename)
    root = tree.getroot()

    # extract filename
    image_name = root.find('filename').text
    # width and height of the image
    width = root.find('size').find('width').text
    height = root.find('size').find('height').text
    objs = root.findall('object')
    parser = []
    for obj in objs:
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = bndbox.find('xmin').text
        xmax = bndbox.find('xmax').text
        ymin = bndbox.find('ymin').text
        ymax = bndbox.find('ymax').text
        parser.append([image_name, width, height, name,xmin,xmax,ymin,ymax])
        
    return parser

In [89]:
parser_all = list(map(extract_text,xml_list))

In [90]:
parser_all

[[['000001.jpg', '1024', '657', 'car', '14', '301', '335', '522'],
  ['000001.jpg', '1024', '657', 'car', '269', '571', '345', '489'],
  ['000001.jpg', '1024', '657', 'car', '502', '798', '342', '450'],
  ['000001.jpg', '1024', '657', 'car', '709', '1009', '333', '438']],
 [['000002.jpg', '800', '600', 'car', '41', '768', '240', '497'],
  ['000002.jpg', '800', '600', 'car', '533', '722', '236', '299']],
 [['000007.jpg', '500', '333', 'car', '141', '500', '50', '330']],
 [['000009.jpg', '500', '375', 'horse', '69', '270', '172', '330'],
  ['000009.jpg', '500', '375', 'person', '150', '229', '141', '284'],
  ['000009.jpg', '500', '375', 'person', '285', '327', '201', '331'],
  ['000009.jpg', '500', '375', 'person', '258', '297', '198', '329']],
 [['000012.jpg', '500', '333', 'car', '156', '351', '97', '270']],
 [['000016.jpg', '334', '500', 'bicycle', '92', '305', '72', '473']],
 [['000017.jpg', '480', '364', 'person', '185', '279', '62', '199'],
  ['000017.jpg', '480', '364', 'horse', '

In [91]:
data = reduce(lambda x, y : x+y,parser_all)

In [92]:
data

[['000001.jpg', '1024', '657', 'car', '14', '301', '335', '522'],
 ['000001.jpg', '1024', '657', 'car', '269', '571', '345', '489'],
 ['000001.jpg', '1024', '657', 'car', '502', '798', '342', '450'],
 ['000001.jpg', '1024', '657', 'car', '709', '1009', '333', '438'],
 ['000002.jpg', '800', '600', 'car', '41', '768', '240', '497'],
 ['000002.jpg', '800', '600', 'car', '533', '722', '236', '299'],
 ['000007.jpg', '500', '333', 'car', '141', '500', '50', '330'],
 ['000009.jpg', '500', '375', 'horse', '69', '270', '172', '330'],
 ['000009.jpg', '500', '375', 'person', '150', '229', '141', '284'],
 ['000009.jpg', '500', '375', 'person', '285', '327', '201', '331'],
 ['000009.jpg', '500', '375', 'person', '258', '297', '198', '329'],
 ['000012.jpg', '500', '333', 'car', '156', '351', '97', '270'],
 ['000016.jpg', '334', '500', 'bicycle', '92', '305', '72', '473'],
 ['000017.jpg', '480', '364', 'person', '185', '279', '62', '199'],
 ['000017.jpg', '480', '364', 'horse', '90', '403', '78', '33

In [93]:
df = pd.DataFrame(data,columns=['filename','width','height','name','xmin','xmax','ymin','ymax'])

In [94]:
df.shape

(15663, 8)

In [95]:
df['name'].value_counts()

person         5447
car            1650
chair          1427
bottle          634
pottedplant     625
bird            599
dog             538
sofa            425
bicycle         418
horse           406
boat            398
motorbike       390
cat             389
tvmonitor       367
cow             356
sheep           353
aeroplane       331
train           328
diningtable     310
bus             272
Name: name, dtype: int64

In [96]:
#type conversion 
cols = ['width','height','xmin','xmax','ymin','ymax']
df[cols] = df[cols].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15663 entries, 0 to 15662
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  15663 non-null  object
 1   width     15663 non-null  int32 
 2   height    15663 non-null  int32 
 3   name      15663 non-null  object
 4   xmin      15663 non-null  int32 
 5   xmax      15663 non-null  int32 
 6   ymin      15663 non-null  int32 
 7   ymax      15663 non-null  int32 
dtypes: int32(6), object(2)
memory usage: 612.0+ KB


In [97]:
#center x , center y
df['centerx'] = ((df['xmin']+df['xmax'])/2)/df['width']
df['centery'] = ((df['ymin']+df['ymax'])/2)/df['height']
#w h
df['w']=(df['xmax']-df['xmin'])/df['width']
df['h']=(df['ymax']-df['ymin'])/df['height']

In [98]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15663 entries, 0 to 15662
Data columns (total 12 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   filename  15663 non-null  object 
 1   width     15663 non-null  int32  
 2   height    15663 non-null  int32  
 3   name      15663 non-null  object 
 4   xmin      15663 non-null  int32  
 5   xmax      15663 non-null  int32  
 6   ymin      15663 non-null  int32  
 7   ymax      15663 non-null  int32  
 8   centerx   15663 non-null  float64
 9   centery   15663 non-null  float64
 10  w         15663 non-null  float64
 11  h         15663 non-null  float64
dtypes: float64(4), int32(6), object(2)
memory usage: 1.1+ MB


In [100]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centerx,centery,w,h
0,000001.jpg,1024,657,car,14,301,335,522,0.153809,0.652207,0.280273,0.284627
1,000001.jpg,1024,657,car,269,571,345,489,0.410156,0.634703,0.294922,0.219178
2,000001.jpg,1024,657,car,502,798,342,450,0.634766,0.60274,0.289062,0.164384
3,000001.jpg,1024,657,car,709,1009,333,438,0.838867,0.586758,0.292969,0.159817
4,000002.jpg,800,600,car,41,768,240,497,0.505625,0.614167,0.90875,0.428333


In [101]:
images = df['filename'].unique()

In [102]:
len(images)

5012

In [105]:
# 80% for train 20% for test
img_df = pd.DataFrame(images,columns=['filename'])
img_train = tuple(img_df.sample(frac=0.8)['filename']) #shuffle and pick 80% of images

In [107]:
img_test = tuple(img_df.query(f'filename not in {img_train}')['filename'])

In [109]:
len(img_train),len(img_test)

(4010, 1002)

In [110]:
train_df = df.query(f'filename in {img_train}')
test_df = df.query(f'filename in {img_test}')

In [121]:
#training the text is not recommend so we need label encoding
# label encoding
def label_encoding(x):
    labels = {'person':0, 'car':1, 'chair':2, 'bottle':3, 'pottedplant':4, 'bird':5, 'dog':6,
       'sofa':7, 'bicycle':8, 'horse':9, 'boat':10, 'motorbike':11, 'cat':12, 'tvmonitor':13,
       'cow':14, 'sheep':15, 'aeroplane':16, 'train':17, 'diningtable':18, 'bus':19}
    return labels[x]

In [123]:
train_df['id'] = train_df['name'].apply(label_encoding)
test_df['id'] = test_df['name'].apply(label_encoding)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df['id'] = train_df['name'].apply(label_encoding)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['id'] = test_df['name'].apply(label_encoding)


In [124]:
train_df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centerx,centery,w,h,id
6,000007.jpg,500,333,car,141,500,50,330,0.641,0.570571,0.718,0.840841,1
7,000009.jpg,500,375,horse,69,270,172,330,0.339,0.669333,0.402,0.421333,9
8,000009.jpg,500,375,person,150,229,141,284,0.379,0.566667,0.158,0.381333,0
9,000009.jpg,500,375,person,285,327,201,331,0.612,0.709333,0.084,0.346667,0
10,000009.jpg,500,375,person,258,297,198,329,0.555,0.702667,0.078,0.349333,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15658,009958.jpg,500,333,person,89,149,27,217,0.238,0.366366,0.120,0.570571,0
15659,009958.jpg,500,333,person,75,147,39,124,0.222,0.244745,0.144,0.255255,0
15660,009958.jpg,500,333,bicycle,77,178,121,316,0.255,0.656156,0.202,0.585586,8
15661,009959.jpg,500,375,car,192,330,142,187,0.522,0.438667,0.276,0.120000,1


In [125]:
import os
from shutil import move

In [128]:
train_folder = 'data_images/train'
test_folder  = 'data_images/test'

os.mkdir(train_folder)
os.mkdir(test_folder)

In [130]:
cols = ['filename','id','centerx','centery','w','h']
groupby_obj_train = train_df[cols].groupby('filename')
groupby_obj_test = test_df[cols].groupby('filename')

In [132]:
groupby_obj_train.get_group('000009.jpg').set_index('filename')

Unnamed: 0,filename,id,centerx,centery,w,h
7,000009.jpg,9,0.339,0.669333,0.402,0.421333
8,000009.jpg,0,0.379,0.566667,0.158,0.381333
9,000009.jpg,0,0.612,0.709333,0.084,0.346667
10,000009.jpg,0,0.555,0.702667,0.078,0.349333
