In [14]:
import os
import glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from functools import reduce
from xml.etree import ElementTree as et

*******DATA PREPARATION*******

In [28]:
#### loading all the xml file and store them in a list
root_dir = '/Users/sreethanubhuvaneshgk/Desktop/ml_projects/project_folder/object_detection/1_datapreparation/data_images/datasets'
xmlfiles = glob.glob(os.path.join(root_dir, '**', 'Annotations', '*.xml'), recursive=True)

In [29]:
xmlfiles

['/Users/sreethanubhuvaneshgk/Desktop/ml_projects/project_folder/object_detection/1_datapreparation/data_images/datasets/VOC2012_train_val/VOC2012_train_val/Annotations/2011_006135.xml',
 '/Users/sreethanubhuvaneshgk/Desktop/ml_projects/project_folder/object_detection/1_datapreparation/data_images/datasets/VOC2012_train_val/VOC2012_train_val/Annotations/2008_006482.xml',
 '/Users/sreethanubhuvaneshgk/Desktop/ml_projects/project_folder/object_detection/1_datapreparation/data_images/datasets/VOC2012_train_val/VOC2012_train_val/Annotations/2010_005054.xml',
 '/Users/sreethanubhuvaneshgk/Desktop/ml_projects/project_folder/object_detection/1_datapreparation/data_images/datasets/VOC2012_train_val/VOC2012_train_val/Annotations/2010_005732.xml',
 '/Users/sreethanubhuvaneshgk/Desktop/ml_projects/project_folder/object_detection/1_datapreparation/data_images/datasets/VOC2012_train_val/VOC2012_train_val/Annotations/2011_004044.xml',
 '/Users/sreethanubhuvaneshgk/Desktop/ml_projects/project_folder/

In [35]:
def extract_text(filename):
    tree = et.parse(filename)
    root = tree.getroot()

    ##ectract file name 
    image_name = root.find('filename').text
    #### find the width and height of the image 
    width = root.find('size').find('width').text
    height = root.find('size').find('height').text

    objs = root.findall('object')
    parser = []
    for obj in objs:
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xmin = bndbox.find('xmin').text
        xmax = bndbox.find('xmax').text
        ymin = bndbox.find('ymin').text
        ymax = bndbox.find('ymax').text
        parser.append([image_name, width, height, name, xmin, xmax, ymin, ymax])
    return parser
    

In [36]:
parser_all = list(map(extract_text, xmlfiles))

In [38]:
data = reduce(lambda x, y : x + y, parser_all) 


In [89]:
data

[['2011_006135.jpg', '500', '375', 'person', '391', '470', '144', '315'],
 ['2008_006482.jpg', '500', '411', 'chair', '155', '233', '321', '411'],
 ['2008_006482.jpg', '500', '411', 'diningtable', '1', '201', '341', '411'],
 ['2010_005054.jpg', '500', '111', 'tvmonitor', '86', '121', '34', '70'],
 ['2010_005054.jpg', '500', '111', 'sofa', '318', '427', '42', '111'],
 ['2010_005054.jpg', '500', '111', 'sofa', '1', '98', '70', '111'],
 ['2010_005732.jpg', '500', '375', 'cat', '181', '500', '1', '375'],
 ['2011_004044.jpg', '500', '375', 'person', '220', '337', '143', '375'],
 ['2011_004044.jpg', '500', '375', 'person', '102', '214', '187', '375'],
 ['2010_003343.jpg', '500', '375', 'sheep', '68', '264', '122', '232'],
 ['2010_003343.jpg', '500', '375', 'sheep', '325', '500', '129', '298'],
 ['2011_002435.jpg', '500', '375', 'sofa', '23', '500', '56', '375'],
 ['2011_002435.jpg', '500', '375', 'person', '153', '500', '184', '375'],
 ['2011_002435.jpg', '500', '375', 'person', '2', '332', 

*******DATA PRE-PROCESSING*******

In [97]:
df = pd.DataFrame(data, columns = ['filename', 'width', 'height', 'name', 'xmin', 'xmax', 'ymin', 'ymax'])

In [99]:
df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
0,2011_006135.jpg,500,375,person,391,470,144,315
1,2008_006482.jpg,500,411,chair,155,233,321,411
2,2008_006482.jpg,500,411,diningtable,1,201,341,411
3,2010_005054.jpg,500,111,tvmonitor,86,121,34,70
4,2010_005054.jpg,500,111,sofa,318,427,42,111
...,...,...,...,...,...,...,...,...
47463,2011_006892.jpg,500,333,person,216,277,134,221
47464,2011_003732.jpg,500,333,person,224,329,57,333
47465,2011_003732.jpg,500,333,person,47,140,97,296
47466,2011_005425.jpg,500,375,person,186,373,20,318


In [100]:
df.shape

(47468, 8)

In [101]:
df['name'].value_counts()

name
person         24727
chair           3058
car             2492
dog             1598
bottle          1561
cat             1277
bird            1271
pottedplant     1202
sheep           1084
boat            1059
aeroplane       1002
tvmonitor        893
sofa             841
bicycle          837
horse            803
diningtable      802
motorbike        801
cow              771
train            704
bus              685
Name: count, dtype: int64

******for our YOLO algorithm we need to find the centre of the bounding box*******

this can be done by the following conversion formula 

       
centre_x = ((xmin + xmax) / 2) / (width of the image)


centre_y = ((ymin + ymax) / 2) / (height of the image)


w = (xmax- xmin)/ (width of the image)

        
h = (ymax - ymin)/ (height of the image)

In [102]:
df.describe()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax
count,47468,47468,47468,47468,47468,47468,47468,47468
unique,22263,288,344,20,494,493,461,488
top,2008_007069.jpg,500,375,person,1,500,1,375
freq,56,36974,21489,24727,5406,4119,3121,5744


In [103]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47468 entries, 0 to 47467
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  47468 non-null  object
 1   width     47468 non-null  object
 2   height    47468 non-null  object
 3   name      47468 non-null  object
 4   xmin      47468 non-null  object
 5   xmax      47468 non-null  object
 6   ymin      47468 non-null  object
 7   ymax      47468 non-null  object
dtypes: object(8)
memory usage: 2.9+ MB


In [104]:
##type conversion
cols = ['width', 'height', 'xmin', 'xmax', 'ymin', 'ymax']
# First, convert the columns to float to handle any decimal values
df[cols] = df[cols].astype(float)

# Then, convert to int (this will truncate the decimal part)
df[cols] = df[cols].astype(int)

# Verify the changes
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47468 entries, 0 to 47467
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  47468 non-null  object
 1   width     47468 non-null  int64 
 2   height    47468 non-null  int64 
 3   name      47468 non-null  object
 4   xmin      47468 non-null  int64 
 5   xmax      47468 non-null  int64 
 6   ymin      47468 non-null  int64 
 7   ymax      47468 non-null  int64 
dtypes: int64(6), object(2)
memory usage: 2.9+ MB


In [105]:
####creating the centre co-ordinates
df['centre_x'] = ((df['xmin'] +df['xmax']) / 2) / df['width']
df['centre_y'] = ((df['ymin'] +df['ymax']) / 2) / df['height']

#w
df['w'] = (df['xmax'] - df['xmin']) / df['width']

#h
df['h'] = (df['ymax'] - df['ymin']) / df['height']

In [106]:
df.head()

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centre_x,centre_y,w,h
0,2011_006135.jpg,500,375,person,391,470,144,315,0.861,0.612,0.158,0.456
1,2008_006482.jpg,500,411,chair,155,233,321,411,0.388,0.890511,0.156,0.218978
2,2008_006482.jpg,500,411,diningtable,1,201,341,411,0.202,0.914842,0.4,0.170316
3,2010_005054.jpg,500,111,tvmonitor,86,121,34,70,0.207,0.468468,0.07,0.324324
4,2010_005054.jpg,500,111,sofa,318,427,42,111,0.745,0.689189,0.218,0.621622


In [107]:
#finding out how many unique sets of images are present in the data set
images = df['filename'].unique()

In [108]:
len(images)

22263

*****SPLITING THE DATA INTO TRAINING AND TESTING IMAGES*****

In [109]:
#SAY WE WANT TO SPLIT THE IMAGES INTO 80% TRAINING AND 20% TESTING
#FIRST WE CONVERT THE UNIQUE IMAGES INTO A DATA - FRAME
img_df = pd.DataFrame(images, columns=['filename'])
img_df.head()

Unnamed: 0,filename
0,2011_006135.jpg
1,2008_006482.jpg
2,2010_005054.jpg
3,2010_005732.jpg
4,2011_004044.jpg


In [110]:
#split the data
img_train = tuple(img_df.sample(frac = 0.8)['filename']) #performs random shuffle and picks 80% of images and stores them in the variable img_train
img_test = tuple(img_df.query(f'filename not in {img_train}')['filename']) # takes the rest of the 20% of the images and stores them in the variable img_test

In [111]:
len(img_train), len(img_test)

(17810, 4453)

In [112]:
train_df = df.query(f'filename in {img_train}')
test_df = df.query(f'filename in {img_test}')

In [113]:
train_df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centre_x,centre_y,w,h
1,2008_006482.jpg,500,411,chair,155,233,321,411,0.388,0.890511,0.156,0.218978
2,2008_006482.jpg,500,411,diningtable,1,201,341,411,0.202,0.914842,0.400,0.170316
9,2010_003343.jpg,500,375,sheep,68,264,122,232,0.332,0.472000,0.392,0.293333
10,2010_003343.jpg,500,375,sheep,325,500,129,298,0.825,0.569333,0.350,0.450667
11,2011_002435.jpg,500,375,sofa,23,500,56,375,0.523,0.574667,0.954,0.850667
...,...,...,...,...,...,...,...,...,...,...,...,...
47463,2011_006892.jpg,500,333,person,216,277,134,221,0.493,0.533033,0.122,0.261261
47464,2011_003732.jpg,500,333,person,224,329,57,333,0.553,0.585586,0.210,0.828829
47465,2011_003732.jpg,500,333,person,47,140,97,296,0.187,0.590090,0.186,0.597598
47466,2011_005425.jpg,500,375,person,186,373,20,318,0.559,0.450667,0.374,0.794667


In [114]:
test_df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centre_x,centre_y,w,h
0,2011_006135.jpg,500,375,person,391,470,144,315,0.861000,0.612000,0.158000,0.456000
3,2010_005054.jpg,500,111,tvmonitor,86,121,34,70,0.207000,0.468468,0.070000,0.324324
4,2010_005054.jpg,500,111,sofa,318,427,42,111,0.745000,0.689189,0.218000,0.621622
5,2010_005054.jpg,500,111,sofa,1,98,70,111,0.099000,0.815315,0.194000,0.369369
6,2010_005732.jpg,500,375,cat,181,500,1,375,0.681000,0.501333,0.638000,0.997333
...,...,...,...,...,...,...,...,...,...,...,...,...
47442,2010_003414.jpg,375,500,person,91,375,26,500,0.621333,0.526000,0.757333,0.948000
47445,2012_000962.jpg,375,500,person,107,305,30,387,0.549333,0.417000,0.528000,0.714000
47454,2011_005419.jpg,500,333,person,273,316,166,268,0.589000,0.651652,0.086000,0.306306
47455,2011_005419.jpg,500,333,person,179,213,150,223,0.392000,0.560060,0.068000,0.219219


*****LABEL ENCODING THE NAME COLUMN FOR MODEL TRAINING*****

In [116]:
df['name_original'] = df['name']

In [117]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()

df['name']= le.fit_transform(df['name']) 

df['name'].unique() 


array([14,  8, 10, 19, 17,  7, 16,  4,  2,  6,  1,  3, 18,  9, 12, 11,  0,
        5, 15, 13])

In [118]:
df

Unnamed: 0,filename,width,height,name,xmin,xmax,ymin,ymax,centre_x,centre_y,w,h,name_original
0,2011_006135.jpg,500,375,14,391,470,144,315,0.861,0.612000,0.158,0.456000,person
1,2008_006482.jpg,500,411,8,155,233,321,411,0.388,0.890511,0.156,0.218978,chair
2,2008_006482.jpg,500,411,10,1,201,341,411,0.202,0.914842,0.400,0.170316,diningtable
3,2010_005054.jpg,500,111,19,86,121,34,70,0.207,0.468468,0.070,0.324324,tvmonitor
4,2010_005054.jpg,500,111,17,318,427,42,111,0.745,0.689189,0.218,0.621622,sofa
...,...,...,...,...,...,...,...,...,...,...,...,...,...
47463,2011_006892.jpg,500,333,14,216,277,134,221,0.493,0.533033,0.122,0.261261,person
47464,2011_003732.jpg,500,333,14,224,329,57,333,0.553,0.585586,0.210,0.828829,person
47465,2011_003732.jpg,500,333,14,47,140,97,296,0.187,0.590090,0.186,0.597598,person
47466,2011_005425.jpg,500,375,14,186,373,20,318,0.559,0.450667,0.374,0.794667,person


In [119]:
import os 
from shutil import move
