In [1]:
import os
import xml.etree.ElementTree as ET
from glob import glob
import pandas as pd
from functools import reduce
from torch.utils.data import Dataset, DataLoader

In [2]:
def extract_text(annotation_path):
        tree = ET.parse(annotation_path)
        root = tree.getroot()

        image_name = root.find('filename').text
        width = root.find('size').find('width').text
        height = root.find('size').find('height').text
        objs = root.findall('object')

        parser = []
        for obj in objs:
            name = obj.find('name').text
            bndbox = obj.find('bndbox')
            xmin = bndbox.find('xmin').text
            xmax = bndbox.find('xmax').text
            ymin = bndbox.find('ymin').text
            ymax = bndbox.find('ymax').text
            parser.append([image_name, name, width, height, xmin, xmax, ymin, ymax])

        return parser


class PascalVOCDataset(Dataset):
    def __init__(self, root):
        self.root = root
        self.imgs = list(sorted(os.listdir(os.path.join(root, "data_images"))))
        self.annotations = list(sorted(os.listdir(os.path.join(root, "Annotations"))))

    def __getitem__(self, idx):
        annotation_path = os.path.join(self.root, "Annotations", self.annotations[idx])
        extract_text(annotation_path)
        

    def __len__(self):
        return len(self.imgs)

root_dir = "1_Data_Preparation"

dataset = PascalVOCDataset(root=root_dir)
dataloader = DataLoader(dataset)
xmlfiles = glob(os.path.join(root_dir, "Annotations", "*.xml"))

In [3]:
parser_all = list(map(extract_text,xmlfiles))
data = reduce(lambda x, y : x+y,parser_all)

Data Analysis and Conversion To YOLO Data Structure Formate

In [4]:
df = pd.DataFrame(data, columns = ['Filename', 'Name', 'Width', 'Height', 'Xmin', 'Xmax', 'Ymin', 'Ymax'])
df.head()

Unnamed: 0,Filename,Name,Width,Height,Xmin,Xmax,Ymin,Ymax
0,2007_000027.jpg,person,486,500,174,349,101,351
1,2007_000032.jpg,aeroplane,500,281,104,375,78,183
2,2007_000032.jpg,aeroplane,500,281,133,197,88,123
3,2007_000032.jpg,person,500,281,195,213,180,229
4,2007_000032.jpg,person,500,281,26,44,189,238


In [5]:
df.shape


(40138, 8)

In [6]:
df['Name'].value_counts()

Name
person         17401
chair           3056
car             2492
dog             1598
bottle          1561
cat             1277
bird            1271
pottedplant     1202
sheep           1084
boat            1059
aeroplane       1002
tvmonitor        893
sofa             841
bicycle          837
horse            803
motorbike        801
diningtable      800
cow              771
train            704
bus              685
Name: count, dtype: int64

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40138 entries, 0 to 40137
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Filename  40138 non-null  object
 1   Name      40138 non-null  object
 2   Width     40138 non-null  object
 3   Height    40138 non-null  object
 4   Xmin      40138 non-null  object
 5   Xmax      40138 non-null  object
 6   Ymin      40138 non-null  object
 7   Ymax      40138 non-null  object
dtypes: object(8)
memory usage: 2.4+ MB


In [8]:
cols = ['Width','Height','Xmin','Xmax','Ymin','Ymax']
df[cols] = df[cols].astype(float)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40138 entries, 0 to 40137
Data columns (total 8 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Filename  40138 non-null  object 
 1   Name      40138 non-null  object 
 2   Width     40138 non-null  float64
 3   Height    40138 non-null  float64
 4   Xmin      40138 non-null  float64
 5   Xmax      40138 non-null  float64
 6   Ymin      40138 non-null  float64
 7   Ymax      40138 non-null  float64
dtypes: float64(6), object(2)
memory usage: 2.4+ MB


In [9]:
df['Center_X'] = ((df['Xmax']+df['Xmin'])/2)/df['Width']
df['Center_Y'] = ((df['Ymax']+df['Ymin'])/2)/df['Height']

df['Width_Conversion'] = (df['Xmax']-df['Xmin'])/df['Width']
 
df['Height_Conversion'] = (df['Ymax']-df['Ymin'])/df['Height']

df.head()

Unnamed: 0,Filename,Name,Width,Height,Xmin,Xmax,Ymin,Ymax,Center_X,Center_Y,Width_Conversion,Height_Conversion
0,2007_000027.jpg,person,486.0,500.0,174.0,349.0,101.0,351.0,0.538066,0.452,0.360082,0.5
1,2007_000032.jpg,aeroplane,500.0,281.0,104.0,375.0,78.0,183.0,0.479,0.464413,0.542,0.373665
2,2007_000032.jpg,aeroplane,500.0,281.0,133.0,197.0,88.0,123.0,0.33,0.375445,0.128,0.124555
3,2007_000032.jpg,person,500.0,281.0,195.0,213.0,180.0,229.0,0.408,0.727758,0.036,0.174377
4,2007_000032.jpg,person,500.0,281.0,26.0,44.0,189.0,238.0,0.07,0.759786,0.036,0.174377


Split Data into Training and Test9ing

In [10]:
images = df['Filename'].unique()
len(images)
images

array(['2007_000027.jpg', '2007_000032.jpg', '2007_000033.jpg', ...,
       '2012_004329.jpg', '2012_004330.jpg', '2012_004331.jpg'],
      dtype=object)

In [11]:
# 80% train and 20% test

img_df = pd.DataFrame(images,columns=['Filename'])

Img_Train = tuple(img_df.sample(frac=0.8)['Filename']) # shuffle and pick 80% of images
Img_Test = tuple(img_df.query(f'Filename not in {Img_Train}')['Filename']) # take rest 20% images

len(Img_Train), len(Img_Test)

(13700, 3425)

In [12]:
train_df = df.query(f'Filename in {Img_Train}')
test_df = df.query(f'Filename in {Img_Test}')


In [13]:
train_df.head()

Unnamed: 0,Filename,Name,Width,Height,Xmin,Xmax,Ymin,Ymax,Center_X,Center_Y,Width_Conversion,Height_Conversion
0,2007_000027.jpg,person,486.0,500.0,174.0,349.0,101.0,351.0,0.538066,0.452,0.360082,0.5
1,2007_000032.jpg,aeroplane,500.0,281.0,104.0,375.0,78.0,183.0,0.479,0.464413,0.542,0.373665
2,2007_000032.jpg,aeroplane,500.0,281.0,133.0,197.0,88.0,123.0,0.33,0.375445,0.128,0.124555
3,2007_000032.jpg,person,500.0,281.0,195.0,213.0,180.0,229.0,0.408,0.727758,0.036,0.174377
4,2007_000032.jpg,person,500.0,281.0,26.0,44.0,189.0,238.0,0.07,0.759786,0.036,0.174377


In [14]:
test_df.head()

Unnamed: 0,Filename,Name,Width,Height,Xmin,Xmax,Ymin,Ymax,Center_X,Center_Y,Width_Conversion,Height_Conversion
13,2007_000063.jpg,dog,500.0,375.0,123.0,379.0,115.0,275.0,0.502,0.52,0.512,0.426667
14,2007_000063.jpg,chair,500.0,375.0,75.0,428.0,1.0,375.0,0.503,0.501333,0.706,0.997333
19,2007_000129.jpg,bicycle,334.0,500.0,70.0,255.0,202.0,500.0,0.486527,0.702,0.553892,0.596
20,2007_000129.jpg,bicycle,334.0,500.0,251.0,334.0,242.0,500.0,0.875749,0.742,0.248503,0.516
21,2007_000129.jpg,bicycle,334.0,500.0,1.0,67.0,144.0,436.0,0.101796,0.58,0.197605,0.584


Assign Id Number to Object Names

In [15]:
def label_encoding(x):
    labels = {'person':0, 'chair':1, 'car':2, 'dog':3, 'bottle':4, 'cat':5, 'bird':6, 'pottedplant':7, 'sheep':8, 'boat':9, 'aeroplane':10, 'tvmonitor':11, 'sofa':12, 'bicycle':13, 'horse':14, 'motorbike':15, 'diningtable':16, 'cow':17, 'train':18, 'bus':19 }

    return labels[x]

In [16]:
train_df.loc[:, 'Id'] = train_df['Name'].apply(label_encoding)
test_df.loc[:, 'Id'] = test_df['Name'].apply(label_encoding)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df.loc[:, 'Id'] = train_df['Name'].apply(label_encoding)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df.loc[:, 'Id'] = test_df['Name'].apply(label_encoding)


In [17]:
train_df.head()

Unnamed: 0,Filename,Name,Width,Height,Xmin,Xmax,Ymin,Ymax,Center_X,Center_Y,Width_Conversion,Height_Conversion,Id
0,2007_000027.jpg,person,486.0,500.0,174.0,349.0,101.0,351.0,0.538066,0.452,0.360082,0.5,0
1,2007_000032.jpg,aeroplane,500.0,281.0,104.0,375.0,78.0,183.0,0.479,0.464413,0.542,0.373665,10
2,2007_000032.jpg,aeroplane,500.0,281.0,133.0,197.0,88.0,123.0,0.33,0.375445,0.128,0.124555,10
3,2007_000032.jpg,person,500.0,281.0,195.0,213.0,180.0,229.0,0.408,0.727758,0.036,0.174377,0
4,2007_000032.jpg,person,500.0,281.0,26.0,44.0,189.0,238.0,0.07,0.759786,0.036,0.174377,0


In [18]:
test_df.head()

Unnamed: 0,Filename,Name,Width,Height,Xmin,Xmax,Ymin,Ymax,Center_X,Center_Y,Width_Conversion,Height_Conversion,Id
13,2007_000063.jpg,dog,500.0,375.0,123.0,379.0,115.0,275.0,0.502,0.52,0.512,0.426667,3
14,2007_000063.jpg,chair,500.0,375.0,75.0,428.0,1.0,375.0,0.503,0.501333,0.706,0.997333,1
19,2007_000129.jpg,bicycle,334.0,500.0,70.0,255.0,202.0,500.0,0.486527,0.702,0.553892,0.596,13
20,2007_000129.jpg,bicycle,334.0,500.0,251.0,334.0,242.0,500.0,0.875749,0.742,0.248503,0.516,13
21,2007_000129.jpg,bicycle,334.0,500.0,1.0,67.0,144.0,436.0,0.101796,0.58,0.197605,0.584,13


Save Images and Labels in text

In [19]:
import os
from shutil import move

In [20]:
root_dir = "1_Data_Preparation/data_images"

Train_Folder = os.path.join(root_dir, 'train')
Test_Folder = os.path.join(root_dir, 'test')

os.mkdir(Train_Folder)
os.mkdir(Test_Folder)

FileExistsError: [WinError 183] Cannot create a file when that file already exists: '1_Data_Preparation/data_images\\train'

In [None]:
cols = ['Filename', 'Id', 'Center_X', 'Center_Y', 'Width_Conversion', 'Height_Conversion']
groupby_obj_train = train_df[cols].groupby('Filename')
groupby_obj_test = test_df[cols].groupby('Filename')

In [None]:
def save_data(Filename, folder_path, group_obj):
    scr = os.path.join(root_dir, Filename)
    dst = os.path.join(folder_path, Filename)
    if os.path.exists(scr):
        move(scr, dst)
        
        text_filename = os.path.join(folder_path, os.path.splitext(Filename)[0] + '.txt')
        group_obj.get_group(Filename).set_index('Filename').to_csv(text_filename, sep=' ', index=False, header=False)
    else:
        print(f"File not found: {scr}")

In [None]:
filename_series = pd.Series(groupby_obj_train.groups.keys())
filename_series.apply(save_data, args=(Train_Folder, groupby_obj_train))

0        None
1        None
2        None
3        None
4        None
         ... 
13695    None
13696    None
13697    None
13698    None
13699    None
Length: 13700, dtype: object

In [None]:
filename_series_test = pd.Series(groupby_obj_test.groups.keys())
filename_series_test.apply(save_data, args=(Test_Folder, groupby_obj_test))

0       None
1       None
2       None
3       None
4       None
        ... 
3420    None
3421    None
3422    None
3423    None
3424    None
Length: 3425, dtype: object