In [0]:
import pandas as pd
import numpy as np
import glob
import xml.etree.ElementTree as ET

##read bounding boxes
def xml_to_csv(path):
    """Iterates through all .xml files (generated by labelImg) in a given directory and combines them in a single Pandas datagrame.

    Parameters:
    ----------
    path : {str}
        The path containing the .xml files
    Returns
    -------
    Pandas DataFrame
        The produced dataframe
    """
    classlist = ['ship','vehicle','airplane']
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            #print(member.find('bndbox')[0].text)  #we get ymin xmin ymax x min
            if any (x in member.find('name').text for x in classlist):
            #member[0] is name..but thats only if the name is directly under <object>
                value = (root.find('filename').text,
                        int(root.find('size')[0].text),
                        int(root.find('size')[1].text),
                        member.find('name').text,
                        int(member.find('bndbox')[0].text),
                        int(member.find('bndbox')[1].text),
                        int(member.find('bndbox')[2].text),
                        int(member.find('bndbox')[3].text)
                        )
                xml_list.append(value)
    column_name = ['filename', 'width', 'height',
                'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df

In [0]:
###get the dataframe and output it.
annotation_path = '../data/Annotations'
xml_dataframe = xml_to_csv(annotation_path)
xml_dataframe.head()

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,21665.jpg,800,800,ship,410,340,459,595
1,21665.jpg,800,800,ship,454,619,470,666
2,23072.jpg,800,800,ship,292,73,406,114
3,23072.jpg,800,800,ship,223,481,326,522
4,23072.jpg,800,800,ship,202,579,317,620


In [0]:
xml_dataframe.shape

(112874, 8)

In [0]:
xml_dataframe.to_csv("../data/LabelsCombined.csv")

In [0]:
column_name = ['filename', 'width', 'height','class', 'xmin', 'ymin', 'xmax', 'ymax']
df_train = pd.DataFrame(columns=column_name)
df_validation = pd.DataFrame(columns=column_name)
df_test = pd.DataFrame(columns=column_name)
for i in xml_dataframe.index: 
  if (int(xml_dataframe.loc[i,'filename'].split(".")[0]) <= 5862):
    df_train = df_train.append(xml_dataframe.iloc[i],ignore_index=True)
  elif (int(xml_dataframe.loc[i,'filename'].split(".")[0]) <= 11725):
    df_validation = df_validation.append(xml_dataframe.iloc[i],ignore_index=True)
  else:
    df_test = df_test.append(xml_dataframe.iloc[i],ignore_index=True)

In [0]:
df_train.to_csv("../data/trainLabels.csv")
print(df_train.shape)
df_train.head()

(20921, 8)


Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,02916.jpg,800,800,airplane,550,45,688,195
1,02916.jpg,800,800,airplane,128,202,180,280
2,02916.jpg,800,800,airplane,100,368,152,450
3,02916.jpg,800,800,airplane,201,658,259,726
4,01589.jpg,800,800,vehicle,238,460,254,495


In [0]:
df_validation.to_csv("../data/validationLabels.csv")
print(df_validation.shape)
df_validation.head()

(21978, 8)


Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,09857.jpg,800,800,vehicle,626,119,640,171
1,09857.jpg,800,800,vehicle,645,259,659,311
2,09843.jpg,800,800,vehicle,47,74,98,93
3,09843.jpg,800,800,vehicle,20,232,60,266
4,05937.jpg,800,800,vehicle,160,260,170,267


In [0]:
df_test.to_csv("../data/testLabels.csv")
print(df_test.shape)
df_test.head()

(69975, 8)


Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,21665.jpg,800,800,ship,410,340,459,595
1,21665.jpg,800,800,ship,454,619,470,666
2,23072.jpg,800,800,ship,292,73,406,114
3,23072.jpg,800,800,ship,223,481,326,522
4,23072.jpg,800,800,ship,202,579,317,620


In [0]:
df_trainval = pd.concat([df_train,df_validation])
print (df_trainval.shape)
df_trainval.to_csv("../data/trainvalLabels.csv")

(42899, 8)
