In [153]:
# Imports
import os
import pandas as pd
from glob import glob
from functools import reduce
from xml.etree import ElementTree as et
from shutil import move

In [154]:
# Load XML files and store them in a list
xml_files = glob("labeled_image_data/*.xml")
print(xml_files)

['labeled_image_data/157c26be-00595.xml', 'labeled_image_data/b695b399-00439.xml', 'labeled_image_data/d80257fd-00747.xml', 'labeled_image_data/ce40d81f-00279.xml', 'labeled_image_data/ed5069e6-00129.xml', 'labeled_image_data/36c4c908-00458.xml', 'labeled_image_data/652bb440-00441.xml', 'labeled_image_data/dcc0dc62-00329.xml', 'labeled_image_data/c22b1409-00234.xml', 'labeled_image_data/27d29b02-00735.xml', 'labeled_image_data/1f8f5442-00508.xml', 'labeled_image_data/126edbc5-00318.xml', 'labeled_image_data/60d13132-00430.xml', 'labeled_image_data/463cb35c-00453.xml', 'labeled_image_data/182f5fba-07649.xml', 'labeled_image_data/6d1f3135-07885.xml', 'labeled_image_data/08ba247e-00689.xml', 'labeled_image_data/e9a6a76a-00355.xml', 'labeled_image_data/126c5d13-00153.xml', 'labeled_image_data/83e49185-00798.xml', 'labeled_image_data/2d94aea5-00437.xml', 'labeled_image_data/cc03fb5b-00121.xml', 'labeled_image_data/0123dd72-00403.xml', 'labeled_image_data/3f0d495b-00131.xml', 'labeled_image_

In [155]:
# Define function to read XML files, extract file name, size (width, height), and object data (name, xmin, xmax, ymin, max)
def extract_text(filename):
    tree = et.parse(filename)
    root = tree.getroot()

    # Get file name
    img_name = root.find("filename").text

    # Get size (width and height) of an image
    width = root.find("size").find("width").text
    height = root.find("size").find("height").text

    # Get data of object(s) within an image
    objs = root.findall("object")
    parser = []

    for obj in objs:
        name = obj.find("name").text
        bnd_box = obj.find("bndbox")
        x_min = bnd_box.find("xmin").text
        x_max = bnd_box.find("xmax").text
        y_min = bnd_box.find("ymin").text
        y_max = bnd_box.find("ymax").text
        parser.append([img_name, width, height, name, x_min, x_max, y_min, y_max])

    return parser

In [156]:
# Extract text from all XML files and flatten the resulting list
parser_all = list(map(extract_text, xml_files))
obj_data = reduce(lambda x, y: x + y, parser_all)

In [157]:
# Create the DataFrame, with each entry being a labeled object in an image
df = pd.DataFrame(obj_data, columns = ["file_name", "width", "height", "class", "x_min", "x_max", "y_min", "y_max"])
df

Unnamed: 0,file_name,width,height,class,x_min,x_max,y_min,y_max
0,157c26be-00595.jpg,640,480,Truck,95,243,180,228
1,157c26be-00595.jpg,640,480,Car,52,143,188,234
2,157c26be-00595.jpg,640,480,Car,0,57,188,219
3,157c26be-00595.jpg,640,480,Truck,91,581,143,413
4,b695b399-00439.jpg,650,434,Car,10,639,90,415
...,...,...,...,...,...,...,...,...
953,076f6ecb-00525.jpg,500,375,Car,390,433,167,196
954,076f6ecb-00525.jpg,500,375,Car,474,499,165,197
955,076f6ecb-00525.jpg,500,375,Car,102,241,167,224
956,076f6ecb-00525.jpg,500,375,Car,115,388,146,315


In [158]:
# Clean the file_name column
def clean_filename(filename):
    return filename.split("-")[-1]

df["file_name"] = df["file_name"].apply(clean_filename)

In [159]:
df["class"].value_counts()

class
Car             740
Person          129
Truck            75
TrafficLight     10
TrafficSign       4
Name: count, dtype: int64

In [160]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 958 entries, 0 to 957
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  958 non-null    object
 1   width      958 non-null    object
 2   height     958 non-null    object
 3   class      958 non-null    object
 4   x_min      958 non-null    object
 5   x_max      958 non-null    object
 6   y_min      958 non-null    object
 7   y_max      958 non-null    object
dtypes: object(8)
memory usage: 60.0+ KB


In [161]:
# Convert types of width, height, x_min, x_max, y_min, y_max from object to integer
cols = ["width", "height", "x_min", "x_max", "y_min", "y_max"]
df[cols] = df[cols].astype(int)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 958 entries, 0 to 957
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   file_name  958 non-null    object
 1   width      958 non-null    int64 
 2   height     958 non-null    int64 
 3   class      958 non-null    object
 4   x_min      958 non-null    int64 
 5   x_max      958 non-null    int64 
 6   y_min      958 non-null    int64 
 7   y_max      958 non-null    int64 
dtypes: int64(6), object(2)
memory usage: 60.0+ KB


In [162]:
# Prepare columns for YOLO object detection
# Get center x and center y
df["center_x"] = ((df["x_min"] + df["x_max"]) / 2) / df["width"]
df["center_y"] = ((df["y_min"] + df["y_max"]) / 2) / df["height"]

# Get normalized width and height
df["w"] = (df["x_max"] - df["x_min"]) / df["width"]
df["h"] = (df["y_max"] - df["y_min"]) / df["height"]

In [163]:
df.head()

Unnamed: 0,file_name,width,height,class,x_min,x_max,y_min,y_max,center_x,center_y,w,h
0,00595.jpg,640,480,Truck,95,243,180,228,0.264062,0.425,0.23125,0.1
1,00595.jpg,640,480,Car,52,143,188,234,0.152344,0.439583,0.142187,0.095833
2,00595.jpg,640,480,Car,0,57,188,219,0.044531,0.423958,0.089063,0.064583
3,00595.jpg,640,480,Truck,91,581,143,413,0.525,0.579167,0.765625,0.5625
4,00439.jpg,650,434,Car,10,639,90,415,0.499231,0.581797,0.967692,0.748848


### Split data into train and test data

In [164]:
images = df["file_name"].unique()
len(images)

631

In [165]:
# Split data into 80% and 20% test
img_df = pd.DataFrame(images, columns=["file_name"])

# Shuffle data and choose 80% of our total amount of images for train
img_train = tuple(img_df.sample(frac=0.80)["file_name"])

# Choose remaining 20% of images for test
img_test = tuple(img_df.query(f"file_name not in {img_train}")["file_name"])

In [166]:
# Create train and test DataFrames
train_df = df.query(f"file_name in {img_train}")
test_df = df.query(f"file_name in {img_test}")

In [167]:
train_df.head()

Unnamed: 0,file_name,width,height,class,x_min,x_max,y_min,y_max,center_x,center_y,w,h
0,00595.jpg,640,480,Truck,95,243,180,228,0.264062,0.425,0.23125,0.1
1,00595.jpg,640,480,Car,52,143,188,234,0.152344,0.439583,0.142187,0.095833
2,00595.jpg,640,480,Car,0,57,188,219,0.044531,0.423958,0.089063,0.064583
3,00595.jpg,640,480,Truck,91,581,143,413,0.525,0.579167,0.765625,0.5625
4,00439.jpg,650,434,Car,10,639,90,415,0.499231,0.581797,0.967692,0.748848


In [168]:
test_df.head()

Unnamed: 0,file_name,width,height,class,x_min,x_max,y_min,y_max,center_x,center_y,w,h
5,00747.jpg,939,704,Car,206,763,306,558,0.515974,0.613636,0.593184,0.357955
6,00279.jpg,576,352,Car,14,555,51,297,0.493924,0.494318,0.939236,0.698864
13,00234.jpg,1504,1000,Car,101,1204,364,930,0.433843,0.647,0.733378,0.566
21,00453.jpg,424,360,Truck,72,152,105,144,0.264151,0.345833,0.188679,0.108333
22,00453.jpg,424,360,Car,381,423,118,169,0.948113,0.398611,0.099057,0.141667


### Label Encoding

In [169]:
# Create a "labels" dictionary, with each unique class having its own ID
classes = df["class"].unique()
labels = {}

for i in range(len(classes)):
    labels[classes[i]] = i

# Function to get the ID of a specific object
def label_encoding(obj):
    return labels[obj]

label_encoding("Person")

2

In [170]:
train_df["id"] = train_df["class"].apply(label_encoding)
test_df["id"] = test_df["class"].apply(label_encoding)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_df["id"] = train_df["class"].apply(label_encoding)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df["id"] = test_df["class"].apply(label_encoding)


In [171]:
train_df.head()

Unnamed: 0,file_name,width,height,class,x_min,x_max,y_min,y_max,center_x,center_y,w,h,id
0,00595.jpg,640,480,Truck,95,243,180,228,0.264062,0.425,0.23125,0.1,0
1,00595.jpg,640,480,Car,52,143,188,234,0.152344,0.439583,0.142187,0.095833,1
2,00595.jpg,640,480,Car,0,57,188,219,0.044531,0.423958,0.089063,0.064583,1
3,00595.jpg,640,480,Truck,91,581,143,413,0.525,0.579167,0.765625,0.5625,0
4,00439.jpg,650,434,Car,10,639,90,415,0.499231,0.581797,0.967692,0.748848,1


### Save Images and Labels in Text

In [172]:
train_folder = "image_data/train"
test_folder = "image_data/test"

os.mkdir(train_folder)
os.mkdir(test_folder)

In [173]:
cols = ["file_name", "id", "center_x", "center_y", "w", "h"]
groupby_obj_train = train_df[cols].groupby("file_name")
groupby_obj_test = test_df[cols].groupby("file_name")

In [185]:
# Save each image in the train or test folder, and save respective labels in a text document
def save_data(filename, folder_path, group_obj):
    # Move image
    src = os.path.join("image_data", filename)
    dst = os.path.join(folder_path, filename)
    move(src, dst) # Move image from the source folder to the destination folder

    # Save the labels
    text_filename = os.path.join(folder_path, os.path.splitext(filename)[0] + ".txt")
    group_obj.get_group(filename).set_index("file_name").to_csv(text_filename, sep=" ", index=False, header=False)

In [191]:
filename_series = pd.Series(groupby_obj_train.groups.keys())

In [193]:
filename_series.apply(save_data, args=(train_folder, groupby_obj_train))

0      None
1      None
2      None
3      None
4      None
       ... 
500    None
501    None
502    None
503    None
504    None
Length: 505, dtype: object

In [194]:
filename_series_test = pd.Series(groupby_obj_test.groups.keys())
filename_series_test.apply(save_data, args=(test_folder, groupby_obj_test))

0      None
1      None
2      None
3      None
4      None
       ... 
121    None
122    None
123    None
124    None
125    None
Length: 126, dtype: object

### Data Preparation Done!

In [196]:
# Check class order
classes = df["class"].unique()
labels = {}

for i in range(len(classes)):
    labels[classes[i]] = i

list(labels.keys())

['Truck', 'Car', 'Person', 'TrafficLight', 'TrafficSign']