In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
from functools import reduce
from xml.etree import ElementTree

In [2]:
data_images_path = Path("../data_images")
xml_list = list(data_images_path.glob("*.xml"))

In [3]:
xml_list

[PosixPath('../data_images/003127.xml'),
 PosixPath('../data_images/000403.xml'),
 PosixPath('../data_images/004600.xml'),
 PosixPath('../data_images/009709.xml'),
 PosixPath('../data_images/005135.xml'),
 PosixPath('../data_images/005406.xml'),
 PosixPath('../data_images/001258.xml'),
 PosixPath('../data_images/001771.xml'),
 PosixPath('../data_images/002667.xml'),
 PosixPath('../data_images/005385.xml'),
 PosixPath('../data_images/004341.xml'),
 PosixPath('../data_images/009138.xml'),
 PosixPath('../data_images/009192.xml'),
 PosixPath('../data_images/007148.xml'),
 PosixPath('../data_images/002579.xml'),
 PosixPath('../data_images/001187.xml'),
 PosixPath('../data_images/002237.xml'),
 PosixPath('../data_images/001499.xml'),
 PosixPath('../data_images/003618.xml'),
 PosixPath('../data_images/001224.xml'),
 PosixPath('../data_images/000228.xml'),
 PosixPath('../data_images/007234.xml'),
 PosixPath('../data_images/008856.xml'),
 PosixPath('../data_images/001772.xml'),
 PosixPath('../d

In [4]:
def parse_xml_file(file_path: Path | str) -> list:
    objects = []

    root = ElementTree.parse(file_path).getroot()
    image_name = root.findtext("filename")

    size = root.find("size")
    width = int(size.find("width").text)
    height = int(size.find("height").text)

    object_tags = root.findall("object")
    object_count = len(object_tags)
    for obj_tag in object_tags:
        label = obj_tag.findtext("name")
        bndbox = obj_tag.find("bndbox")
        xmin, xmax = int(bndbox.findtext("xmin")), int(bndbox.findtext("xmax"))
        ymin, ymax = int(bndbox.findtext("ymin")), int(bndbox.findtext("ymax"))
        objects.append([image_name, width, height, label, xmin, xmax, ymin, ymax, object_count])

    return objects

In [5]:
first_objects = parse_xml_file("../data_images/000001.xml")
first_objects

[['000001.jpg', 1024, 657, 'car', 14, 301, 335, 522, 4],
 ['000001.jpg', 1024, 657, 'car', 269, 571, 345, 489, 4],
 ['000001.jpg', 1024, 657, 'car', 502, 798, 342, 450, 4],
 ['000001.jpg', 1024, 657, 'car', 709, 1009, 333, 438, 4]]

In [6]:
img_objects = list(map(parse_xml_file, xml_list))
img_objects

[[['003127.jpg', 500, 375, 'cat', 3, 499, 2, 350, 1]],
 [['000403.jpg', 500, 375, 'cat', 157, 486, 90, 372, 1]],
 [['004600.jpg', 500, 375, 'cat', 178, 298, 40, 281, 1]],
 [['009709.jpg', 375, 500, 'bottle', 257, 375, 1, 498, 3],
  ['009709.jpg', 375, 500, 'bottle', 132, 271, 2, 495, 3],
  ['009709.jpg', 375, 500, 'bottle', 1, 141, 1, 489, 3]],
 [['005135.jpg', 500, 332, 'sofa', 185, 500, 161, 332, 2],
  ['005135.jpg', 500, 332, 'chair', 3, 58, 146, 302, 2]],
 [['005406.jpg', 500, 333, 'aeroplane', 172, 235, 126, 187, 1]],
 [['001258.jpg', 500, 375, 'car', 101, 423, 92, 287, 2],
  ['001258.jpg', 500, 375, 'car', 307, 414, 103, 180, 2]],
 [['001771.jpg', 500, 375, 'chair', 61, 188, 53, 218, 3],
  ['001771.jpg', 500, 375, 'chair', 228, 336, 2, 120, 3],
  ['001771.jpg', 500, 375, 'chair', 355, 496, 67, 218, 3]],
 [['002667.jpg', 500, 333, 'car', 5, 500, 76, 333, 1]],
 [['005385.jpg', 500, 336, 'bottle', 46, 101, 208, 334, 4],
  ['005385.jpg', 500, 336, 'person', 153, 500, 5, 336, 4],
  ['

In [7]:
list1 = [["a", "b", "c"], [1, 2, 3]]
list2 = [["d", "e", "f"]]
list1 + list2

[['a', 'b', 'c'], [1, 2, 3], ['d', 'e', 'f']]

In [8]:
data = reduce(lambda x, y: x + y, img_objects)
data

[['003127.jpg', 500, 375, 'cat', 3, 499, 2, 350, 1],
 ['000403.jpg', 500, 375, 'cat', 157, 486, 90, 372, 1],
 ['004600.jpg', 500, 375, 'cat', 178, 298, 40, 281, 1],
 ['009709.jpg', 375, 500, 'bottle', 257, 375, 1, 498, 3],
 ['009709.jpg', 375, 500, 'bottle', 132, 271, 2, 495, 3],
 ['009709.jpg', 375, 500, 'bottle', 1, 141, 1, 489, 3],
 ['005135.jpg', 500, 332, 'sofa', 185, 500, 161, 332, 2],
 ['005135.jpg', 500, 332, 'chair', 3, 58, 146, 302, 2],
 ['005406.jpg', 500, 333, 'aeroplane', 172, 235, 126, 187, 1],
 ['001258.jpg', 500, 375, 'car', 101, 423, 92, 287, 2],
 ['001258.jpg', 500, 375, 'car', 307, 414, 103, 180, 2],
 ['001771.jpg', 500, 375, 'chair', 61, 188, 53, 218, 3],
 ['001771.jpg', 500, 375, 'chair', 228, 336, 2, 120, 3],
 ['001771.jpg', 500, 375, 'chair', 355, 496, 67, 218, 3],
 ['002667.jpg', 500, 333, 'car', 5, 500, 76, 333, 1],
 ['005385.jpg', 500, 336, 'bottle', 46, 101, 208, 334, 4],
 ['005385.jpg', 500, 336, 'person', 153, 500, 5, 336, 4],
 ['005385.jpg', 500, 336, 'per

In [9]:
df = pd.DataFrame(
    data, columns=["image_name", "width", "height", "label", "xmin", "xmax", "ymin", "ymax", "object_count"]
)
df

Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count
0,003127.jpg,500,375,cat,3,499,2,350,1
1,000403.jpg,500,375,cat,157,486,90,372,1
2,004600.jpg,500,375,cat,178,298,40,281,1
3,009709.jpg,375,500,bottle,257,375,1,498,3
4,009709.jpg,375,500,bottle,132,271,2,495,3
...,...,...,...,...,...,...,...,...,...
15658,009758.jpg,500,332,chair,4,104,200,323,20
15659,009758.jpg,500,332,chair,150,311,284,332,20
15660,007146.jpg,500,374,bird,124,462,43,367,1
15661,003536.jpg,500,375,dog,111,500,100,375,2


In [10]:
df[df["image_name"] == "002965.jpg"]

Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count
996,002965.jpg,500,375,bottle,24,61,176,274,26
997,002965.jpg,500,375,bottle,94,128,150,262,26
998,002965.jpg,500,375,bottle,132,167,150,244,26
999,002965.jpg,500,375,bottle,171,199,153,234,26
1000,002965.jpg,500,375,bottle,199,231,151,228,26
1001,002965.jpg,500,375,bottle,235,259,157,240,26
1002,002965.jpg,500,375,bottle,261,286,153,260,26
1003,002965.jpg,500,375,bottle,288,316,155,263,26
1004,002965.jpg,500,375,bottle,316,346,157,266,26
1005,002965.jpg,500,375,bottle,349,375,158,266,26


In [11]:
labels = df.label.sort_values().unique()

In [12]:
labels_to_idx = {label: idx for idx, label in enumerate(labels)}

In [13]:
df["center_x"] = (((df["xmin"] + df["xmax"]) / 2) / df["width"]).round(4)
df["center_y"] = (((df["ymin"] + df["ymax"]) / 2) / df["height"]).round(4)
df["bb_width"] = ((df["xmax"] - df["xmin"]) / df["width"]).round(4)
df["bb_height"] = ((df["ymax"] - df["ymin"]) / df["height"]).round(4)
df

Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count,center_x,center_y,bb_width,bb_height
0,003127.jpg,500,375,cat,3,499,2,350,1,0.5020,0.4693,0.9920,0.9280
1,000403.jpg,500,375,cat,157,486,90,372,1,0.6430,0.6160,0.6580,0.7520
2,004600.jpg,500,375,cat,178,298,40,281,1,0.4760,0.4280,0.2400,0.6427
3,009709.jpg,375,500,bottle,257,375,1,498,3,0.8427,0.4990,0.3147,0.9940
4,009709.jpg,375,500,bottle,132,271,2,495,3,0.5373,0.4970,0.3707,0.9860
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15658,009758.jpg,500,332,chair,4,104,200,323,20,0.1080,0.7877,0.2000,0.3705
15659,009758.jpg,500,332,chair,150,311,284,332,20,0.4610,0.9277,0.3220,0.1446
15660,007146.jpg,500,374,bird,124,462,43,367,1,0.5860,0.5481,0.6760,0.8663
15661,003536.jpg,500,375,dog,111,500,100,375,2,0.6110,0.6333,0.7780,0.7333


In [14]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15663 entries, 0 to 15662
Data columns (total 13 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   image_name    15663 non-null  object 
 1   width         15663 non-null  int64  
 2   height        15663 non-null  int64  
 3   label         15663 non-null  object 
 4   xmin          15663 non-null  int64  
 5   xmax          15663 non-null  int64  
 6   ymin          15663 non-null  int64  
 7   ymax          15663 non-null  int64  
 8   object_count  15663 non-null  int64  
 9   center_x      15663 non-null  float64
 10  center_y      15663 non-null  float64
 11  bb_width      15663 non-null  float64
 12  bb_height     15663 non-null  float64
dtypes: float64(4), int64(7), object(2)
memory usage: 1.6+ MB


In [15]:
rng = np.random.default_rng()

images = df["image_name"].unique()
rng.shuffle(images)

TRAIN_DF_IDX = int(len(images) * 0.8)

train_image_df = df.loc[df["image_name"].isin(images[:TRAIN_DF_IDX])]
test_image_df = df.loc[df["image_name"].isin(images[TRAIN_DF_IDX:])]

In [16]:
print(train_image_df.shape)
train_image_df.sort_values("image_name").head(10)

(12562, 13)


Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count,center_x,center_y,bb_width,bb_height
7277,000001.jpg,1024,657,car,14,301,335,522,4,0.1538,0.6522,0.2803,0.2846
7278,000001.jpg,1024,657,car,269,571,345,489,4,0.4102,0.6347,0.2949,0.2192
7279,000001.jpg,1024,657,car,502,798,342,450,4,0.6348,0.6027,0.2891,0.1644
7280,000001.jpg,1024,657,car,709,1009,333,438,4,0.8389,0.5868,0.293,0.1598
12787,000009.jpg,500,375,person,150,229,141,284,4,0.379,0.5667,0.158,0.3813
12786,000009.jpg,500,375,horse,69,270,172,330,4,0.339,0.6693,0.402,0.4213
12788,000009.jpg,500,375,person,285,327,201,331,4,0.612,0.7093,0.084,0.3467
12789,000009.jpg,500,375,person,258,297,198,329,4,0.555,0.7027,0.078,0.3493
8745,000012.jpg,500,333,car,156,351,97,270,1,0.507,0.5511,0.39,0.5195
12187,000016.jpg,334,500,bicycle,92,305,72,473,1,0.5943,0.545,0.6377,0.802


In [17]:
print(test_image_df.shape)
test_image_df.sort_values("image_name").head(10)

(3101, 13)


Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count,center_x,center_y,bb_width,bb_height
4463,000002.jpg,800,600,car,41,768,240,497,2,0.5056,0.6142,0.9088,0.4283
4464,000002.jpg,800,600,car,533,722,236,299,2,0.7844,0.4458,0.2362,0.105
15224,000007.jpg,500,333,car,141,500,50,330,1,0.641,0.5706,0.718,0.8408
2376,000019.jpg,500,375,cat,231,483,88,256,2,0.714,0.4587,0.504,0.448
2377,000019.jpg,500,375,cat,11,266,113,259,2,0.277,0.496,0.51,0.3893
10211,000021.jpg,336,500,dog,1,182,235,388,4,0.2723,0.623,0.5387,0.306
10212,000021.jpg,336,500,person,210,336,36,482,4,0.8125,0.518,0.375,0.892
10213,000021.jpg,336,500,person,46,170,82,365,4,0.3214,0.447,0.369,0.566
10214,000021.jpg,336,500,person,11,142,181,419,4,0.2277,0.6,0.3899,0.476
12523,000050.jpg,500,375,person,68,293,96,375,10,0.361,0.628,0.45,0.744


In [18]:
train_image_df.loc[:, "label_id"] = train_image_df["label"].apply(lambda x: labels_to_idx[x])
test_image_df.loc[:, "label_id"] = test_image_df["label"].apply(lambda x: labels_to_idx[x])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_image_df.loc[:, "label_id"] = train_image_df["label"].apply(lambda x: labels_to_idx[x])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_image_df.loc[:, "label_id"] = test_image_df["label"].apply(lambda x: labels_to_idx[x])


In [19]:
train_image_df.head(10)

Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count,center_x,center_y,bb_width,bb_height,label_id
0,003127.jpg,500,375,cat,3,499,2,350,1,0.502,0.4693,0.992,0.928,7
1,000403.jpg,500,375,cat,157,486,90,372,1,0.643,0.616,0.658,0.752,7
6,005135.jpg,500,332,sofa,185,500,161,332,2,0.685,0.7425,0.63,0.5151,17
7,005135.jpg,500,332,chair,3,58,146,302,2,0.061,0.6747,0.11,0.4699,8
9,001258.jpg,500,375,car,101,423,92,287,2,0.524,0.5053,0.644,0.52,6
10,001258.jpg,500,375,car,307,414,103,180,2,0.721,0.3773,0.214,0.2053,6
11,001771.jpg,500,375,chair,61,188,53,218,3,0.249,0.3613,0.254,0.44,8
12,001771.jpg,500,375,chair,228,336,2,120,3,0.564,0.1627,0.216,0.3147,8
13,001771.jpg,500,375,chair,355,496,67,218,3,0.851,0.38,0.282,0.4027,8
14,002667.jpg,500,333,car,5,500,76,333,1,0.505,0.6141,0.99,0.7718,6


## Move respective images to train and test directory

In [20]:
images_list = list(data_images_path.glob("**/*.jp*g"))
images_list

[PosixPath('../data_images/test/001809.jpg'),
 PosixPath('../data_images/test/005318.jpg'),
 PosixPath('../data_images/test/000872.jpg'),
 PosixPath('../data_images/test/003089.jpg'),
 PosixPath('../data_images/test/002039.jpg'),
 PosixPath('../data_images/test/006673.jpg'),
 PosixPath('../data_images/test/009373.jpg'),
 PosixPath('../data_images/test/001510.jpg'),
 PosixPath('../data_images/test/003282.jpg'),
 PosixPath('../data_images/test/009099.jpg'),
 PosixPath('../data_images/test/005267.jpg'),
 PosixPath('../data_images/test/006797.jpg'),
 PosixPath('../data_images/test/009591.jpg'),
 PosixPath('../data_images/test/003159.jpg'),
 PosixPath('../data_images/test/008053.jpg'),
 PosixPath('../data_images/test/009461.jpg'),
 PosixPath('../data_images/test/004886.jpg'),
 PosixPath('../data_images/test/008801.jpg'),
 PosixPath('../data_images/test/004310.jpg'),
 PosixPath('../data_images/test/008944.jpg'),
 PosixPath('../data_images/test/004307.jpg'),
 PosixPath('../data_images/test/00

In [21]:
train_path = data_images_path.joinpath("train")
test_path = data_images_path.joinpath("test")

In [22]:
if not train_path.exists():
    train_path.mkdir()
if not test_path.exists():
    test_path.mkdir()

print(type(train_path))
print(type(test_path))

<class 'pathlib.PosixPath'>
<class 'pathlib.PosixPath'>


In [23]:
next(train_path.glob("*.jp*g")).parent

PosixPath('../data_images/train')

In [24]:
images_list[0].stem

'001809'

In [25]:
if len(images_list) != 0:
    for image in images_list:
        if image.name in train_image_df["image_name"].unique():
            image.rename(train_path / image.name)
        elif image.name in test_image_df["image_name"].unique():
            image.rename(test_path / image.name)

## Create annotation text file for each image

In [26]:
def create_annotation_text_file(dst: Path | str, df: pd.DataFrame):
    columns = ["label_id", "center_x", "center_y", "bb_width", "bb_height"]
    imgs = df["image_name"].to_numpy()
    for img in imgs:
        filename = img.split(".")[0] + ".txt"
        df.to_csv(dst / filename, sep="\t", columns=columns, index=False, header=False)

In [27]:
train_img_groups = train_image_df.groupby("image_name")
test_img_groups = test_image_df.groupby("image_name")

### Delete any text files from previous runs (if any) first

In [28]:
for txt in data_images_path.glob("**/*.txt"):
    txt.unlink(missing_ok=True)

### Create annotation files for train and test datasets

In [29]:
for k in train_img_groups.groups.keys():
    create_annotation_text_file(train_path, train_img_groups.get_group(k))

In [30]:
for k in test_img_groups.groups.keys():
    create_annotation_text_file(test_path, test_img_groups.get_group(k))

In [31]:
test_image_df[test_image_df["image_name"] == "000001.jpg"]

Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count,center_x,center_y,bb_width,bb_height,label_id


In [32]:
train_image_df[train_image_df["center_y"] >= 1]

Unnamed: 0,image_name,width,height,label,xmin,xmax,ymin,ymax,object_count,center_x,center_y,bb_width,bb_height,label_id
