In [1]:
import pandas as pd
import os
import numpy as np
from re import match as match_regex
from shutil import move

In [2]:
print(os.getcwd())

/home/leon/Documents/Studienarbeit/EvalFramework/notebooks


## Reorganise Folder Structure

In [10]:
gtsdb_root = "../data/datasets/gtsdb"
gtsdb_train = f"{gtsdb_root}/TrainIJCNN2013"
gtsdb_test = f"{gtsdb_root}/TestIJCNN2013"

In [9]:
#os.mkdir(f"{gtsdb_train}/images")
for image in os.listdir(f"{gtsdb_train}/TrainIJCNN2013"):
    if match_regex(".*([.]ppm)", image):
        move(f"{gtsdb_train}/TrainIJCNN2013/{image}", f"{gtsdb_train}/images")

## Convert gt.txt of GTSDB to YOLO Format
YOLO format, with one *.txt file per image (if no objects in image, no *.txt file is required). The *.txt file specifications are:
- One row per object
- Each row is class x_center y_center width height format.
- Box coordinates must be in normalized xywh format (from 0 - 1). If your boxes are in pixels, divide x_center and width by image width, and y_center and height by image height.
- Class numbers are zero-indexed (start from 0).
![](https://user-images.githubusercontent.com/26833433/91506361-c7965000-e886-11ea-8291-c72b98c25eec.jpg)
The label file corresponding to the above image contains 2 persons (class 0) and a tie (class 27):
![](https://user-images.githubusercontent.com/26833433/112467037-d2568c00-8d66-11eb-8796-55402ac0d62f.png)


- Filename: Filename of the image the annotations apply for
- Traffic sign's region of interest (ROI) in the image
    - leftmost image column of the ROI
    - upmost image row of the ROI
    - rightmost image column of the ROI
    - downmost image row of the ROI
- ID providing the traffic sign's class


In [2]:
df = pd.read_csv("../data/datasets/gtsdb/TrainIJCNN2013/TrainIJCNN2013/gt.txt",
                 sep=";",
                 names=["Filename", "X1.ROI", "Y1.ROI", "X2.ROI", "Y2.ROI", "classID"]
                 )
print(df)

      Filename  X1.ROI  Y1.ROI  X2.ROI  Y2.ROI  classID
0    00000.ppm     774     411     815     446       11
1    00001.ppm     983     388    1024     432       40
2    00001.ppm     386     494     442     552       38
3    00001.ppm     973     335    1031     390       13
4    00002.ppm     892     476    1006     592       39
..         ...     ...     ...     ...     ...      ...
847  00570.ppm     881     416     914     449        9
848  00571.ppm    1287     361    1308     384       17
849  00575.ppm     403     474     435     506       38
850  00593.ppm     584     510     608     534       38
851  00599.ppm     700     454     722     476        9

[852 rows x 6 columns]


In [3]:
image_size = (1360, 800) # (width, height)

In [4]:
# The center of rectangle is the midpoint of the diagonal end points of rectangle
test_row = df.loc[0]
x_center = (test_row.loc["X1.ROI"] + test_row.loc["X2.ROI"]) // 2
y_center = (test_row.loc["Y1.ROI"] + test_row.loc["Y2.ROI"]) // 2
box_width = test_row.loc["X2.ROI"] - test_row.loc["X1.ROI"]
box_height = test_row.loc["Y2.ROI"] - test_row.loc["Y1.ROI"]
print(x_center, y_center, box_height, box_width)

794 428 35 41


In [5]:
# Normalize Coordinates
df["X1.ROI"] /= image_size[0]
df["X2.ROI"] /= image_size[0]
df["Y1.ROI"] /= image_size[1]
df["Y2.ROI"] /= image_size[1]
print(df)

      Filename    X1.ROI   Y1.ROI    X2.ROI   Y2.ROI  classID
0    00000.ppm  0.569118  0.51375  0.599265  0.55750       11
1    00001.ppm  0.722794  0.48500  0.752941  0.54000       40
2    00001.ppm  0.283824  0.61750  0.325000  0.69000       38
3    00001.ppm  0.715441  0.41875  0.758088  0.48750       13
4    00002.ppm  0.655882  0.59500  0.739706  0.74000       39
..         ...       ...      ...       ...      ...      ...
847  00570.ppm  0.647794  0.52000  0.672059  0.56125        9
848  00571.ppm  0.946324  0.45125  0.961765  0.48000       17
849  00575.ppm  0.296324  0.59250  0.319853  0.63250       38
850  00593.ppm  0.429412  0.63750  0.447059  0.66750       38
851  00599.ppm  0.514706  0.56750  0.530882  0.59500        9

[852 rows x 6 columns]


In [15]:
print(df.loc[df["Filename"] == "00000.ppm"])

    Filename    X1.ROI   Y1.ROI    X2.ROI  Y2.ROI  classID
0  00000.ppm  0.569118  0.51375  0.599265  0.5575       11


In [6]:
for index, row in df.iterrows():
    print(row.loc['classID'])
    print([row.loc['classID'],
        (row.loc["X1.ROI"] + row.loc["X2.ROI"]) / 2.0,
        (row.loc["Y1.ROI"] + row.loc["Y2.ROI"]) / 2.0,
        row.loc["X2.ROI"] - row.loc["X1.ROI"],
        row.loc["Y2.ROI"] - row.loc["Y1.ROI"]])
    break

11
[11, 0.5841911764705883, 0.535625, 0.030147058823529416, 0.043749999999999956]


In [7]:
gt_converted = np.array([
    [
        row.loc['classID'],
        (row.loc["X1.ROI"] + row.loc["X2.ROI"]) / 2.0,
        (row.loc["Y1.ROI"] + row.loc["Y2.ROI"]) / 2.0,
        row.loc["X2.ROI"] - row.loc["X1.ROI"],
        row.loc["Y2.ROI"] - row.loc["Y1.ROI"]
    ] for index, row in df.iterrows()])
print(gt_converted.shape)
print(gt_converted)
print(df.loc[0].loc['classID'])

(852, 5)
[[1.10000000e+01 5.84191176e-01 5.35625000e-01 3.01470588e-02
  4.37500000e-02]
 [4.00000000e+01 7.37867647e-01 5.12500000e-01 3.01470588e-02
  5.50000000e-02]
 [3.80000000e+01 3.04411765e-01 6.53750000e-01 4.11764706e-02
  7.25000000e-02]
 ...
 [3.80000000e+01 3.08088235e-01 6.12500000e-01 2.35294118e-02
  4.00000000e-02]
 [3.80000000e+01 4.38235294e-01 6.52500000e-01 1.76470588e-02
  3.00000000e-02]
 [9.00000000e+00 5.22794118e-01 5.81250000e-01 1.61764706e-02
  2.75000000e-02]]
11


In [8]:
# check for invalid mappings
for index, row in df.iterrows():
    if row.loc["classID"] == gt_converted[index][0] and (row.loc["X1.ROI"] + row.loc["X2.ROI"]) / 2 == gt_converted[index][1] and (row.loc["Y1.ROI"] + row.loc["Y2.ROI"]) / 2 == gt_converted[index][2] and row.loc["X2.ROI"] - row.loc["X1.ROI"] == gt_converted[index][3] and row.loc["Y2.ROI"] - row.loc["Y1.ROI"] == gt_converted[index][4]:
        continue
    else:
        print("row false values")
    break

In [25]:
os.mkdir(f"{gtsdb_train}/labels")

In [11]:
with open(f"{gtsdb_train}/labels/{df.loc[0].loc['Filename'][:-4]}.txt", "w+") as f:
    print(f"{int(gt_converted[0][0])} {' '.join(map(str, gt_converted[0][1:]))}")
    f.write(f"{int(gt_converted[0][0])} {' '.join(map(str, gt_converted[0][1:]))}")
    f.close()

11 0.5841911764705883 0.535625 0.030147058823529416 0.043749999999999956


In [12]:
f = open(f"{gtsdb_train}/labels/{df.loc[0].loc['Filename'][:-4]}.txt", "w+")
f.readlines()

[]