# GTSDB dataset converter
This script converts the GTSDB dataset from its original layout into a layout and label format that YOLOv5 can use directly.
The following steps are executed:
* moves the relevant images from the GTSDB folder structure to a new folder structure
* deletes unnecessary files and folders
* converts images from .ppm file format to .png file format
* converts the GTSDB annotations (gt.txt) into YOLOv5 label files

## Define dependencies and important paths

In [25]:
import os
import shutil
import csv
from PIL import Image

In [15]:
# Path to the GTSDB root folder. It is a relative path, so it is resolved
# against the current working directory; every other source and output
# location is built from it.
gtsdb_path = "../../GTSDB"

## Download dataset

There are different sources for the GTSDB dataset.
For consistency, please download the GTSDB dataset from Kaggle.

It can be found here: [GTSDB Dataset](https://www.kaggle.com/datasets/safabouguezzi/german-traffic-sign-detection-benchmark-gtsdb)

You might need to unzip the downloaded archive and rename the extracted folder to "GTSDB".

### Define paths

In [17]:
# Folders inside the GTSDB download that hold the original train / test images
train_source_directory = os.path.join(gtsdb_path, "TrainIJCNN2013/TrainIJCNN2013")
test_source_directory = os.path.join(gtsdb_path, "TestIJCNN2013/TestIJCNN2013Download")

# Folders the images are moved to: train images go directly into "images",
# test images into its "test" subfolder.
# NOTE(review): the test folder is nested inside the train folder - confirm
# that whatever reads the train folder does not pick up the test images too.
train_output_directory = os.path.join(gtsdb_path, "images")
test_output_directory = os.path.join(gtsdb_path, "images/test")

In [18]:
# Create both target folders up front; folders that already exist are kept as they are
for output_directory in (train_output_directory, test_output_directory):
    os.makedirs(output_directory, exist_ok=True)

### Move train images

In [19]:
# Relocate every .ppm file found directly in the train source folder
for filename in os.listdir(train_source_directory):
    if not filename.endswith(".ppm"):
        continue  # anything that is not a .ppm image stays where it is
    image_source_path = os.path.join(train_source_directory, filename)
    image_output_path = os.path.join(train_output_directory, filename)
    shutil.move(image_source_path, image_output_path)

### Move test images

In [20]:
# Relocate every .ppm file found directly in the test source folder
for filename in os.listdir(test_source_directory):
    if not filename.endswith(".ppm"):
        continue  # anything that is not a .ppm image stays where it is
    image_source_path = os.path.join(test_source_directory, filename)
    image_output_path = os.path.join(test_output_directory, filename)
    shutil.move(image_source_path, image_output_path)

## Delete unnecessary files

In [21]:
# Remove the original download folders together with everything left inside them
for obsolete_folder in ("TrainIJCNN2013", "TestIJCNN2013"):
    shutil.rmtree(os.path.join(gtsdb_path, obsolete_folder))

## Convert .ppm to .png

In [23]:
def convert_ppm_to_png(source_path, output_path = None):
    """Convert every ``.ppm`` image of a folder to ``.png`` and delete the original.

    Only files located directly in ``source_path`` whose name ends with
    ``.ppm`` are processed; all other entries are left untouched.

    Args:
        source_path: Folder that is scanned for ``.ppm`` files.
        output_path: Folder the ``.png`` files are written to. Defaults to
            ``source_path``. An explicitly given folder is created if it
            does not exist yet.
    """
    if output_path is None:
        output_path = source_path
    else:
        # Saving into a missing folder would fail on the first image
        os.makedirs(output_path, exist_ok=True)

    for filename in os.listdir(source_path):
        if not filename.endswith(".ppm"):
            continue

        ppm_path = os.path.join(source_path, filename)
        png_filename = os.path.splitext(filename)[0] + ".png"
        png_path = os.path.join(output_path, png_filename)

        # The context manager guarantees the source file is closed before it
        # is deleted (an open handle blocks the removal on some platforms)
        with Image.open(ppm_path) as image:
            image.save(png_path, "PNG")

        # Remove the old file only after the .png was written successfully
        os.remove(ppm_path)

In [24]:
# Convert the moved images in place, first the train folder, then the test folder
for image_directory in (train_output_directory, test_output_directory):
    convert_ppm_to_png(image_directory)

## Convert label format

In [29]:
# Converts the GTSDB training labels into YOLOv5 structured labels
def convert_gtsdb_to_yolov5(gtsdb_labels_path, yolo_labels_path, images_path):
    """Write one YOLOv5 label file per image listed in a GTSDB annotation file.

    Every non-empty row of the ``;``-separated annotation file is read as
    ``filename;x1;y1;x2;y2;class_id`` with pixel coordinates. For each image
    a ``<image name>.txt`` file is written that holds one line per box:
    ``class_id x_center y_center width height``, all box values divided by
    the image dimensions and formatted with three decimals.

    Label files are rewritten from scratch, so running the conversion again
    does not duplicate entries.

    Args:
        gtsdb_labels_path: Path to the GTSDB annotation file.
        yolo_labels_path: Folder the label files are written to; created if
            it does not exist yet.
        images_path: Folder containing the ``.png`` images named in the
            annotation file (needed to read the image dimensions).
    """
    # Read the GTSDB annotation file, ignoring blank lines
    with open(gtsdb_labels_path, 'r', newline='') as f:
        annotations = [row for row in csv.reader(f, delimiter=';') if row]

    # Collect all boxes of an image so that each image is opened only once
    boxes_by_image = {}
    for annotation in annotations:
        # splitext keeps file names that contain additional dots intact
        image_filename = os.path.splitext(annotation[0])[0]
        box = (
            int(annotation[5]),  # class id
            int(annotation[1]),  # x1
            int(annotation[2]),  # y1
            int(annotation[3]),  # x2
            int(annotation[4]),  # y2
        )
        boxes_by_image.setdefault(image_filename, []).append(box)

    os.makedirs(yolo_labels_path, exist_ok=True)

    for image_filename, boxes in boxes_by_image.items():
        # Only the dimensions are needed; close the image file right away
        with Image.open(os.path.join(images_path, image_filename + ".png")) as image:
            img_width = image.width
            img_height = image.height

        label_lines = []
        for class_id, x1, y1, x2, y2 in boxes:
            # Centre / size of the box, normalised by the image dimensions
            box_x_center = (x1 + x2) / 2 / img_width
            box_y_center = (y1 + y2) / 2 / img_height
            box_width = (x2 - x1) / img_width
            box_height = (y2 - y1) / img_height
            label_lines.append("{} {:.3f} {:.3f} {:.3f} {:.3f}\n".format(
                class_id, box_x_center, box_y_center, box_width, box_height))

        # 'w' replaces a label file left over from an earlier run
        with open(os.path.join(yolo_labels_path, image_filename + ".txt"), 'w') as f:
            f.writelines(label_lines)

In [28]:
# Locations of the GTSDB annotation file and of the YOLOv5 label folder
gtsdb_labels_path = os.path.join(gtsdb_path, "gt.txt")
yolo_labels_path = os.path.join(gtsdb_path, "labels")

# The label folder has to exist before label files are written into it
os.makedirs(yolo_labels_path, exist_ok=True)

# Image dimensions are looked up in the train image folder
convert_gtsdb_to_yolov5(
    gtsdb_labels_path=gtsdb_labels_path,
    yolo_labels_path=yolo_labels_path,
    images_path=train_output_directory,
)

Conversion complete.
