# 0. Imports

In [3]:
import os
import subprocess
import ctypes
from PIL import Image
import re
import xml.etree.ElementTree as ET
import cv2
import shutil

# 1. Convert all Files to JPG

In [None]:
def convertDirToJpg(input_dir):
    """Convert every image file directly inside ``input_dir`` to JPG.

    Sub-directories are skipped. Each regular file is handed to
    ``convertImageToJpg`` together with an output path in the same
    directory that keeps the base name and uses a ``.jpg`` extension.

    Files that Pillow cannot identify as images (for example the ``.xml``
    annotation files that live next to the training images) are reported
    and left untouched instead of aborting the whole run.

    Args:
        input_dir: Directory whose files are converted in place.
    """
    # Imported locally: only this function needs the exception type.
    from PIL import UnidentifiedImageError

    # listdir() is evaluated once, so the .jpg files written below are not
    # picked up again by this loop.
    for file_name in os.listdir(input_dir):
        input_path = os.path.join(input_dir, file_name)

        if os.path.isdir(input_path):
            continue

        # Same base name, '.jpg' extension, same directory.
        filename_no_ext = os.path.splitext(file_name)[0]
        output_path = os.path.join(input_dir, filename_no_ext + '.jpg')

        try:
            convertImageToJpg(input_path, output_path)
        except UnidentifiedImageError:
            print(f"Skipped (not an image): {input_path}")

    print("End of process")


def convertImageToJpg(inputImg, outputDir):
    """Save ``inputImg`` as an RGB image at ``outputDir`` and drop the source.

    Args:
        inputImg: Path of the image file to read.
        outputDir: Path of the file to write (a file path, despite the
            name).

    The source file is removed afterwards when its extension is not
    ``.jpg``, unless source and output turn out to be the same file.
    """
    # The context manager releases the source file handle before the file is
    # overwritten or deleted; an open handle blocks deletion on Windows.
    with Image.open(inputImg) as image:
        # JPG does not support alpha (= transparency); modes such as P and
        # RGBA can carry it, so convert to RGB first.
        rgb_image = image.convert('RGB')
    rgb_image.save(outputDir)

    source_is_jpg = os.path.splitext(inputImg)[1] == '.jpg'
    # On a case-insensitive file system 'x.JPG' and 'x.jpg' are one file;
    # removing the "source" there would delete the image just written.
    if not source_is_jpg and not os.path.samefile(inputImg, outputDir):
        os.remove(inputImg)
    print(f"Conversion completed for {outputDir}")

In [None]:
# Training image folder, relative to the notebook's working directory.
# os.pardir instead of a hard-coded '..\\' keeps the path valid on non-Windows
# systems too; on Windows the resulting path is unchanged.
IMAGE_PATH = os.path.join(os.pardir, '01_Tensorflow', 'workspace', 'images', 'train')
convertDirToJpg(IMAGE_PATH)

# 2. Rename Files

In [None]:
def rename_files(folder_path, starting_number):
    """Rename the files in ``folder_path`` to ``<folder name>_<index><ext>``.

    Only regular files directly inside the folder are renamed; their
    extensions are kept. Files are processed in sorted name order and
    numbered consecutively from ``starting_number``.

    The rename happens in two passes through temporary names, so a target
    name that is still held by a not-yet-renamed file (for example when the
    function is run a second time on the same folder) never overwrites
    that file.

    Args:
        folder_path: Directory whose files are renamed. If it is not a
            directory an error message is printed and nothing is done.
        starting_number: Index given to the first file.
    """
    if not os.path.isdir(folder_path):
        print("Error: The provided path is not a directory.")
        return

    # normpath drops a trailing separator, which would otherwise make
    # basename() return '' and produce names like "_0.jpg".
    folder_name = os.path.basename(os.path.normpath(folder_path))

    # Sorted so the numbering is reproducible; os.listdir() order is arbitrary.
    files = sorted(
        f for f in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, f))
    )

    # Pass 1: park every file under a temporary name that cannot clash with
    # any final name.
    staged = []
    for index, file_name in enumerate(files, starting_number):
        extension = os.path.splitext(file_name)[1]
        temp_path = os.path.join(
            folder_path, f".rename_tmp_{os.getpid()}_{index}{extension}"
        )
        new_path = os.path.join(folder_path, f"{folder_name}_{index}{extension}")
        os.rename(os.path.join(folder_path, file_name), temp_path)
        staged.append((temp_path, new_path))

    # Pass 2: all final names are free now, move the files into place.
    for temp_path, new_path in staged:
        os.rename(temp_path, new_path)
        print(f"Renamed to: {new_path}")

In [None]:
# os.pardir instead of a hard-coded '..\\' keeps the path valid on non-Windows
# systems too; on Windows the resulting path is unchanged.
IMAGE_PATH = os.path.join(os.pardir, '01_Tensorflow', 'workspace', 'images', 'austria')

# Sub-folders of IMAGE_PATH whose files are renumbered from 0, in this order.
# (The names look like Austrian plate categories - confirm.)
for sub_folder in ('w', 'bgld', 'ktn', 'n', 'o', 'sbg', 'stmk', 'vbg', 't', 'eCar', 'sonder'):
    rename_files(os.path.join(IMAGE_PATH, sub_folder), 0)

In [None]:
def processFile(input_dir, starting_number):
    """Rename the files of ``input_dir`` and then convert them to JPG.

    Args:
        input_dir: Directory to process; passed to both steps.
        starting_number: First index used by the renaming step.
    """
    # Step 1: numbered names.
    rename_files(folder_path=input_dir, starting_number=starting_number)
    # Step 2: JPG conversion of the renamed files.
    convertDirToJpg(input_dir=input_dir)

# 3. Modify Data from Public Dataset to Fit Requirements

## 3.1. Replace "licenseplate" label with "licence" in xml files

In [None]:
# os.pardir instead of a hard-coded '..\\' keeps the path valid on non-Windows
# systems too; on Windows the resulting path is unchanged.
directory_path = os.path.join(os.pardir, '01_Tensorflow', 'workspace', 'images', 'train')

# The tags are plain ASCII, so the replacement is done on raw bytes: this
# works regardless of the XML file's text encoding and leaves line endings
# untouched.
OLD_LABEL = b'<name>licenseplate</name>'
NEW_LABEL = b'<name>licence</name>'

for filename in os.listdir(directory_path):
    if not filename.endswith('.xml'):
        continue

    file_path = os.path.join(directory_path, filename)

    with open(file_path, 'rb') as file:
        xml_content = file.read()

    # Replace "licenseplate" with "licence"
    new_xml_content = xml_content.replace(OLD_LABEL, NEW_LABEL)

    # Only rewrite files that actually contained the old label.
    if new_xml_content != xml_content:
        with open(file_path, 'wb') as file:
            file.write(new_xml_content)

print("Modification completed.")


## 3.2. Create Pascal VOC XML annotations from txt files in x, y, width, height format

In [None]:
# Input: folder holding the .txt annotations and their .jpg images
# (absolute, machine-specific path).
folder_path = "C:/Users/jinji/OneDrive/Desktop/bachelor-thesis/source/benchmark/benchmarks-master/endtoend/eu"
# Output: folder that receives the generated .xml files and image copies.
out_path = "../01_Tensorflow//workspace/images/mytest"

# NOTE: process_files is defined in the cell that follows this one, so that
# cell has to be executed before this call.
process_files(folder_path=folder_path, out_path=out_path)

In [None]:
def process_files(folder_path, out_path):
    """Create a Pascal VOC XML annotation for every ``.txt`` file in a folder.

    For each ``<name>.txt`` the first line is split on tabs and fields 1-4
    are read as the integers ``x, y, box_width, box_height``. The matching
    ``<name>.jpg`` is loaded to obtain the image size, the box is converted
    to corner coordinates, and both the XML file and a copy of the image
    are written to ``out_path`` (created when missing).

    Entries whose first line does not hold four integer box fields, or
    whose image cannot be read, are reported and skipped so that one bad
    entry does not stop the batch.

    Args:
        folder_path: Directory containing the ``.txt`` files and images.
        out_path: Directory receiving the ``.xml`` files and image copies.
    """
    for file_name in os.listdir(folder_path):
        if not file_name.endswith(".txt"):
            continue

        # Swap only the extension; str.replace would also rewrite a ".txt"
        # or ".jpg" occurring in the middle of the name.
        base_name = os.path.splitext(file_name)[0]
        image_file_name = base_name + ".jpg"
        image_file_path = os.path.join(folder_path, image_file_name)

        with open(os.path.join(folder_path, file_name), 'r') as txt_file:
            line = txt_file.readline().strip().split('\t')

        try:
            x, y, box_width, box_height = map(int, line[1:5])
        except ValueError:
            # Empty file, fewer than five fields, or non-integer values.
            print(f"Skipped (malformed annotation): {file_name}")
            continue

        # cv2.imread does not raise for a missing/unreadable file, it returns None.
        image = cv2.imread(image_file_path)
        if image is None:
            print(f"Skipped (image not readable): {image_file_path}")
            continue

        folder = "images"
        filename = image_file_name
        height, width, depth = image.shape
        xmin, ymin, xmax, ymax = convert_coordinates(x, y, box_width, box_height, width, height)

        # Create XML content
        xml_content = create_annotation_xml(folder, filename, width, height, depth, xmin, ymin, xmax, ymax)

        os.makedirs(out_path, exist_ok=True)

        xml_file_path = os.path.join(out_path, base_name + ".xml")
        with open(xml_file_path, 'w') as xml_file:
            xml_file.write(xml_content)

        # Copy the image next to its annotation.
        shutil.copy(image_file_path, os.path.join(out_path, image_file_name))

        print(f"XML file saved: {xml_file_path}")
            
        
def convert_coordinates(x, y, width, height, image_width, image_height):
    """Convert an ``x, y, width, height`` box to corners clipped to the image.

    Both corners are clamped to ``[0, image_width]`` / ``[0, image_height]``,
    so a box that lies partly or completely outside the image can never
    yield negative coordinates or a minimum larger than the maximum.

    Args:
        x: Left edge of the box.
        y: Top edge of the box.
        width: Box width.
        height: Box height.
        image_width: Width of the image the box belongs to.
        image_height: Height of the image the box belongs to.

    Returns:
        Tuple ``(xmin, ymin, xmax, ymax)``.
    """
    def clamp(value, upper):
        # Restrict value to the closed range [0, upper].
        return max(0, min(value, upper))

    xmin = clamp(x, image_width)
    ymin = clamp(y, image_height)
    xmax = clamp(x + width, image_width)
    ymax = clamp(y + height, image_height)
    return xmin, ymin, xmax, ymax

            
def create_annotation_xml(folder, filename, width, height, depth, xmin, ymin, xmax, ymax):
    """Build a single-object annotation document and return it as a string.

    The document has the root ``annotation`` with ``folder``, ``filename``,
    ``size`` (``width``/``height``/``depth``), ``segmented`` and one
    ``object`` named ``licence`` carrying the bounding box.

    Args:
        folder: Text of the ``folder`` element.
        filename: Text of the ``filename`` element.
        width, height, depth: Image size values, written via ``str()``.
        xmin, ymin, xmax, ymax: Box corners, written via ``str()``.

    Returns:
        The serialized XML as a ``str`` (no XML declaration).
    """
    def child(parent, tag, text=None):
        # Append <tag> to parent, set its text, and hand the node back.
        node = ET.SubElement(parent, tag)
        node.text = text
        return node

    root = ET.Element("annotation")
    child(root, "folder", folder)
    child(root, "filename", filename)

    # Image dimensions.
    size_node = child(root, "size")
    for tag, value in (("width", width), ("height", height), ("depth", depth)):
        child(size_node, tag, str(value))

    child(root, "segmented", str(0))

    # The one annotated object with its fixed attributes.
    object_node = child(root, "object")
    fixed_fields = (
        ("name", "licence"),
        ("pose", "Unspecified"),
        ("truncated", "0"),
        ("occluded", "0"),
        ("difficult", "0"),
    )
    for tag, text in fixed_fields:
        child(object_node, tag, text)

    # Bounding box corners.
    box_node = child(object_node, "bndbox")
    for tag, value in (("xmin", xmin), ("ymin", ymin), ("xmax", xmax), ("ymax", ymax)):
        child(box_node, tag, str(value))

    return ET.tostring(root, encoding="unicode")
           

# 4. Label Images

In [None]:
# Download and build Labelimg tool
LABELIMG_PATH = os.path.join('..\\', '01:Tensorflow', 'labelimg')

if not os.path.exists(LABELIMG_PATH):
    os.makedirs(LABELIMG_PATH)
    subprocess.run(["git", "clone", "https://github.com/tzutalin/labelImg", LABELIMG_PATH])
    # compile
    !cd {LABELIMG_PATH} && pyrcc5 -o libs/resources.py resources.qrc

In [None]:
# Open Labelimg tool
# IPython shell escape: changes into LABELIMG_PATH and runs labelImg.py with
# the `python` executable found by the shell. LABELIMG_PATH has to be defined
# in the session already.
!cd {LABELIMG_PATH} && python labelImg.py

# 5. Move Data into a Training and Testing Folder

In [6]:
# Both folders live side by side under the same images directory. TEST_PATH
# previously pointed into 'Tensorflow' instead of '01_Tensorflow', which put
# the test folder outside the workspace used everywhere else.
TRAIN_PATH = os.path.join(os.pardir, '01_Tensorflow', 'workspace', 'images', 'train')
TEST_PATH = os.path.join(os.pardir, '01_Tensorflow', 'workspace', 'images', 'test')

# Create whichever of the two folders is still missing.
for split_path in (TRAIN_PATH, TEST_PATH):
    os.makedirs(split_path, exist_ok=True)

# 6. OPTIONAL - Compress Data for Colab Training

In [None]:
# Folder that contains the train/test image folders; os.pardir instead of a
# hard-coded '..\\' keeps the path valid on non-Windows systems too (the
# resulting path is unchanged on Windows).
IMAGES_PATH = os.path.join(os.pardir, '01_Tensorflow', 'workspace', 'images')
# Location of the compressed archive inside that folder.
ARCHIVE_PATH = os.path.join(IMAGES_PATH, 'archive.tar.gz')

In [None]:
# Compress
# When no file exists at ARCHIVE_PATH yet, an IPython shell escape changes into
# IMAGES_PATH and packs the train and test folders into archive.tar.gz with
# tar (gzip-compressed, verbose). Otherwise only a message is printed.
if not os.path.exists(ARCHIVE_PATH):
    !cd {IMAGES_PATH} && tar -czvf archive.tar.gz train test
else: 
    print("archive.tar.gz already exist.")

In [None]:
# Extract
# When a file exists at ARCHIVE_PATH, an IPython shell escape changes into
# IMAGES_PATH and unpacks archive.tar.gz there with tar (verbose). Otherwise
# only a message is printed.
if os.path.exists(ARCHIVE_PATH):
  !cd {IMAGES_PATH} && tar -xzvf archive.tar.gz
else: 
    print("archive.tar.gz does not exist.")