# Datenvorbereitung, in den YOLOv5 Datensatz Ordner + Konvertierung in das YOLO-Format

### Importe

In [44]:
# Import
import os
import json
import shutil
import cv2
from sklearn.model_selection import train_test_split
import zipfile
import xml.etree.ElementTree as ET

#### YOLOv5 und YOLOv4 Installation

In [None]:
# Clone the YOLOv5 repository next to this notebook and install its requirements
# into the kernel's environment (%pip targets the running kernel).
!git clone "https://github.com/ultralytics/yolov5.git"
%cd yolov5
%pip install -qr requirements.txt
# Return to the notebook's working directory; later cells use paths relative to it.
%cd ..

In [None]:
# Unpack the YOLOv4 archive so that the `yolov4/` folder used by the next cell exists.
def unzip_folder(zip_path, destination=None):
    """Extract every member of the ZIP archive at ``zip_path``.

    Args:
        zip_path: Path to an existing ZIP file. It is opened read-only.
        destination: Directory to extract into. Defaults to the directory
            that contains the archive.

    Returns:
        The directory the archive was extracted into.

    Raises:
        FileNotFoundError: If ``zip_path`` does not exist.
        zipfile.BadZipFile: If the file is not a valid ZIP archive.
    """
    if destination is None:
        destination = os.path.dirname(os.path.abspath(zip_path))
    with zipfile.ZipFile(zip_path, 'r') as zipf:
        zipf.extractall(destination)
    return destination


def zip_folder(folder_path):
    """Backward-compatible name: extract the archive at ``folder_path``.

    The earlier implementation opened ``folder_path`` in write mode, which
    truncated the archive, and then walked the same path as a directory, so
    nothing was ever extracted. The name is kept for existing callers only.
    """
    return unzip_folder(folder_path)


yolo4 = "yolov4.zip"
if os.path.isfile(yolo4):
    # NOTE(review): assumes the archive contains a top-level `yolov4/` folder,
    # because the next cell changes into `yolov4` — confirm.
    unzip_folder(yolo4)
else:
    print(f"Archiv '{yolo4}' nicht gefunden. Entpacken übersprungen.")

In [None]:
# Install the requirements of the local `yolov4` folder into the kernel's environment,
# then return to the notebook's working directory.
%cd yolov4
%pip install -qr requirements.txt
%cd ..

### Datensatz für Alle Klassen speichern
Hierfür wird der große Datensatz mit allen 20 Klassen verwendet. Die Daten werden für das YOLO-Training in 70 % Trainings-, 15 % Validierungs- und 15 % Testdaten aufgeteilt.

**Kopieren von Bildern in den YOLOv5-Dataset-Ordner und Konvertieren von JSON-Daten in das YOLO-Format (txt)**
 - erste Spalte: Klassenindex
 - zweite bis fünfte Spalte: Bounding Box als (x_center, y_center, width, height), jeweils auf die Bildbreite bzw. Bildhöhe normalisiert

In [22]:
# Source folders of the raw JSON annotations and the matching images
json_dir = "../Datasets/Face Mask Detection Dataset/Medical mask/Medical mask/Medical Mask/annotations"
image_dir = "../Datasets/Face Mask Detection Dataset/Medical mask/Medical mask/Medical Mask/images"

# Target folders inside the YOLOv5 dataset tree
labels_output_dir = "yolov5/dataset/all_classes/labels"
image_output_dir = "yolov5/dataset/all_classes/images"

# Class name -> YOLO class index for all classes.
# The index of a class is its position in the tuple below.
class_mapping_all = {
    class_name: class_index
    for class_index, class_name in enumerate((
        "face_no_mask",
        "face_with_mask",
        "mask_surgical",
        "hat",
        "eyeglasses",
        "face_other_covering",
        "face_with_mask_incorrect",
        "mask_colorful",
        "helmet",
        "sunglasses",
        "scarf_bandana",
        "hair_net",
        "goggles",
        "face_shield",
        "hijab_niqab",
        "turban",
        "balaclava_ski_mask",
        "gas_mask",
        "hood",
        "other",
    ))
}

# Read images and annotations as matched pairs.
# An annotation is named "<image file name>.json" (the later cells derive the image
# path from the JSON name in the same way), so pairs are built by name. Pairing by
# os.listdir() position is unsafe: the listing order is arbitrary, and an image
# without an annotation would shift every following pair.
annotation_names = sorted(x for x in os.listdir(json_dir) if x.endswith(".json"))
available_images = set(os.listdir(image_dir))
paired_names = [
    (name[:-len(".json")], name)
    for name in annotation_names
    if name[:-len(".json")] in available_images
]
# Images without an annotation (and annotations without an image) are left out.
images = [os.path.join(image_dir, image_name) for image_name, _ in paired_names]
annotations = [os.path.join(json_dir, annotation_name) for _, annotation_name in paired_names]

# Abort early when there is nothing to split.
if len(images) < 2 or len(annotations) < 2:
    # Raise instead of calling exit(): exit() is meant for interactive sessions and
    # does not cleanly abort a notebook cell.
    raise ValueError("Nicht genügend Datenpunkte für den Split vorhanden.")

# Train/test split: 70% training, 30% held out. Sorted input plus a fixed
# random_state makes the split reproducible.
train_images, test_images, train_annotations, test_annotations = train_test_split(images, annotations, test_size=0.3, random_state=1)
# Split the held-out 30% in half: 15% validation, 15% test.
val_images, test_images, val_annotations, test_annotations = train_test_split(test_images, test_annotations, test_size=0.5, random_state=1)

In [23]:
# Copy a list of files into a target folder
def copy_files_to_folder(file_list, destination_folder):
    """Copy every file in ``file_list`` into ``destination_folder``.

    The folder is created when it does not exist yet. Each file keeps its base
    name, so an existing file with the same name is overwritten.

    Args:
        file_list: Iterable of source file paths.
        destination_folder: Directory that receives the copies.
    """
    folder_missing = not os.path.exists(destination_folder)
    if folder_missing:
        os.makedirs(destination_folder)

    for source_path in file_list:
        target_path = os.path.join(destination_folder, os.path.basename(source_path))
        shutil.copy(source_path, target_path)

In [24]:
# Convert one JSON annotation file into a YOLOv5 label file: every annotation gets its
# class index, and the bounding-box coordinates are normalised by the image size.
def convert_labels_to_yolov5_format(json_path, image_path, output_dir, class_mapping):
    """Write ``<image stem>.txt`` and ``classes.txt`` into ``output_dir``.

    Args:
        json_path: JSON file with an ``"Annotations"`` list. Each entry has a
            ``"BoundingBox"`` of four values (x_min, y_min, x_max, y_max; assumed
            to be pixel coordinates) and a ``"classname"``.
        image_path: Image belonging to the annotation. It is read only to obtain
            its width and height.
        output_dir: Directory for the label files; created if missing.
        class_mapping: Dict mapping class name to class index.

    Raises:
        ValueError: If the image cannot be read, or an annotation uses a class
            that is not in ``class_mapping``.
    """
    os.makedirs(output_dir, exist_ok=True)

    with open(json_path, 'r') as f:
        json_data = json.load(f)

    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising for a missing or unreadable file.
    if image is None:
        raise ValueError(f"Bild konnte nicht gelesen werden: {image_path}")
    # Only height and width are needed; slicing also works without a channel axis.
    image_height, image_width = image.shape[:2]

    yolo_annotations = []
    for bbox in json_data["Annotations"]:
        x_min, y_min, x_max, y_max = bbox["BoundingBox"]
        class_name = bbox["classname"]

        if class_name not in class_mapping:
            raise ValueError(f"Ungültige Klasse: {class_name}")

        class_index = class_mapping[class_name]

        # YOLO format: box centre and box size, each relative to the image size
        x_center = (x_min + x_max) / (2 * image_width)
        y_center = (y_min + y_max) / (2 * image_height)
        width = (x_max - x_min) / image_width
        height = (y_max - y_min) / image_height

        yolo_annotations.append(f"{class_index} {x_center} {y_center} {width} {height}")

    # The label file is named after the image without its last extension.
    image_filename = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, f"{image_filename}.txt")

    with open(output_path, 'w') as f:
        for annotation in yolo_annotations:
            f.write(annotation + '\n')

    # classes.txt lists "<index> <name>" per class. It is rewritten on every call,
    # so it always reflects the mapping used for the latest conversion.
    classes_file_path = os.path.join(output_dir, "classes.txt")
    with open(classes_file_path, 'w') as f:
        for class_name, class_index in class_mapping.items():
            f.write(f"{class_index} {class_name}\n")

All_Classes-Datensatz

In [25]:
def split_dataset(labels_output_dir, image_output_dir):
    """Copy the current train/val/test split into the YOLOv5 folder layout.

    Reads the module-level lists ``train_images``, ``val_images``, ``test_images``
    and ``train_annotations``, ``val_annotations``, ``test_annotations``. The
    annotation files are copied unchanged; no format conversion happens here.

    Args:
        labels_output_dir: Parent folder for the ``train``/``val``/``test`` label folders.
        image_output_dir: Parent folder for the ``train``/``val``/``test`` image folders.

    Returns:
        Tuple ``(train_labels_dir, val_labels_dir, test_labels_dir)``.
    """
    split_names = ("train", "val", "test")

    # Images first, in the order train -> val -> test
    image_dirs = [os.path.join(image_output_dir, split) for split in split_names]
    for split_files, target_dir in zip((train_images, val_images, test_images), image_dirs):
        copy_files_to_folder(split_files, target_dir)

    # Then the annotation files, in the same order
    label_dirs = [os.path.join(labels_output_dir, split) for split in split_names]
    for split_files, target_dir in zip((train_annotations, val_annotations, test_annotations), label_dirs):
        copy_files_to_folder(split_files, target_dir)

    return tuple(label_dirs)


In [26]:
def convert_json(train_labels_output_dir, val_labels_output_dir, test_labels_output_dir, class_mapping):
    """Convert the JSON labels of all three splits into YOLOv5 label files.

    Pairs the module-level annotation and image lists of each split by position
    and converts every pair into the label folder of that split.

    Args:
        train_labels_output_dir: Label folder of the training split.
        val_labels_output_dir: Label folder of the validation split.
        test_labels_output_dir: Label folder of the test split.
        class_mapping: Dict mapping class name to class index.
    """
    splits = (
        (train_annotations, train_images, train_labels_output_dir),
        (val_annotations, val_images, val_labels_output_dir),
        (test_annotations, test_images, test_labels_output_dir),
    )
    for split_annotations, split_images, split_output_dir in splits:
        for json_path, image_path in zip(split_annotations, split_images):
            convert_labels_to_yolov5_format(json_path, image_path, split_output_dir, class_mapping)

    print("Kopieren und Konvertieren erfolgreich.")

In [27]:
def copy_folder(source, destination):
    """Copy the folder tree ``source`` to ``destination`` unless the target exists.

    An existing ``destination`` is never touched; a message is printed in both cases.
    """
    if os.path.exists(destination):
        # Do not merge into or overwrite an existing target folder
        print(f"Zielordner '{destination}' existiert bereits. Kopiervorgang abgebrochen.")
        return

    shutil.copytree(source, destination)
    print(f"Ordner '{source}' wurde erfolgreich nach '{destination}' kopiert.")

In [28]:
# Copy the dataset into the YOLOv5 folders, split into train/val/test.
# The labels are still the original JSON files here; they are converted in a later cell.
train_labels_output_dir, val_labels_output_dir, test_labels_output_dir = split_dataset(labels_output_dir, image_output_dir)

In [29]:
# Duplicate the all_classes dataset for two further YOLOv5 trainings; the copies are
# adapted in later cells. This runs before the JSON labels of all_classes are converted
# and deleted, so the copies still contain the JSON files those later cells filter.
# copy_folder skips a target that already exists.
all_classes = "yolov5/dataset/all_classes"
faces_classes = "yolov5/dataset/face_classes"
one_person = "yolov5/dataset/one_person"
copy_folder(all_classes, faces_classes)
copy_folder(all_classes, one_person)

Ordner 'yolov5/dataset/all_classes' wurde erfolgreich nach 'yolov5/dataset/face_classes' kopiert.
Ordner 'yolov5/dataset/all_classes' wurde erfolgreich nach 'yolov5/dataset/one_person' kopiert.


In [30]:
# Convert the JSON labels of the all_classes dataset into the YOLOv5 format,
# using the full class mapping.
convert_json(train_labels_output_dir, val_labels_output_dir, test_labels_output_dir, class_mapping_all)

Kopieren und Konvertieren erfolgreich.


In [31]:
# Delete the JSON files from the given yolov5/dataset folders
def delete_json_files(folders):
    """Remove every ``*.json`` file directly inside each folder of ``folders``.

    Folders that do not exist are skipped silently. Sub-folders are not searched.
    """
    existing_folders = (folder for folder in folders if os.path.exists(folder))
    for folder in existing_folders:
        for entry in os.listdir(folder):
            if entry.endswith(".json"):
                os.remove(os.path.join(folder, entry))

# Label folders of the all_classes dataset. After the conversion the original JSON
# files are no longer needed next to the generated .txt labels.
folders = [
    "yolov5/dataset/all_classes/labels/train",
    "yolov5/dataset/all_classes/labels/test",
    "yolov5/dataset/all_classes/labels/val",
]

delete_json_files(folders)

#### Face_Classes für Training erstellen
Hier wird derselbe Datensatz verwendet wie oben bei der Datenvorbereitung, der den großen Datensatz mit den JSON-Dateien enthält. In diesem Fall liegt der Fokus auf dem Anwendungsfall der Face_Classes. Das bedeutet, dass wir mit insgesamt 4 Klassen trainieren werden. Vor der Konvertierung in das YOLO-Format müssen jedoch alle anderen Klassen in der Annotationsdatei identifiziert und entfernt werden.

In [32]:

# Drop all annotations that do not belong to the face classes used for training
def filter_json_files(folder_path, class_mapping_faces):
    """Rewrite every ``*.json`` file in ``folder_path``, keeping only face annotations.

    An entry of ``"Annotations"`` is kept when its ``"classname"`` is contained in
    ``class_mapping_faces`` (any container supporting ``in``). All other keys of
    the JSON document are written back unchanged.
    """
    for filename in os.listdir(folder_path):
        if not filename.endswith(".json"):
            continue

        file_path = os.path.join(folder_path, filename)
        with open(file_path, 'r') as file:
            data = json.load(file)

        data["Annotations"] = [
            entry for entry in data["Annotations"]
            if entry["classname"] in class_mapping_faces
        ]

        with open(file_path, 'w') as file:
            json.dump(data, file)

# Label folders of the face_classes copy (they still contain the JSON annotations)
folder_path1 = "yolov5/dataset/face_classes/labels/test"
folder_path2 = "yolov5/dataset/face_classes/labels/val"
folder_path3 = "yolov5/dataset/face_classes/labels/train"

# Names of the face classes to keep. This is a plain list here; a later cell rebinds
# the same name to a name -> index dict.
class_mapping_faces = [
    "face_no_mask",
    "face_with_mask",
    "face_other_covering",
    "face_with_mask_incorrect"
]

# Remove the annotations of all other classes from the JSON files
filter_json_files(folder_path1, class_mapping_faces)
filter_json_files(folder_path2, class_mapping_faces)
filter_json_files(folder_path3, class_mapping_faces)

In [33]:
# Convert a face-class JSON annotation file into a YOLOv5 label file; class indices
# come from the given mapping, so the classes are renumbered.
def convert_json_to_yolo(json_path, image_path, output_dir, class_mapping):
    """Write ``<image stem>.txt`` with one YOLO line per annotation into ``output_dir``.

    Args:
        json_path: JSON file with an ``"Annotations"`` list. Each entry has a
            ``"BoundingBox"`` of four values (x_min, y_min, x_max, y_max; assumed
            to be pixel coordinates) and a ``"classname"``.
        image_path: Image belonging to the annotation. It is read only to obtain
            its width and height.
        output_dir: Directory for the label file; created if missing.
        class_mapping: Dict mapping class name to class index.

    Raises:
        ValueError: If the image cannot be read, or an annotation uses a class
            that is not in ``class_mapping``.
    """
    os.makedirs(output_dir, exist_ok=True)

    with open(json_path, 'r') as f:
        json_data = json.load(f)

    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising for a missing or unreadable file.
    if image is None:
        raise ValueError(f"Bild konnte nicht gelesen werden: {image_path}")
    # Only height and width are needed; slicing also works without a channel axis.
    image_height, image_width = image.shape[:2]

    yolo_annotations = []
    for bbox in json_data["Annotations"]:
        x_min, y_min, x_max, y_max = bbox["BoundingBox"]
        class_name = bbox["classname"]

        if class_name not in class_mapping:
            raise ValueError(f"Ungültige Klasse: {class_name}")

        class_index = class_mapping[class_name]

        # YOLO format: box centre and box size, each relative to the image size
        x_center = (x_min + x_max) / (2 * image_width)
        y_center = (y_min + y_max) / (2 * image_height)
        width = (x_max - x_min) / image_width
        height = (y_max - y_min) / image_height

        yolo_annotations.append(f"{class_index} {x_center} {y_center} {width} {height}")

    # The label file is named after the image without its last extension.
    image_filename = os.path.splitext(os.path.basename(image_path))[0]
    output_path = os.path.join(output_dir, f"{image_filename}.txt")

    with open(output_path, 'w') as f:
        for annotation in yolo_annotations:
            f.write(annotation + '\n')

def get_image_filename_without_extension(filename):
    """Return ``filename`` with its last extension removed (``"a.jpg.json"`` -> ``"a.jpg"``)."""
    stem, _extension = os.path.splitext(filename)
    return stem


def convert_json_files_to_yolo(json_directory, image_directory, output_directory, class_mapping):
    """Convert every ``*.json`` annotation in ``json_directory`` into a YOLO label file.

    An annotation is named ``<image file name>.json``, so removing the ``.json``
    suffix yields the image file name, including its own extension. Annotations
    whose image is missing in ``image_directory`` are reported and skipped.

    Args:
        json_directory: Folder containing the JSON annotation files.
        image_directory: Folder containing the matching images.
        output_directory: Folder that receives the ``.txt`` label files.
        class_mapping: Dict mapping class name to class index.
    """
    for filename in os.listdir(json_directory):
        if not filename.endswith(".json"):
            continue

        json_path = os.path.join(json_directory, filename)
        # "0001.jpg.json" -> "0001.jpg": the extension is already part of the name,
        # so a single existence check covers every image type.
        image_filename = os.path.splitext(filename)[0]
        image_path = os.path.join(image_directory, image_filename)

        if not os.path.exists(image_path):
            print(f"Image file not found for JSON: {image_path}")
            continue  # Skip this JSON file if no corresponding image is found

        convert_json_to_yolo(json_path, image_path, output_directory, class_mapping)

# classes.txt holds one "<class index> <class name>" line per class
def write_classes_to_file(class_mapping, json_directory):
    """Write ``classes.txt`` into ``json_directory`` from a name -> index mapping.

    Lines are written in the iteration order of ``class_mapping``; an existing
    ``classes.txt`` is overwritten.
    """
    target_path = os.path.join(json_directory, "classes.txt")
    with open(target_path, 'w') as f:
        f.writelines(
            f"{class_index} {class_name}\n"
            for class_name, class_index in class_mapping.items()
        )

In [34]:
# Label (JSON) and image folders of the face_classes dataset, one pair per split
json_directory1 = "yolov5/dataset/face_classes/labels/test/"
image_directory1 = "yolov5/dataset/face_classes/images/test"
json_directory2 = "yolov5/dataset/face_classes/labels/val/"
image_directory2 = "yolov5/dataset/face_classes/images/val"
json_directory3 = "yolov5/dataset/face_classes/labels/train/"
image_directory3 = "yolov5/dataset/face_classes/images/train"

# Class mapping for the face classes, renumbered from 0.
# This rebinds class_mapping_faces from the earlier list to a name -> index dict.
class_mapping_faces = {
    "face_no_mask": 0,
    "face_with_mask": 1,
    "face_other_covering": 2,
    "face_with_mask_incorrect": 3,
}

# Convert the JSON files into the YOLOv5 format. The .txt labels are written into the
# same folder as the JSON files. Afterwards classes.txt is written into each label folder.
convert_json_files_to_yolo(json_directory1, image_directory1, json_directory1, class_mapping_faces)
convert_json_files_to_yolo(json_directory2, image_directory2, json_directory2, class_mapping_faces)
convert_json_files_to_yolo(json_directory3, image_directory3, json_directory3, class_mapping_faces)
write_classes_to_file(class_mapping_faces, json_directory1)
write_classes_to_file(class_mapping_faces, json_directory2)
write_classes_to_file(class_mapping_faces, json_directory3)

In [35]:
# Label folders of the face_classes dataset
folders = [
    json_directory1,
    json_directory2,
    json_directory3,
]
# Delete the JSON files; the generated .txt labels stay in place
delete_json_files(folders)

#### Einzel Personen Datensatz
In diesem Datensatz wird ebenfalls der große Datensatz mit den JSON-Annotationen verarbeitet. Der Fokus liegt hier ausschließlich auf Bildern mit einem einzelnen Gesicht: Bilder, auf denen mehrere Gesichter annotiert sind, werden aus dem Datensatz entfernt. Dazu werden nur die 4 Face_Classes berücksichtigt. Enthält eine Annotationsdatei mehrere Face_Classes-Einträge, wurden mehrere Personen erkannt; solche Annotationsdateien und die dazugehörigen Bilder werden gelöscht.

In [36]:
# Label folders of the one_person copy (they still contain the JSON annotations)
label_folder_path1 = "yolov5/dataset/one_person/labels/test"
label_folder_path2 = "yolov5/dataset/one_person/labels/val"
label_folder_path3 = "yolov5/dataset/one_person/labels/train"

# Keep only the face-class annotations. class_mapping_faces is the name -> index dict
# at this point; the filter only tests membership of the class name.
filter_json_files(label_folder_path1, class_mapping_faces)
filter_json_files(label_folder_path2, class_mapping_faces)
filter_json_files(label_folder_path3, class_mapping_faces)

In [37]:
# Check whether a JSON annotation document contains more than one face annotation
def check_classname_frequency(data, face_classes=None):
    """Return True when ``data`` annotates more than one face.

    Every annotation whose ``"classname"`` is a face class counts as one face,
    whatever the class. Two faces of different classes (for example one
    ``face_no_mask`` and one ``face_with_mask``) therefore also count as several
    persons; counting per class name would miss that case.

    Args:
        data: Parsed JSON document with an ``"Annotations"`` list.
        face_classes: Container of face class names. Defaults to the
            module-level ``class_mapping_faces``.

    Returns:
        True if more than one face annotation is present, otherwise False.
    """
    if face_classes is None:
        face_classes = class_mapping_faces

    face_count = sum(
        1 for annotation in data['Annotations']
        if annotation['classname'] in face_classes
    )
    return face_count > 1

# Delete the JSON files for which check_classname_frequency reports a repeated face class
def filterjson_files(folder_path):
    """Remove every ``*.json`` file in ``folder_path`` flagged by ``check_classname_frequency``.

    Each JSON file is parsed and passed to ``check_classname_frequency``; the file
    is deleted when that check returns a true value. Other files are left alone.
    """
    json_names = [name for name in os.listdir(folder_path) if name.endswith(".json")]
    for json_name in json_names:
        json_path = os.path.join(folder_path, json_name)
        with open(json_path, 'r') as file:
            document = json.load(file)

        if check_classname_frequency(document):
            os.remove(json_path)

# Delete the JSON files flagged by check_classname_frequency in all three splits
filterjson_files(label_folder_path1)
filterjson_files(label_folder_path2)
filterjson_files(label_folder_path3)

In [38]:
# After JSON files were deleted: remove every image that no longer has a JSON annotation
def check_json_images(json_folder, image_folder):
    """Delete images in ``image_folder`` that have no annotation in ``json_folder``.

    An annotation is named ``<image file name>.json``, so an image is kept only
    when ``<image file name>.json`` exists. The full file name is compared: taking
    the text before the first dot would break names that contain dots and would
    ignore the image extension.

    Args:
        json_folder: Folder containing the JSON annotation files.
        image_folder: Folder containing the images; unmatched images are deleted.
    """
    image_extensions = (".jpg", ".jpeg", ".png")

    # Image file names that still have an annotation ("a.jpg.json" -> "a.jpg").
    # A set keeps the membership test cheap for large folders.
    annotated_images = {
        name[:-len(".json")]
        for name in os.listdir(json_folder)
        if name.lower().endswith(".json")
    }

    for file in os.listdir(image_folder):
        is_image = file.lower().endswith(image_extensions)
        if is_image and file not in annotated_images:
            # No annotation is left for this image, so remove it
            os.remove(os.path.join(image_folder, file))

In [39]:
# Image folders of the one_person dataset
image_folder1 = "yolov5/dataset/one_person/images/test"
image_folder2 = "yolov5/dataset/one_person/images/val"
image_folder3 = "yolov5/dataset/one_person/images/train"

# Label folders of the one_person dataset (same values as assigned in the earlier cell)
label_folder_path1 = "yolov5/dataset/one_person/labels/test"
label_folder_path2 = "yolov5/dataset/one_person/labels/val"
label_folder_path3 = "yolov5/dataset/one_person/labels/train"

# Delete the images whose JSON file was removed in the previous step
check_json_images(label_folder_path1, image_folder1)
check_json_images(label_folder_path2, image_folder2)
check_json_images(label_folder_path3, image_folder3)

In [40]:
# Convert the remaining JSON files into the YOLOv5 format (the labels are written next
# to the JSON files) and create classes.txt in every label folder
convert_json_files_to_yolo(label_folder_path1, image_folder1, label_folder_path1, class_mapping_faces)
convert_json_files_to_yolo(label_folder_path2, image_folder2, label_folder_path2, class_mapping_faces)
convert_json_files_to_yolo(label_folder_path3, image_folder3, label_folder_path3, class_mapping_faces)
write_classes_to_file(class_mapping_faces, label_folder_path1)
write_classes_to_file(class_mapping_faces, label_folder_path2)
write_classes_to_file(class_mapping_faces, label_folder_path3)

In [41]:
# Label folders of the one_person dataset
folders = [
    label_folder_path1,
    label_folder_path2,
    label_folder_path3,
]

# Delete the JSON files; the generated .txt labels stay in place
delete_json_files(folders)

In [42]:
# Check that .txt label files and images match (classes.txt is excluded)
def check_txt_images(txt_folder, image_folder):
    """Print every label file without an image and every image without a label file.

    Nothing is deleted. The check is skipped entirely when ``txt_folder`` has no
    ``classes.txt``. Names are compared by file stem (file name without its last
    extension), which matches how the converters name the label files; cutting at
    the first dot would report false mismatches for names containing dots.

    Args:
        txt_folder: Folder containing the YOLO ``.txt`` label files.
        image_folder: Folder containing the images.
    """
    classes_txt_path = os.path.join(txt_folder, "classes.txt")
    if not os.path.exists(classes_txt_path):
        return

    image_extensions = (".jpg", ".jpeg", ".png")

    # Label files without a matching image; classes.txt is not a label file
    for txt_file in os.listdir(txt_folder):
        if not txt_file.endswith(".txt") or txt_file == "classes.txt":
            continue
        txt_name = os.path.splitext(txt_file)[0]
        has_image = any(
            os.path.exists(os.path.join(image_folder, txt_name + extension))
            for extension in image_extensions
        )
        if not has_image:
            print(f"Fehlendes Bild für TXT-Datei: {txt_file}")

    # Images without a matching label file
    for image_file in os.listdir(image_folder):
        if image_file.endswith(image_extensions):
            image_name = os.path.splitext(image_file)[0]
            txt_path = os.path.join(txt_folder, f"{image_name}.txt")
            if not os.path.exists(txt_path) and image_name != "classes":
                print(f"Fehlende TXT-Datei für Bild: {image_file}")

In [43]:
# Check that .txt label files and images match for the three JSON-based datasets,
# one dataset after the other, each in the split order train, test, val
for _dataset_name in ("all_classes", "face_classes", "one_person"):
    for _split_name in ("train", "test", "val"):
        check_txt_images(
            f"yolov5/dataset/{_dataset_name}/labels/{_split_name}",
            f"yolov5/dataset/{_dataset_name}/images/{_split_name}",
        )

#### Dark Datensatz
Der dunkle Datensatz wurde durch Augmentation verdunkelt, da wir keinen bereits dunklen Datensatz in Kaggle finden konnten. Ziel ist es, ein Modell zu trainieren, das auch im Dunkeln Masken erkennen kann. In diesem Datensatz werden ausschließlich die 3 Klassen `without_mask`, `with_mask` und `mask_weared_incorrect` betrachtet. Es ist wichtig zu beachten, dass die Annotationen für diesen Datensatz als XML-Dateien vorliegen und in das YOLO-Format konvertiert werden müssen.

In [45]:
# Convert one XML annotation file into a YOLOv5 label file
def convert_xml_to_yolo(xml_path, image_directory, output_dir, class_mapping):
    """Write ``<image stem>.txt`` with one YOLO line per ``<object>`` into ``output_dir``.

    The image name is taken from the ``<filename>`` element of the XML file; the
    image itself is looked up as ``<stem>.png`` in ``image_directory``. If that
    file does not exist, a message is printed and nothing is written.

    Args:
        xml_path: XML annotation file with ``<filename>`` and ``<object>`` elements.
            Each object has a ``<name>`` and a ``<bndbox>`` with xmin/ymin/xmax/ymax.
        image_directory: Folder containing the PNG images.
        output_dir: Directory for the label file; created if missing.
        class_mapping: Dict mapping class name to class index.

    Raises:
        ValueError: If the image exists but cannot be read, or an object uses a
            class that is not in ``class_mapping``.
    """
    os.makedirs(output_dir, exist_ok=True)

    root = ET.parse(xml_path).getroot()

    # File name of the image without its extension
    image_filename_without_extension = os.path.splitext(root.find('filename').text)[0]

    image_path = os.path.join(image_directory, f"{image_filename_without_extension}.png")

    if not os.path.exists(image_path):
        print(f"Image file not found: {image_path}")
        return

    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising for an unreadable file.
    if image is None:
        raise ValueError(f"Bild konnte nicht gelesen werden: {image_path}")
    # Only height and width are needed; slicing also works without a channel axis.
    image_height, image_width = image.shape[:2]

    yolo_annotations = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text

        if class_name not in class_mapping:
            raise ValueError(f"Ungültige Klasse: {class_name}")

        class_index = class_mapping[class_name]

        # Corner coordinates of the bounding box
        bbox = obj.find('bndbox')
        x_min = float(bbox.find('xmin').text)
        y_min = float(bbox.find('ymin').text)
        x_max = float(bbox.find('xmax').text)
        y_max = float(bbox.find('ymax').text)

        # YOLO format: box centre and box size, each relative to the image size
        x_center = (x_min + x_max) / (2 * image_width)
        y_center = (y_min + y_max) / (2 * image_height)
        width = (x_max - x_min) / image_width
        height = (y_max - y_min) / image_height

        yolo_annotations.append(f"{class_index} {x_center} {y_center} {width} {height}")

    output_path = os.path.join(output_dir, f"{image_filename_without_extension}.txt")

    with open(output_path, 'w') as f:
        for annotation in yolo_annotations:
            f.write(annotation + '\n')


In [46]:
# XML annotations, darkened images and label target folder of the dark dataset
xml_directory = "../Datasets/Kaggle Face Mask Detection Full/annotations"
image_directory = "../Datasets/Kaggle Face Mask Detection Full/dark/images"
output_directory = "yolov5/dataset/dark/labels"

# Class mapping of the dark dataset: class name -> YOLO class index
class_mapping_dark = {
    "without_mask": 0,
    "with_mask": 1,
    "mask_weared_incorrect": 2,
}

def convert_xml_files_to_yolo(xml_directory, image_directory, output_directory, class_mapping):
    """Run ``convert_xml_to_yolo`` for every ``*.xml`` file in ``xml_directory``.

    Files with another extension are ignored; the remaining arguments are passed
    through unchanged.
    """
    xml_names = (name for name in os.listdir(xml_directory) if name.endswith(".xml"))
    for xml_name in xml_names:
        convert_xml_to_yolo(
            os.path.join(xml_directory, xml_name),
            image_directory,
            output_directory,
            class_mapping,
        )

# Convert all XML annotations of the dark dataset into YOLOv5 label files
convert_xml_files_to_yolo(xml_directory, image_directory, output_directory, class_mapping_dark)


In [47]:
# Target folders inside the YOLOv5 dataset tree for the dark dataset
labels_output_dir = "yolov5/dataset/dark/labels"
image_output_dir = "yolov5/dataset/dark/images"

# Read images and label files as matched pairs.
# A label file is named "<image stem>.txt", so pairs are built by stem. Pairing by
# os.listdir() position is unsafe: the listing order is arbitrary, and an image
# without a label file would shift every following pair.
image_by_stem = {
    os.path.splitext(name)[0]: name
    for name in sorted(os.listdir(image_directory))
}
label_names = sorted(x for x in os.listdir(output_directory) if x.endswith(".txt"))
paired_names = [
    (image_by_stem[os.path.splitext(label_name)[0]], label_name)
    for label_name in label_names
    if os.path.splitext(label_name)[0] in image_by_stem
]
# Images without a label file (and label files without an image) are left out.
images = [os.path.join(image_directory, image_name) for image_name, _ in paired_names]
annotations = [os.path.join(output_directory, label_name) for _, label_name in paired_names]

# Abort early when there is nothing to split.
if len(images) < 2 or len(annotations) < 2:
    # Raise instead of calling exit(): exit() is meant for interactive sessions and
    # does not cleanly abort a notebook cell.
    raise ValueError("Nicht genügend Datenpunkte für den Split vorhanden.")

# Train/test split: 70% training, 30% held out. These assignments rebind the
# module-level lists that split_dataset reads.
train_images, test_images, train_annotations, test_annotations = train_test_split(images, annotations, test_size=0.3, random_state=1)
# Split the held-out 30% in half: 15% validation, 15% test.
val_images, test_images, val_annotations, test_annotations = train_test_split(test_images, test_annotations, test_size=0.5, random_state=1)

In [48]:
# Copy the dark images and label files into their train/val/test folders.
# split_dataset reads the module-level split lists assigned in the previous cell.
train_labels_output_dir, val_labels_output_dir, test_labels_output_dir = split_dataset(labels_output_dir, image_output_dir)

In [49]:
# Delete the leftover .txt files in the labels folder that are no longer needed
def delete_files_in_folder(folder_path):
    """Delete every regular file directly inside ``folder_path``.

    Sub-folders and their contents are kept. A failure on one entry is printed and
    does not stop the remaining entries from being processed.
    """
    for entry in os.listdir(folder_path):
        candidate = os.path.join(folder_path, entry)
        try:
            if not os.path.isfile(candidate):
                continue
            os.remove(candidate)
        except Exception as e:
            print(f"Fehler beim Löschen der Datei {candidate}: {e}")

# Delete the leftover .txt files directly inside dataset/dark/labels; the train/val/test
# sub-folders that now hold the copies are kept
delete_files_in_folder(output_directory)

In [50]:
# Label folders of the dark dataset
label_test = "yolov5/dataset/dark/labels/test"
label_val = "yolov5/dataset/dark/labels/val"
label_train = "yolov5/dataset/dark/labels/train"

# Write the classes with their class index into classes.txt of every label folder
write_classes_to_file(class_mapping_dark, label_test)
write_classes_to_file(class_mapping_dark, label_val)
write_classes_to_file(class_mapping_dark, label_train)

#### YAML-Datei für alle Datensätze erstellen
Die YAML-Datei ist eine Konfigurationsdatei für das Training des YOLO-Modells. Diese ist wie folgt aufgebaut:
- `train`: Pfad zum Verzeichnis, das die Trainingsbilder enthält.
- `val`: Pfad zum Verzeichnis mit den Validierungsbildern.
- `test`: Pfad zum Verzeichnis mit den Testbildern.
- `nc`: Anzahl der Klassen im Datensatz.
- `names`: Eine Liste der Klassennamen. 
- `patience`: Anzahl der Epochen, die gewartet werden, wenn die Leistung des Modells nicht besser wird, bevor das Training beendet wird. Hierbei kann bei Overfitting das Training automatisch abgebrochen werden.
- `delta`: Schwellenwert, um zu bestimmen, ob die Leistung des Modells sich verbessert hat. Wenn die Verbesserung kleiner als dieser Schwellenwert ist, wird das Training gestoppt. 

In [51]:
def create_data_yaml_file(data_path, train_path, val_path, test_path, class_names_file):
    """Write the dataset YAML file used for the YOLO training.

    The class names are read from ``class_names_file``, which holds one
    ``"<index> <name>"`` line per class. Blank lines are ignored, and the names are
    ordered by class index so that the position in ``names`` equals the class index
    even if the file lists the classes in another order.

    Args:
        data_path: Path of the YAML file to create (overwritten if present).
        train_path: Value written for the ``train`` key.
        val_path: Value written for the ``val`` key.
        test_path: Value written for the ``test`` key.
        class_names_file: Path to the ``classes.txt`` file.

    Raises:
        ValueError: If a non-blank line does not start with an integer index.
    """
    indexed_names = []
    with open(class_names_file, 'r') as f:
        for line in f:
            parts = line.strip().split(' ', 1)
            if len(parts) != 2:
                continue  # blank line or line without a class name
            index_text, class_name = parts
            indexed_names.append((int(index_text), class_name))

    indexed_names.sort(key=lambda item: item[0])
    class_names = [class_name for _, class_name in indexed_names]

    # NOTE(review): verify that the training script reads `patience` and `delta` from
    # the dataset YAML — presumably early stopping is configured on the training
    # command line instead.
    data_yaml = f"train: {train_path}\n" \
                f"val: {val_path}\n" \
                f"test: {test_path}\n" \
                f"nc: {len(class_names)}\n" \
                f"names: {class_names}\n" \
                f"patience: 5  # Anzahl der Epochen, die gewartet werden, wenn die Leistung nicht besser wird\n" \
                f"delta: 0.0001  # Schwellenwert, um zu bestimmen, ob die Leistung sich verbessert hat\n"

    with open(data_path, 'w') as file:
        file.write(data_yaml)

    print(f"Die data.yaml-Datei wurde erfolgreich erstellt: {data_path}")

In [52]:
# Create the YAML file for the all_classes dataset; the class names are read from the
# classes.txt in its training label folder
create_data_yaml_file("yolov5/dataset/all_classes/data.yaml", 
                      "../yolov5/dataset/all_classes/images/train/", 
                      "../yolov5/dataset/all_classes/images/val/",
                      "../yolov5/dataset/all_classes/images/test/",
                      "yolov5/dataset/all_classes/labels/train/classes.txt")

Die data.yaml-Datei wurde erfolgreich erstellt: yolov5/dataset/all_classes/data.yaml


In [53]:
# Create the YAML file for the face_classes dataset; the class names are read from the
# classes.txt in its training label folder
create_data_yaml_file(  "yolov5/dataset/face_classes/data.yaml",
                        "../yolov5/dataset/face_classes/images/train/",
                        "../yolov5/dataset/face_classes/images/val/",
                        "../yolov5/dataset/face_classes/images/test/",
                        "yolov5/dataset/face_classes/labels/train/classes.txt")

Die data.yaml-Datei wurde erfolgreich erstellt: yolov5/dataset/face_classes/data.yaml


In [54]:
# Create the YAML file for the one_person dataset; the class names are read from the
# classes.txt in its training label folder
create_data_yaml_file(  "yolov5/dataset/one_person/data.yaml",
                        "../yolov5/dataset/one_person/images/train/",
                        "../yolov5/dataset/one_person/images/val/",
                        "../yolov5/dataset/one_person/images/test/",
                        "yolov5/dataset/one_person/labels/train/classes.txt")

Die data.yaml-Datei wurde erfolgreich erstellt: yolov5/dataset/one_person/data.yaml


In [55]:
# Create the YAML file for the dark dataset; the class names are read from the
# classes.txt in its training label folder
create_data_yaml_file(  "yolov5/dataset/dark/data.yaml",
                        "../yolov5/dataset/dark/images/train/",
                        "../yolov5/dataset/dark/images/val/",
                        "../yolov5/dataset/dark/images/test/",
                        "yolov5/dataset/dark/labels/train/classes.txt")

Die data.yaml-Datei wurde erfolgreich erstellt: yolov5/dataset/dark/data.yaml
