# Wandle Datensatz in das Format für YoloX und Yolov8 um

In [72]:
import random
import os
import shutil
import json

import numpy as np
import pandas as pd

from pycocotools import coco

## YoloX

In [73]:
datasetPathSRC = '../data/dataFiltered'
datasetPathYoloX = '../data/dataYoloX'

# Create the folder structure if it does not exist yet. Every sub-folder is
# created on its own (exist_ok=True), so a dataset directory that already
# exists but is incomplete gets completed instead of being skipped.
for subFolder in ('trainImages', 'valImages', 'annotations'):
    os.makedirs(os.path.join(datasetPathYoloX, subFolder), exist_ok=True)

In [74]:
# COCO dataset categories: the position of a class name in this tuple is its id
categoryNames = ('car', 'pedestrian', 'trafficLight', 'truck', 'biker')
categories = [
    {'id': categoryId, 'name': categoryName}
    for categoryId, categoryName in enumerate(categoryNames)
]


In [75]:
def convertDatasetToCoco(annotationFilePath, datasetPathSRC, datasetPathYoloX, imageFolder, categories):
    """
    Convert a CSV-annotated dataset into the COCO format and copy its images.

    annotationFilePath: path to the CSV file with the annotations; the columns
        filename, width, height, class, xmin, ymin, xmax and ymax are read
    datasetPathSRC: path to the source dataset (directory holding the images)
    datasetPathYoloX: path to the YoloX dataset
    imageFolder: folder below datasetPathYoloX that receives the images
        (created if it is missing)
    categories: list of dicts with the keys 'id' and 'name'

    return: COCO dataset (pycocotools COCO object with 'categories', 'images'
        and 'annotations' filled in and its index created)
    raises: KeyError if a row's class is not listed in categories
    """
    categoryIdMap = {category['name']: category['id'] for category in categories}
    categoryData = [{'id': category['id'], 'name': category['name']} for category in categories]

    # Start from an empty COCO dataset and register the categories
    cocoDataset = coco.COCO()
    cocoDataset.dataset['categories'] = categoryData
    print("Categories: ", cocoDataset.dataset['categories'])

    annotationFile = pd.read_csv(annotationFilePath)
    images = []
    annotations = []

    # Make sure the target folder exists before images are copied into it
    imageTargetDir = os.path.join(datasetPathYoloX, imageFolder)
    os.makedirs(imageTargetDir, exist_ok=True)

    # Group the rows once per image instead of re-filtering the whole table
    # for every file name. sort=False keeps the files in order of first
    # appearance, so the position of a file is used as its image id.
    groupedByFile = annotationFile.groupby('filename', sort=False)
    for imageId, (filename, imageRows) in enumerate(groupedByFile):
        # Copy the image into the YoloX folder unless it is already there
        imagePathYoloX = os.path.join(imageTargetDir, filename)
        if not os.path.exists(imagePathYoloX):
            shutil.copy(os.path.join(datasetPathSRC, filename), imagePathYoloX)

        # Image size is taken from the first annotation row of the image
        images.append({
            'id': imageId,
            'file_name': filename,
            'height': int(imageRows['height'].iloc[0]),
            'width': int(imageRows['width'].iloc[0])
        })

        # One COCO annotation per row; the row's index label in the CSV table
        # serves as the annotation id. bbox is [x, y, width, height].
        for annotationId, row in imageRows.iterrows():
            boxWidth = row['xmax'] - row['xmin']
            boxHeight = row['ymax'] - row['ymin']
            annotations.append({
                'id': annotationId,
                'image_id': imageId,
                'category_id': categoryIdMap[row['class']],
                'bbox': [row['xmin'], row['ymin'], boxWidth, boxHeight],
                'area': boxWidth * boxHeight,
                'iscrowd': 0
            })

    cocoDataset.dataset['images'] = images
    cocoDataset.dataset['annotations'] = annotations
    cocoDataset.createIndex()

    return cocoDataset
            
    

In [76]:
# Convert the training split
annotationFilePath = os.path.join(datasetPathSRC, '_train_annotations_newClasses.csv')
trainCocoDataset = convertDatasetToCoco(
    annotationFilePath, datasetPathSRC, datasetPathYoloX, 'trainImages', categories
)

# Convert the validation split
annotationFilePath = os.path.join(datasetPathSRC, '_val_annotations_newClasses.csv')
valCocoDataset = convertDatasetToCoco(
    annotationFilePath, datasetPathSRC, datasetPathYoloX, 'valImages', categories
)

# Write both COCO datasets as JSON into the annotations folder
annotationsDir = os.path.join(datasetPathYoloX, 'annotations')
cocoOutputs = (
    ('instances_train.json', trainCocoDataset),
    ('instances_val.json', valCocoDataset),
)
for jsonName, cocoSplit in cocoOutputs:
    with open(os.path.join(annotationsDir, jsonName), 'w') as jsonFile:
        json.dump(cocoSplit.dataset, jsonFile)

Categories:  [{'id': 0, 'name': 'car'}, {'id': 1, 'name': 'pedestrian'}, {'id': 2, 'name': 'trafficLight'}, {'id': 3, 'name': 'truck'}, {'id': 4, 'name': 'biker'}]
creating index...
index created!
Categories:  [{'id': 0, 'name': 'car'}, {'id': 1, 'name': 'pedestrian'}, {'id': 2, 'name': 'trafficLight'}, {'id': 3, 'name': 'truck'}, {'id': 4, 'name': 'biker'}]
creating index...
index created!


## Yolov8

In [26]:
datasetPathSRC = '../data/dataFiltered'
datasetPathYolov8 = '../data/dataYolov8'

# Create the folder structure if it does not exist yet. Every leaf folder is
# created on its own (exist_ok=True, parents included), so a dataset directory
# that already exists but is incomplete gets completed instead of being skipped.
for contentFolder in ('images', 'labels'):
    for splitFolder in ('train', 'val'):
        os.makedirs(os.path.join(datasetPathYolov8, contentFolder, splitFolder), exist_ok=True)

In [27]:
# Categories used for the class-name -> id mapping: the position of a class
# name in this tuple is its id
categoryNames = ('car', 'pedestrian', 'trafficLight', 'truck', 'biker')
categories = [
    {'id': categoryId, 'name': categoryName}
    for categoryId, categoryName in enumerate(categoryNames)
]



In [30]:
def convertDatasetToYolo(annotationFile, datasetPathSRC, datasetPathYolov8, imageFolder, categories):
    """
    Write YOLO label files for a dataset and copy its images.

    For every distinct file name the image is copied to
    <datasetPathYolov8>/images/<imageFolder>/ (unless it is already there) and
    a text file with the same base name is written to
    <datasetPathYolov8>/labels/<imageFolder>/. Each line of that file describes
    one bounding box as "<class id> <x center> <y center> <width> <height>",
    where x center and width are divided by the image width and y center and
    height are divided by the image height.

    annotationFile: DataFrame with the columns filename, width, height, class,
        xmin, ymin, xmax and ymax
    datasetPathSRC: path to the source dataset (directory holding the images)
    datasetPathYolov8: path to the Yolov8 dataset
    imageFolder: name of the split folder below images/ and labels/
        (both are created if missing)
    categories: list of dicts with the keys 'id' and 'name'

    return: None
    raises: KeyError if a row's class is not listed in categories
    """
    categoryIdMap = {category['name']: category['id'] for category in categories}

    imageDir = os.path.join(datasetPathYolov8, 'images', imageFolder)
    labelDir = os.path.join(datasetPathYolov8, 'labels', imageFolder)
    os.makedirs(imageDir, exist_ok=True)
    os.makedirs(labelDir, exist_ok=True)

    # Group the rows once per image instead of re-filtering the whole table
    # for every file name; sort=False keeps the order of first appearance.
    for fileName, imageRows in annotationFile.groupby('filename', sort=False):
        imagePathYolov8 = os.path.join(imageDir, fileName)
        if not os.path.exists(imagePathYolov8):
            shutil.copy(os.path.join(datasetPathSRC, fileName), imagePathYolov8)

        # Image size is taken from the first annotation row of the image
        image_width = imageRows['width'].iloc[0]
        image_height = imageRows['height'].iloc[0]

        labelLines = []
        for _, row in imageRows.iterrows():
            x_center = (row['xmin'] + row['xmax']) / 2
            y_center = (row['ymin'] + row['ymax']) / 2
            bb_width = row['xmax'] - row['xmin']
            bb_height = row['ymax'] - row['ymin']
            # Horizontal values are normalized by the width, vertical values by
            # the height; using the width for both distorts non-square images.
            labelValues = (
                categoryIdMap[row['class']],
                x_center / image_width,
                y_center / image_height,
                bb_width / image_width,
                bb_height / image_height,
            )
            labelLines.append(' '.join(str(value) for value in labelValues) + '\n')

        # Swap only the final extension so that non-.jpg images also get a
        # .txt label file instead of a label written under the image name.
        labelName = os.path.splitext(fileName)[0] + '.txt'
        with open(os.path.join(labelDir, labelName), 'w') as labelFile:
            labelFile.writelines(labelLines)
    

In [31]:
# Convert the training split first, then the validation split
splitAnnotationFiles = (
    ('train', '_train_annotations_newClasses.csv'),
    ('val', '_val_annotations_newClasses.csv'),
)
for splitName, csvName in splitAnnotationFiles:
    annotationFilePath = os.path.join(datasetPathSRC, csvName)
    annotationFile = pd.read_csv(annotationFilePath)
    convertDatasetToYolo(annotationFile, datasetPathSRC, datasetPathYolov8, splitName, categories)