# Generar archivos .yaml para entrenar modelos YOLO

En este notebook vamos a generar los archivos .yaml que utilizaremos para entrenar nuestros modelos YOLO con validación cruzada. Vamos a generar una versión para local y otra para Google Colab. Vamos a usar los datos ya procesados de DatasetNinja, por lo que es necesario haber ejecutado el notebook `prepare-DatasetNinja.ipynb` antes de ejecutar este notebook.

In [1]:
import os, sys

In [2]:
# Root folder that holds the generated .yaml files, with one sub-folder
# per target environment (local machine / Google Drive).
PATH = '../yaml/datasetninja'
PATH_LOCAL, PATH_GDRIVE = (
    os.path.join(PATH, subfolder) for subfolder in ('local', 'gdrive')
)

# Location of the processed DatasetNinja data.
DATA_PATH = '../data/rdd2022-DatasetNinja'

# Region names used to build the per-region file names and image paths.
REGIONS = [
    'China_Drone',
    'China_MotorBike',
    'Czech',
    'India',
    'Japan',
    'Norway',
    'United_States',
]
# Fold indices; each one is used once as the validation fold.
ITERS = list(range(4))

In [3]:
# Generate the cross-validation dataset .yaml files for Google Drive and local use.
#
# File name format: train_<region>_val_fold_<iteration>.yaml
#   - <iteration> is the fold used for validation; the remaining folds in ITERS
#     are used for training.
#   - <region> is the region whose data is used for training and validation,
#     or "All" to use every region in REGIONS.


def _reset_output_dir(directory):
    """Create *directory* if it is missing and delete every entry inside it.

    The directory is created before it is listed, so a missing sub-folder
    under an already existing parent no longer raises FileNotFoundError.
    """
    os.makedirs(directory, exist_ok=True)
    for entry in os.listdir(directory):
        os.remove(os.path.join(directory, entry))


def _write_dataset_yaml(yaml_path, images_root, regions, val_fold, folds):
    """Write one dataset .yaml file.

    Args:
        yaml_path: Destination file path.
        images_root: Value written under the ``path:`` key.
        regions: Region names whose folds are listed in the file.
        val_fold: Fold index listed under ``val:``.
        folds: All fold indices; every fold except *val_fold* is listed
            under ``train:``.
    """
    train_folds = [fold for fold in folds if fold != val_fold]

    lines = [f'path: {images_root}\n', 'train:\n']
    # Regions in the outer loop, folds in the inner loop, so entries are
    # grouped by region.
    lines += [
        f'- {region}/fold_{fold}/images\n'
        for region in regions
        for fold in train_folds
    ]
    lines.append('val:\n')
    lines += [f'- {region}/fold_{val_fold}/images\n' for region in regions]
    lines += [
        '\nnc: 4\n',
        'names:\n',
        '  0: D00 - Longitudinal Crack\n',
        '  1: D10 - Transverse Crack\n',
        '  2: D20 - Alligator Crack\n',
        '  3: D40 - Pothole\n',
    ]

    with open(yaml_path, 'w', encoding='utf-8') as f:
        f.writelines(lines)


# Make sure both output folders exist and start empty.
for output_dir in (PATH_LOCAL, PATH_GDRIVE):
    _reset_output_dir(output_dir)

# Google Drive files first, then the local ones; they differ only in the
# output folder and in the value written under `path:`.
for output_dir, relative_path in (
    (PATH_GDRIVE, 'gdrive/MyDrive/tfg-informatica/datasets/RDD2022/images'),
    (PATH_LOCAL, os.path.join(DATA_PATH, 'images')),
):
    for iteration in ITERS:
        # One file per region ...
        for region in REGIONS:
            _write_dataset_yaml(
                os.path.join(output_dir, f'train_{region}_val_fold_{iteration}.yaml'),
                relative_path,
                [region],
                iteration,
                ITERS,
            )
        # ... plus one file that combines every region.
        _write_dataset_yaml(
            os.path.join(output_dir, f'train_All_val_fold_{iteration}.yaml'),
            relative_path,
            REGIONS,
            iteration,
            ITERS,
        )