In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# timeout_ms=1200000 is 20 minutes expressed in milliseconds; force_remount=True
# presumably forces a fresh mount even if Drive is already mounted (per the flag
# name -- confirm against the google.colab documentation).
from google.colab import drive
drive.mount('/content/drive', timeout_ms=1200000, force_remount=True)

Mounted at /content/drive


In [None]:
import os
import shutil
import random

In [None]:
def _move_pairs(image_names, images_dir, labels_dir, images_dst, labels_dst):
    """Move each image and its same-stem ``.txt`` label into the destination folders."""
    for img in image_names:
        label = f"{os.path.splitext(img)[0]}.txt"
        shutil.move(os.path.join(images_dir, img), images_dst)
        shutil.move(os.path.join(labels_dir, label), labels_dst)


def transform_dataset(root_dir, train_ratio=0.7, validation_ratio=0.2, test_ratio=0.1, seed=None):
    """Randomly split a flat image/label dataset into train, val and test folders.

    Expects ``<root_dir>/images/*.jpg`` with labels in ``<root_dir>/labels/<stem>.txt``.
    Files are MOVED (not copied) into ``images/{train,val,test}`` and
    ``labels/{train,val,test}``, so the flat layout is modified in place.

    Only images that have a label file take part in the split, so the ratios
    describe the files that actually end up in each folder. Images without a
    label are left where they are.

    Args:
        root_dir: Dataset root containing the ``images`` and ``labels`` folders
            (they are created if missing).
        train_ratio: Fraction of labelled images moved to ``train``.
        validation_ratio: Fraction of labelled images moved to ``val``.
        test_ratio: Nominal fraction for ``test``. The test split always receives
            every pair left over after train and validation; a warning is issued
            when the three ratios do not sum to 1, because in that case this
            value is not what the test split will actually contain.
        seed: Optional seed. When given, the shuffle uses a dedicated
            ``random.Random(seed)`` so the split is reproducible. When ``None``
            the global ``random`` module is used, as before.

    Returns:
        None.
    """
    import warnings

    ratio_sum = train_ratio + validation_ratio + test_ratio
    if abs(ratio_sum - 1.0) > 1e-6:
        warnings.warn(
            f"train/validation/test ratios sum to {ratio_sum:.3f}, not 1; the test "
            "split receives every pair left after train and validation, so "
            "test_ratio is not applied as given."
        )

    images_dir = os.path.join(root_dir, 'images')
    labels_dir = os.path.join(root_dir, 'labels')

    # One (images, labels) destination pair per split, in slicing order.
    destinations = [
        (os.path.join(images_dir, split), os.path.join(labels_dir, split))
        for split in ('train', 'val', 'test')
    ]
    for images_dst, labels_dst in destinations:
        os.makedirs(images_dst, exist_ok=True)
        os.makedirs(labels_dst, exist_ok=True)

    # Keep only images that have a label, *before* computing split sizes, so
    # unlabelled images cannot silently shrink a split. The listing is sorted
    # because os.listdir order is arbitrary and would defeat a fixed seed.
    images = [
        f for f in sorted(os.listdir(images_dir))
        if f.endswith('.jpg')
        and os.path.exists(os.path.join(labels_dir, f"{os.path.splitext(f)[0]}.txt"))
    ]

    if seed is None:
        random.shuffle(images)
    else:
        random.Random(seed).shuffle(images)

    train_size = int(len(images) * train_ratio)
    val_size = int(len(images) * validation_ratio)

    # The test split is the remainder, so every labelled image is assigned.
    slices = [
        images[:train_size],
        images[train_size:train_size + val_size],
        images[train_size + val_size:],
    ]
    for chunk, (images_dst, labels_dst) in zip(slices, destinations):
        _move_pairs(chunk, images_dir, labels_dir, images_dst, labels_dst)


In [None]:
# NOTE: transform_dataset moves (does not copy) files into images/{train,val,test}
# and labels/{train,val,test} under root_dir, so the flat layout is modified in place.
# The path is relative, so it resolves against the notebook's current working
# directory -- TODO confirm this is the intended location (Drive is mounted at
# /content/drive above).
root_dir   = 'videos/dataset'   # Path to the original dataset
transform_dataset(root_dir)

## Separada por videos

In [None]:
# File-name prefixes whose images belong to the training split. Presumably each
# prefix identifies one source video -- verify against the dataset's file names.
# NOTE: 'Aug_1' is a string prefix of 'Aug_10' (listed under val_pref below), so any
# plain str.startswith() match on 'Aug_1' also matches Aug_10 files.
train_pref = ['Aug_1', 'Aug_2','Aug_4','Aug_5', 'Aug_6', 'Aug_7', 'Aug_8', 'Aug_9',
         'Jun_1', 'Jun_2', 'Jun_3', 'Jun_4',
         'Lobo_3', 'Lobo_6', 'Lobo_11', 'Lobo_4', 'Lobo_8', 'Lobo_9', 'Lobo_10', 'Lobo_12',
         'Manolo_1', 'Manolo_2', 'Manolo_3',
         'Otras_1', 'Otras_rear_1', 'Otras_rear_2',
         'Sep_2', 'Sep_3', 'Sep_4', 'Sep_5'
         ]
# File-name prefixes whose images belong to the validation split.
val_pref = ['Aug_3', 'Aug_10',
       'Jun_5',
       'Lobo_13','Lobo_14','Lobo_15',
       'Manolo_4',
       'Otras_2',
       'Sep_1'
       ]

In [None]:
def create_dataset(root_dir, train_prefixes=None, val_prefixes=None):
    """Split a flat image/label dataset into train and val folders by file-name prefix.

    Expects ``<root_dir>/images/*.jpg`` with labels in ``<root_dir>/labels/<stem>.txt``.
    Files are MOVED (not copied) into ``images/{train,val}`` and
    ``labels/{train,val}``. Images with no matching prefix, or with no label
    file, are left where they are.

    Each image is assigned by its LONGEST matching prefix across both lists.
    Plain ``startswith`` matching would send ``Aug_10...`` files to train because
    ``'Aug_1'`` is a prefix of ``'Aug_10'``, leaking validation data into training.
    A name that extends a listed prefix without being listed itself (for example a
    hypothetical ``Aug_11...``) still matches the shorter prefix; the file-name
    delimiter is not known here, so that case cannot be ruled out.

    Args:
        root_dir: Dataset root containing the ``images`` and ``labels`` folders
            (they are created if missing).
        train_prefixes: Prefixes for the training split. Defaults to the
            notebook-level ``train_pref`` list.
        val_prefixes: Prefixes for the validation split. Defaults to the
            notebook-level ``val_pref`` list.

    Returns:
        None.
    """
    # Fall back to the notebook globals only when the caller passes nothing,
    # which keeps ``create_dataset(root_dir)`` working as before.
    if train_prefixes is None:
        train_prefixes = train_pref
    if val_prefixes is None:
        val_prefixes = val_pref

    images_dir = os.path.join(root_dir, 'images')
    labels_dir = os.path.join(root_dir, 'labels')

    destinations = {
        split: (os.path.join(images_dir, split), os.path.join(labels_dir, split))
        for split in ('train', 'val')
    }
    for images_dst, labels_dst in destinations.values():
        os.makedirs(images_dst, exist_ok=True)
        os.makedirs(labels_dst, exist_ok=True)

    # Train is written last so that a prefix listed in both lists resolves to
    # train, matching the previous behaviour where the train pass ran first.
    split_of_prefix = {prefix: 'val' for prefix in val_prefixes}
    split_of_prefix.update({prefix: 'train' for prefix in train_prefixes})
    # Longest first, so the most specific prefix decides the split.
    prefixes_by_length = sorted(split_of_prefix, key=len, reverse=True)

    for img in sorted(os.listdir(images_dir)):
        if not img.endswith('.jpg'):
            continue
        matched = next((p for p in prefixes_by_length if img.startswith(p)), None)
        if matched is None:
            continue
        label_src_path = os.path.join(labels_dir, f"{os.path.splitext(img)[0]}.txt")
        if not os.path.exists(label_src_path):
            continue
        images_dst, labels_dst = destinations[split_of_prefix[matched]]
        shutil.move(os.path.join(images_dir, img), images_dst)
        shutil.move(label_src_path, labels_dst)

In [None]:
# NOTE: create_dataset moves (does not copy) files into images/{train,val} and
# labels/{train,val} under root_dir, so the flat layout is modified in place.
# This relative path differs from the 'videos/dataset' used for the random split
# earlier in the notebook -- TODO confirm which directory is intended.
root_dir   = 'dataset'   # Path to the original dataset
create_dataset(root_dir)