In [1]:
import os
import shutil
from pathlib import Path
from sklearn.model_selection import train_test_split

# ===== PARAMETERS =====
DATASET_PATH = Path("dataset_rangos_10")  # Original dataset folder (one sub-folder per class)
OUTPUT_PATH = Path("dataset_rangos_10_split")  # Destination folder
SPLIT_RATIOS = {'train': 0.7, 'valid': 0.2, 'test': 0.1}  # Split fractions
RANDOM_SEED = 42  # Fixed seed so repeated runs produce the same split


def compute_split_counts(n_items, ratios):
    """Return ``(n_train, n_valid, n_test)`` for a folder holding *n_items* files.

    The counts always add up to *n_items*. The train share is
    ``round(n_items * ratios['train'])``; whatever is left is divided between
    valid and test in proportion to their ratios, with valid absorbing the
    rounding remainder.

    Working with explicit integers avoids float drift: ``1 - 0.7`` evaluates
    to ``0.30000000000000004``, which, when handed to scikit-learn as a
    fractional ``test_size``, can tip the size computation over to one extra
    held-out image (e.g. 6/2/2 instead of 7/2/1 for ten images).
    """
    n_train = min(n_items, round(n_items * ratios['train']))
    n_rest = n_items - n_train
    held_out_ratio = ratios['valid'] + ratios['test']
    # Guard the division for a configuration that holds nothing out.
    n_test = round(n_rest * ratios['test'] / held_out_ratio) if held_out_ratio else 0
    return n_train, n_rest - n_test, n_test


def random_partition(items, n_first, seed):
    """Randomly split *items* into two lists; the first one gets *n_first* items.

    ``train_test_split`` raises ``ValueError`` when either side would be
    empty, so the degenerate cases (tiny folders) are handled here without
    calling it.
    """
    if n_first <= 0:
        return [], list(items)
    if n_first >= len(items):
        return list(items), []
    first, rest = train_test_split(items, train_size=n_first, random_state=seed)
    return first, rest


def split_dataset(dataset_path, output_path, ratios):
    """Copy every ``*.jpg`` under *dataset_path* into train/valid/test folders.

    Each sub-folder of *dataset_path* is split independently and recreated
    under ``output_path/<split>/<sub-folder name>``. Files are copied (not
    moved) with ``shutil.copy2``, so the source dataset is left untouched.

    NOTE: files already present under *output_path* from an earlier run are
    not removed; clear the folder first if the ratios or the data changed.

    Raises:
        FileNotFoundError: if *dataset_path* is not an existing directory.
    """
    # Fail before creating any output folder if the source is missing.
    if not dataset_path.is_dir():
        raise FileNotFoundError(f"Dataset folder not found: {dataset_path}")

    # Create the output folders
    for split in ratios:
        (output_path / split).mkdir(parents=True, exist_ok=True)

    # ===== SPLIT =====
    # iterdir()/glob() yield entries in arbitrary filesystem order; sorting
    # makes the seeded shuffle reproducible across machines and runs.
    for age_folder in sorted(dataset_path.iterdir()):
        if not age_folder.is_dir():
            continue  # Skip anything that is not a folder

        # List the images in the current folder
        images = sorted(age_folder.glob("*.jpg"))
        if not images:
            print(f"No images found in {age_folder}")
            continue

        # Split the images into train, valid and test
        n_train, n_valid, _ = compute_split_counts(len(images), ratios)
        train_imgs, held_out_imgs = random_partition(images, n_train, RANDOM_SEED)
        valid_imgs, test_imgs = random_partition(held_out_imgs, n_valid, RANDOM_SEED)

        # Copy the images into their split folders (folders are created even
        # when a split ends up empty, so every split has every class).
        for split, split_imgs in (('train', train_imgs), ('valid', valid_imgs), ('test', test_imgs)):
            split_folder = output_path / split / age_folder.name
            split_folder.mkdir(parents=True, exist_ok=True)
            for img in split_imgs:
                shutil.copy2(img, split_folder / img.name)

        print(f"Processed folder: {age_folder.name}")


# True both when run as a script and when executed as a notebook cell.
if __name__ == "__main__":
    split_dataset(DATASET_PATH, OUTPUT_PATH, SPLIT_RATIOS)


Processed folder: age_0_9
Processed folder: age_100_109
Processed folder: age_10_19
Processed folder: age_110_119
Processed folder: age_20_29
Processed folder: age_30_39
Processed folder: age_40_49
Processed folder: age_50_59
Processed folder: age_60_69
Processed folder: age_70_79
Processed folder: age_80_89
Processed folder: age_90_99
