In [1]:
from datasets import load_dataset

# Fetch the "full" configuration of the license-plate detection dataset from
# the Hugging Face Hub (served from the local cache on later runs).
data_set = load_dataset(
    path="keremberke/license-plate-object-detection",
    name="full",
)

# Leave the DatasetDict as the last expression so the notebook displays its
# splits, features and row counts.
data_set

DatasetDict({
    train: Dataset({
        features: ['image_id', 'image', 'width', 'height', 'objects'],
        num_rows: 6176
    })
    validation: Dataset({
        features: ['image_id', 'image', 'width', 'height', 'objects'],
        num_rows: 1765
    })
    test: Dataset({
        features: ['image_id', 'image', 'width', 'height', 'objects'],
        num_rows: 882
    })
})

In [2]:
import os
from tqdm.notebook import tqdm

sub_carpetas = ["train", "validation", "test"]
carpetas_principales = ["images", "labels"]

data_dir = 'data'


def _bbox_to_yolo(bbox, img_width, img_height):
    """Convert a pixel-space ``[x, y, w, h]`` box into normalised YOLO form.

    Args:
        bbox: Four numbers ``[x, y, w, h]``; ``(x, y)`` is treated as the
            corner from which ``w`` and ``h`` extend (assumed COCO-style
            top-left origin -- TODO confirm against the dataset card).
        img_width: Image width in pixels.
        img_height: Image height in pixels.

    Returns:
        Tuple ``(x_center, y_center, width, height)``, each divided by the
        corresponding image dimension.
    """
    # Keep sub-pixel precision: casting to int here would truncate the box
    # before normalisation and skew the label.
    x, y, w, h = map(float, bbox)
    return (
        (x + w / 2) / img_width,
        (y + h / 2) / img_height,
        w / img_width,
        h / img_height,
    )


# Derive every output folder from data_dir so the directories that get created
# are always the ones the loop below writes into.
paths = [os.path.join(data_dir, principal, sub_carpeta)
         for principal in carpetas_principales
         for sub_carpeta in sub_carpetas]

for path in paths:
    os.makedirs(path, exist_ok=True)

# Every label written below uses this single class index.
class_index = 0

for ds_name in tqdm(sub_carpetas, desc='Procesando datasets'):
    split = data_set[ds_name]

    # Iterate the split row by row instead of materialising whole columns with
    # `[::]`: each image is decoded once and only one row is held at a time.
    for example in tqdm(split, total=len(split), desc=f'Procesando imágenes de {ds_name}'):
        img = example['image']
        obj = example['objects']

        # Only rows with exactly one annotated object are exported.
        # NOTE(review): rows with zero or several objects are skipped, as in
        # the original cell -- confirm that dropping them is intended.
        if len(obj['id']) != 1:
            continue

        # Image and label share the annotation id as their file stem.
        stem = obj['id'][0]

        img_path = os.path.join(data_dir, 'images', ds_name, f"{stem}.jpg")
        img.save(img_path)

        img_width, img_height = img.size
        x_center, y_center, w, h = _bbox_to_yolo(obj['bbox'][0], img_width, img_height)

        label_path = os.path.join(data_dir, 'labels', ds_name, f"{stem}.txt")
        with open(label_path, 'w') as f:
            # One line per object: "<class> <x_center> <y_center> <w> <h>".
            f.write(f"{class_index} {x_center} {y_center} {w} {h}\n")


Procesando datasets:   0%|          | 0/3 [00:00<?, ?it/s]

Procesando imágenes de train:   0%|          | 0/6176 [00:00<?, ?it/s]

Procesando imágenes de validation:   0%|          | 0/1765 [00:00<?, ?it/s]

Procesando imágenes de test:   0%|          | 0/882 [00:00<?, ?it/s]