# In [1]:
import os
import csv
import random
from pathlib import Path

# Input: root folder that holds one sub-folder per class.
DATASET_PATH: str = "../dataset"

# Fraction of the clips assigned to the training split (the rest go to validation).
SPLIT_RATIO: float = 0.8

# Outputs: the two split listings and the class-name -> index mapping.
TRAIN_CSV: str = "../train.csv"
VAL_CSV: str = "../val.csv"
LABEL_MAP: str = "../label_map.txt"

def generar_archivos_csv_y_labelmap(
    dataset_path=None,
    train_csv=None,
    val_csv=None,
    label_map_path=None,
    split_ratio=None,
    seed=None,
):
    """Write the label map and the train/validation clip listings.

    Every sub-directory of the dataset root is treated as a class, and every
    sub-directory of a class directory as one clip. Classes are numbered in
    alphabetical order. The clips are shuffled and split, and each split is
    written as space-delimited rows of ``clip_id label clip_path``.

    Args:
        dataset_path: Dataset root. Defaults to ``DATASET_PATH``.
        train_csv: Output path of the training listing. Defaults to
            ``TRAIN_CSV``.
        val_csv: Output path of the validation listing. Defaults to
            ``VAL_CSV``.
        label_map_path: Output path of the ``class index`` mapping. Defaults
            to ``LABEL_MAP``.
        split_ratio: Fraction of clips placed in the training split, in
            ``[0, 1]``. Defaults to ``SPLIT_RATIO``.
        seed: Optional seed for the shuffle. When given, the same dataset
            always yields the same split; when ``None`` the module-level
            ``random`` state is used, as before.

    Raises:
        ValueError: If the split ratio is outside ``[0, 1]``.
        FileNotFoundError: If the dataset root does not exist (raised by
            ``Path.iterdir``).
    """
    # Module-level defaults are resolved at call time so that callers can
    # still reconfigure the constants before invoking the function.
    dataset_root = Path(DATASET_PATH if dataset_path is None else dataset_path)
    train_out = TRAIN_CSV if train_csv is None else train_csv
    val_out = VAL_CSV if val_csv is None else val_csv
    label_map_out = LABEL_MAP if label_map_path is None else label_map_path
    ratio = SPLIT_RATIO if split_ratio is None else split_ratio

    # Validate before any file is touched.
    if not 0 <= ratio <= 1:
        raise ValueError(f"split_ratio must be within [0, 1], got {ratio!r}")

    clases = sorted(d.name for d in dataset_root.iterdir() if d.is_dir())
    label_map = {cls: idx for idx, cls in enumerate(clases)}

    # Save the label map. Explicit UTF-8 keeps non-ASCII class names
    # independent of the platform's default encoding.
    with open(label_map_out, 'w', encoding='utf-8') as f:
        f.writelines(f"{cls} {idx}\n" for cls, idx in label_map.items())

    # Collect clips. Directory listing order is filesystem-dependent, so the
    # entries are sorted to give the shuffle a stable starting order.
    clips = [
        (clip_dir.name, label_map[cls], str(clip_dir))
        for cls in clases
        for clip_dir in sorted((dataset_root / cls).iterdir())
        if clip_dir.is_dir()
    ]

    if seed is None:
        random.shuffle(clips)
    else:
        # A private generator avoids disturbing the global random state.
        random.Random(seed).shuffle(clips)

    split_idx = int(len(clips) * ratio)
    train_clips = clips[:split_idx]
    val_clips = clips[split_idx:]

    # Save both listings; newline='' lets the csv module control line endings.
    for destino, filas in ((train_out, train_clips), (val_out, val_clips)):
        with open(destino, 'w', newline='', encoding='utf-8') as f:
            csv.writer(f, delimiter=' ').writerows(filas)

    print(f"✅ Generado: {train_out}, {val_out}, {label_map_out}")
    print(f"Entrenamiento: {len(train_clips)} clips | Validación: {len(val_clips)} clips")

# Generate the files only when this module is run as a script, not on import.
if __name__ == "__main__":
    generar_archivos_csv_y_labelmap()


# Output:
# ✅ Generado: ../train.csv, ../val.csv, ../label_map.txt
# Entrenamiento: 80 clips | Validación: 21 clips
