In [1]:
# --- CELDA 0: INSTALACI√ìN ---
# Instala ultralytics Y fija NumPy a una versi√≥n < 2.0 
# para evitar conflictos de entorno en Kaggle.
!pip install ultralytics "numpy<2"

print("¬°Bibliotecas 'ultralytics' y 'numpy<2' instaladas!")

Collecting ultralytics
  Downloading ultralytics-8.3.225-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.18 (from ultralytics)
  Downloading ultralytics_thop-2.0.18-py3-none-any.whl.metadata (14 kB)
INFO: pip is looking at multiple versions of opencv-python to determine which version is compatible with other requirements. This could take a while.
Collecting opencv-python>=4.6.0 (from ultralytics)
  Downloading opencv_python-4.11.0.86-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_

In [2]:
import os
import shutil
import glob
import random

print("Iniciando la creaci√≥n y re-partici√≥n (80/10/10) del dataset filtrado...")

# --- 1. Paths ---
# Read-only source dataset.
INPUT_DIR = "/kaggle/input/graffiti-train-3"

# Root of the NEW dataset produced by this cell.
OUTPUT_DIR = "/kaggle/working/final_dataset_80_10_10"

# Temporary area that holds ALL filtered files before they are re-split.
STAGING_DIR = os.path.join(OUTPUT_DIR, "staging")
STAGING_IMAGE_DIR, STAGING_LABEL_DIR = (
    os.path.join(STAGING_DIR, subfolder) for subfolder in ("images", "labels")
)

# --- 2. Class mapping ---
# Only source classes 1 and 2 are kept; each is renumbered to the position of
# its name in NEW_CLASS_NAMES (1 -> 0 'artistico', 2 -> 1 'vandalico').
NEW_CLASS_NAMES = ["artistico", "vandalico"]
CLASS_MAPPING = {
    1: NEW_CLASS_NAMES.index("artistico"),
    2: NEW_CLASS_NAMES.index("vandalico"),
}

# --- 3. Reset and create directories ---
# Drop whatever a previous execution left behind so the cell can be re-run.
if os.path.exists(OUTPUT_DIR):
    shutil.rmtree(OUTPUT_DIR)

# Create the staging folders (intermediate directories are created as needed).
for staging_folder in (STAGING_IMAGE_DIR, STAGING_LABEL_DIR):
    os.makedirs(staging_folder, exist_ok=True)

# --- 4. Filter every source split (train, valid, test) into staging ---
# For each source split: rewrite every label file keeping only the classes
# listed in CLASS_MAPPING (renumbered), then copy the images whose label file
# still has at least one annotation.
splits_to_process = ['train', 'valid', 'test']
total_files_processed = 0

print("\n--- PASO 1: Filtrando y Unificando Clases ---")
for split in splits_to_process:
    print(f"Procesando split de origen: '{split}'...")

    split_root = os.path.join(INPUT_DIR, split)
    labels_dir = os.path.join(split_root, 'labels')
    images_dir = os.path.join(split_root, 'images')

    if not os.path.exists(labels_dir):
        print(f"  Advertencia: No se encontr√≥ {labels_dir}. Saltando...")
        continue

    # Base names (no extension) of the label files that survived filtering
    # in THIS split.
    kept_stems = set()

    label_names = (name for name in os.listdir(labels_dir) if name.endswith('.txt'))
    for label_name in label_names:
        try:
            # Tokenise every line; blank lines become empty lists.
            with open(os.path.join(labels_dir, label_name), 'r') as reader:
                tokenized = [raw.split() for raw in reader]

            # Keep only annotations of a mapped class, with the class id
            # replaced and the remaining fields passed through unchanged.
            remapped = [
                f"{CLASS_MAPPING[int(fields[0])]} {' '.join(fields[1:])}\n"
                for fields in tokenized
                if fields and int(fields[0]) in CLASS_MAPPING
            ]

            # A label file is written (and its image kept) only when at least
            # one annotation remains.
            if remapped:
                with open(os.path.join(STAGING_LABEL_DIR, label_name), 'w') as writer:
                    writer.writelines(remapped)
                kept_stems.add(os.path.splitext(label_name)[0])

        except Exception as err:
            # A file that cannot be read or parsed is reported and skipped.
            print(f"  Error procesando {label_name}: {err}")

    print(f"  Se filtraron {len(kept_stems)} etiquetas v√°lidas.")

    # --- 5. Copy only the relevant images to staging ---
    if not os.path.exists(images_dir):
        print(f"  Advertencia: No se encontr√≥ {images_dir}. Saltando copia...")
        continue

    selected_images = [
        image_name
        for image_name in os.listdir(images_dir)
        if os.path.splitext(image_name)[0] in kept_stems
    ]
    for image_name in selected_images:
        shutil.copy2(
            os.path.join(images_dir, image_name),
            os.path.join(STAGING_IMAGE_DIR, image_name),
        )
    copied_count = len(selected_images)

    print(f"  Se copiaron {copied_count} im√°genes a staging.")
    total_files_processed += copied_count

print(f"\nFiltrado completo. Se unificaron {total_files_processed} im√°genes en total.")

# --- 6. 80/10/10 re-split ---
# Shuffles every staged image with a fixed seed, slices the list into
# train/valid/test, moves each image (and its label file, when present) into
# OUTPUT_DIR/<split>/{images,labels}, and finally removes the staging area.
print("\n--- PASO 2: Re-partiendo el dataset en 80/10/10 ---")

# glob returns paths in arbitrary, filesystem-dependent order. Sorting first
# gives the seeded shuffle below a stable starting point, so the same files
# end up in the same split on every run; without it the seed alone does not
# make the split reproducible.
all_image_paths = sorted(glob.glob(os.path.join(STAGING_IMAGE_DIR, '*.*')))
random.seed(42)  # fixed seed so the split is reproducible
random.shuffle(all_image_paths)

total_count = len(all_image_paths)
if total_count == 0:
    print("Error: No se encontraron im√°genes filtradas. Deteniendo.")
else:
    # Slice boundaries: first 80% -> train, next 10% -> valid.
    train_end = int(total_count * 0.8)
    valid_end = train_end + int(total_count * 0.1)

    # Files assigned to each split. 'test' takes everything that is left, so
    # it also absorbs the remainder produced by the int() truncations above.
    splits_files = {
        'train': all_image_paths[:train_end],
        'valid': all_image_paths[train_end:valid_end],
        'test': all_image_paths[valid_end:]
    }

    # Move the files out of staging into their final folders.
    for split_name, image_files in splits_files.items():
        print(f"Creando split '{split_name}' con {len(image_files)} im√°genes...")

        final_img_dir = os.path.join(OUTPUT_DIR, split_name, 'images')
        final_label_dir = os.path.join(OUTPUT_DIR, split_name, 'labels')
        os.makedirs(final_img_dir, exist_ok=True)
        os.makedirs(final_label_dir, exist_ok=True)

        for img_path in image_files:
            # The label file shares the image's base name, with a .txt extension.
            base_name = os.path.splitext(os.path.basename(img_path))[0]
            label_file_name = base_name + '.txt'

            src_label_path = os.path.join(STAGING_LABEL_DIR, label_file_name)
            dst_label_path = os.path.join(final_label_dir, label_file_name)

            # Move the image.
            shutil.move(img_path, final_img_dir)

            # Move the label; an image without a staged label is moved alone.
            if os.path.exists(src_label_path):
                shutil.move(src_label_path, dst_label_path)

    # 7. Remove the temporary staging folder (and anything still inside it).
    shutil.rmtree(STAGING_DIR)

    print("\n¬°Proceso de filtrado y re-partici√≥n (80/10/10) completado!")
    print(f"Dataset listo en: {OUTPUT_DIR}")
    print(f"  Train: {len(splits_files['train'])} im√°genes")
    print(f"  Valid: {len(splits_files['valid'])} im√°genes")
    print(f"  Test:  {len(splits_files['test'])} im√°genes")

Iniciando la creaci√≥n y re-partici√≥n (80/10/10) del dataset filtrado...

--- PASO 1: Filtrando y Unificando Clases ---
Procesando split de origen: 'train'...
  Se filtraron 2298 etiquetas v√°lidas.
  Se copiaron 2298 im√°genes a staging.
Procesando split de origen: 'valid'...
  Se filtraron 96 etiquetas v√°lidas.
  Se copiaron 96 im√°genes a staging.
Procesando split de origen: 'test'...
  Se filtraron 101 etiquetas v√°lidas.
  Se copiaron 101 im√°genes a staging.

Filtrado completo. Se unificaron 2495 im√°genes en total.

--- PASO 2: Re-partiendo el dataset en 80/10/10 ---
Creando split 'train' con 1996 im√°genes...
Creando split 'valid' con 249 im√°genes...
Creando split 'test' con 250 im√°genes...

¬°Proceso de filtrado y re-partici√≥n (80/10/10) completado!
Dataset listo en: /kaggle/working/final_dataset_80_10_10
  Train: 1996 im√°genes
  Valid: 249 im√°genes
  Test:  250 im√°genes


In [3]:
import os
import yaml

print("Creando el archivo YAML para el dataset 80/10/10...")

# Root of the 80/10/10 dataset this YAML points at.
DATASET_80_10_10_DIR = '/kaggle/working/final_dataset_80_10_10'

# Destination of the dataset description file.
YAML_PATH = '/kaggle/working/data_final_80_10_10.yaml'

# Class names, in class-id order; 'nc' is derived from this list.
class_names = ['artistico', 'vandalico']

# Build the mapping in the exact key order it is dumped in:
# train, val, test, nc, names. Note the 'val' key points at the 'valid' folder.
yaml_data = {
    yaml_key: os.path.join(DATASET_80_10_10_DIR, f'{folder}/images')
    for yaml_key, folder in (('train', 'train'), ('val', 'valid'), ('test', 'test'))
}
yaml_data.update(nc=len(class_names), names=class_names)

# Serialise once and reuse the same text for the file and for the preview.
yaml_text = yaml.dump(yaml_data, sort_keys=False)
with open(YAML_PATH, 'w') as f:
    f.write(yaml_text)

print(f"Archivo YAML creado exitosamente en: {YAML_PATH}")
print("\n--- Contenido del YAML que se usar√° para entrenar ---")
print(yaml_text)
print("-----------------------------------------------------")

Creando el archivo YAML para el dataset 80/10/10...
Archivo YAML creado exitosamente en: /kaggle/working/data_final_80_10_10.yaml

--- Contenido del YAML que se usar√° para entrenar ---
train: /kaggle/working/final_dataset_80_10_10/train/images
val: /kaggle/working/final_dataset_80_10_10/valid/images
test: /kaggle/working/final_dataset_80_10_10/test/images
nc: 2
names:
- artistico
- vandalico

-----------------------------------------------------


In [4]:
# ¡Recuerda poner "!pip install -q ultralytics" en la Celda 0!
# ¡No lo pongas aquí!

import os
import yaml
from ultralytics import YOLO

print("¬°Hola! Iniciando el script de entrenamiento...")

# --- 1. Paths ---

# Dataset description file to train and evaluate with (the 80/10/10 one).
YAML_PARA_ENTRENAR = '/kaggle/working/data_final_80_10_10.yaml'

# Root folder under which both run folders are written.
OUTPUT_DIR = '/kaggle/working/'

# Run (sub-folder) names for training and for the test evaluation.
TRAIN_RUN_NAME = 'artistico_vandalico_train'
TEST_RUN_NAME = 'artistico_vandalico_test'

# Resolve the run folders once with os.path.join: OUTPUT_DIR already ends in
# '/', so gluing the pieces with another '/' produced '/kaggle/working//...'.
train_run_dir = os.path.join(OUTPUT_DIR, TRAIN_RUN_NAME)
test_run_dir = os.path.join(OUTPUT_DIR, TEST_RUN_NAME)

print(f"Usando archivo YAML: {YAML_PARA_ENTRENAR}")

# --- 2. Model training ---
print("Cargando el modelo YOLOv8s...")
model = YOLO('yolov8s.pt')  # pre-trained 'small' checkpoint

print("Iniciando el entrenamiento...")
results = model.train(
    data=YAML_PARA_ENTRENAR,
    imgsz=640,
    batch=-1,               # auto-batch
    project=OUTPUT_DIR,
    name=TRAIN_RUN_NAME,
    exist_ok=True,          # reuse the run folder when the cell is re-run

    # --- Training parameters ---
    # A single epoch: smoke-test setting only. Raise it for a real run
    # (ultralytics also accepts `time=<hours>` to cap training by wall-clock
    # time instead).
    epochs=1,
    # Early-stopping patience; it cannot trigger while epochs < 50.
    patience=50,

    # Mixed precision is ENABLED, and the recorded run of this notebook
    # completed with amp=True.
    # NOTE(review): an earlier comment here said AMP had to be disabled to
    # avoid a Kaggle error -- switch to amp=False only if that error reappears.
    amp=True
)

print("\n¬°Entrenamiento completado!")

# --- 3. Evaluation on the TEST split ---
print("\nCargando el MEJOR modelo para la evaluaci√≥n en 'test'...")

best_model_path = os.path.join(train_run_dir, 'weights/best.pt')

if not os.path.exists(best_model_path):
    print(f"ERROR: No se encontr√≥ el modelo en {best_model_path}")
else:
    best_model = YOLO(best_model_path)
    print("Ejecutando evaluaci√≥n en el set de 'test'...")

    test_results = best_model.val(
        data=YAML_PARA_ENTRENAR,
        split='test',       # evaluate on the 'test' entry of the YAML
        project=OUTPUT_DIR,
        name=TEST_RUN_NAME,
        # Same policy as the training run: without it a re-run writes to an
        # auto-incremented folder and the path printed below goes stale.
        exist_ok=True
    )
    print("\n¬°Evaluaci√≥n de test completada!")

# --- 4. Final summary ---
print("\n--- ¬°Proceso Finalizado! ---")
print(f"Carpeta de Entrenamiento: {train_run_dir}/")
print(f"Carpeta de Evaluaci√≥n: {test_run_dir}/")

Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
¬°Hola! Iniciando el script de entrenamiento...
Usando archivo YAML: /kaggle/working/data_final_80_10_10.yaml
Cargando el modelo YOLOv8s...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 21.5MB 233.0MB/s 0.1s
Iniciando el entrenamiento...
Ultralytics 8.3.225 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr

  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        249        366       0.39      0.416       0.34      0.169

1 epochs completed in 0.013 hours.
Optimizer stripped from /kaggle/working/artistico_vandalico_train/weights/last.pt, 22.5MB
Optimizer stripped from /kaggle/working/artistico_vandalico_train/weights/best.pt, 22.5MB

Validating /kaggle/working/artistico_vandalico_train/weights/best.pt...
Ultralytics 8.3.225 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,126,358 parameters, 0 gradients, 28.4 GFLOPs
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 4/4 1.0it/s 4.1s


  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        249        366       0.39      0.416      0.341      0.169
             artistico        127        144      0.307      0.354      0.274      0.111
             vandalico        150        222      0.473      0.477      0.408      0.227
Speed: 0.3ms preprocess, 5.9ms inference, 0.0ms loss, 4.1ms postprocess per image
Results saved to [1m/kaggle/working/artistico_vandalico_train[0m

¬°Entrenamiento completado!

Cargando el MEJOR modelo para la evaluaci√≥n en 'test'...
Ejecutando evaluaci√≥n en el set de 'test'...
Ultralytics 8.3.225 üöÄ Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 11,126,358 parameters, 0 gradients, 28.4 GFLOPs
[34m[1mval: [0mFast image access ‚úÖ (ping: 0.0¬±0.0 ms, read: 3027.0¬±787.0 MB/s, size: 604.0 KB)
[K[34m[1mval: [0mScanning /kaggle/working/final_dataset_80_10_10/test/labels... 250 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 250/250 1.5Kit/s

  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        250        396      0.396      0.412       0.35      0.169
             artistico        126        162      0.339      0.444      0.337      0.155
             vandalico        149        234      0.453       0.38      0.362      0.183
Speed: 2.4ms preprocess, 8.9ms inference, 0.0ms loss, 2.6ms postprocess per image
Results saved to [1m/kaggle/working/artistico_vandalico_test[0m

¬°Evaluaci√≥n de test completada!

--- ¬°Proceso Finalizado! ---
Carpeta de Entrenamiento: /kaggle/working//artistico_vandalico_train/
Carpeta de Evaluaci√≥n: /kaggle/working//artistico_vandalico_test/
