# PseudoLabeling Manual

In [1]:
%%capture
# Use the %pip magic (not !pip) so the package is installed into the environment
# of the running kernel. TODO(review): pin the ultralytics version that produced
# HeadBest.pt so the notebook stays reproducible.
%pip install -q ultralytics

In [2]:
%%capture
# The URL must be quoted: unquoted, the shell treats every `&` as a background
# operator, so the URL was cut after `rlkey=...`, wget ran in the background
# (racing with the following command) and the file had to be renamed afterwards.
# `-O` writes straight to the final name; `dl=1` asks Dropbox for the raw file.
!wget -O HeadBest.pt "https://www.dropbox.com/scl/fi/3mwyjefpr86kqktwrs1dh/HeadBest.pt?rlkey=b1h3rsr1z4cb82yh2najhvmrj&st=k4l2wefe&dl=1"

In [3]:
%%capture
# The URL must be quoted: unquoted, the shell treats every `&` as a background
# operator, so the URL was cut after `rlkey=...` and wget ran in the background,
# letting `unzip` start before the download had finished.
# `-O` writes straight to the final name; `dl=1` asks Dropbox for the raw file.
!wget -O dataSetUnlabel.zip "https://www.dropbox.com/scl/fi/ejtve3v8cggqo1d7vfmr9/dataSetUnlabel.zip?rlkey=lyun664rceo9shrfie7rh9wsc&st=aldjww59&dl=1"
!unzip dataSetUnlabel.zip

In [4]:
from ultralytics import YOLO

# Load the YOLO weights file fetched by the download cell
WEIGHTS_PATH = 'HeadBest.pt'
model = YOLO(WEIGHTS_PATH)


In [7]:
import os
import shutil
import random

# Dataset layout; adjust base_dir to wherever the dataset actually lives
base_dir = '/content/dataSet'
unlabel_dir = os.path.join(base_dir, 'Unlabel')
train_images_dir = os.path.join(base_dir, 'images', 'train')
train_labels_dir = os.path.join(base_dir, 'labels', 'train')
processed_images_file = os.path.join(base_dir, 'processed_images.txt')

# Make sure both destination folders exist before anything is moved into them
for _target_dir in (train_images_dir, train_labels_dir):
    os.makedirs(_target_dir, exist_ok=True)

def load_processed_images(processed_images_file):
    """Read the set of already-processed image paths from a tracking file.

    Args:
        processed_images_file: Path of a text file with one image path per line.

    Returns:
        A set containing every line of the file with surrounding whitespace
        stripped, or an empty set when the file does not exist.
    """
    if not os.path.exists(processed_images_file):
        return set()

    with open(processed_images_file, 'r') as handle:
        return {entry.strip() for entry in handle}

def save_processed_images(processed_images_file, processed_images):
    """Overwrite the tracking file with the given image paths, one per line.

    Args:
        processed_images_file: Path of the text file to (re)write.
        processed_images: Iterable of image paths; written in iteration order.
    """
    with open(processed_images_file, 'w') as handle:
        handle.writelines(f"{entry}\n" for entry in processed_images)

def get_unlabeled_images(batch_size=100):
    """Return the next batch of unlabeled images that have not been processed.

    The unlabeled directory is listed in sorted filename order so that the
    selection is deterministic across runs (``os.listdir`` alone returns
    entries in arbitrary order). Extensions are matched case-insensitively.

    Args:
        batch_size: Maximum number of image paths to return.

    Returns:
        A list of at most ``batch_size`` full paths inside ``unlabel_dir``
        that are regular files, have an image extension and are not listed in
        the processed-images tracking file.
    """
    processed_images = load_processed_images(processed_images_file)
    image_extensions = ('.jpg', '.jpeg', '.png')

    candidates = (
        os.path.join(unlabel_dir, name)
        for name in sorted(os.listdir(unlabel_dir))
        if name.lower().endswith(image_extensions)
    )

    # Skip anything already recorded as processed, and anything that is not a
    # regular file (e.g. a directory whose name happens to end in ".jpg").
    unprocessed_images = [
        path for path in candidates
        if path not in processed_images and os.path.isfile(path)
    ]

    return unprocessed_images[:batch_size]

def predict_and_label_images(images, model, confidence_threshold=0.7):
    """Run the model on each image and collect its confident detections.

    Args:
        images: Iterable of image paths to run inference on.
        model: Object exposing ``predict(path, conf=..., verbose=...)`` whose
            results carry a ``boxes`` attribute with ``conf``, ``cls`` and
            ``xywhn`` tensors.
        confidence_threshold: Minimum confidence a detection needs to be kept;
            it is also forwarded to ``model.predict`` as ``conf``.

    Returns:
        A list with one tuple per kept detection:
        ``(image_path, class_id, x_center, y_center, width, height)``.
        An image with several detections contributes several tuples.
    """
    detections = []

    for image_path in images:
        predictions = model.predict(image_path, conf=confidence_threshold, verbose=False)

        for prediction in predictions:
            boxes = prediction.boxes
            scores = boxes.conf.cpu().numpy()
            class_ids = boxes.cls.cpu().numpy()
            coords = boxes.xywhn.cpu().numpy()

            for score, xywhn, class_id in zip(scores, coords, class_ids):
                if not score >= confidence_threshold:
                    continue
                # The class comes from boxes.cls; the box row holds the four coordinates
                x_center, y_center, width, height = xywhn[:4]
                detections.append(
                    (image_path, int(class_id), x_center, y_center, width, height)
                )

    return detections

def _unique_destination(train_images_dir, train_labels_dir, image_filename):
    """Pick an image/label path pair sharing one stem that collides with nothing on disk."""
    stem, ext = os.path.splitext(image_filename)
    candidate_stem = stem
    counter = 0
    while True:
        new_image_path = os.path.join(train_images_dir, f"{candidate_stem}{ext}")
        label_path = os.path.join(train_labels_dir, f"{candidate_stem}.txt")
        if not (os.path.exists(new_image_path) or os.path.exists(label_path)):
            return new_image_path, label_path
        counter += 1
        candidate_stem = f"{stem}_{counter}"


def save_predictions(predicted_labels, train_images_dir, train_labels_dir, processed_images):
    """Move pseudo-labeled images into the training set and write their labels.

    Detections are grouped per image so that every box of an image ends up in
    a single label file (one ``class x_center y_center width height`` line per
    box). The label file always shares its stem with the moved image, also
    when the image had to be renamed to avoid a name collision.

    Args:
        predicted_labels: Iterable of
            ``(image_path, class_id, x_center, y_center, width, height)``
            tuples; several tuples may refer to the same image.
        train_images_dir: Directory the images are moved into.
        train_labels_dir: Directory the label ``.txt`` files are written to.
        processed_images: Set of processed image paths; the original path of
            every moved image is added to it in place.
    """
    # Group the boxes by image (dicts keep insertion order, so images are
    # handled in the order they first appear).
    boxes_by_image = {}
    for image_path, class_id, x_center, y_center, width, height in predicted_labels:
        boxes_by_image.setdefault(image_path, []).append(
            (class_id, x_center, y_center, width, height)
        )

    for image_path, boxes in boxes_by_image.items():
        # The image may have disappeared since inference; skip it if so
        if not os.path.exists(image_path):
            continue

        new_image_path, label_path = _unique_destination(
            train_images_dir, train_labels_dir, os.path.basename(image_path)
        )

        # Move the image into the training directory
        shutil.move(image_path, new_image_path)

        # Write every detection of this image, one line per box
        with open(label_path, 'w') as f:
            for class_id, x_center, y_center, width, height in boxes:
                f.write(f"{int(class_id)} {x_center} {y_center} {width} {height}\n")

        # Record the original path as processed
        processed_images.add(image_path)

def pseudolabeling_iteration(model, num_iterations=5, batch_size=100, confidence_threshold=0.7):
    """Run several rounds of pseudo-labeling over the unlabeled images.

    Each round takes a batch of unlabeled images, predicts on them, moves the
    confidently labeled ones into the training set and persists the set of
    processed images.

    Images that produce no confident detection stay in the unlabeled
    directory and are not recorded as processed, so they would be returned
    again by ``get_unlabeled_images`` on every round. They are therefore
    remembered in memory for the duration of this call and skipped, so that
    every round works on images that have not been tried yet.

    Args:
        model: Detection model passed to ``predict_and_label_images``.
        num_iterations: Maximum number of rounds to run.
        batch_size: Number of images handled per round.
        confidence_threshold: Minimum confidence for a detection to be kept.
    """
    processed_images = load_processed_images(processed_images_file)
    attempted_images = set()  # images already predicted on during this call

    for i in range(num_iterations):
        print(f"Iteración {i+1}/{num_iterations}")

        # Ask for enough candidates to still fill a batch after dropping the
        # images already attempted (at most len(attempted_images) of them can
        # still be present in the unlabeled directory).
        fetch_limit = batch_size if batch_size is None else batch_size + len(attempted_images)
        candidates = get_unlabeled_images(fetch_limit)
        unlabeled_images = [
            image for image in candidates if image not in attempted_images
        ][:batch_size]

        if not unlabeled_images:
            print("No quedan más imágenes sin etiquetar.")
            break

        # Get the predictions for this batch
        predicted_labels = predict_and_label_images(unlabeled_images, model, confidence_threshold)

        # Write the labels and move the labeled images
        save_predictions(predicted_labels, train_images_dir, train_labels_dir, processed_images)

        # Persist the processed-images state after every round
        save_processed_images(processed_images_file, processed_images)

        attempted_images.update(unlabeled_images)

# Run the pseudo-labeling process
pseudolabeling_iteration(
    model,
    num_iterations=5,
    batch_size=100,
    confidence_threshold=0.7,
)


Iteración 1/5
Iteración 2/5
Iteración 3/5
Iteración 4/5
Iteración 5/5


In [8]:
import shutil
# Zip the training images and labels into images.zip / labels.zip in the working directory
shutil.make_archive('images', 'zip', root_dir=train_images_dir)
shutil.make_archive('labels', 'zip', root_dir=train_labels_dir)

'/content/labels.zip'