<a href="https://colab.research.google.com/github/Jakelinecs/Tareas-Machine-Learning/blob/main/N28.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
# coding: utf-8
"""
Script to convert Pascal VOC format XML annotation files to YOLOv3 training text file format.
Before use, confirm the paths to XML and image files, and the correspondence of class names.
"""
import xml.etree.ElementTree as ET
import os

# --- Settings ---
# Directory that holds the source annotations (annotation.txt is looked up here)
VOC_ANNOTATIONS_DIR = 'model_data'
# Prefix joined in front of image filenames taken from VOC XML files
IMAGE_DIR_PREFIX = './characters'
# Destination of the generated YOLO format annotation file
OUTPUT_FILE = 'simpsons_train.txt'
# Class names; the position in this list is the class ID written to the output
# (should correspond to simpsons_classes.txt)
CLASSES = [
    'abraham_grampa_simpson',
    'apu_nahasapeemapetilon',
    'barney_gumble',
    'bart_simpson',
    'carl_carlson',
    'charles_montgomery_burns',
    'chief_wiggum',
    'comic_book_guy',
    'disco_stu',
    'edna_krabappel',
    'fat_tony',
    'groundskeeper_willie',
    'homer_simpson',
    'kent_brockman',
    'krusty_the_clown',
    'lenny_leonard',
    'lisa_simpson',
    'maggie_simpson',
    'marge_simpson',
    'mayor_quimby',
    'milhouse_van_houten',
    'moe_szyslak',
    'ned_flanders',
    'nelson_muntz',
    'otto_mann',
    'patty_bouvier',
    'principal_skinner',
    'professor_john_frink',
    'ralph_wiggum',
    'selma_bouvier',
    'sideshow_bob',
    'sideshow_mel',
    'snake_jailbird',
    'troy_mcclure',
    'waylon_smithers',
]

def _parse_coordinate(text):
    """Convert a coordinate string to an int, accepting float notation."""
    try:
        return int(text)
    except ValueError:
        # Some annotation tools write values such as "48.0"; truncate toward zero.
        return int(float(text))


def _read_bndbox(obj):
    """Return (xmin, ymin, xmax, ymax) for an <object> element, or None if unusable."""
    bndbox = obj.find('bndbox')
    if bndbox is None:
        return None

    coords = []
    for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
        text = bndbox.findtext(tag)
        if text is None:
            return None
        try:
            coords.append(_parse_coordinate(text))
        except (ValueError, OverflowError):
            # Empty, non-numeric, NaN or infinite coordinate.
            return None
    return tuple(coords)


def convert_annotation(xml_file_path, classes):
    """
    Reads a single XML file and returns a YOLO format annotation string.

    The returned line has the form
    ``<IMAGE_DIR_PREFIX>/<filename> xmin,ymin,xmax,ymax,class_id ...`` followed by
    a newline, with one box entry per usable <object> element.

    Args:
        xml_file_path: Path of the Pascal VOC XML annotation file.
        classes: Sequence of class names; the index of a name is its class ID.

    Returns:
        The annotation line, or "" when the file cannot be read or parsed or has
        no <filename>. Objects with an unknown class or an unusable bounding box
        are skipped with a warning; a file with no usable objects yields a line
        holding only the image path.
    """
    try:
        root = ET.parse(xml_file_path).getroot()
    except (ET.ParseError, OSError, ValueError) as e:
        print(f"Error parsing {xml_file_path}: {e}")
        return ""

    # A missing or empty <filename> would otherwise crash on attribute access.
    img_filename = (root.findtext('filename') or '').strip()
    if not img_filename:
        print(f"Error parsing {xml_file_path}: missing <filename> element")
        return ""

    # The beginning of the YOLO format line is the image path
    parts = [os.path.join(IMAGE_DIR_PREFIX, img_filename)]

    for obj in root.iter('object'):
        cls_name = obj.findtext('name')
        try:
            # Single scan: index() both checks membership and yields the ID.
            cls_id = classes.index(cls_name)
        except ValueError:
            print(f"Warning: Class '{cls_name}' not found in CLASSES list. Skipping.")
            continue

        box = _read_bndbox(obj)
        if box is None:
            print(f"Warning: Invalid bounding box for '{cls_name}' in {xml_file_path}. Skipping.")
            continue

        xmin, ymin, xmax, ymax = box
        parts.append(f"{xmin},{ymin},{xmax},{ymax},{cls_id}")

    return " ".join(parts) + '\n'

def generate_yolo_annotation_file():
    """
    Reads the existing annotation.txt file and outputs a YOLO format annotation file.

    Each non-empty input line is expected to hold comma-separated fields
    ``image_path,x1,y1,x2,y2,class_name`` (extra fields are ignored). Every valid
    line is written to OUTPUT_FILE as ``image_path x1,y1,x2,y2,class_id``.

    Lines with too few fields, non-integer coordinates, or a class that is not
    in CLASSES are reported and skipped, so one malformed row no longer aborts
    the conversion and leaves a truncated output file.

    Returns:
        None. Prints an error and returns early when annotation.txt is missing.
    """
    annotation_file = os.path.join(VOC_ANNOTATIONS_DIR, 'annotation.txt')

    if not os.path.isfile(annotation_file):
        print(f"Error: Annotation file '{annotation_file}' not found. Please check settings.")
        return

    # Read everything before opening the output file for writing.
    with open(annotation_file, 'r') as f:
        lines = f.readlines()

    # Create a mapping from class name to ID
    class_to_id = {cls_name: idx for idx, cls_name in enumerate(CLASSES)}

    with open(OUTPUT_FILE, 'w') as f:
        for line_no, line in enumerate(lines, start=1):
            line = line.strip()
            if not line:
                continue

            parts = [part.strip() for part in line.split(',')]
            if len(parts) < 6:
                print(f"Warning: Line {line_no} has fewer than 6 fields. Skipping.")
                continue

            image_path, class_name = parts[0], parts[5]
            try:
                x1, y1, x2, y2 = (int(value) for value in parts[1:5])
            except ValueError:
                # For example a header row or a corrupted coordinate.
                print(f"Warning: Line {line_no} has non-integer coordinates. Skipping.")
                continue

            class_id = class_to_id.get(class_name)
            if class_id is None:
                print(f"Warning: Class '{class_name}' not found in CLASSES list. Skipping.")
                continue

            # Output in YOLO format: image_path x1,y1,x2,y2,class_id
            f.write(f"{image_path} {x1},{y1},{x2},{y2},{class_id}\n")

    print("--- Complete ---")
    print(f"YOLO annotation file '{OUTPUT_FILE}' created.")
    print(f"Place this file in {os.getcwd()} for training.")
    classes_text = '\n'.join(CLASSES)
    print(f"Write the following content to the class file 'model_data/simpsons_classes.txt':\n{classes_text}")

# Script entry point: validate the configured locations, then run the conversion
if __name__ == '__main__':
    annotation_txt = os.path.join(VOC_ANNOTATIONS_DIR, 'annotation.txt')

    if not os.path.isdir(VOC_ANNOTATIONS_DIR):
        # The annotation directory itself is missing
        print(f"Error: Annotation directory '{VOC_ANNOTATIONS_DIR}' does not exist. Please check settings.")
    elif not os.path.isfile(annotation_txt):
        # The directory exists but annotation.txt is not inside it
        print(f"Error: Annotation file '{VOC_ANNOTATIONS_DIR}/annotation.txt' does not exist.")
    else:
        # Echo the active settings before converting
        settings_summary = (
            f"Annotation directory: {VOC_ANNOTATIONS_DIR}",
            f"Image directory prefix: {IMAGE_DIR_PREFIX}",
            f"Output file: {OUTPUT_FILE}",
            f"Number of classes: {len(CLASSES)}",
        )
        for summary_line in settings_summary:
            print(summary_line)
        generate_yolo_annotation_file()

Annotation directory: model_data
Image directory prefix: ./characters
Output file: simpsons_train.txt
Number of classes: 35
--- Complete ---
YOLO annotation file 'simpsons_train.txt' created.
Place this file in /content for training.
Write the following content to the class file 'model_data/simpsons_classes.txt':
abraham_grampa_simpson
apu_nahasapeemapetilon
barney_gumble
bart_simpson
carl_carlson
charles_montgomery_burns
chief_wiggum
comic_book_guy
disco_stu
edna_krabappel
fat_tony
groundskeeper_willie
homer_simpson
kent_brockman
krusty_the_clown
lenny_leonard
lisa_simpson
maggie_simpson
marge_simpson
mayor_quimby
milhouse_van_houten
moe_szyslak
ned_flanders
nelson_muntz
otto_mann
patty_bouvier
principal_skinner
professor_john_frink
ralph_wiggum
selma_bouvier
sideshow_bob
sideshow_mel
snake_jailbird
troy_mcclure
waylon_smithers
