In [6]:
import os
import xml.etree.ElementTree as ET

# Define class mapping — make sure they match your labelImg usage
CLASS_MAPPING = {
    "clean": 0,
    "dust": 1,
    "physical-damaged": 2,
    "bird-drop": 3,
    "snow-covered": 4,
    "electrical-damaged": 5
}

# Paths
ANNOTATION_DIR = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"  # This should contain 6 folders with XML files inside
YOLO_LABEL_DIR = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_YOLO/labels"

# Create output folders
for subset in ['train', 'val']:
    os.makedirs(os.path.join(YOLO_LABEL_DIR, subset), exist_ok=True)

def convert_xml_to_yolo(xml_path, txt_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    size = root.find('size')
    img_w = int(size.find('width').text)
    img_h = int(size.find('height').text)

    yolo_lines = []

    for obj in root.findall('object'):
        class_name = obj.find('name').text.lower().strip()
        if class_name not in CLASS_MAPPING:
            print(f"⚠️ Unknown class '{class_name}' in {xml_path}")
            continue
        class_id = CLASS_MAPPING[class_name]

        bbox = obj.find('bndbox')
        xmin = int(float(bbox.find('xmin').text))
        ymin = int(float(bbox.find('ymin').text))
        xmax = int(float(bbox.find('xmax').text))
        ymax = int(float(bbox.find('ymax').text))

        x_center = ((xmin + xmax) / 2) / img_w
        y_center = ((ymin + ymax) / 2) / img_h
        width = (xmax - xmin) / img_w
        height = (ymax - ymin) / img_h

        yolo_lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    with open(txt_path, "w") as f:
        f.write("\n".join(yolo_lines))

# Convert all XMLs
converted_count = 0
for folder in os.listdir(ANNOTATION_DIR):
    folder_path = os.path.join(ANNOTATION_DIR, folder)
    if not os.path.isdir(folder_path):
        continue

    for file in os.listdir(folder_path):
        if file.endswith(".xml"):
            xml_path = os.path.join(folder_path, file)
            txt_name = os.path.splitext(file)[0] + ".txt"

            # Alternate train/val split (e.g. 80-20)
            subset = "train" if converted_count % 5 != 0 else "val"
            txt_path = os.path.join(YOLO_LABEL_DIR, subset, txt_name)

            convert_xml_to_yolo(xml_path, txt_path)
            converted_count += 1

print(f"✅ Converted {converted_count} XML files to YOLO format.")


⚠️ Unknown class 'its an electrically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Electrical-damage\Electrical (1).xml
⚠️ Unknown class 'its an electrically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Electrical-damage\Electrical (10).xml
⚠️ Unknown class 'its an electrically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Electrical-damage\Electrical (11).xml
⚠️ Unknown class 'its an electrically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Electrical-damage\Electrical (12).xml
⚠️ Unknown class 'its an electrically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Electrical-damage\Electrical (13).xml
⚠️ Unknown class 'its an electrically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Electrical-damage\Electrical (14).xml
⚠️ Unknown class 'its an electrically dam

In [7]:
import os
import xml.etree.ElementTree as ET

# Update this mapping based on the exact class names in your XML files
CLASS_MAPPING = {
    "clean": 0,
    "dust": 1,
    "its an physically damaged panel": 2,
    "bird-drop": 3,
    "snow-covered": 4,
    "its an electrically damaged panel": 5
}

ANNOTATION_DIR = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"
YOLO_LABEL_DIR = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_YOLO/labels"

# Create output folders
for subset in ['train', 'val']:
    os.makedirs(os.path.join(YOLO_LABEL_DIR, subset), exist_ok=True)

def convert_xml_to_yolo(xml_path, txt_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    size = root.find('size')
    img_w = int(size.find('width').text)
    img_h = int(size.find('height').text)

    yolo_lines = []

    for obj in root.findall('object'):
        class_name = obj.find('name').text.lower().strip()
        if class_name not in CLASS_MAPPING:
            print(f"⚠️ Unknown class '{class_name}' in {xml_path}")
            continue
        class_id = CLASS_MAPPING[class_name]

        bbox = obj.find('bndbox')
        xmin = int(float(bbox.find('xmin').text))
        ymin = int(float(bbox.find('ymin').text))
        xmax = int(float(bbox.find('xmax').text))
        ymax = int(float(bbox.find('ymax').text))

        x_center = ((xmin + xmax) / 2) / img_w
        y_center = ((ymin + ymax) / 2) / img_h
        width = (xmax - xmin) / img_w
        height = (ymax - ymin) / img_h

        yolo_lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}")

    with open(txt_path, "w") as f:
        f.write("\n".join(yolo_lines))

# Convert all XMLs with 80-20 train/val split
converted_count = 0
for folder in os.listdir(ANNOTATION_DIR):
    folder_path = os.path.join(ANNOTATION_DIR, folder)
    if not os.path.isdir(folder_path):
        continue

    for file in os.listdir(folder_path):
        if file.endswith(".xml"):
            xml_path = os.path.join(folder_path, file)
            txt_name = os.path.splitext(file)[0] + ".txt"

            subset = "train" if converted_count % 5 != 0 else "val"  # 80% train, 20% val split
            txt_path = os.path.join(YOLO_LABEL_DIR, subset, txt_name)

            convert_xml_to_yolo(xml_path, txt_path)
            converted_count += 1

print(f"✅ Converted {converted_count} XML files to YOLO format.")


⚠️ Unknown class 'its a physically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage\Physical (22).xml
⚠️ Unknown class 'its a physically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage\Physical (23).xml
⚠️ Unknown class 'its a physically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage\Physical (24).xml
⚠️ Unknown class 'its a physically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage\Physical (25).xml
⚠️ Unknown class 'its a physically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage\Physical (26).xml
⚠️ Unknown class 'its a physically damaged panel' in C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage\Physical (27).xml
⚠️ Unknown class 'its a physically damaged panel' in C:/Users/madhugula padmavathi

In [8]:
import os
import xml.etree.ElementTree as ET

# Define your valid classes here exactly as in your dataset
valid_classes = [
    "clean",
    "dust",
    "bird-drop",
    "snow-covered",
    "Its an Physically damaged panel",
    "Its an Electrically damaged panel"
]

def convert_bbox_to_yolo(size, box):
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    xmin, ymin, xmax, ymax = box
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    width = xmax - xmin
    height = ymax - ymin
    x_center *= dw
    width *= dw
    y_center *= dh
    height *= dh
    return (x_center, y_center, width, height)

def process_xml_file(xml_file_path, output_folder):
    try:
        tree = ET.parse(xml_file_path)
        root = tree.getroot()

        size = root.find('size')
        w = int(size.find('width').text)
        h = int(size.find('height').text)

        yolo_lines = []
        for obj in root.findall('object'):
            class_name = obj.find('name').text.strip()

            if class_name not in valid_classes:
                print(f"⚠️ Warning: Unknown class '{class_name}' in file {xml_file_path}")
                # You can choose to skip this object or the entire file here
                # For example, skip this object only:
                continue

            class_id = valid_classes.index(class_name)
            bndbox = obj.find('bndbox')
            xmin = float(bndbox.find('xmin').text)
            ymin = float(bndbox.find('ymin').text)
            xmax = float(bndbox.find('xmax').text)
            ymax = float(bndbox.find('ymax').text)

            yolo_bbox = convert_bbox_to_yolo((w, h), (xmin, ymin, xmax, ymax))
            yolo_line = f"{class_id} " + " ".join(f"{coord:.6f}" for coord in yolo_bbox)
            yolo_lines.append(yolo_line)

        if yolo_lines:
            # Save YOLO txt file with same base name as XML but .txt extension
            base_filename = os.path.splitext(os.path.basename(xml_file_path))[0]
            output_path = os.path.join(output_folder, base_filename + ".txt")

            with open(output_path, 'w') as f:
                f.write("\n".join(yolo_lines))
            print(f"✅ Converted {xml_file_path} to {output_path}")

        else:
            print(f"⚠️ No valid objects found in {xml_file_path}. Skipping file.")

    except ET.ParseError as e:
        print(f"❌ XML Parse error in file {xml_file_path}: {e}")
    except Exception as e:
        print(f"❌ Error processing file {xml_file_path}: {e}")

def convert_all_xmls_in_folder(xml_folder_path, output_folder):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    xml_files = [f for f in os.listdir(xml_folder_path) if f.endswith('.xml')]
    print(f"Found {len(xml_files)} XML files in {xml_folder_path}")

    for xml_file in xml_files:
        full_xml_path = os.path.join(xml_folder_path, xml_file)
        process_xml_file(full_xml_path, output_folder)

# ==== USAGE ====

xml_folder = r"C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\Physical-Damage"
output_folder = r"C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\yolo_labels"

convert_all_xmls_in_folder(xml_folder, output_folder)


Found 50 XML files in C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\Physical-Damage
✅ Converted C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\Physical-Damage\Physical (1).xml to C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\yolo_labels\Physical (1).txt
✅ Converted C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\Physical-Damage\Physical (10).xml to C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\yolo_labels\Physical (10).txt
✅ Converted C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\Physical-Damage\Physical (11).xml to C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\yolo_labels\Physical (11).txt
✅ Converted C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\Physical-Damage\Physical (12).xml to C:\Users\madhugula padmavathi\Downloads\SolarPanel_Defect_Cleaned\yolo_labels\Physical (12).txt
✅ Converted C:\Users\madhugula padmavathi\Downloads\So

In [10]:
import os
import xml.etree.ElementTree as ET

# Define your valid classes and folder-to-class mapping
valid_classes = [
    "clean",
    "dust",
    "bird-drop",
    "snow-covered",
    "its a physically damaged panel",
    "its an electrically damaged panel"
]

folder_to_class = {
    "clean": "clean",
    "dust": "dust",
    "bird-drop": "bird-drop",
    "snow-covered": "snow-covered",
    "physical-damage": "its a physically damaged panel",
    "electrical-damage": "its an electrically damaged panel"
}

def convert_bbox(size, box):
    """
    Convert Pascal VOC bbox to YOLO format
    size: (width, height)
    box: (xmin, ymin, xmax, ymax)
    Returns (x_center, y_center, width, height) normalized
    """
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]

    xmin, ymin, xmax, ymax = box
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    w = xmax - xmin
    h = ymax - ymin

    x_center *= dw
    w *= dw
    y_center *= dh
    h *= dh

    return (x_center, y_center, w, h)

def process_xml(xml_path, output_dir):
    tree = ET.parse(xml_path)
    root = tree.getroot()

    # Get folder name from XML (to get class from folder_to_class mapping)
    folder = root.find('folder').text.strip().lower()

    if folder not in folder_to_class:
        print(f"⚠️ Warning: Folder '{folder}' not in folder_to_class mapping for file {xml_path}. Skipping.")
        return

    expected_class_name = folder_to_class[folder]

    size_tag = root.find('size')
    width = int(size_tag.find('width').text)
    height = int(size_tag.find('height').text)

    yolo_lines = []
    valid_object_found = False

    for obj in root.findall('object'):
        class_name_raw = obj.find('name').text.strip().lower()
        # Normalize class name (strip and lower)
        class_name = class_name_raw

        # Check class name against expected_class_name
        if class_name != expected_class_name:
            print(f"⚠️ Warning: Class '{class_name_raw}' in {xml_path} does not match expected '{expected_class_name}'. Skipping this object.")
            continue

        if class_name not in valid_classes:
            print(f"⚠️ Warning: Unknown class '{class_name_raw}' in {xml_path}. Skipping this object.")
            continue

        valid_object_found = True

        class_id = valid_classes.index(class_name)

        bndbox = obj.find('bndbox')
        xmin = float(bndbox.find('xmin').text)
        ymin = float(bndbox.find('ymin').text)
        xmax = float(bndbox.find('xmax').text)
        ymax = float(bndbox.find('ymax').text)

        bbox = convert_bbox((width, height), (xmin, ymin, xmax, ymax))
        yolo_line = f"{class_id} {' '.join(f'{a:.6f}' for a in bbox)}"
        yolo_lines.append(yolo_line)

    if not valid_object_found:
        print(f"⚠️ No valid objects found in {xml_path}. Skipping file.")
        return

    # Save YOLO txt file
    base_filename = os.path.splitext(os.path.basename(xml_path))[0]
    output_path = os.path.join(output_dir, f"{base_filename}.txt")
    with open(output_path, 'w') as f:
        f.write('\n'.join(yolo_lines))

    print(f"✅ Converted {xml_path} to {output_path}")

def main(input_base_dir, output_dir):
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Walk through folders inside input_base_dir
    for folder_name in os.listdir(input_base_dir):
        folder_path = os.path.join(input_base_dir, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # Process XML files in the folder
        for filename in os.listdir(folder_path):
            if not filename.endswith('.xml'):
                continue

            xml_file_path = os.path.join(folder_path, filename)
            process_xml(xml_file_path, output_dir)

if __name__ == "__main__":
    # Change these paths accordingly
    input_base_dir = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"
    output_dir = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned/yolo_labels"

    main(input_base_dir, output_dir)


✅ Converted C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Bird-drop\Bird (1).xml to C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned/yolo_labels\Bird (1).txt
✅ Converted C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Bird-drop\Bird (10).xml to C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned/yolo_labels\Bird (10).txt
✅ Converted C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Bird-drop\Bird (11).xml to C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned/yolo_labels\Bird (11).txt
✅ Converted C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Bird-drop\Bird (12).xml to C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned/yolo_labels\Bird (12).txt
✅ Converted C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Bird-drop\Bird (13).xml to C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned/yolo_labels\Bird (13).txt
✅ Converted C:/Us

In [11]:
import os
import xml.etree.ElementTree as ET

# Path to your dataset with XML files
input_dir = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"

# Output folder for YOLO annotations
output_dir = os.path.join(input_dir, "yolo_labels")
os.makedirs(output_dir, exist_ok=True)

# Natural-language label to short YOLO class name
class_name_to_folder = {
    "clean": "clean",
    "dust": "dust",
    "bird-drop": "bird-drop",
    "snow-covered": "snow-covered",
    "its a physically damaged panel": "physical-damage",
    "its an physically damaged panel": "physical-damage",  # typo case
    "its an electrically damaged panel": "electrical-damage"
}

# List of final class names used in YOLO format
valid_classes = list(set(class_name_to_folder.values()))
valid_classes.sort()

# Process all XML files recursively
for root, _, files in os.walk(input_dir):
    for file in files:
        if file.endswith(".xml"):
            xml_path = os.path.join(root, file)
            tree = ET.parse(xml_path)
            root_element = tree.getroot()

            filename = root_element.find("filename").text
            width = int(root_element.find("size/width").text)
            height = int(root_element.find("size/height").text)

            yolo_lines = []

            for obj in root_element.findall("object"):
                name = obj.find("name").text.strip().lower()

                # Convert natural name to class label
                class_label = class_name_to_folder.get(name)
                if class_label is None or class_label not in valid_classes:
                    print(f"Skipping unknown class: '{name}' in {file}")
                    continue

                class_id = valid_classes.index(class_label)

                bndbox = obj.find("bndbox")
                xmin = int(bndbox.find("xmin").text)
                ymin = int(bndbox.find("ymin").text)
                xmax = int(bndbox.find("xmax").text)
                ymax = int(bndbox.find("ymax").text)

                # Convert to YOLO format
                x_center = ((xmin + xmax) / 2) / width
                y_center = ((ymin + ymax) / 2) / height
                w = (xmax - xmin) / width
                h = (ymax - ymin) / height

                yolo_line = f"{class_id} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
                yolo_lines.append(yolo_line)

            if yolo_lines:
                output_filename = os.path.splitext(filename)[0] + ".txt"
                output_path = os.path.join(output_dir, output_filename)
                with open(output_path, "w") as f:
                    f.write("\n".join(yolo_lines))
            else:
                print(f"No valid objects found in {file}")

print(f"✅ Conversion completed. YOLO annotations saved in: {output_dir}")


Skipping unknown class: 'broken-damaged' in Physical (4).xml
No valid objects found in Physical (4).xml
✅ Conversion completed. YOLO annotations saved in: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\yolo_labels


In [12]:
import os
import xml.etree.ElementTree as ET

# Input: folder with XMLs, Output: YOLO labels folder
input_dir = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"
output_dir = os.path.join(input_dir, "yolo_labels")
os.makedirs(output_dir, exist_ok=True)

# Fix mislabels → YOLO standard label
label_map = {
    "clean": "clean",
    "dust": "dust",
    "bird-drop": "bird-drop",
    "snow-covered": "snow-covered",
    "its a physically damaged panel": "physical-damage",
    "its an physically damaged panel": "physical-damage",
    "its a physically damage panel": "physical-damage",
    "its a physical damaged panel": "physical-damage",
    "its an physically damage panel": "physical-damage",
    "its a electrical damaged panel": "electrical-damage",
    "its an electrically damaged panel": "electrical-damage",
    "its a electrically damaged panel": "electrical-damage",
    "its an electrical damaged panel": "electrical-damage",
    "electrical damage": "electrical-damage",
    "physical damage": "physical-damage",
    "broken-damaged": "physical-damage",
    "damaged": "physical-damage",
}

# Create label to ID map for YOLO classes
class_names = sorted(set(label_map.values()))
class_to_id = {name: i for i, name in enumerate(class_names)}

unknown_classes = set()
skipped_files = []

for subdir, _, files in os.walk(input_dir):
    for file in files:
        if file.endswith(".xml"):
            xml_path = os.path.join(subdir, file)
            try:
                tree = ET.parse(xml_path)
                root = tree.getroot()

                size = root.find("size")
                width = int(size.find("width").text)
                height = int(size.find("height").text)

                yolo_lines = []

                for obj in root.findall("object"):
                    raw_label = obj.find("name").text.strip().lower()
                    label = label_map.get(raw_label, None)
                    if label is None:
                        unknown_classes.add(raw_label)
                        continue

                    class_id = class_to_id[label]
                    bndbox = obj.find("bndbox")
                    xmin = int(bndbox.find("xmin").text)
                    ymin = int(bndbox.find("ymin").text)
                    xmax = int(bndbox.find("xmax").text)
                    ymax = int(bndbox.find("ymax").text)

                    # YOLO format: class_id x_center y_center width height (all normalized)
                    x_center = (xmin + xmax) / 2 / width
                    y_center = (ymin + ymax) / 2 / height
                    box_width = (xmax - xmin) / width
                    box_height = (ymax - ymin) / height

                    yolo_line = f"{class_id} {x_center:.6f} {y_center:.6f} {box_width:.6f} {box_height:.6f}"
                    yolo_lines.append(yolo_line)

                if yolo_lines:
                    output_file = os.path.join(output_dir, file.replace(".xml", ".txt"))
                    with open(output_file, "w") as f:
                        f.write("\n".join(yolo_lines))
                else:
                    skipped_files.append(file)

            except Exception as e:
                print(f"❌ Error parsing {file}: {e}")
                skipped_files.append(file)

# === Summary ===
print(f"\n✅ YOLO conversion completed. Labels saved to: {output_dir}")
print(f"\n📊 Classes used: {class_to_id}")
if unknown_classes:
    print("\n⚠️ Unknown class labels (please review these):")
    for uc in sorted(unknown_classes):
        print(f"  - '{uc}'")
if skipped_files:
    print(f"\n⚠️ Skipped {len(skipped_files)} files (no valid objects or errors):")
    for sf in skipped_files:
        print(f"  - {sf}")



✅ YOLO conversion completed. Labels saved to: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\yolo_labels

📊 Classes used: {'bird-drop': 0, 'clean': 1, 'dust': 2, 'electrical-damage': 3, 'physical-damage': 4, 'snow-covered': 5}


In [15]:
import os
import xml.etree.ElementTree as ET

# Base folder containing subfolders with images and XMLs
base_path = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"

# Mapping annotation texts (from XML <name>) to YOLO classes
annotation_name_corrections = {
    "its a physically damaged panel": "physical-damage",
    "its an physically damaged panel": "physical-damage",
    "broken-damaged": "physical-damage",
    "physical damage": "physical-damage",

    "its an electrically damaged panel": "electrical-damage",
    "electrical damage": "electrical-damage",

    "clean": "clean",
    "dust": "dust",
    "bird-drop": "bird-drop",
    "snow-covered": "snow-covered"
}

# YOLO classes and their indices (assign an index per class)
class_names = sorted(set(annotation_name_corrections.values()))
class_to_index = {cname: idx for idx, cname in enumerate(class_names)}

print("📊 Classes used:", class_to_index)

# Folder to save YOLO labels
output_labels_folder = os.path.join(base_path, "yolo_labels")
os.makedirs(output_labels_folder, exist_ok=True)

unknown_classes_found = set()
total_xml = 0
converted_xml = 0

def convert_bbox(size, box):
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    xmin, ymin, xmax, ymax = box
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    w = xmax - xmin
    h = ymax - ymin
    return (x_center * dw, y_center * dh, w * dw, h * dh)

# Process each folder under base_path
for folder_name in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder_name)
    if not os.path.isdir(folder_path):
        continue
    print(f"Processing folder: {folder_path}")

    # For each XML file in folder
    for file in os.listdir(folder_path):
        if not file.endswith(".xml"):
            continue
        total_xml += 1
        xml_path = os.path.join(folder_path, file)

        try:
            tree = ET.parse(xml_path)
            root = tree.getroot()
        except Exception as e:
            print(f"Failed to parse {xml_path}: {e}")
            continue

        size_tag = root.find("size")
        if size_tag is None:
            print(f"No size tag in {xml_path}, skipping")
            continue

        width = int(size_tag.find("width").text)
        height = int(size_tag.find("height").text)

        yolo_lines = []

        for obj in root.findall("object"):
            original_name = obj.find("name").text.strip().lower()

            # Map to corrected class
            if original_name in annotation_name_corrections:
                class_name = annotation_name_corrections[original_name]
            else:
                # Unknown class found, remember to show it once
                if original_name not in unknown_classes_found:
                    print(f"Skipping unknown class '{original_name}' in {file}")
                    unknown_classes_found.add(original_name)
                continue  # skip this object

            bndbox = obj.find("bndbox")
            xmin = float(bndbox.find("xmin").text)
            ymin = float(bndbox.find("ymin").text)
            xmax = float(bndbox.find("xmax").text)
            ymax = float(bndbox.find("ymax").text)

            # Convert bbox to YOLO format
            x_center, y_center, w, h = convert_bbox((width, height), (xmin, ymin, xmax, ymax))

            class_idx = class_to_index[class_name]
            yolo_line = f"{class_idx} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}"
            yolo_lines.append(yolo_line)

        if yolo_lines:
            # Save YOLO annotation file
            label_filename = os.path.splitext(file)[0] + ".txt"
            label_path = os.path.join(output_labels_folder, label_filename)
            with open(label_path, "w") as f:
                f.write("\n".join(yolo_lines))
            converted_xml += 1
        else:
            print(f"No valid objects found in {file}, skipping saving.")

print("✅ Conversion completed.")
print(f"Total XML files processed: {total_xml}")
print(f"Successfully converted to YOLO format: {converted_xml}")
print(f"YOLO labels saved in: {output_labels_folder}")
print(f"Unknown classes encountered: {unknown_classes_found if unknown_classes_found else 'None'}")


📊 Classes used: {'bird-drop': 0, 'clean': 1, 'dust': 2, 'electrical-damage': 3, 'physical-damage': 4, 'snow-covered': 5}
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Bird-drop
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Clean
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Dusty
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Electrical-damage
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Snow-Covered
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\SolarPanel_YOLO_Dataset
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\yolo_labels
✅ Conversion completed.
Total XML files processed: 303
Successfully converted to YOLO format: 

In [16]:
import os

base_path = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"

total_files = 0
for folder in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue
    xml_files = [f for f in os.listdir(folder_path) if f.lower().endswith(".xml")]
    print(f"{folder}: {len(xml_files)} XML files")
    total_files += len(xml_files)

print(f"Total XML files counted: {total_files}")


Bird-drop: 50 XML files
Clean: 52 XML files
Dusty: 51 XML files
Electrical-damage: 50 XML files
Physical-Damage: 50 XML files
Snow-Covered: 50 XML files
SolarPanel_YOLO_Dataset: 0 XML files
yolo_labels: 0 XML files
Total XML files counted: 303


In [None]:
import os

base_path = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"
max_files_per_folder = 50

for folder in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue

    xml_files = [f for f in os.listdir(folder_path) if f.lower().endswith(".xml")]
    file_count = len(xml_files)

    print(f"{folder}: {file_count} XML files")

    if file_count > max_files_per_folder:
        print(f"  --> Folder has {file_count - max_files_per_folder} extra XML files:")
        # Sort files alphabetically so extras are the last ones
        xml_files.sort()
        extras = xml_files[max_files_per_folder:]  # Files after the first 50
        for extra_file in extras:
            print(f"      {extra_file}")

print("Done.")


In [14]:
import os
import xml.etree.ElementTree as ET

# Base path to your dataset folders
base_path = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"

# Folders where images & XML annotations are stored
images_dirs = [
    "Clean",
    "Physical-Damage",
    "Electrical-Damage",
    "Dust",
    "Bird-Drop",
    "Snow-Covered"
]

# YOLO class name to ID mapping (as you defined)
class_mapping = {
    "clean": 1,
    "physical-damage": 4,
    "electrical-damage": 3,
    "dust": 2,
    "bird-drop": 0,
    "snow-covered": 5
}

# Map possible variations in annotation names to correct class names
annotation_name_corrections = {
    "its a physically damaged panel": "physical-damage",
    "physical damage": "physical-damage",
    "its an electrically damaged panel": "electrical-damage",
    "electrical damage": "electrical-damage",
    "clean": "clean",
    "dust": "dust",
    "bird-drop": "bird-drop",
    "snow-covered": "snow-covered"
}

# Where to save YOLO labels
yolo_labels_dir = os.path.join(base_path, "yolo_labels")
os.makedirs(yolo_labels_dir, exist_ok=True)

def convert_bbox(size, box):
    """
    Convert VOC bbox to YOLO bbox format (normalized center_x, center_y, width, height)
    size: tuple of (width, height)
    box: tuple of (xmin, ymin, xmax, ymax)
    """
    dw = 1.0 / size[0]
    dh = 1.0 / size[1]
    xmin, ymin, xmax, ymax = box
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    width = xmax - xmin
    height = ymax - ymin
    x_center_norm = x_center * dw
    y_center_norm = y_center * dh
    width_norm = width * dw
    height_norm = height * dh
    return (x_center_norm, y_center_norm, width_norm, height_norm)

def parse_xml_to_yolo(xml_file):
    """
    Parse a single Pascal VOC XML file and return YOLO formatted annotations as strings.
    Each line is: <class_id> <x_center> <y_center> <width> <height>
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    size = root.find('size')
    if size is None:
        print(f"⚠️ No size tag found in {xml_file}, skipping.")
        return []

    width = int(size.find('width').text)
    height = int(size.find('height').text)

    yolo_annotations = []

    objects = root.findall('object')
    if not objects:
        print(f"⚠️ No object tag found in {xml_file}, skipping.")
        return []

    for obj in objects:
        name = obj.find('name').text.lower().strip()
        # Correct the name if needed
        name_corrected = annotation_name_corrections.get(name, None)

        if name_corrected is None:
            # Unknown class - skip this object and print warning
            print(f"Skipping unknown class '{name}' in {os.path.basename(xml_file)}")
            continue

        class_id = class_mapping.get(name_corrected)
        if class_id is None:
            print(f"Skipping unmapped class '{name_corrected}' in {os.path.basename(xml_file)}")
            continue

        bndbox = obj.find('bndbox')
        xmin = int(float(bndbox.find('xmin').text))
        ymin = int(float(bndbox.find('ymin').text))
        xmax = int(float(bndbox.find('xmax').text))
        ymax = int(float(bndbox.find('ymax').text))

        bbox = convert_bbox((width, height), (xmin, ymin, xmax, ymax))
        yolo_line = f"{class_id} " + " ".join([f"{coord:.6f}" for coord in bbox])
        yolo_annotations.append(yolo_line)

    return yolo_annotations

def main():
    total_xml_files = 0
    converted_files = 0

    for subdir in images_dirs:
        folder_path = os.path.join(base_path, subdir)
        if not os.path.exists(folder_path):
            print(f"⚠️ Skipping missing folder: {folder_path}")
            continue

        print(f"Processing folder: {folder_path}")
        for file in os.listdir(folder_path):
            if file.lower().endswith(".xml"):
                total_xml_files += 1
                xml_path = os.path.join(folder_path, file)
                yolo_ann = parse_xml_to_yolo(xml_path)
                if yolo_ann:
                    label_filename = os.path.splitext(file)[0] + ".txt"
                    label_path = os.path.join(yolo_labels_dir, label_filename)
                    with open(label_path, "w") as f:
                        f.write("\n".join(yolo_ann))
                    converted_files += 1
                else:
                    # No valid annotations found or skipped all objects
                    pass

    print(f"\n✅ Conversion completed.")
    print(f"Total XML files processed: {total_xml_files}")
    print(f"Successfully converted to YOLO format: {converted_files}")
    print(f"YOLO labels saved in: {yolo_labels_dir}")

if __name__ == "__main__":
    main()


Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Clean
Processing folder: C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned\Physical-Damage
Skipping unknown class 'its an physically damaged panel' in Physical (1).xml
Skipping unknown class 'its an physically damaged panel' in Physical (10).xml
Skipping unknown class 'its an physically damaged panel' in Physical (11).xml
Skipping unknown class 'its an physically damaged panel' in Physical (12).xml
Skipping unknown class 'its an physically damaged panel' in Physical (13).xml
Skipping unknown class 'its an physically damaged panel' in Physical (14).xml
Skipping unknown class 'its an physically damaged panel' in Physical (15).xml
Skipping unknown class 'its an physically damaged panel' in Physical (16).xml
Skipping unknown class 'its an physically damaged panel' in Physical (17).xml
Skipping unknown class 'its an physically damaged panel' in Physical (18).xml
Skipping unknown class 'its a

In [17]:
import os

base_path = r"C:/Users/madhugula padmavathi/Downloads/SolarPanel_Defect_Cleaned"
max_files_per_folder = 50

for folder in os.listdir(base_path):
    folder_path = os.path.join(base_path, folder)
    if not os.path.isdir(folder_path):
        continue

    xml_files = [f for f in os.listdir(folder_path) if f.lower().endswith(".xml")]
    file_count = len(xml_files)

    print(f"{folder}: {file_count} XML files")

    if file_count > max_files_per_folder:
        print(f"  --> Folder has {file_count - max_files_per_folder} extra XML files:")
        # Sort files alphabetically so extras are the last ones
        xml_files.sort()
        extras = xml_files[max_files_per_folder:]  # Files after the first 50
        for extra_file in extras:
            print(f"      {extra_file}")

print("Done.")


Bird-drop: 50 XML files
Clean: 52 XML files
  --> Folder has 2 extra XML files:
      Clean (98).xml
      Clean (99).xml
Dusty: 51 XML files
  --> Folder has 1 extra XML files:
      Dust (99).xml
Electrical-damage: 50 XML files
Physical-Damage: 50 XML files
Snow-Covered: 50 XML files
SolarPanel_YOLO_Dataset: 0 XML files
yolo_labels: 0 XML files
Done.


In [None]:
import os
import xml.etree.ElementTree as ET

# 💡 Define your folder where all 6 class folders are stored
root_dir = r"C:/Users/madhugula padmavathi/DownloadsSolarPanel_Defect_Cleaned"

# 🔁 Mapping: Long label → Short label
label_map = {
    "its a cleaned panel": "clean",
    "its a dusty panel": "dust",
    "its a snow on panel": "snow-covered",
    "its bird drop on panel": "bird-drop",
    "its a damaged panel": "physical-damaged",
    "its a electrical damaged": "electrical-damaged"
}

# 🔧 Go through each XML file and replace label
def update_labels_in_xml(xml_path):
    tree = ET.parse(xml_path)
    root = tree.getroot()
    updated = False

    for obj in root.findall('object'):
        name_tag = obj.find('name')
        if name_tag is not None:
            original = name_tag.text.lower().strip()
            if original in label_map:
                name_tag.text = label_map[original]
                updated = True

    if updated:
        tree.write(xml_path)

# 📁 Traverse all subfolders and update .xml files
for subdir, _, files in os.walk(root_dir):
    for file in files:
        if file.endswith(".xml"):
            xml_path = os.path.join(subdir, file)
            update_labels_in_xml(xml_path)

print("✅ All labels updated successfully.")


✅ All labels updated successfully.
