# In [2]:
import os
import csv
import xml.etree.ElementTree as ET

def _parse_coordinate(text):
    """Convert coordinate text to ``int``, accepting float-formatted values."""
    try:
        return int(text)
    except ValueError:
        # Values such as "48.0" are rejected by int(); truncate via float.
        return int(float(text))


def parse_xml(xml_file, image_folder):
    """Extract the labelled bounding boxes from one annotation XML file.

    The matching image is looked up in ``image_folder`` using the XML file's
    base name combined with each known image extension, tried in order.
    Every ``<object>`` element directly under the XML root yields one row.

    Args:
        xml_file: Path to the annotation XML file.
        image_folder: Directory expected to contain the annotated image.

    Returns:
        A list of dicts with the keys ``Filepath``, ``ObjectName``, ``x``,
        ``y``, ``width`` and ``height``.  ``x``/``y`` are taken from
        ``xmin``/``ymin``; ``width``/``height`` are ``xmax - xmin`` and
        ``ymax - ymin``.

    Raises:
        FileNotFoundError: If no image with a known extension exists for
            the XML file.
        ValueError: If an ``<object>`` lacks ``<name>``, ``<bndbox>`` or a
            coordinate tag, or a coordinate is not numeric.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # The image is expected to share the XML file's base name.
    xml_filename = os.path.splitext(os.path.basename(xml_file))[0]
    # Upper-case variants matter on case-sensitive filesystems; the new ones
    # are appended so existing matches keep their priority.
    for ext in ('.jpg', '.jpeg', '.png', '.JPG', '.JPEG', '.PNG'):
        image_path = os.path.join(image_folder, xml_filename + ext)
        if os.path.exists(image_path):
            break  # Found the image, stop searching
    else:
        raise FileNotFoundError(f"Image not found for XML file: {xml_file}")

    objects = []
    for obj in root.findall('object'):
        name_node = obj.find('name')
        bbox = obj.find('bndbox')
        if name_node is None or bbox is None:
            raise ValueError(
                f"<object> without <name> or <bndbox> in XML file: {xml_file}"
            )

        coords = {}
        for tag in ('xmin', 'ymin', 'xmax', 'ymax'):
            node = bbox.find(tag)
            if node is None or node.text is None:
                raise ValueError(
                    f"Missing <{tag}> value in XML file: {xml_file}"
                )
            coords[tag] = _parse_coordinate(node.text)

        objects.append({
            'Filepath': image_path,
            'ObjectName': name_node.text,
            'x': coords['xmin'],
            'y': coords['ymin'],
            'width': coords['xmax'] - coords['xmin'],
            'height': coords['ymax'] - coords['ymin'],
        })
    return objects

def xml_to_csv(xml_folder, image_folder, csv_file):
    """Write the bounding boxes of every XML file in a folder to one CSV.

    Each ``.xml`` entry in ``xml_folder`` is passed to ``parse_xml`` and the
    rows it returns are appended to ``csv_file`` under the header
    ``Filepath, ObjectName, x, y, width, height``.

    Args:
        xml_folder: Directory containing the annotation XML files.
        image_folder: Directory passed through to ``parse_xml`` for locating
            the image that belongs to each XML file.
        csv_file: Path of the CSV file to create (overwritten if present).
    """
    fieldnames = ['Filepath', 'ObjectName', 'x', 'y', 'width', 'height']

    # os.listdir returns entries in arbitrary order; sorting keeps the CSV
    # reproducible between runs.  Listing before opening the output also
    # avoids truncating an existing CSV when xml_folder cannot be read.
    xml_names = sorted(
        entry for entry in os.listdir(xml_folder)
        if entry.lower().endswith('.xml')  # accept .XML as well
    )

    with open(csv_file, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for xml_name in xml_names:
            xml_path = os.path.join(xml_folder, xml_name)
            writer.writerows(parse_xml(xml_path, image_folder))

# Input folders holding the annotation XML files and their images,
# plus the output CSV (a relative path, so it lands in the working directory).
xml_folder = 'C:/Users/aniru/Desktop/fresh/OCR/xml'
image_folder = 'C:/Users/aniru/Desktop/fresh/OCR/images'
csv_file = 'output.csv'

# Run the conversion: one CSV row per annotated object.
xml_to_csv(
    xml_folder=xml_folder,
    image_folder=image_folder,
    csv_file=csv_file,
)