# In [None]:
import os
import json
import xml.etree.ElementTree as ET

def convert_voc_to_coco(voc_dir, train_txt, output_file, voc_classes=('fire', 'smoke')):
    """Convert Pascal VOC XML annotations into a single COCO-format JSON file.

    For every name listed in ``train_txt`` the matching
    ``<voc_dir>/Annotations/<name>.xml`` file is parsed. One COCO image
    record is emitted per XML file, and one annotation record per
    ``<object>`` whose class name appears in ``voc_classes``.

    Args:
        voc_dir: Root directory of the VOC dataset; must contain an
            ``Annotations`` sub-directory holding the XML files.
        train_txt: Path to a split file with one image name (without
            extension) per line. Blank lines are ignored.
        output_file: Path of the JSON file to write.
        voc_classes: Class names to export. Category ids are assigned
            starting at 1 in the order given. Objects of any other class
            are ignored.

    Returns:
        The COCO dictionary (keys ``images``, ``annotations``,
        ``categories``) that was written to ``output_file``.

    Raises:
        OSError: If ``train_txt`` cannot be read or ``output_file`` cannot
            be written.
        xml.etree.ElementTree.ParseError: If an annotation file is not
            well-formed XML.
        AttributeError: If an annotation file lacks ``filename``, ``size``
            or an object's ``bndbox`` element.
        ValueError: If a size or bounding-box value is not numeric.
    """
    coco_data = {
        "images": [],
        "annotations": [],
        "categories": []
    }

    # Map each class name to its 1-based category id and register it.
    category_map = {}
    for category_id, class_name in enumerate(voc_classes, 1):
        category_map[class_name] = category_id
        coco_data['categories'].append({
            "id": category_id,
            "name": class_name,
            "supercategory": "none"
        })

    # Read the split file. Blank lines (e.g. a trailing newline at the end
    # of the file) are dropped so they are not looked up as ".xml".
    with open(train_txt, 'r') as f:
        image_names = [line.strip() for line in f if line.strip()]

    annotation_dir = os.path.join(voc_dir, "Annotations")
    annotation_id = 1
    image_id = 1

    for image_name in image_names:
        xml_file = image_name + ".xml"
        xml_path = os.path.join(annotation_dir, xml_file)

        if not os.path.exists(xml_path):
            print(f"XML file {xml_file} does not exist, skipping.")
            continue

        root = ET.parse(xml_path).getroot()

        # Image-level information.
        filename = root.find('filename').text.strip()
        size = root.find('size')
        width = int(size.find('width').text)
        height = int(size.find('height').text)

        coco_data['images'].append({
            "file_name": filename,
            "height": height,
            "width": width,
            "id": image_id
        })

        for obj in root.findall('object'):
            # Strip the class name the same way the file name is stripped,
            # so that "<name> fire </name>" still matches "fire". A missing
            # or empty <name> is treated as an unknown class.
            name_elem = obj.find('name')
            if name_elem is None or name_elem.text is None:
                continue
            category_id = category_map.get(name_elem.text.strip())
            if category_id is None:
                # Class not requested in voc_classes.
                continue

            bndbox = obj.find('bndbox')
            xmin = round(float(bndbox.find('xmin').text), 2)
            ymin = round(float(bndbox.find('ymin').text), 2)
            xmax = round(float(bndbox.find('xmax').text), 2)
            ymax = round(float(bndbox.find('ymax').text), 2)

            # COCO boxes are [x, y, width, height] rather than corner pairs.
            bbox_width = round(xmax - xmin, 2)
            bbox_height = round(ymax - ymin, 2)

            coco_data['annotations'].append({
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_id,
                "bbox": [xmin, ymin, bbox_width, bbox_height],
                "area": round(bbox_width * bbox_height, 2),
                "iscrowd": 0
            })
            annotation_id += 1

        # Ids are only consumed by images whose XML file was found.
        image_id += 1

    with open(output_file, 'w') as json_file:
        json.dump(coco_data, json_file, indent=4)

    return coco_data

# Root directory of the VOC dataset (e.g., r"G:\path\to\voc_type").
voc_dataset_path = r""

# Split file listing the image names to convert. val.txt selects the
# validation split; point this at train.txt to convert the training split.
train_txt_path = os.path.join(voc_dataset_path, "ImageSets", "Main", "val.txt")

# Destination of the generated COCO JSON
# (e.g., r"G:\path\to\instances_val2017.json").
output_coco_json = r""

# Convert the XML annotations of the selected split into one COCO JSON file.
convert_voc_to_coco(
    voc_dir=voc_dataset_path,
    train_txt=train_txt_path,
    output_file=output_coco_json,
)
