In [1]:
import os
import xml.etree.ElementTree as ET
import tensorflow as tf

2024-09-22 14:40:40.578875: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Function to parse Pascal VOC annotation XML file
def parse_voc_annotation(xml_file):
    """Parses a single Pascal VOC annotation XML file.

    Args:
        xml_file: Path (or file object) of the annotation XML; handed
            directly to ``ET.parse``.

    Returns:
        A dict with the keys:
            'file_name': text of the ``<filename>`` element.
            'width', 'height': image size from ``<size>``, as ints.
            'boxes': list of ``[xmin, ymin, xmax, ymax]`` int lists, one per
                ``<object>`` element, in document order.
            'labels': list of ``<name>`` strings, parallel to 'boxes'.
        'boxes' and 'labels' are empty lists when the file has no
        ``<object>`` elements.

    Raises:
        AttributeError: if a required element (``filename``, ``size``,
            ``name``, ``bndbox`` or one of its coordinates) is missing.
        ValueError: if a size or coordinate value is not an integer string.
    """
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Extract image file name
    file_name = root.find('filename').text

    # Extract size of the image
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    # Extract bounding boxes and labels
    boxes = []
    labels = []

    for obj in root.findall('object'):
        label = obj.find('name').text

        # Get the bounding box coordinates
        bndbox = obj.find('bndbox')
        xmin = int(bndbox.find('xmin').text)
        ymin = int(bndbox.find('ymin').text)
        xmax = int(bndbox.find('xmax').text)
        ymax = int(bndbox.find('ymax').text)

        boxes.append([xmin, ymin, xmax, ymax])
        labels.append(label)

    # The return must sit outside the loop: returning from inside it would
    # stop after the first object and yield None for images with no objects.
    return {
        'file_name': file_name,
        'width': width,
        'height': height,
        'boxes': boxes,
        'labels': labels,
    }

In [3]:
# Function to create a single tf.train.Example for a single image-annotation pair
def create_tf_example(annotation, image_dir, label_map=None):
    """Converts annotation data to tf.train.Example.

    Args:
        annotation: Dict with the keys 'file_name', 'width', 'height',
            'boxes' (sequence of ``[xmin, ymin, xmax, ymax]``) and 'labels'
            (sequence of class-name strings parallel to 'boxes').
        image_dir: Directory containing the image named by
            ``annotation['file_name']``.
        label_map: Optional dict mapping class-name strings to integer class
            ids. When omitted, the original placeholder mapping is used:
            1 for the label ``'your_class_name'`` and 0 for everything else.

    Returns:
        A ``tf.train.Example`` holding the raw encoded image bytes, the file
        name, the image format, the box coordinates divided by the image
        width/height, and the class names and ids.

    Raises:
        KeyError: if ``label_map`` is given and a label is not in it.
        ZeroDivisionError: if the annotation's width or height is 0 and it
            has at least one box.
    """
    # Read the image file as-is; the bytes are stored without re-encoding.
    image_path = os.path.join(image_dir, annotation['file_name'])
    with tf.io.gfile.GFile(image_path, 'rb') as fid:
        encoded_image = fid.read()

    # Bytes features need bytes values, so the file name is UTF-8 encoded.
    filename = annotation['file_name'].encode('utf8')
    image_format = b'jpg'  # or 'png' if your images are in PNG format

    # Scale pixel coordinates to fractions of the image width/height.
    xmins = [box[0] / annotation['width'] for box in annotation['boxes']]
    xmaxs = [box[2] / annotation['width'] for box in annotation['boxes']]
    ymins = [box[1] / annotation['height'] for box in annotation['boxes']]
    ymaxs = [box[3] / annotation['height'] for box in annotation['boxes']]

    labels = annotation['labels']

    classes_text = [label.encode('utf8') for label in labels]
    if label_map is None:
        # Placeholder mapping kept for backward compatibility; pass a
        # label_map to get real class ids.
        classes = [1 if label == 'your_class_name' else 0 for label in labels]
    else:
        # Fail loudly on an unmapped label instead of silently writing a
        # wrong class id into the record.
        classes = [label_map[label] for label in labels]

    # Create a tf.train.Example message ready to be written to a file
    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/filename': tf.train.Feature(bytes_list=tf.train.BytesList(value=[filename])),
        'image/encoded': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_image])),
        'image/format': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_format])),
        'image/object/bbox/xmin': tf.train.Feature(float_list=tf.train.FloatList(value=xmins)),
        'image/object/bbox/xmax': tf.train.Feature(float_list=tf.train.FloatList(value=xmaxs)),
        'image/object/bbox/ymin': tf.train.Feature(float_list=tf.train.FloatList(value=ymins)),
        'image/object/bbox/ymax': tf.train.Feature(float_list=tf.train.FloatList(value=ymaxs)),
        'image/object/class/text': tf.train.Feature(bytes_list=tf.train.BytesList(value=classes_text)),
        'image/object/class/label': tf.train.Feature(int64_list=tf.train.Int64List(value=classes)),
    }))

    return tf_example

In [4]:
# Function to create a TFRecord file for a specific dataset (train or test)
def create_tfrecord(output_path, annotations, image_dir):
    """Converts a list of annotations to a TFRecord file.

    Args:
        output_path: Path of the TFRecord file to write.
        annotations: Iterable of annotation dicts, each passed to
            ``create_tf_example``.
        image_dir: Directory containing the images the annotations refer to;
            forwarded to ``create_tf_example``.

    Any exception raised while building or writing an example propagates to
    the caller; the writer is closed first.
    """
    # The context manager closes the writer even when an example fails to
    # build, so the file handle is never leaked.
    with tf.io.TFRecordWriter(output_path) as writer:
        for annotation in annotations:
            tf_example = create_tf_example(annotation, image_dir)
            writer.write(tf_example.SerializeToString())
    print(f"TFRecord saved at: {output_path}")

In [5]:
# Main function to create TFRecords for both train and test directories
def create_tfrecords_for_datasets(base_dir):
    """Creates TFRecord files for both train and test datasets.

    For each of the ``train`` and ``test`` subdirectories of ``base_dir``,
    parses every ``.xml`` file found there and writes the result to
    ``<base_dir>/<subdir>_data.tfrecord``. The same subdirectory is used as
    the image directory.

    Args:
        base_dir: Dataset root containing ``train`` and ``test`` folders.
            A trailing path separator is optional.
    """
    for dataset_type in ['train', 'test']:  # Process both train and test
        data_dir = os.path.join(base_dir, dataset_type)
        # Join path components rather than concatenating strings, so the
        # output path is correct with or without a trailing slash.
        output_path = os.path.join(base_dir, f"{dataset_type}_data.tfrecord")

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # record order identical from run to run.
        annotations = [
            parse_voc_annotation(os.path.join(data_dir, xml_file))
            for xml_file in sorted(os.listdir(data_dir))
            if xml_file.endswith('.xml')
        ]

        # Create a TFRecord file for each dataset (train or test)
        create_tfrecord(output_path, annotations, data_dir)

In [6]:
# Dataset root; the call below processes its 'train' and 'test' subfolders
# and prints the path of each TFRecord file it writes.
data_dir = 'data/osetr/'
create_tfrecords_for_datasets(data_dir)

TFRecord saved at: data/osetr/train_data.tfrecord
TFRecord saved at: data/osetr/test_data.tfrecord
