# Convert PASCAL VOC Objects from Roboflow to ESRI

### Organise Roboflow Files

In [None]:
# steps:
    #1: rename the train folder to images
    #2: move the xml files from the images folder into a new folder named labels in the dataset root directory
    #3: delete the other roboflow files (manual step, this cell does not do it)

import os
import shutil

# put your Roboflow PASCAL VOC training data folder in the same directory as this notebook

# Root folder of the Roboflow export; every other path in this cell is derived from it.
labels_folder_name = './TwoClass_test/'

input_dir = os.path.join(labels_folder_name, 'train')
image_dir = os.path.join(labels_folder_name, 'images')
label_dir = os.path.join(labels_folder_name, 'labels')

# Step 1: rename train -> images (only when the export has not been renamed yet).
if os.path.exists(input_dir):
    os.rename(input_dir, image_dir)

# Step 2: move the XML labels out of the images folder.
# This is deliberately independent of the rename above so the cell can be
# re-run after a partial failure: once train has been renamed, a second run
# still finishes moving whatever XML files are left in images.
if os.path.isdir(image_dir):
    # exist_ok: a labels folder left over from an earlier run is not an error
    os.makedirs(label_dir, exist_ok=True)

    for file_name in os.listdir(image_dir):
        if file_name.endswith('.xml'):
            # Move the XML file into the labels folder, keeping its name
            shutil.move(os.path.join(image_dir, file_name), label_dir)


#### Rename the files

Rename the images and labels to sequential numbers to clean up the Roboflow naming convention.

In [None]:
# Define the paths to the image and label folders
image_dir = './TwoClass_test/images'
label_dir = './TwoClass_test/labels'

# Get the sorted list of images; the numbering below follows this order
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))

# Index the labels by file stem (name without extension) so each image can be
# matched to its own label explicitly
labels_by_stem = {
    os.path.splitext(f)[0]: f
    for f in os.listdir(label_dir)
    if f.endswith('.xml')
}

# Check if the number of files in each folder is the same
if len(image_files) != len(labels_by_stem):
    print("Error: The number of files in the folders doesn't match.")
    # exit() is an interactive helper installed by the site module; raising
    # SystemExit directly works in scripts and notebooks alike
    raise SystemExit(1)

# Pair every image with the label that shares its stem. Sorting the two lists
# independently and zipping them can mis-pair files when one stem is a prefix
# of another, because '.jpg' and '.xml' sort differently against the rest of
# the name.
unlabelled = [f for f in image_files if os.path.splitext(f)[0] not in labels_by_stem]
if unlabelled:
    print(f"Error: No matching label file for image {unlabelled[0]}.")
    raise SystemExit(1)
label_files_1 = [labels_by_stem[os.path.splitext(f)[0]] for f in image_files]

# Rename the files
for i, (image_file, label_file_1) in enumerate(zip(image_files, label_files_1), start=1):
    # Generate the new file names: zero-padded running number, e.g. 00001
    new_image_name = f"{i:05}.jpg"
    new_label_name_1 = f"{i:05}.xml"

    # Rename the files in each folder
    os.rename(os.path.join(image_dir, image_file), os.path.join(image_dir, new_image_name))
    os.rename(os.path.join(label_dir, label_file_1), os.path.join(label_dir, new_label_name_1))

    print(f"Renamed: {image_file} to {new_image_name}")
    print(f"Renamed: {label_file_1} to {new_label_name_1}")


### Convert XML labels to ESRI format

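This currently only converts the classes listed explicitly in the cell below (two by default: `Surfer` and `Surfer standing`); any other class name in the labels is left unchanged.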

In [None]:
import fileinput
import sys

# Set your class name and values
class_name = 'Surfer'
class_value = '1'
class_name2 = 'Surfer standing'
class_value2 = '2'

# <name> tag as written in the labels -> <name> tag carrying the numeric class value
name_replacements = {
    '<name>{}</name>'.format(class_name): '<name>{}</name>'.format(class_value),
    '<name>{}</name>'.format(class_name2): '<name>{}</name>'.format(class_value2),
}

# Lines containing any of these tags are dropped from the output
removed_tags = ('<path>', '<pose>', '<truncated>', '<difficult>', '<occluded>', '<segmented>')

xml_header = '<?xml version="1.0"?>\n'

# Iterate through each file in the directory
for filename in os.listdir(label_dir):
    if not filename.endswith('.xml'):
        continue

    # The <filename> entry is rewritten to the image that shares this label's name
    new_filename = os.path.splitext(filename)[0] + '.jpg'

    # inplace=True redirects sys.stdout into the file being read, so every
    # sys.stdout.write below rewrites the label file line by line
    with fileinput.input(os.path.join(label_dir, filename), inplace=True) as f:
        for line in f:
            # Add the XML declaration once. Skipping it when the file already
            # starts with one keeps the cell safe to re-run: a second
            # declaration would make the file unparseable.
            if f.isfirstline() and not line.lstrip().startswith('<?xml'):
                sys.stdout.write(xml_header)

            # Remove specified lines
            if any(tag in line for tag in removed_tags):
                continue

            matched_name = next((tag for tag in name_replacements if tag in line), None)

            # Replace specified text
            if '<database>roboflow.ai</database>' in line:
                line = line.replace('<database>roboflow.ai</database>', '<annotation>ESRI ArcGIS Pro</annotation>')
            # Replace a known class name with its class value
            elif matched_name is not None:
                line = line.replace(matched_name, name_replacements[matched_name])
            # Replace filename line with current filename
            elif '<filename>' in line:
                line = '\t<filename>{}</filename>\n'.format(new_filename)

            # Write the (possibly rewritten) line
            sys.stdout.write(line)


### Create map.txt

In [None]:
# set output map.txt directory
out_map = os.path.join(labels_folder_name, 'map.txt')

# split off the last path component so map.txt holds paths relative to the
# dataset root (e.g. images/00001.jpg)
image_dir_split = os.path.split(image_dir)
label_dir_split = os.path.split(label_dir)

# create a list of image and label files; filter by extension so stray files
# (thumbnails, OS metadata, ...) cannot shift the image/label pairing
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith('.jpg'))
label_files = sorted(f for f in os.listdir(label_dir) if f.endswith('.xml'))

# zip() stops at the shorter list, so make a count mismatch visible
if len(image_files) != len(label_files):
    print("Warning: {} images but {} labels; map.txt will only list the first {} pairs.".format(
        len(image_files), len(label_files), min(len(image_files), len(label_files))))

# create a list of tuples with the image and label filenames
file_list = list(zip(image_files, label_files))

# create a text file and write the file paths to it, one "image  label" pair per line
with open(out_map, 'w') as f:
    for image_file, label_file in file_list:
        image_path = os.path.join(image_dir_split[-1], image_file)
        label_path = os.path.join(label_dir_split[-1], label_file)
        line = f"{image_path}  {label_path}\n"
        f.write(line)

### Calculate stats.txt file

In [None]:
import xml.etree.ElementTree as ET

IMAGE_DIR = image_dir
ANNOT_DIR = label_dir
STATS_FILE = os.path.join(labels_folder_name , "stats.txt")

IMAGE_WIDTH = 640
IMAGE_HEIGHT = 540
NUM_CLASSES = 2
# change this if you have multiple classes
# NOTE: any <name> found in the labels that is not listed here is appended
# automatically (and NUM_CLASSES is incremented), so after the conversion
# cell has replaced names with class values those values show up as extra rows.
CLASS_NAMES = ["Surfer", "Surfer standing"]

# initialize stats
num_images = 0
num_features = 0
min_features_per_image = float("inf")
max_features_per_image = float("-inf")
total_features_per_image = 0
min_width = float("inf")
max_width = float("-inf")
min_height = float("inf")
max_height = float("-inf")
total_width = 0
total_height = 0

# Parse every annotation file exactly once and index its boxes by the image
# name stored in <filename>. Each entry is (class name, box width, box height).
objects_by_image = {}
for annot_filename in os.listdir(ANNOT_DIR):
    if not annot_filename.endswith(".xml"):
        continue
    root = ET.parse(os.path.join(ANNOT_DIR, annot_filename)).getroot()
    image_objects = objects_by_image.setdefault(root.findtext("filename"), [])
    for obj in root.iter("object"):
        bbox = obj.find("bndbox")
        xmin = float(bbox.find("xmin").text)
        ymin = float(bbox.find("ymin").text)
        xmax = float(bbox.find("xmax").text)
        ymax = float(bbox.find("ymax").text)
        image_objects.append((obj.find("name").text, xmax - xmin, ymax - ymin))

# per-class accumulators used for the class table at the end of stats.txt
class_stats = {}

# loop through each image file in the directory
image_iteration = 0
for image_filename in os.listdir(IMAGE_DIR):
    if image_filename.endswith(".jpg"):
        num_images += 1

        # count the features annotated for this image
        num_features_in_image = 0
        classes_in_image = set()
        for obj_class_name, width, height in objects_by_image.get(image_filename, []):
            if obj_class_name not in CLASS_NAMES:
                CLASS_NAMES.append(obj_class_name)
                NUM_CLASSES += 1
            num_features += 1
            num_features_in_image += 1

            # overall box-size stats
            total_width += width
            total_height += height
            min_width = min(min_width, width)
            max_width = max(max_width, width)
            min_height = min(min_height, height)
            max_height = max(max_height, height)

            # per-class box-size stats ("size" is the box width, as in the overall stats)
            cls = class_stats.setdefault(
                obj_class_name,
                {"images": 0, "features": 0, "min": float("inf"), "max": float("-inf"), "total": 0.0},
            )
            cls["features"] += 1
            cls["total"] += width
            cls["min"] = min(cls["min"], width)
            cls["max"] = max(cls["max"], width)
            classes_in_image.add(obj_class_name)

        # an image counts once per class it contains, however many boxes it has
        for obj_class_name in classes_in_image:
            class_stats[obj_class_name]["images"] += 1

        if num_features_in_image < min_features_per_image:
            min_features_per_image = num_features_in_image
        if num_features_in_image > max_features_per_image:
            max_features_per_image = num_features_in_image
        total_features_per_image += num_features_in_image

    print('Image iteration: ' , image_iteration)
    image_iteration += 1

# calculate stats (guarded so an empty dataset does not raise ZeroDivisionError)
mean_features_per_image = total_features_per_image / num_images if num_images else 0
mean_width = total_width / num_features if num_features else 0
mean_height = total_height / num_features if num_features else 0
if num_images == 0:
    # no image was seen, so the +/-inf sentinels carry no information
    min_features_per_image = 0
    max_features_per_image = 0


# write stats to file
with open(STATS_FILE, "w") as f:
    # NOTE(review): the value between the first pair of '*' is NUM_CLASSES here;
    # stats.txt files generated by ArcGIS may expect the band count in this
    # position instead. Confirm against a reference export.
    f.write("images = {} *{}*{}*{}\n".format(num_images, NUM_CLASSES, IMAGE_WIDTH, IMAGE_HEIGHT))
    f.write("\nClass feature statistics:\n")
    f.write("features = {}\n".format(num_features))
    f.write("features per image = [min = {}, mean = {:.2f}, max = {}]\n".format(
        min_features_per_image, mean_features_per_image, max_features_per_image))
    f.write("classes = {}\n".format(NUM_CLASSES))
    f.write("{:<40} {:<12} {:<8} {:<12} {:<12} {:<12} {:<12}\n".format(
        "cls name", "cls value", "images", "features", "min size", "mean size", "max size"))
    for i, cls_name in enumerate(CLASS_NAMES):
        cls = class_stats.get(cls_name)
        if cls is None:
            # class listed in CLASS_NAMES but never seen in the labels
            row = (0, 0, 0.0, 0.0, 0.0)
        else:
            row = (cls["images"], cls["features"], cls["min"],
                   cls["total"] / cls["features"], cls["max"])
        f.write("{:<40} {:<12} {:<8} {:<12} {:<12.2f} {:<12.2f} {:<12.2f}\n".format(
            cls_name, i+1, *row))


### now need to add the .emd file and the esri accumulated stats

maybe create a download link to a template or keep it linked to the notebook

In [None]:
import xml.etree.ElementTree as ET

# Folders holding the XML labels and the .jpg images set up by the earlier cells.
# IMAGE_FOLDER is defined for completeness; the visible cells do not read it.
ANNOTATION_FOLDER = label_dir
IMAGE_FOLDER = image_dir

# Area of a bounding box given as [xmin, ymin, xmax, ymax]
def calculate_area(box):
    """Return width * height of *box*, where box is indexable as (xmin, ymin, xmax, ymax)."""
    width = box[2] - box[0]
    height = box[3] - box[1]
    return width * height

# Running totals for the bounding-box area statistics
min_area = float('inf')
max_area = 0
total_area = 0
count = 0

# Number of XML annotation files processed so far
xml_iteration = 0

# Walk the annotation folder, looking only at the XML files
for filename in os.listdir(ANNOTATION_FOLDER):
    if filename.endswith('.xml'):
        root = ET.parse(os.path.join(ANNOTATION_FOLDER, filename)).getroot()

        # Every <bndbox> anywhere in the file contributes one area sample
        for box in root.findall('.//bndbox'):
            xmin, ymin, xmax, ymax = (
                int(box.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')
            )
            area = calculate_area([xmin, ymin, xmax, ymax])

            # Fold the sample into the running stats
            if area < min_area:
                min_area = area
            if area > max_area:
                max_area = area
            total_area += area
            count += 1

        # Progress indicator, rewritten in place on one console line
        print("\rReading Annotation File: %d" % xml_iteration, end="")
        xml_iteration += 1

print('Read all files')

# Mean area (0 when no bounding box was found)
mean_area = total_area / count if count else 0

# Report the stats
for label, value in (
    ("Min area", min_area),
    ("Max area", max_area),
    ("Mean area", mean_area),
    ("Total area", total_area),
    ("Number of bounding boxes", count),
):
    print(f"{label}: {value}")

### Generate stats

Use these Cells to help produce the information required for these esri files:

    - stats.txt
    - esri_accumulated_stats.json
    - esri_model_definition.emd

#### All stats

In [None]:
import xml.etree.ElementTree as ET

# Folder holding the XML label files; read by the stats loops in this cell and the next one
ANNOTATION_FOLDER = label_dir

# Bounding-box area helper; box is [xmin, ymin, xmax, ymax]
def calculate_area(box):
    """Return the area of *box*: (xmax - xmin) multiplied by (ymax - ymin)."""
    span_x = box[2] - box[0]
    span_y = box[3] - box[1]
    return span_x * span_y

# Initialize variables for calculating stats
min_area = float('inf')
max_area = 0
total_area = 0
count = 0
annotated_files = 0
total_annotation_files = 0
min_annotations = float('inf')  # Variable to track minimum number of annotations in a file
max_annotations = 0  # Variable to track maximum number of annotations in a file
total_annotations = 0  # Variable to track total number of annotations
# Stays None when no file contains any bounding box, so the final print cannot
# hit an undefined name
xml_with_max_annotations = None

class_labels = set()  # Set to store unique class labels

xml_iteration = 0
# Loop through each annotation file
for filename in os.listdir(ANNOTATION_FOLDER):
    if not filename.endswith('.xml'):
        continue
    tree = ET.parse(os.path.join(ANNOTATION_FOLDER, filename))
    root = tree.getroot()

    # Look the boxes up once and reuse the list below
    boxes = root.findall('.//bndbox')
    num_annotations = len(boxes)

    # Check if the file has annotations
    if num_annotations > 0:
        annotated_files += 1

    # Increment total_annotation_files for each XML file encountered
    total_annotation_files += 1

    # Update min_annotations and max_annotations
    if num_annotations < min_annotations:
        min_annotations = num_annotations
    if num_annotations > max_annotations:
        max_annotations = num_annotations
        xml_with_max_annotations = filename

    # Collect class labels. <name> is a child of <object>, not of <bndbox>,
    # so it has to be looked up from the object element.
    for name_element in root.findall('.//object/name'):
        label_text = (name_element.text or '').strip()
        if not label_text:
            continue
        # Numeric class values are stored as int; names that have not been
        # converted to values yet are kept as text instead of raising
        class_labels.add(int(label_text) if label_text.isdigit() else label_text)

    # Loop through each bounding box in the annotation file
    for box in boxes:
        xmin = int(box.find('xmin').text)
        ymin = int(box.find('ymin').text)
        xmax = int(box.find('xmax').text)
        ymax = int(box.find('ymax').text)

        area = calculate_area([xmin, ymin, xmax, ymax])

        # Update stats
        min_area = min(min_area, area)
        max_area = max(max_area, area)
        total_area += area
        count += 1

    total_annotations += num_annotations
    print("\rReading Annotation File: %d" % xml_iteration, end="")
    xml_iteration += 1
print('   Read all files')

# Calculate mean area
mean_area = total_area / count if count != 0 else 0

# Calculate mean number of annotations per file
mean_annotations = total_annotations / total_annotation_files if total_annotation_files != 0 else 0

# Print stats
print(f"Min area: {min_area}")
print(f"Max area: {max_area}")
print(f"Mean area: {mean_area}")
print(f"Total area: {total_area}")
print(f"Number of bounding boxes: {count}")
print(f"Number of files with annotations: {annotated_files}")
print(f"Total number of annotation files: {total_annotation_files}")
print(f"Min annotations per file: {min_annotations}")
print(f"Max annotations per file: {max_annotations}")
print(f"Mean annotations per file: {mean_annotations}")
print(f"XML file with the highest number of annotations: {xml_with_max_annotations}")
print(f"Unique class labels: {class_labels}")

#### Class Stats

change the value in the class_label_value parameter to calculate for the different classes

In [None]:
# Helper: area covered by a box given as [xmin, ymin, xmax, ymax]
def calculate_area(box):
    """Return the product of the horizontal and vertical extent of *box*."""
    horizontal = box[2] - box[0]
    vertical = box[3] - box[1]
    return horizontal * vertical

# Initialize variables for calculating stats

class_label_value = '1'  ###CHANGE THIS ONE FOR DIFFERENT CLASSES AND REPEAT
min_area = float('inf')
max_area = 0
total_area = 0
count = 0
annotated_files = 0
total_annotation_files = 0
min_annotations = float('inf')  # Variable to track minimum number of annotations in a file
max_annotations = 0  # Variable to track maximum number of annotations in a file
total_annotations = 0  # Variable to track total number of annotations
class_annotation_files = 0  # Variable to track the number of annotation files for the class
# Reset here so the final print never reports a stale value left over from a
# previous cell, and never hits an undefined name when the class has no boxes
xml_with_max_annotations = None


xml_iteration = 0
# Loop through each annotation file
for filename in os.listdir(ANNOTATION_FOLDER):
    if not filename.endswith('.xml'):
        continue
    tree = ET.parse(os.path.join(ANNOTATION_FOLDER, filename))
    root = tree.getroot()

    # Check if the file has annotations (of any class)
    if len(root.findall('.//bndbox')) > 0:
        annotated_files += 1

    # Increment total_annotation_files for each XML file encountered
    total_annotation_files += 1

    # Both counters track boxes of the selected class in this file
    num_annotations = 0
    class_num_annotations = 0

    # Loop through each object in the annotation file
    for box in root.findall('.//object'):
        # Only objects of the selected class contribute; findtext returns None
        # when <name> is missing, so such objects are skipped rather than raising
        if box.findtext('name') == class_label_value:
            xmin = int(box.find('bndbox/xmin').text)
            ymin = int(box.find('bndbox/ymin').text)
            xmax = int(box.find('bndbox/xmax').text)
            ymax = int(box.find('bndbox/ymax').text)
            area = calculate_area([xmin, ymin, xmax, ymax])

            # Update stats
            min_area = min(min_area, area)
            max_area = max(max_area, area)
            total_area += area
            count += 1
            num_annotations += 1
            class_num_annotations += 1

    # Update min_annotations and max_annotations
    if num_annotations < min_annotations:
        min_annotations = num_annotations
    if num_annotations > max_annotations:
        max_annotations = num_annotations
        xml_with_max_annotations = filename

    if class_num_annotations > 0:
        class_annotation_files += 1

    # Accumulate the per-file count; without this the mean below is always 0
    total_annotations += num_annotations

    print("\rReading Annotation File: %d" % xml_iteration, end="")
    xml_iteration += 1

print('   Read all files')

# Calculate mean area
mean_area = total_area / count if count != 0 else 0

# Calculate mean number of annotations per file
mean_annotations = total_annotations / total_annotation_files if total_annotation_files != 0 else 0

# Print stats
print("Class ", class_label_value , " Stats:")
print(" ")
print("Area: ")
print(f"    Min area: {min_area}")
print(f"    Max area: {max_area}")
print(f"    Mean area: {mean_area}")
print(f"    Total area: {total_area}")
print(" ")
print("Annotations: ")
print(f"    Number of bounding boxes: {count}")
print(f"    Number of files with annotations: {annotated_files}")
print(f"    Total number of annotation files: {total_annotation_files}")
print(f"    Number of annotation files for Class {class_label_value}: {class_annotation_files}")
print(f"    Min annotations per file: {min_annotations}")
print(f"    Max annotations per file: {max_annotations}")
print(f"    Mean annotations per file: {mean_annotations}")
print(" ")
print(f"XML file with the highest number of annotations: {xml_with_max_annotations}")

