File Renaming Based on Small, Medium, Large Annotation Count

In [None]:
import os
import shutil
import xml.etree.ElementTree as ET

In [None]:
# Insert your dataset download code from Roboflow into a new cell below!

# Here is how to get the code:
# 1. Download the provided dataset from our GitHub repository
# 2. Sign Up and Log In to Roboflow
# 3. Make a new Roboflow project (Input "potholes" in the Annotation Group field)
# 4. Upload the downloaded images and xml file to Roboflow
# 5. Choose the dataset split and put all the files into the Train split
# 6. Go to the Generate tab and create a new dataset version
# 7. Go to the Version tab, download the dataset in Pascal VOC format, and select the Show Download Code option
# 8. Copy and paste the code into a new cell below

In [None]:
# Source folder: the `train` directory of the downloaded Roboflow export.
path = '/content/roboflow_project_name/train' # Replace the roboflow_project_name with your project name

# Destination folders: one for the renamed images, one for the rewritten XML annotations.
new_ds = '/content/Final Dataset'
new_ds_xml = '/content/Final Dataset XML'

# exist_ok=True keeps this cell safe to re-run.
for output_dir in (new_ds, new_ds_xml):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
# Split the contents of `path` into annotation (.xml) and image (.jpg) file names.
#
# os.listdir() returns entries in arbitrary order, and the "(n)" suffix handed out
# by the renaming loop depends on the order in which images are visited, so the
# listing is sorted to make the generated names identical on every run.
img_list = []
xml_list = []

for entry in sorted(os.listdir(path)):
    lowered = entry.lower()  # match extensions case-insensitively for both kinds
    if lowered.endswith('.xml'):
        xml_list.append(entry)
    elif lowered.endswith('.jpg'):
        img_list.append(entry)

In [None]:
# Rename every image and its Pascal VOC annotation after the number of small,
# medium and large boxes it contains: "SML <small> <medium> <large> (<n>)".
# <n> is a running counter that keeps files with identical counts apart.
# Each <object> name in the annotation is rewritten to its size class, and the
# <filename>/<path> entries are pointed at the new image name.

# Box sizes are rescaled as if the image were REF_SIDE x REF_SIDE pixels, so the
# thresholds below do not depend on the original image resolution.
# NOTE(review): 300 presumably matches the detector's input size - confirm.
REF_SIDE = 300
# Area thresholds in rescaled pixels^2.
# NOTE(review): 32*32 and 96*96 look like the COCO small/medium/large cut-offs - confirm.
SMALL_MAX_AREA = 32 * 32    # 1024
MEDIUM_MAX_AREA = 96 * 96   # 9216

name_count = {}
for img in img_list:
    img_path = os.path.join(path, img)
    if not os.path.isfile(img_path):
        continue

    # Swap only the real extension: str.replace('.jpg', '.xml') would miss
    # upper-case extensions such as ".JPG" and would also rewrite any ".jpg"
    # that appears in the middle of the name.
    xml_path = os.path.join(path, os.path.splitext(img)[0] + '.xml')
    if not os.path.isfile(xml_path):
        # An image without an annotation cannot be classified; skip it instead
        # of aborting the whole run.
        print(f"Skipping {img_path}: annotation file not found ({xml_path})")
        continue

    tree = ET.parse(xml_path)
    root = tree.getroot()

    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)

    small = 0
    medium = 0
    large = 0
    for obj in root.findall('object'):
        name = obj.find('name')
        box = obj.find('bndbox')

        # Box extent in original pixels, rescaled to the reference resolution.
        width = (float(box.find('xmax').text) - float(box.find('xmin').text)) * REF_SIDE / w
        height = (float(box.find('ymax').text) - float(box.find('ymin').text)) * REF_SIDE / h
        area = width * height

        if area <= SMALL_MAX_AREA:
            name.text = 'Small Pothole'
            small += 1
        elif area <= MEDIUM_MAX_AREA:
            name.text = 'Medium Pothole'
            medium += 1
        else:
            name.text = 'Large Pothole'
            large += 1

    new_name = f"SML {small} {medium} {large}"

    # Running counter per pattern; the first file with a given pattern gets (1).
    name_count[new_name] = name_count.get(new_name, 0) + 1

    final_name = f"{new_name} ({name_count[new_name]})"

    img_name = f"{final_name}.jpg"
    xml_name = f"{final_name}.xml"

    # Point the annotation at the renamed image. Either element may be absent
    # from an annotation file, so update only the ones that exist.
    for tag in ('filename', 'path'):
        node = root.find(tag)
        if node is not None:
            node.text = img_name

    shutil.copy(img_path, os.path.join(new_ds, img_name))
    tree.write(os.path.join(new_ds_xml, xml_name))

    print(final_name, img_path)

SML 0 0 1 (1) /content/Pothole2-500-2/train/SML-0-0-1-39-FOOT_jpg.rf.9f98a56ed2d20c88fe7af34dbd115324.jpg
SML 1 1 0 (1) /content/Pothole2-500-2/train/SML-1-1-0-9-FOOT_jpg.rf.39a1f5a4f06034c376d3b7a327b45d8d.jpg
SML 3 2 0 (1) /content/Pothole2-500-2/train/SML-3-2-0-1-FOOT_jpg.rf.9930f7296a17bf6588b464b4dce48ec0.jpg
SML 7 1 0 (1) /content/Pothole2-500-2/train/SML-7-1-0-1-FOOT_jpg.rf.cbf72990a520f3afc4a90c30ad28ff76.jpg
SML 0 1 1 (1) /content/Pothole2-500-2/train/SML-0-1-1-68-_jpg.rf.7c998cd1a5aed8ff631f586f72264d61.jpg
SML 6 1 2 (1) /content/Pothole2-500-2/train/SML-6-1-2-1-_jpg.rf.3ad7a80f3046c154e9289c2036b2ce94.jpg
SML 0 1 1 (2) /content/Pothole2-500-2/train/SML-0-1-1-92-_jpg.rf.1c2aaffb4de212bbb32d21a524faa420.jpg
SML 2 1 0 (1) /content/Pothole2-500-2/train/SML-2-1-0-13-FOOT_jpg.rf.b46b229ccdfb8d9aa4069f44c4a69077.jpg
SML 0 0 1 (2) /content/Pothole2-500-2/train/SML-0-0-1-78-_jpg.rf.42c0198594327904327ce5f7b15a161e.jpg
SML 0 0 1 (3) /content/Pothole2-500-2/train/SML-0-0-1-94-FOOT_jpg.

In [None]:
# Print how many files share each "SML <small> <medium> <large>" pattern.
#
# Plain string sorting compares the counts character by character, which places
# "SML 10 3 0" before "SML 2 0 0"; sort on the numeric value of each field instead.
def _pattern_sort_key(item):
    """Sort key for a (pattern, count) pair: numeric tokens compare as integers."""
    return [
        (0, int(token), '') if token.isdecimal() else (1, 0, token)
        for token in item[0].split()
    ]

sorted_by_keys = dict(sorted(name_count.items(), key=_pattern_sort_key))
for key, value in sorted_by_keys.items():
    print(f"{key}: {value}")

SML 0 0 1: 246
SML 0 0 2: 9
SML 0 1 0: 10
SML 0 1 1: 98
SML 0 1 2: 8
SML 0 2 0: 5
SML 0 2 1: 22
SML 0 2 2: 1
SML 1 0 1: 20
SML 1 0 2: 1
SML 1 1 0: 41
SML 1 1 1: 18
SML 1 1 2: 2
SML 1 2 0: 4
SML 1 2 1: 13
SML 1 3 1: 3
SML 10 3 0: 1
SML 11 5 0: 2
SML 13 4 0: 1
SML 2 0 0: 4
SML 2 0 1: 2
SML 2 0 2: 1
SML 2 1 0: 16
SML 2 1 1: 3
SML 2 2 0: 8
SML 2 2 1: 7
SML 2 3 0: 9
SML 2 3 1: 2
SML 2 4 0: 1
SML 3 0 0: 2
SML 3 0 1: 4
SML 3 1 0: 2
SML 3 1 1: 3
SML 3 2 0: 8
SML 3 2 1: 1
SML 3 2 2: 1
SML 3 3 0: 3
SML 3 4 0: 2
SML 4 0 1: 1
SML 4 1 0: 3
SML 4 2 0: 2
SML 4 2 1: 2
SML 4 3 0: 1
SML 4 4 0: 1
SML 4 5 0: 1
SML 5 0 1: 2
SML 5 2 0: 1
SML 5 3 0: 1
SML 5 3 1: 1
SML 5 5 0: 2
SML 6 1 0: 1
SML 6 1 2: 1
SML 6 3 1: 1
SML 6 4 1: 1
SML 6 7 1: 1
SML 7 1 0: 1
SML 7 2 0: 2
SML 7 2 1: 1
SML 7 3 0: 1
SML 7 5 0: 1
SML 9 3 0: 1
SML 9 4 0: 1


In [None]:
# Bundle the rewritten XML annotations into a single zip archive for download.
# This folder must match `new_ds_xml` from the configuration cell.
folder_to_download = '/content/Final Dataset XML'

zip_file_name = 'Final Dataset XML.zip'

# make_archive() appends ".zip" itself, so it is given the name without its
# extension. splitext() removes only the final extension, whereas
# split('.')[0] would cut the name short at the first dot it contains.
# The base name is relative, so the archive is created in the current working
# directory; the call returns the archive's path, which the notebook displays.
shutil.make_archive(os.path.splitext(zip_file_name)[0], 'zip', folder_to_download)

'/content/Final Dataset XML.zip'