In [4]:
# Install the Pillow imaging library; it provides the `PIL` package imported in the next cell.
%pip install pillow

Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
from PIL import Image
import xml.etree.ElementTree as ET


In [3]:
# Source photographs, organised in sub-folders below Data/train.
unlabeled_data_folder = os.path.join("Data", "train")
# Annotation files; the processing loop expects one sub-folder per image sub-folder.
data_labels_folder = "Labelled_Training_Data"
# Destination root for the cropped images; created up front so later writes succeed.
extracted_data_folder = "Extracted_Training_Data"
os.makedirs(extracted_data_folder, exist_ok=True)

In [4]:
def crop_and_save_image(input_image_path, output_folder, object_list):
    """Crop every annotated object out of one image and save each crop to disk.

    Each crop is written to ``<output_folder>/<name>/<image stem>_<index>.JPG``,
    where ``name`` is the object's label and ``index`` is its position in
    ``object_list``. The index keeps several crops taken from the same image
    from overwriting each other.

    Args:
        input_image_path: Path of the source image, opened with ``Image.open``.
        output_folder: Root folder; one sub-folder per label is created in it.
        object_list: Iterable of dicts with the keys ``'name'``, ``'xmin'``,
            ``'ymin'``, ``'xmax'`` and ``'ymax'``. The coordinate values must
            be convertible with ``int()``.
    """
    # splitext removes the extension whatever its length (".JPG", ".jpeg", ...),
    # unlike slicing off a fixed number of characters.
    image_stem = os.path.splitext(os.path.basename(input_image_path))[0]

    # Open the input image once and reuse it for every crop
    with Image.open(input_image_path) as img:
        for index, annotation in enumerate(object_list):
            name = annotation['name']
            # Pillow expects the crop box as (left, upper, right, lower)
            box = tuple(int(annotation[key]) for key in ('xmin', 'ymin', 'xmax', 'ymax'))
            cropped_img = img.crop(box)

            # One output sub-folder per label; make sure it exists
            output_coin_type = os.path.join(output_folder, name)
            os.makedirs(output_coin_type, exist_ok=True)

            # The index goes before the file extension so crops from the same
            # source image get distinct names
            image_name = image_stem + '_' + str(index) + '.JPG'
            output_image_path = os.path.join(output_coin_type, image_name)

            # Save the cropped image
            cropped_img.save(output_image_path)
            print(f'Cropped image saved to: {output_image_path}')

In [5]:
def read_xml_file_content(filepath):
    """Parse an annotation XML file into a list of object dictionaries.

    One dict is produced per ``<object>`` element directly under the root.
    It holds the text of the element's ``<name>`` child under ``'name'`` and
    the text of the ``xmin``/``ymin``/``xmax``/``ymax`` children of its
    ``<bndbox>`` element under the same keys. Values are returned as found
    in the file, with no numeric conversion.

    Args:
        filepath: Source handed to ``ET.parse``.

    Returns:
        List of dicts with the keys ``'name'``, ``'xmin'``, ``'ymin'``,
        ``'xmax'`` and ``'ymax'``, in document order.
    """
    box_fields = ('xmin', 'ymin', 'xmax', 'ymax')
    annotations = []
    for node in ET.parse(filepath).getroot().findall('object'):
        entry = {'name': node.findtext('name')}
        box = node.find('bndbox')
        entry.update((field, box.findtext(field)) for field in box_fields)
        annotations.append(entry)
    return annotations

In [6]:
# Walk every sub-folder of the image directory, pair each image with the XML
# label file that has the same stem, and save one crop per labelled object.
for folder_name in os.listdir(unlabeled_data_folder):
    image_folder = os.path.join(unlabeled_data_folder, folder_name)
    label_folder = os.path.join(data_labels_folder, folder_name)

    # Skip stray files in the image root and image folders without a label folder;
    # os.listdir would raise on either.
    if not (os.path.isdir(image_folder) and os.path.isdir(label_folder)):
        continue

    # Index the label files by stem once per folder instead of rescanning the
    # label directory for every image.
    # NOTE(review): assumes label files are named "<image stem>.xml" - confirm
    # against the labelling tool's output.
    label_files_by_stem = {}
    for label_file in os.listdir(label_folder):
        label_stem, label_extension = os.path.splitext(label_file)
        if label_extension.lower() == '.xml':
            label_files_by_stem[label_stem] = label_file

    for image in os.listdir(image_folder):
        # Exact stem match: a substring test would also pair "L101027.JPG"
        # with "L1010277.xml".
        image_stem = os.path.splitext(image)[0]
        label_file = label_files_by_stem.get(image_stem)
        if label_file is None:
            # No annotation for this file; nothing to crop
            continue

        label_filepath = os.path.join(label_folder, label_file)
        image_filepath = os.path.join(image_folder, image)
        object_list = read_xml_file_content(label_filepath)

        crop_and_save_image(image_filepath, extracted_data_folder, object_list)


Cropped image saved to: Extracted_Training_Data\5CHF\L1010277_0.JPG
Cropped image saved to: Extracted_Training_Data\2EUR\L1010277_1.JPG
Cropped image saved to: Extracted_Training_Data\0.2EUR\L1010277_2.JPG
Cropped image saved to: Extracted_Training_Data\0.2EUR\L1010277_3.JPG
Cropped image saved to: Extracted_Training_Data\0.5EUR\L1010277_4.JPG
Cropped image saved to: Extracted_Training_Data\0.5CHF\L1010279_0.JPG
Cropped image saved to: Extracted_Training_Data\0.2CHF\L1010279_1.JPG
Cropped image saved to: Extracted_Training_Data\2CHF\L1010279_2.JPG
Cropped image saved to: Extracted_Training_Data\0.1CHF\L1010279_3.JPG
Cropped image saved to: Extracted_Training_Data\0.2CHF\L1010281_0.JPG
Cropped image saved to: Extracted_Training_Data\0.05EUR\L1010281_1.JPG
Cropped image saved to: Extracted_Training_Data\0.05CHF\L1010281_2.JPG
Cropped image saved to: Extracted_Training_Data\0.1CHF\L1010281_3.JPG
Cropped image saved to: Extracted_Training_Data\0.1CHF\L1010281_4.JPG
Cropped image saved to: 