In [3]:
# Example usage: export the dataset as a zip archive of cropped JPEGs plus per-image JSON metadata.
# NOTE(review): in the visible part of this notebook the cell that defines
# crop_and_export_to_zip appears after this one, so it must be run first -- confirm cell order.
data = filtered_dataset  # presumably built in an earlier cell (not visible here) -- verify
export_file = 'data/exported_dataset.zip'  # relative path; NOTE(review): the 'data/' directory must already exist
crop_and_export_to_zip(export_file, data)

Note: you may need to restart the kernel to use updated packages.


In [None]:
import random
from tqdm.notebook import tqdm
import json
import zipfile
import io

def crop_and_export_to_zip(zip_filename, data, crop_size=(512, 512), seed=None):
    '''
    Randomly crop every image in ``data`` and export the crops to a zip archive.

    For each row whose image is at least ``crop_size``, two archive members are
    written: ``cropped_image_{i}.jpg`` (the crop, converted to RGB and saved as
    JPEG with quality 85) and ``cropped_image_{i}.json`` (every field of the row
    except ``'image'``). ``i`` is the row's position in ``data``, so rows that
    are skipped for being too small leave gaps in the numbering.

    The rows themselves are not modified.

    Args:
        zip_filename: Path of the zip archive to create; it is opened in ``'w'``
            mode, so an existing file is overwritten.
        data: Iterable of dict-like rows. Each row needs an ``'image'`` entry
            exposing ``size``, ``crop``, ``convert`` and ``save`` (assumed to be
            a PIL image -- confirm against the dataset); the remaining entries
            must be JSON-serialisable.
        crop_size: ``(width, height)`` of the crop in pixels.
        seed: Optional seed for the crop positions, for reproducible exports.
            With the default ``None`` the module-level ``random`` generator is
            used, so a prior ``random.seed(...)`` call is still honoured.

    Returns:
        None.
    '''
    # Loop-invariant: unpack the crop dimensions once
    crop_width, crop_height = crop_size

    # A private generator keeps a seeded export independent of global random state
    rng = random if seed is None else random.Random(seed)

    # Create a zip file
    with zipfile.ZipFile(zip_filename, 'w') as zip_file:
        # enumerate() wraps tqdm() (not the reverse) so tqdm can still read
        # len(data) and display a total
        for i, row in enumerate(tqdm(data)):
            # get the image
            img = row['image']

            # Get the dimensions of the image
            img_width, img_height = img.size

            # Check if the image is smaller than the desired crop size
            if img_width < crop_width or img_height < crop_height:
                print(f"Image {i} is too small for the desired crop size.")
                continue

            # Side information is everything in the row but the image. Build a
            # new dict instead of popping from the row, which would strip the
            # image out of the caller's data.
            side_info = {key: value for key, value in row.items() if key != 'image'}

            image_filename = f"cropped_image_{i}.jpg"
            json_filename = f"cropped_image_{i}.json"

            # Calculate random position for the crop (randint is inclusive on both ends)
            left = rng.randint(0, img_width - crop_width)
            upper = rng.randint(0, img_height - crop_height)

            # Perform the crop and convert to RGB
            cropped_img = img.crop((left, upper, left + crop_width, upper + crop_height))
            cropped_img = cropped_img.convert('RGB')

            # Encode the crop as JPEG in memory with optimization and reduced quality
            img_bytes_io = io.BytesIO()
            cropped_img.save(img_bytes_io, 'JPEG', optimize=True, quality=85)

            # Write the cropped image bytes to the zip file
            zip_file.writestr(image_filename, img_bytes_io.getvalue())

            # Write the side information as JSON next to the image
            zip_file.writestr(json_filename, json.dumps(side_info))