# In [1]:
import os
import pandas as pd
from PIL import Image

# Input/output locations and the target image size used by the resizing code below
dataset_path = 'dataset'  # Root folder; its 'train', 'valid' and 'test' sub-folders are processed
output_path = 'output_dataset'  # Path to save resized images and updated CSVs
resize_dim = (224, 224)  # Target size as (width, height), the order Image.resize() expects

# Function to resize images and adjust bounding boxes
def resize_images_and_bboxes(data_folder):
    """Resize every annotated image in *data_folder* and rescale its boxes.

    Reads ``_annotations.csv`` from *data_folder*, resizes each image named
    in its ``filename`` column to the module-level ``resize_dim`` and writes
    the result, together with an updated copy of the CSV, under
    ``os.path.join(output_path, data_folder)``.

    The ``xmin``/``ymin``/``xmax``/``ymax`` columns are multiplied by the
    per-image scale factors and truncated with ``int()``.

    Args:
        data_folder: Folder containing ``_annotations.csv`` and the images
            it references.

    NOTE: ``os.path.join`` discards ``output_path`` when *data_folder* is an
    absolute path, in which case the outputs would land in the source
    folder. Pass a relative path.
    """
    # Load the CSV file
    csv_path = os.path.join(data_folder, '_annotations.csv')
    df = pd.read_csv(csv_path)

    # A file with several boxes appears on several rows; resize and save it
    # only once and reuse its scale factors for the remaining rows.
    scale_by_file = {}

    for i, row in df.iterrows():
        filename = row['filename']

        if filename not in scale_by_file:
            img_path = os.path.join(data_folder, filename)
            output_img_path = os.path.join(output_path, data_folder, filename)
            os.makedirs(os.path.dirname(output_img_path), exist_ok=True)

            # The context manager closes the underlying file handle, which
            # a bare Image.open() would otherwise keep open.
            with Image.open(img_path) as image:
                width, height = image.size
                image.resize(resize_dim).save(output_img_path)

            scale_by_file[filename] = (
                resize_dim[0] / width,
                resize_dim[1] / height,
            )

        x_scale, y_scale = scale_by_file[filename]

        # Update bounding box coordinates
        df.at[i, 'xmin'] = int(row['xmin'] * x_scale)
        df.at[i, 'ymin'] = int(row['ymin'] * y_scale)
        df.at[i, 'xmax'] = int(row['xmax'] * x_scale)
        df.at[i, 'ymax'] = int(row['ymax'] * y_scale)

    # Keep image-size columns in step with the resized images.
    # NOTE(review): assumes 'width'/'height', when present, hold the image
    # dimensions in pixels (as in Roboflow-style exports) - confirm.
    if 'width' in df.columns:
        df['width'] = resize_dim[0]
    if 'height' in df.columns:
        df['height'] = resize_dim[1]

    # Save the updated CSV file
    updated_csv_path = os.path.join(output_path, data_folder, '_annotations.csv')
    os.makedirs(os.path.dirname(updated_csv_path), exist_ok=True)
    df.to_csv(updated_csv_path, index=False)

# Run the resize step over the train, validation and test splits in turn
for folder in ('train', 'valid', 'test'):
    split_dir = os.path.join(dataset_path, folder)
    resize_images_and_bboxes(split_dir)

print("Resizing and bounding box adjustment complete.")


# Output:
# Resizing and bounding box adjustment complete.
