### Data Preprocessing

In [2]:
import os
from PIL import Image
import torchvision.transforms as T
from shutil import copy2

# Dataset root and the split sub-folders that will be processed
base_dir = 'dataset/prod/yolo_dataset'
folders = [
    'train',
    'test',
    'valid',
]

# Transform that scales every image to a fixed 350x350 output size
resize_transform = T.Resize(size=(350, 350))

# Function to resize images and save them to a new folder
def resize_images(folder, label_ext='.xml'):
    """Resize every image of one dataset split and copy its label alongside.

    Images are read from ``<base_dir>/<folder>/images``, passed through the
    module-level ``resize_transform`` and written under the same file name to
    ``<base_dir>/<folder>/resized_images``. For each image, a label file with
    the same stem is looked up in ``<base_dir>/<folder>/labels`` and, when it
    exists, copied unchanged to ``<base_dir>/<folder>/resized_labels``.

    Args:
        folder: Name of the split sub-folder under ``base_dir``
            (e.g. ``'train'``).
        label_ext: Extension (including the leading dot) of the label files.
            Defaults to ``'.xml'``; adjust it to match your label format.

    NOTE(review): labels are copied verbatim. That is only correct when the
    annotations do not depend on the image resolution (e.g. normalized box
    coordinates); absolute-pixel annotations would need rescaling. Confirm
    against the actual label format.
    """
    image_extensions = ('.jpg', '.png', '.jpeg')

    image_dir = os.path.join(base_dir, folder, 'images')
    label_dir = os.path.join(base_dir, folder, 'labels')

    # Create new directories for resized images
    resized_image_dir = os.path.join(base_dir, folder, 'resized_images')
    resized_label_dir = os.path.join(base_dir, folder, 'resized_labels')
    os.makedirs(resized_image_dir, exist_ok=True)
    os.makedirs(resized_label_dir, exist_ok=True)

    # Process each image in the folder
    for image_file in os.listdir(image_dir):
        # Compare case-insensitively so files such as 'IMG_001.JPG' are not skipped
        if not image_file.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(image_dir, image_file)
        resized_image_path = os.path.join(resized_image_dir, image_file)

        # The context manager closes the source file handle once the resized
        # copy has been written, instead of leaking one handle per image.
        with Image.open(image_path) as img:
            resized_img = resize_transform(img)
            resized_img.save(resized_image_path)

        # Derive the label name from the file stem so that every accepted
        # image extension (not only '.jpg') maps to '<stem><label_ext>'.
        stem, _ = os.path.splitext(image_file)
        label_file = stem + label_ext
        original_label_path = os.path.join(label_dir, label_file)
        if os.path.exists(original_label_path):
            resized_label_path = os.path.join(resized_label_dir, label_file)
            copy2(original_label_path, resized_label_path)

# Run the resize step once per dataset split listed in `folders`
for split_name in folders:
    resize_images(split_name)
