In [None]:
import os
import cv2
import shutil
from PIL import Image
from tqdm import tqdm
from sklearn.model_selection import train_test_split
# Root folder of the dataset; the split folders below are created under it.
DEST_PATH = "/home/gchengzhan/private/SemSeg/Dataset/dataset/semantic_drone_dataset/"

# Source folders: original images and their semantic label masks.
IMAGE_PATH = "/home/gchengzhan/private/SemSeg/Dataset/dataset/semantic_drone_dataset/original_images/"
TARGET_PATH = "/home/gchengzhan/private/SemSeg/Dataset/dataset/semantic_drone_dataset/label_images_semantic/"

# Destination folders, one (images, masks) pair per split.
TRAIN_IMG_PATH, TRAIN_MASK_PATH = (
    os.path.join(DEST_PATH, "train/images"),
    os.path.join(DEST_PATH, "train/masks"),
)
VAL_IMG_PATH, VAL_MASK_PATH = (
    os.path.join(DEST_PATH, "val/images"),
    os.path.join(DEST_PATH, "val/masks"),
)
TEST_IMG_PATH, TEST_MASK_PATH = (
    os.path.join(DEST_PATH, "test/images"),
    os.path.join(DEST_PATH, "test/masks"),
)

def copy_and_resize_files(files, src_img, src_mask, dest_img, dest_mask, new_size=(608, 416)):
    """Resize each image and its mask and save them into the destination folders.

    For every name in ``files`` the image is opened from ``src_img``, resized
    to ``new_size`` and saved under the same name in ``dest_img``. The mask is
    looked up in ``src_mask`` under the image's base name with a ``.png``
    extension, resized, and saved under that name in ``dest_mask``.

    Args:
        files: Image file names, relative to ``src_img``.
        src_img: Directory containing the source images.
        src_mask: Directory containing the source masks.
        dest_img: Directory receiving the resized images (not created here).
        dest_mask: Directory receiving the resized masks (not created here).
        new_size: Target size passed to ``Image.resize``.
    """
    for file in tqdm(files):
        # Resize and save image. Image.LANCZOS is the filter formerly exposed
        # as Image.ANTIALIAS, which was removed in Pillow 10.
        with Image.open(os.path.join(src_img, file)) as img:
            img.resize(new_size, Image.LANCZOS).save(os.path.join(dest_img, file))

        # Assuming mask file names are identical to image file names, but with .png extension
        mask_file = os.path.splitext(file)[0] + '.png'
        with Image.open(os.path.join(src_mask, mask_file)) as mask:
            # Use NEAREST for masks to avoid introducing new classes
            mask.resize(new_size, Image.NEAREST).save(os.path.join(dest_mask, mask_file))

# Create the destination directories for every split (no error if they already exist)
for path in [TRAIN_IMG_PATH, TRAIN_MASK_PATH, VAL_IMG_PATH, VAL_MASK_PATH, TEST_IMG_PATH, TEST_MASK_PATH]:
    os.makedirs(path, exist_ok=True)

# Get all file names from the image directory, keeping regular files only.
# os.listdir returns entries in arbitrary order, so the list is sorted to make
# the seeded split below reproducible across runs and machines.
all_files = os.listdir(IMAGE_PATH)
files = sorted(f for f in all_files if os.path.isfile(os.path.join(IMAGE_PATH, f)))

# Split files into training, validation, and test sets:
# first hold out 20% of the files, then divide that held-out part in half.
train_files, test_files = train_test_split(files, test_size=0.2, random_state=42)
val_files, test_files = train_test_split(test_files, test_size=0.5, random_state=42)

# Copy and resize each split's images and masks into its own directories
copy_and_resize_files(train_files, IMAGE_PATH, TARGET_PATH, TRAIN_IMG_PATH, TRAIN_MASK_PATH)
copy_and_resize_files(val_files, IMAGE_PATH, TARGET_PATH, VAL_IMG_PATH, VAL_MASK_PATH)
copy_and_resize_files(test_files, IMAGE_PATH, TARGET_PATH, TEST_IMG_PATH, TEST_MASK_PATH)