In [4]:
import os
import random
from shutil import copy2
from PIL import Image

# --- Configuration ---------------------------------------------------------
input_dir = "cv_p3_images_original"
output_dir = "cv_p3_images_split"
train_ratio = 0.8
val_ratio = 0.1
test_ratio = 0.1
# Fixed seed so that re-running this cell assigns every image to the same
# split. Without it, each run reshuffles, and because the output directories
# are reused (exist_ok=True) an image copied to "train" on one run could also
# land in "test" on the next, leaking data between splits.
split_seed = 42

# The test split receives whatever is left after train and validation, so the
# three ratios are expected to cover the whole dataset.
assert abs(train_ratio + val_ratio + test_ratio - 1.0) < 1e-9, "split ratios must sum to 1"

# --- Output layout: <output_dir>/<split>/<colored|grayscale> ----------------
train_colored_dir = os.path.join(output_dir, "train", "colored")
train_grayscale_dir = os.path.join(output_dir, "train", "grayscale")
val_colored_dir = os.path.join(output_dir, "validation", "colored")
val_grayscale_dir = os.path.join(output_dir, "validation", "grayscale")
test_colored_dir = os.path.join(output_dir, "test", "colored")
test_grayscale_dir = os.path.join(output_dir, "test", "grayscale")

for split_dir in (
    train_colored_dir,
    train_grayscale_dir,
    val_colored_dir,
    val_grayscale_dir,
    test_colored_dir,
    test_grayscale_dir,
):
    os.makedirs(split_dir, exist_ok=True)

# --- Split -----------------------------------------------------------------
# os.listdir returns entries in arbitrary order, so sort first to give the
# seeded shuffle a stable starting point.
image_files = sorted(
    f for f in os.listdir(input_dir) if f.lower().endswith((".jpg", ".jpeg", ".png"))
)
# A private Random instance keeps the split reproducible without touching the
# global random state used elsewhere in the notebook.
random.Random(split_seed).shuffle(image_files)
split_idx_train = int(len(image_files) * train_ratio)
split_idx_val = int(len(image_files) * (train_ratio + val_ratio))
train_files = image_files[:split_idx_train]
val_files = image_files[split_idx_train:split_idx_val]
test_files = image_files[split_idx_val:]


def save_grayscale_images(files, source_dir, dest_colored_dir, dest_grayscale_dir):
    """Copy each image unchanged and write a grayscale version next to it.

    For every name in ``files`` the source image is copied as-is into
    ``dest_colored_dir`` and a single-channel ("L" mode) conversion is saved
    under the same file name in ``dest_grayscale_dir``.

    Args:
        files: Iterable of image file names relative to ``source_dir``.
        source_dir: Directory containing the original images.
        dest_colored_dir: Existing directory that receives the untouched copies.
        dest_grayscale_dir: Existing directory that receives the grayscale images.

    Returns:
        None. The function only writes files.
    """
    for file in files:
        source_path = os.path.join(source_dir, file)
        colored_dest_path = os.path.join(dest_colored_dir, file)
        grayscale_dest_path = os.path.join(dest_grayscale_dir, file)
        copy2(source_path, colored_dest_path)
        # Open inside a context manager so the underlying file handle is
        # released after each image instead of lingering until garbage
        # collection, which matters when looping over thousands of files.
        with Image.open(source_path) as img:
            img.convert("L").save(grayscale_dest_path)

# Materialise each split on disk: (file names, colored target, grayscale target).
# Order is train, validation, test.
for split_files, colored_target, grayscale_target in (
    (train_files, train_colored_dir, train_grayscale_dir),
    (val_files, val_colored_dir, val_grayscale_dir),
    (test_files, test_colored_dir, test_grayscale_dir),
):
    save_grayscale_images(split_files, input_dir, colored_target, grayscale_target)

# Summary of where the data went and how many images each split holds.
print(f"Dataset prepared in {output_dir}.")
print(f"Training set: {len(train_files)} images.")
print(f"Validation set: {len(val_files)} images.")
print(f"Test set: {len(test_files)} images.")


Dataset prepared in cv_p3_images_split.
Training set: 5912 images.
Validation set: 739 images.
Test set: 739 images.
