In [1]:
import os
from PIL import Image
from tqdm import tqdm

In [2]:
# Root folders of the raw dataset and of the resized copy written by this
# notebook; both are relative to the notebook's working directory.
source_data_dir, new_data_dir = "dataset", "dataset_preprocessed"

# Target size handed to PIL's Image.resize, i.e. (width, height) in pixels.
# NOTE(review): 299x299 presumably matches the downstream model's input size - confirm.
image_size = (299, 299)

In [3]:
# Create the output root up front; exist_ok keeps this cell safe to re-run.
os.makedirs(name=new_data_dir, exist_ok=True)

In [None]:
# Mirror <source_data_dir>/<split>/<class>/<image> into <new_data_dir>/,
# writing each image as a 3-channel RGB file resized to `image_size`.
# Unreadable or unsaveable files are skipped, recorded, and summarised at the
# end so that failures are not lost among the progress-bar output.
failed_files = []  # (source_path, error message) for every image that was skipped

for split in ['train', 'test']:
    source_split_dir = os.path.join(source_data_dir, split)
    new_split_dir = os.path.join(new_data_dir, split)
    os.makedirs(new_split_dir, exist_ok=True)

    # Loop through the class folders (e.g., 'real', 'fake').
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for class_name in sorted(os.listdir(source_split_dir)):
        source_class_dir = os.path.join(source_split_dir, class_name)

        # Check this BEFORE creating the output folder: a stray file sitting in
        # the split directory must not produce an empty, bogus class directory.
        if not os.path.isdir(source_class_dir):
            continue

        new_class_dir = os.path.join(new_split_dir, class_name)
        os.makedirs(new_class_dir, exist_ok=True)

        print(f"Processing images in: {source_class_dir}")
        # Get list of images to process (regular files only, no sub-folders)
        image_filenames = sorted(
            f for f in os.listdir(source_class_dir)
            if os.path.isfile(os.path.join(source_class_dir, f))
        )

        # Loop through each image and process it
        for filename in tqdm(image_filenames, desc=f"Processing {class_name}"):
            source_path = os.path.join(source_class_dir, filename)
            target_path = os.path.join(new_class_dir, filename)
            try:
                with Image.open(source_path) as img:
                    # Convert to RGB *before* resizing: Pillow silently falls
                    # back to NEAREST resampling for mode '1' and 'P' images,
                    # so resizing first would discard the LANCZOS filter.
                    img_rgb = img if img.mode == 'RGB' else img.convert('RGB')
                    # Resize the image
                    img_resized = img_rgb.resize(image_size, Image.Resampling.LANCZOS)
                    # Save the new image (format is inferred from the extension)
                    img_resized.save(target_path)
            except Exception as e:
                # Best effort: one bad file must not abort a multi-minute run.
                # tqdm.write prints without corrupting the active progress bar.
                tqdm.write(f"Could not process {filename}: {e}")
                failed_files.append((source_path, str(e)))

print("\nDataset pre-processing complete!")
print(f"New dataset is located at: {new_data_dir}")
if failed_files:
    print(f"{len(failed_files)} file(s) could not be processed; see `failed_files` for details.")

Processing images in: dataset\train\FAKE


Processing FAKE: 100%|██████████| 50000/50000 [06:42<00:00, 124.19it/s]


Processing images in: dataset\train\REAL


Processing REAL: 100%|██████████| 50000/50000 [13:32<00:00, 61.53it/s]


Processing images in: dataset\test\FAKE


Processing FAKE: 100%|██████████| 10000/10000 [02:49<00:00, 58.95it/s]


Processing images in: dataset\test\REAL


Processing REAL: 100%|██████████| 10000/10000 [02:42<00:00, 61.39it/s]


Dataset pre-processing complete!
New dataset is located at: dataset_preprocessed



