In [None]:
import os
import shutil
import random

# Set random seed for reproducibility
random.seed(42)

# Source directory
source_dir = ''

# Destination base directory
dest_base = 'data'

# Split ratios
train_ratio = 0.7
val_ratio = 0.15
test_ratio = 0.15

# Class folders
classes = ['Cat', 'Dog']

# Ensure destination folders exist
for split in ['train', 'val', 'test']:
    for cls in classes:
        os.makedirs(os.path.join(dest_base, split, cls), exist_ok=True)

# Function to split and move files
def split_and_copy(class_name):
    class_path = os.path.join(source_dir, class_name)
    images = os.listdir(class_path)
    random.shuffle(images)

    total = len(images)
    train_cut = int(total * train_ratio)
    val_cut = train_cut + int(total * val_ratio)

    splits = {
        'train': images[:train_cut],
        'val': images[train_cut:val_cut],
        'test': images[val_cut:]
    }

    for split, split_images in splits.items():
        for img in split_images:
            src = os.path.join(class_path, img)
            dst = os.path.join(dest_base, split, class_name, img)
            shutil.copy2(src, dst)

# Run the split for both Cat and Dog folders
for cls in classes:
    split_and_copy(cls)

print("✅ Dataset split complete!")


In [1]:
from PIL import Image, UnidentifiedImageError
import os

def clean_corrupted_images(folder):
    removed = 0
    for root, _, files in os.walk(folder):
        for file in files:
            path = os.path.join(root, file)
            try:
                img = Image.open(path)
                img.verify()  # Validate if it's an actual image
            except (UnidentifiedImageError, OSError):
                print(f"Removing corrupted file: {path}")
                os.remove(path)
                removed += 1
    print(f"✅ Removed {removed} corrupted or invalid files from {folder}")

# Call it on both train and validation directories
clean_corrupted_images('data/train')
clean_corrupted_images('data/val')


Removing corrupted file: data/train\Cat\666.jpg
Removing corrupted file: data/train\Cat\Thumbs.db
Removing corrupted file: data/train\Dog\11702.jpg




Removing corrupted file: data/train\Dog\Thumbs.db
✅ Removed 4 corrupted or invalid files from data/train
✅ Removed 0 corrupted or invalid files from data/val
