In [1]:
import os
import cv2
import shutil
from PIL import Image, UnidentifiedImageError
from tqdm import tqdm

# Source directory (update to your actual path)
SOURCE_DIR = "data"  # Should contain subfolders like explosion/, fire/, etc.
PROCESSED_DIR = "processed_data"
IMG_SIZE = (224, 224)

# Clean and resize images
def preprocess_and_clean_images():
    if os.path.exists(PROCESSED_DIR):
        shutil.rmtree(PROCESSED_DIR)
    os.makedirs(PROCESSED_DIR)

    classes = os.listdir(SOURCE_DIR)
    for cls in tqdm(classes, desc="Processing classes"):
        cls_path = os.path.join(SOURCE_DIR, cls)
        save_path = os.path.join(PROCESSED_DIR, cls)
        os.makedirs(save_path, exist_ok=True)

        for root, _, files in os.walk(cls_path):
            for file in files:
                try:
                    file_path = os.path.join(root, file)
                    img = Image.open(file_path).convert('RGB')
                    img = img.resize(IMG_SIZE)
                    img.save(os.path.join(save_path, file))
                except (UnidentifiedImageError, OSError):
                    continue

preprocess_and_clean_images()


Processing classes: 100%|██████████| 6/6 [02:31<00:00, 25.32s/it]


In [2]:
from pathlib import Path
import os

def rename_images_by_folder(root_dir, valid_exts=('.jpg', '.png', '.jpeg')):
    """Renames images in subfolders to [folder_name]_[number].[ext]"""
    
    root_path = Path(root_dir)
    
    for folder in root_path.iterdir():
        if folder.is_dir():
            print(f"\nProcessing folder: {folder.name}")
            
            # Get all image files in directory
            images = sorted([f for f in folder.iterdir() 
                           if f.suffix.lower() in valid_exts])
            
            # Rename with sequential numbering
            for idx, img_path in enumerate(images, 1):
                new_name = f"{folder.name}_{idx:03d}{img_path.suffix}"
                new_path = img_path.with_name(new_name)
                
                # Check for name conflicts
                if new_path.exists():
                    print(f"Warning: {new_name} already exists, skipping")
                    continue
                
                img_path.rename(new_path)
                print(f"Renamed: {img_path.name} -> {new_name}")

if __name__ == "__main__":
    # Set your root directory containing category folders
    ROOT_DIR = "processed_data"
    
    rename_images_by_folder(ROOT_DIR)
    print("\nRenaming complete!")



Processing folder: accident
Renamed: 00001.jpg -> accident_001.jpg
Renamed: 00002.jpg -> accident_002.jpg
Renamed: 00003.jpg -> accident_003.jpg
Renamed: 00004.jpg -> accident_004.jpg
Renamed: 00005.jpg -> accident_005.jpg
Renamed: 00006.jpg -> accident_006.jpg
Renamed: 00007.jpg -> accident_007.jpg
Renamed: 00008.jpg -> accident_008.jpg
Renamed: 00009.jpg -> accident_009.jpg
Renamed: 00010.jpg -> accident_010.jpg
Renamed: 00011.jpg -> accident_011.jpg
Renamed: 00012.jpg -> accident_012.jpg
Renamed: 00013.jpg -> accident_013.jpg
Renamed: 00014.jpg -> accident_014.jpg
Renamed: 00015.jpg -> accident_015.jpg
Renamed: 00016.jpg -> accident_016.jpg
Renamed: 00017.jpg -> accident_017.jpg
Renamed: 00018.jpg -> accident_018.jpg
Renamed: 00019.jpg -> accident_019.jpg
Renamed: 00020.jpg -> accident_020.jpg
Renamed: 00021.jpg -> accident_021.jpg
Renamed: 00022.jpg -> accident_022.jpg
Renamed: 00023.jpg -> accident_023.jpg
Renamed: 00024.jpg -> accident_024.jpg
Renamed: 00025.jpg -> accident_025.

In [3]:
from sklearn.model_selection import train_test_split

TRAIN_DIR = "processed_data/train"
VAL_DIR = "processed_data/val"

def split_train_val(processed_dir=ROOT_DIR, train_dir=TRAIN_DIR, val_dir=VAL_DIR, test_size=0.3):
    if os.path.exists(train_dir):
        shutil.rmtree(train_dir)
    if os.path.exists(val_dir):
        shutil.rmtree(val_dir)

    os.makedirs(train_dir)
    os.makedirs(val_dir)

    for cls in os.listdir(processed_dir):
        cls_path = os.path.join(processed_dir, cls)
        images = [img for img in os.listdir(cls_path) if img.lower().endswith(('.png', '.jpg', '.jpeg'))]
        
        if len(images) == 0:
            print(f"Skipping class '{cls}' because it contains no images.")
            continue

        train_imgs, val_imgs = train_test_split(images, test_size=test_size, random_state=42)

        train_cls_path = os.path.join(train_dir, cls)
        val_cls_path = os.path.join(val_dir, cls)
        os.makedirs(train_cls_path, exist_ok=True)
        os.makedirs(val_cls_path, exist_ok=True)

        for img in train_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(train_cls_path, img))
        for img in val_imgs:
            shutil.copy(os.path.join(cls_path, img), os.path.join(val_cls_path, img))

split_train_val()


Skipping class 'train' because it contains no images.
Skipping class 'val' because it contains no images.
