In [5]:
# ==============================
# Step 2: Preprocess Data
# ==============================

# 1️⃣ Import libraries
import os
import shutil
import random
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import warnings

# Ignore FutureWarnings from TensorFlow/Keras
warnings.filterwarnings("ignore", category=FutureWarning)

# ------------------------------
# 2️⃣ Define paths
# ------------------------------
# The defaults below are the original author's local folders. Set the
# WASTE_RAW_DIR / WASTE_PROCESSED_DIR environment variables to run the
# notebook on another machine without editing this cell.
RAW_DIR = os.environ.get(
    "WASTE_RAW_DIR",
    r"C:\Users\uthay\Desktop\cv-waste-classification\data\raw",
)
PROCESSED_DIR = os.environ.get(
    "WASTE_PROCESSED_DIR",
    r"C:\Users\uthay\Desktop\cv-waste-classification\data\processed",
)

# One sub-folder of PROCESSED_DIR per dataset split.
TRAIN_DIR = os.path.join(PROCESSED_DIR, "train")
VAL_DIR = os.path.join(PROCESSED_DIR, "val")
TEST_DIR = os.path.join(PROCESSED_DIR, "test")

# Create processed folders if they don't exist
for split_dir in (TRAIN_DIR, VAL_DIR, TEST_DIR):
    os.makedirs(split_dir, exist_ok=True)

# ------------------------------
# 3️⃣ Define split ratios
# ------------------------------
TRAIN_SPLIT = 0.7
VAL_SPLIT = 0.15
TEST_SPLIT = 0.15

# The splitting step only reads TRAIN_SPLIT and VAL_SPLIT (the test set gets
# whatever is left), so a wrong TEST_SPLIT would otherwise go unnoticed.
if abs(TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT - 1.0) > 1e-9:
    raise ValueError(
        "TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT must sum to 1.0, got "
        f"{TRAIN_SPLIT + VAL_SPLIT + TEST_SPLIT}"
    )

# ------------------------------
# 4️⃣ Split images into train/val/test
# ------------------------------
# Dedicated, seeded RNG: the same raw data always yields the same split, and
# the global `random` state used elsewhere is left untouched.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# Only sub-directories of RAW_DIR are classes. A stray file there (e.g. an
# OS-generated thumbnail cache) would otherwise be treated as a class and make
# os.listdir(cls_path) fail.
classes = sorted(
    entry
    for entry in os.listdir(RAW_DIR)
    if os.path.isdir(os.path.join(RAW_DIR, entry))
)

for cls in classes:
    cls_path = os.path.join(RAW_DIR, cls)

    # os.listdir returns entries in arbitrary order, so sort first to make the
    # seeded shuffle deterministic. Keep regular files only: shutil.copy
    # cannot copy a nested directory.
    images = sorted(
        name
        for name in os.listdir(cls_path)
        if os.path.isfile(os.path.join(cls_path, name))
    )
    split_rng.shuffle(images)

    # int() truncates, so any rounding remainder ends up in the test split.
    train_count = int(len(images) * TRAIN_SPLIT)
    val_count = int(len(images) * VAL_SPLIT)

    train_imgs = images[:train_count]
    val_imgs = images[train_count:train_count + val_count]
    test_imgs = images[train_count + val_count:]

    # Copy images to processed folders
    for folder, img_list in zip(
        [TRAIN_DIR, VAL_DIR, TEST_DIR],
        [train_imgs, val_imgs, test_imgs]
    ):
        class_folder = os.path.join(folder, cls)

        # Start from an empty class folder. Files left over from an earlier
        # run (with a different shuffle or different raw data) would stay
        # behind and could put the same image in more than one split.
        if os.path.isdir(class_folder):
            shutil.rmtree(class_folder)
        os.makedirs(class_folder)

        for img in img_list:
            src = os.path.join(cls_path, img)
            dst = os.path.join(class_folder, img)
            shutil.copy(src, dst)

print("✅ Dataset split completed!")
print("Train / Val / Test folders are ready.")


✅ Dataset split completed!
Train / Val / Test folders are ready.


In [6]:
# 5️⃣ Image pipelines: augmentation for training, rescale-only for validation/test
IMG_SIZE = (224, 224)
BATCH_SIZE = 32

# Keyword arguments shared by every flow_from_directory call below.
_flow_options = dict(
    target_size=IMG_SIZE,
    batch_size=BATCH_SIZE,
    class_mode="categorical",
)

# Training images are multiplied by 1/255 and randomly rotated (up to 20
# degrees), zoomed (up to 20%) and mirrored horizontally.
train_gen = ImageDataGenerator(
    horizontal_flip=True,
    zoom_range=0.2,
    rotation_range=20,
    rescale=1.0 / 255,
)

# Validation and test images get the same rescaling but no augmentation.
val_test_gen = ImageDataGenerator(rescale=1.0 / 255)

# 6️⃣ Generators
train_data = train_gen.flow_from_directory(TRAIN_DIR, **_flow_options)

val_data = val_test_gen.flow_from_directory(VAL_DIR, **_flow_options)

# shuffle=False keeps the test batches in a fixed file order.
test_data = val_test_gen.flow_from_directory(
    TEST_DIR,
    shuffle=False,
    **_flow_options,
)


Found 14142 images belonging to 10 classes.
Found 3027 images belonging to 10 classes.
Found 3043 images belonging to 10 classes.
