### 1. ****Import libraries****

In [1]:
# Basic libraries
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

print("Libraries loaded successfully!")


Libraries loaded successfully!


### 2. ****Dataset paths****

In [2]:
# Raw dataset path
RAW_DIR = "../data/raw"

# Processed dataset path
PROCESSED_DIR = "../data/processed"

IMG_SIZE = 224   # Target image size

# Create processed folder if not present
os.makedirs(PROCESSED_DIR, exist_ok=True)

print("Paths set and folders checked!")


Paths set and folders checked!


### 3. ****Resize all images and save****

In [3]:
# This loop will go through each class folder
# resize every image to 224x224
# and save it in processed folder

for cls in os.listdir(RAW_DIR):
    
    class_path = os.path.join(RAW_DIR, cls)
    save_path = os.path.join(PROCESSED_DIR, cls)
    
    # Create class folder inside processed
    os.makedirs(save_path, exist_ok=True)
    
    for img_name in os.listdir(class_path):
        
        img_path = os.path.join(class_path, img_name)
        
        # Read image
        img = cv2.imread(img_path)
        
        # Resize image
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        
        # Save resized image
        cv2.imwrite(os.path.join(save_path, img_name), img)

print("All images resized and saved successfully!")


All images resized and saved successfully!


### 4. ****Check resized image shape****

In [4]:
# Checking one sample image

sample_class = os.listdir(PROCESSED_DIR)[0]
sample_img = os.listdir(os.path.join(PROCESSED_DIR, sample_class))[0]

img_path = os.path.join(PROCESSED_DIR, sample_class, sample_img)
img = cv2.imread(img_path)

print("Resized image shape:", img.shape)


Resized image shape: (224, 224, 3)


### 5. ****Normalization (0â€“1 scaling)****

In [5]:
# Normalization means converting pixel values from 0-255 to 0-1
# This helps model train better

img_norm = img / 255.0

print("Before normalization max value:", img.max())
print("After normalization max value:", img_norm.max())


Before normalization max value: 246
After normalization max value: 0.9647058823529412


### 6. ****Data Augmentation setup****

In [6]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data augmentation settings
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True
)

print("Data augmentation pipeline ready!")


Data augmentation pipeline ready!


### 7. ****Generator test****

In [7]:
train_gen = train_datagen.flow_from_directory(
    PROCESSED_DIR,
    target_size=(224,224),
    batch_size=32,
    class_mode="categorical"
)

print("Generator working properly!")


Found 3076 images belonging to 7 classes.
Generator working properly!
