                      Data Preprocessing


        
        Optional Augmentations:
            - Rotation: ±15 degrees (faces can be slightly tilted)
            - Horizontal flip: ONLY for emotions whose expression is left-right symmetric
            - Brightness adjustment: ±10% (lighting variations)
            - Small zoom: ±5% (distance variations)
            - NO vertical flip (it would create upside-down faces!)

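        Steps:
            1. Read each image as grayscale; skip files that cannot be read or are not 48x48
            2. Resize to 224x224
            3. Convert grayscale to 3-channel RGB
            4. Normalize pixel values to [0, 1]
            5. Save as PNG under the mapped class folder (training images also get augmented copies)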

Total corrupted files found: 0

In [None]:
import os
import cv2
import numpy as np
#from tqdm import tqdm
# import shutil
import random
from pathlib import Path
# from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Set paths.
# RAW_DIR: root of the raw FER2013 data; process_split reads from RAW_DIR / <split>.
# PROCESSED_DIR: root of the processed output; process_split writes to
# PROCESSED_DIR / <split> / <mapped class>.
RAW_DIR = Path("/app/data/raw/fer2013")
PROCESSED_DIR = Path("/app/data/processed/FC211002_Hirunika")
# Create the output root up front (including any missing parent directories);
# exist_ok=True makes re-running this cell harmless.
PROCESSED_DIR.mkdir(parents=True, exist_ok=True)


In [None]:
## Mapping FER2013 classes to 5 project classes
# Each entry pairs a project class with the FER2013 folder names folded into it.
_PROJECT_CLASS_SOURCES = (
    ('angry', ('angry', 'disgust')),
    ('stressed', ('fear', 'surprise')),
    ('happy', ('happy',)),
    ('neutral', ('neutral',)),
    ('sad', ('sad',)),
)

# FER2013 class name -> project class name.
CLASS_MAPPING = {
    fer_class: project_class
    for project_class, fer_classes in _PROJECT_CLASS_SOURCES
    for fer_class in fer_classes
}

# The project classes that process_split accepts as mapping targets.
TARGET_CLASSES = [
    'angry',
    'happy',
    'sad',
    'stressed',
    'neutral',
]

In [None]:
# Image Preprocessing 

def preprocess_image(img_path):
    """Load one 48x48 face image and prepare it as a 224x224 RGB float array.

    Args:
        img_path: Path (or path-like) of the image file to load.

    Returns:
        A float32 array of shape (224, 224, 3) scaled by 1/255, or None when
        the file cannot be read as an image or is not exactly 48x48 pixels.
    """
    gray = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)

    # Reject anything that is unreadable or not in the expected 48x48 format.
    if gray is None:
        return None
    if gray.shape != (48, 48):
        return None

    upscaled = cv2.resize(gray, (224, 224))
    # Replicate the single grey channel into three channels.
    rgb = cv2.cvtColor(upscaled, cv2.COLOR_GRAY2RGB)
    # Scale to [0, 1] as float32.
    return rgb.astype(np.float32) / 255.0

In [None]:
# Data Augmentation

def augment_image(img, allow_flip=True):
    """Return a randomly augmented version of an image.

    The random draws happen in this order: rotation angle in [-15, 15]
    degrees, brightness factor in [0.9, 1.1], zoom factor in [0.95, 1.05],
    and (when ``allow_flip`` is true) a coin toss for a horizontal flip.

    Args:
        img: Image array of shape (H, W) or (H, W, C). Brightness scaling
            clips the result to [0, 1], so pixel values are expected to be
            floats in that range.
        allow_flip: When False, the horizontal flip is never applied. Use
            this for classes where mirroring would change the label.

    Returns:
        The augmented image, with the same height and width as ``img``.
    """
    # Only the first two axes are spatial; this also accepts 2-D input.
    rows, cols = img.shape[:2]

    angle = random.uniform(-15, 15)
    factor = random.uniform(0.9, 1.1)
    zoom_factor = random.uniform(0.95, 1.05)

    # Rotate and zoom about the image centre in a single affine warp.
    # A centre crop can only zoom in: for factors below 1 the crop window
    # would be larger than the image and the zoom would silently do nothing.
    # Using the factor as the affine scale handles both directions, fills the
    # exposed border by reflection, resamples the image only once, and keeps
    # the output at the input size instead of a hard-coded 224x224.
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, zoom_factor)
    img = cv2.warpAffine(img, M, (cols, rows), borderMode=cv2.BORDER_REFLECT)

    # Brightness change, clipped back into the valid [0, 1] range.
    img = np.clip(img * factor, 0, 1)

    # Horizontal mirror with probability 0.5 (never a vertical flip).
    if allow_flip and random.random() > 0.5:
        img = cv2.flip(img, 1)

    return img

In [None]:
def process_split(split, augment=False, augment_count=3):
    """Preprocess one dataset split and write it out as PNG files.

    Every class folder under ``RAW_DIR / split`` whose name maps (through
    ``CLASS_MAPPING``) to one of ``TARGET_CLASSES`` is processed: each image
    goes through ``preprocess_image`` and is saved under
    ``PROCESSED_DIR / split / <mapped class>``. Progress and a summary are
    printed.

    Output files are named ``<original class>_<original stem>.png``. Several
    original classes are merged into one output folder, so the class prefix
    keeps files with the same stem from overwriting each other.

    Args:
        split: Name of the split sub-directory (for example "train").
        augment: When True, also save augmented copies of every image.
        augment_count: Number of augmented copies per image; they are saved
            with an ``_aug<i>`` suffix.

    Raises:
        OSError: If the input directory cannot be listed.
    """
    # Import here so the function does not depend on a later notebook cell
    # having bound `tqdm`; fall back to plain iteration if it is missing.
    try:
        from tqdm import tqdm as progress
    except ImportError:
        def progress(iterable, **_kwargs):
            return iterable

    print(f"\n Processing split: '{split}'")
    input_dir = RAW_DIR / split
    output_dir = PROCESSED_DIR / split
    total_images = 0
    saved_images = 0
    skipped_images = 0
    failed_writes = 0

    # Sorted so that the processing order (and the log) is reproducible.
    for orig_path in sorted(input_dir.iterdir()):
        if not orig_path.is_dir():
            continue
        orig_class = orig_path.name

        mapped_class = CLASS_MAPPING.get(orig_class)
        if mapped_class not in TARGET_CLASSES:
            print(f" Skipping class '{orig_class}' — not in mapping")
            continue

        out_class_dir = output_dir / mapped_class
        out_class_dir.mkdir(parents=True, exist_ok=True)
        image_files = sorted(os.listdir(orig_path))

        print(f" Class '{orig_class}' → '{mapped_class}': {len(image_files)} images")

        for img_name in progress(image_files, desc=f"{orig_class} → {mapped_class}"):
            total_images += 1
            img = preprocess_image(orig_path / img_name)
            if img is None:
                print(f" Skipped '{img_name}' — corrupt or invalid format")
                skipped_images += 1
                continue

            # Prefix with the source class: merged classes may reuse stems.
            stem = f"{orig_class}_{Path(img_name).stem}"
            variants = [(f"{stem}.png", img)]
            if augment:
                variants.extend(
                    (f"{stem}_aug{i}.png", augment_image(img.copy()))
                    for i in range(augment_count)
                )

            for file_name, pixels in variants:
                # Count an image as saved only if the write really succeeded.
                if _save_image(out_class_dir / file_name, pixels):
                    saved_images += 1
                else:
                    print(f" Failed to write '{file_name}'")
                    failed_writes += 1

    print(f"\n    Finished split: '{split}'")
    print(f"   ➤ Total images found     : {total_images}")
    print(f"   ➤ Successfully processed : {saved_images}")
    print(f"   ➤ Skipped (corrupt/etc)  : {skipped_images}")
    if failed_writes:
        print(f"   ➤ Failed writes          : {failed_writes}")


def _to_uint8(img):
    """Convert a float image in [0, 1] to uint8 in [0, 255], rounding to nearest."""
    # Plain astype() truncates, which biases pixel values downward.
    return np.clip(np.rint(img * 255.0), 0, 255).astype(np.uint8)


def _save_image(path, img):
    """Write a float [0, 1] image to ``path`` as 8-bit; return True on success."""
    return bool(cv2.imwrite(str(path), _to_uint8(img)))


In [None]:
!pip install tqdm

In [None]:
 # Run Script
from tqdm import tqdm  # progress-bar helper

if __name__ == "__main__":
    print(" Starting FER2013 preprocessing for EfficientNetB0...\n")

    # Only the training split gets augmented copies; the test split is
    # preprocessed as-is.
    split_options = (
        ("train", {"augment": True, "augment_count": 3}),
        ("test", {"augment": False}),
    )
    for split_name, options in split_options:
        process_split(split_name, **options)

    print(
        "\n All preprocessing complete.",
        f"Processed data saved at: {PROCESSED_DIR}",
        sep="\n",
    )