              Data Preprocessing

        Model: EfficientNet B0 

        Already Done
            -Images are already 48x48 pixels
            -Images are already grayscale
            -Face detection already applied (FER2013 is pre-cropped)


            
        Steps
        -01] Load raw FER2013 data
        -02] Map each original FER2013 class to its project class
        -03] Preprocess images (verify 48x48 size, convert grayscale to RGB, normalize to [0, 1])
        -04] Save processed images
        -05] Apply data augmentation (training split only)
        -06] Organize all images by mapped class

        
    
        
       

        Total corrupted files found: 0

In [1]:
import os
import cv2
import numpy as np
import random
from pathlib import Path


In [2]:
# Dataset locations: raw FER2013 input and the root for processed output
RAW_DIR = Path("/app/data/raw/fer2013")
PROCESSED_DIR = Path("/app/data/processed/FC211002_Nethmi")

# Create the output root (and any missing parents) up front; no-op if it exists
os.makedirs(PROCESSED_DIR, exist_ok=True)


In [3]:
# The seven FER2013 labels are folded into the five project labels:
# 'disgust' is merged into 'angry', and 'fear'/'surprise' into 'stressed'.
CLASS_MAPPING = dict(
    angry='angry',
    disgust='angry',
    fear='stressed',
    surprise='stressed',
    happy='happy',
    neutral='neutral',
    sad='sad',
)

# Project labels that are allowed as output classes
TARGET_CLASSES = [
    'angry',
    'happy',
    'sad',
    'stressed',
    'neutral',
]

In [4]:
# Image Preprocessing 

def preprocess_image(img_path):
    """Load one image file and prepare it as a normalized RGB array.

    The file is read as single-channel grayscale. Files that cannot be read,
    or whose size is not exactly 48x48, are rejected.

    Args:
        img_path: Path (str or path-like) of the image file to load.

    Returns:
        A float32 array of shape (48, 48, 3) scaled by 1/255, or ``None``
        when the file is unreadable or has an unexpected shape.
    """
    gray = cv2.imread(str(img_path), cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals an unreadable file by returning None
    if gray is None:
        return None

    # Only the expected 48x48 images are accepted; no resizing is done here
    if gray.shape != (48, 48):
        return None

    # Replicate the gray channel into three channels, then scale to [0, 1]
    rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
    return rgb.astype(np.float32) / 255.0

In [5]:
# Data Augmentation

def augment_image(img):
    """Return a randomly augmented version of an image.

    Applies, in order:
      1. a rotation in [-15, 15] degrees combined with a zoom in
         [0.95, 1.05] about the image centre (a single affine warp, with
         reflected borders filling any uncovered area),
      2. a brightness scaling in [0.9, 1.1], clipped to [0, 1],
      3. a horizontal flip with 50% probability.

    Rotation and zoom share one warp so the image is interpolated only once,
    and so that zoom factors below 1 really zoom out (a centre crop cannot
    grow beyond the image, which made zoom-out a no-op).

    Args:
        img: Image array of shape (H, W) or (H, W, C). The brightness step
            clips to [0, 1], so pixel values are expected in that range.

    Returns:
        The augmented image, with the same height and width as the input.
    """
    rows, cols = img.shape[:2]  # works for both 2-D and multi-channel images

    # Draw every random parameter exactly once
    angle = random.uniform(-15, 15)
    brightness = random.uniform(0.9, 1.1)
    zoom_factor = random.uniform(0.95, 1.05)
    flip = random.random() > 0.5

    # ---------------------- Rotation + Zoom ----------------------
    # scale > 1 enlarges the content (zoom in); scale < 1 shrinks it (zoom out)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, zoom_factor)
    img = cv2.warpAffine(img, M, (cols, rows), borderMode=cv2.BORDER_REFLECT)

    # ---------------------- Brightness Adjustment ----------------------
    img = np.clip(img * brightness, 0, 1)

    # ---------------------- Horizontal Flip ----------------------
    if flip:
        img = cv2.flip(img, 1)

    return img

In [6]:
def _to_uint8(img):
    """Convert a float image in [0, 1] to uint8 in [0, 255], rounding to nearest."""
    # Round instead of truncating so interpolated values are not biased
    # downward and float round-off cannot drop a pixel value by one.
    return np.clip(np.rint(img * 255.0), 0, 255).astype(np.uint8)


def _write_image(path, img):
    """Write a float image in [0, 1] to *path* as 8-bit; return True on success."""
    # cv2.imwrite reports failure through its return value, not an exception
    return bool(cv2.imwrite(str(path), _to_uint8(img)))


def _claim_stem(stem, orig_class, used_stems):
    """Reserve and return an output file stem that is unused in *used_stems*."""
    # Keep the original stem whenever possible; only disambiguate on a clash
    candidate = stem if stem not in used_stems else f"{orig_class}_{stem}"
    base = candidate
    counter = 1
    while candidate in used_stems:
        candidate = f"{base}_{counter}"
        counter += 1
    used_stems.add(candidate)
    return candidate


def process_split(split, augment=False, augment_count=3):
    """Preprocess one dataset split and write the results as PNG files.

    Reads ``RAW_DIR / split / <original class> / <image>``, maps each
    original class through ``CLASS_MAPPING`` and writes the preprocessed
    image to ``PROCESSED_DIR / split / <mapped class> / <stem>.png``.
    When *augment* is true, *augment_count* augmented variants are written
    next to it as ``<stem>_aug<i>.png``.

    Several original classes can map to the same project class. If two
    source files would produce the same output stem within one run, the
    later one is prefixed with its original class name instead of silently
    overwriting the earlier file. Directory listings are sorted so that this
    choice is deterministic.

    Args:
        split: Name of the split sub-directory (e.g. ``"train"``).
        augment: Whether to also write augmented variants of each image.
        augment_count: Number of augmented variants per image.

    Returns:
        None. Progress and a summary are printed.
    """
    # Imported here so the function does not depend on a later notebook cell
    # having imported tqdm at module level.
    from tqdm import tqdm

    print(f"\n Processing split: '{split}'")

    # Define input and output directories
    input_dir = RAW_DIR / split
    output_dir = PROCESSED_DIR / split

    total_images = 0
    saved_images = 0
    skipped_images = 0
    failed_writes = 0
    used_stems = {}  # mapped class -> set of output stems written in this run

    # Loop through each original class in the input directory
    for orig_class in sorted(os.listdir(input_dir)):
        orig_path = input_dir / orig_class
        if not orig_path.is_dir():  # Skip if it's not a directory
            continue

        # Unknown classes map to None, which is never a target class
        mapped_class = CLASS_MAPPING.get(orig_class)
        if mapped_class not in TARGET_CLASSES:
            print(f" Skipping class '{orig_class}' — not in mapping")
            continue

        out_class_dir = output_dir / mapped_class
        out_class_dir.mkdir(parents=True, exist_ok=True)
        class_stems = used_stems.setdefault(mapped_class, set())

        image_files = sorted(os.listdir(orig_path))

        print(f" Class '{orig_class}' → '{mapped_class}': {len(image_files)} images")

        # Process each image file
        for img_name in tqdm(image_files, desc=f"{orig_class} → {mapped_class}"):
            total_images += 1
            img = preprocess_image(orig_path / img_name)

            # Skip if image is invalid or corrupt
            if img is None:
                print(f" Skipped '{img_name}' — corrupt or invalid format")
                skipped_images += 1
                continue

            stem = _claim_stem(Path(img_name).stem, orig_class, class_stems)

            save_path = out_class_dir / f"{stem}.png"
            if _write_image(save_path, img):
                saved_images += 1
            else:
                print(f" Failed to write '{save_path}'")
                failed_writes += 1

            # If augmentation is enabled, create and save augmented versions
            if augment:
                for i in range(augment_count):
                    aug_img = augment_image(img.copy())
                    aug_path = out_class_dir / f"{stem}_aug{i}.png"
                    if _write_image(aug_path, aug_img):
                        saved_images += 1
                    else:
                        print(f" Failed to write '{aug_path}'")
                        failed_writes += 1

    # Print a summary of processing for the split
    print(f"\n    Finished split: '{split}'")
    print(f"   ➤ Total images found     : {total_images}")
    print(f"   ➤ Successfully processed : {saved_images}")
    print(f"   ➤ Skipped (corrupt/etc)  : {skipped_images}")
    if failed_writes:
        print(f"   ➤ Failed to write        : {failed_writes}")


In [7]:
!pip install tqdm

[notice] A new release of pip is available: 23.0.1 -> 25.1.1
[notice] To update, run: pip install --upgrade pip


In [8]:
# Entry point: preprocess the train and test splits
from tqdm import tqdm


def main():
    """Preprocess the train split (with augmentation) and the test split."""
    print(" Starting FER2013 preprocessing for EfficientNetB0...\n")

    # One entry per split: (split name, keyword options for process_split)
    split_options = (
        ("train", {"augment": True, "augment_count": 3}),
        ("test", {"augment": False}),
    )
    for split_name, options in split_options:
        process_split(split_name, **options)

    print("\n All preprocessing complete.")
    print(f"Processed data saved at: {PROCESSED_DIR}")


if __name__ == "__main__":
    main()

 Starting FER2013 preprocessing for EfficientNetB0...


 Processing split: 'train'
 Class 'angry' → 'angry': 3995 images


angry → angry: 100%|██████████| 3995/3995 [07:50<00:00,  8.49it/s]


 Class 'disgust' → 'angry': 436 images


disgust → angry: 100%|██████████| 436/436 [00:59<00:00,  7.35it/s]


 Class 'fear' → 'stressed': 4097 images


fear → stressed: 100%|██████████| 4097/4097 [08:12<00:00,  8.32it/s]


 Class 'happy' → 'happy': 7215 images


happy → happy: 100%|██████████| 7215/7215 [14:31<00:00,  8.28it/s]


 Class 'neutral' → 'neutral': 4965 images


neutral → neutral: 100%|██████████| 4965/4965 [09:23<00:00,  8.81it/s]


 Class 'sad' → 'sad': 4830 images


sad → sad: 100%|██████████| 4830/4830 [09:51<00:00,  8.17it/s]


 Class 'surprise' → 'stressed': 3171 images


surprise → stressed: 100%|██████████| 3171/3171 [06:53<00:00,  7.68it/s]



    Finished split: 'train'
   ➤ Total images found     : 28709
   ➤ Successfully processed : 114836
   ➤ Skipped (corrupt/etc)  : 0

 Processing split: 'test'
 Class 'angry' → 'angry': 958 images


angry → angry: 100%|██████████| 958/958 [00:56<00:00, 17.07it/s]


 Class 'disgust' → 'angry': 111 images


disgust → angry: 100%|██████████| 111/111 [00:05<00:00, 18.52it/s]


 Class 'fear' → 'stressed': 1024 images


fear → stressed: 100%|██████████| 1024/1024 [00:59<00:00, 17.29it/s]


 Class 'happy' → 'happy': 1774 images


happy → happy: 100%|██████████| 1774/1774 [01:47<00:00, 16.54it/s]


 Class 'neutral' → 'neutral': 1233 images


neutral → neutral: 100%|██████████| 1233/1233 [01:08<00:00, 18.13it/s]


 Class 'sad' → 'sad': 1247 images


sad → sad: 100%|██████████| 1247/1247 [01:08<00:00, 18.28it/s]


 Class 'surprise' → 'stressed': 831 images


surprise → stressed: 100%|██████████| 831/831 [00:53<00:00, 15.56it/s]


    Finished split: 'test'
   ➤ Total images found     : 7178
   ➤ Successfully processed : 7178
   ➤ Skipped (corrupt/etc)  : 0

 All preprocessing complete.
Processed data saved at: /app/data/processed/FC211002_Nethmi



