**Dataset Link:** https://www.kaggle.com/c/challenges-in-representation-learning-facial-expression-recognition-challenge

In [6]:
import os
import shutil
import pandas as pd
import numpy as np
from PIL import Image

In [7]:
# FER class indices -> emotion names. The indices are used unchanged because
# they already line up with the universal emotion label set.
# NOTE(review): index 7 ('Contempt') is presumably present only for parity with
# that shared label set -- confirm whether fer2013.csv ever contains it.
FER_Label_Mapping = dict(enumerate(
    ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral', 'Contempt']
))
# Reverse lookup: emotion name -> class index.
FER_Label_Mapping_Inverse = dict(zip(FER_Label_Mapping.values(), FER_Label_Mapping.keys()))

In [8]:
def save_images(dataset, base_dir, split_name, target_dir, image_shape=(48, 48)):
    """
    Save images and create labels CSV for a dataset

    Each row is decoded into an 8-bit image and written to
    base_dir/target_dir as '{split_name}_{idx}_{label name}.png', where idx is
    the row's position within `dataset` (not its DataFrame index). A
    'labels.csv' with columns 'filename' and 'label' is written alongside.

    Parameters:
    dataset (DataFrame): Rows with a 'pixels' column (whitespace-separated
        integer pixel values) and an 'emotion' column (key of FER_Label_Mapping)
    base_dir (str): Root output directory
    split_name (str): Prefix for filenames (train/val/test)
    target_dir (str): Subdirectory of base_dir to save images and CSV
    image_shape (tuple): Shape the flat pixel list is reshaped to;
        defaults to (48, 48)

    Raises:
    KeyError: If an 'emotion' value is not a key of FER_Label_Mapping
    ValueError: If a row's pixel count does not match image_shape
    """
    output_dir = os.path.join(base_dir, target_dir)
    # Create the destination up front so the function works even when the
    # caller has not prepared the directory tree.
    os.makedirs(output_dir, exist_ok=True)

    label_data = []

    for idx, (_, row) in enumerate(dataset.iterrows()):
        # Decode the pixel string into a 2-D uint8 array
        pixels = np.array([int(value) for value in row['pixels'].split()], dtype=np.uint8)
        img = Image.fromarray(pixels.reshape(image_shape))

        # Build the filename; the label name is embedded so it can be parsed back later
        emotion_label = int(row['emotion'])
        filename = f"{split_name}_{idx}_{FER_Label_Mapping[emotion_label]}.png"

        img.save(os.path.join(output_dir, filename))

        label_data.append({'filename': filename, 'label': emotion_label})

    # Explicit columns keep the CSV header present even when the split is empty
    labels_df = pd.DataFrame(label_data, columns=['filename', 'label'])
    labels_df.to_csv(os.path.join(output_dir, 'labels.csv'), index=False)

In [11]:
def copy_images_to_label_subdirectories(base_dir, label_to_id=None):
    """
    For each subdirectory (test, train, validation) in base_dir,
    copy images into subdirectories based on their label, extracted from the filename.

    Expected filename format: {usecase}_{index}_{label}.{ext}

    The label name is translated to its id through `label_to_id`, and the file
    is copied (originals are kept) into {base_dir}/{usage}/{id}/.

    Parameters:
    base_dir (str): Directory containing the train/test/validation folders
    label_to_id (dict, optional): Maps label name -> id used as the
        subdirectory name. Defaults to FER_Label_Mapping_Inverse.

    Files that do not match the filename format, or whose label is not in
    `label_to_id`, are reported and skipped.
    """
    if label_to_id is None:
        label_to_id = FER_Label_Mapping_Inverse

    # Define the usage folders.
    usage_dirs = ['train', 'test', 'validation']

    for usage in usage_dirs:
        usage_path = os.path.join(base_dir, usage)
        if not os.path.isdir(usage_path):
            print(f"Directory {usage_path} does not exist. Skipping.")
            continue

        # Process each file in the usage folder.
        for filename in os.listdir(usage_path):
            file_path = os.path.join(usage_path, filename)
            # Label subdirectories from an earlier run are not files; ignore them.
            if not os.path.isfile(file_path):
                continue

            # Parse the filename. We expect at least 3 parts separated by '_'
            parts = filename.split('_')
            if len(parts) < 3:
                print(f"Filename {filename} does not match expected format. Skipping.")
                continue

            # The label is assumed to be the last part, with the file extension removed.
            label_name, _ = os.path.splitext(parts[-1])

            # Skip unknown labels instead of copying them into a literal 'None' directory.
            if label_name not in label_to_id:
                print(f"Filename {filename} has unknown label '{label_name}'. Skipping.")
                continue

            # Create the label subdirectory if it doesn't exist.
            label_dir = os.path.join(usage_path, str(label_to_id[label_name]))
            os.makedirs(label_dir, exist_ok=True)

            # Copy the image into the label subdirectory.
            shutil.copy2(file_path, os.path.join(label_dir, filename))

In [None]:
# Load the full FER table; rows are split below by their 'Usage' value
df = pd.read_csv('FER/fer2013.csv')

# Root of the generated directory tree
base_directory = 'FER__Structured_Aligned'
os.makedirs(base_directory, exist_ok=True)

# One output folder per split
for folder in ('train', 'validation', 'test'):
    os.makedirs(os.path.join(base_directory, folder), exist_ok=True)

# 'Training' rows become the train split, 'PublicTest' the validation split
# and 'PrivateTest' the test split
training_set = df.loc[df['Usage'] == 'Training']
validation_set = df.loc[df['Usage'] == 'PublicTest']
testing_set = df.loc[df['Usage'] == 'PrivateTest']

# Export every split as PNG files plus a labels.csv: (rows, filename prefix, folder)
for split_rows, prefix, folder in (
    (training_set, "train", "train"),
    (validation_set, "val", "validation"),
    (testing_set, "test", "test"),
):
    save_images(split_rows, base_directory, prefix, folder)

# Additionally copy each image into a per-label subfolder of its split
copy_images_to_label_subdirectories(base_directory)

Filename labels.csv does not match expected format. Skipping.
