In [28]:
import os
import shutil
import csv

In [None]:
# Integer label (the name of each label sub-folder) -> emotion name.
# Indices 0-6 are the same as the "universal" emotion mapping used for the
# other datasets (Angry, Disgust, Fear, Happy, Sad, Surprise, Neutral);
# AffectNet additionally provides index 7, 'Contempt'.
# NOTE(review): the original AffectNet release documents a different numbering
# (0=Neutral, 1=Happy, 2=Sad, 3=Surprise, 4=Fear, 5=Disgust, 6=Anger,
# 7=Contempt). Confirm that the folder names of the local copy really follow
# the ordering below before trusting the generated labels.
AffectNet_Label_Mapping = dict(enumerate((
    'Angry',
    'Disgust',
    'Fear',
    'Happy',
    'Sad',
    'Surprise',
    'Neutral',
    'Contempt',
)))

In [None]:
def flatten_affectnet(source_base, destination_base, label_mapping=None):
    """
    Flatten the per-label AffectNet folders into one folder per usage split.

    For each usage folder under ``source_base`` ('train', 'test', 'val'), every
    file found in a label subdirectory is copied into
    ``destination_base/<usage>`` ('val' is written as 'validation'). A
    ``labels.csv`` file is written next to the copies, mapping each new
    filename to the name of the label directory it came from.

    The new filename format is: {usage}_{index}_{label_name}{ext}
    where ``label_name`` is looked up in ``label_mapping`` and ``index`` counts
    the images of the current usage folder, starting at 0.

    Args:
        source_base (str): The path to the root directory of the AffectNet dataset.
        destination_base (str): The path where the flattened directories will be created.
        label_mapping (Mapping[int, str] | None): Maps the integer value of a
            label directory name to the name embedded in the new filename.
            Defaults to the notebook-level ``AffectNet_Label_Mapping``.

    Notes:
        * A usage folder that does not exist under ``source_base`` is reported
          and skipped; no output folder is created for it.
        * A label directory whose name is not an integer key of
          ``label_mapping`` (for example ``.ipynb_checkpoints``) is reported
          and skipped instead of aborting the whole run.
        * Label directories and the files inside them are visited in sorted
          order, so the index assigned to an image is the same on every run.
    """
    if label_mapping is None:
        label_mapping = AffectNet_Label_Mapping

    # Define usage directories and how they will be represented in filenames.
    usage_dirs = ['train', 'test', 'val']
    usage_prefix = {
        'train': 'train',
        'test': 'test',
        'val': 'validation'
    }

    for usage in usage_dirs:
        source_usage_path = os.path.join(source_base, usage)
        if not os.path.isdir(source_usage_path):
            print(f"Directory {source_usage_path} does not exist. Skipping.")
            continue

        # Use the mapping for naming output folders (e.g., 'validation' for 'val')
        output_usage_dirname = usage_prefix[usage]
        output_usage_path = os.path.join(destination_base, output_usage_dirname)
        os.makedirs(output_usage_path, exist_ok=True)

        # List to keep track of (filename, label) pairs for the CSV file.
        csv_rows = []
        image_index = 0

        # Process each label subdirectory (e.g., "0" through "7")
        for label in sorted(os.listdir(source_usage_path)):
            label_path = os.path.join(source_usage_path, label)
            if not os.path.isdir(label_path):
                continue

            # Resolve the label once per directory; anything that is not a
            # known integer label is skipped rather than crashing mid-copy.
            try:
                label_name = label_mapping[int(label)]
            except (ValueError, LookupError):
                print(f"Label directory {label_path} is not a known label. Skipping.")
                continue

            # os.listdir returns entries in arbitrary order; sorting keeps the
            # index -> image assignment stable, so a re-run overwrites the same
            # output files instead of leaving differently numbered duplicates.
            for image_file in sorted(os.listdir(label_path)):
                image_path = os.path.join(label_path, image_file)
                if not os.path.isfile(image_path):
                    continue

                ext = os.path.splitext(image_file)[1]
                new_filename = f"{output_usage_dirname}_{image_index}_{label_name}{ext}"
                new_filepath = os.path.join(output_usage_path, new_filename)
                shutil.copy2(image_path, new_filepath)
                # The CSV keeps the original label directory name (e.g. "3").
                csv_rows.append((new_filename, label))
                image_index += 1

        # Write the CSV file in the current usage output directory.
        csv_path = os.path.join(output_usage_path, 'labels.csv')
        with open(csv_path, mode='w', newline='') as csvfile:
            csv_writer = csv.writer(csvfile)
            # Write header row.
            csv_writer.writerow(['filename', 'label'])
            # Write each filename and label mapping.
            csv_writer.writerows(csv_rows)
        print(f"Processed {output_usage_dirname}: {len(csv_rows)} images. CSV saved to {csv_path}")

In [32]:
def copy_images_to_label_subdirectories(base_dir):
    """
    Sort the flattened images of each usage folder into per-label subfolders.

    The 'train', 'test' and 'validation' folders under base_dir are visited in
    that order. Every regular file whose name follows
    {usecase}_{index}_{label}.{ext} is copied (the original stays in place)
    into <usage>/<label>/, and that folder is created on demand.

    A usage folder that does not exist, and any file whose name has fewer than
    three '_'-separated parts, is reported with print() and skipped.
    """
    for split_name in ('train', 'test', 'validation'):
        split_dir = os.path.join(base_dir, split_name)
        if not os.path.isdir(split_dir):
            print(f"Directory {split_dir} does not exist. Skipping.")
            continue

        for entry in os.listdir(split_dir):
            src_path = os.path.join(split_dir, entry)
            # Only regular files are considered; sub-folders are ignored.
            if not os.path.isfile(src_path):
                continue

            pieces = entry.split('_')
            if len(pieces) < 3:
                print(f"Filename {entry} does not match expected format. Skipping.")
                continue

            # The label is the last '_'-separated piece without its extension.
            label = os.path.splitext(pieces[-1])[0]

            target_dir = os.path.join(split_dir, label)
            os.makedirs(target_dir, exist_ok=True)
            shutil.copy2(src_path, os.path.join(target_dir, entry))

In [None]:
# Step 1: flatten the raw AffectNet folder into one directory per usage split.
source_base = 'AffectNet'
destination_base = 'AffectNet_Structured_Mapping'
flatten_affectnet(source_base, destination_base)

# Step 2: regroup the flattened copies into per-label subdirectories. This
# works on the folder written by step 1 (train, test, validation inside it).
base_directory = destination_base
copy_images_to_label_subdirectories(base_directory)

Filename labels.csv does not match expected format. Skipping.
Filename labels.csv does not match expected format. Skipping.
Filename labels.csv does not match expected format. Skipping.
