# JAFFE Dataset

Download the dataset from the link below and then run the code to foramt it according to the requirements of the models. 

Additionally, run the FaceAlignment.ipynb to align the images such that features are consistent across all images.

**Dataset Link:** https://zenodo.org/records/14974867

**NOTE:** The JAFFE dataset comes with a **A_README_FIRST.txt** file or similar. This file contains the image mappings in the strucutre below. 
This needs to be manually copied and pasted into another file named **mapping.csv** in the same directory prior to running the code below. 


**Old Format:**

<p># HAP SAD SUR ANG DIS FEA PIC <br>
-------------------------------------------------------------- <br>
1 2.87 2.52 2.10 1.97 1.97 2.06 KM-NE1 <br>
2 2.87 2.42 1.58 1.84 1.77 1.77 KM-NE2 <br>
3 2.50 2.10 1.70 1.50 1.73 1.53 KM-NE3 <br></p>


**Mapping.csv Format:**
<p>Image_No HAP SAD SUR ANG DIS FEA PIC <br>
1 2.87 2.52 2.10 1.97 1.97 2.06 KM-NE1 <br>
2 2.87 2.42 1.58 1.84 1.77 1.77 KM-NE2 <br>
3 2.50 2.10 1.70 1.50 1.73 1.53 KM-NE3 <br></p>

In [1]:
import os
import pandas as pd
from PIL import Image
from sklearn.model_selection import train_test_split
import shutil

In [2]:
Universal_Label_Mapping = {0:'Angry', 1:'Disgust', 2:'Fear', 3:'Happy', 4:'Sad', 5:'Surprise', 6:'Neutral', 7:'Contempt'}
Universal_Label_Mapping_Inverse = {v: k for k, v in Universal_Label_Mapping.items()}

In [3]:
def save_images(dataset, split_name, image_dir, base_dir):
    labels = []
    for idx, (_, row) in enumerate(dataset.iterrows()):
        try:
            # Process image
            img_path = os.path.join(image_dir, row['filename'])
            img = Image.open(img_path)
            
            # Create new filename
            new_filename = f"{split_name}_{idx}_{row['label']}.png"
            save_path = os.path.join(base_dir, split_name, new_filename)
            img.save(save_path)
            
            # Record label
            labels.append({'filename': new_filename, 'label': Universal_Label_Mapping_Inverse[row['label']]})
        except Exception as e:
            print(f"Error processing {row['filename']}: {str(e)}")
    
    # Save labels CSV with numerical sorting
    if labels:
        labels_df = pd.DataFrame(labels)
        labels_df.to_csv(os.path.join(base_dir, split_name, 'labels.csv'), index=False)

In [4]:
def copy_images_to_label_subdirectories(base_dir):
    """
    For each subdirectory (test, train, validation) in base_dir,
    copy images into subdirectories based on their label, extracted from the filename.
    
    Expected filename format: {usecase}_{index}_{label}.{ext}
    """
    # Define the usage folders.
    usage_dirs = ['train', 'test', 'validation']
    
    for usage in usage_dirs:
        usage_path = os.path.join(base_dir, usage)
        if not os.path.isdir(usage_path):
            print(f"Directory {usage_path} does not exist. Skipping.")
            continue
        
        # Process each file in the usage folder.
        for filename in os.listdir(usage_path):
            file_path = os.path.join(usage_path, filename)
            if os.path.isfile(file_path):
                # Parse the filename. We expect at least 3 parts separated by '_'
                parts = filename.split('_')
                if len(parts) < 3:
                    print(f"Filename {filename} does not match expected format. Skipping.")
                    continue
                
                # The label is assumed to be the last part, with the file extension removed.
                label_with_ext = parts[-1]
                label, _ = os.path.splitext(label_with_ext)
                label = str(Universal_Label_Mapping_Inverse.get(label, None))
                
                # Create the label subdirectory if it doesn't exist.
                label_dir = os.path.join(usage_path, label)
                os.makedirs(label_dir, exist_ok=True)
                
                # Copy the image into the label subdirectory.
                destination_file_path = os.path.join(label_dir, filename)
                shutil.copy2(file_path, destination_file_path)

In [None]:
# Configuration
base_dir = "JAFFE_Structured_Aligned"
image_dir = "JAFFE"
mapping_file = "mapping.csv"
seed = 42
test_val_size = 0.4  # 40% for test+validation (will split to 20% each)

# Create directory structure
os.makedirs(base_dir, exist_ok=True)
for split in ['train', 'test', 'validation']:
    os.makedirs(os.path.join(base_dir, split), exist_ok=True)

# Load and process mapping data
df = pd.read_csv(os.path.join(image_dir, mapping_file), delim_whitespace=True)
emotion_columns = ['HAP', 'SAD', 'SUR', 'ANG', 'DIS', 'FEA']
emotion_mapping = {
    'HAP': 'Happy',
    'SAD': 'Sad',
    'SUR': 'Surprise',
    'ANG': 'Angry',
    'DIS': 'Disgust',
    'FEA': 'Fear'
}

# Determine dominant emotion for each image
df['label'] = df[emotion_columns].idxmax(axis=1).map(emotion_mapping)

df['filename'] = df.apply(
    lambda row: (
        row['PIC'].replace('-', '.') + 
        '.' + 
        str(row['Image_No']) + 
        '.tiff'
    ), 
    axis=1
)

# Split dataset with stratification
train_df, temp_df = train_test_split(
    df,
    test_size=test_val_size,
    stratify=df['label'],
    random_state=seed
)

val_df, test_df = train_test_split(
    temp_df,
    test_size=0.5,
    stratify=temp_df['label'],
    random_state=seed
)

# Save all splits
save_images(train_df, 'train', image_dir, base_dir)
save_images(val_df, 'validation', image_dir, base_dir)
save_images(test_df, 'test', image_dir, base_dir)

copy_images_to_label_subdirectories(base_dir)

  df = pd.read_csv(os.path.join(image_dir, mapping_file), delim_whitespace=True)


Error processing KM.SA4.12.tiff: [Errno 2] No such file or directory: 'JAFFE\\KM.SA4.12.tiff'
Error processing NM.DI2.108.tiff: [Errno 2] No such file or directory: 'JAFFE\\NM.DI2.108.tiff'
Error processing KM.DI2.21.tiff: [Errno 2] No such file or directory: 'JAFFE\\KM.DI2.21.tiff'
Error processing TM.HA4.183.tiff: [Errno 2] No such file or directory: 'JAFFE\\TM.HA4.183.tiff'
Error processing KM.HA5.8.tiff: [Errno 2] No such file or directory: 'JAFFE\\KM.HA5.8.tiff'
Error processing KR.HA3.76.tiff: [Errno 2] No such file or directory: 'JAFFE\\KR.HA3.76.tiff'
Filename labels.csv does not match expected format. Skipping.
Filename labels.csv does not match expected format. Skipping.
Filename labels.csv does not match expected format. Skipping.
