# Face Emotion Recognition

https://huggingface.co/datasets/tukey/human_face_emotions_roboflow/viewer/default/train?p=1&views%5B%5D=train

# Import Data

In [2]:
import pandas as pd
import io
from PIL import Image

df = pd.read_parquet("hf://datasets/tukey/human_face_emotions_roboflow/data/train-00000-of-00001.parquet")

# Data Overview & Cleaning

In [3]:
# Standardize column names (strip whitespace, lower-case, replace spaces with underscores)
df.columns = [col.strip().lower().replace(' ', '_') for col in df.columns]

# Check for missing values
print("Missing values per column:")
print(df.isna().sum())

# No missing values or duplicates, so we can proceed with the data as is

# Print out summary information
print("\nDataframe Info:")
print(df.info())

# Print the first few rows to inspect the data
print("\nFirst 5 rows of the dataset:")
print(df.head())

Missing values per column:
image    0
qa       0
dtype: int64

Dataframe Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9400 entries, 0 to 9399
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   image   9400 non-null   object
 1   qa      9400 non-null   object
dtypes: object(2)
memory usage: 147.0+ KB
None

First 5 rows of the dataset:
                                               image  \
0  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
1  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
2  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
3  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   
4  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...   

                                                  qa  
0  [{'question': 'How does the person feel in the...  
1  [{'question': 'How does the person feel in the...  
2  [{'question': 'How does the person feel in the...  
3  [{'question': 'How does the person

In [4]:
# Check for missing values in each column
print("\nMissing values per column:")
print(df.isnull().sum())


Missing values per column:
image    0
qa       0
dtype: int64


In [5]:
import json
import numpy as np

def extract_emotion(qa_entry):
    try:
        # If the qa_entry is a string, strip it and parse as JSON.
        if isinstance(qa_entry, str):
            qa_entry = qa_entry.strip()
            qa_data = json.loads(qa_entry)
        else:
            qa_data = qa_entry

        # If the data is a numpy array, convert it to a list.
        if isinstance(qa_data, np.ndarray):
            qa_data = qa_data.tolist()

        # Now you can check if it's a list or tuple using this condition.
        if isinstance(qa_data, (list, tuple)) and len(qa_data) > 0:
            return qa_data[0].get("answer")
        else:
            print("Unexpected qa_data structure:", qa_data, "with type", type(qa_data))
    except Exception as e:
        print("Error parsing qa entry:", qa_entry, "\nError:", e)
    return None

In [6]:
# Assuming df is your DataFrame that includes the 'qa' column
df["emotion"] = df["qa"].apply(extract_emotion)

# Verify the new column
print(df[["qa", "emotion"]].head())

                                                  qa  emotion
0  [{'question': 'How does the person feel in the...      sad
1  [{'question': 'How does the person feel in the...    anger
2  [{'question': 'How does the person feel in the...  neutral
3  [{'question': 'How does the person feel in the...     fear
4  [{'question': 'How does the person feel in the...  content


In [7]:
# Check unique values and distribution of facial emotion labels
if 'emotion' in df.columns:
    print("\nUnique emotion labels:")
    print(df['emotion'].unique())

    print("\nDistribution of emotion labels:")
    print(df['emotion'].value_counts())


Unique emotion labels:
['sad' 'anger' 'neutral' 'fear' 'content' 'happy' 'disgust' 'surprise']

Distribution of emotion labels:
emotion
surprise    1238
neutral     1225
sad         1184
fear        1181
anger       1175
disgust     1165
content     1144
happy       1088
Name: count, dtype: int64


In [8]:
import matplotlib.pyplot as plt

# Example: Plot a histogram for a numeric column, adjust 'score' to the relevant column name
if 'score' in df.columns:
    plt.hist(df['score'].dropna(), bins=30, edgecolor='k')
    plt.xlabel("Score")
    plt.ylabel("Frequency")
    plt.title("Histogram of Scores")
    plt.show()

In [9]:
# Optionally, save the cleaned dataframe to disk as a new parquet file or CSV
df.to_parquet("cleaned_human_face_emotions.parquet")
# Alternatively, you can save as CSV:
# df.to_csv("cleaned_human_face_emotions.csv", index=False)

# drop qa column
df.drop(columns=["qa"], inplace=True)

print(df.head())

                                               image  emotion
0  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...      sad
1  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...    anger
2  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...  neutral
3  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...     fear
4  {'bytes': b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x...  content


Now we just have images in the first column with the emotion in the second column.

In [10]:
from sklearn.model_selection import train_test_split

# Separate feature (X) and label (y)
X = df['image']
y = df['emotion']

# Perform a stratified split to keep class distribution consistent
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,      # 80% training, 20% testing
    random_state=42,    # for reproducibility
    stratify=y          # important for classification
)

# Validation split from X_train if needed:
X_train, X_val, y_train, y_val = train_test_split(
    X_train,
    y_train,
    test_size=0.25,     # 25% of the training set (which is 20% of the total) -> 15% overall
    random_state=42,
    stratify=y_train
)

print("Training set size:", len(X_train))
print("Test set size:", len(X_test))
print("Validation set size:", len(X_val))

Training set size: 5640
Test set size: 1880
Validation set size: 1880


In [11]:
# Image bytes -> numpy arrays
def decode_images(image_series, target_size=(224, 224)):
    """
    Takes a pandas Series of dictionaries, each containing {'bytes': ...}.
    Decodes them into a list of NumPy arrays (RGB).
    Resizes images to target_size.
    Normalizes pixel values to [0, 1].

    Returns:
      - A NumPy array of shape (num_samples, target_size[0], target_size[1], 3)
    """
    decoded_list = []
    for item in image_series:
        # item should be a dict like {'bytes': b'...'}
        try:
            img_bytes = item['bytes']
            with Image.open(io.BytesIO(img_bytes)) as img:
                # Convert to RGB if needed
                img = img.convert('RGB')
                # Resize
                img = img.resize(target_size)
                # Convert to array
                arr = np.array(img, dtype=np.float32) / 255.0
            decoded_list.append(arr)
        except Exception as e:
            # If there's a bad image, you might want to handle or skip it
            print("Error decoding image:", e)
            # Optionally skip or handle it somehow. For now, let's skip:
            # Continue with the loop
            continue

    return np.stack(decoded_list, axis=0)

print("\nDecoding and resizing images...")

# Decode train set
X_train_array = decode_images(X_train, target_size=(224, 224))
print("X_train_array shape:", X_train_array.shape)

# Decode val set
X_val_array = decode_images(X_val, target_size=(224, 224))
print("X_val_array shape:", X_val_array.shape)

# Decode test set
X_test_array = decode_images(X_test, target_size=(224, 224))
print("X_test_array shape:", X_test_array.shape)


Decoding and resizing images...
X_train_array shape: (5640, 224, 224, 3)
X_val_array shape: (1880, 224, 224, 3)
X_test_array shape: (1880, 224, 224, 3)


In [12]:
# Encode labels
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_val_encoded   = label_encoder.transform(y_val)
y_test_encoded  = label_encoder.transform(y_test)

print("\nLabel classes found:", label_encoder.classes_)
print("Sample of encoded labels:", y_train_encoded[:10])


Label classes found: ['anger' 'content' 'disgust' 'fear' 'happy' 'neutral' 'sad' 'surprise']
Sample of encoded labels: [6 3 5 6 0 6 7 3 4 0]


In [13]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Suppose you have: (224, 224, 3) images
# If you used a different size (e.g. 160x160 for MobileNet), be consistent

num_classes = len(label_encoder.classes_)

# 1) Load a MobileNetV2 (or EfficientNet, ResNet, etc.) without its top layers
base_model = tf.keras.applications.MobileNetV2(
    input_shape=(224, 224, 3),
    include_top=False,
    weights='imagenet'
)

# 2) Freeze the base_model so we only train the new head first
base_model.trainable = False

# 3) Build your classifier on top
model = models.Sequential([
    base_model,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.2),
    layers.Dense(num_classes, activation='softmax')
])

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.summary()

# 4) Train the new top layers
history = model.fit(
    X_train_array, y_train_encoded,
    validation_data=(X_val_array, y_val_encoded),
    epochs=5,
    batch_size=32
)

# Evaluate on test set
test_loss, test_acc = model.evaluate(X_test_array, y_test_encoded)
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_acc:.4f}")

# 5) (Optional) Fine-tune deeper layers
# Unfreeze part (or all) of base_model and re-compile with a lower learning rate
base_model.trainable = True
# You can selectively unfreeze only some layers:
# for layer in base_model.layers[:100]:
#     layer.trainable = False

model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=tf.keras.optimizers.Adam(1e-5),  # smaller LR for fine-tuning
    metrics=['accuracy']
)

history_fine = model.fit(
    X_train_array, y_train_encoded,
    validation_data=(X_val_array, y_val_encoded),
    epochs=5,
    batch_size=32
)

test_loss, test_acc = model.evaluate(X_test_array, y_test_encoded)
print(f"\nFinal Test Loss after fine-tuning: {test_loss:.4f}")
print(f"Final Test Accuracy after fine-tuning: {test_acc:.4f}")

Epoch 1/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m34s[0m 186ms/step - accuracy: 0.2082 - loss: 2.1595 - val_accuracy: 0.3154 - val_loss: 1.8075
Epoch 2/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 167ms/step - accuracy: 0.3333 - loss: 1.7945 - val_accuracy: 0.3463 - val_loss: 1.7361
Epoch 3/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 170ms/step - accuracy: 0.3668 - loss: 1.6936 - val_accuracy: 0.3191 - val_loss: 1.7568
Epoch 4/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 175ms/step - accuracy: 0.4040 - loss: 1.5991 - val_accuracy: 0.3596 - val_loss: 1.7115
Epoch 5/5
[1m177/177[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 243ms/step - accuracy: 0.4333 - loss: 1.5305 - val_accuracy: 0.3681 - val_loss: 1.6924
[1m59/59[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 236ms/step - accuracy: 0.3545 - loss: 1.7492

Test Loss: 1.7052
Test Accuracy: 0.3686
Epoch 1/5
[1m177/177[0m [3

In [16]:
# Save the model in the native TensorFlow SavedModel format.
model.save("saved_MobileNetV2.keras")  # Use .keras extension for the Keras format.

# Load the model later.
loaded_model = tf.keras.models.load_model("saved_MobileNetV2.keras")