# Facial Expression Recognition with CNNs Using the FER2013 Dataset
This notebook implements a deep learning model for classifying facial expressions using the FER2013 dataset. We'll follow the CRISP-DM methodology, including downloading the dataset, preprocessing, building a CNN model, training, evaluation, and model improvements.

In [1]:
# Step 1: Install and Setup Kaggle API
!pip install kaggle

# Upload the kaggle.json file
from google.colab import files
files.upload()

# Create Kaggle directory and move the kaggle.json file there
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

# Download the FER2013 dataset from Kaggle
!kaggle datasets download -d msambare/fer2013

# Unzip the dataset
!unzip fer2013.zip

In [2]:
# Step 2: Data Understanding and Preprocessing
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Load the dataset
from pathlib import Path

csv_path = Path('fer2013.csv')
if not csv_path.is_file():
    # Fail early with an actionable message instead of a bare pandas error.
    # NOTE(review): the Kaggle archive fetched in the setup cell may ship the
    # images as train/test folders instead of this CSV -- confirm the source.
    raise FileNotFoundError(
        f"{csv_path} was not found in {Path.cwd()}; this notebook needs the CSV "
        "release of FER2013 (columns 'emotion' and 'pixels')."
    )

data = pd.read_csv(csv_path)

# The preprocessing step reads exactly these two columns; check them up front.
missing_columns = {'emotion', 'pixels'} - set(data.columns)
if missing_columns:
    raise KeyError(f"{csv_path} is missing required column(s): {sorted(missing_columns)}")

print(data.head())  # Explore the first few rows
print(data['emotion'].value_counts())  # Check the distribution of emotions

# Preprocess the images
def preprocess_data(data, num_classes=7, image_size=48):
    """Convert a FER2013-style frame into model-ready image and label arrays.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide a 'pixels' column (whitespace-separated grey values per
        image, ``image_size * image_size`` of them) and an 'emotion' column
        (integer class ids in ``[0, num_classes)``).
    num_classes : int, default 7
        Width of the one-hot label matrix. Fixing it here keeps the label
        shape stable even when ``data`` does not contain every class.
    image_size : int, default 48
        Side length of the square images.

    Returns
    -------
    images : numpy.ndarray, float32, shape (n, image_size, image_size, 1)
        Pixel values divided by 255.
    labels : numpy.ndarray, float32, shape (n, num_classes)
        One-hot encoded emotion ids.

    Raises
    ------
    ValueError
        If a row does not hold exactly ``image_size * image_size`` values or
        an emotion id lies outside ``[0, num_classes)``.
    """
    pixels_per_image = image_size * image_size

    # Fill a preallocated float32 array row by row instead of first building a
    # list-of-lists of Python strings for the whole dataset.
    images = np.empty((len(data), image_size, image_size, 1), dtype=np.float32)
    for row_idx, pixel_str in enumerate(data['pixels']):
        # split() with no argument tolerates repeated or trailing whitespace.
        values = np.array(pixel_str.split(), dtype=np.float32)
        if values.size != pixels_per_image:
            raise ValueError(
                f"row {row_idx}: expected {pixels_per_image} pixel values, got {values.size}"
            )
        images[row_idx] = values.reshape(image_size, image_size, 1)
    images /= 255.0  # Normalize pixel values to [0, 1] for 8-bit input

    emotion_ids = data['emotion'].to_numpy(dtype=np.int64)
    if emotion_ids.size and (emotion_ids.min() < 0 or emotion_ids.max() >= num_classes):
        raise ValueError(f"emotion ids must lie in [0, {num_classes})")

    # Index an identity matrix to one-hot encode: row k of eye() is the code
    # for class k, so the result always has num_classes columns.
    labels = np.eye(num_classes, dtype=np.float32)[emotion_ids]
    return images, labels

# Apply preprocessing
images, labels = preprocess_data(data)

# Split into training, validation, and test sets.
# First hold out 20% for testing, then 20% of the remainder for validation
# (overall 64% train / 16% validation / 20% test).
# Stratifying on the class id keeps every split's class proportions equal to
# those of the full dataset, which matters when some emotions are rare.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels.argmax(axis=1),
)
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval,
    y_trainval,
    test_size=0.2,
    random_state=42,
    stratify=y_trainval.argmax(axis=1),
)

# Sanity check: the three splits partition the dataset exactly.
assert len(X_train) + len(X_val) + len(X_test) == len(images)

In [3]:
# Step 3: Build the CNN Model
import tensorflow as tf
from tensorflow.keras import layers, models

# Build the CNN model
def build_model(input_shape=(48, 48, 1), num_classes=7):
    """Create and compile a fresh, untrained CNN classifier.

    Architecture: three Conv2D(3x3, ReLU) + MaxPooling2D(2x2) stages with
    32, 64 and 128 filters, then Flatten, Dense(128, ReLU), Dropout(0.5) and
    a softmax output with one unit per class.

    Parameters
    ----------
    input_shape : tuple, default (48, 48, 1)
        Shape of a single input image (height, width, channels).
    num_classes : int, default 7
        Number of output units (emotion classes).

    Returns
    -------
    A compiled Keras Sequential model (Adam optimizer, categorical
    cross-entropy loss, accuracy metric).
    """
    cnn = models.Sequential([
        # Declare the input with an explicit Input layer rather than passing
        # input_shape= to the first Conv2D.
        layers.Input(shape=input_shape),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),  # one unit per emotion class
    ])

    # Compile the model; categorical cross-entropy expects one-hot labels.
    cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return cnn


# Calling the factory again yields a new model with freshly initialised weights.
model = build_model()
model.summary()  # Display the model's architecture

In [4]:
# Step 4: Train the Model
# Fit on the training split; the validation split is passed so its loss and
# accuracy are recorded in `history` alongside the training metrics.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=64,
    epochs=30,
    validation_data=(X_val, y_val),
)

In [5]:
# Step 5: Evaluate the Model and Plot Confusion Matrix
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Evaluate the model
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
print(f"Test accuracy: {test_acc}")

# Predict the labels for the test set
# argmax turns softmax outputs / one-hot rows into integer class ids.
y_pred = np.argmax(model.predict(X_test), axis=1)
y_true = np.argmax(y_test, axis=1)

# Display names for the classes; position i names class id i.
emotion_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Plot confusion matrix
# Passing labels= pins the matrix to one row/column per class, so it always
# lines up with the tick labels even if a class never occurs in y_true/y_pred.
cm = confusion_matrix(y_true, y_pred, labels=list(range(len(emotion_names))))
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=emotion_names,
    yticklabels=emotion_names,
    ax=ax,
)
ax.set_xlabel('Predicted')
ax.set_ylabel('True')
ax.set_title('Confusion matrix on the test set')
plt.show()

In [6]:
# Step 6: Improving the Model (Optional)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Data Augmentation
# Random rotations (up to 20 degrees), zooms and shifts (up to 20%), and
# horizontal flips are applied on the fly to each training batch.
datagen = ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

# datagen.fit(X_train) is intentionally not called: it only computes dataset
# statistics for featurewise_center / featurewise_std_normalization /
# zca_whitening, none of which are enabled above.

# Retrain the model with augmented data
# NOTE(review): `model` already holds the weights learned in the earlier
# training cell, so this continues training that model rather than starting
# from scratch -- confirm this warm start is intended before comparing
# `history_augmented` against `history`.
# The validation data is passed un-augmented.
history_augmented = model.fit(
    datagen.flow(X_train, y_train, batch_size=64),
    epochs=30,
    validation_data=(X_val, y_val),
)