In [29]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Dense, Flatten, MaxPooling2D, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l1, l2
import cv2
import mediapipe as mp
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import load_model


In [14]:
trainingset = np.loadtxt('fer2013_training_onehot.csv', delimiter=',')
testingset = np.loadtxt('fer2013_publictest_onehot.csv', delimiter=',')


In [16]:
n_inputs = 2304  # 48x48 flattened image size (48 * 48 = 2304)
n_classes = 7    # Number of classes (7 facial expressions in FER2013)
img_dim = 48     # Image dimensions (48x48)

# Split the dataset into inputs (x) and labels (y)
x_training = trainingset[:, 0:n_inputs]  # All pixels
y_training = trainingset[:, n_inputs:n_inputs + n_classes]  # One-hot encoded labels

x_testing = testingset[:, 0:n_inputs]  # All pixels
y_testing = testingset[:, n_inputs:n_inputs + n_classes]  # One-hot encoded labels

# Reshape the input images to (num_samples, 48, 48)
x_training = x_training.reshape(x_training.shape[0], img_dim, img_dim)
x_testing = x_testing.reshape(x_testing.shape[0], img_dim, img_dim)

# Add a channel dimension (greyscale images, so channel = 1)
x_training = np.expand_dims(x_training, axis=-1)  # Shape becomes (num_samples, 48, 48, 1)
x_testing = np.expand_dims(x_testing, axis=-1)  # Shape becomes (num_samples, 48, 48, 1)

# Normalize the images from range [0, 255] to [0, 1]
x_training = x_training.astype('float32') / 255.0
x_testing = x_testing.astype('float32') / 255.0

# Check the new shapes of the data
print(f"x_training shape: {x_training.shape}")
print(f"x_testing shape: {x_testing.shape}")

x_training shape: (28709, 48, 48, 1)
x_testing shape: (3589, 48, 48, 1)


In [17]:
def get_emotion(ohv):
    if ohv.shape[0] == 1:
        indx = ohv[0]
    else:
        indx = np.argmax(ohv)
        
    if indx == 0:
        return 'angry'
    elif indx == 1:
        return 'disgust'
    elif indx == 2:
        return 'fear'
    elif indx == 3:
        return 'happy'
    elif indx == 4:
        return 'sad'
    elif indx == 5:
        return 'surprise'
    elif indx == 6:
        return 'neutral'

In [18]:
model = Sequential()
model.add(Conv2D(64, (3, 3), activation='relu', padding='same', input_shape = (48, 48, 1)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu', padding='same'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Flatten())
model.add(Dense(1024, activation='relu', kernel_regularizer=l2(0.001)))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
opt = Adam(learning_rate=0.0001, decay=10e-6)

# Compile the model with categorical crossentropy loss and Adam optimizer
model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])

# Set batch size and number of epochs
batch_size = 128
n_epochs = 1  # Corrected typo from 'n_epoches' to 'n_epochs'

# EarlyStopping callback to stop training if validation loss doesn't improve
early_stopping = EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# Train the model with validation data and early stopping
history = model.fit(x_training, y_training, batch_size=batch_size, epochs=n_epochs,
                    validation_data=(x_testing, y_testing), shuffle=True,
                    callbacks=[early_stopping])

[1m225/225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 692ms/step - accuracy: 0.9452 - loss: 0.4079 - val_accuracy: 0.5988 - val_loss: 1.8896


In [34]:
scores = model.evaluate(x_testing, y_testing)
print('%s: %.2f%%'% (model.metrics_names[1], scores[1]*100))
# --- . Save the model and weights ---
model.save('fer2013_model.h5')
model.save_weights('fer2013.weights.h5')


[1m113/113[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 58ms/step - accuracy: 0.5976 - loss: 1.8984




compile_metrics: 59.88%


In [30]:
# --- 1. Load the pre-trained model for facial expression classification ---
model = load_model('fer2013.h5')  # Load your trained model 

# --- 2. Initialize MediaPipe Face Detection and Drawing Utilities ---
mp_face_detection = mp.solutions.face_detection
mp_drawing = mp.solutions.drawing_utils

# Initialize MediaPipe Face Detection model
face_detection = mp_face_detection.FaceDetection(min_detection_confidence=0.2)

# Define a dictionary for class labels based on your model's output (assuming 7 expressions)
class_labels = ['angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral']

# --- 3. Function to preprocess the face image for classification ---
def preprocess_face(face):
    # Resize to match input dimensions of the model (e.g., 48x48)
    face = cv2.resize(face, (48, 48))
    # Convert to grayscale (if model expects grayscale)
    face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
    # Normalize the face image (as done during training)
    face = face.astype('float32') / 255.0
    # Expand dimensions to match model input shape (1, 48, 48, 1)
    face = np.expand_dims(face, axis=-1)
    face = np.expand_dims(face, axis=0)  # Shape: (1, 48, 48, 1)
    return face

# --- 4. Start Real-time Video Capture ---
cap = cv2.VideoCapture(0)  # Start webcam

while True:
    ret, frame = cap.read()
    if not ret:
        break
    
    # Flip the frame horizontally for a more natural selfie view
    frame = cv2.flip(frame, 1)

    # Convert the frame to RGB (OpenCV uses BGR by default)
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    # Process the frame and get face detections
    results = face_detection.process(rgb_frame)
    
    if results.detections:
        # For each detected face, draw a bounding box and process it
        for detection in results.detections:
            # Draw the bounding box around the face
            mp_drawing.draw_detection(frame, detection)
            
            # Get bounding box coordinates
            bboxC = detection.location_data.relative_bounding_box
            ih, iw, _ = frame.shape
            x, y, w, h = int(bboxC.xmin * iw), int(bboxC.ymin * ih), int(bboxC.width * iw), int(bboxC.height * ih)
            
            # Crop the face from the frame
            face = frame[y:y+h, x:x+w]
            
            if face.size != 0:
                # Preprocess the face for the model
                processed_face = preprocess_face(face)
                
                # Make a prediction using the trained model
                prediction = model.predict(processed_face)
                predicted_class = np.argmax(prediction)
                
                # Display the predicted emotion label
                label = class_labels[predicted_class]
                cv2.putText(frame, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)
    
    # Show the resulting frame with bounding boxes and predicted emotion
    cv2.imshow('Real-time Facial Expression Recognition', frame)
    
    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the capture and close the window
cap.release()
cv2.destroyAllWindows()



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 81ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 39ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24