In [58]:
import cv2
import numpy as np
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.models import load_model
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam

In [103]:
# Function to load and preprocess images for face recognition with data augmentation
def load_images_for_face_recognition(directory):
    images = []
    labels = []
    label_dict = {}
    current_label = 0  # it will be used to assign labels to different individuals.

    # Use ImageDataGenerator for data augmentation
    datagen = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )

     
    for person_folder in os.listdir(directory):  #Iterating Through Person Folders .
        person_path = os.path.join(directory, person_folder)
        if os.path.isdir(person_path):
            label_dict[current_label] = person_folder #assigns the (current_label) to the folder name (person_folder) in the label_dict.
            for filename in os.listdir(person_path):
                img_path = os.path.join(person_path, filename)
                img = cv2.imread(img_path)
                img = cv2.resize(img, (128, 128)) # resizing to a fixed size of (128, 128).
                
                # Apply data augmentation
                img = img.reshape((1,) + img.shape)  #reshapes the image to add an extra dimension at the beginning. 
                for batch in datagen.flow(img, batch_size=1): #The flow method continuously applies data augmentation to the input image (img)
                    img = batch[0]
                    break

                images.append(img)
                labels.append(current_label)
            current_label += 1 #is incremented for the next person.

    return np.array(images), np.array(labels), label_dict 

# Function to load and preprocess images for emotion recognition
def load_images_for_emotion_recognition(directory):
    images = []
    labels = []
    label_dict = {}
    current_label = 0

    for emotion_folder in os.listdir(directory):
        emotion_path = os.path.join(directory, emotion_folder)
        if os.path.isdir(emotion_path):
            label_dict[current_label] = emotion_folder
            for filename in os.listdir(emotion_path):
                img_path = os.path.join(emotion_path, filename)
                img = cv2.imread(img_path)
                img = cv2.resize(img, (48, 48))
                images.append(img)
                labels.append(current_label)
            current_label += 1

    return np.array(images), np.array(labels), label_dict

# Load face recognition dataset with data augmentation
face_images, face_labels, face_label_dict = load_images_for_face_recognition('face3')

# Load emotion recognition dataset
emotion_images, emotion_labels, emotion_label_dict = load_images_for_emotion_recognition('emotion3')
    

In [104]:
# Train-test split for face recognition data
face_images_train, face_images_test, face_labels_train, face_labels_test = train_test_split(
    face_images, face_labels, test_size=0.2, random_state=42)

# Train-test split for emotion recognition data
emotion_images_train, emotion_images_test, emotion_labels_train, emotion_labels_test = train_test_split(
    emotion_images, emotion_labels, test_size=0.2, random_state=42)

In [105]:
# Assume 2 classes for emotion recognition and 5 classes for face recognition
num_emotion_classes = 2
num_person_classes =5

# Input for face recognition

# (height of the image,width of the image,3 color channels)
face_input = Input(shape=(128, 128, 3), name='face_input') 
#Convolutional layer with 32 filters, 3x3 kernel size, and 'relu' activation function
x_face = Conv2D(32, (3, 3), activation='relu')(face_input)
# MaxPooling layer with 2x2 pool size , helps in reducing dimensionality after convolution
x_face = MaxPooling2D(pool_size=(2, 2))(x_face)  
x_face = Conv2D(64, (3, 3), activation='relu')(x_face)
x_face = MaxPooling2D(pool_size=(2, 2))(x_face)
# Flatten layer to convert 2D data to a vector
x_face = Flatten()(x_face)
# Dense (fully connected) layer with 128 units and 'relu' activation function
x_face = Dense(128, activation='relu')(x_face)
#dropout layer with a dropout rate of 0.5 to prevent overfitting
x_face = Dropout(0.5)(x_face)
# Output layer with number of units = num_person_classes and 'softmax' activation function
output_face = Dense(num_person_classes, activation='softmax', name='output_face')(x_face)

# Input for emotion recognition
emotion_input = Input(shape=(48, 48, 3), name='emotion_input')
x_emotion = Conv2D(32, (3, 3), activation='relu')(emotion_input)
x_emotion = MaxPooling2D(pool_size=(2, 2))(x_emotion)
x_emotion = Conv2D(64, (3, 3), activation='relu')(x_emotion)
x_emotion = MaxPooling2D(pool_size=(2, 2))(x_emotion)
x_emotion = Flatten()(x_emotion)
x_emotion = Dense(128, activation='relu')(x_emotion)
x_emotion = Dropout(0.5)(x_emotion)
output_emotion = Dense(num_emotion_classes, activation='softmax', name='output_emotion')(x_emotion)



In [106]:
# Combine the two streams
combined_model = Model(inputs=[face_input, emotion_input], outputs=[output_face, output_emotion])

# Compile the model
combined_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy']) #uses accuracy as the evaluation metric.


In [107]:
# Convert labels to one-hot encoding (Categorical values to numeric values) 
face_labels_train_onehot = to_categorical(face_labels_train, num_classes=num_person_classes)
emotion_labels_train_onehot = to_categorical(emotion_labels_train, num_classes=num_emotion_classes)



In [108]:
# Train the model
combined_model.fit(
    [face_images_train, emotion_images_train],
    [face_labels_train_onehot, emotion_labels_train_onehot], #The second argument represents the corresponding target labels for both face and emotion recognition
    epochs=10,
    batch_size=32,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x1d3947de790>

In [109]:
# Evaluate the model on the test data
test_loss_and_accuracy = combined_model.evaluate(
    [face_images_test, emotion_images_test],
    [to_categorical(face_labels_test, num_classes=num_person_classes), to_categorical(emotion_labels_test, num_classes=num_emotion_classes)], #to_categorical convertس integer-encoded class labels into one-hot encoded vectors.
    batch_size=32
)

# Extracting the accuracy value from the result
test_accuracy = test_loss_and_accuracy[1]

print(f'Test Accuracy: {test_accuracy * 100:.2f}%')

Test Accuracy: 272.82%


In [110]:
from sklearn.metrics import classification_report
# Uses the trained model to make predictions
face_predictions, emotion_predictions = combined_model.predict([face_images_test, emotion_images_test])

# Convert predictions to class labels
face_predictions_labels = np.argmax(face_predictions, axis=1)
emotion_predictions_labels = np.argmax(emotion_predictions, axis=1)

# Convert true labels to class labels (necessary to compare the predictions with the true labels.)
true_face_labels = np.argmax(to_categorical(face_labels_test, num_classes=num_person_classes), axis=1)
true_emotion_labels = np.argmax(to_categorical(emotion_labels_test, num_classes=num_emotion_classes), axis=1)

# Classification report for face recognition
print("Face Recognition Classification Report:")
print(classification_report(true_face_labels, face_predictions_labels))

# Classification report for emotion recognition
print("Emotion Recognition Classification Report:")
print(classification_report(true_emotion_labels, emotion_predictions_labels))

# Aggregate overall accuracy
overall_accuracy = np.mean((face_predictions_labels == true_face_labels) & (emotion_predictions_labels == true_emotion_labels))
print(f"Overall Accuracy: {overall_accuracy * 100:.2f}%")

Face Recognition Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.50      0.67         2
           1       0.33      1.00      0.50         1
           3       0.75      0.75      0.75         4
           4       1.00      0.50      0.67         2

    accuracy                           0.67         9
   macro avg       0.77      0.69      0.65         9
weighted avg       0.81      0.67      0.69         9

Emotion Recognition Classification Report:
              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       1.00      0.60      0.75         5

    accuracy                           0.78         9
   macro avg       0.83      0.80      0.77         9
weighted avg       0.85      0.78      0.77         9

Overall Accuracy: 55.56%


In [111]:
# Save the combined model
combined_model.save('Music/combined_model.h7')

INFO:tensorflow:Assets written to: Music/combined_model.h7\assets


INFO:tensorflow:Assets written to: Music/combined_model.h7\assets


In [112]:
# Load the combined model
face_model = load_model('Music/combined_model.h7')

In [113]:

#Load and preprocess the new image
new_image_path = 'ab.jpeg'
new_image = cv2.imread(new_image_path)
new_face_image = cv2.resize(new_image, (128, 128))
new_emotion_image = cv2.resize(new_image, (48, 48))

# Expand dimensions to match the input shape expected by the model
new_face_image = np.expand_dims(new_face_image, axis=0)
new_emotion_image = np.expand_dims(new_emotion_image, axis=0)

# Normalize pixel values to be between 0 and 1
new_face_image = new_face_image / 255.0
new_emotion_image = new_emotion_image / 255.0

# Predict using the combined model
predictions = combined_model.predict([new_face_image, new_emotion_image])

# Extract predictions for face and emotion
predicted_person_id = np.argmax(predictions[0], axis=1)[0]
predicted_person_name = face_label_dict[predicted_person_id]

predicted_emotion_id = np.argmax(predictions[1], axis=1)[0]
predicted_emotion_label = emotion_label_dict[predicted_emotion_id]

# Print the predicted face and emotion
print("Predicted Face ID:", predicted_person_id)
print("Predicted Face Name:", predicted_person_name)
print("Predicted Emotion ID:", predicted_emotion_id)
print("Predicted Emotion Label:", predicted_emotion_label)





Predicted Face ID: 3
Predicted Face Name: Abdullah_Gul
Predicted Emotion ID: 0
Predicted Emotion Label: happy


In [116]:
# Function to preprocess the input image for face recognition
def preprocess_face_image(image):
    resized_image = cv2.resize(image, (128, 128))
    expanded_image = np.expand_dims(resized_image, axis=0)
    normalized_image = expanded_image / 255.0
    return normalized_image

# Function to preprocess the input image for emotion recognition
def preprocess_emotion_image(image):
    resized_image = cv2.resize(image, (48, 48))
    expanded_image = np.expand_dims(resized_image, axis=0)
    normalized_image = expanded_image / 255.0
    return normalized_image

# Open a connection to the camera (0 is the default camera)
cap = cv2.VideoCapture(0)

while True:
    # Read a frame from the camera
    ret, frame = cap.read() # ret is A boolean value indicating whether the frame was read successfully

    # Perform face and emotion recognition
    face_image = preprocess_face_image(frame)
    emotion_image = preprocess_emotion_image(frame)

    predictions = face_model.predict([face_image, emotion_image])

    # Extract predictions for face and emotion
    predicted_person_id = np.argmax(predictions[0], axis=1)[0]
    predicted_person_name = face_label_dict[predicted_person_id]

    predicted_emotion_id = np.argmax(predictions[1], axis=1)[0]
    predicted_emotion_label = emotion_label_dict[predicted_emotion_id]

    # Display the predictions on the frame
    cv2.putText(frame, f'Person: {predicted_person_name}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2) #(10,30) are the coordinates ,font typr and style ,color , thickness of lines
    cv2.putText(frame, f'Emotion: {predicted_emotion_label}', (10, 70), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    # Display the frame
    cv2.imshow('Face and Emotion Recognition', frame)

    # Break the loop if 'q' key is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the camera and close all windows
cap.release()
cv2.destroyAllWindows()

