In [None]:
# Mount Google Drive (Colab only) so the dataset and model files referenced
# below under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
!pip install dlib
!pip install opencv-python
!pip install scikit-image
!pip install scikit-learn
!pip install keras-vggface keras-applications

In [None]:
import os
import cv2
import dlib
import numpy as np
from matplotlib import pyplot as plt
from skimage.feature import local_binary_pattern
from skimage.feature import hog
from skimage import exposure
from keras_vggface.vggface import VGGFace
from keras_vggface.utils import preprocess_input
from keras.models import Model
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, recall_score, f1_score
import seaborn as sns

In [None]:
# Dataset locations on the mounted Google Drive. Each directory is expected to
# contain one sub-folder per entry of `expression_folders`.
train_dataset_path = '/content/drive/My Drive/DataSet/train'
test_dataset_path = '/content/drive/My Drive/DataSet/test'

# Expression folder names. They double as the class labels and are reused as
# tick labels / target names in the evaluation cells, so the order matters
# (it is alphabetical here).
expression_folders = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']

In [None]:
# Load the frontal-face Haar cascade that ships with OpenCV; preprocess_image()
# uses it to locate the face region.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

In [None]:
def preprocess_image(image_path):
    """
    Load an image, crop it to the first detected face and resize to 48x48.

    Parameters:
    - image_path: Path to the image file.

    Returns:
    - 48x48 grayscale image. If no face is detected the whole image is
      resized instead of a face crop.

    Raises:
    - ValueError: if the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise ValueError(f"Could not read image: {image_path}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Detect faces in the image
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

    if len(faces) > 0:
        # Only consider the first detected face
        x, y, w, h = faces[0]
        face = gray[y:y+h, x:x+w]
    else:
        # No detection: fall back to the whole image (previously this path
        # left `face` unbound and raised NameError)
        face = gray

    # Resize the face region to 48x48 (FER2013 image size)
    return cv2.resize(face, (48, 48))

In [None]:
def process_images(dataset_dir):
    """
    Run preprocess_image() on every file of every expression folder.

    Parameters:
    - dataset_dir: Directory containing one sub-folder per expression.

    Returns:
    - Tuple (images, labels) of NumPy arrays; labels[i] is the folder name
      that images[i] was read from.
    """
    images, image_labels = [], []

    for expression in expression_folders:
        folder = os.path.join(dataset_dir, expression)

        for file_name in os.listdir(folder):
            # The folder name is the ground-truth label of every file in it
            images.append(preprocess_image(os.path.join(folder, file_name)))
            image_labels.append(expression)

    return np.array(images), np.array(image_labels)

In [None]:
# Preprocess (face crop + 48x48 resize) every training image.
# Note: train_labels / test_labels are reassigned by the later
# feature-extraction cells.
train_images, train_labels = process_images(train_dataset_path)
print(f'Processed {len(train_images)} train images.')

# Same preprocessing for the test split
test_images, test_labels = process_images(test_dataset_path)
print(f'Processed {len(test_images)} test images.')

In [None]:
# Sanity check: show the first preprocessed training image with its label
plt.imshow(train_images[0], cmap='gray')
plt.title(f'Label: {train_labels[0]}')
plt.show()

In [None]:
# dlib face detector plus the 68-point landmark predictor; the predictor
# weights are loaded from a .dat file stored on Google Drive.
face_detector = dlib.get_frontal_face_detector()
landmark_predictor = dlib.shape_predictor('/content/drive/My Drive/shape_predictor_68_face_landmarks.dat')

In [None]:
def extract_facial_landmarks(image):
    """
    Locate the 68 facial landmarks of the first face dlib detects.

    Parameters:
    - image: BGR colour image or 2-D grayscale image (NumPy array), or None.

    Returns:
    - (68, 2) integer array of (x, y) coordinates, or None when the image is
      None or no face is detected.
    """
    if image is None:
        # e.g. cv2.imread failed; treat it like "no face" so callers' None check works
        return None

    # Only convert when the image still has a channel axis
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Detect faces in the image using Dlib
    faces = face_detector(gray)
    if len(faces) == 0:
        return None

    # Predict landmarks for the first detected face only
    landmarks = landmark_predictor(gray, faces[0])

    # Convert the landmarks to a NumPy array of (x, y) coordinates
    landmark_coords = np.zeros((68, 2), dtype="int")
    for i in range(68):
        point = landmarks.part(i)
        landmark_coords[i] = (point.x, point.y)

    return landmark_coords

In [None]:
def display_facial_landmarks(image, landmarks):
    """
    Show an image with its facial landmarks drawn as green dots.

    Parameters:
    - image: BGR image (NumPy array). It is not modified.
    - landmarks: Iterable of (x, y) landmark coordinates.
    """
    # Draw on a copy so the caller's image stays clean for later processing
    annotated = image.copy()

    for (x, y) in landmarks:
        # Cast to plain ints: some OpenCV builds reject NumPy integer scalars
        cv2.circle(annotated, (int(x), int(y)), 2, (0, 255, 0), -1)

    # Matplotlib expects RGB, OpenCV stores BGR
    plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
    plt.axis('off')  # Hide axis
    plt.show()

# Example usage on a sample image
sample_image_path = '/content/drive/My Drive/DataSet/train/happy/image_1.jpg'  # Change this to your image path
image = cv2.imread(sample_image_path)

# cv2.imread returns None (no exception) when the file is missing or unreadable,
# so only run landmark extraction on a successfully loaded image
landmarks = extract_facial_landmarks(image) if image is not None else None

# Display image with facial landmarks
if image is None:
    print(f"Could not read image: {sample_image_path}")
elif landmarks is not None:
    display_facial_landmarks(image, landmarks)
else:
    print("No face detected in the image.")

In [None]:
def process_images_for_landmarks(dataset_dir):
    """
    Extract facial landmarks for every image in every expression folder.

    Parameters:
    - dataset_dir: Directory containing one sub-folder per expression.

    Returns:
    - Dict mapping "<expression>/<file name>" to the (68, 2) landmark array.
      Images that cannot be read or in which no face is found are omitted.
    """
    image_landmarks = {}

    # Loop through each expression folder
    for expression in expression_folders:
        expression_path = os.path.join(dataset_dir, expression)

        # Process each image in the folder
        for img_name in os.listdir(expression_path):
            img = cv2.imread(os.path.join(expression_path, img_name))
            if img is None:
                # Not an image (or unreadable): skip instead of crashing
                continue

            landmarks = extract_facial_landmarks(img)

            if landmarks is not None:
                # Key includes the folder: a bare file name can repeat across
                # expression folders and would silently overwrite earlier entries
                image_landmarks[f"{expression}/{img_name}"] = landmarks

    return image_landmarks

# Extract landmarks for the whole training split (one dlib detection per image)
train_image_landmarks = process_images_for_landmarks(train_dataset_path)

In [None]:
def extract_lbp_features(image, radius=1, n_points=8):
    """
    Compute the uniform Local Binary Pattern (LBP) encoding of an image.

    Parameters:
    - image: Grayscale image.
    - radius: Radius of the circle on which neighbours are sampled.
    - n_points: Number of neighbours sampled around each pixel.

    Returns:
    - The array produced by local_binary_pattern (the LBP-encoded image).
    """
    return local_binary_pattern(image, P=n_points, R=radius, method="uniform")

In [None]:
# Load a sample image straight from disk as grayscale (this is the raw file,
# not the output of preprocess_image)
sample_image_path = '/content/drive/My Drive/DataSet/train/happy/image_1.jpg'  # Change this to your image path
image = cv2.imread(sample_image_path, cv2.IMREAD_GRAYSCALE)

# Extract LBP features from the image
lbp_image = extract_lbp_features(image)

# Show the original image and its LBP encoding side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

ax1.imshow(image, cmap='gray')
ax1.set_title('Original Image')
ax1.axis('off')

ax2.imshow(lbp_image, cmap='gray')
ax2.set_title('LBP Image')
ax2.axis('off')

plt.show()

In [None]:
def process_images_for_lbp(dataset_dir):
    """
    Compute the LBP encoding of every image in every expression folder.

    Parameters:
    - dataset_dir: Directory containing one sub-folder per expression.

    Returns:
    - Tuple (lbp_images, labels) of NumPy arrays; each LBP entry is the
      unflattened output of extract_lbp_features for one image.
    """
    lbp_images, labels = [], []

    for expression in expression_folders:
        folder = os.path.join(dataset_dir, expression)

        for file_name in os.listdir(folder):
            # Read directly as single-channel grayscale
            gray = cv2.imread(os.path.join(folder, file_name), cv2.IMREAD_GRAYSCALE)

            lbp_images.append(extract_lbp_features(gray))
            labels.append(expression)

    return np.array(lbp_images), np.array(labels)

# LBP features for the training split (this reassigns train_labels)
train_lbp_features, train_labels = process_images_for_lbp(train_dataset_path)
print(f'Processed LBP features for {len(train_lbp_features)} train images.')

# LBP features for the test split (this reassigns test_labels)
test_lbp_features, test_labels = process_images_for_lbp(test_dataset_path)
print(f'Processed LBP features for {len(test_lbp_features)} test images.')

In [None]:
def extract_hog_features(image, visualize=True):
    """
    Extract Histogram of Oriented Gradients (HOG) features from an image.

    Parameters:
    - image: Grayscale image.
    - visualize: When True (default) also build the HOG visualisation image.
      Pass False to skip that extra work when only the feature vector is
      needed.

    Returns:
    - Tuple (hog_features, hog_image): the feature vector first, then the
      visualisation image (None when visualize is False).
    """
    # One parameter set for both paths so the feature vector is identical
    # whether or not the visualisation is requested
    hog_params = dict(orientations=9, pixels_per_cell=(8, 8),
                      cells_per_block=(2, 2), block_norm='L2-Hys',
                      transform_sqrt=True)

    if not visualize:
        return hog(image, visualize=False, **hog_params), None

    hog_features, hog_image = hog(image, visualize=True, **hog_params)
    return hog_features, hog_image

In [None]:
# Load a sample image straight from disk as grayscale (this is the raw file,
# not the output of preprocess_image)
sample_image_path = '/content/drive/My Drive/DataSet/train/happy/image_1.jpg'  # Change this to your image path
image = cv2.imread(sample_image_path, cv2.IMREAD_GRAYSCALE)

# Extract HOG features from the image
hog_features, hog_image = extract_hog_features(image)

# Show the original image and the HOG visualisation side by side
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

ax1.imshow(image, cmap='gray')
ax1.set_title('Original Image')
ax1.axis('off')

ax2.imshow(hog_image, cmap='gray')
ax2.set_title('HOG Image')
ax2.axis('off')

plt.show()

In [None]:
def process_images_for_hog(dataset_dir):
    """
    Compute the HOG feature vector of every image in every expression folder.

    Parameters:
    - dataset_dir: Directory containing one sub-folder per expression.

    Returns:
    - Tuple (hog_vectors, labels) of NumPy arrays, aligned by index.
    """
    hog_vectors, labels = [], []

    for expression in expression_folders:
        folder = os.path.join(dataset_dir, expression)

        for file_name in os.listdir(folder):
            # Read directly as single-channel grayscale
            gray = cv2.imread(os.path.join(folder, file_name), cv2.IMREAD_GRAYSCALE)

            # Keep only the feature vector; the visualisation image is discarded
            descriptor, _ = extract_hog_features(gray)

            hog_vectors.append(descriptor)
            labels.append(expression)

    return np.array(hog_vectors), np.array(labels)

# HOG features for the training split (this reassigns train_labels)
train_hog_features, train_labels = process_images_for_hog(train_dataset_path)
print(f'Processed HOG features for {len(train_hog_features)} train images.')

# HOG features for the test split (this reassigns test_labels)
test_hog_features, test_labels = process_images_for_hog(test_dataset_path)
print(f'Processed HOG features for {len(test_hog_features)} test images.')

In [None]:
# Load the pre-trained VGGFace (VGG16) network without its classification head
# (include_top=False) and with average pooling, to use it as a feature extractor
# on 224x224 3-channel inputs.
base_model = VGGFace(model='vgg16', include_top=False, input_shape=(224, 224, 3), pooling='avg')

# Print the model architecture to check layers
base_model.summary()

In [None]:
def preprocess_image_for_vggface(image_path):
    """
    Load an image from disk and prepare it as a single-image VGGFace batch.

    Parameters:
    - image_path: Path to the image to be processed.

    Returns:
    - float32 array of shape (1, 224, 224, 3), preprocessed for VGGFace.

    Raises:
    - ValueError: if the file cannot be read as an image.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise ValueError(f"Could not read image: {image_path}")

    # OpenCV loads BGR, but keras_vggface's preprocess_input expects RGB input
    # (it reverses the channel axis itself before subtracting the channel means)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # 224x224 is the input size the model was built with
    img = cv2.resize(img, (224, 224))

    # Add the leading batch axis expected by the model
    img_array = np.expand_dims(np.array(img, dtype=np.float32), axis=0)

    # VGGFace version 1 preprocessing (mean pixel subtraction)
    return preprocess_input(img_array, version=1)

In [None]:
def extract_vggface_features(image_path):
    """
    Run one image through the pre-trained VGGFace model.

    Parameters:
    - image_path: Path to the image to be processed.

    Returns:
    - The output of base_model.predict for that single-image batch.
    """
    # Preprocess, then feed the resulting batch straight to the network
    return base_model.predict(preprocess_image_for_vggface(image_path))

In [None]:
# Example: extract VGGFace features from one sample image and inspect the
# shape of what the model returns
sample_image_path = '/content/drive/My Drive/DataSet/train/happy/image_1.jpg'  # Change this to your image path
vggface_features = extract_vggface_features(sample_image_path)

print("VGGFace Features Shape:", vggface_features.shape)

In [None]:
def process_images_for_vggface(dataset_dir):
    """
    Extract VGGFace features for every image in every expression folder.

    Parameters:
    - dataset_dir: Directory containing one sub-folder per expression.

    Returns:
    - Tuple (features, labels) of NumPy arrays. Each feature entry is the
      unmodified per-image output of extract_vggface_features, so it keeps
      that call's leading batch axis.
    """
    features, labels = [], []

    for expression in expression_folders:
        folder = os.path.join(dataset_dir, expression)

        for file_name in os.listdir(folder):
            # One model forward pass per image
            features.append(extract_vggface_features(os.path.join(folder, file_name)))
            labels.append(expression)

    return np.array(features), np.array(labels)

# VGGFace features for the training split (this reassigns train_labels)
train_vggface_features, train_labels = process_images_for_vggface(train_dataset_path)
print(f'Processed VGGFace features for {len(train_vggface_features)} train images.')

# VGGFace features for the test split (this reassigns test_labels)
test_vggface_features, test_labels = process_images_for_vggface(test_dataset_path)
print(f'Processed VGGFace features for {len(test_vggface_features)} test images.')

In [None]:
def apply_pca(features, n_components=0.95, return_model=False):
    """
    Fit PCA on a feature set and return its reduced representation.

    Parameters:
    - features: Array-like with samples on the first axis. Inputs with more
      than two dimensions (e.g. one 2-D LBP image per sample) are flattened to
      one row per sample first.
    - n_components: Number of principal components to keep, or a float
      indicating the fraction of variance to retain.
    - return_model: When True, also return the fitted PCA object so the same
      projection can be applied to other data with `.transform()` (e.g. the
      test set, which must not get its own independent fit).

    Returns:
    - The reduced feature array, or (reduced_features, pca) when
      return_model is True.
    """
    features = np.asarray(features)
    if features.ndim != 2:
        # PCA only accepts 2-D input: collapse everything after the sample axis
        features = features.reshape(len(features), -1)

    # Initialize PCA
    pca = PCA(n_components=n_components)

    # Fit and transform the features
    reduced_features = pca.fit_transform(features)

    print(f"Original number of features: {features.shape[1]}")
    print(f"Reduced number of features: {reduced_features.shape[1]}")

    if return_model:
        return reduced_features, pca
    return reduced_features

In [None]:
# HOG features extracted earlier for the train and test datasets
train_hog_features = np.array(train_hog_features)
test_hog_features = np.array(test_hog_features)

# Fit PCA on the training set only and project BOTH splits with that same
# model. Fitting a second PCA on the test set would give a different basis
# (and usually a different number of components), so the classifier trained on
# train_hog_pca could not be applied to test_hog_pca.
hog_pca = PCA(n_components=0.95)
train_hog_pca = hog_pca.fit_transform(train_hog_features)
test_hog_pca = hog_pca.transform(test_hog_features)

print(f"Original number of features: {train_hog_features.shape[1]}")
print(f"Reduced number of features: {train_hog_pca.shape[1]}")

In [None]:
# For LBP Features
# Each sample is a 2-D LBP image, so flatten to one row per sample: PCA only
# accepts 2-D input.
train_lbp_2d = np.asarray(train_lbp_features).reshape(len(train_lbp_features), -1)
test_lbp_2d = np.asarray(test_lbp_features).reshape(len(test_lbp_features), -1)

# Fit on train only, then reuse the same projection for test
lbp_pca = PCA(n_components=0.95)
train_lbp_pca = lbp_pca.fit_transform(train_lbp_2d)
test_lbp_pca = lbp_pca.transform(test_lbp_2d)
print(f"LBP: {train_lbp_2d.shape[1]} -> {train_lbp_pca.shape[1]} features")

# For VGGFace Features
# Each per-image prediction keeps its batch axis, so flatten here as well
train_vggface_2d = np.asarray(train_vggface_features).reshape(len(train_vggface_features), -1)
test_vggface_2d = np.asarray(test_vggface_features).reshape(len(test_vggface_features), -1)

vggface_pca = PCA(n_components=0.95)
train_vggface_pca = vggface_pca.fit_transform(train_vggface_2d)
test_vggface_pca = vggface_pca.transform(test_vggface_2d)
print(f"VGGFace: {train_vggface_2d.shape[1]} -> {train_vggface_pca.shape[1]} features")

In [None]:
def visualize_pca_variance(features):
    """
    Plot cumulative explained variance against the number of PCA components.

    Parameters:
    - features: 2-D feature array on which a full PCA is fitted.
    """
    fitted = PCA().fit(features)

    # Running total of the per-component explained variance ratios
    cumulative_variance = np.cumsum(fitted.explained_variance_ratio_)

    plt.plot(cumulative_variance)
    plt.title('PCA Variance Retention')
    plt.xlabel('Number of Components')
    plt.ylabel('Cumulative Explained Variance')
    plt.show()

# Example: cumulative explained-variance curve for the training HOG features
visualize_pca_variance(train_hog_features)

In [None]:
# Train an SVM on the PCA-reduced HOG features.
# Note: train_hog_pca and test_hog_pca are the reduced feature sets from the PCA step.
# train_labels is whatever the most recently run feature-extraction cell assigned.

# Initialize the SVM model (RBF kernel is commonly used for non-linear classification)
svm_model = SVC(kernel='rbf', random_state=42)

# Train the SVM model on the training data
svm_model.fit(train_hog_pca, train_labels)

# Make predictions on the test data
test_predictions = svm_model.predict(test_hog_pca)

In [None]:
# Calculate accuracy
accuracy = accuracy_score(test_labels, test_predictions)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Generate classification report
report = classification_report(test_labels, test_predictions)
print("Classification Report:")
print(report)

# Generate confusion matrix. Passing `labels` fixes the row/column order (and
# keeps a row for a class even if it never occurs), so the matrix always lines
# up with the expression_folders tick labels used when it is plotted.
conf_matrix = confusion_matrix(test_labels, test_predictions, labels=expression_folders)
print("Confusion Matrix:")
print(conf_matrix)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Plot the confusion matrix as an annotated heatmap. The tick labels assume the
# rows/columns of conf_matrix follow the order of expression_folders.
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=expression_folders, yticklabels=expression_folders)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

In [None]:
# Hyperparameter grid for the RBF SVM: 3 values of C x 3 values of gamma
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [1, 0.1, 0.01],
    'kernel': ['rbf']
}

# 3-fold cross-validated grid search; refit=True retrains the best
# configuration on the full training data so `grid` can predict directly
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2, cv=3)

# Train the model with hyperparameter tuning (on the PCA-reduced HOG features)
grid.fit(train_hog_pca, train_labels)

# Make predictions with the best estimator
best_predictions = grid.predict(test_hog_pca)

# Evaluate the best model on the test split
best_accuracy = accuracy_score(test_labels, best_predictions)
print(f'Best Accuracy: {best_accuracy * 100:.2f}%')
print("Best Hyperparameters:", grid.best_params_)

In [None]:
# Detailed metrics for test_predictions, i.e. the predictions produced earlier
# by svm_model on the PCA-reduced HOG features.

# Compute accuracy
accuracy = accuracy_score(test_labels, test_predictions)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Precision, recall and F1-score, averaged over classes weighted by support
precision = precision_score(test_labels, test_predictions, average='weighted')
recall = recall_score(test_labels, test_predictions, average='weighted')
f1 = f1_score(test_labels, test_predictions, average='weighted')

print(f'Precision: {precision * 100:.2f}%')
print(f'Recall: {recall * 100:.2f}%')
print(f'F1-Score: {f1 * 100:.2f}%')

# Generate a full classification report. `labels` pins which class each entry of
# `target_names` refers to, so the names cannot silently shift (or the call
# fail) when a class is absent from the labels and predictions.
print("Classification Report:")
print(classification_report(test_labels, test_predictions,
                            labels=expression_folders, target_names=expression_folders))


In [None]:
# Generate confusion matrix with an explicit class order so that its
# rows/columns are guaranteed to match the tick labels below
conf_matrix = confusion_matrix(test_labels, test_predictions, labels=expression_folders)

# Plot the confusion matrix using seaborn heatmap
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=expression_folders, yticklabels=expression_folders)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()


In [None]:
# Flatten raw image data for training
def flatten_images(dataset_dir):
    """
    Load every image as grayscale and flatten it into a 1-D pixel vector.

    Parameters:
    - dataset_dir: Directory containing one sub-folder per expression.

    Returns:
    - Tuple (pixel_vectors, labels) of NumPy arrays, aligned by index.
    """
    pixel_vectors, labels = [], []

    for expression in expression_folders:
        folder = os.path.join(dataset_dir, expression)

        for file_name in os.listdir(folder):
            gray = cv2.imread(os.path.join(folder, file_name), cv2.IMREAD_GRAYSCALE)

            # Raw pixel intensities as a single row, no further processing
            pixel_vectors.append(gray.flatten())
            labels.append(expression)

    return np.array(pixel_vectors), np.array(labels)

# Raw-pixel baseline features for both splits (this reassigns train_labels and
# test_labels)
train_raw_features, train_labels = flatten_images(train_dataset_path)
test_raw_features, test_labels = flatten_images(test_dataset_path)

In [None]:
# Baseline: the same RBF SVM configuration, trained directly on the flattened
# raw pixel values (no feature engineering, no PCA)
svm_raw_model = SVC(kernel='rbf', random_state=42)
svm_raw_model.fit(train_raw_features, train_labels)

# Make predictions and evaluate performance
test_raw_predictions = svm_raw_model.predict(test_raw_features)

# Evaluate the baseline performance (accuracy plus the per-class report)
print("Performance with Raw Features:")
print(f'Accuracy: {accuracy_score(test_labels, test_raw_predictions) * 100:.2f}%')
print(classification_report(test_labels, test_raw_predictions))

In [None]:
# Compare against the engineered-feature model: test_predictions are the
# predictions made earlier by svm_model on the PCA-reduced HOG features
print("Performance with Engineered Features:")
print(f'Accuracy: {accuracy_score(test_labels, test_predictions) * 100:.2f}%')
print(classification_report(test_labels, test_predictions))

In [None]:
def predict_expression_on_test_data(test_dataset_path, pca=None, classifier=None):
    """
    Predict the expression of every image under the test directory.

    For each image the LBP, HOG and VGGFace features are extracted with the
    notebook's shared extractor functions (so they match the features computed
    for training), concatenated, reduced with PCA and classified.

    Parameters:
    - test_dataset_path: Path to the test dataset (one sub-folder per expression).
    - pca: Fitted PCA used to reduce the combined feature vector. Defaults to
      the notebook-level `pca_model`.
    - classifier: Fitted classifier applied to the reduced features. Defaults
      to the notebook-level `svm_model`.

    Both models must have been fitted on the same combined
    LBP + HOG + VGGFace feature layout that is built here.
    NOTE(review): in the notebook as written, `pca_model` is never defined and
    `svm_model` is fitted on PCA-reduced HOG features only - fit a matching
    pair on combined features and pass them in before relying on this function.

    Returns:
    - (test_predictions, test_labels): lists of predicted and true labels.
      Images that cannot be read are skipped.
    """
    # Fall back to the notebook-level models so one-argument calls keep working
    if pca is None:
        pca = pca_model
    if classifier is None:
        classifier = svm_model

    test_predictions = []
    test_labels = []

    # Loop through each expression folder (e.g., happy, sad, etc.)
    for expression in expression_folders:
        expression_path = os.path.join(test_dataset_path, expression)

        # Process each image in the folder
        for img_name in os.listdir(expression_path):
            img_path = os.path.join(expression_path, img_name)
            img = cv2.imread(img_path)
            if img is None:
                # Not an image (or unreadable): skip instead of crashing
                continue

            # Step 1: Convert to grayscale and resize (for LBP, HOG)
            img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img_gray_resized = cv2.resize(img_gray, (48, 48))

            # Step 2: Extract features through the shared helpers. The previous
            # inline HOG call omitted transform_sqrt=True and therefore did not
            # match the HOG features computed by extract_hog_features.
            lbp_features = extract_lbp_features(img_gray_resized).flatten()
            hog_features, _ = extract_hog_features(img_gray_resized)
            vgg_features = extract_vggface_features(img_path).flatten()

            # Step 3: Combine features
            combined_features = np.hstack((lbp_features, hog_features, vgg_features))

            # Step 4: Apply PCA for dimensionality reduction
            reduced_features = pca.transform([combined_features])

            # Step 5: Predict the emotion using the trained classifier
            predicted_emotion = classifier.predict(reduced_features)

            # Store the prediction and the true label
            test_predictions.append(predicted_emotion[0])
            test_labels.append(expression)  # The folder name represents the true label

    return test_predictions, test_labels

In [None]:
# Predict the facial expressions for all images in the test dataset
test_predictions, test_labels = predict_expression_on_test_data(test_dataset_path)

# Evaluate the model's performance
accuracy = accuracy_score(test_labels, test_predictions)
print(f'Accuracy on Test Data: {accuracy * 100:.2f}%')

# Print the classification report for detailed metrics. `labels` pins which
# class each entry of `target_names` refers to, so the report stays correct
# even when a class is absent from the labels and predictions.
print(classification_report(test_labels, test_predictions,
                            labels=expression_folders, target_names=expression_folders))