In [1]:
# Import necessary libraries
import os                        # Library to interact with the operating system
import cv2                       # OpenCV library for computer vision tasks
import numpy as np               # NumPy library for numerical operations
from sklearn.datasets import fetch_lfw_people   # Function to load the LFW dataset
from sklearn.model_selection import train_test_split   # Function to split dataset into train and test subsets

# Function to split augmented dataset into training and testing subsets
def split_augmented_dataset(augmented_dir, test_size=0.2):
    """Load every image under ``augmented_dir`` and split it into train/test sets.

    ``augmented_dir`` must contain one sub-directory per class, each holding
    that class's image files. Sub-directory names (and the file names inside
    them) are sorted before use, so the integer label given to a class and the
    resulting split do not depend on the order in which the operating system
    lists directory entries.

    The shapes of the stacked image and label arrays are printed as a sanity
    check.

    Parameters
    ----------
    augmented_dir : str
        Root directory with one sub-directory per label.
    test_size : float, default 0.2
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    X_train, X_test, y_train, y_test
        The four arrays returned by ``train_test_split`` (called with
        ``random_state=42``): image stacks and their integer labels.

    Raises
    ------
    ValueError
        If no readable image is found, or if the images do not all share one
        shape (raised by ``np.stack``).
    """
    # Only directories are labels; stray files in the root are ignored instead
    # of crashing the nested os.listdir call.
    label_names = sorted(
        entry for entry in os.listdir(augmented_dir)
        if os.path.isdir(os.path.join(augmented_dir, entry))
    )
    images = []
    labels = []
    skipped = 0

    for label_idx, label_name in enumerate(label_names):
        label_dir = os.path.join(augmented_dir, label_name)
        for image_file in sorted(os.listdir(label_dir)):
            image_path = os.path.join(label_dir, image_file)
            # With its default flag cv2.imread yields a 3-channel image, so no
            # grayscale conversion is needed here. It signals an unreadable or
            # non-image file by returning None rather than raising.
            image = cv2.imread(image_path)
            if image is None:
                skipped += 1
                continue
            images.append(image)
            labels.append(label_idx)

    if skipped:
        print(f"Skipped {skipped} file(s) that could not be read as images")
    if not images:
        raise ValueError(f"No readable images found under {augmented_dir!r}")

    images = np.stack(images, axis=0)
    labels = np.array(labels)
    print(images.shape)
    print(labels.shape)

    # Fixed seed keeps the split identical between runs.
    X_train, X_test, y_train, y_test = train_test_split(
        images, labels, test_size=test_size, random_state=42
    )
    return X_train, X_test, y_train, y_test

In [2]:
# Function to perform image augmentation using OpenCV
def augment_image(image):
    """Return a randomly rotated, flipped and brightness-scaled version of ``image``.

    Three random perturbations are applied in order, each drawn from NumPy's
    global random state:

    1. rotation about the image centre by a whole number of degrees in [-10, 10];
    2. horizontal flip with probability 0.5;
    3. multiplication of the HSV value (brightness) channel by a factor drawn
       uniformly from [0.7, 1.3).

    Parameters
    ----------
    image : numpy.ndarray
        A 2-D grayscale image or a 3-D colour image treated as RGB. Grayscale
        input is expanded to three channels first.

    Returns
    -------
    numpy.ndarray
        The augmented three-channel image, with the same height and width as
        the input.
    """
    if image.ndim == 2:
        # Later steps unpack three dimensions and convert RGB <-> HSV.
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

    rows, cols, _ = image.shape

    # Random rotation between -10 and 10 degrees (randint's upper bound is exclusive).
    random_angle = np.random.randint(-10, 11)
    M = cv2.getRotationMatrix2D((cols / 2, rows / 2), random_angle, 1)
    augmented_image = cv2.warpAffine(image, M, (cols, rows))

    # Random horizontal flip (flip code 1 mirrors around the vertical axis).
    if np.random.rand() > 0.5:
        flipped_image = cv2.flip(augmented_image, 1)
    else:
        flipped_image = augmented_image

    # Random brightness adjustment on the V channel.
    brightness_factor = np.random.uniform(0.7, 1.3)
    hsv_image = cv2.cvtColor(flipped_image, cv2.COLOR_RGB2HSV)
    value_channel = hsv_image[:, :, 2] * brightness_factor
    if np.issubdtype(hsv_image.dtype, np.integer):
        # Storing an out-of-range float into an integer array does not
        # saturate: a bright pixel scaled past the maximum would wrap around
        # and come out dark. Clamp to the dtype's range before writing back.
        limits = np.iinfo(hsv_image.dtype)
        value_channel = np.clip(value_channel, limits.min, limits.max)
    hsv_image[:, :, 2] = value_channel
    augmented_image = cv2.cvtColor(hsv_image, cv2.COLOR_HSV2RGB)

    return augmented_image

In [3]:
def augment_lfw_people_dataset(lfw_people, target_count=10, output_directory='lfw_augmented'):
    """Write ``target_count`` images per person into a per-label directory tree.

    The tree is rooted at ``<lfw_people.target_names[0]>/<output_directory>``
    and holds one sub-directory per entry of ``lfw_people.target_names``.
    Files are named ``<label_name>_<index>.png``.

    For each person:

    * with at least ``target_count`` images, the first ``target_count`` are
      written unchanged;
    * with fewer, all originals are written unchanged and the remainder is
      filled by cycling through the originals and passing each through
      ``augment_image``, so the padding consists of perturbed variants rather
      than byte-identical duplicates;
    * with no images at all, the (empty) label directory is still created and
      nothing is written.

    Parameters
    ----------
    lfw_people : object
        Must expose ``target_names``, ``images`` and ``target`` as used below
        (e.g. the result of ``fetch_lfw_people``).
    target_count : int, default 10
        Number of images to write per person.
    output_directory : str, default 'lfw_augmented'
        Name of the directory created under ``target_names[0]``.
    """
    augmented_dir = os.path.join(lfw_people.target_names[0], output_directory)
    os.makedirs(augmented_dir, exist_ok=True)

    for label_idx, label_name in enumerate(lfw_people.target_names):
        label_dir = os.path.join(augmented_dir, label_name)
        os.makedirs(label_dir, exist_ok=True)

        # Images belonging to the current label.
        label_images = lfw_people.images[lfw_people.target == label_idx]
        n_available = len(label_images)
        if n_available == 0:
            # Nothing to copy or augment; cycling over an empty set would
            # never reach target_count.
            continue

        # Originals first (truncated when the label already has enough).
        selected_images = list(label_images[:target_count])

        # Pad short labels with augmented variants, cycling through the
        # originals in order.
        source_idx = 0
        while len(selected_images) < target_count:
            selected_images.append(augment_image(label_images[source_idx % n_available]))
            source_idx += 1

        for idx, image in enumerate(selected_images):
            image_path = os.path.join(label_dir, f'{label_name}_{idx}.png')
            cv2.imwrite(image_path, image)

In [4]:
# Load the LFW dataset: keep every person with at least one face, images resized by 0.4
lfw_people = fetch_lfw_people(min_faces_per_person=1, resize=0.4)

# Location of the augmented dataset. This must match the path that
# augment_lfw_people_dataset builds internally from target_names[0].
augmented_dir = os.path.join(lfw_people.target_names[0], 'lfw_augmented')

# Build the augmented dataset (10 images per class) only when it is not on disk
# yet; an existing directory is reused as a cache. The split below needs the
# directory to exist, so generation has to happen in the "missing" case.
if not os.path.exists(augmented_dir):
    augment_lfw_people_dataset(lfw_people, target_count=10)

# Split the augmented dataset into training and testing subsets
X_train, X_test, y_train, y_test = split_augmented_dataset(augmented_dir, test_size=0.2)


(57490, 50, 37, 3)
(57490,)


In [5]:
# Sanity check: shape of the training image array returned by split_augmented_dataset
X_train.shape

(45992, 50, 37, 3)

In [6]:
# Sanity check: shape of the held-out test image array returned by split_augmented_dataset
X_test.shape

(11498, 50, 37, 3)

In [7]:
# X_train = X_train.reshape(-1, 50 * 37 * 3)
# X_test = X_test.reshape(-1, 50 * 37 * 3)

In [10]:
from skimage.feature import hog, local_binary_pattern
import xgboost as xgb
from sklearn.metrics import classification_report, accuracy_score
from skimage import exposure
  
def scale_invariant_feature_transform(image):
    """Return a rendering of ``image`` with its SIFT keypoints drawn on it.

    The input is min-max normalised to 0-255, cast to ``uint8`` and converted
    from BGR to grayscale. SIFT keypoints are detected on that grayscale image
    and drawn onto a copy of it. Note that the result is the drawn image, not
    the SIFT descriptors, so its size depends only on the input size.

    Parameters
    ----------
    image : numpy.ndarray
        Three-channel image in BGR order. It is not modified.

    Returns
    -------
    numpy.ndarray
        The image produced by ``cv2.drawKeypoints`` for the grayscale input.
    """
    # Convert the image depth to 8-bit, which SIFT and the colour conversion expect.
    image8bit = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    gray1 = cv2.cvtColor(image8bit, cv2.COLOR_BGR2GRAY)

    sift = cv2.SIFT_create()
    # Only the keypoints are used; the descriptors are discarded.
    keypoints_1, _ = sift.detectAndCompute(gray1, None)

    # Pass None as the output image so OpenCV allocates a fresh array. Handing
    # in the caller's ``image`` lets OpenCV write the rendering into that
    # buffer when its size and type match, silently overwriting the dataset
    # entry that other feature extractors read afterwards.
    img_1 = cv2.drawKeypoints(gray1, keypoints_1, None)
    return img_1
    
def local_binary_patterns(image):
    """Compute the "uniform" local binary pattern map of a BGR image.

    The image is min-max normalised to 0-255, cast to ``uint8`` and reduced to
    grayscale before being handed to scikit-image's ``local_binary_pattern``
    with a radius of 3 and 8 sampling points per unit of radius (24 in total).

    Returns the array produced by ``local_binary_pattern`` for the grayscale
    image.
    """
    # LBP neighbourhood: 24 points on a circle of radius 3
    lbp_radius = 3
    lbp_points = 8 * lbp_radius

    # Bring the pixel data to 8-bit depth, then drop the colour channels
    as_uint8 = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX).astype('uint8')
    grayscale = cv2.cvtColor(as_uint8, cv2.COLOR_BGR2GRAY)

    return local_binary_pattern(grayscale, lbp_points, lbp_radius, method="uniform")

# Function to build one flat feature vector (SIFT keypoint rendering + LBP map) from an image
def extract_combined_features(image):
    """Concatenate the flattened SIFT rendering and LBP map of ``image``.

    ``scale_invariant_feature_transform`` is evaluated first, then
    ``local_binary_patterns``; both results are flattened and joined into a
    single 1-D array, SIFT values first.
    """
    keypoint_rendering = scale_invariant_feature_transform(image)
    texture_map = local_binary_patterns(image)

    flat_parts = [keypoint_rendering.ravel(), texture_map.ravel()]
    return np.concatenate(flat_parts)
    

In [11]:
# Turn every training and testing image into its combined feature vector
# (one row per image)
X_train_features = np.array(list(map(extract_combined_features, X_train)))
X_test_features = np.array(list(map(extract_combined_features, X_test)))

In [12]:
# Sanity check: one row per training image, one column per combined feature value
X_train_features.shape

(45992, 7400)

In [17]:
from time import time
from sklearn.decomposition import PCA
def pca_implementation(X_train, X_test, n_components=60, random_state=42):
    """Fit a whitened, randomized-SVD PCA on ``X_train`` and project both sets.

    The PCA is fitted on the training matrix only (labels are not used) and
    the same fitted transform is then applied to the training and test
    matrices. Progress and timings are printed.

    Parameters
    ----------
    X_train : array-like of shape (n_train_samples, n_features)
        Matrix the PCA is fitted on.
    X_test : array-like of shape (n_test_samples, n_features)
        Matrix projected with the PCA fitted on ``X_train``.
    n_components : int, default 60
        Number of principal components to keep.
    random_state : int or None, default 42
        Seed for the randomized SVD solver, so repeated runs give the same
        projection. Pass ``None`` for an unseeded solver.

    Returns
    -------
    X_train_pca, X_test_pca
        The two projected matrices, in that order.
    """
    print("Extracting the top %d eigenfaces from %d faces"
          % (n_components, X_train.shape[0]))
    t0 = time()
    pca = PCA(n_components=n_components, svd_solver='randomized',
              whiten=True, random_state=random_state).fit(X_train)
    print("done in %0.3fs" % (time() - t0))

    print("Projecting the input data on the eigenfaces orthonormal basis")
    t0 = time()
    X_train_pca = pca.transform(X_train)
    X_test_pca = pca.transform(X_test)
    print("done in %0.3fs" % (time() - t0))
    return X_train_pca, X_test_pca

In [18]:
# Project the combined feature matrices onto the PCA components.
# NOTE: this rebinds X_train / X_test from the image arrays to their PCA
# projections, which is what the model cells below consume. Re-running the
# feature-extraction cell after this one would therefore operate on the
# projections instead of images; re-run the split cell first in that case.
X_train, X_test= pca_implementation(X_train_features, X_test_features)

Extracting the top 60 eigenfaces from 45992 faces
done in 19.738s
Projecting the input data on the eigenfaces orthonormal basis
done in 2.897s


In [None]:
# Fit an XGBoost classifier with default hyperparameters on the training data
# (fit returns the estimator itself, so construction and training can be chained)
model = xgb.XGBClassifier().fit(X_train, y_train)

In [None]:
# Predict labels for the held-out set with the trained XGBoost model
y_pred = model.predict(X_test)

# Per-class precision/recall/F1 report and overall accuracy
classification_report_output = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Print the results
print("Classification Report:")
print(classification_report_output)
print("Accuracy Score:", accuracy)