In [None]:
import cv2 as cv
import numpy as np
import os
from ultralytics import YOLO
import matplotlib.pyplot as plt

In [None]:
class FACELOADING:
    """
    Load a labelled face dataset and crop the faces with a YOLOv8 face detector.

    The root directory is expected to contain one sub-directory per subject;
    the sub-directory name becomes the label of every face found inside it.
    Cropped faces are returned as RGB arrays resized to ``target_size`` so they
    can be fed to a downstream face-recognition model.
    """

    def __init__(self, directory, target_size=(160, 160),
                 model_path='yolov8l-face.pt', conf=0.5):
        """
        Initialize the FACELOADING class.

        Args:
            directory (str): Root directory containing subdirectories of face images
            target_size (tuple): (width, height) every cropped face is resized to
            model_path (str): Path/name of the YOLOv8 face-detection weights
            conf (float): Minimum detection confidence passed to the detector
        """
        self.directory = directory
        self.target_size = tuple(target_size)
        self.conf = conf
        self.X = []  # Cropped face images from the most recent load_classes() call
        self.Y = []  # Label (sub-directory name) for each entry of self.X
        # Initialize YOLOv8 face detector with the face detection model
        self.detector = YOLO(model_path)

    def extract_face(self, filename):
        """
        Extract a face from a single image using YOLOv8.

        Only the first detection is used (one face per image is assumed).

        Args:
            filename (str): Path to the image file

        Returns:
            numpy.ndarray: RGB face crop resized to target_size

        Raises:
            ValueError: If the file cannot be decoded as an image, or the
                detected box has no area inside the image.
            Exception: If no face is detected in the image.
        """
        # cv.imread signals failure by returning None instead of raising
        img_bgr = cv.imread(filename)
        if img_bgr is None:
            raise ValueError(f"Could not read image file: {filename}")

        # Ultralytics interprets numpy input as BGR (OpenCV layout), so the
        # detector gets the image exactly as loaded; only the crop handed to
        # the recognition stage is converted to RGB.
        results = self.detector.predict(img_bgr, conf=self.conf)[0]
        if len(results.boxes) == 0:
            raise Exception("No face detected in the image")

        x1, y1, x2, y2 = map(int, results.boxes[0].xyxy[0])

        # Clamp the box to the image so slicing can never wrap around or
        # produce an empty crop that would make cv.resize fail obscurely
        height, width = img_bgr.shape[:2]
        x1, y1 = max(x1, 0), max(y1, 0)
        x2, y2 = min(x2, width), min(y2, height)
        if x2 <= x1 or y2 <= y1:
            raise ValueError(f"Detected face box is empty for image: {filename}")

        img_rgb = cv.cvtColor(img_bgr, cv.COLOR_BGR2RGB)
        face = img_rgb[y1:y2, x1:x2]
        return cv.resize(face, self.target_size)

    def load_faces(self, dir):
        """
        Load all faces from a directory.

        Files are visited in sorted order so the result is reproducible across
        platforms. Images that fail to load or contain no detectable face are
        reported on stdout and skipped; nested directories are ignored.

        Args:
            dir (str): Directory containing face images

        Returns:
            list: List of processed face images
        """
        faces = []
        for im_name in sorted(os.listdir(dir)):
            path = os.path.join(dir, im_name)
            if not os.path.isfile(path):
                continue
            try:
                faces.append(self.extract_face(path))
            except Exception as e:
                # Best effort: one bad image must not abort the whole subject
                print(f"Error processing {im_name}: {str(e)}")
        return faces

    def load_classes(self):
        """
        Load all classes (subjects) from the main directory.
        Each subdirectory name is treated as a class label.

        The stored dataset (self.X / self.Y) is rebuilt from scratch on every
        call, so calling this method again (e.g. re-running a notebook cell)
        does not duplicate samples.

        Returns:
            tuple: (numpy.ndarray of face images, numpy.ndarray of labels)
        """
        all_faces, all_labels = [], []
        for sub_dir in sorted(os.listdir(self.directory)):
            path = os.path.join(self.directory, sub_dir)
            if not os.path.isdir(path):
                continue
            faces = self.load_faces(path)
            labels = [sub_dir] * len(faces)
            print(f"Loaded {len(labels)} images for subject: {sub_dir}")
            all_faces.extend(faces)
            all_labels.extend(labels)

        self.X, self.Y = all_faces, all_labels
        return np.asarray(self.X), np.asarray(self.Y)

    def plot_images(self):
        """
        Plot all processed face images in a grid layout (3 columns),
        using each image's label as its title.
        """
        if not self.X:
            print("No images loaded to plot")
            return

        ncols = 3
        # Ceiling division: exactly as many rows as needed, no empty trailing row
        nrows = (len(self.X) + ncols - 1) // ncols

        plt.figure(figsize=(18, 16))
        for num, image in enumerate(self.X):
            plt.subplot(nrows, ncols, num + 1)
            plt.imshow(image)
            plt.title(self.Y[num], pad=10)
            plt.axis('off')

        plt.tight_layout()
        plt.show()

In [None]:
# Build the loader on the dataset root, then detect and crop every face.
# X receives the face images, y the matching sub-directory names (labels).
face_loading = FACELOADING('faces_data')
X, y = face_loading.load_classes()

In [None]:
# Visual sanity check: show every cropped face in a grid, titled with its label
face_loading.plot_images()

In [None]:
# FaceNet wrapper from the keras_facenet package, used to turn face crops into embeddings
from keras_facenet import FaceNet

# Single shared embedder instance; get_embedding() below reads this module-level name.
# NOTE(review): presumably loads (and may download) pre-trained weights on construction - confirm
embedder = FaceNet()

def get_embedding(face_img):
    """
    Compute the FaceNet embedding of a single face image.

    Args:
        face_img (numpy.ndarray): One face image as an H x W x C array
            (the loader in this notebook produces 160x160x3 crops).

    Returns:
        numpy.ndarray: The embedding vector for this image, i.e. the first
        (and only) row of the batch returned by the embedder.
    """
    # The embedder works on batches: cast to float32 and prepend a batch axis,
    # turning (H, W, C) into (1, H, W, C)
    single_batch = face_img.astype('float32')[np.newaxis, ...]

    # One image in, one embedding out - unwrap it from the batch dimension
    return embedder.embeddings(single_batch)[0]

In [None]:
# Embed each cropped face in X (one get_embedding call per image) and
# collect the resulting vectors into a single NumPy array, one entry per face
EMBEDDED_X = np.asarray([get_embedding(img) for img in X])

In [None]:
# Cache embeddings and labels on disk so the detection/embedding steps need not be re-run.
# Both arrays are passed positionally, so NumPy stores them under the keys
# 'arr_0' (embeddings) and 'arr_1' (labels) in the .npz archive.
np.savez_compressed('faces_embeddings.npz', EMBEDDED_X, y)

In [None]:
from sklearn.preprocessing import LabelEncoder

# Map the string labels (subject names) to integer class ids.
# The fitted encoder is kept so ids can be translated back to names later.
encoder = LabelEncoder()

# Learn the label vocabulary and encode y in a single step;
# y now holds integer ids instead of names
y = encoder.fit_transform(y)

In [None]:
# Quick look at one embedding: plot the first face's vector and
# use its (encoded) class id as the y-axis label
first_embedding = EMBEDDED_X[0]
first_label = y[0]
plt.plot(first_embedding)
plt.ylabel(first_label)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out part of the embeddings for evaluation.
# The split is shuffled with a fixed seed so it is the same on every run.
X_train, X_test, Y_train, Y_test = train_test_split(
    EMBEDDED_X,
    y,
    shuffle=True,
    random_state=17,
)

In [None]:
from sklearn.svm import SVC

# Linear-kernel Support Vector Machine on the face embeddings.
# probability=True makes scikit-learn calibrate class probabilities with an
# internal cross-validation that shuffles the data; fixing random_state keeps
# the fitted (and later pickled) model's probabilities reproducible across runs.
model = SVC(kernel='linear', probability=True, random_state=17)

# Train (fit) the model using the training data
model.fit(X_train, Y_train)

In [None]:
# Predict class ids for the training split and the held-out split, in that order
ypreds_train, ypreds_test = map(model.predict, (X_train, X_test))

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy on the data the model was fitted on (displayed as the cell output)
accuracy_score(y_true=Y_train, y_pred=ypreds_train)

In [None]:
# Accuracy on the held-out split (displayed as the cell output)
accuracy_score(y_true=Y_test, y_pred=ypreds_test)

In [None]:
import pickle

# Persist the trained classifier to disk.
# Only load this file back from a trusted location: unpickling can execute arbitrary code.
with open('svm_model_160x160.pkl', 'wb') as model_file:
    model_file.write(pickle.dumps(model))