#### Project 2 Facial Recognition  
#### Author: Brian Reppeto 1/26/2025

### Import necessary libraries 

In [None]:
# import libraries

import cv2
import os
from deepface import DeepFace
from sklearn.metrics import precision_score, recall_score, f1_score, confusion_matrix
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Specify the paths for reference images, input images (where faces will be detected), and output folder (where matched faces will be saved).

In [None]:
# paths of images

# All three folders live under the same Desktop directory; build them from
# shared prefixes so the common part is written only once.
_desktop_dir = r"C:\Users\brepp\OneDrive\Desktop"
_photos_dir = _desktop_dir + "\\Photos"

reference_folder = _photos_dir + "\\ReferenceImages"    # reference images to compare against
input_folder = _photos_dir + "\\Brian\\iCloud Photos"   # images in which faces are detected
output_folder = _desktop_dir + "\\ExtractedFaces"       # where matched faces are saved

###  Check if my face matches any of the reference images

In [None]:
# function to verify if my face matches any of the reference images

def is_my_face(reference_folder, test_face, threshold=0.65):
    """Return True if ``test_face`` matches at least one reference image.

    The face is written to a temporary JPEG and compared against every image
    file in ``reference_folder`` with ``DeepFace.verify`` (Facenet model).
    Each reported distance is converted to a similarity (``1 - distance``).
    Per-reference metrics (precision, recall, F1, FAR, FRR) are printed as a
    side effect; they treat every reference image as a true match.

    Args:
        reference_folder: Directory holding the reference images. Only files
            ending in .jpg, .jpeg or .png are used.
        test_face: Image array to verify; it is passed to ``cv2.imwrite``.
        threshold: A similarity strictly greater than this counts as a match.

    Returns:
        True when the best similarity over all reference images exceeds
        ``threshold``. False otherwise, including when verification fails
        with an unexpected error (the error is printed, not raised).
    """
    # NOTE: fixed file name in the current working directory, so concurrent
    # calls would overwrite each other's temp file.
    temp_face_path = "temp_face.jpg"
    cv2.imwrite(temp_face_path, test_face)

    try:
        best_similarity = 0  # highest similarity seen so far
        predictions = []
        ground_truth = []

        for reference_image_name in os.listdir(reference_folder):
            # skip anything that is not an image file
            if not reference_image_name.lower().endswith((".jpg", ".jpeg", ".png")):
                continue
            reference_image_path = os.path.join(reference_folder, reference_image_name)

            try:
                # compare the reference image with the test face
                result = DeepFace.verify(
                    img1_path=reference_image_path,
                    img2_path=temp_face_path,
                    model_name="Facenet",
                    enforce_detection=False
                )
                # a lower distance means more similar, so invert it
                similarity = 1 - result["distance"]
                predictions.append(1 if similarity > threshold else 0)
                ground_truth.append(1)  # assuming all reference images are correct matches
                best_similarity = max(best_similarity, similarity)
            except Exception as e:
                # one bad reference image must not abort the whole comparison
                print(f"Error comparing with {reference_image_name}: {e}")

        # compute metrics
        if predictions:
            precision = precision_score(ground_truth, predictions)
            recall = recall_score(ground_truth, predictions)
            f1 = f1_score(ground_truth, predictions)
            # Pin both labels: ground_truth is always 1, so when every
            # prediction is also 1 the inferred matrix would be 1x1 and the
            # 4-way unpack would raise, making a perfect match return False.
            tn, fp, fn, tp = confusion_matrix(
                ground_truth, predictions, labels=[0, 1]
            ).ravel()
            far = fp / (fp + tn) if (fp + tn) > 0 else 0
            frr = fn / (fn + tp) if (fn + tp) > 0 else 0

            print(f"Precision: {precision:.2f}")
            print(f"Recall: {recall:.2f}")
            print(f"F1-score: {f1:.2f}")
            print(f"False Acceptance Rate (FAR): {far:.2f}")
            print(f"False Rejection Rate (FRR): {frr:.2f}")

        return best_similarity > threshold
    except Exception as e:
        print(f"Error during face verification: {e}")
        return False
    finally:
        # always remove the temp image, even on error
        if os.path.exists(temp_face_path):
            os.remove(temp_face_path)

###  Function to detect and crop faces from an image using OpenCV's Haar cascade classifier

In [None]:
# function to detect and crop faces using OpenCV

def detect_faces(image):
    """Detect frontal faces in ``image`` and return them as cropped regions.

    Uses OpenCV's bundled ``haarcascade_frontalface_default.xml`` classifier
    on a grayscale copy of the image (converted with ``COLOR_BGR2GRAY``).

    Args:
        image: Image array to search; it is sliced as ``image[rows, cols]``.

    Returns:
        A list with one cropped slice of ``image`` per detected face; empty
        when no face is found.
    """
    cascade_file = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    detector = cv2.CascadeClassifier(cascade_file)

    # the detector is run on the grayscale version of the image
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    boxes = detector.detectMultiScale(
        grayscale,
        scaleFactor=1.1,
        minNeighbors=5,
        minSize=(30, 30),
    )

    # each box is (x, y, width, height); crop it out of the original image
    crops = []
    for (left, top, width, height) in boxes:
        crops.append(image[top:top + height, left:left + width])
    return crops

###  Extract and save images of my face and compare to reference images

In [None]:
# function to process images and evaluate performance

def extract_my_faces_from_folder(reference_folder, input_folder, output_folder, threshold=0.65):
    """Save every face found in ``input_folder`` that matches the reference images.

    For each .jpg/.jpeg/.png file in ``input_folder``, faces are detected with
    ``detect_faces`` and each one is checked with ``is_my_face``. Matching
    faces are written to ``output_folder`` as ``<image name>_face_<index>.jpg``.

    Args:
        reference_folder: Directory of reference images, passed to ``is_my_face``.
        input_folder: Directory of images to scan for faces.
        output_folder: Directory for the matched face crops; created if missing.
        threshold: Similarity threshold forwarded to ``is_my_face``.

    Returns:
        None. The results are the files written to ``output_folder``.
    """
    os.makedirs(output_folder, exist_ok=True)

    for file_name in os.listdir(input_folder):
        # only image files are processed
        if not file_name.lower().endswith((".jpg", ".jpeg", ".png")):
            continue

        image = cv2.imread(os.path.join(input_folder, file_name))
        if image is None:  # the file could not be loaded as an image
            continue

        base_name = os.path.splitext(file_name)[0]
        for i, face in enumerate(detect_faces(image)):
            if is_my_face(reference_folder, face, threshold):
                # the face index keeps several faces from one photo apart
                output_path = os.path.join(output_folder, f"{base_name}_face_{i}.jpg")
                cv2.imwrite(output_path, face)

### Call extract_my_faces_from_folder to run the script

In [None]:
# run the extraction and evaluation

# Run the full pipeline on the folders configured above.
extract_my_faces_from_folder(
    reference_folder=reference_folder,
    input_folder=input_folder,
    output_folder=output_folder,
    threshold=0.65,
)

### Plot the correlation matrix of the threshold, detection counts, and number of reference photos

In [None]:
# Recorded results, one row per run:
# (threshold used, TPR_counts, FPR_counts, number of reference photos)
column_names = ['Threshold', 'TPR_counts', 'FPR_counts', 'Ref_Photos']
run_records = [
    (6,   63,  1,  5),
    (4,   162, 59, 5),
    (7,   93,  0,  5),
    (7.5, 94,  0,  5),
    (6.5, 30,  0,  5),
    (6.5, 121, 1,  10),
    (6,   172, 0,  20),
    (6,   189, 3,  20),
    (6,   227, 30, 30),
    (7,   189, 0,  30),
]

# transpose the rows into one list per column
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*run_records))
}

# build the DataFrame and its pairwise correlation matrix
df = pd.DataFrame(data)
corr_matrix = df.corr()

# draw the annotated heatmap
plt.figure(figsize=(6, 5))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Correlation Matrix Heatmap")
plt.show()


### Scatter plot with regression line showing the relationship between the number of reference photos and the positive detection count (TPR_counts)

In [None]:
# number of reference images used in each run, and the matching detection counts
ref_photo_counts = [5, 5, 5, 5, 5, 10, 20, 20, 30, 30]
detection_counts = [63, 162, 93, 94, 30, 121, 172, 189, 227, 189]

data = {
    'Ref_Photos': ref_photo_counts,
    'TPR_counts': detection_counts,
}

# tabular form for seaborn
df = pd.DataFrame(data)

# scatter plot with a fitted regression line (no confidence band)
plt.figure(figsize=(6, 5))
sns.regplot(x='Ref_Photos', y='TPR_counts', data=df, scatter_kws={'s': 80}, ci=None)

plt.grid(True)
plt.xlabel("Number of Reference Photos")
plt.ylabel("Positive Detections (TPR_counts)")
plt.title("Scatter Plot with Regression: Ref_Photos vs. TPR_counts")
plt.show()
