# Data Processing
These sections are utilized to take photos from my webcam, rename, and place photos from LFW dataset into proper folders, as well as split the dataset into training and testing partitions.

In [84]:
import os

def rename_images(directory):
    # Dictionaries to store counts
    image_count = {}
    name_counter = {}
    total_names = 0
    
    # Iterate through the files in the directory
    for filename in os.listdir(directory):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif')):
            # Split filename and extension
            name, ext = os.path.splitext(filename)
            name,x,y=name.split('.')
            #print(name)
            
            # Get the count for the filename or initialize to 0
            count = image_count.get(name, 0)
            
            # Increment the name counter or add new name
            if name not in name_counter:
                total_names += 1
                name_counter[name] = total_names
                os.makedirs(os.path.join(directory,name))
            
            # New filename with counts
            new_filename = f"{name}.{name_counter[name]-1}.{count}{ext}"
            new_path = os.path.join(directory, name, new_filename)
            
            # Rename the file
            os.replace(os.path.join(directory, filename), new_path)
            #print(new_path)
            
            # Update counts for the filename
            image_count[name] = count + 1

# Replace 'directory_path' with the path of your directory containing images
directory_path = 'dataset'
rename_images(directory_path)

In [16]:
import os
import cv2
import uuid

ETHAN_PATH = os.path.join('data', 'ethan')
SADIE_PATH = os.path.join('data', 'sadie')
MATT_PATH = os.path.join('data', 'matt')

"""
os.makedirs(ETHAN_PATH)
os.makedirs(SADIE_PATH)
os.makedirs(MATT_PATH)
"""

cap = cv2.VideoCapture(0)
while cap.isOpened(): 
    ret, frame = cap.read()
   
    # Cut down frame to 250x250px
    frame = frame[120:120+250,200:200+250, :]
    
    # Collect anchors 
    if cv2.waitKey(1) & 0XFF == ord('e'):
        # Create the unique file path 
        imgname = os.path.join(ETHAN_PATH, '{}.jpg'.format(uuid.uuid1()))
        # Write out anchor image
        cv2.imwrite(imgname, frame)
    
    # Collect positives
    if cv2.waitKey(1) & 0XFF == ord('s'):
        # Create the unique file path 
        imgname = os.path.join(SADIE_PATH, '{}.jpg'.format(uuid.uuid1()))
        # Write out positive image
        cv2.imwrite(imgname, frame)

    # Collect positives
    if cv2.waitKey(1) & 0XFF == ord('m'):
        # Create the unique file path 
        imgname = os.path.join(MATT_PATH, '{}.jpg'.format(uuid.uuid1()))
        # Write out positive image
        cv2.imwrite(imgname, frame)
    
    # Show image back to screen
    cv2.imshow('Image Collection', frame)
    
    # Breaking gracefully
    if cv2.waitKey(1) & 0XFF == ord('q'):
        break
        
# Release the webcam
cap.release()
# Close the image show frame
cv2.destroyAllWindows()

In [17]:
import os
import splitfolders

inputFolder='data'
outputFolder='testDataset2'
splitfolders.ratio(inputFolder, output=outputFolder, seed=42, ratio=(.8,.2))

# Facial Recognition
This section is where the data is imported, the model is trained, and the model is tested. It requires two folders, one for training and one for testing, to be in the same directory and have subfolders containing the different classes of items

In [1]:
import cv2
import os
import numpy as np

# Load the pre-trained Haar cascade for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

# Initialize an LBPH face recognizer
recognizer = cv2.face.LBPHFaceRecognizer_create()

# Function to load images and labels from a directory
def load_images_from_folder(folder):
    images = []
    labels = []
    label = 0
    for subfolder in os.listdir(folder):
        subfolder_path = os.path.join(folder, subfolder)
        if os.path.isdir(subfolder_path):
            for filename in os.listdir(subfolder_path):
                if filename.endswith('.jpg'):
                    img = cv2.imread(os.path.join(subfolder_path, filename))
                    if img is not None:
                        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY))
                        labels.append(label)
            label += 1
    return images, labels

# Function to train the recognizer with training images
def train_recognizer(train_images_folder):
    images, labels = load_images_from_folder(train_images_folder)
    recognizer.train(images, np.array(labels))

# Function to update recognizer with additional training images
def update_recognizer(train_images_folder):
    images, labels = load_images_from_folder(train_images_folder)
    recognizer.update(images, np.array(labels))  # Update the model with new data

# Function to calculate recognition metrics
def calculate_metrics(test_images_folder):
    test_images, test_labels = load_images_from_folder(test_images_folder)

    true_positive = 0
    false_positive = 0
    true_negative = 0
    false_negative = 0

    for idx, img in enumerate(test_images):
        faces_found = face_cascade.detectMultiScale(img, scaleFactor=1.3, minNeighbors=5)

        for (x, y, w, h) in faces_found:
            label, _ = recognizer.predict(img[y:y+h, x:x+w])

            # True positive: correctly recognized faces with matching labels
            if label == test_labels[idx]:
                true_positive += 1
            # False positive: incorrectly recognized faces with non-matching labels
            else:
                false_positive += 1

    total_faces = len(test_images)
    true_negative = total_faces - (true_positive + false_positive)
    false_negative = 0  # No false negatives in this setup (assumes all faces are present in the test set)

    # Calculate error rate, precision, false discovery rate, true positive rate, false negative rate, accuracy
    error_rate = (false_positive + false_negative) / total_faces if total_faces > 0 else -1
    precision = true_positive / (true_positive + false_positive) if (true_positive + false_positive) > 0 else -1
    false_discovery_rate = false_positive / (false_positive + true_positive) if (false_positive + true_positive) > 0 else -1
    true_positive_rate = true_positive / total_faces if total_faces > 0 else -1
    false_negative_rate = false_negative / total_faces if total_faces > 0 else -1
    accuracy = (true_positive + true_negative) / total_faces if total_faces > 0 else -1

    print(f"True Positive: {true_positive}")
    print(f"True Negative: {true_negative}")
    print(f"False Positive: {false_positive}")
    print(f"False Negative: {false_negative}")

    return error_rate, precision, false_discovery_rate, true_positive_rate, false_negative_rate, accuracy

# Replace 'train_images_folder' and 'test_images_folder' with your directories
train_images_folder = 'dataset2'
test_images_folder = 'testDataset2'

# Train the recognizer initially
train_recognizer(train_images_folder)

# Update the recognizer with additional data
#for i in range(5):
#    update_recognizer('dataset')

# Calculate and print metrics
error_rate, precision, false_discovery_rate, true_positive_rate, false_negative_rate, accuracy = calculate_metrics(test_images_folder)
print(f"Accuracy: {accuracy:.4f}")
print(f"Error Rate: {error_rate:.4f}")
print(f"Precision: {precision:.4f}")
print(f"False Discovery Rate: {false_discovery_rate:.4f}")
print(f"True Positive Rate: {true_positive_rate:.4f}")
print(f"False Negative Rate: {false_negative_rate:.4f}")


True Positive: 20
True Negative: 55
False Positive: 105
False Negative: 0
Accuracy: 0.4167
Error Rate: 0.5833
Precision: 0.1600
False Discovery Rate: 0.8400
True Positive Rate: 0.1111
False Negative Rate: 0.0000


True Positive: 28
True Negative: 46
False Positive: 106
False Negative: 0
Accuracy: 0.4111
Error Rate: 0.5889
Precision: 0.2090
False Discovery Rate: 0.7910
True Positive Rate: 0.1556
False Negative Rate: 0.0000