Face Detection and Classification Pipeline

Description:
This script processes a specified folder of .jpg images by copying and renaming the images, detecting faces within each image, and then grouping the detected faces by individual. Each unique person is identified based on facial encodings, allowing for grouped classification of images by person.

Usage Instructions:
1. Set `source_images_folder` to the path of the source images folder, which must contain .jpg files only.
2. Run the script. It writes the renamed images to `temp/photos_with_id`, the cropped faces to `temp/faces`, and one folder per identified individual under `temp/persons`.

WARNING:
Ensure the source images folder contains ONLY .jpg files. Other formats are not supported and may cause errors.

Process Overview:
- Step 1: `copy_and_rename_files` - Copies images from the source folder to a temporary folder, renaming them sequentially.
- Step 2: `create_faces_folder` - Detects and crops faces from each renamed image, saving individual face images.
- Step 3: `group_faces` - Groups face images by individual based on facial encoding comparisons and saves each group to a separate folder.


In [None]:
import os
import shutil
from shutil import copy2
import face_recognition
from PIL import Image

# Folder holding the original photos to process. It is passed as the source
# folder to copy_and_rename_files when the pipeline runs. This is a
# machine-specific absolute path: point it at your own image folder.
source_images_folder = "C:/Users/noahv/Coding Projects/CrowdTag R&D/datasets/Cabin Party"

def copy_and_rename_files(source_folder, destination_folder):
    """
    Copy the .jpg/.jpeg files from the source folder into the destination
    folder, renaming each one to ``picture_<ID>.jpg``.

    IDs start at 1 and are assigned in sorted filename order, so repeated runs
    over the same folder produce the same numbering. Sub-directories and files
    with any other extension (e.g. ``Thumbs.db``) are skipped instead of being
    copied under a misleading ``.jpg`` name.

    Args:
        source_folder: Folder containing the original images.
        destination_folder: Folder that receives the renamed copies. It is
            deleted and recreated if it already exists.
    """
    # Always start from an empty destination so files from a previous run
    # never mix with the new numbering.
    if os.path.exists(destination_folder):
        shutil.rmtree(destination_folder)
    os.makedirs(destination_folder)

    # os.listdir returns entries in arbitrary order; sort for stable IDs and
    # keep only regular files with a JPEG extension (case-insensitive).
    image_names = sorted(
        name
        for name in os.listdir(source_folder)
        if name.lower().endswith((".jpg", ".jpeg"))
        and os.path.isfile(os.path.join(source_folder, name))
    )

    for photo_id, name in enumerate(image_names, start=1):
        new_filename = f"picture_{photo_id}.jpg"
        shutil.copy(
            os.path.join(source_folder, name),
            os.path.join(destination_folder, new_filename),
        )
        # '\r' keeps the progress message on a single console line
        print(f"Copied and renamed to {new_filename}", end='\r')

    print("All files have been copied and renamed.")

def detect_faces(image_path):
    """
    Return the locations of all faces found in the image at ``image_path``.

    The image is loaded with ``face_recognition.load_image_file`` and the
    result of ``face_recognition.face_locations`` is returned unchanged. The
    cropping code in this file indexes each location as
    (top, right, bottom, left).
    """
    pixels = face_recognition.load_image_file(image_path)
    return face_recognition.face_locations(pixels)

def create_faces_folder(images_with_id_path, destination_folder):
    """
    Detect the faces in every image of a folder and save each face as its own
    cropped image.

    Crops are written to ``destination_folder`` as
    ``picture_<photo ID>_face_<index>.jpg``, where the photo ID is the part of
    the source filename after its last underscore (``picture_12.jpg`` -> 12)
    and the index is the face's position in the detection result.

    Args:
        images_with_id_path: Folder of images named ``picture_<ID>.jpg``.
        destination_folder: Folder that receives the cropped faces. It is
            deleted and recreated if it already exists.
    """
    # Always start from an empty destination folder
    if os.path.exists(destination_folder):
        shutil.rmtree(destination_folder)
    os.makedirs(destination_folder)

    # List the folder once; its contents do not change while we iterate, so
    # there is no need to re-scan it for every progress message.
    image_files = os.listdir(images_with_id_path)
    total = len(image_files)

    for index, file in enumerate(image_files):
        full_file_path = os.path.join(images_with_id_path, file)

        print(f"Detecting faces in: {full_file_path}, {((index+1) / total * 100):.2f}% done.", end='\r')
        faces = detect_faces(full_file_path)
        if not faces:
            continue  # nothing to crop; skip opening the image again

        # Parse the ID from the filename only, so underscores in the folder
        # path or a different extension length cannot corrupt it.
        photo_id = os.path.splitext(file)[0].split("_")[-1]

        # The context manager closes the underlying file handle even if a
        # crop or save fails.
        with Image.open(full_file_path) as image:
            for i, (top, right, bottom, left) in enumerate(faces):
                # PIL's crop box is (left, upper, right, lower)
                cropped_image = image.crop((left, top, right, bottom))
                cropped_image.save(
                    os.path.join(destination_folder, f'picture_{photo_id}_face_{i}.jpg')
                )

def group_faces(faces_folder, output_folder):
    """
    Group face images by individual by comparing their face encodings.

    Every face image is encoded. Each encoding is compared against the
    encodings collected so far for every existing group, and it joins the
    first group in which ANY stored encoding matches within the tolerance.
    If no group matches, a new group is started. The image is copied into
    ``<output_folder>/person_<group id>``.

    Images in which no face encoding can be computed are not copied anywhere.

    Args:
        faces_folder: Folder containing the cropped face images.
        output_folder: Folder that receives one ``person_<N>`` sub-folder per
            group. It is deleted and recreated if it already exists.
    """
    # Always start from an empty output folder
    if os.path.exists(output_folder):
        shutil.rmtree(output_folder)
    os.makedirs(output_folder)

    # group id -> list of encodings assigned to that group so far
    face_groups = {}

    # Sort so group numbering is reproducible, and list the folder only once
    # instead of re-scanning it for every progress message.
    face_files = sorted(os.listdir(faces_folder))
    total = len(face_files)

    for index, filename in enumerate(face_files):
        file_path = os.path.join(faces_folder, filename)
        print(f"Classifying image: {file_path}, {((index+1) / total * 100):.2f}% done.", end='\r')

        image = face_recognition.load_image_file(file_path)
        encodings = face_recognition.face_encodings(image)

        for encoding in encodings:
            matched_group_id = None

            for group_id, group_encodings in face_groups.items():
                # compare_faces returns one boolean per stored encoding.
                # Checking only element [0] would ignore every encoding added
                # to the group after its first member.
                matches = face_recognition.compare_faces(
                    group_encodings, encoding, tolerance=0.6  # Adjust tolerance if needed
                )
                if any(matches):
                    matched_group_id = group_id
                    break

            if matched_group_id is None:
                # No match: start a new group with this encoding
                matched_group_id = len(face_groups) + 1
                face_groups[matched_group_id] = [encoding]
            else:
                face_groups[matched_group_id].append(encoding)

            dest_folder = os.path.join(output_folder, f"person_{matched_group_id}")
            os.makedirs(dest_folder, exist_ok=True)
            copy2(file_path, dest_folder)

    print("Faces grouped successfully.")

# Run the pipeline: each stage reads the folder written by the previous one
photos_with_id_folder = "temp/photos_with_id"
cropped_faces_folder = "temp/faces"
grouped_persons_folder = "temp/persons"

copy_and_rename_files(source_images_folder, photos_with_id_folder)
create_faces_folder(photos_with_id_folder, cropped_faces_folder)
group_faces(cropped_faces_folder, grouped_persons_folder)

Face Encoding and Export Script

Description:
This script exports facial encodings and organizes images by individuals for a specified event. After classifying and grouping images by detected individuals, it saves face encodings associated with each person to a text file. It also creates a list of images without identified faces.

Usage Instructions:
1. Ensure images have been pre-processed and grouped by individual in the specified input folder (`faces_folder`).
2. Specify an event name and a unique event code to generate a unique output directory name.
3. Run the script to save the encoding data to the output directory, which includes:
   - A text file of encodings for identified faces (`encodings.txt`)
   - A text file listing non-identified images (`non_identified.txt`)
   - A copy of the renamed images from `temp/photos_with_id`, placed in a `pictures` subfolder.

WARNING:
Ensure the `faces_folder` contains only pre-processed images of faces, organized in subdirectories by individual.

Process Overview:
- Step 1: `save_encodings_to_text` - Saves the face encodings for each person and lists unidentified images.
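- Step 2: `calculate_file_name` - Generates a unique directory name from the event name and code using SHA-256 hashing and URL-safe Base64 encoding, truncated to 16 characters. Note that the script calls this helper first, because its result is the output directory passed to `save_encodings_to_text`.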



In [None]:
import face_recognition
import os
import hashlib
import base64
import shutil

# Event details. Both values are passed to calculate_file_name, which derives
# the output directory name from them, so the same name/code pair always maps
# to the same directory.
event_name = "2024 oud en nieuw"
event_code = "123123"

def save_encodings_to_text(output_path, faces_folder="temp/persons", photos_folder="temp/photos_with_id"):
    """
    Export the grouped faces of an event as text files plus a copy of the photos.

    Writes three things into ``output_path``:
    - ``pictures/``: a copy of the renamed photos from ``photos_folder``.
    - ``encodings.txt``: one line per encoding of each person's representative
      face, formatted ``<person folder>:<picture IDs>:<encoding values>``.
    - ``non_identified.txt``: comma-terminated IDs of the photos that do not
      appear in any person folder.

    Args:
        output_path: Directory to write into; created if it does not exist.
        faces_folder: Folder with one sub-folder per person, each holding face
            images named ``picture_<ID>_face_<index>.jpg``.
        photos_folder: Folder with the renamed photos (``picture_<ID>.jpg``).
    """
    os.makedirs(output_path, exist_ok=True)

    # copytree refuses to write into an existing directory, so remove the copy
    # left behind by a previous run for the same event before copying again.
    pictures_path = os.path.join(output_path, "pictures")
    if os.path.exists(pictures_path):
        shutil.rmtree(pictures_path)
    shutil.copytree(photos_folder, pictures_path)

    # IDs of photos that belong to at least one person (a set gives O(1) lookups)
    identified_pictures = set()

    with open(os.path.join(output_path, "encodings.txt"), "w") as file:
        for person_folder in sorted(os.listdir(faces_folder)):
            person_path = os.path.join(faces_folder, person_folder)

            if not os.path.isdir(person_path):
                continue  # Skip non-folder items

            face_files = sorted(os.listdir(person_path))
            if not face_files:
                continue  # Empty person folder: no representative face to encode

            # "picture_12_face_0.jpg" -> "12"
            picture_ids = [filename.split("_")[1] for filename in face_files]
            identified_pictures.update(picture_ids)

            # Use the first image in the folder as the representative face
            first_image_path = os.path.join(person_path, face_files[0])
            stored_image = face_recognition.load_image_file(first_image_path)
            stored_encodings = face_recognition.face_encodings(stored_image)

            picture_ids_str = ", ".join(picture_ids)
            for encoding in stored_encodings:
                encoding_str = ",".join(map(str, encoding))  # Convert encoding array to string
                file.write(f"{person_folder}:{picture_ids_str}:{encoding_str}\n")

    # Record IDs of photos that were not matched to any person
    with open(os.path.join(output_path, "non_identified.txt"), "w") as file:
        for picture in os.listdir(pictures_path):
            # "picture_12.jpg" -> "12"
            picture_id = picture.split("_")[1][:-4]
            if picture_id not in identified_pictures:
                file.write(f"{picture_id},")

    print(f"Data saved to {output_path}")

def calculate_file_name(event_name, event_code):
    """
    Derive a short, deterministic name from an event's name and code.

    The two values are joined as ``<name>|<code>``, hashed with SHA-256, and
    the digest is encoded with URL-safe Base64. The first 16 characters of
    that encoding are returned.
    """
    seed = "{}|{}".format(event_name, event_code).encode()
    digest = hashlib.sha256(seed).digest()

    # Base64 output is pure ASCII, so truncating the bytes before decoding
    # yields the same 16 characters as truncating the decoded string.
    return base64.urlsafe_b64encode(digest)[:16].decode('utf-8')

# Derive the event-specific output directory, then export the encodings into it
output_name = f"../outputs/{calculate_file_name(event_name, event_code)}"
save_encodings_to_text(output_name)