In [1]:
import os
import numpy as np
from face_recognition import api
from sklearn.cluster import DBSCAN
from collections import defaultdict

In [2]:
def get_face_encodings(folder_path):
    encodings = []
    file_paths = []
    for file_name in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file_name)
        image = api.load_image_file(file_path)
        face_encodings = api.face_encodings(image)
        for encoding in face_encodings:
            encodings.append(encoding)
            file_paths.append(file_path)  # Associate each encoding with the image path
    return encodings, file_paths

# Paths to known and unknown face folders
known_folder = "photos/known_faces"
unknown_folder = "photos/unknown_faces"

# Get encodings and file paths for both known and unknown images
known_encodings, known_paths = get_face_encodings(known_folder)
unknown_encodings, unknown_paths = get_face_encodings(unknown_folder)

# Combine all encodings and paths
all_encodings = known_encodings + unknown_encodings
all_paths = known_paths + unknown_paths

In [3]:
# Run DBSCAN on all encodings
dbscan = DBSCAN(eps=0.6, min_samples=1, metric="euclidean").fit(all_encodings)
labels = dbscan.labels_

# Organize clusters by unique faces
face_clusters = defaultdict(set)

for idx, label in enumerate(labels):
    if label != -1:  # Exclude noise points
        image_path = all_paths[idx]
        face_clusters[label].add(image_path)

# Display clusters
for label, images in face_clusters.items():
    print(f"Cluster {label} (unique face):")
    for image_path in images:
        print(f"  - {image_path}")

# For unclustered (noise) points
unclustered_images = [all_paths[idx] for idx, label in enumerate(labels) if label == -1]
if unclustered_images:
    print("\nUnclustered faces:")
    for image_path in set(unclustered_images):  # Unique images only
        print(f"  - {image_path}")

Cluster 0 (unique face):
  - photos/unknown_faces\image0.jpg
  - photos/unknown_faces\image2.jpg
  - photos/known_faces\IMG_5401.jpg
  - photos/unknown_faces\image1.jpg
Cluster 1 (unique face):
  - photos/unknown_faces\20240324_023250.jpg
  - photos/unknown_faces\20240413_122016.jpg
  - photos/unknown_faces\20241006_162622.jpg
  - photos/known_faces\rn_image_picker_lib_temp_820ef847-389d-4b67-9a7a-f36edc967bbc.jpg
Cluster 2 (unique face):
  - photos/unknown_faces\IMG_3721.jpg
  - photos/known_faces\Screenshot 2024-10-24 221400.png
  - photos/unknown_faces\IMG_2747.jpg
  - photos/unknown_faces\IMG_2488.jpg
  - photos/unknown_faces\IMG_2617.png
Cluster 3 (unique face):
  - photos/known_faces\Screenshot 2024-10-24 221507.png
  - photos/unknown_faces\IMG_2617.png
  - photos/unknown_faces\IMG_2747.jpg
Cluster 4 (unique face):
  - photos/unknown_faces\20240413_122016.jpg
Cluster 5 (unique face):
  - photos/unknown_faces\20240413_122016.jpg
Cluster 6 (unique face):
  - photos/unknown_faces\20