In [1]:
!pip install tensorflow mtcnn scikit-learn Pillow keras-facenet

Collecting mtcnn
  Downloading mtcnn-1.0.0-py3-none-any.whl.metadata (5.8 kB)
Collecting keras-facenet
  Downloading keras-facenet-0.3.2.tar.gz (10 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting lz4>=4.3.3 (from mtcnn)
  Downloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Downloading mtcnn-1.0.0-py3-none-any.whl (1.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading lz4-4.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m23.8 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: keras-facenet
  Building wheel for keras-facenet (setup.py) ... [?25l[?25hdone
  Created wheel for keras-facenet: filename=keras_facenet-0.3.2-py3-none-any.whl size=10368 sha256=4c106508e86b51406e390db47862a6eb9788d39d696f4b76cc026

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from mtcnn.mtcnn import MTCNN
from keras_facenet import FaceNet
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from PIL import Image
import os
from google.colab import drive

# Mount Google Drive at /content/drive so the dataset path below resolves.
drive.mount('/content/drive')

# Image directory that is passed to process_folder() at the end of this cell.
folder_path = "/content/drive/MyDrive/Dataset/all_images"

# Load MTCNN face detector (module-level instance used by extract_faces)
detector = MTCNN()

# Load FaceNet embedder (module-level instance used by get_embedding)
embedder = FaceNet()

# Function to extract faces from an image
def extract_faces(image_path, required_size=(160, 160)):
    """Detect faces in an image and return them as fixed-size RGB crops.

    Args:
        image_path: Path of the image file to open.
        required_size: Size passed to PIL's ``resize`` for every face crop.

    Returns:
        A ``(faces, detections)`` tuple. ``faces`` is a list of arrays, one
        resized crop per usable detection. ``detections`` holds the detector
        results those crops came from, so ``faces[i]`` always corresponds to
        ``detections[i]``. Detections whose box has no area inside the image
        are left out of both lists.
    """
    # The context manager closes the underlying file once the pixels are copied.
    with Image.open(image_path) as img:
        img_array = np.asarray(img.convert("RGB"))
    img_height, img_width = img_array.shape[:2]

    faces = []
    kept_detections = []
    for detection in detector.detect_faces(img_array):
        x, y, width, height = detection['box']
        # Clip the box to the image instead of shifting it: clamping only the
        # origin moved the right/bottom edge outward whenever the detector
        # reported a negative x or y.
        left, top = max(0, x), max(0, y)
        right = min(img_width, x + width)
        bottom = min(img_height, y + height)
        if right <= left or bottom <= top:
            # An empty crop cannot be resized; drop it together with its
            # detection so both returned lists stay index-aligned.
            continue
        face_image = Image.fromarray(img_array[top:bottom, left:right]).resize(required_size)
        faces.append(np.asarray(face_image))
        kept_detections.append(detection)
    return faces, kept_detections

# Function to get face embeddings using keras_facenet
def get_embedding(face_img):
    """Return the embedding vector the FaceNet embedder produces for one face.

    The face is cast to float32 and given a leading batch axis of size one
    before being handed to ``embedder.embeddings``; the first (only) row of
    the result is returned.
    """
    single_face_batch = face_img.astype('float32')[np.newaxis, ...]
    return embedder.embeddings(single_face_batch)[0]

# Perform DBSCAN clustering
def cluster_faces(embeddings, eps=0.5, min_samples=2):
    """Run DBSCAN (euclidean metric) on the embeddings and return its labels.

    Args:
        embeddings: Samples to cluster, one row per face.
        eps: DBSCAN neighbourhood radius.
        min_samples: DBSCAN minimum neighbourhood size.

    Returns:
        The label array produced by ``DBSCAN.fit_predict``.
    """
    clusterer = DBSCAN(metric="euclidean", min_samples=min_samples, eps=eps)
    return clusterer.fit_predict(embeddings)

# Visualize clustered faces
def visualize_clusters(image_path, detections, labels):
    """Draw each detected face's bounding box and cluster label on the image.

    Args:
        image_path: Path of the image the detections belong to.
        detections: Detector results for this image; each must carry a
            ``'box'`` entry of ``(x, y, width, height)``.
        labels: Cluster labels for this image, where ``labels[i]`` belongs to
            ``detections[i]``. Negative labels (DBSCAN noise) are drawn in
            gray and captioned "Noise" rather than shown as a cluster.
    """
    # Convert inside the context manager so the file is closed right away.
    with Image.open(image_path) as img:
        rgb_img = img.convert("RGB")

    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(rgb_img)

    for i, detection in enumerate(detections):
        x, y, width, height = detection['box']
        label = int(labels[i])
        if label < 0:
            color, caption = "gray", "Noise"
        else:
            # One palette entry per cluster id; the previous green/blue scheme
            # could not tell clusters 1, 2, ... (or noise) apart.
            color, caption = plt.cm.tab10(label % 10), f"Cluster {label}"
        rect = plt.Rectangle((x, y), width, height, fill=False, color=color, linewidth=2)
        ax.add_patch(rect)
        ax.text(x, y - 10, caption, color=color, fontsize=12, bbox=dict(facecolor="white", alpha=0.5))

    ax.axis("off")
    plt.show()
    # Release the figure so one-figure-per-image loops do not pile up memory.
    plt.close(fig)

# Process all images in the folder
def process_folder(folder_path):
    """Detect, embed, and DBSCAN-cluster every face found in a folder of images.

    Images (.jpg/.png/.jpeg, any letter case) are processed in sorted order.
    Each face embedding is reduced with PCA, clustered with ``cluster_faces``,
    and every image that contained faces is then plotted with its own faces'
    labels.

    Args:
        folder_path: Directory whose image files are scanned (not recursive).

    Returns:
        None. Progress is printed and results are shown as plots.
    """
    # os.listdir order is arbitrary; sorting keeps runs reproducible.
    image_paths = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
    )

    face_info = []  # (image_path, detections, face_count) for images with faces
    all_embeddings = []

    for image_path in image_paths:
        print(f"Processing {image_path}...")
        faces, detections = extract_faces(image_path)

        if not faces:
            print(f"No faces detected in {image_path}.")
            continue

        all_embeddings.extend(get_embedding(face) for face in faces)
        face_info.append((image_path, detections, len(faces)))

    if not all_embeddings:
        print("No embeddings generated. No faces found in any image.")
        return

    embedding_matrix = np.array(all_embeddings)
    n_samples, n_features = embedding_matrix.shape
    if n_samples > 1:
        # PCA rejects n_components > min(n_samples, n_features), so a small
        # folder must use fewer than the preferred 40 components.
        n_components = min(40, n_samples, n_features)
        reduced_embeddings = PCA(n_components=n_components).fit_transform(embedding_matrix)
    else:
        # A single face has no variance to decompose; cluster it as-is.
        reduced_embeddings = embedding_matrix
    labels = cluster_faces(reduced_embeddings, eps=1.0, min_samples=2)

    # labels is ordered like all_embeddings, i.e. image by image. Hand each
    # image only its own slice; passing the full array made every image reuse
    # the labels of the first faces.
    offset = 0
    for image_path, detections, face_count in face_info:
        visualize_clusters(image_path, detections, labels[offset:offset + face_count])
        offset += face_count

# Run the main function: detect, embed, and DBSCAN-cluster the faces in
# folder_path, then plot each image that contained faces.
process_folder(folder_path)


Output hidden; open in https://colab.research.google.com to view.

In [3]:
import numpy as np
import matplotlib.pyplot as plt
from mtcnn.mtcnn import MTCNN
from keras_facenet import FaceNet
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import cv2 as cv
from PIL import Image
import os
from google.colab import drive

# Mount Google Drive at /content/drive so the dataset path below resolves.
# NOTE(review): the previous cell already mounts Drive and builds the same
# detector/embedder; this cell repeats that setup so it can run on its own.
drive.mount('/content/drive')

# Image directory that is passed to process_folder() at the end of this cell.
folder_path = "/content/drive/MyDrive/Dataset/all_images"

# Load MTCNN face detector (module-level instance used by extract_faces)
detector = MTCNN()

# Load FaceNet embedder (module-level instance used by get_embedding)
embedder = FaceNet()

# Function to extract faces from an image
def extract_faces(image_path, required_size=(160, 160)):
    """Detect faces in an image and return them as fixed-size RGB crops.

    Args:
        image_path: Path of the image file to open.
        required_size: Size passed to PIL's ``resize`` for every face crop.

    Returns:
        A ``(faces, detections)`` tuple. ``faces`` is a list of arrays, one
        resized crop per usable detection. ``detections`` holds the matching
        detector results, so ``faces[i]`` always corresponds to
        ``detections[i]``. Detections whose box has no area inside the image
        are omitted from both lists.
    """
    # Load as RGB and close the file handle as soon as the pixels are copied.
    with Image.open(image_path) as img:
        img_array = np.asarray(img.convert("RGB"))
    img_height, img_width = img_array.shape[:2]

    faces = []
    kept_detections = []
    for detection in detector.detect_faces(img_array):
        x, y, width, height = detection['box']
        # Intersect the box with the image. Clamping only x/y to 0 while
        # keeping the full width/height stretched the crop past the reported
        # right/bottom edge whenever the origin was negative.
        left, top = max(0, x), max(0, y)
        right = min(img_width, x + width)
        bottom = min(img_height, y + height)
        if right <= left or bottom <= top:
            # Nothing left to crop; skip the detection too so the two
            # returned lists stay index-aligned.
            continue
        face_image = Image.fromarray(img_array[top:bottom, left:right]).resize(required_size)
        faces.append(np.asarray(face_image))
        kept_detections.append(detection)
    return faces, kept_detections

# Function to get face embeddings using keras_facenet
def get_embedding(face_img):
    """Embed one face crop with the module-level FaceNet embedder.

    The crop is converted to float32, wrapped in a batch of size one for
    ``embedder.embeddings``, and the single resulting row is returned.
    """
    as_float = face_img.astype('float32')
    batch_of_one = as_float[None]
    embedding_rows = embedder.embeddings(batch_of_one)
    return embedding_rows[0]

# Perform K-Means clustering
def cluster_faces(embeddings, n_clusters):
    """Run K-Means (fixed random_state=7) and return one label per embedding.

    Args:
        embeddings: Samples to cluster, one row per face.
        n_clusters: Number of clusters K-Means is asked to form.

    Returns:
        The label array produced by ``KMeans.fit_predict``.
    """
    return KMeans(random_state=7, n_clusters=n_clusters).fit_predict(embeddings)

# Visualize clustered faces
def visualize_clusters(image_path, detections, labels):
    """Show the image with one colored box and "Cluster N" tag per detection.

    Args:
        image_path: Path of the image to display.
        detections: Detector results; each must carry a ``'box'`` entry of
            ``(x, y, width, height)``.
        labels: Cluster labels, read as ``labels[i]`` for ``detections[i]``.
    """
    picture = Image.open(image_path).convert("RGB")
    _, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(picture)

    for idx, det in enumerate(detections):
        left, top, box_w, box_h = det['box']
        cluster_id = labels[idx]
        # tab10 has ten entries, so cluster ids wrap around modulo 10.
        rgba = plt.cm.tab10(cluster_id % 10)
        ax.add_patch(
            plt.Rectangle((left, top), box_w, box_h, fill=False, color=rgba, linewidth=2)
        )
        ax.text(
            left,
            top - 10,
            f"Cluster {cluster_id}",
            color="white",
            fontsize=12,
            bbox=dict(facecolor=rgba, alpha=0.5),
        )

    ax.axis("off")
    plt.show()

# Process all images in the folder
def process_folder(folder_path, n_clusters):
    """Detect, embed, and K-Means-cluster every face found in a folder of images.

    Images (.jpg/.png/.jpeg, any letter case) are processed in sorted order.
    Face embeddings are reduced with PCA, standardized, and clustered with
    ``cluster_faces``. Every image that contained faces is then plotted with
    its own faces' labels.

    Args:
        folder_path: Directory whose image files are scanned (not recursive).
        n_clusters: Requested number of K-Means clusters. It is lowered, with
            a printed notice, when fewer faces than clusters were found.

    Returns:
        None. Progress is printed and results are shown as plots.
    """
    # os.listdir order is arbitrary; sorting keeps runs reproducible.
    image_paths = sorted(
        os.path.join(folder_path, f)
        for f in os.listdir(folder_path)
        if f.lower().endswith(('.jpg', '.png', '.jpeg'))
    )

    face_info = []  # (image_path, detections, face_count) for images with faces
    all_embeddings = []

    for image_path in image_paths:
        print(f"Processing {image_path}...")
        faces, detections = extract_faces(image_path)

        if not faces:
            print(f"No faces detected in {image_path}.")
            continue

        all_embeddings.extend(get_embedding(face) for face in faces)
        face_info.append((image_path, detections, len(faces)))

    if not all_embeddings:
        print("No embeddings generated. No faces found in any image.")
        return

    embedding_matrix = np.array(all_embeddings)
    n_samples, n_features = embedding_matrix.shape
    if n_samples > 1:
        # PCA rejects n_components > min(n_samples, n_features), so a small
        # folder must use fewer than the preferred 50 components.
        n_components = min(50, n_samples, n_features)
        reduced_embeddings = PCA(n_components=n_components).fit_transform(embedding_matrix)
    else:
        # A single face has no variance to decompose; keep it as-is.
        reduced_embeddings = embedding_matrix
    scaled_embeddings = StandardScaler().fit_transform(reduced_embeddings)

    if n_clusters > n_samples:
        # K-Means cannot form more clusters than there are samples.
        print(f"Only {n_samples} face(s) found; reducing n_clusters from {n_clusters} to {n_samples}.")
        n_clusters = n_samples
    labels = cluster_faces(scaled_embeddings, n_clusters=n_clusters)

    # labels is ordered like all_embeddings, i.e. image by image. Hand each
    # image only its own slice; passing the full array made every image reuse
    # the labels of the first faces.
    offset = 0
    for image_path, detections, face_count in face_info:
        visualize_clusters(image_path, detections, labels[offset:offset + face_count])
        offset += face_count

# Run the K-Means pipeline on folder_path, requesting 5 clusters.
process_folder(folder_path, n_clusters=5)

Output hidden; open in https://colab.research.google.com to view.