In [60]:
import numpy as np
import cv2
import face_recognition
import os
from tqdm import tqdm
import shutil

In [61]:
import logging
import contextlib
import tensorflow as tf
import warnings
from mtcnn import MTCNN
from tqdm import tqdm

# Suppress TensorFlow logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow logs at level 3 (ERROR)
tf.get_logger().setLevel('ERROR')

# Configure logging to suppress logs from TensorFlow and MTCNN
logging.getLogger('tensorflow').setLevel(logging.FATAL)
logging.getLogger('mtcnn').setLevel(logging.ERROR)

# Suppress warnings
warnings.filterwarnings('ignore')

# Function to suppress stdout and stderr
@contextlib.contextmanager
def suppress_output():
    with open(os.devnull, 'w', encoding='utf-8') as fnull:
        with contextlib.redirect_stdout(fnull), contextlib.redirect_stderr(fnull):
            yield

def detect_faces_mtcnn(image):
    """
    Detect faces using MTCNN and return bounding boxes in a format compatible with face_recognition.
    """
    with suppress_output():
        # Initialize the MTCNN detector without printing logs
        detector = MTCNN()

        # Detect faces in the image
        faces = detector.detect_faces(image)

        # Filter faces with confidence higher than 0.70 and extract their bounding boxes
        rectangles = []
        for face in faces:
            if face['confidence'] > 0.70:
                x, y, width, height = face['box']
                # Convert to top, right, bottom, left format
                top, right, bottom, left = y, x + width, y + height, x
                rectangles.append((top, right, bottom, left))
                
        return rectangles

In [62]:

def get_embeddings(rgb_image, face_locations):
    """
    Extract face embeddings from the given image based on face locations.
    Args:
    rgb_image (np.array): RGB image from which to extract embeddings.
    face_locations (list): List of face locations in (top, right, bottom, left) format.
    Returns:
    list: List of face encodings.
    """
    if not face_locations:
        return []
    # Compute face embeddings
    face_encodings = face_recognition.face_encodings(rgb_image, face_locations)
    return face_encodings

def process_images(dirpath):
    """
    Process images in the given directory to extract face embeddings.
    Args:
    dirpath (str): Path to the directory containing images.
    Returns:
    list, list: List of embeddings and list of corresponding image paths.
    """
    embeddings = []
    image_paths = []

    # Iterate over all files in the directory
    for filename in tqdm(os.listdir(dirpath)):
        file_path = os.path.join(dirpath, filename)
        image = cv2.imread(file_path)

        # Check if the image was loaded successfully
        if image is None:
            print(f"Error: Could not load image from {file_path}")
            continue

        # Convert the image from BGR to RGB for face_recognition
        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Detect faces and extract face images
        face_locations = detect_faces_mtcnn(rgb_image)

        # Extract embeddings for each detected face
        face_encodings = get_embeddings(rgb_image, face_locations)
    
        for encoding in face_encodings:
            embeddings.append(encoding)
            image_paths.append(file_path)

    return embeddings, image_paths


In [63]:
dir= 'train4'
embeddings,image_paths = process_images(dir)

100%|██████████████████████████████████████████████████████████████████████████████████| 10/10 [01:05<00:00,  6.51s/it]


In [65]:
from sklearn.cluster import DBSCAN
eps = 0.5  # The maximum distance between two samples for one to be considered as in the neighborhood of the other
min_samples = 3  # The number of samples (or total weight) in a neighborhood for a point to be considered as a core point
dbscan = DBSCAN(eps=eps, min_samples=min_samples)
cluster_labels = dbscan.fit_predict(embeddings)


In [66]:
# from sklearn.cluster import KMeans

# num_clusters = 5

# # Apply K-means clustering on extracted features
# kmeans = KMeans(n_clusters=num_clusters, random_state=42)
# cluster_labels = kmeans.fit_predict(embeddings)

In [67]:
cluster_labels

array([ 0,  0,  1,  1,  1,  0,  0, -1,  0,  1,  1], dtype=int64)

In [68]:
mapping = {}
for i,file in enumerate(image_paths):
    mapping[file] = cluster_labels[i]

In [69]:
for image_path, cluster_label in mapping.items():
    cluster_folder = f'Clusters_{dir}/Cluster_{cluster_label}/'
    os.makedirs(cluster_folder, exist_ok=True)
    
    filename = os.path.basename(image_path)
    destination_path = os.path.join(cluster_folder, filename)
    
    try:
        shutil.copyfile(image_path, destination_path)
        print(f"Copied '{filename}' to '{cluster_folder}'")
    except FileNotFoundError:
        print(f"Error: '{filename}' not found")
    except FileExistsError:
        print(f"Error: '{filename}' already exists in '{cluster_folder}'")

Copied '1615919242148.jpg' to 'Clusters_train4/Cluster_0/'
Copied '1615919242221.jpg' to 'Clusters_train4/Cluster_0/'
Copied '1628522161324.jpg' to 'Clusters_train4/Cluster_1/'
Copied '1628594944197.jpg' to 'Clusters_train4/Cluster_1/'
Copied '1628594944261.jpg' to 'Clusters_train4/Cluster_1/'
Copied '8deVjGlgsFmydcYzXASRyOXUGMV.jpg' to 'Clusters_train4/Cluster_0/'
Copied '8dYyRetSRJzwVLMxhuSAhRKvJst.jpg' to 'Clusters_train4/Cluster_-1/'
Copied '8emHWsGsfXbkEGFdUgzFNGBZfCH.jpg' to 'Clusters_train4/Cluster_0/'
Copied 'IMG_20190302_145206.jpg' to 'Clusters_train4/Cluster_1/'
Copied '_20191001_103914.JPG' to 'Clusters_train4/Cluster_1/'
