In [15]:
import os
import cv2
import numpy as np
from matplotlib import pyplot as plt

In [16]:
def load_face_detection_model(weights_path, arch_path):
    """
    Loads the DNN face detection model.
    """
    return cv2.dnn.readNetFromTensorflow(weights_path, arch_path)

In [17]:
def get_image_paths(base_path):
    """
    Walks through the base directory and returns a list of image file paths.
    """
    image_paths = []
    for root, dirs, files in os.walk(base_path):
        for file in files:
            if file.endswith(('.jpg', '.jpeg', '.png')):
                image_paths.append(os.path.join(root, file))
    return image_paths


In [18]:
def crop_faces_from_image(image, net, image_path):
    """
    Detects faces in the image and returns a list of cropped faces and corresponding labels.
    """
    cropped_faces = []
    labels = []
    
    blob = cv2.dnn.blobFromImage(image, 1.0, (300, 300), [104., 177., 123.], False, False)
    net.setInput(blob)
    detections = net.forward()

    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        
        if confidence > 0.7:  # Confidence threshold
            box = detections[0, 0, i, 3:7] * np.array([image.shape[1], image.shape[0], image.shape[1], image.shape[0]])
            (startX, startY, endX, endY) = box.astype("int")

            # Crop the face
            cropped_face = image[startY:endY, startX:endX]
            
            # Store the cropped face and label (based on folder name)
            cropped_faces.append(cropped_face)
            labels.append(os.path.basename(os.path.dirname(image_path)))  # Label is folder name (person's name)
    
    return cropped_faces, labels

In [19]:
def save_cropped_faces(cropped_faces, labels, output_path, image_path):
    """
    Saves cropped faces to the output directory with corresponding labels.
    """
    person_folder = labels[0]  # Assuming all faces in one image are from the same person

    person_output_path = os.path.join(output_path, person_folder)
    if not os.path.exists(person_output_path):
        os.makedirs(person_output_path)

    for i, cropped_face in enumerate(cropped_faces):
        cropped_filename = os.path.join(person_output_path, f'{person_folder}_{os.path.basename(image_path)}_{i}.jpg')
        cv2.imwrite(cropped_filename, cropped_face)
        print(f"Saved cropped face: {cropped_filename}")

In [20]:
def process_images(image_paths, net, output_path):
    """
    Processes all images: detects faces, crops them, and saves them to the output path.
    """
    for image_path in image_paths:
        image = cv2.imread(image_path)
        
        # Detect faces and get the cropped faces and labels
        cropped_faces, labels = crop_faces_from_image(image, net, image_path)

        if cropped_faces:  # Only save if there are faces detected
            save_cropped_faces(cropped_faces, labels, output_path, image_path)

In [21]:
# Set the base path for your dataset and the output path
base_path = '/kaggle/input/pins-face-recognition/105_classes_pins_dataset/'
output_path = '/kaggle/working/cropped_faces/'

# Set the paths for the DNN model files
dnn_weights_path = '/kaggle/input/dnn/tensorflow2/default/1/opencv_face_detector_uint8.pb'
dnn_arch_path = '/kaggle/input/dnn/tensorflow2/default/1/opencv_face_detector.pbtxt'

# Load the face detection model
net = load_face_detection_model(dnn_weights_path, dnn_arch_path)

# Get image paths
image_paths = get_image_paths(base_path)

# Process images to detect faces and save cropped faces
process_images(image_paths, net, output_path)

Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther233_93.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther37_110.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther228_91.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther92_145.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther40_113.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther196_69.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther159_45.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex Lawther_Alex Lawther129_24.jpg_0.jpg
Saved cropped face: /kaggle/working/cropped_faces/pins_Alex Lawther/pins_Alex La

In [27]:
import pandas as pd
import os

# List to store file paths and labels
data_records = []

# Define the output path where the cropped images are saved
output_path = '/kaggle/working/cropped_faces/'  # Ensure this matches your cropped image output path

# Loop through the output directory to collect file paths and labels
for label_folder in os.listdir(output_path):
    label_folder_path = os.path.join(output_path, label_folder)
    if os.path.isdir(label_folder_path):
        for file in os.listdir(label_folder_path):
            if file.endswith(('.jpg', '.jpeg', '.png')):
                file_path = os.path.join(label_folder_path, file)
                data_records.append({'image_path': file_path, 'label': label_folder})

# Convert list to DataFrame and save as CSV
df = pd.DataFrame(data_records)
df.to_csv('/kaggle/working/labels.csv', index=False)


In [28]:
import shutil

# Compress the entire directory of cropped faces and the labels CSV
shutil.make_archive('/kaggle/working/cropped_faces_dataset', 'zip', '/kaggle/working/cropped_faces')
shutil.copy('/kaggle/working/labels.csv', '/kaggle/working/cropped_faces_dataset')


'/kaggle/working/cropped_faces_dataset'