In [2]:
from platform import python_version

print(python_version())


3.12.7


In [3]:
from platform import python_version

print(python_version())


3.12.7


In [4]:
import cv2
import torch
import numpy as np
import sqlite3
from retinaface import RetinaFace
from facenet_pytorch import MTCNN, InceptionResnetV1




In [5]:
from ultralytics import YOLO

In [6]:
image_path = 'images/AshutoshSingh1.png'

In [7]:
image_path2 = 'images/IMG_0018.JPG'

In [8]:
image = cv2.imread(image_path)

In [9]:
image1 = cv2.imread(image_path2)

In [10]:
# RetinaFace detection
def detect_faces_retina(image):
    # Detect faces using RetinaFace
    faces = RetinaFace.detect_faces(image)

    # Initialize a list to store detected face coordinates
    result = []
    if faces:
        for key in faces.keys():
            identity = faces[key]
            # Get the bounding box of the face
            facial_area = identity['facial_area']
            # Append the coordinates of the face to the result list
            result.append(facial_area)
    return result

In [11]:
detect_faces_retina(image_path)

[[881, 393, 1518, 1206]]

In [12]:
# Load the YOLOv8 model
model = YOLO("yolov8n.pt")

def detect_faces_yolov8(image):
    # Perform object detection
    results = model(image)

    # Get image dimensions
    height, width, _ = image.shape

    # List to store face coordinates
    faces = []

    # Iterate through detected objects
    for result in results:
        for box in result.boxes:
            # Extract class ID and confidence
            class_id = int(box.cls)
            confidence = float(box.conf)

            # Check if the detected object is a person with confidence > 0.5
            if model.names[class_id] == "person" and confidence > 0.5:
                # Extract bounding box coordinates
                x1, y1, x2, y2 = box.xyxy[0]
                x, y, w, h = int(x1), int(y1), int(x2 - x1), int(y2 - y1)
                faces.append((x, y, w, h))

    return faces

In [13]:
# Example usage:
# Load an image
image = cv2.imread(image_path)

# Detect faces
faces = detect_faces_yolov8(image)

# Print the detected faces' coordinates
print(faces)


0: 640x512 1 person, 342.7ms
Speed: 15.6ms preprocess, 342.7ms inference, 15.6ms postprocess per image at shape (1, 3, 640, 512)
[(226, 163, 2157, 2930)]


In [14]:
faces1 = detect_faces_yolov8(image1)
print(faces1)


0: 448x640 8 persons, 244.4ms
Speed: 11.4ms preprocess, 244.4ms inference, 15.6ms postprocess per image at shape (1, 3, 448, 640)
[(1405, 2036, 1714, 1928), (565, 1300, 1161, 2664), (3468, 1834, 2222, 2128), (0, 468, 792, 3353), (1660, 823, 1948, 3119), (2796, 121, 1799, 2727), (350, 0, 1771, 1748)]


In [15]:
def detect_faces_combined(image):
    # Step 1: Perform person detection using YOLOv8
    results = model(image)
    
    # Get image dimensions
    height, width, _ = image.shape

    # List to store detected face coordinates
    faces = []

    for result in results:
        for box in result.boxes:
            # Extract class ID and confidence
            class_id = int(box.cls)
            confidence = float(box.conf)

            # Check if the detected object is a person with confidence > 0.5
            if model.names[class_id] == "person" and confidence > 0.5:
                # Extract bounding box coordinates
                x1, y1, x2, y2 = box.xyxy[0]
                x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)

                # Crop the region of interest (ROI) where the person is detected
                person_roi = image[y1:y2, x1:x2]

                # Step 2: Detect faces within the person ROI using RetinaFace
                detected_faces = RetinaFace.detect_faces(person_roi)

                # If faces are detected within the ROI, append them to the faces list
                if detected_faces:
                    for key in detected_faces.keys():
                        identity = detected_faces[key]
                        facial_area = identity['facial_area']

                        # Adjust the facial area coordinates relative to the full image
                        x, y, w, h = facial_area
                        faces.append((x + x1, y + y1, w, h))
    
    return faces

In [None]:
# print(detect_faces_combined(image1))

In [17]:
# Function to convert numpy array to binary for SQLite
def convert_array_to_binary(array):
    return array.tobytes()


In [23]:
# Initialize Inception Resnet V1 for face embedding
face_embedding_model = InceptionResnetV1(pretrained='vggface2').eval()

# Function to extract face embeddings
def get_face_embedding(face):
     """
    Function to extract the face embedding using InceptionResNetV1 with proper normalization.

    Parameters:
    - face: A cropped face image in BGR format (loaded using OpenCV).

    Returns:
    - A 512-dimensional embedding vector as a flattened numpy array.
    """
    face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)  # Convert BGR to RGB
    face = cv2.resize(face, (160, 160))  # Resize to the input size expected by the model
    face = torch.FloatTensor(face).permute(2, 0, 1)  # Convert to Tensor and rearrange dimensions
    face = face.unsqueeze(0)  # Add batch dimension
    with torch.no_grad():
        embedding = face_embedding_model(face).detach().numpy()  # Get the face embedding
    return embedding.flatten()  # Flatten to a 1D array


In [24]:
def main(image_path):
    # Load image
    image = cv2.imread(image_path)
    
    # Gather additional details
    name = input("Enter Name: ")
    age = int(input("Enter Age: "))
    description = input("Enter Description: ")
    
    # Detect faces using YOLO 
    faces = detect_faces_yolov8(image)
    
    for (x, y, w, h) in faces:
        face = image[y:y+h, x:x+w]
        # Here, process face (e.g., align and extract embeddings using FaceNet or another model)
        embedding = get_face_embedding(face)  # Extract face embedding using a recognition model

        # Placeholder embedding - replace with actual embedding extraction
        # embedding = np.random.rand(128).astype(np.float32)  # Replace with actual embedding code

        # Store face details in SQLite
        # store_face_details(name, age, description, face, convert_array_to_binary(embedding))
        # print(name)
        # print(age) 
        # print(description) 
        # print(face)
        print(convert_array_to_binary(embedding))
    #     cv2.rectangle(image, (x, y), (x+w, y+h), (0, 255, 0), 2)
    
    # # Display the output
    # cv2.imshow("Detected Faces", image)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

In [22]:
main(image_path2)

Enter Name:  Ashutosh
Enter Age:  22
Enter Description:  hello



0: 448x640 8 persons, 244.5ms
Speed: 9.1ms preprocess, 244.5ms inference, 2.5ms postprocess per image at shape (1, 3, 448, 640)
Ashutosh
22
hello
[[[117 156 231]
  [118 156 234]
  [120 158 236]
  ...
  [135 176 239]
  [134 175 238]
  [132 173 236]]

 [[115 154 232]
  [116 155 233]
  [116 155 233]
  ...
  [135 176 239]
  [134 175 238]
  [132 173 236]]

 [[116 156 231]
  [115 154 232]
  [114 153 231]
  ...
  [134 174 239]
  [134 174 239]
  [133 173 238]]

 ...

 [[ 56 116 198]
  [ 50 112 196]
  [ 55 117 201]
  ...
  [ 82 134 201]
  [ 72 120 192]
  [ 68 112 189]]

 [[ 59 117 199]
  [ 54 114 198]
  [ 58 118 202]
  ...
  [ 67 117 193]
  [ 60 105 186]
  [ 57  99 182]]

 [[ 61 117 198]
  [ 60 116 198]
  [ 61 117 199]
  ...
  [ 41  88 172]
  [ 45  87 176]
  [ 40  82 171]]]
b'C\x12 ?\x94\x83|?;1\x08>|\xfa\xad>\x1f\x96\xde=\xc4B3>`\xa3F?\xb6\xdcz?\xe1\x7fu>&\x0b\\?\x83\xf1P>HZ\x8a>y\xd3h>g\x0f\x8a>\x99\x152?\xf0S@?c\xfe\xc4>?\xca\x1b?Za,?\x8d\xe85?\xd3D??\x8fse=\x1a&\'>(~\xfd<\x92\xe4,?\x1c\xce

In [25]:
main(image_path2)

Enter Name:  Ashutosh
Enter Age:  22
Enter Description:  hello



0: 448x640 8 persons, 252.9ms
Speed: 11.9ms preprocess, 252.9ms inference, 15.6ms postprocess per image at shape (1, 3, 448, 640)
b'\xf5$8=\xcb\x87G\xbcA\x81,=s\xe5Q\xbc\xcf\x16f=\xba_\x95\xbdB\n1<\xa9\x87\xd2\xbc\xa3\x98!<\xb7F\x02\xbdq\xbc\x98\xbd\xcde\x83\xbd\xa2=\x9e\xbd\x02\x13\xaa\xbc\xe6\xfa\x8b=#s\x1c\xbd\x0f\xb6T\xbcTGG\xbd\xb1\x1b\xa8<!\xa4\x8f=\xe1\xd3\xda<\x82\x9b\x03=\xf9\x80l\xbd\xc2\x16\x84\xbb\xe9lB<\x14O=<\x11\x1a\xba=\x9e6\x8f\xbcj\xa1\xba<\x17l\xd5\xbduf\x06\xbd\xf5)\x93=\xc4\xdcQ\xbb\xab\x97\x07=\xdc}w=\xfdXx<PJ]\xbd:\xf2\x12=M\x82\x15\xbc\xe0\xe8\xa2<\x17\xa5\xdc;5:\x1c<\xdb\xb1\xe9:[3\xc9\xbd\x9e\x0b\x07\xbd\x15\x8bH\xbd@\xbc\xd0<S\xd0\xfa\xbc\xac \xd8\xbcqu\xbb<@\xc9\xd9=n\x85\'=;N\xc2<9m\x86=\x04~w;\xc7e4\xbd6%\xe1<\x910]=\xbewc<M\xf4\x8d<\xab\x16\x0b\xbcI\xd2\xc3\xbdeu{=\x1f\xaf=\xbd\x8f-\x80<\x8d\xed\xc9<\xad\xdc\xe3<\x8bm-;\x88\x0e\xcc;)\x9fs\xbd\x91#|<\x94l\xbe\xbd\x9d\x98\x19;<\xa0\xa9\xbcb\xed\x90\xbc\xe1\xdaF=\x15c\xba=\xa7J\xd4;ar\xe0\xbca\xe1j=>\x0b-<s