In [None]:
import cv2
import numpy as np
from insightface.app import FaceAnalysis


# Build the InsightFace pipeline, requesting the CUDA provider with the CPU
# provider listed as the alternative.
app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

# cv2.imread reports a missing or unreadable file by returning None rather than
# raising, so fail here with a clear message instead of deep inside app.get().
img = cv2.imread("test.jpg")
if img is None:
    raise FileNotFoundError("Could not read image 'test.jpg'")

# Detect faces and render the detections onto the image.
faces = app.get(img)
rimg = app.draw_on(img, faces)

# cv2.imwrite returns False when the file could not be written.
if not cv2.imwrite("./t1_output.jpg", rimg):
    raise OSError("Could not write './t1_output.jpg'")

# Calculate the face embedding

In [None]:
import cv2

# Example: detect faces in an image and print each face's embedding
import cv2
from insightface.app import FaceAnalysis

# Initialize the FaceAnalysis app
app = FaceAnalysis(providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
app.prepare(ctx_id=0, det_size=(640, 640))

# Load the image. cv2.imread returns None (it does not raise) when the file is
# missing or cannot be decoded, so check explicitly before running detection.
img = cv2.imread("test.jpg")
if img is None:
    raise FileNotFoundError("Could not read image 'test.jpg'")

# Detect faces in the image
faces = app.get(img)

# Make the "nothing found" case visible instead of printing nothing at all.
if len(faces) == 0:
    print("No faces detected in 'test.jpg'.")

# Loop through detected faces and print the bounding box and embedding of each
for i, face in enumerate(faces, start=1):
    print(f"Face {i}:")
    print("Bounding Box:", face.bbox)
    print("Embedding:", face.embedding)  # Face embedding vector


In [None]:
import cv2
import numpy as np
from insightface.model_zoo import model_zoo

# Load the face recognition model
model = model_zoo.get_model('buffalo_l')  # You can choose other models if needed
if model is None:
    # get_model returns None when it cannot locate a model file.
    raise RuntimeError("Could not load a model for 'buffalo_l'")
model.prepare(ctx_id=0)  # Use GPU (ctx_id=0) or CPU (ctx_id=-1)

# Load the image; cv2.imread returns None instead of raising on failure.
img = cv2.imread("test.jpg")
if img is None:
    raise FileNotFoundError("Could not read image 'test.jpg'")

# Crop the face region.
# Assuming you already have the face bounding box (e.g., from a face detector)
bbox = [50, 50, 200, 200]  # Example bounding box [x1, y1, x2, y2]
x1, y1, x2, y2 = (int(v) for v in bbox)
face = img[y1:y2, x1:x2]
if face.size == 0:
    # Slicing outside the image silently yields an empty array, which
    # cv2.resize would then reject with a less helpful error.
    raise ValueError(f"Bounding box {bbox} does not overlap the image")

face = cv2.resize(face, (112, 112))  # Resize to 112x112 as required by the model
face = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)  # Convert to RGB
face = np.transpose(face, (2, 0, 1))  # Change to CHW format

# Keep the raw 0-255 pixel range. The recognition model's forward() applies its
# own (x - input_mean) / input_std normalisation (see insightface's
# ArcFaceONNX), so scaling to [0, 1] first would normalise the input twice.
face = np.expand_dims(face, axis=0).astype(np.float32)

# Get the face embedding
embedding = model.forward(face)

# Print the shape of the embedding
print("Face Embedding:", embedding.shape)

# Test ChromaDB vector database

In [None]:
# NOTE(review): the standard-library `collections` module defines no `collection`
# name, so this import only works when a project-local `collections.py` shadows
# the stdlib on the import path. Later cells import from `db_collections`
# instead; confirm which module is current and prefer the non-shadowing name.
from collections import collection

Creating collection 'face_embeddings'...
Collection 'face_embeddings' created.


In [None]:
import os
from get_embedding import get_face_embedding, get_gender_and_age

def ingest_known_faces(dataset_path):
    """
    Walk ``dataset_path`` and store one embedding per readable face image.

    Each sub-directory of ``dataset_path`` is treated as one person; every
    ``.png`` / ``.jpg`` / ``.jpeg`` file inside it is passed to
    ``get_face_embedding``. Images for which that helper returns ``None`` are
    reported and skipped. All collected embeddings are then written to the
    notebook-level ``collection`` with a single ``add`` call.

    Args:
        dataset_path: Directory whose sub-directories are named after people.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    print(f"Starting ingestion from: {dataset_path}")
    image_suffixes = ('.png', '.jpg', '.jpeg')
    # One (id, embedding, metadata) tuple per successfully embedded image.
    records = []

    for person_name in os.listdir(dataset_path):
        person_dir = os.path.join(dataset_path, person_name)
        if not os.path.isdir(person_dir):
            continue

        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)
            if not image_path.lower().endswith(image_suffixes):
                continue

            print(f"Processing {image_path} for {person_name}...")
            vector = get_face_embedding(image_path)
            if vector is None:
                print(f"Skipping {image_path} - no embedding generated.")
                continue

            # The running count of stored records keeps IDs unique even when
            # two people have identically named image files.
            record_id = f"{person_name}_{image_name}_{len(records)}"
            details = {"person_name": person_name, "source_image": image_name}
            records.append((record_id, vector, details))

    if not records:
        print("No embeddings were generated to add to the collection.")
        return

    print(f"Adding {len(records)} embeddings to the collection...")
    try:
        collection.add(
            embeddings=[vector for _, vector, _ in records],
            metadatas=[details for _, _, details in records],
            ids=[record_id for record_id, _, _ in records]
        )
        print(f"Successfully added {len(records)} embeddings to ChromaDB.")
    except Exception as exc:
        # Best-effort: report the failure (e.g. duplicate IDs or a bad data
        # format) without aborting the notebook.
        print(f"Error adding embeddings to Chroma: {exc}")



In [None]:
import os
from get_embedding import get_face_embedding, get_gender_and_age

def ingest_known_faces(dataset_path):
    """
    Ingest known faces from a dataset directory into the database.

    Each sub-directory of ``dataset_path`` is treated as one person. For every
    ``.png`` / ``.jpg`` / ``.jpeg`` file inside it, the face embedding and the
    gender/age estimate are computed; the image is skipped when either helper
    returns ``None``. All collected embeddings, metadata and unique IDs are
    then written to the notebook-level ``collection`` with one ``add`` call.

    Metadata values are stored as strings. A gender or age missing from the
    helper's result is recorded as ``"unknown"`` instead of being silently
    stored as ``"female"`` / ``"None"``.

    Args:
        dataset_path: Directory whose sub-directories are named after people.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    print(f"Starting ingestion from: {dataset_path}")
    known_embeddings = []
    known_metadatas = []
    known_ids = []

    for person_name in os.listdir(dataset_path):
        person_dir = os.path.join(dataset_path, person_name)
        if not os.path.isdir(person_dir):
            continue

        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)
            if not image_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            print(f"Processing {image_path} for {person_name}...")

            # Get face embedding
            embedding = get_face_embedding(image_path)

            # Get gender and age
            gender_age_info = get_gender_and_age(image_path)

            if embedding is None or gender_age_info is None:
                print(f"Skipping {image_path} - no embedding or gender/age info generated.")
                continue

            # A gender code of 1 is labelled male and any other present value
            # female; an absent value is kept distinguishable as "unknown".
            gender = gender_age_info.get("gender")
            if gender is None:
                gender_label = "unknown"
            else:
                gender_label = "male" if gender == 1 else "female"

            age = gender_age_info.get("age")
            age_label = "unknown" if age is None else str(age)

            known_embeddings.append(embedding)
            known_metadatas.append({
                "person_name": person_name,
                "source_image": image_name,
                "gender": gender_label,
                "age": age_label,
            })
            # The running count keeps IDs unique even when two people have
            # identically named image files.
            known_ids.append(f"{person_name}_{image_name}_{len(known_ids)}")

    if not known_embeddings:
        print("No embeddings were generated to add to the collection.")
        return

    print(f"Adding {len(known_embeddings)} embeddings to the collection...")
    try:
        collection.add(
            embeddings=known_embeddings,
            metadatas=known_metadatas,
            ids=known_ids
        )
        print(f"Successfully added {len(known_embeddings)} embeddings to the database.")
    except Exception as e:
        # Best-effort: report the failure without aborting the notebook.
        print(f"Error adding embeddings to the database: {e}")

  from .autonotebook import tqdm as notebook_tqdm


Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider'], with options: {'CUDAExecutionProvider': {'device_id': '0', 'has_user_compute_stream': '0', 'cudnn_conv1d_pad_to_nc1d': '0', 'user_compute_stream': '0', 'gpu_external_alloc': '0', 'gpu_mem_limit': '18446744073709551615', 'enable_cuda_graph': '0', 'gpu_external_free': '0', 'gpu_external_empty_cache': '0', 'arena_extend_strategy': 'kNextPowerOfTwo', 'cudnn_conv_algo_search': 'EXHAUSTIVE', 'do_copy_in_default_stream': '1', 'cudnn_conv_use_max_workspace': '1', 'tunable_op_enable': '0', 'tunable_op_tuning_enable': '0', 'tunable_op_max_tuning_duration_ms': '0', 'enable_skip_layer_norm_strict_mode': '0', 'prefer_nhwc': '0', 'use_ep_level_unified_stream': '0', 'use_tf32': '1', 'sdpa_kernel': '0', 'fuse_conv_bias': '0'}, 'CPUExecutionProvider': {}}
find model: C:\Users\user/.insightface\models\buffalo_l\1k3d68.onnx landmark_3d_68 ['None', 3, 192, 192] 0.0 1.0
Applied providers: ['CUDAExecutionProvider', 'CPUExecutionProvider']

In [3]:
# --- Example usage: populate the collection from the dataset directory ---
DATASET_ROOT = "./database"  # Point this at your actual dataset path

# Ingestion is meant to run once. Re-running it against a populated collection
# can clash with IDs that already exist. Ways to handle a re-run:
#   1. Drop and recreate the collection:
#      `client.delete_collection(name=COLLECTION_NAME)`
#   2. Switch `collection.add()` to `collection.upsert()` so existing IDs are
#      updated and new ones are added (`upsert` needs IDs to be supplied).
# This cell takes the simple route: it only ingests into an empty collection.

if collection.count() != 0:
    print(f"Collection already contains {collection.count()} embeddings. Skipping ingestion.")
    print("If you want to re-ingest, clear the './chroma_db' directory or delete the collection.")
else:
    print("Collection is empty. Ingesting faces...")
    ingest_known_faces(DATASET_ROOT)

Collection is empty. Ingesting faces...
Starting ingestion from: ./database
Processing ./database\Anh\Anh_1.jpg for Anh...
Metadata: {'person_name': 'Anh', 'source_image': 'Anh_1.jpg', 'gender': 'male', 'age': '40'}
Processing ./database\Anh\Anh_2.jpg for Anh...
Metadata: {'person_name': 'Anh', 'source_image': 'Anh_2.jpg', 'gender': 'male', 'age': '48'}
Processing ./database\Anh\Anh_3.jpg for Anh...
Metadata: {'person_name': 'Anh', 'source_image': 'Anh_3.jpg', 'gender': 'male', 'age': '50'}
Processing ./database\Dr.Lee\Lee1.jpg for Dr.Lee...
Metadata: {'person_name': 'Dr.Lee', 'source_image': 'Lee1.jpg', 'gender': 'male', 'age': '45'}
Processing ./database\Dr.Lee\Lee_2.jpg for Dr.Lee...
Metadata: {'person_name': 'Dr.Lee', 'source_image': 'Lee_2.jpg', 'gender': 'male', 'age': '57'}
Processing ./database\Dr.Lee\Lee_3.jpg for Dr.Lee...
Metadata: {'person_name': 'Dr.Lee', 'source_image': 'Lee_3.jpg', 'gender': 'male', 'age': '54'}
Processing ./database\GwangHyun\GH_1.jpg for GwangHyun...
M

In [None]:
from search_face import search_face
import os
from collections import collection

# Build the path with os.path.join instead of a hard-coded backslash: "\A" is
# not a valid string escape (recent Python versions warn about it), and a
# literal backslash only acts as a path separator on Windows.
query_image_file = os.path.join("query", "Anh_1.jpg")  # CHANGE THIS to an actual image path

# Only run the search when the query image is present on disk.
if os.path.exists(query_image_file):
    search_face(collection, query_image_file, top_n=3, threshold=0.5)
else:
    print(f"\nQuery image {query_image_file} not found. Skipping search demonstration.")
    print("Please create it or update the path to an existing image for search.")


Searching for faces similar to: query\Anh_1.jpg

--- Search Results ---
  Candidate 1: ID: Anh_Anh_2.jpg_1, Person: Anh (from Anh_2.jpg)
    Cosine Distance: 0.3004 (Similarity: 0.6996)
    MATCH FOUND! (Similarity 0.6996 >= Threshold 0.5000)
  Candidate 2: ID: Anh_Anh_1.jpg_0, Person: Anh (from Anh_1.jpg)
    Cosine Distance: 0.3460 (Similarity: 0.6540)
    MATCH FOUND! (Similarity 0.6540 >= Threshold 0.5000)
  Candidate 3: ID: Anh_Anh_3.jpg_2, Person: Anh (from Anh_3.jpg)
    Cosine Distance: 0.3507 (Similarity: 0.6493)
    MATCH FOUND! (Similarity 0.6493 >= Threshold 0.5000)


In [None]:
# inspect the collection
from collections import collection

# Read the collection back, asking only for the embeddings (no id or filter
# arguments are passed).
collection_info = collection.get(include=['embeddings'])

# Show the dimensions of the returned embeddings.
stored_embeddings = collection_info['embeddings']
print(stored_embeddings.shape)


(19, 512)


# Test with Weaviate database

In [38]:
from db_collections import init_weaviate_db


# Obtain the collection handle from the project helper and bind it to the
# notebook-level name `collection`, which the ingestion function defined in a
# later cell reads as a global.
# NOTE(review): presumably this opens a Weaviate client connection that stays
# open for the kernel's lifetime; confirm whether it should be closed.
collection = init_weaviate_db()


Collection 'face_embeddings' loaded.


In [43]:
# populate the collection

import os
from get_embedding import get_face_embedding, get_gender_and_age
import weaviate as wv

def ingest_known_faces(dataset_path):
    """
    Ingest known faces from a dataset directory into the database.

    Each sub-directory of ``dataset_path`` is treated as one person. For every
    ``.png`` / ``.jpg`` / ``.jpeg`` file inside it, the face embedding and the
    gender/age estimate are computed; the image is skipped when either helper
    returns ``None``. Each remaining image becomes one ``DataObject`` (string
    properties plus the embedding as its vector), and all objects are sent to
    the notebook-level ``collection`` with one ``insert_many`` call.

    No explicit object ID is supplied. A gender or age missing from the
    helper's result is recorded as ``"unknown"``.

    Args:
        dataset_path: Directory whose sub-directories are named after people.

    Returns:
        None. Progress and errors are reported with ``print``.
    """
    print(f"Starting ingestion from: {dataset_path}")
    known_embeddings = []

    for person_name in os.listdir(dataset_path):
        person_dir = os.path.join(dataset_path, person_name)
        if not os.path.isdir(person_dir):
            continue

        for image_name in os.listdir(person_dir):
            image_path = os.path.join(person_dir, image_name)
            if not image_path.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            print(f"Processing {image_path} for {person_name}...")

            # Get face embedding
            embedding = get_face_embedding(image_path)

            # Get gender and age
            gender_age_info = get_gender_and_age(image_path)

            if embedding is None or gender_age_info is None:
                print(f"Skipping {image_path} - no embedding or gender/age info generated.")
                continue

            # A gender code of 1 is labelled male and any other present value
            # female; an absent value is kept distinguishable as "unknown".
            gender = gender_age_info.get("gender")
            if gender is None:
                gender_label = "unknown"
            else:
                gender_label = "male" if gender == 1 else "female"

            age = gender_age_info.get("age")
            age_label = "unknown" if age is None else str(age)

            known_embeddings.append(wv.classes.data.DataObject(
                properties={
                    "age": age_label,
                    "gender": gender_label,
                    "person_name": person_name,
                    "source_image": image_name,
                },
                vector=embedding,
            ))

    if not known_embeddings:
        print("No embeddings were generated to add to the collection.")
        return

    print(f"Adding {len(known_embeddings)} embeddings to the collection...")
    try:
        result = collection.data.insert_many(known_embeddings)
    except Exception as e:
        # Best-effort: report the failure without aborting the notebook.
        print(f"Error adding embeddings to the database: {e}")
        return

    # insert_many reports per-object failures on its return value rather than
    # raising, so inspect it before claiming success.
    errors = getattr(result, "errors", None) or {}
    if getattr(result, "has_errors", False) or errors:
        print(
            f"Error adding embeddings to the database: "
            f"{len(errors)} of {len(known_embeddings)} objects were rejected."
        )
        for index, error in errors.items():
            print(f"  object {index}: {getattr(error, 'message', error)}")
    else:
        print(f"Successfully added {len(known_embeddings)} embeddings to the database.")

In [44]:
# --- Example Usage: Ingest faces from your 'dataset' directory ---
DATASET_ROOT = "./database" # Change this to your actual dataset path

# Walk DATASET_ROOT and insert one object per usable face image.
# NOTE(review): unlike the ChromaDB ingestion cell, nothing here checks whether
# the collection is already populated, and the ingestion function supplies no
# explicit object ID. Re-running this cell presumably inserts the same faces
# again as duplicates; confirm and add a guard if so.
ingest_known_faces(DATASET_ROOT)

Starting ingestion from: ./database
Processing ./database\Anh\Anh_1.jpg for Anh...
Processing ./database\Anh\Anh_2.jpg for Anh...
Processing ./database\Anh\Anh_3.jpg for Anh...
Processing ./database\Dr.Lee\Lee1.jpg for Dr.Lee...
Processing ./database\Dr.Lee\Lee_2.jpg for Dr.Lee...
Processing ./database\Dr.Lee\Lee_3.jpg for Dr.Lee...
Processing ./database\GwangHyun\GH_1.jpg for GwangHyun...
Processing ./database\GwangHyun\GH_2.jpg for GwangHyun...
Processing ./database\GwangHyun\GH_3.jpg for GwangHyun...
Processing ./database\Han\Han.jpg for Han...
Processing ./database\Han\Han3.jpg for Han...
No face detected in ./database\Han\Han3.jpg
No face detected in ./database\Han\Han3.jpg
Skipping ./database\Han\Han3.jpg - no embedding or gender/age info generated.
Processing ./database\Han\Han_2.jpg for Han...
Processing ./database\Jin\Jin.jpg for Jin...
Processing ./database\Jin\Jin_1.jpg for Jin...
Processing ./database\Jin\Jin_2.jpg for Jin...
Processing ./database\Professor\Professor_1.jpg 

In [7]:
from search_face import search_face_weaviate
import os
from db_collections import init_weaviate_db

# Re-acquire the collection handle so this cell does not depend on an earlier one.
collection = init_weaviate_db()

# CHANGE THIS to an actual image path
query_image_file = "query/GH_1.jpg"

# Bail out with a hint when the query image is absent; otherwise search for the
# three nearest stored faces using a 0.5 threshold.
if not os.path.exists(query_image_file):
    print(f"\nQuery image {query_image_file} not found. Skipping search demonstration.")
    print("Please create it or update the path to an existing image for search.")
else:
    search_face_weaviate(collection, query_image_file, top_n=3, threshold=0.5)

Collection 'face_embeddings' loaded.

Searching for faces similar to: query/GH_1.jpg


  collection = init_weaviate_db()


{
  "age": "68",
  "person_name": "GwangHyun",
  "source_image": "GH_3.jpg",
  "gender": "male"
}
    Similarity Score: 0.8653337955474854
    MATCH FOUND! (Score 0.8653 >= Threshold 0.5000)
{
  "age": "60",
  "person_name": "GwangHyun",
  "source_image": "GH_2.jpg",
  "gender": "female"
}
    Similarity Score: 0.7918325662612915
    MATCH FOUND! (Score 0.7918 >= Threshold 0.5000)
{
  "age": "58",
  "person_name": "GwangHyun",
  "source_image": "GH_1.jpg",
  "gender": "male"
}
    Similarity Score: 0.6602085828781128
    MATCH FOUND! (Score 0.6602 >= Threshold 0.5000)
