## Setting up the environment and db connection

In [18]:
import pymongo
import datetime
from dotenv import load_dotenv
import os

load_dotenv()
MONGO_URL = os.getenv("MONGO_URL1")
connection_string = MONGO_URL  # your connection string
client = pymongo.MongoClient(connection_string)

In [19]:
emp1 = client["FaceRec"]["ImageDB"].find_one({"Name": "Devasy"})
print(emp1)
embedding = emp1["embeddings"][0][0]["embedding"]
print("Embeddings are: ", embedding)

{'_id': ObjectId('663bb22c51feb0f810f3c6a6'), 'EmployeeCode': 22, 'Name': 'Devasy', 'gender': 'Male', 'Department': 'AI/ML', 'time': datetime.datetime(2024, 5, 8, 22, 40, 59, 900000), 'embeddings': [[{'embedding': [-1.6631312370300293, -0.3263624906539917, -2.060389995574951, 0.4802457392215729, 2.099061965942383, -0.09320534765720367, -0.5597538948059082, -0.2341357171535492, 0.12537622451782227, 1.234802007675171, -0.571344256401062, -1.2292050123214722, -1.5539861917495728, -1.1835013628005981, 0.7829186916351318, 0.3972225785255432, 2.2264130115509033, 0.22024168074131012, -1.3177181482315063, -2.229318141937256, 0.61540687084198, 0.364970326423645, 0.2292432188987732, 0.6173582077026367, -0.54261714220047, 0.4317587614059448, -0.36843952536582947, -0.47389209270477295, -0.11245006322860718, 0.9459837079048157, 0.4314466714859009, 0.04081222042441368, -0.3548491597175598, -0.4516597390174866, 2.355724811553955, 1.6814329624176025, -0.5261238813400269, 0.8705232739448547, 1.25216364

## One-time run to add embeddings to the dataset from nested attribute

In [25]:
# for every document in database add face_embedding attribute from embeddings.0.embedding
for emp in client["FaceRec"]["ImageDB"].find({"Name": "Devasy"}):
    print(emp)
    # print(emp['embeddings'][0]['embedding'])
    client["FaceRec"]["ImageDB"].update_one(
        {"_id": emp["_id"]},
        {"$set": {"face_embedding": emp["embeddings"][0][0]["embedding"]}},
    )
    print("Updated")

{'_id': ObjectId('663bb22c51feb0f810f3c6a6'), 'EmployeeCode': 22, 'Name': 'Devasy', 'gender': 'Male', 'Department': 'AI/ML', 'time': datetime.datetime(2024, 5, 8, 22, 40, 59, 900000), 'embeddings': [[{'embedding': [-1.6631312370300293, -0.3263624906539917, -2.060389995574951, 0.4802457392215729, 2.099061965942383, -0.09320534765720367, -0.5597538948059082, -0.2341357171535492, 0.12537622451782227, 1.234802007675171, -0.571344256401062, -1.2292050123214722, -1.5539861917495728, -1.1835013628005981, 0.7829186916351318, 0.3972225785255432, 2.2264130115509033, 0.22024168074131012, -1.3177181482315063, -2.229318141937256, 0.61540687084198, 0.364970326423645, 0.2292432188987732, 0.6173582077026367, -0.54261714220047, 0.4317587614059448, -0.36843952536582947, -0.47389209270477295, -0.11245006322860718, 0.9459837079048157, 0.4314466714859009, 0.04081222042441368, -0.3548491597175598, -0.4516597390174866, 2.355724811553955, 1.6814329624176025, -0.5261238813400269, 0.8705232739448547, 1.25216364

## Pipeline and Vector search

In [29]:
result = client["FaceRec"]["ImageDB"].aggregate(
    [
        {
            "$vectorSearch": {
                "index": "vector_index",
                "path": "face_embedding",
                "queryVector": embedding,
                "numCandidates": 20,
                "limit": 20,
            }
        },
        {
            "$project": {
                "_id": 0,
                "Name": 1,
                "gender": 1,
                "EmployeeCode": 1,
                "department": 1,
                # 'Image': 0,
                "score": {"$meta": "vectorSearchScore"},
            }
        },
    ]
)

In [30]:
for doc in result:
    print(doc)

{'EmployeeCode': 22, 'Name': 'Devasy', 'gender': 'Male', 'score': 1.0}
{'EmployeeCode': 2303, 'Name': 'Rakshit Sathvara', 'gender': 'Male', 'score': 0.006940748076885939}
{'EmployeeCode': 2273, 'Name': 'Jignesh Parmar', 'gender': 'Male', 'score': 0.005807479843497276}
{'EmployeeCode': 8057, 'Name': 'Sushilkumar Pande', 'gender': 'Male', 'score': 0.005555780604481697}
{'EmployeeCode': 2277, 'Name': 'Nirali khatra', 'gender': 'Female', 'score': 0.005438359919935465}
{'EmployeeCode': 2322, 'Name': 'Radhika Vaishnav', 'gender': 'Female', 'score': 0.005379958543926477}
{'EmployeeCode': 8061, 'Name': 'KuldipSinh Chavda', 'gender': 'Male', 'score': 0.005218847189098597}
{'EmployeeCode': 8067, 'Name': 'Nandani Gupta', 'gender': 'Female', 'score': 0.00516529893502593}
{'EmployeeCode': 2211, 'Name': 'Nehal Shah', 'gender': 'Female', 'score': 0.004871389362961054}
{'EmployeeCode': 8050, 'Name': 'Hetvi Kachot', 'gender': 'Female', 'score': 0.004833665210753679}
{'EmployeeCode': 2291, 'Name': 'Dish

In [15]:
from API.database import Database

client = Database(MONGO_URL, "FaceRec")
collection = "ImageDB"

result = client.vector_search(collection, embedding)
for doc in result:
    print(doc)

{'Name': 'Nehal Shah', 'Image': '/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAIBAQEBAQIBAQECAgICAgQDAgICAgUEBAMEBgUGBgYFBgYGBwkIBgcJBwYGCAsICQoKCgoKBggLDAsKDAkKCgr/2wBDAQICAgICAgUDAwUKBwYHCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgr/wAARCAHgAoADASIAAhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSExBhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwD9KfJHtR5I9qIelHnn+9X0yrTPDJvs59B+VEEFQVP9oPqPzrTmAWpKqeef71TW/X8KOefcCWpYIKio8/2o5gJKnh61F9oHqfzo+0D1P51QEvktRBBUFTw9armMyXyPeiijz/aqAm+

In [34]:
# write code to fetch each and every document's image from database and calcluate embeddings using facenet512 and store in database

from deepface.DeepFace import represent
import base64
import io
from PIL import Image

for emp in client["FaceRec"]["ImageDB"].find():
    print(emp)
    # print(emp['embeddings'][0]['embedding'])
    try:
        img = emp["Image"]
        # decode image from base64
        image = base64.b64decode(img)
        image = Image.open(io.BytesIO(image))
        image.save("temp.jpg")
        embedding = represent(
            img_path="temp.jpg", model_name="Facenet512", detector_backend="mtcnn"
        )[0]["embedding"]
        client["FaceRec"]["ImageDB"].update_one(
            {"_id": emp["_id"]}, {"$set": {"face_embedding": embedding}}
        )
        print("Updated")
    except KeyError:
        img = emp["Images"][0]
        image = base64.b64decode(img)
        image = Image.open(io.BytesIO(image))
        image.save("temp.jpg")
        embedding = represent(
            img_path="temp.jpg", model_name="Facenet512", detector_backend="mtcnn"
        )[0]["embedding"]
        client["FaceRec"]["ImageDB"].update_one(
            {"_id": emp["_id"]}, {"$set": {"face_embedding": embedding}}
        )
        print("Updated")

{'_id': ObjectId('65eaae8c776c51b6bf624194'), 'EmployeeCode': 2211, 'Name': 'Nehal Shah', 'gender': 'Female', 'Department': 'Software', 'time': datetime.datetime(2024, 3, 8, 11, 52, 1, 643000), 'embeddings': [{'embedding': [-0.8978811502456665, -1.2267374992370605, -1.7475792169570923, -0.5941258668899536, -1.08853018283844, 1.8027818202972412, -1.2445658445358276, 0.9187960028648376, -0.7670843601226807, 0.21528728306293488, -0.6110206842422485, -0.3177332878112793, 0.38642698526382446, -0.6458835601806641, 1.8944649696350098, 0.13678884506225586, 2.1456236839294434, -0.39252614974975586, -0.18613582849502563, -2.2897377014160156, -0.3242430090904236, -0.40122759342193604, 0.982740581035614, -0.41683676838874817, 0.5810261964797974, -0.18567821383476257, -0.23365458846092224, 1.328766107559204, -0.0721575915813446, -0.11788485944271088, -0.278534471988678, -2.015449285507202, 1.1188316345214844, 0.21899348497390747, 2.4954662322998047, 0.5204265713691711, -1.9391143321990967, 1.606939