In [33]:
import dlib
import cv2
import numpy as np
from matplotlib import pyplot as plt
import torch
from transformers import ViTModel, ViTFeatureExtractor
from sklearn.metrics.pairwise import cosine_similarity
from langchain.vectorstores import Chroma
from langchain.schema import Document
from PIL import Image
import warnings
import os
warnings.filterwarnings('ignore')


In [None]:
# Shared, module-level resources used by the cells below: the dlib frontal-face
# detector, the pre-trained ViT backbone with its matching feature extractor,
# a Chroma store persisted under ./chroma_db, and the in-memory gallery lists.
VIT_CHECKPOINT = 'google/vit-base-patch16-224'

detector = dlib.get_frontal_face_detector()
feature_extractor = ViTFeatureExtractor.from_pretrained(VIT_CHECKPOINT)
model = ViTModel.from_pretrained(VIT_CHECKPOINT)
# embedding_function=None: no LangChain embedding model is attached to the store.
vector_store = Chroma(embedding_function=None, persist_directory="./chroma_db")
# Parallel lists: known_image_embedding[k] is labelled by known_names[k].
known_image_embedding, known_names = [], []

In [11]:
def face_detector(gray_image, img):
    """Detect frontal faces and crop each one out of ``img``.

    Args:
        gray_image: Image handed to the module-level dlib ``detector``.
        img: Array the crops are sliced from, indexed as ``img[rows, cols]``.

    Returns:
        ``(face_region, faces)``: the list of crops and the detector's
        rectangles. Both hold one entry per detection in the same order, so
        ``face_region[i]`` belongs to ``faces[i]``.
    """
    faces = detector(gray_image)
    img_h, img_w = img.shape[:2]

    face_region = []
    for face in faces:
        # A detection box may extend past the image border. A negative start
        # index would be read by NumPy as "count from the end" and produce an
        # empty or wrong crop, so clamp the box to the image before slicing.
        x0 = min(max(face.left(), 0), img_w)
        y0 = min(max(face.top(), 0), img_h)
        x1 = min(max(face.left() + face.width(), 0), img_w)
        y1 = min(max(face.top() + face.height(), 0), img_h)
        face_region.append(img[y0:y1, x0:x1])
    return face_region, faces

In [12]:
def image_embedding(image):
    """Return the ViT [CLS]-token embedding of ``image`` as a NumPy array.

    The image is preprocessed by the module-level ``feature_extractor`` and
    run through the module-level ``model`` with gradient tracking disabled.

    NOTE(review): callers in this notebook pass crops of arrays read with
    OpenCV (BGR channel order); presumably the checkpoint expects RGB --
    confirm whether a conversion is wanted.

    Args:
        image: Image (or images) accepted by ``feature_extractor``.

    Returns:
        ``last_hidden_state[:, 0, :]`` converted with ``.numpy()`` -- one row
        per batch item.
    """
    pixel_inputs = feature_extractor(images=image, return_tensors='pt')

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        hidden_states = model(**pixel_inputs).last_hidden_state

    # Position 0 along the sequence axis is the [CLS] token.
    return hidden_states[:, 0, :].numpy()

In [14]:
def training_images_finc(path, names=None):
    """Build face embeddings for every readable image in a directory.

    Each file in ``path`` is read with OpenCV, faces are located with
    ``face_detector`` and each face crop is embedded with ``image_embedding``.
    One ``Document`` labelled ``names`` is created per face, and every batch
    is also handed to the module-level ``vector_store``.

    Args:
        path: Directory containing the training images.
        names: Label stored as ``page_content`` for each face. When omitted,
            the last component of ``path`` is used.

    Returns:
        ``(embeddings, documents)``: two parallel lists with one entry per
        detected face.
    """
    if names is None:
        names = os.path.basename(os.path.normpath(path))

    face_embeddings = []
    face_documents = []
    # sorted() keeps the processing order independent of the filesystem.
    for file_name in sorted(os.listdir(path)):
        # os.listdir yields bare names; join them with the directory so the
        # file is read from ``path`` rather than the working directory.
        img = cv2.imread(os.path.join(path, file_name))
        if img is None:
            # cv2.imread signals an unreadable / non-image file with None.
            continue
        gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        face_region, _ = face_detector(gray_image, img)
        # Skip degenerate (zero-pixel) crops; they cannot be embedded.
        crops = [crop for crop in face_region if crop.size > 0]
        if not crops:
            continue
        new_embeddings = [image_embedding(crop) for crop in crops]
        new_documents = [Document(page_content=names) for _ in new_embeddings]
        face_embeddings.extend(new_embeddings)
        face_documents.extend(new_documents)
        # NOTE(review): whether the store keeps the precomputed vectors passed
        # as ``embeddings`` depends on the Chroma wrapper's add_documents
        # implementation -- verify before relying on the persisted store.
        vector_store.add_documents(documents=new_documents, embeddings=new_embeddings)
    return face_embeddings, face_documents

In [31]:
# Scratch check: build ten Documents, passing an extra `embedding` keyword
# alongside page_content, and display them.
# NOTE(review): the recorded output shows only `metadata` and `page_content`
# in each repr, so the `embedding` value does not appear to be kept on the
# Document -- confirm before relying on it.
known_image_embedding_1 = [Document(embedding = (i), page_content='names') for i in range(10)]
known_image_embedding_1

[Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names'),
 Document(metadata={}, page_content='names')]

In [None]:
# Embed the training images of one person and append them to the in-memory
# gallery. training_images_finc is declared with two parameters (path, names),
# so the label must be passed as well.
# NOTE(review): the label is taken from the folder name ('gk'); replace it if
# a different display name is wanted.
TRAINING_DIR = 'Training_images/gk'
PERSON_NAME = os.path.basename(TRAINING_DIR)

embed, names = training_images_finc(TRAINING_DIR, PERSON_NAME)
# extend() accumulates: re-running this cell appends the same faces again.
known_image_embedding.extend(embed)
known_names.extend(names)

In [27]:
# Text-overlay settings; video_idetification passes these module-level names
# to cv2.putText when it labels a recognised face.
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 2
color = (0, 255, 0)  # Green color in BGR
thickness = 10
lineType = cv2.LINE_AA

In [24]:
def video_idetification(frame_image, threshold=0.2):
    """Label known faces in a BGR frame, drawing on the frame in place.

    Every detected face is embedded and compared (cosine similarity) with all
    entries of the module-level ``known_image_embedding``. Only the single
    best-scoring entry is considered per face; if its score exceeds
    ``threshold`` a red box and the matching ``known_names`` label are drawn.

    Args:
        frame_image: BGR image array; it is modified in place.
        threshold: Minimum cosine similarity (exclusive) for a face to be
            labelled. Defaults to 0.2.

    Returns:
        The same ``frame_image`` object, annotated.
    """
    u_grey = cv2.cvtColor(frame_image, cv2.COLOR_BGR2GRAY)
    un_face_region, unknow_region = face_detector(u_grey, frame_image)

    # Nothing to compare against yet: return the frame untouched.
    if len(known_image_embedding) == 0:
        return frame_image

    # Stack the gallery once per frame into an (n_known, dim) matrix so each
    # face needs a single similarity call.
    gallery = np.vstack([np.asarray(e).reshape(1, -1) for e in known_image_embedding])

    for crop, region in zip(un_face_region, unknow_region):
        if crop.size == 0:
            # Degenerate crop (zero pixels) cannot be embedded.
            continue
        probe = np.asarray(image_embedding(crop)).reshape(1, -1)
        scores = np.asarray(cosine_similarity(gallery, probe)).ravel()
        best = int(np.argmax(scores))
        if not scores[best] > threshold:
            continue

        # Gallery labels may be plain strings or Document objects; cv2.putText
        # needs a str, so unwrap page_content when present.
        label = known_names[best]
        label = str(getattr(label, 'page_content', label))

        x, y, w, h = (region.left(), region.top(), region.width(), region.height())
        cv2.rectangle(frame_image, (x, y), (x + w, y + h), (0, 0, 255), 4)
        cv2.putText(frame_image, label, (x, y - 60), font, fontScale, color, thickness, lineType)
    return frame_image


In [None]:
# NOTE(review): `unknown_face` is not assigned in any cell shown in this
# notebook, so this cell raises NameError on a fresh kernel unless it is
# defined elsewhere -- confirm where it is supposed to come from.
plt.imshow(unknown_face)

In [None]:
# Writes the image to 'file.jpeg' in the working directory.
# NOTE(review): `unknown_face` is not assigned in any cell shown in this
# notebook -- confirm where it is supposed to come from.
cv2.imwrite('file.jpeg', unknown_face)

In [None]:

# Open the default camera (usually the webcam), annotate each frame with
# video_idetification and show it in a popup window until 'q' is pressed.
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open camera.")
else:
    try:
        while True:
            # Capture frame-by-frame
            ret, frame = cap.read()
            if not ret or frame is None:
                # A failed grab yields no image; stop instead of passing
                # None on to the recognition step.
                print("Error: Could not read frame from camera.")
                break

            frame = video_idetification(frame)
            # Display the resulting frame in a popup window
            cv2.imshow('Camera Feed', frame)

            # Exit on pressing 'q'
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always release the camera and close the popup window, even when the
        # loop is interrupted or an exception escapes the recognition step.
        cap.release()
        cv2.destroyAllWindows()


In [29]:
# Manual cleanup cell: releases the capture device held by `cap` and closes
# any OpenCV windows. Requires `cap` to have been created by the camera cell.
cap.release()
cv2.destroyAllWindows()

In [30]:
import chromadb

In [None]:


# Demo: store two text documents in the existing `vector_store` and run a
# text similarity query against it.
# NOTE(review): this is the same `vector_store` that training_images_finc
# writes to, so these demo texts land in the same store -- confirm that
# mixing them is intended.

# Step 1: Prepare the documents
docs = [
    Document(page_content="The quick brown fox jumps over the lazy dog.", metadata={"source": "text1"}),
    Document(page_content="Artificial Intelligence is transforming industries.", metadata={"source": "text2"}),
]

# Step 2: Add the documents to the store
vector_store.add_documents(docs)

# Step 3: Query the store. similarity_search is given the raw query text, so
# no separate query-embedding step is performed in this cell.
query = "What jumps over the lazy dog?"
results = vector_store.similarity_search(query, k=2)

# Display results
for result in results:
    print(result.page_content, result.metadata)
