In [1]:
from ultralytics import YOLO
from deepface import DeepFace
import cv2
from deepface.extendedmodels import Emotion
import numpy as np
import os
import pinecone
import tqdm

# Establish pinecone vector database

In [2]:
# Connect to Pinecone and create the index that stores the facial embeddings of the
# database if it doesn't exist yet.
# The API key is read from the environment so the credential never lives in the notebook.
# NOTE(review): the key that used to be hard-coded in this cell has been exposed and
# should be revoked / rotated in the Pinecone console.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
if not pinecone_api_key:
    raise RuntimeError(
        "Set the PINECONE_API_KEY environment variable before running this cell."
    )

pinecone.init(
    api_key=pinecone_api_key,
    environment=os.environ.get("PINECONE_ENVIRONMENT", "us-west4-gcp-free"),
)

existing_indexes = pinecone.list_indexes()

# `dimension` is the vector length the index accepts; it has to equal the length of
# the embeddings that are stored in and queried against this index.
if "verification" not in existing_indexes:
    pinecone.create_index(name="verification", metric="euclidean", dimension=128)

index = pinecone.Index("verification")

# Load and process all the images into embeddings and save them to Pinecone

In [3]:
# Collect the path of every image stored as <database>/<folder>/<file>.
# The database root can be overridden with the FACE_DB_DIR environment variable;
# the default is the original hard-coded location.
main_path = os.environ.get(
    "FACE_DB_DIR",
    "/home/sawaiz/Documents/Lab/In Progress/Video face detection/database",
)

img_paths = []
# sorted() makes the order (and therefore the id/embedding alignment) reproducible,
# since os.listdir returns entries in arbitrary order.
for folder in sorted(os.listdir(main_path)):
    folder_path = os.path.join(main_path, folder)
    # Stray files at the database root would make os.listdir(folder_path) raise.
    if not os.path.isdir(folder_path):
        continue
    for file in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, file)
        # Skip nested directories; only regular files are candidate images.
        if os.path.isfile(file_path):
            img_paths.append(file_path)

In [4]:
# Label every image with the name of the folder that contains it.
# os.path is used instead of splitting on "/" because the paths are assembled with
# os.path.join, whose separator is platform dependent.
# NOTE(review): all images inside one folder receive the same id, and these ids are
# later used as the vector ids in the index -- confirm that one stored vector per
# folder (rather than one per image) is what is intended.
ids = [
    os.path.basename(os.path.dirname(path))
    for path in tqdm.tqdm(img_paths)
]

100%|██████████| 31/31 [00:00<00:00, 420787.78it/s]


In [5]:
# Compute one Facenet embedding per database image, in the same order as img_paths.
# detector_backend="skip" hands each image to the model without running a face detector.
embeddings = []
for img_path in tqdm.tqdm(img_paths):
    representation = DeepFace.represent(
        img_path=img_path,
        model_name="Facenet",
        enforce_detection=False,
        detector_backend="skip",
    )
    # represent() returns a list; the first entry carries the embedding vector.
    embeddings.append(representation[0]["embedding"])

100%|██████████| 31/31 [00:09<00:00,  3.29it/s]


In [6]:
import pandas as pd

# One row per database image: the id in "ids" and its vector in "embedding".
df = pd.DataFrame(ids, columns=["ids"]).assign(embedding=embeddings)

In [7]:
# Send every (id, embedding) pair to the index; the response is shown as the cell output.
index.upsert(vectors=zip(df["ids"], df["embedding"]))

{'upserted_count': 31}

# Load Sentiment analysis model

In [8]:
# Load DeepFace's emotion model once at module level; sentiment_analysis() reuses
# this single instance for every crop it classifies.
senti_model = Emotion.loadModel()

In [9]:
# Lookup from the index returned by sentiment_analysis() to a readable emotion name.
labels = dict(
    enumerate(["angry", "disgust", "fear", "happy", "sad", "surprise", "neutral"])
)

In [10]:
from random import randint

# One random 3-channel colour for each of the keys "id 1" .. "id 16".
colors = {
    f"id {number}": (randint(0, 255), randint(0, 255), randint(0, 255))
    for number in range(1, 17)
}

# Utility functions for processing sentiment and identifying faces

In [11]:
def sentiment_analysis(cropped_image):
    """Return the index of the most likely emotion for an image crop.

    Args:
        cropped_image: BGR image array (it is converted with ``COLOR_BGR2GRAY``).

    Returns:
        int: position of the highest score in the emotion model's output; the
        ``labels`` dict maps it to a name.

    Raises:
        ValueError: if the crop is ``None`` or contains no pixels.
    """
    # Fail with a clear message instead of an opaque OpenCV assertion.
    if cropped_image is None or cropped_image.size == 0:
        raise ValueError("sentiment_analysis received an empty image crop")

    gray = cv2.cvtColor(cropped_image, cv2.COLOR_BGR2GRAY)
    gray = cv2.resize(gray, (48, 48))

    # DeepFace's own pipeline feeds this model pixels scaled to [0, 1]; raw 0-255
    # integer frames are therefore rescaled. Float input is assumed to be scaled
    # already and is passed through unchanged.
    if np.issubdtype(gray.dtype, np.integer):
        gray = gray.astype("float32") / 255.0

    result = senti_model.predict(np.expand_dims(gray, axis=0), verbose=False)
    # Plain int rather than a NumPy scalar so the value is a clean dict key.
    return int(np.argmax(result))

def face_verification(cropped_image):
    """Return the id of the stored embedding closest to the given face crop.

    Args:
        cropped_image: BGR image array of the face to identify.

    Returns:
        The id of the best match in the ``index``, or ``None`` when the query
        returns no match (for example, an empty index).
    """
    # Embed the crop that was passed in (not a leftover global path), and use the
    # same model ("Facenet") that produced the vectors stored in the index, so that
    # query and stored embeddings live in the same space.
    target_embedding = DeepFace.represent(
        img_path=cropped_image,
        model_name="Facenet",
        enforce_detection=False,
        detector_backend="skip",
    )[0]["embedding"]

    results = index.query(queries=[target_embedding], top_k=1)
    matches = results["results"][0]["matches"]
    if not matches:
        return None
    # NOTE(review): the nearest neighbour is returned regardless of its distance;
    # consider thresholding on the match score to reject unknown faces.
    return matches[0]["id"]

In [12]:
def draw(image, bounding_boxes, id=1):
    """Annotate each box on the frame with the emotion predicted for its crop.

    Args:
        image: frame to annotate (NumPy image array). It is drawn on and returned.
        bounding_boxes: iterable of ``(x, y, w, h)`` rows, where ``(x, y)`` is the
            box centre and ``(w, h)`` its size, in pixels.
        id: kept for backward compatibility; it is not used.

    Returns:
        The annotated frame.
    """
    frame_height, frame_width = image.shape[:2]
    # Crops are taken from an untouched copy so that rectangles and text drawn for
    # earlier boxes never end up inside the crop of a later, overlapping box.
    pristine = image.copy()

    for x, y, w, h in bounding_boxes:
        x, y, w, h = int(x), int(y), int(w), int(h)

        # Convert centre/size to corners and clamp them to the frame: a negative
        # start index would otherwise be interpreted as "count from the end" by
        # the slice below and produce a wrong or empty crop.
        l = max(0, int(x - w / 2))
        r = min(frame_width, int(x + w / 2))
        t = max(0, int(y - h / 2))
        b = min(frame_height, int(y + h / 2))
        if r <= l or b <= t:
            # Degenerate box: nothing to analyse or draw.
            continue

        cropped_image = pristine[t:b, l:r]
        emotion = sentiment_analysis(cropped_image)
        # NOTE(review): the identity is looked up but not shown yet -- the label and
        # colour below are still fixed to "1" / "id 1". Decide whether to display it.
        face_id = face_verification(cropped_image)

        image = cv2.rectangle(image, (l, t), (r, b), colors["id 1"], 2)
        image = cv2.putText(
            image,
            f"1:{labels[emotion]}",
            (l, t - 10),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            colors["id 1"],
            2,
        )

    return image

In [13]:
# Load the YOLOv8 model
model = YOLO('yolov8n.pt')

# Open the video file and fail loudly if it cannot be read; otherwise the loop
# below would silently process zero frames.
video_path = "experimentation_videos/videoplayback.mp4"
cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f"Could not open video file: {video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# MJPG is the codec the writer was actually created with; the variable now agrees.
fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
out = cv2.VideoWriter('model_output/output1.avi', fourcc, fps, (frame_width, frame_height))

try:
    # Loop through the video frames until the stream ends or 'q' is pressed
    while True:
        success, frame = cap.read()
        if not success:
            break

        # Run YOLOv8 detection on the frame, restricted to class 0.
        # NOTE(review): this is plain per-frame detection, not tracking; class 0 is
        # presumably "person" for this checkpoint, so the boxes are whole-person
        # boxes rather than face boxes -- confirm that is what draw() should crop.
        results = model(frame, classes=[0], verbose=False, conf=0.4)

        # Annotate the frame with the detections
        annotated_frame = draw(frame, results[0].boxes.xywh, id=1)

        # Display the annotated frame and append it to the output video
        cv2.imshow("YOLOv8 Tracking", annotated_frame)
        out.write(annotated_frame)

        # Break the loop if 'q' is pressed
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture, finalise the output file and close the window,
    # even if a frame fails to process.
    cap.release()
    out.release()
    cv2.destroyAllWindows()