In [1]:
import numpy as np
import cv2
import onnxruntime as ort
from deep_sort_realtime.deepsort_tracker import DeepSort
from deepface import DeepFace




In [2]:
# Identifiers of the selectable display modes (values 0..6, in this order).
NORMAL, FEATURES, CANNY, FOURIER, GRAY, FOURIER_HIGH, FOURIER_LOW = range(7)

# Keyword arguments forwarded to cv2.goodFeaturesToTrack in FEATURES mode.
feature_params = {
    "maxCorners": 400,
    "qualityLevel": 0.1,
    "minDistance": 15,
    "blockSize": 9,
}

# Index of the capture device handed to OpenCV.
camera_ind = 0
source = cv2.VideoCapture(camera_ind)

# Resizable preview window; the capture loop renders every frame into it.
cv2.namedWindow("Camera", cv2.WINDOW_NORMAL)

# Loop state: `alive` keeps the capture loop running, `image_filter` holds
# the currently selected display mode.
alive = True
image_filter = NORMAL

# Side length of the square window, centred on the shifted spectrum, that is
# zeroed (high-pass) or kept (low-pass).
HIGH_PASS_WINDOW = 55
LOW_PASS_WINDOW = 20

# Keyboard shortcuts that switch the display mode ('q' quits the loop).
KEY_TO_FILTER = {
    ord('n'): NORMAL,
    ord('c'): CANNY,
    ord('d'): FEATURES,
    ord('f'): FOURIER,
    ord('h'): FOURIER_HIGH,
    ord('l'): FOURIER_LOW,
    ord('g'): GRAY,
}


def _to_uint8(image):
    """Min-max scale ``image`` to the 0-255 range and convert it to uint8."""
    scaled = cv2.normalize(image, None, 0, 255, cv2.NORM_MINMAX)
    return scaled.astype(np.uint8)


def _centered_spectrum(frame):
    """Return the 2-D FFT of the grayscale frame, shifted with np.fft.fftshift."""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return np.fft.fftshift(np.fft.fft2(gray))


def _center_window(shape, size):
    """Return (row_slice, col_slice) of a ``size`` x ``size`` window centred in ``shape``."""
    rows, cols = shape[:2]
    # Clamp at 0 so frames smaller than the window do not yield negative
    # slice starts (which would wrap around to the end of the axis).
    top = max((rows - size) // 2, 0)
    left = max((cols - size) // 2, 0)
    return slice(top, top + size), slice(left, left + size)


def _from_spectrum(spectrum):
    """Undo the shift, invert the FFT and return the real part as a uint8 image."""
    restored = np.real(np.fft.ifft2(np.fft.ifftshift(spectrum)))
    return _to_uint8(restored)


def _draw_corners(frame):
    """Return a copy of ``frame`` with a circle drawn on every detected corner."""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    corners = cv2.goodFeaturesToTrack(gray, **feature_params)
    # Always start from the current frame so that a frame without corners is
    # still displayed instead of a stale result from an earlier iteration.
    annotated = frame.copy()
    if corners is not None:
        for x, y in np.float32(corners).reshape(-1, 2):
            cv2.circle(annotated, (int(x), int(y)), 10, (255, 0, 255), 1)
    return annotated


def _spectrum_image(frame):
    """Return the log-magnitude of the centred spectrum as a uint8 image."""
    magnitude = np.abs(_centered_spectrum(frame))
    # log1p keeps zero-magnitude bins finite (log(0) would be -inf and break
    # the min-max normalisation).
    return _to_uint8(np.log1p(magnitude))


def _high_pass(frame):
    """Zero the central window of the shifted spectrum and transform back."""
    spectrum = _centered_spectrum(frame)
    spectrum[_center_window(spectrum.shape, HIGH_PASS_WINDOW)] = 0
    return _from_spectrum(spectrum)


def _low_pass(frame):
    """Keep only the central window of the shifted spectrum and transform back."""
    spectrum = _centered_spectrum(frame)
    window = _center_window(spectrum.shape, LOW_PASS_WINDOW)
    kept = np.zeros_like(spectrum)
    kept[window] = spectrum[window]
    return _from_spectrum(kept)


# Display mode -> function that turns a BGR frame into the image to show.
FILTERS = {
    NORMAL: lambda frame: frame,
    CANNY: lambda frame: cv2.Canny(frame, 80, 150),
    FEATURES: _draw_corners,
    FOURIER: _spectrum_image,
    FOURIER_HIGH: _high_pass,
    FOURIER_LOW: _low_pass,
    GRAY: lambda frame: cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY),
}

try:
    while alive:
        has_frame, frame = source.read()

        if not has_frame:
            break

        result = FILTERS[image_filter](frame)
        cv2.imshow("Camera", result)

        # Mask to the low byte so key codes carrying modifier bits in the
        # upper bits still compare equal to ord(...).
        key = cv2.waitKey(1) & 0xFF
        if key == ord('q'):
            alive = False
        elif key in KEY_TO_FILTER:
            image_filter = KEY_TO_FILTER[key]
finally:
    # Release the camera and close the window even if a filter raised.
    source.release()
    cv2.destroyAllWindows()

In [None]:
import time

# Reference timestamp for the FPS estimate computed in the processing loop.
prev_time = time.time()

# Default camera plus the flag that keeps the processing loop running.
source = cv2.VideoCapture(0)
alive = True

# ONNX face detector and the DeepSort tracker fed with its detections.
session = ort.InferenceSession("yolov11m-face-dynamic.onnx")
tracker = DeepSort(max_age=50, n_init=2, nms_max_overlap=0.4)

# Name and declared shape of the model's first input; the name is the key
# used when feeding the preprocessed frame to session.run.
first_input = session.get_inputs()[0]
input_name = first_input.name
input_shape = first_input.shape

# Most recent detections; reused on frames where the detector is skipped.
deep_input = []


def preprocess(frame, size=(128, 128)):
    """Convert a BGR frame into a normalised NCHW float32 batch of one image.

    Args:
        frame: Image array in BGR channel order with shape (H, W, 3).
        size: Target ``(width, height)`` passed to ``cv2.resize``. Defaults to
            ``(128, 128)``, the size used by the original implementation.

    Returns:
        A C-contiguous ``float32`` array of shape ``(1, 3, height, width)``
        in RGB channel order, with values divided by 255.
    """
    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(rgb, size)
    # HWC -> CHW, then add the leading batch axis.
    batch = np.expand_dims(np.transpose(resized, (2, 0, 1)), axis=0)
    scaled = batch.astype(np.float32) / 255.0
    # The transpose above only permutes strides; hand back a contiguous
    # buffer so consumers that need C order do not have to copy again.
    return np.ascontiguousarray(scaled)


def yolo_nms(preds, scaleX, scaleY, conf_threshold=0.75, nms_threshold=0.4):
    """Filter raw detections by confidence and run non-maximum suppression.

    Args:
        preds: Iterable of rows ``(x_center, y_center, w, h, conf)``.
        scaleX: Factor applied to x coordinates and widths.
        scaleY: Factor applied to y coordinates and heights.
        conf_threshold: Rows whose confidence is not above this value are
            dropped; it is also passed to ``cv2.dnn.NMSBoxes`` as the score
            threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        A list of ``([x, y, w, h], confidence, 0)`` tuples for the boxes kept
        by NMS, with ``x, y`` the scaled top-left corner. The list is empty
        when nothing passes the confidence filter or NMS keeps nothing.
    """
    candidate_boxes = []
    candidate_scores = []

    for row in preds:
        # Index 4 is the confidence; skip weak rows before unpacking them.
        if not row[4] > conf_threshold:
            continue

        cx, cy, box_w, box_h, score = row

        # Centre/size -> top-left/size, scaled and truncated to integers.
        left = int((cx - box_w / 2) * scaleX)
        top = int((cy - box_h / 2) * scaleY)
        candidate_boxes.append([left, top, int(box_w * scaleX), int(box_h * scaleY)])
        candidate_scores.append(float(score))

    if not candidate_boxes:
        return []

    kept = cv2.dnn.NMSBoxes(candidate_boxes, candidate_scores, conf_threshold, nms_threshold)
    if len(kept) == 0:
        return []

    return [
        (list(candidate_boxes[i]), candidate_scores[i], 0)
        for i in kept.flatten()
    ]

frame_count = 0

# FPS shown in the overlay; initialised so the overlay works even when no
# frame interval has been measured yet or no face is being tracked.
fps = 0.0
prev_time = time.time()

# Side length of the square image that preprocess() feeds to the detector;
# used to map detector coordinates back to frame pixels.
DETECTOR_INPUT_SIZE = 128
# Pixels of context added around a tracked box before emotion analysis.
FACE_MARGIN = 5
# Only the emotion result is displayed, so only that analysis is requested.
# Add 'age', 'gender' or 'race' here if those values are needed again.
ANALYSIS_ACTIONS = ['emotion']

try:
    while alive:
        ok, frame = source.read()

        # Check the read status before touching the frame: on failure the
        # frame is not a usable image.
        if not ok or frame is None:
            break

        frame_count += 1
        frame_h, frame_w = frame.shape[:2]
        scaleX = frame_w / DETECTOR_INPUT_SIZE
        scaleY = frame_h / DETECTOR_INPUT_SIZE

        # Run the detector on every second frame only; in between, the
        # previous detections are passed to the tracker again.
        if (frame_count % 2) == 0:
            blob = preprocess(frame)
            detections = session.run(None, {input_name: blob})
            preds = detections[0][0].T
            deep_input = yolo_nms(preds, scaleX, scaleY)

        tracks = tracker.update_tracks(deep_input, frame=frame)

        # Draw on a copy so that face crops are always taken from clean
        # pixels, not from a frame already annotated for another track.
        annotated = frame.copy()

        for track in tracks:
            if not track.is_confirmed():
                continue

            track_id = track.track_id
            x_min, y_min, x_max, y_max = map(int, track.to_ltrb())

            # Clamp the padded crop to the image; a negative slice start
            # would otherwise wrap around to the opposite edge.
            crop_left = max(x_min - FACE_MARGIN, 0)
            crop_top = max(y_min - FACE_MARGIN, 0)
            crop_right = min(x_max + FACE_MARGIN, frame_w)
            crop_bottom = min(y_max + FACE_MARGIN, frame_h)
            if crop_right <= crop_left or crop_bottom <= crop_top:
                continue

            face = frame[crop_top:crop_bottom, crop_left:crop_right, :]

            result = DeepFace.analyze(
                face,
                actions=ANALYSIS_ACTIONS,
                enforce_detection=False,
                detector_backend='skip',
                silent=True
            )

            emotion = result[0]['dominant_emotion']
            emotions = result[0]['emotion']
            conf = emotions[emotion] / 100

            # Keep the label inside the image for boxes touching the top edge.
            label_y = max(y_min - 5, 15)
            cv2.rectangle(annotated, (x_min, y_min), (x_max, y_max), (255, 0, 0), 2)
            cv2.putText(annotated, f"ID: {track_id}; Emotion: {emotion} & Score: {conf:.2f}", (x_min, label_y), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 0), 2)

        # Measure the frame rate once per frame, independent of how many
        # tracks were processed.
        curr_time = time.time()
        elapsed = curr_time - prev_time
        prev_time = curr_time
        if elapsed > 0:
            fps = 1 / elapsed

        cv2.putText(annotated, f"FPS: {fps:.1f}", (10, 30), cv2.FONT_HERSHEY_PLAIN, 2, (0, 255, 0), 2)

        cv2.imshow('Face', annotated)
        key = cv2.waitKey(1)
        if key == ord('q'):
            alive = False
finally:
    # Release the camera and close the window even if inference raised.
    source.release()
    cv2.destroyAllWindows()