# In [None]:
import face_recognition
import cv2
import numpy as np
import pickle

# ---------------- FACE RECOGNITION SETUP ----------------
print("[INFO] loading face encodings...")
# SECURITY NOTE: unpickling can execute arbitrary code embedded in the file.
# "encodings.pickle" must come from a trusted source (e.g. your own training
# script), never from user-supplied or downloaded data.
with open("encodings.pickle", "rb") as f:
    # pickle.load reads straight from the file object instead of first
    # copying the whole file into an intermediate bytes object.
    data = pickle.load(f)
# Parallel sequences: known_face_names[i] is looked up with the index of the
# best match in known_face_encodings (see process_frame).
known_face_encodings = data["encodings"]
known_face_names = data["names"]

# Frames are shrunk by this factor before face detection; draw_results
# multiplies the detected box coordinates by the same factor.
cv_scaler = 4

# Per-frame face results: overwritten by process_frame on every call and
# read back by draw_results.
face_locations = []
face_encodings = []
face_names = []
face_confidences = []

# ---------------- YOLO DNN SETUP ----------------
print("[INFO] loading YOLO model...")
yolo_cfg = "yolov3.cfg"
yolo_weights = "yolov3.weights"
yolo_names = "coco.names"

# One class label per line; the line index is the class id that
# process_frame uses to look the label up.
with open(yolo_names, "r", encoding="utf-8") as f:
    class_names = [line.strip() for line in f]

net = cv2.dnn.readNetFromDarknet(yolo_cfg, yolo_weights)
net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV)
net.setPreferableTarget(cv2.dnn.DNN_TARGET_CPU)

layer_names = net.getLayerNames()
# getUnconnectedOutLayers() yields 1-based layer ids. Depending on the OpenCV
# version they arrive as a flat array or as an Nx1 array, so flatten first and
# convert to plain ints before indexing layer_names.
out_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
out_layers = [layer_names[int(layer_id) - 1] for layer_id in out_layer_ids]

# ---------------- STATS TRACKING ----------------
# Both registries start empty and are filled lazily by process_frame.
# Every entry uses the same record layout:
#   {"start": None, "end": None, "total": 0.0, "max_conf": 0.0, "min_conf": 1.0}
person_stats: dict = dict()  # keyed by recognised person name
object_stats: dict = dict()  # keyed by YOLO class name

# ---------------- FUNCTIONS ----------------
def _update_stats(stats, key, video_time, conf):
    """Record one sighting of *key* at *video_time* with confidence *conf*.

    Creates the entry on first sight, then refreshes "end", "total"
    (span between the first and the latest sighting) and the min/max
    confidence seen so far.
    """
    entry = stats.setdefault(key, {"start": None, "end": None, "total": 0.0,
                                   "max_conf": 0.0, "min_conf": 1.0})
    if entry["start"] is None:
        entry["start"] = video_time
    entry["end"] = video_time
    entry["total"] = entry["end"] - entry["start"]
    entry["max_conf"] = max(entry["max_conf"], conf)
    entry["min_conf"] = min(entry["min_conf"], conf)


def process_frame(frame, video_time, conf_threshold=0.5, nms_threshold=0.4):
    """Run face recognition and YOLO object detection on one video frame.

    Face results are stored in the module-level ``face_locations``,
    ``face_encodings``, ``face_names`` and ``face_confidences`` (they are
    drawn later by ``draw_results``). YOLO detections are drawn directly
    onto ``frame``. ``person_stats`` / ``object_stats`` are updated for every
    recognised person and every kept object detection.

    Args:
        frame: Image as returned by ``cv2.VideoCapture.read``; it is converted
            with ``COLOR_BGR2RGB`` for face recognition, so BGR channel order
            is expected.
        video_time: Timestamp of the frame in seconds, used for the stats.
        conf_threshold: Minimum class score for a YOLO detection to be kept;
            also passed to non-maximum suppression as its score threshold.
        nms_threshold: Overlap threshold passed to ``cv2.dnn.NMSBoxes``.

    Returns:
        The same ``frame`` object, with object boxes and labels drawn on it.
    """
    global face_locations, face_encodings, face_names, face_confidences

    # ---- FACE RECOGNITION ----
    # Detect on a downscaled copy to keep the per-frame cost low; the boxes
    # are scaled back up by cv_scaler in draw_results.
    resized_frame = cv2.resize(frame, (0, 0), fx=(1/cv_scaler), fy=(1/cv_scaler))
    rgb_resized_frame = cv2.cvtColor(resized_frame, cv2.COLOR_BGR2RGB)

    face_locations = face_recognition.face_locations(rgb_resized_frame)
    face_encodings = face_recognition.face_encodings(rgb_resized_frame, face_locations, model='large')

    face_names = []
    face_confidences = []
    for face_encoding in face_encodings:
        matches = face_recognition.compare_faces(known_face_encodings, face_encoding)
        name = "Unknown"
        confidence = 0.0

        face_distances = face_recognition.face_distance(known_face_encodings, face_encoding)
        if len(face_distances) > 0:
            # Closest known face wins, but only if it is also within the
            # match tolerance used by compare_faces.
            best_match_index = np.argmin(face_distances)
            if matches[best_match_index]:
                name = known_face_names[best_match_index]
                confidence = 1 - face_distances[best_match_index]

        face_names.append(f"{name} ({confidence*100:.1f}%)")
        face_confidences.append(confidence)

        if name != "Unknown":
            _update_stats(person_stats, name, video_time, confidence)

    # ---- YOLO OBJECT DETECTION ----
    H, W = frame.shape[:2]
    blob = cv2.dnn.blobFromImage(frame, 1/255.0, (416, 416), swapRB=True, crop=False)
    net.setInput(blob)
    detections = net.forward(out_layers)

    boxes, confidences, class_ids = [], [], []
    for output in detections:
        for det in output:
            # det[0:4] are the box centre/size relative to the frame size,
            # det[5:] are the per-class scores.
            scores = det[5:]
            class_id = int(np.argmax(scores))
            conf = float(scores[class_id])
            if conf > conf_threshold:
                center_x = int(det[0] * W)
                center_y = int(det[1] * H)
                w = int(det[2] * W)
                h = int(det[3] * H)
                x = int(center_x - w / 2)
                y = int(center_y - h / 2)
                boxes.append([x, y, w, h])
                confidences.append(conf)
                class_ids.append(class_id)

    # NMSBoxes returns an empty tuple (no .flatten()) when nothing is kept and,
    # depending on the OpenCV version, a flat or Nx1 array otherwise. Normalise
    # all of these to a flat sequence so frames without detections do not crash.
    if boxes:
        indices = cv2.dnn.NMSBoxes(boxes, confidences, conf_threshold, nms_threshold)
        kept = np.asarray(indices).reshape(-1)
    else:
        kept = ()

    for i in kept:
        i = int(i)
        x, y, w, h = boxes[i]
        classname = class_names[class_ids[i]]
        conf = confidences[i]
        label = f"{classname}: {int(conf*100)}%"
        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0), 2)
        cv2.putText(frame, label, (x, y-10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,0,0), 2)

        _update_stats(object_stats, classname, video_time, conf)

    return frame

def draw_results(frame):
    """Draw the latest face boxes and the running stats table onto *frame*.

    Reads the module-level ``face_locations`` / ``face_names`` produced by
    ``process_frame`` as well as ``person_stats`` and ``object_stats``.
    Returns the same ``frame`` object that was passed in.
    """
    # Face boxes were computed on the downscaled image, so scale every
    # coordinate back up by cv_scaler before drawing.
    for box, caption in zip(face_locations, face_names):
        top, right, bottom, left = (coord * cv_scaler for coord in box)
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 255, 255), 2)
        # Filled banner above the box that carries the name/confidence text.
        cv2.rectangle(frame, (left-3, top-35), (right+3, top), (0, 255, 255), cv2.FILLED)
        cv2.putText(frame, caption, (left+6, top-6),
                    cv2.FONT_HERSHEY_DUPLEX, 1.0, (0,0,0), 2)

    # Stats table: people first, then objects, one text line per entry,
    # each registry in its own colour.
    overlays = (
        (person_stats, (255, 255, 0)),
        (object_stats, (0, 255, 255)),
    )
    line_y = 70
    for registry, colour in overlays:
        for key, stats in registry.items():
            text = f"{key}: {stats['total']:.1f}s | Max {stats['max_conf']*100:.1f}% | Min {stats['min_conf']*100:.1f}%"
            cv2.putText(frame, text, (10, line_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, colour, 2)
            line_y += 25

    return frame

# ---------------- MAIN LOOP ----------------
video = cv2.VideoCapture("nav_backlight.mp4")
if not video.isOpened():
    # Fail loudly instead of silently producing an empty output file.
    video.release()
    raise RuntimeError("Could not open input video 'nav_backlight.mp4'")

fps = video.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    # A reported rate of 0 (or NaN) would make the timestamp division below
    # fail; fall back to a nominal rate.
    print("[WARN] input video reports no valid FPS; assuming 30.0")
    fps = 30.0

frame_size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)),
              int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
fourcc = cv2.VideoWriter_fourcc(*'mp4v')

out = None
# Counted locally: CAP_PROP_POS_FRAMES read after read() is the index of the
# *next* frame, which would stamp every frame one frame interval too late.
frame_index = 0
try:
    out = cv2.VideoWriter("output_backlight.mp4", fourcc, fps, frame_size)

    while True:
        ret, frame = video.read()
        if not ret:
            break

        video_time = frame_index / fps  # seconds into video timeline
        frame_index += 1

        processed_frame = process_frame(frame, video_time)
        display_frame = draw_results(processed_frame)

        out.write(display_frame)
        cv2.imshow('Face + YOLO DNN Detection', display_frame)

        # Mask to the low byte: some platforms set extra high bits in the
        # value returned by waitKey.
        if cv2.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always release the capture/writer and close windows, even if
    # processing raised, so the output file is finalised.
    video.release()
    if out is not None:
        out.release()
    cv2.destroyAllWindows()
