In [13]:
import cv2
import numpy as np
import os 

In [15]:
# --- Configuration ---
DET_PROTOTXT = r"pretrained_models/deploy.prototxt"
DET_MODEL = r"pretrained_models/res10_300x300_ssd_iter_140000.caffemodel"
REC_MODEL = r"pretrained_models/face_recognition_sface_2021dec.onnx"
TARGET_IMAGE_FOLDER = "data/train"
CONFIDENCE_THRESHOLD = 0.5 # Minimum confidence for face detection
RECOGNITION_THRESHOLD = 0.9 # Cosine similarity threshold for matching (adjust based on testing)

In [11]:
# --- Load Models ---
print("[INFO] Loading face detector model...")
detector_net = cv2.dnn.readNetFromCaffe(DET_PROTOTXT, DET_MODEL)

print("[INFO] Loading face recognition model...")
recognizer_net = cv2.dnn.readNetFromONNX(REC_MODEL)
print("LOADED!!!")

[INFO] Loading face detector model...


error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\caffe\caffe_io.cpp:1126: error: (-2:Unspecified error) FAILED: fs.is_open(). Can't open "pretrained_models/deploy.prototxt" in function 'cv::dnn::ReadProtoFromTextFile'


In [40]:
target_embeddings = []
print(f"[INFO] Processing target images from {TARGET_IMAGE_FOLDER}...")

for image_name in os.listdir(TARGET_IMAGE_FOLDER):
    image_path = os.path.join(TARGET_IMAGE_FOLDER, image_name)
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: Could not read image {image_path}")
        continue

    (h, w) = image.shape[:2]

    # 1. Detect face(s) in the target image
    blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
    detector_net.setInput(blob)
    detections = detector_net.forward()

    # Assume the largest face is the target if multiple are found
    best_face_confidence = -1
    best_face_box = None

    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]
        if confidence > CONFIDENCE_THRESHOLD:
             # Get bounding box, ensuring it's within image bounds
             box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
             (startX, startY, endX, endY) = box.astype("int")
             startX = max(0, startX)
             startY = max(0, startY)
             endX = min(w - 1, endX)
             endY = min(h - 1, endY)

             # Basic check for valid box size
             if endX > startX and endY > startY:
                 # Keep track of the most confident face detection
                 if confidence > best_face_confidence:
                     best_face_confidence = confidence
                     best_face_box = (startX, startY, endX, endY)

    # 2. Extract embedding if a face was found
    if best_face_box is not None:
        (startX, startY, endX, endY) = best_face_box
        face_roi = image[startY:endY, startX:endX]

        if face_roi.size == 0:
            print(f"Warning: Empty face ROI extracted from {image_path}")
            continue

        # Preprocess face for recognition model (specific to SFace)
        face_blob = cv2.dnn.blobFromImage(face_roi, 1.0 / 127.5, (112, 112), (127.5, 127.5, 127.5), swapRB=True)
        recognizer_net.setInput(face_blob)
        embedding = recognizer_net.forward()
        target_embeddings.append(embedding.flatten()) # Store the flattened embedding vector
        print(f"   -> Added embedding from {image_name}")
    else:
        print(f"Warning: No face detected in {image_path} above threshold {CONFIDENCE_THRESHOLD}")

if not target_embeddings:
    print("[ERROR] No target embeddings generated. Check target images and detection settings.")
    exit()

print(f"[INFO] Generated {len(target_embeddings)} target embeddings.")
# Optional: Average the embeddings for a single target representation
# target_embedding_avg = np.mean(target_embeddings, axis=0)

[INFO] Processing target images from data/train...


error: OpenCV(4.10.0) D:\a\opencv-python\opencv-python\opencv\modules\dnn\src\layers\convolution_layer.cpp:368: error: (-215:Assertion failed) !blobs.empty() || inputs.size() > 1 in function 'cv::dnn::ConvolutionLayerImpl::getMemoryShapes'


In [None]:
# --- Video Processing ---
VIDEO_SOURCE = "data/test/vid1.mp4" # Or 0 for webcam
OUTPUT_VIDEO_PATH = "data/test/vid1_PROCESSED.mp4" # Optional: Save output
BLUR_KERNEL_SIZE = (99, 99) # Must be odd numbers; larger means more blur

print(f"[INFO] Starting video processing from {VIDEO_SOURCE}...")
cap = cv2.VideoCapture(VIDEO_SOURCE)

# Optional: Setup Video Writer
writer = None
if OUTPUT_VIDEO_PATH:
    try:
        # Get video properties for the writer
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = int(cap.get(cv2.CAP_PROP_FPS))
        # Define the codec and create VideoWriter object
        fourcc = cv2.VideoWriter_fourcc(*'mp4v') # Or 'XVID', 'MJPG', etc.
        writer = cv2.VideoWriter(OUTPUT_VIDEO_PATH, fourcc, fps, (frame_width, frame_height))
        print(f"[INFO] Output video will be saved to {OUTPUT_VIDEO_PATH}")
    except Exception as e:
        print(f"[WARNING] Could not initialize video writer: {e}")
        writer = None

# Cosine Similarity function
def cosine_similarity(vec1, vec2):
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    if norm_vec1 == 0 or norm_vec2 == 0:
        return 0.0 # Avoid division by zero
    return np.dot(vec1, vec2) / (norm_vec1 * norm_vec2)

while True:
    ret, frame = cap.read()
    if not ret:
        print("[INFO] End of video stream.")
        break

    (h, w) = frame.shape[:2]

    # 1. Detect faces in the current frame
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))
    detector_net.setInput(blob)
    detections = detector_net.forward()

    # 2. Process each detected face
    for i in range(0, detections.shape[2]):
        confidence = detections[0, 0, i, 2]

        if confidence > CONFIDENCE_THRESHOLD:
            # Get bounding box
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # Ensure box coordinates are within frame boundaries
            startX = max(0, startX)
            startY = max(0, startY)
            endX = min(w - 1, endX)
            endY = min(h - 1, endY)

            # Check for valid box size
            if endX <= startX or endY <= startY:
                continue

            # 3. Extract face ROI and get embedding
            face_roi = frame[startY:endY, startX:endX]
            if face_roi.size == 0: continue # Skip empty ROIs

            face_blob = cv2.dnn.blobFromImage(face_roi, 1.0 / 127.5, (112, 112), (127.5, 127.5, 127.5), swapRB=True)
            recognizer_net.setInput(face_blob)
            current_embedding = recognizer_net.forward().flatten()

            # 4. Compare with target embeddings
            is_target = False
            max_similarity = 0 # Keep track of the best match similarity
            for target_emb in target_embeddings:
                similarity = cosine_similarity(current_embedding, target_emb)
                max_similarity = max(max_similarity, similarity)
                if similarity > RECOGNITION_THRESHOLD:
                    is_target = True
                    break # Found a match

            # 5. Blur if it's the target person
            if is_target:
                # Apply Gaussian Blur
                blurred_face = cv2.GaussianBlur(face_roi, BLUR_KERNEL_SIZE, 0)
                # Put the blurred face back into the frame
                frame[startY:endY, startX:endX] = blurred_face

                # Optional: Draw box and label (for debugging/visualization)
                label = f"TARGET MATCH: {max_similarity:.2f}"
                cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2) # Red box for target
                y = startY - 10 if startY - 10 > 10 else startY + 10
                cv2.putText(frame, label, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)
            # else:
                # Optional: Draw a different color box for non-target faces
                # cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 255, 0), 2) # Green box

    # 6. Display the output frame
    cv2.imshow("Frame", frame)

    # 7. Write frame to output video (if writer is initialized)
    if writer is not None:
        writer.write(frame)

    # 8. Exit condition
    key = cv2.waitKey(1) & 0xFF
    if key == ord("q"):
        break

# --- Cleanup ---
print("[INFO] Cleaning up...")
cap.release()
if writer is not None:
    writer.release()
cv2.destroyAllWindows()
print("[INFO] Finished.")