In [1]:
%reload_ext watermark
%watermark -v -p numpy,pandas,pycocotools,torch,torchvision,detectron2,tensorflow


Python implementation: CPython
Python version       : 3.11.7
IPython version      : 8.20.0

numpy      : 1.26.4
pandas     : 2.1.4
pycocotools: 2.0.6
torch      : 2.2.1
torchvision: 0.17.1
detectron2 : 0.6
tensorflow : 2.16.1
cuda       : not installed



In [2]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2024 NVIDIA Corporation
Built on Tue_Feb_27_16:28:36_Pacific_Standard_Time_2024
Cuda compilation tools, release 12.4, V12.4.99
Build cuda_12.4.r12.4/compiler.33961263_0
torch:  2.2 ; cuda:  2.2.1
detectron2: 0.6


## Face Detection

In [6]:
import torch
import cv2
import numpy as np
import os
from detectron2.config import get_cfg
from detectron2.engine import DefaultPredictor
from detectron2.model_zoo import model_zoo
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.structures import BoxMode
from detectron2.engine import DefaultTrainer
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
import ntpath
import pandas as pd
from PIL import Image
import urllib.request
import matplotlib.pyplot as plt
import itertools
import torchvision

# Load face detection model
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
cfg.MODEL.DEVICE = 'cpu'  # Set device to CPU
predictor = DefaultPredictor(cfg)

def detect_faces(frame):
    outputs = predictor(frame)
    instances = outputs["instances"]
    pred_classes = instances.pred_classes if instances.has("pred_classes") else None
    boxes = instances.pred_boxes if instances.has("pred_boxes") else None

    detected_faces = []
    if pred_classes is not None and boxes is not None:
        for class_idx, box in zip(pred_classes, boxes):
            if class_idx == 0:  # 0 corresponds to 'person' class in COCO dataset
                box = box.cpu().numpy().astype(int)  # Convert box tensor to numpy array
                x0, y0, x1, y1 = box[0], box[1], box[2], box[3]
                detected_faces.append(frame[y0:y1, x0:x1])
    return detected_faces

def save_faces(detected_faces, output_dir, frame_number):
    count = 0
    for i, face in enumerate(detected_faces):
        face_name = f"{frame_number}_{count}.jpg"
        save_path = os.path.join(output_dir, face_name)
        cv2.imwrite(save_path, face)
        print(f"Saved image: {face_name}")
        count += 1
    return count

def add_frame_number(frame, frame_number):
    font = cv2.FONT_HERSHEY_SIMPLEX
    position = (10, 30)
    font_scale = 1
    font_color = (0, 255, 0)  # Green color
    line_type = 2

    cv2.putText(frame, f"Frame: {frame_number}", position, font, font_scale, font_color, line_type)

def process_video(video_path, output_dir):
    # Open video file
    cap = cv2.VideoCapture(video_path)

    frame_number = 0
    total_detected_images = 0
    frame_skip = 8  # Skip 8 frames after reading one frame

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Read only every 9th frame
        if frame_number % 9 == 0:
            # Add frame number to the frame
            add_frame_number(frame, frame_number)

            # Detect faces in the frame
            detected_faces = detect_faces(frame)

            # Save detected faces
            count = save_faces(detected_faces, output_dir, frame_number)
            total_detected_images += count

        frame_number += 1
        cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number * frame_skip)

    # Release video capture object
    cap.release()
    cv2.destroyAllWindows()

    return total_detected_images

# Ask user for video file path
video_path = input("Enter the path to the video file: ")

# Create output directory
output_dir = "detected_faces"
os.makedirs(output_dir, exist_ok=True)

# Process video
total_detected_images = process_video(video_path, output_dir)

print("Faces detected and saved successfully.")
print("Total detected images:", total_detected_images)


Saved image: 0_0.jpg
Saved image: 0_1.jpg
Saved image: 0_2.jpg
Saved image: 0_3.jpg
Saved image: 0_4.jpg
Saved image: 0_5.jpg
Saved image: 0_6.jpg
Saved image: 9_0.jpg
Saved image: 9_1.jpg
Saved image: 9_2.jpg
Saved image: 9_3.jpg
Saved image: 9_4.jpg
Saved image: 9_5.jpg
Saved image: 9_6.jpg
Saved image: 18_0.jpg
Saved image: 18_1.jpg
Saved image: 27_0.jpg
Saved image: 27_1.jpg
Saved image: 36_0.jpg
Saved image: 36_1.jpg
Saved image: 36_2.jpg
Saved image: 45_0.jpg
Saved image: 45_1.jpg
Saved image: 45_2.jpg
Saved image: 54_0.jpg
Saved image: 54_1.jpg
Saved image: 63_0.jpg
Saved image: 72_0.jpg
Saved image: 72_1.jpg
Saved image: 81_0.jpg
Saved image: 90_0.jpg
Saved image: 90_1.jpg
Saved image: 99_0.jpg
Saved image: 99_1.jpg
Saved image: 108_0.jpg
Saved image: 117_0.jpg
Saved image: 126_0.jpg
Saved image: 126_1.jpg
Saved image: 126_2.jpg
Saved image: 126_3.jpg
Saved image: 126_4.jpg
Saved image: 126_5.jpg
Saved image: 126_6.jpg
Saved image: 126_7.jpg
Saved image: 126_8.jpg
Saved image: 

## Face Recognition

## Face Recognition Trail - 1