## **STITCHING FRAMES TO VIDEO**

In [36]:
import cv2
import os
from pathlib import Path
import re
from tqdm.notebook import tqdm

In [38]:
# # Remove all image files from /content if you uploaded them by accident
# !rm /content/[0-9][0-9][0-9][0-9][0-9][0-9].png

In [62]:
def natural_sort_key(s):
    """
    Sort key for frame filenames such as '000000.png' or '000001.jpg'.

    Everything before the last '.' is converted to an int, so names order
    numerically. If that part is not a valid integer, a warning is printed and
    the filename itself is returned as the key.

    Note: the normal result is an int and the fallback is a str, so sorting a
    list that mixes both kinds of names raises TypeError in Python 3.
    """
    # rpartition yields an empty separator when the name contains no '.';
    # in that case the whole name is the numeric candidate.
    head, dot, tail = s.rpartition('.')
    stem = head if dot else tail

    try:
        key = int(stem)
    except ValueError:
        print(f"Warning: Unexpected filename format: {s}")
        key = s
    return key


def create_video_from_images(image_folder, output_path, fps=30, size=None):
    """
    Convert a sequence of numbered images into a video file.

    Only files whose name is exactly six decimal digits followed by a
    .png/.jpg/.jpeg extension (e.g. '000000.png') are used. Frames are written
    in ascending numeric order using the 'mp4v' codec.

    Parameters:
    - image_folder: Path to the folder containing image sequences
    - output_path: Path where the output video will be saved
    - fps: Frames per second for the output video (default: 30)
    - size: (width, height) for output video. If None, uses first image's size

    Returns:
    - True if the video was written. False if no matching images were found,
      the first image could not be read, the video writer could not be opened,
      or an exception was raised while writing frames.
    """
    # Collect the numbered frames. isdecimal() (rather than isdigit()) keeps the
    # filter in step with the int() conversion done by natural_sort_key:
    # characters such as superscript digits pass isdigit() but int() rejects them.
    image_files = []
    for f in os.listdir(image_folder):
        stem = f.rsplit('.', 1)[0]
        if (stem.isdecimal() and len(stem) == 6
                and f.lower().endswith(('.png', '.jpg', '.jpeg'))):
            image_files.append(f)

    if not image_files:
        print(f"No images found in {image_folder}")
        return False

    # Sort files numerically
    image_files.sort(key=natural_sort_key)
    print(f"Found {len(image_files)} images")

    # The first image validates the sequence and supplies the default size
    first_image_path = os.path.join(image_folder, image_files[0])
    first_image = cv2.imread(first_image_path)
    if first_image is None:
        print(f"Could not read first image: {first_image_path}")
        return False

    if size is None:
        height, width = first_image.shape[:2]
        size = (width, height)
        print(f"Video dimensions will be {width}x{height}")
    else:
        # frame.shape[:2] is a tuple; normalise so that a list such as
        # [640, 480] compares equal in the resize check below.
        size = tuple(size)

    # Define the codec and create VideoWriter object
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, size)
    if not out.isOpened():
        # Without this check a writer that failed to open drops every frame
        # silently and the function would still report success.
        out.release()
        print(f"Could not open video writer for {output_path}")
        return False

    expected_shape = size[::-1]  # (height, width), the order used by frame.shape
    skipped = 0

    try:
        # Process each image with tqdm progress bar
        for filename in tqdm(image_files, desc="Creating video"):
            image_path = os.path.join(image_folder, filename)
            frame = cv2.imread(image_path)

            if frame is None:
                print(f"\nCould not read image: {image_path}")
                skipped += 1
                continue

            # Resize frame if necessary
            if frame.shape[:2] != expected_shape:
                frame = cv2.resize(frame, size)

            # Write the frame
            out.write(frame)

    except Exception as e:
        print(f"\nError occurred: {str(e)}")
        return False

    finally:
        # Always release the writer, on success and on failure
        out.release()

    # Reported only on the success path; the failure path has already returned.
    if skipped:
        print(f"\nSkipped {skipped} unreadable image(s)")
    print("\nVideo creation completed!")
    return True

In [63]:
# Example usage: adjust the three settings below for your Colab session.
IMAGE_FOLDER = "/content/imgs"              # folder holding the numbered frames
OUTPUT_VIDEO = "/content/output_video.mp4"  # output file in Colab's temporary storage
FPS = 2                                     # frames per second of the output video

# Stitch the frames and report the outcome
success = create_video_from_images(IMAGE_FOLDER, OUTPUT_VIDEO, fps=FPS)

print(
    f"You can now manually download the video from {OUTPUT_VIDEO}"
    if success
    else "Video creation failed!"
)

Found 122 images
Video dimensions will be 640x480


Creating video:   0%|          | 0/122 [00:00<?, ?it/s]


Video creation completed!
You can now manually download the video from /content/output_video.mp4


## Fixing dependency issues (for insightface or onnxruntime, can't remember) - *Might not need this*

In [None]:
# Note: `nvidia-smi` reports the highest CUDA version that the installed GPU driver supports (12.4 when this was written),
# i.e. the GPU can run applications compiled against CUDA versions up to that one.
# If the command is not found, the runtime most likely has no GPU attached.

!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


In [None]:
!pip uninstall torch -y

Found existing installation: torch 2.1.2
Uninstalling torch-2.1.2:
  Successfully uninstalled torch-2.1.2


In [None]:
!pip install torch==2.5.0+cu124 torchvision==0.20.0+cu124 torchaudio==2.5.0+cu124 --extra-index-url https://download.pytorch.org/whl/cu124

Looking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu124


## **MAIN STARTING POINT**

In [1]:
!pip install mediapipe mtcnn retina-face==0.0.12 keras==2.13.1 ultralytics insightface onnxruntime dlib opencv-python opencv-contrib-python



In [2]:
import cv2
import mediapipe as mp
from mtcnn import MTCNN
from retinaface import RetinaFace
from ultralytics import YOLO
import insightface
import dlib

from google.colab import files
from IPython.display import clear_output

import os
import sys
import torch
import torch.nn.functional as F
import time
import numpy as np

### For CenterFace **(NOT USING ANYMORE BECAUSE OF ISSUES)**

In [None]:
# # Gotta install from GH
# !git clone https://github.com/Star-Clouds/CenterFace

fatal: destination path 'CenterFace' already exists and is not an empty directory.


In [None]:
# sys.path.append('CenterFace/prj-python')

In [None]:
# # Download model weights
# !wget https://raw.githubusercontent.com/Star-Clouds/CenterFace/master/models/onnx/centerface.onnx
# !wget https://raw.githubusercontent.com/Star-Clouds/CenterFace/master/models/onnx/centerface_bnmerged.onnx

--2025-01-30 20:35:10--  https://raw.githubusercontent.com/Star-Clouds/CenterFace/master/models/onnx/centerface.onnx
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7532772 (7.2M) [application/octet-stream]
Saving to: ‘centerface.onnx’


2025-01-30 20:35:11 (129 MB/s) - ‘centerface.onnx’ saved [7532772/7532772]

--2025-01-30 20:35:11--  https://raw.githubusercontent.com/Star-Clouds/CenterFace/master/models/onnx/centerface_bnmerged.onnx
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.111.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 7304518 (7.0M) [application/octet-stream]
Saving

In [None]:
# from centerface import CenterFace

In [None]:
# # Modify centerface.py content

# # Read the file
# with open('CenterFace/prj-python/centerface.py', 'r') as file:
#     content = file.read()

# # Replace these lines in the __init__ function:
# old_init = """        if self.landmarks:
#             self.net = cv2.dnn.readNetFromONNX('../models/onnx/centerface.onnx')
#         else:
#             self.net = cv2.dnn.readNetFromONNX('../models/onnx/cface.1k.onnx')"""

# new_init = """        if self.landmarks:
#             self.net = cv2.dnn.readNetFromONNX('centerface.onnx')
#         else:
#             self.net = cv2.dnn.readNetFromONNX('centerface_bnmerged.onnx')"""

# new_content = content.replace(old_init, new_init)

# # Write the modified content back
# with open('CenterFace/prj-python/centerface.py', 'w') as file:
#     file.write(new_content)

In [None]:
# # Read and modify centerface.py
# with open('CenterFace/prj-python/centerface.py', 'r') as file:
#     content = file.read()

# # Replace the forward calls with correct layer names
# content = content.replace(
#     'heatmap, scale, offset = self.net.forward(["535", "536", "537"])',
#     'heatmap, scale, offset = self.net.forward(["onnx_node_output_0!535", "onnx_node_output_0!536", "onnx_node_output_0!537"])'
# )

# content = content.replace(
#     'heatmap, scale, offset, lms = self.net.forward(["537", "538", "539", "540"])',
#     'heatmap, scale, offset, lms = self.net.forward(["onnx_node_output_0!537", "onnx_node_output_0!538", "onnx_node_output_0!539", "onnx_node_output_0!540"])'
# )

# # Write back the modified content
# with open('CenterFace/prj-python/centerface.py', 'w') as file:
#     file.write(content)

In [None]:
# # Read and modify centerface.py
# # with open('CenterFace/prj-python/centerface.py', 'r') as file:
# #     content = file.read()

# new_content = """import numpy as np
# import cv2
# import datetime


# class CenterFace(object):
#     def __init__(self, landmarks=True):
#         self.landmarks = landmarks
#         if self.landmarks:
#             self.net = cv2.dnn.readNetFromONNX('centerface.onnx')
#         else:
#             self.net = cv2.dnn.readNetFromONNX('centerface_bnmerged.onnx')
#         self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = 0, 0, 0, 0

#     def __call__(self, img, height, width, threshold=0.5):
#         self.img_h_new, self.img_w_new, self.scale_h, self.scale_w = self.transform(height, width)
#         return self.inference_opencv(img, threshold)

#     def inference_opencv(self, img, threshold):
#         blob = cv2.dnn.blobFromImage(img, scalefactor=1.0, size=(self.img_w_new, self.img_h_new), mean=(0, 0, 0), swapRB=True, crop=False)
#         self.net.setInput(blob)
#         begin = datetime.datetime.now()
#         lms = None  # Initialize lms
#         if self.landmarks:
#             heatmap, scale, offset, lms = self.net.forward(["onnx_node_output_0!537", "onnx_node_output_0!538", "onnx_node_output_0!539", "onnx_node_output_0!540"])
#         else:
#             heatmap, scale, offset = self.net.forward(["onnx_node_output_0!535", "onnx_node_output_0!536", "onnx_node_output_0!537"])
#         end = datetime.datetime.now()
#         print("cpu times = ", end - begin)
#         return self.postprocess(heatmap, lms, offset, scale, threshold)"""

# # Write the entire new content
# with open('CenterFace/prj-python/centerface.py', 'w') as file:
#     file.write(new_content + """
#     def transform(self, h, w):
#         img_h_new, img_w_new = int(np.ceil(h / 32) * 32), int(np.ceil(w / 32) * 32)
#         scale_h, scale_w = img_h_new / h, img_w_new / w
#         return img_h_new, img_w_new, scale_h, scale_w

#     def postprocess(self, heatmap, lms, offset, scale, threshold):
#         if self.landmarks:
#             dets, lms = self.decode(heatmap, scale, offset, lms, (self.img_h_new, self.img_w_new), threshold=threshold)
#         else:
#             dets = self.decode(heatmap, scale, offset, None, (self.img_h_new, self.img_w_new), threshold=threshold)
#         if len(dets) > 0:
#             dets[:, 0:4:2], dets[:, 1:4:2] = dets[:, 0:4:2] / self.scale_w, dets[:, 1:4:2] / self.scale_h
#             if self.landmarks:
#                 lms[:, 0:10:2], lms[:, 1:10:2] = lms[:, 0:10:2] / self.scale_w, lms[:, 1:10:2] / self.scale_h
#         else:
#             dets = np.empty(shape=[0, 5], dtype=np.float32)
#             if self.landmarks:
#                 lms = np.empty(shape=[0, 10], dtype=np.float32)
#         if self.landmarks:
#             return dets, lms
#         else:
#             return dets, None

#     def decode(self, heatmap, scale, offset, landmark, size, threshold=0.1):  # Increased threshold
#         heatmap = np.squeeze(heatmap)  # Remove batch dimension
#         # We need to handle the 3D heatmap properly
#         heatmap = heatmap[0]  # Take first channel
#         scale = np.squeeze(scale)
#         offset = np.squeeze(offset)

#         # Find top k detections instead of all points above threshold
#         max_detections = 1000  # Limit number of detections
#         heatmap_flat = heatmap.flatten()
#         top_k_indices = np.argsort(heatmap_flat)[-max_detections:]
#         top_k_values = heatmap_flat[top_k_indices]

#         # Convert flat indices back to 2D coordinates
#         h, w = heatmap.shape
#         c0 = top_k_indices // w
#         c1 = top_k_indices % w

#         if self.landmarks:
#             boxes, lms = [], []
#         else:
#             boxes = []

#         if len(c0) > 0:
#             for i in range(len(c0)):
#                 s0, s1 = np.exp(scale[c0[i], c1[i]]) * 4, np.exp(scale[c0[i], c1[i]]) * 4
#                 o0, o1 = offset[c0[i], c1[i]], offset[c0[i], c1[i]]
#                 s = heatmap[c0[i], c1[i]]

#                 if s < threshold:  # Skip low confidence detections
#                     continue

#                 x1, y1 = max(0, (c1[i] + o1 + 0.5) * 4 - s1 / 2), max(0, (c0[i] + o0 + 0.5) * 4 - s0 / 2)
#                 x1, y1 = min(x1, size[1]), min(y1, size[0])
#                 boxes.append([float(x1), float(y1),
#                             float(min(x1 + s1, size[1])),
#                             float(min(y1 + s0, size[0])),
#                             float(s)])

#         if len(boxes) > 0:
#             boxes = np.array(boxes, dtype=np.float32)
#             keep = self.nms(boxes[:, :4], boxes[:, 4], 0.3)
#             boxes = boxes[keep, :]
#         else:
#             boxes = np.empty((0, 5), dtype=np.float32)

#         return boxes

#     def nms(self, boxes, scores, nms_thresh):
#         x1 = boxes[:, 0]
#         y1 = boxes[:, 1]
#         x2 = boxes[:, 2]
#         y2 = boxes[:, 3]
#         areas = (x2 - x1 + 1) * (y2 - y1 + 1)
#         order = np.argsort(scores)[::-1]
#         num_detections = boxes.shape[0]
#         suppressed = np.zeros((num_detections,), dtype=np.bool)

#         keep = []
#         for _i in range(num_detections):
#             i = order[_i]
#             if suppressed[i]:
#                 continue
#             keep.append(i)

#             ix1 = x1[i]
#             iy1 = y1[i]
#             ix2 = x2[i]
#             iy2 = y2[i]
#             iarea = areas[i]

#             for _j in range(_i + 1, num_detections):
#                 j = order[_j]
#                 if suppressed[j]:
#                     continue

#                 xx1 = max(ix1, x1[j])
#                 yy1 = max(iy1, y1[j])
#                 xx2 = min(ix2, x2[j])
#                 yy2 = min(iy2, y2[j])
#                 w = max(0, xx2 - xx1 + 1)
#                 h = max(0, yy2 - yy1 + 1)

#                 inter = w * h
#                 ovr = inter / (iarea + areas[j] - inter)
#                 if ovr >= nms_thresh:
#                     suppressed[j] = True

#             return keep""")

In [None]:
# # Read and modify centerface.py
# with open('CenterFace/prj-python/centerface.py', 'r') as file:
#     content = file.read()

# # Replace deprecated numpy types
# content = content.replace('np.bool', 'bool')
# content = content.replace('dtype=np.bool', 'dtype=bool')
# content = content.replace('np.float', 'float')
# content = content.replace('dtype=float32', 'dtype=np.float32')
# content = content.replace('np.int', 'int')

# # Write back
# with open('CenterFace/prj-python/centerface.py', 'w') as file:
#     file.write(content)

### For RetinaFace: To fix the np.float and np.int error

In [3]:
# Fix the np.float / np.int deprecation errors raised by retinaface.
# You may need to restart the session/kernel after running this cell for the
# fix to take effect.

import os
import re

import retinaface

print("RetinaFace location:", retinaface.__file__)

# Resolve the file from the installed package instead of hard-coding a
# python3.11 dist-packages path, so the cell keeps working when the runtime's
# Python version or install location changes.
postprocess_path = os.path.join(
    os.path.dirname(retinaface.__file__), "commons", "postprocess.py"
)

with open(postprocess_path, "r") as src:
    postprocess_source = src.read()

# Replace only the bare aliases np.float and np.int. The word boundaries leave
# names such as np.float32, np.int32 or np.interp untouched; an unanchored
# 'np.int' -> 'int' substitution would turn those into undefined names.
patched_source = re.sub(r"\bnp\.(float|int)\b", r"\1", postprocess_source)

if patched_source != postprocess_source:
    with open(postprocess_path, "w") as dst:
        dst.write(patched_source)
    print(f"Patched {postprocess_path}")
else:
    print(f"No np.float / np.int aliases found in {postprocess_path}")

RetinaFace location: /usr/local/lib/python3.11/dist-packages/retinaface/__init__.py


### For YuNet

In [None]:
!wget https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx

--2025-02-06 06:02:39--  https://github.com/opencv/opencv_zoo/raw/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://media.githubusercontent.com/media/opencv/opencv_zoo/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx [following]
--2025-02-06 06:02:39--  https://media.githubusercontent.com/media/opencv/opencv_zoo/main/models/face_detection_yunet/face_detection_yunet_2023mar.onnx
Resolving media.githubusercontent.com (media.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to media.githubusercontent.com (media.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 232589 (227K) [application/octet-stream]
Saving to: ‘face_detection_yunet_2023mar.onnx’


2025-02-06 06:02:4

### For ULFG (Ultra-Light-Fast-Generic-Face-Detector)

In [73]:
!git clone https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB

Cloning into 'Ultra-Light-Fast-Generic-Face-Detector-1MB'...
remote: Enumerating objects: 953, done.[K
remote: Counting objects: 100% (169/169), done.[K
remote: Compressing objects: 100% (65/65), done.[K
remote: Total 953 (delta 120), reused 104 (delta 104), pack-reused 784 (from 1)[K
Receiving objects: 100% (953/953), 37.29 MiB | 26.80 MiB/s, done.
Resolving deltas: 100% (482/482), done.


In [74]:
!wget https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/masked_face/pretrained/RFB-640-masked_face-v2.pth

--2025-02-06 22:08:03--  https://github.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/raw/master/masked_face/pretrained/RFB-640-masked_face-v2.pth
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/master/masked_face/pretrained/RFB-640-masked_face-v2.pth [following]
--2025-02-06 22:08:04--  https://raw.githubusercontent.com/Linzaer/Ultra-Light-Fast-Generic-Face-Detector-1MB/master/masked_face/pretrained/RFB-640-masked_face-v2.pth
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1237721 (1.2M) [application/octet-stream]
Saving to: ‘RFB-640-m

In [75]:
# Add repository to path
sys.path.append('Ultra-Light-Fast-Generic-Face-Detector-1MB')
from vision.ssd.config.fd_config import define_img_size
from vision.ssd.mb_tiny_RFB_fd import create_Mb_Tiny_RFB_fd, create_Mb_Tiny_RFB_fd_predictor
from vision.utils.misc import Timer

### For DSFD **(NOT USING ANYMORE BECAUSE OF ISSUES)**

In [None]:
!git clone https://github.com/Tencent/FaceDetection-DSFD

Cloning into 'FaceDetection-DSFD'...
remote: Enumerating objects: 371, done.[K
remote: Counting objects: 100% (7/7), done.[K
remote: Compressing objects: 100% (6/6), done.[K
remote: Total 371 (delta 2), reused 4 (delta 1), pack-reused 364 (from 1)[K
Receiving objects: 100% (371/371), 148.63 MiB | 15.83 MiB/s, done.
Resolving deltas: 100% (170/170), done.


In [None]:
# !wget --load-cookies /tmp/cookies.txt "https://docs.google.com/uc?export=download&confirm=$(wget --quiet --save-cookies /tmp/cookies.txt --keep-session-cookies --no-check-certificate 'https://docs.google.com/uc?export=download&id=1WeXlNYsM6dMP3xQQELI-4gxhwKUQxc3-' -O- | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1\n/p')&id=1WeXlNYsM6dMP3xQQELI-4gxhwKUQxc3-" -O WIDERFace_DSFD_RES152.pth && rm -rf /tmp/cookies.txt

# Install gdown for Google Drive downloads
!pip install gdown

# Download weight using gdown
!gdown 1WeXlNYsM6dMP3xQQELI-4gxhwKUQxc3-

Downloading...
From (original): https://drive.google.com/uc?id=1WeXlNYsM6dMP3xQQELI-4gxhwKUQxc3-
From (redirected): https://drive.google.com/uc?id=1WeXlNYsM6dMP3xQQELI-4gxhwKUQxc3-&confirm=t&uuid=009addaf-e677-481a-881c-db9bbdf80fb6
To: /content/WIDERFace_DSFD_RES152.pth
100% 481M/481M [00:07<00:00, 65.0MB/s]


In [None]:
# TO FIX "RuntimeError: Legacy autograd function with non-static forward method is deprecated"

# Then modify the specific lines
face_ssd_path = 'FaceDetection-DSFD/face_ssd.py'

with open(face_ssd_path, 'r') as src:
    lines = src.readlines()

# Show the current line 345 so the patch target can be checked by eye
print("Line 345:", lines[344])

# New text for file lines 345-348 (list indices 344-347): the call is spelled
# out as self.detect.forward(...) with its three arguments.
replacement_lines = [
    '                output = self.detect.forward(\n',
    '                    face_loc.view(face_loc.size(0), -1, 4),\n',
    '                    self.softmax(face_conf.view(face_conf.size(0), -1, self.num_classes)),\n',
    '                    self.priors.type(type(x.data))\n',
]
for offset, new_text in enumerate(replacement_lines):
    lines[344 + offset] = new_text

# Persist the patched source
with open(face_ssd_path, 'w') as dst:
    dst.writelines(lines)

Line 345:                 output = self.detect.forward(



In [None]:
# TO REMOVE "pa_sfd_match"

# Read the file
multibox_loss_path = 'FaceDetection-DSFD/layers/modules/multibox_loss.py'

# The import line as it appears in the file, and the same line without pa_sfd_match
old_import = 'from ..box_utils import (log_sum_exp, match, pa_sfd_match, refine_match,'
new_import = 'from ..box_utils import (log_sum_exp, match, refine_match,'

# Load the module source and drop pa_sfd_match from the import in one step
with open(multibox_loss_path, 'r') as src:
    content = src.read().replace(old_import, new_import)

# Save the edited source over the original file
with open(multibox_loss_path, 'w') as dst:
    dst.write(content)

In [None]:
box_utils_path = 'FaceDetection-DSFD/layers/box_utils.py'

with open(box_utils_path, 'r') as file:
    content = file.readlines()

# Replacement for decode(): moves the priors and variances onto the same
# device as `loc` before computing the boxes.
decode_modification = """
def decode(loc, priors, variances):
    # Ensure everything is on the same device
    device = loc.device
    priors = priors.to(device)
    variances = [torch.tensor(v, device=device) for v in variances]

    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes
"""

# Build the patched file in memory first, so that a failure part-way through
# cannot leave box_utils.py truncated.
patched_lines = []
in_decode = False
decode_found = False
for line in content:
    if in_decode:
        # Blank and indented lines still belong to the old decode() body.
        # Ending the skip at the first blank line instead would leave the rest
        # of the old body behind and pile up leftovers on every re-run.
        if line.strip() == '' or line[0] in ' \t':
            continue
        # First line back at column 0: the next top-level statement.
        in_decode = False

    if line.startswith('def decode('):
        in_decode = True
        decode_found = True
        # Leading newline stripped and a fixed two-line gap appended so that
        # re-running the cell reproduces the same file.
        patched_lines.append(decode_modification.lstrip('\n'))
        patched_lines.append('\n\n')
        continue

    patched_lines.append(line)

if decode_found:
    with open(box_utils_path, 'w') as file:
        file.writelines(patched_lines)
else:
    print(f"'def decode(' not found in {box_utils_path}; file left unchanged")

In [None]:
sys.path.append('FaceDetection-DSFD')
from face_ssd import build_ssd

### For DBFace

In [10]:
!git clone https://github.com/dlunion/DBFace
!wget https://github.com/dlunion/DBFace/raw/master/model/dbface.pth

Cloning into 'DBFace'...
remote: Enumerating objects: 379, done.[K
remote: Counting objects: 100% (59/59), done.[K
remote: Compressing objects: 100% (16/16), done.[K
remote: Total 379 (delta 46), reused 43 (delta 43), pack-reused 320 (from 1)[K
Receiving objects: 100% (379/379), 50.43 MiB | 24.02 MiB/s, done.
Resolving deltas: 100% (171/171), done.
--2025-02-06 19:22:54--  https://github.com/dlunion/DBFace/raw/master/model/dbface.pth
Resolving github.com (github.com)... 140.82.121.4
Connecting to github.com (github.com)|140.82.121.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/dlunion/DBFace/master/model/dbface.pth [following]
--2025-02-06 19:22:55--  https://raw.githubusercontent.com/dlunion/DBFace/master/model/dbface.pth
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.1

In [11]:
sys.path.append('DBFace')
from model.DBFace import DBFace
import common

### Creating folder to store processed videos

In [4]:
# Create output directory if it doesn't exist
output_dir = '/content/processed_videos'
# exist_ok=True makes the cell safe to re-run and avoids the gap between
# checking for the directory and creating it. A non-directory at this path
# now raises FileExistsError instead of being silently accepted.
os.makedirs(output_dir, exist_ok=True)

### Main function

In [5]:
def test_single_model(video_path, model_name):
    # Initialize chosen model
    if model_name == "mediapipe":
        mp_face_detection = mp.solutions.face_detection
        model = mp_face_detection.FaceDetection(
            model_selection=1,
            min_detection_confidence=0.55
        )

    elif model_name == "mtcnn":
        model = MTCNN()

    elif model_name == "retinaface":
        pass

    elif model_name == "yolov11":
        # HAVE TO MANUALLY DOWNLOAD IT FROM GITHUB README (https://github.com/akanametov/yolo-face) THEN UPLOAD IT TO COLAB
        model = YOLO('yolov11l-face.pt')

    elif model_name == "insightface":
        # Check if GPU is available
        ctx_id = 0 if torch.cuda.is_available() else -1  # 0 for GPU, -1 for CPU
        model = insightface.app.FaceAnalysis()
        model.prepare(ctx_id=ctx_id)

    elif model_name == "opencv-dnn":
        # Define model files
        model_file = "res10_300x300_ssd_iter_140000.caffemodel"
        config_file = "deploy.prototxt"

        # Download model files if they don't exist
        if not os.path.exists(model_file):
            print("Downloading model file...")
            !wget https://raw.githubusercontent.com/opencv/opencv_3rdparty/dnn_samples_face_detector_20170830/res10_300x300_ssd_iter_140000.caffemodel -O res10_300x300_ssd_iter_140000.caffemodel

        if not os.path.exists(config_file):
            print("Downloading config file...")
            !wget https://raw.githubusercontent.com/opencv/opencv/master/samples/dnn/face_detector/deploy.prototxt -O deploy.prototxt

        # Verify files exist
        if not os.path.exists(model_file) or not os.path.exists(config_file):
            raise FileNotFoundError(f"Required files not found: {model_file} or {config_file}")

        model = cv2.dnn.readNet(model_file, config_file)


    elif model_name == "dlib":
        # Load Dlib's face detector
        model = dlib.get_frontal_face_detector()


    elif model_name == "yunet":
        # Initialize YuNet
        model = cv2.FaceDetectorYN.create(
            model="face_detection_yunet_2023mar.onnx",
            config="",
            input_size=(640, 480),  # Can adjust based on my needs
            score_threshold=0.6,
            nms_threshold=0.3,
            top_k=50,
            backend_id=cv2.dnn.DNN_BACKEND_DEFAULT,
            target_id=cv2.dnn.DNN_TARGET_CPU
        )

    elif model_name == "ulfg":
        # Set device
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"Using device: {device}")

        # Set image size
        define_img_size(640)

        # Create model with matching dimensions
        net = create_Mb_Tiny_RFB_fd(3, is_test=True, device=device)

        try:
            # Load the weights
            checkpoint = torch.load("RFB-640-masked_face-v2.pth", map_location=device)
            net.load_state_dict(checkpoint)
            # Create predictor with candidate_size here
            predictor = create_Mb_Tiny_RFB_fd_predictor(net,
                                                      candidate_size=1500,
                                                      device=device)
            print("Model loaded successfully")

        except Exception as e:
            print(f"Error loading model: {e}")
            raise e


    elif model_name == "dbface":
        # Initialize model
        model = DBFace()
        model.eval()
        model.load("dbface.pth")  # Use regular model, not small
        print("Model loaded successfully")

        # Define preprocessing parameters
        mean = [0.408, 0.447, 0.47]
        std = [0.289, 0.274, 0.278]


    # elif model_name == "centerface":
    #   centerface = CenterFace(landmarks=False)
    #   layer_names = centerface.net.getLayerNames()
    #   print("Available layers:", layer_names)
    #   model = centerface


    # elif model_name == "dsfd":
    #     # Initialize model
    #     net = build_ssd('test')

    #     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    #     print(f"Using device: {device}")

    #     # Load weights and verify
    #     weights = torch.load('WIDERFace_DSFD_RES152.pth', map_location=device)
    #     print("Weight keys:", weights.keys())  # Debug weights

    #     net = net.to(device)
    #     net.load_state_dict(weights)
    #     net.eval()

    #     if hasattr(net, 'detect'):
    #         net.detect.variance = [torch.tensor(v, device=device) for v in net.detect.variance]

    #     if hasattr(net, 'priors'):
    #         net.priors = net.priors.to(device)



    # Opening input video
    cap = cv2.VideoCapture(video_path)

    # Getting video properties
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    # Calculating output dimensions (70% of original)
    output_width = int(width * 0.7)
    output_height = int(height * 0.7)

    # Create output video writer
    '''
    The processed mp4 file sizes were too large for my liking, so I wanted to compress it using H.264 codec.
    Problem was, the processed vids were now corrupt or some shet. Wasn't letting me play them locally.
    So, decided to go with .avi and XVID codec as it's more reliable.
    The issue with MP4 in OpenCV is that it sometimes creates files that aren't properly finalized/encoded, making them unplayable.
    AVI with XVID is more consistently supported.
    '''
    output_path = os.path.join(output_dir, f'processed_{model_name}.avi')
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    out = cv2.VideoWriter(output_path,
                         fourcc,
                         fps,
                         (output_width, output_height))

    frame_count = 0
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))



    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        # Resize frame once to output dimensions
        frame = cv2.resize(frame, (output_width, output_height), interpolation=cv2.INTER_LANCZOS4)
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Get detections based on model type
        if model_name == "mediapipe":
            results = model.process(rgb_frame)
            if results.detections:
                for detection in results.detections:
                    bbox = detection.location_data.relative_bounding_box
                    h, w, _ = frame.shape
                    x = int(bbox.xmin * w)
                    y = int(bbox.ymin * h)
                    w = int(bbox.width * w)
                    h = int(bbox.height * h)
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        elif model_name == "mtcnn":
            results = model.detect_faces(rgb_frame)
            for result in results:
                x, y, w, h = result['box']
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)

        elif model_name == "retinaface":
            results = RetinaFace.detect_faces(frame)
            if isinstance(results, dict):
                for key in results.keys():
                    face = results[key]
                    facial_area = face['facial_area']
                    cv2.rectangle(frame,
                                (facial_area[0], facial_area[1]),
                                (facial_area[2], facial_area[3]),
                                (0, 255, 0),
                                2)

        elif model_name == "yolov11":
          results = model(frame)[0]
          for result in results.boxes.data:
              x1, y1, x2, y2, conf, _ = result
              if conf > 0.55:  # Confidence threshold
                  cv2.rectangle(frame,
                              (int(x1), int(y1)),
                              (int(x2), int(y2)),
                              (0, 255, 0),
                              2)

        elif model_name == "insightface":
            faces = model.get(rgb_frame)
            for face in faces:
                bbox = face.bbox.astype(int)
                cv2.rectangle(frame,
                            (bbox[0], bbox[1]),
                            (bbox[2], bbox[3]),
                            (0, 255, 0),
                            2)

        elif model_name == "opencv-dnn":
            blob = cv2.dnn.blobFromImage(frame, 1.0, (300, 300), [104, 117, 123])
            model.setInput(blob)
            detections = model.forward()
            for i in range(detections.shape[2]):
                confidence = detections[0, 0, i, 2]
                if confidence > 0.5:
                    box = detections[0, 0, i, 3:7] * np.array([output_width, output_height, output_width, output_height])
                    (x1, y1, x2, y2) = box.astype("int")
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)


        elif model_name == "dlib":
            # Dlib works with grayscale images
            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = model(gray)
            for face in faces:
                x = face.left()
                y = face.top()
                w = face.right() - face.left()
                h = face.bottom() - face.top()
                cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)


        elif model_name == "yunet":
            # Set input size
            model.setInputSize((frame.shape[1], frame.shape[0]))

            # Detect faces
            _, faces = model.detect(frame)

            # Draw detections
            if faces is not None:
                for face in faces:
                    box = list(map(int, face[:4]))
                    # Draw rectangle
                    cv2.rectangle(frame,
                                (box[0], box[1]),
                                (box[0] + box[2], box[1] + box[3]),
                                (0, 255, 0),
                                2)


        elif model_name == "ulfg":
            # Convert to RGB for model
            image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Remove candidate_size from predict call
            boxes, labels, probs = predictor.predict(image,
                                                   prob_threshold=0.4)  # Just use threshold

            # Process detections
            for i in range(boxes.size(0)):
                box = boxes[i, :]
                prob = probs[i]

                if prob > 0.4:  # Confidence threshold
                    x1, y1, x2, y2 = int(box[0]), int(box[1]), int(box[2]), int(box[3])

                    # Draw detection
                    cv2.rectangle(frame,
                                (x1, y1),
                                (x2, y2),
                                (0, 255, 0),
                                2)

                    # Add label
                    label = f"{prob:.2f}"
                    cv2.putText(frame,
                              label,
                              (x1, y1 - 10),
                              cv2.FONT_HERSHEY_SIMPLEX,
                              0.5,
                              (0, 255, 0),
                              2)


        elif model_name == "dbface":
            # Preprocess frame
            image = common.pad(frame)
            image = ((image / 255.0 - mean) / std).astype(np.float32)
            image = image.transpose(2, 0, 1)

            # Convert to tensor
            torch_image = torch.from_numpy(image)[None]

            # Get detections
            with torch.no_grad():
                hm, box, landmark = model(torch_image)

                # Post-process
                hm_pool = F.max_pool2d(hm, 3, 1, 1)
                scores, indices = ((hm == hm_pool).float() * hm).view(1, -1).cpu().topk(1000)

                scores = scores.squeeze()
                indices = indices.squeeze()
                hm_height, hm_width = hm.shape[2:]

                ys = list((indices / hm_width).int().data.numpy())
                xs = list((indices % hm_width).int().data.numpy())
                scores = list(scores.data.numpy())
                box = box.cpu().squeeze().data.numpy()

                # Process detections
                stride = 4
                threshold = 0.4
                for cx, cy, score in zip(xs, ys, scores):
                    if score < threshold:
                        break

                    # Get bounding box
                    x, y, r, b = box[:, cy, cx]
                    xyrb = (np.array([cx, cy, cx, cy]) + [-x, -y, r, b]) * stride

                    # Draw detection
                    x1, y1, x2, y2 = map(int, xyrb)
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"{score:.2f}",
                              (x1, y1-10),
                              cv2.FONT_HERSHEY_SIMPLEX,
                              0.5, (0, 255, 0), 2)



        # elif model_name == "centerface":
        #   h, w = frame.shape[:2]
        #   dets, _ = model(frame, h, w)
        #   for det in dets:
        #       boxes, score = det[:4], det[4]
        #       if score < 0.5:  # confidence threshold
        #           continue
        #       x1, y1, x2, y2 = boxes.astype(int)
        #       cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)


        # elif model_name == "dsfd":
        #     # Enhanced preprocessing
        #     img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        #     img = torch.from_numpy(img).float().permute(2, 0, 1)
        #     # Normalize properly
        #     img = (img - 128) / 128.0  # Try different normalization
        #     img = img.unsqueeze(0).to(device)

        #     try:
        #         with torch.no_grad():
        #             detections = net(img)

        #         # Print raw detection stats
        #         print(f"Detection stats - Min: {detections.min():.3f}, Max: {detections.max():.3f}")

        #         # Move to CPU and convert to numpy
        #         detections = detections.cpu().numpy()

        #         # Handle NMS error
        #         if detections.shape[2] > 0:  # If we have any detections
        #             for i in range(detections.shape[2]):
        #                 confidence = detections[0, 0, i, 0]
        #                 if confidence > 0.1:  # Lower threshold for testing
        #                     box = detections[0, 0, i, 1:5] * np.array([width, height, width, height])
        #                     (startX, startY, endX, endY) = box.astype("int")

        #                     cv2.rectangle(frame,
        #                                 (startX, startY),
        #                                 (endX, endY),
        #                                 (0, 255, 0),
        #                                 2)
        #                     # Print successful detection
        #                     print(f"Detection found! Confidence: {confidence:.3f}")

        #     except Exception as e:
        #         print(f"Error processing frame: {str(e)}")
        #         continue  # Skip problematic frames



        # Write frame
        out.write(frame)

        # Print progress
        frame_count += 1
        if frame_count % 30 == 0:
            print(f"Processing {model_name}: {frame_count}/{total_frames} frames")


    # Release resources
    cap.release()
    out.release()

### Infer

In [7]:
# Upload video
print("Please upload your video file")
uploaded = files.upload()

# `uploaded` is iterated for its keys below; presumably this is the
# google.colab `files.upload()` mapping of file name -> contents (confirm
# against the import cell). If nothing was uploaded (e.g. the dialog was
# cancelled), next(iter(...)) would raise a bare StopIteration, so fail early
# with an actionable message instead.
if not uploaded:
    raise RuntimeError("No file was uploaded; re-run this cell and select a video file.")

# Only the first uploaded file is processed.
video_path = next(iter(uploaded))

# Test each model
# Available names: "mediapipe", "mtcnn", "retinaface", "yolov11", "insightface", "opencv-dnn", "dlib", "yunet", "ulfg", "dbface"
models_to_test = ["retinaface"]
for model_name in models_to_test:
    print(f"\nStarting {model_name}...")
    test_single_model(video_path, model_name)

print("\nAll processing complete. Videos are saved in /content/processed_videos/")

Please upload your video file


Saving d4_c3.mp4 to d4_c3 (1).mp4

Starting retinaface...
Directory  /root /.deepface created
Directory  /root /.deepface/weights created
retinaface.h5 will be downloaded from the url https://github.com/serengil/deepface_models/releases/download/v1.0/retinaface.h5


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/retinaface.h5
To: /root/.deepface/weights/retinaface.h5
100%|██████████| 119M/119M [00:00<00:00, 208MB/s]


Processing retinaface: 30/122 frames
Processing retinaface: 60/122 frames
Processing retinaface: 90/122 frames
Processing retinaface: 120/122 frames

All processing complete. Videos are saved in /content/processed_videos/
