<a href="https://colab.research.google.com/github/Nivratti/retinaface_ncnn/blob/main/retinaface_ncc_cppVspython.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install jupyter-autotime
%load_ext autotime

The autotime extension is already loaded. To reload it, use:
  %reload_ext autotime


In [4]:
from IPython.display import clear_output

## Check GPU

In [3]:
!nvidia-smi

Tue Jun  1 09:52:17 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 465.19.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P0    30W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Install ncnn (ON GPU)

In [5]:
!sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libvulkan-dev vulkan-utils libopencv-dev
clear_output()
!vulkaninfo | grep deviceType

'DISPLAY' environment variable not set... skipping surface info
error: XDG_RUNTIME_DIR not set in the environment.
	deviceType     = PHYSICAL_DEVICE_TYPE_DISCRETE_GPU


In [6]:
%%writefile install_ncnn.sh
#!/usr/bin/env bash
# Build ncnn from source (Vulkan backend, Python bindings, examples) and
# install its Python package together with the model-zoo requirements.
# Reference: https://github.com/Tencent/ncnn/tree/master/python

# Abort at the first failing step instead of continuing with a half-built tree.
set -e

sudo mkdir -p /usr/local/c++
sudo chmod -R 777 /usr/local/c++
cd /usr/local/c++/

# Re-running the script must not fail just because the checkout already exists.
if [ ! -d ncnn/.git ]; then
    git clone https://github.com/Tencent/ncnn.git
fi
cd ncnn
git submodule update --init

mkdir -p build && cd build

# -DNCNN_VULKAN=ON enables the GPU (Vulkan) backend.
sudo cmake -DCMAKE_BUILD_TYPE=Release -DNCNN_VULKAN=ON -DNCNN_PYTHON=ON -DNCNN_SYSTEM_GLSLANG=ON -DNCNN_BUILD_EXAMPLES=ON ..
sudo make -j"$(nproc)"
sudo make install

# -------------------------------------------------
# Python bindings
# -------------------------------------------------
# Equivalent to:  cd /pathto/ncnn/python && pip install .
# With conda or miniconda, `python3 setup.py install` can be used instead.
cd ../python
pip3 install .

# ------------------------------------------------------
# Model zoo
# ------------------------------------------------------
# After installing the requirements, the model list is available through:
#     import ncnn
#     import ncnn.model_zoo as model_zoo
#     print(model_zoo.get_model_list())
pip3 install -r requirements.txt

# ---------------------------------------------------
# Command-line usage
# ---------------------------------------------------
# python examples/retinaface.py  "path/to/image"

Writing install_ncnn.sh


In [7]:
# Run the build script written by the previous cell.
!sudo bash ./install_ncnn.sh

# The build log is discarded here, and the message below is printed
# unconditionally - it does not prove that every build step succeeded.
clear_output()
print("Ncnn installation completed..")

Ncnn installation completed..


test

In [10]:
import ncnn
import ncnn.model_zoo as model_zoo

print(model_zoo.get_model_list())

['mobilenet_yolov2', 'mobilenetv2_yolov3', 'yolov4_tiny', 'yolov4', 'yolov5s', 'yolact', 'mobilenet_ssd', 'squeezenet_ssd', 'mobilenetv2_ssdlite', 'mobilenetv3_ssdlite', 'squeezenet', 'faster_rcnn', 'peleenet_ssd', 'retinaface', 'rfcn', 'shufflenetv2', 'simplepose', 'nanodet']


## Python ncnn retinaface

In [41]:
#@title Modified retinaface.py

# https://raw.githubusercontent.com/Tencent/ncnn/master/python/ncnn/model_zoo/retinaface.py

## Modification
# Added support to use resnet-50 --Accurate face detector instead of mnet

import numpy as np
import ncnn
from ncnn.model_zoo.model_store import get_model_file
from ncnn.utils.objects import Point, Face_Object

import gdown
import os
from pathlib import Path

home = str(Path.home())


class RetinaFace:
    """RetinaFace face detector executed with ncnn.

    The network is read at three output heads (strides 32, 16 and 8). For each
    head, anchors are generated, decoded into face proposals, and the merged
    proposals are filtered with non-maximum suppression.

    Calling an instance with an image returns a list of ``Face_Object`` whose
    ``rect`` is clipped to the image, ordered from highest to lowest ``prob``.
    """

    def __init__(
        self, prob_threshold=0.8, nms_threshold=0.4, num_threads=1, use_gpu=False, model="resnet50"
    ):
        """Create the ncnn network and load the model weights.

        Args:
            prob_threshold: minimum score for a proposal to be kept.
            nms_threshold: IoU above which a lower-scored box is suppressed.
            num_threads: thread count handed to every ncnn extractor.
            use_gpu: value assigned to ``net.opt.use_vulkan_compute``.
            model: ``"resnet50"`` loads ``retinaface-R50`` from the home
                directory (downloading it with gdown when missing); any other
                value loads ``mnet.25-opt`` through the ncnn model store.
        """
        self.prob_threshold = prob_threshold
        self.nms_threshold = nms_threshold
        self.num_threads = num_threads
        self.use_gpu = use_gpu

        self.net = ncnn.Net()
        self.net.opt.use_vulkan_compute = self.use_gpu

        # model is converted from
        # https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models
        # https://github.com/deepinsight/insightface/issues/669
        # the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models

        if model == "resnet50":
            param_file = self._download_if_missing(
                'https://drive.google.com/uc?id=1qCKoY8NfnhCPjOgXcymKDDXh3AjFXda6',
                os.path.join(home, 'retinaface-R50.param'),
            )
            model_file = self._download_if_missing(
                'https://drive.google.com/uc?id=1_WBTqDcQoF0soCjuHYdJs3IO9soznBp4',
                os.path.join(home, 'retinaface-R50.bin'),
            )

            self.net.load_param(param_file)
            self.net.load_model(model_file)
        else:
            # mnet (lightweight backbone)
            self.net.load_param(get_model_file("mnet.25-opt.param"))
            self.net.load_model(get_model_file("mnet.25-opt.bin"))

    @staticmethod
    def _download_if_missing(url, path):
        """Download ``url`` to ``path`` with gdown unless the file exists; return ``path``."""
        if not os.path.exists(path):
            gdown.download(url, path, quiet=False)
        return path

    def __del__(self):
        # Drop the reference to the ncnn network.
        self.net = None

    def __call__(self, img):
        """Detect faces in ``img``.

        Args:
            img: image array indexed as (height, width, ...); it is handed to
                ncnn with the ``PIXEL_BGR2RGB`` pixel type.

        Returns:
            List of ``Face_Object`` that survived NMS, highest score first,
            with ``rect`` clipped to the image bounds.
        """
        img_h = img.shape[0]
        img_w = img.shape[1]

        mat_in = ncnn.Mat.from_pixels(
            img, ncnn.Mat.PixelType.PIXEL_BGR2RGB, img_w, img_h
        )

        ex = self.net.create_extractor()
        ex.set_num_threads(self.num_threads)
        ex.input("data", mat_in)

        # Heads are evaluated in the order 32 -> 16 -> 8.
        faceproposals = [
            *self.detect_stride32(ex),
            *self.detect_stride16(ex),
            *self.detect_stride8(ex),
        ]

        # sort all proposals by score from highest to lowest
        faceproposals.sort(key=lambda obj: obj.prob, reverse=True)

        # apply nms with nms_threshold
        picked = self.nms_sorted_bboxes(faceproposals, self.nms_threshold)

        max_x = float(img_w) - 1
        max_y = float(img_h) - 1

        faceobjects = []
        for idx in picked:
            face = faceproposals[idx]

            # clip the box corners to the image, then rebuild width/height
            x0 = face.rect.x
            y0 = face.rect.y
            x1 = x0 + face.rect.w
            y1 = y0 + face.rect.h

            x0 = np.maximum(np.minimum(x0, max_x), 0.0)
            y0 = np.maximum(np.minimum(y0, max_y), 0.0)
            x1 = np.maximum(np.minimum(x1, max_x), 0.0)
            y1 = np.maximum(np.minimum(y1, max_y), 0.0)

            face.rect.x = x0
            face.rect.y = y0
            face.rect.w = x1 - x0
            face.rect.h = y1 - y0

            faceobjects.append(face)

        return faceobjects

    def _detect_stride(self, ex, feat_stride, scale_values):
        """Extract one output head and turn it into face proposals.

        Args:
            ex: ncnn extractor that already received the input image.
            feat_stride: stride of the head; also selects the blob names.
            scale_values: anchor scales used for this head.
        """
        suffix = "stride%d" % feat_stride
        _, score_blob = ex.extract("face_rpn_cls_prob_reshape_" + suffix)
        _, bbox_blob = ex.extract("face_rpn_bbox_pred_" + suffix)
        _, landmark_blob = ex.extract("face_rpn_landmark_pred_" + suffix)

        base_size = 16
        ratios = ncnn.Mat(1)
        ratios[0] = 1.0
        scales = ncnn.Mat(len(scale_values))
        for k, value in enumerate(scale_values):
            scales[k] = value
        anchors = self.generate_anchors(base_size, ratios, scales)

        return self.generate_proposals(
            anchors,
            feat_stride,
            score_blob,
            bbox_blob,
            landmark_blob,
            self.prob_threshold,
        )

    def detect_stride32(self, ex):
        """Proposals from the stride-32 head (anchor scales 32 and 16)."""
        return self._detect_stride(ex, 32, (32.0, 16.0))

    def detect_stride16(self, ex):
        """Proposals from the stride-16 head (anchor scales 8 and 4)."""
        return self._detect_stride(ex, 16, (8.0, 4.0))

    def detect_stride8(self, ex):
        """Proposals from the stride-8 head (anchor scales 2 and 1)."""
        return self._detect_stride(ex, 8, (2.0, 1.0))

    @staticmethod
    def _anchor_array(base_size, ratios, scales):
        """Return a float32 array with one (x0, y0, x1, y1) row per ratio/scale pair.

        The row count follows the inputs (``len(ratios) * len(scales)``)
        instead of being fixed at two rows.
        """
        num_scale = len(scales)
        anchors_np = np.zeros((len(ratios) * num_scale, 4), dtype=np.float32)

        cx = base_size * 0.5
        cy = base_size * 0.5

        for i, ar in enumerate(ratios):
            r_w = np.round(base_size / np.sqrt(ar))
            r_h = np.round(r_w * ar)  # round(base_size * np.sqrt(ar))

            for j, scale in enumerate(scales):
                rs_w = r_w * scale
                rs_h = r_h * scale

                anchors_np[i * num_scale + j] = (
                    cx - rs_w * 0.5,
                    cy - rs_h * 0.5,
                    cx + rs_w * 0.5,
                    cy + rs_h * 0.5,
                )

        return anchors_np

    def generate_anchors(self, base_size, ratios, scales):
        """Build the base anchors for one head.

        Args:
            base_size: edge length of the reference box.
            ratios: 1-D ``ncnn.Mat`` of aspect ratios.
            scales: 1-D ``ncnn.Mat`` of scale factors.

        Returns:
            ``ncnn.Mat`` with one row of four values per ratio/scale pair.
        """
        ratio_values = [ratios[i] for i in range(ratios.w)]
        scale_values = [scales[j] for j in range(scales.w)]
        return ncnn.Mat(self._anchor_array(base_size, ratio_values, scale_values))

    def generate_proposals(
        self, anchors, feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold
    ):
        """Decode one head into ``Face_Object`` proposals.

        Every anchor is shifted across the ``score_blob.h`` x ``score_blob.w``
        grid in steps of ``feat_stride``; positions whose score reaches
        ``prob_threshold`` are decoded into a box and five landmarks.
        """
        faceobjects = []

        w = score_blob.w
        h = score_blob.h

        # generate face proposal from bbox deltas and shifted anchors
        num_anchors = anchors.h

        for q in range(num_anchors):
            anchor = anchors.row(q)

            # Scores for anchor q are read from channel q + num_anchors.
            score = score_blob.channel(q + num_anchors)
            bbox = bbox_blob.channel_range(q * 4, 4)
            landmark = landmark_blob.channel_range(q * 10, 10)

            # shifted anchor
            anchor_y = anchor[1]

            anchor_w = anchor[2] - anchor[0]
            anchor_h = anchor[3] - anchor[1]

            for i in range(h):
                anchor_x = anchor[0]

                for j in range(w):
                    index = i * w + j

                    prob = score[index]

                    if prob >= prob_threshold:
                        # apply center size
                        dx = bbox.channel(0)[index]
                        dy = bbox.channel(1)[index]
                        dw = bbox.channel(2)[index]
                        dh = bbox.channel(3)[index]

                        cx = anchor_x + anchor_w * 0.5
                        cy = anchor_y + anchor_h * 0.5

                        pb_cx = cx + anchor_w * dx
                        pb_cy = cy + anchor_h * dy

                        pb_w = anchor_w * np.exp(dw)
                        pb_h = anchor_h * np.exp(dh)

                        x0 = pb_cx - pb_w * 0.5
                        y0 = pb_cy - pb_h * 0.5
                        x1 = pb_cx + pb_w * 0.5
                        y1 = pb_cy + pb_h * 0.5

                        obj = Face_Object()
                        obj.rect.x = x0
                        obj.rect.y = y0
                        obj.rect.w = x1 - x0 + 1
                        obj.rect.h = y1 - y0 + 1

                        # Landmark k uses channel 2k for x and channel 2k + 1 for y.
                        obj.landmark = [Point() for _ in range(5)]
                        for k in range(5):
                            obj.landmark[k].x = (
                                cx + (anchor_w + 1) * landmark.channel(2 * k)[index]
                            )
                            obj.landmark[k].y = (
                                cy + (anchor_h + 1) * landmark.channel(2 * k + 1)[index]
                            )
                        obj.prob = prob

                        faceobjects.append(obj)

                    anchor_x += feat_stride

                anchor_y += feat_stride

        return faceobjects

    def nms_sorted_bboxes(self, faceobjects, nms_threshold):
        """Greedy non-maximum suppression over boxes sorted by descending score.

        Args:
            faceobjects: objects exposing ``rect.area()`` and
                ``rect.intersection_area(other_rect)``.
            nms_threshold: a box is dropped when its IoU with an already kept
                box is greater than this value.

        Returns:
            Indices into ``faceobjects`` of the kept boxes, in input order.
        """
        picked = []

        areas = [obj.rect.area() for obj in faceobjects]

        for i, a in enumerate(faceobjects):
            keep = True
            for j in picked:
                b = faceobjects[j]

                # intersection over union
                inter_area = a.rect.intersection_area(b.rect)
                union_area = areas[i] + areas[j] - inter_area
                # A zero union (two empty boxes) cannot exceed the threshold;
                # skipping it avoids a ZeroDivisionError.
                if union_area > 0 and inter_area / union_area > nms_threshold:
                    keep = False
                    break  # one overlapping kept box is enough to suppress

            if keep:
                picked.append(i)

        return picked


In [89]:
import sys
import cv2
import numpy as np
import ncnn
# from ncnn.model_zoo import get_model
from ncnn.utils import draw_faceobjects


class FaceDetectionRTncnn:
    """Thin wrapper around ``RetinaFace`` that returns plain bounding-box lists."""

    def __init__(self, prob_threshold=0.8, nms_threshold=0.4, num_threads=4, use_gpu=False, model="resnet50"):
        """Create the underlying ``RetinaFace`` detector with the given settings."""
        self.net = RetinaFace(
            prob_threshold=prob_threshold, nms_threshold=nms_threshold, num_threads=num_threads, use_gpu=use_gpu,
            model=model
        )

    def get_face_bbox(self, img_bgr, max_faces=0, return_boxformat="xywh", is_draw_faceobjects=False, verbose=True):
        """
        Detect faces and return their bounding boxes.

        Args:
            img_bgr (opencv): OpenCV image in the default BGR format.
            max_faces (int, optional): 0 (or negative) returns every detected face,
                1 returns only the first detection, N >= 2 returns at most N faces.
                Defaults to 0.
            return_boxformat (str, optional): "xywh" returns x, y, width, height;
                any other value returns x1, y1, x2, y2 (x, y, x + width, y + height).
                Defaults to "xywh".
            is_draw_faceobjects (bool, optional): when True, pass the image and the
                detections to ``ncnn.utils.draw_faceobjects``. Defaults to False.
            verbose (bool, optional): print the raw (unrounded) rectangle of every
                returned face. Defaults to True.

        Returns:
            list: for ``max_faces == 1`` a single ``[.., .., .., .., prob]`` box (or
            ``[]`` when nothing was detected); otherwise a list of such boxes.
            Coordinates are rounded to integers; ``prob`` is the detection score.
        """
        def _get_box(obj):
            # The box layout follows the caller's return_boxformat.
            if return_boxformat == "xywh":
                return [
                    round(obj.rect.x), round(obj.rect.y), round(obj.rect.w), round(obj.rect.h), obj.prob
                ]
            # x1y1-x2y2
            return [
                round(obj.rect.x), round(obj.rect.y),
                round(obj.rect.x + obj.rect.w), round(obj.rect.y + obj.rect.h),
                obj.prob
            ]

        faceobjects = self.net(img_bgr)

        if is_draw_faceobjects:
            draw_faceobjects(img_bgr, faceobjects)

        if max_faces <= 0 or max_faces >= 2:
            face_boxes = []
            for idx, obj in enumerate(faceobjects, start=1):
                if verbose:
                    print(f"obj.rect.x: {obj.rect.x}")
                    print(f"obj.rect.y: {obj.rect.y}")
                    print(f"obj.rect.w: {obj.rect.w}")
                    print(f"obj.rect.h: {obj.rect.h}")

                face_boxes.append(_get_box(obj))
                # idx starts at 1, so this never triggers for max_faces <= 0.
                if idx == max_faces:
                    break
            return face_boxes

        # Single-face mode: return only the first detection.
        if len(faceobjects) >= 1:
            return _get_box(faceobjects[0])
        return []

In [116]:
face_detector = FaceDetectionRTncnn(use_gpu=True)

In [117]:
# from ncnn.utils import draw_faceobjects
# imagepath = "/content/human.png"
imagepath = "/content/pan-card-500x500.jpg"
# imagepath = "/content/Woman Wearing White Shirt Waving Goodbye.jpg"

img_bgr = cv2.imread(imagepath)
if img_bgr is None:
    # cv2.imread signals a missing/unreadable file by returning None. Raising stops
    # the cell with a clear error instead of sys.exit(0), which reports success.
    raise FileNotFoundError("cv2.imread %s failed" % (imagepath))

face_boxes = face_detector.get_face_bbox(
    img_bgr, max_faces=2,
    return_boxformat="xywh",
    is_draw_faceobjects=False
)
print(f"\nface_boxes: {face_boxes}")

obj.rect.x: 381.5889571369823
obj.rect.y: 208.69838688128922
obj.rect.w: 56.583804476035425
obj.rect.h: 71.11885123742152

face_boxes: [[382, 209, 57, 71, 0.9970703125]]


In [None]:
faceobjects[0].rect.x: 381.596
faceobjects[0].rect.y: 208.707
faceobjects[0].rect.width: 56.569
faceobjects[0].rect.height: 71.1753

obj.rect.x: 381.59588212797877
obj.rect.y: 208.70658384260472
obj.rect.w: 56.56897888521678
obj.rect.h: 71.17527034884822

In [None]:
obj.rect.x: 808.7731370958414
obj.rect.y: 151.8726988363982
obj.rect.w: 339.32872580831724
obj.rect.h: 487.12960232720366

Cropping

In [45]:
#@title Image scaling cropping utility

import cv2
import numpy as np


class CropImage:
    """
    Create a patch from the original input image by using a bbox coordinate.

    The box is enlarged around its centre by ``scale`` (limited so that it
    still fits in the image), shifted back inside the image when it crosses a
    border, cropped, and resized to the requested output size.

    Usage:
        image_cropper = CropImage()

        image_bbox = face_box      # [x, y, w, h, ...]
        w_input = 224
        h_input = 224

        scale = 1.45

        param = {
            "org_img": image,
            "bbox": image_bbox,
            "scale": scale,
            "out_w": w_input,
            "out_h": h_input,
            "crop": True,
        }
        if scale is None:
            param["crop"] = False

        patch_cropped_img = image_cropper.crop_patch(**param)
        print(f"patch_cropped_img.shape : {patch_cropped_img.shape}")
        display(Image.fromarray(patch_cropped_img))
    """
    @staticmethod
    def _get_new_box(src_w, src_h, bbox, scale):
        """Return the enlarged box as integer (left, top, right, bottom).

        Args:
            src_w, src_h: width and height of the source image.
            bbox: sequence whose first four items are x, y, width, height.
            scale: requested enlargement factor; it is reduced when the
                enlarged box would be wider or taller than the image.
        """
        x = bbox[0]
        y = bbox[1]
        box_w = bbox[2]
        box_h = bbox[3]

        # Never scale beyond what fits inside the image.
        scale = min((src_h-1)/box_h, min((src_w-1)/box_w, scale))

        new_width = box_w * scale
        new_height = box_h * scale
        center_x, center_y = box_w/2+x, box_h/2+y

        left_top_x = center_x-new_width/2
        left_top_y = center_y-new_height/2
        right_bottom_x = center_x+new_width/2
        right_bottom_y = center_y+new_height/2

        # Shift (rather than shrink) the box back inside the image borders.
        if left_top_x < 0:
            right_bottom_x -= left_top_x
            left_top_x = 0

        if left_top_y < 0:
            right_bottom_y -= left_top_y
            left_top_y = 0

        if right_bottom_x > src_w-1:
            left_top_x -= right_bottom_x-src_w+1
            right_bottom_x = src_w-1

        if right_bottom_y > src_h-1:
            left_top_y -= right_bottom_y-src_h+1
            right_bottom_y = src_h-1

        return int(left_top_x), int(left_top_y),\
               int(right_bottom_x), int(right_bottom_y)

    def crop_patch(self, org_img, bbox, scale, out_w, out_h, crop=True):
        """Return ``org_img`` (or the enlarged ``bbox`` region of it) resized to out_w x out_h.

        Args:
            org_img: image array indexed as (height, width[, channels]).
            bbox: sequence whose first four items are x, y, width, height.
            scale: enlargement factor for the box; only used when ``crop`` is True.
            out_w, out_h: size of the returned patch.
            crop: when False the whole image is resized and ``bbox``/``scale``
                are ignored.
        """
        if not crop:
            return cv2.resize(org_img, (out_w, out_h))

        # Only the first two dimensions are needed, so single-channel
        # (2-D) images are accepted as well.
        src_h, src_w = np.shape(org_img)[:2]
        left_top_x, left_top_y, \
            right_bottom_x, right_bottom_y = self._get_new_box(src_w, src_h, bbox, scale)

        print("New facebox:")
        print(f"{left_top_x}, {left_top_y}, {right_bottom_x}, {right_bottom_y}")

        # Both box corners are inclusive, hence the +1 on the slice ends.
        img = org_img[left_top_y: right_bottom_y+1,
                      left_top_x: right_bottom_x+1]
        return cv2.resize(img, (out_w, out_h))



In [None]:
from IPython.display import display
from PIL import Image

# Size of the patch to produce.
w_input, h_input = 224, 224

# Enlargement factor for the detected box; None means "resize the whole image".
scale = 2.0

# First box returned by the detector.
image_bbox = face_boxes[0]

image_cropper = CropImage()
param = dict(
    org_img=img_bgr,
    bbox=image_bbox,
    scale=scale,
    out_w=w_input,
    out_h=h_input,
    crop=scale is not None,
)

patch_cropped_img = image_cropper.crop_patch(**param)
# print(f"patch_cropped_img.shape : {patch_cropped_img.shape}")
# display(Image.fromarray(patch_cropped_img))

New facebox:
353, 169, 467, 311


## C++ ncnn retinaface

### Setup Opencv

In [23]:
%cd "/content"
!git clone https://github.com/microsoft/vcpkg
!./vcpkg/bootstrap-vcpkg.sh

Cloning into 'vcpkg'...
remote: Enumerating objects: 111589, done.[K
remote: Counting objects: 100% (49/49), done.[K
remote: Compressing objects: 100% (47/47), done.[K
remote: Total 111589 (delta 17), reused 13 (delta 2), pack-reused 111540[K
Receiving objects: 100% (111589/111589), 34.43 MiB | 20.83 MiB/s, done.
Resolving deltas: 100% (70387/70387), done.
Downloading cmake...
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   634  100   634    0     0   3123      0 --:--:-- --:--:-- --:--:--  3123
100 41.5M  100 41.5M    0     0  44.0M      0 --:--:-- --:--:-- --:--:-- 90.6M
Downloading cmake... done.
Extracting cmake...
Extracting cmake... done.
Downloading ninja...
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   619  100   619    0     0  13456      0 --

In [26]:
# # Search packages over vcpkg
# # $ ./vcpkg/vcpkg search [search term]
!./vcpkg/vcpkg search opencv

darknet[opencv-base]                  Build darknet with support for latest version of OpenCV
darknet[opencv-cuda]                  Build darknet with support for latest version of CUDA-enabled OpenCV
darknet[opencv2-base]                 Build darknet with support for OpenCV2
darknet[opencv2-cuda]                 Build darknet with support for CUDA-enabled OpenCV2
darknet[opencv3-base]                 Build darknet with support for OpenCV3
darknet[opencv3-cuda]                 Build darknet with support for CUDA-enabled OpenCV3
matplotplusplus[opencv]               opencv support for Matplot++
opencv               4.5.1            Computer vision library
opencv[ade]                           graph api
opencv[contrib]                       opencv_contrib module
opencv[cuda]                          CUDA support for opencv
opencv[dnn]                           Enable dnn module
opencv[eigen]                         Eigen support for opencv
opencv[ffmpeg]                        ffmpeg su

In [None]:
!./vcpkg/vcpkg install opencv4[jpeg]

Computing installation plan...
The following packages will be built and installed:
    opencv4[core,dnn,jpeg,png,quirc,tiff,webp]:x64-linux -> 4.5.1#1
  * protobuf[core]:x64-linux -> 3.15.8#1
  * quirc[core]:x64-linux -> 1.1#2
  * tiff[core]:x64-linux -> 4.1.0#2
Additional packages (*) will be modified to complete this operation.
Detecting compiler hash for triplet x64-linux...
Could not locate cached archive: /root/.cache/vcpkg/archives/2a/2a135def0cd99f0de8f098170888a2a3a96a46c6.zip
Could not locate cached archive: /root/.cache/vcpkg/archives/f9/f92a1d82e97548fb0e6d6c3054597ba450a99ea2.zip
Could not locate cached archive: /root/.cache/vcpkg/archives/34/3490656b361758c292079aa3378b573014f9edcd.zip
Could not locate cached archive: /root/.cache/vcpkg/archives/dc/dcab80a00cc7e9838b2f92799f1b4d67d051267b.zip
Starting package 1/4: protobuf:x64-linux
Building package protobuf[core]:x64-linux...
-- Using cached /content/vcpkg/downloads/protocolbuffers-protobuf-436bd7880e458532901c58f4d9d1ea2

In [39]:
# !/content/vcpkg/vcpkg export opencv4[jpeg] --zip

The following packages are already built and will be exported:
  * libjpeg-turbo:x64-linux
  * liblzma:x64-linux
  * libpng:x64-linux
  * libwebp:x64-linux
    opencv4:x64-linux
  * protobuf:x64-linux
  * quirc:x64-linux
  * tiff:x64-linux
  * zlib:x64-linux
Additional packages (*) need to be exported to complete this operation.
Exporting package libjpeg-turbo:x64-linux...
Exporting package zlib:x64-linux...
Exporting package libpng:x64-linux...
Exporting package libwebp:x64-linux...
Exporting package protobuf:x64-linux...
Exporting package quirc:x64-linux...
Exporting package liblzma:x64-linux...
Exporting package tiff:x64-linux...
Exporting package opencv4:x64-linux...
Creating zip archive...
Zip archive exported at: /content/vcpkg/vcpkg-export-20210601-110821.zip

To use the exported libraries in CMake projects use:
    -DCMAKE_TOOLCHAIN_FILE=[...]/scripts/buildsystems/vcpkg.cmake



### Define ncnn Face detector in cpp

In [73]:
#@title FaceDetector.h

%%writefile /content/FaceDetector.h

#include "net.h"

#include <iostream>

#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include <stdio.h>
#include <vector>

// One detected face candidate.
struct FaceObject
{
    cv::Rect_<float> rect;   // bounding box of the face (x, y, width, height)
    cv::Point2f landmark[5]; // five landmark points belonging to the face
    float prob;              // detection score; candidates are sorted by it in descending order
};


// RetinaFace face detector backed by an ncnn network, with the
// post-processing helpers (sorting, NMS, anchor/proposal generation).
class NcnnFaceDetector {

    protected:

        // Area of the overlap between the rectangles of a and b.
        inline float intersection_area(const FaceObject& a, const FaceObject& b);

        // Sorts faceobjects[left..right] in place by descending prob.
        void qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right);

        // Sorts the whole vector in place by descending prob; does nothing when it is empty.
        void qsort_descent_inplace(std::vector<FaceObject>& faceobjects);

        // Non-maximum suppression: fills `picked` with the indices of the boxes that are kept.
        // A box is dropped when its IoU with an already kept box exceeds nms_threshold.
        void nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold);

        // Returns one row of four floats (x0, y0, x1, y1) per ratio/scale combination.
        ncnn::Mat generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales);

        // Decodes the score/bbox/landmark blobs of one stride into face candidates whose
        // score is at least prob_threshold (results are delivered through `faceobjects`).
        void generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, const ncnn::Mat& landmark_blob, float prob_threshold, std::vector<FaceObject>& faceobjects);

    public:
        // Configures the network options and loads the model files.
        NcnnFaceDetector();

        // The ncnn network holding the RetinaFace model.
        ncnn::Net retinaface;

        // NOTE(review): presumably runs detection on a BGR image, fills `faceobjects`
        // and returns a status code - the definition is not shown here; confirm.
        int detect_retinaface(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects);

};

Overwriting /content/FaceDetector.h


In [107]:
#@title FaceDetector.cpp

%%writefile /content/FaceDetector.cpp


// TODO: Debug the face detector.
// The static-method and class-method versions return different face-box coordinates.


#include "FaceDetector.h"


// Configures the ncnn network (Vulkan disabled) and loads the RetinaFace
// ResNet-50 model. A load failure is reported on stderr; the success message
// is printed only when both files were loaded.
NcnnFaceDetector::NcnnFaceDetector() {

    retinaface.opt.use_vulkan_compute = false;

    // model is converted from
    // https://github.com/deepinsight/insightface/tree/master/RetinaFace#retinaface-pretrained-models
    // https://github.com/deepinsight/insightface/issues/669
    // the ncnn model https://github.com/nihui/ncnn-assets/tree/master/models
    //     retinaface.load_param("retinaface-R50.param");
    //     retinaface.load_model("retinaface-R50.bin");

    // Liveness models are very sensitive to the face coordinates.
    // Many lightweight models were tested, but only ResNet-50 was accurate enough
    // for the liveness models to work well; with it the results are the same on any device.

    // ResNet-50 model converted from mxnet with the ncnn utility and uploaded to Drive:
    // retinaface.load_param("/usr/share/face_recognition/models/detection/retinaface-R50.param");
    // retinaface.load_model("/usr/share/face_recognition/models/detection/retinaface-R50.bin");

    // Paths used in Colab.
    const char* param_path = "/root/retinaface-R50.param";
    const char* model_path = "/root/retinaface-R50.bin";

    // ncnn reports success with a return value of 0.
    const int param_ret = retinaface.load_param(param_path);
    const int model_ret = retinaface.load_model(model_path);

    // The mobilenet face detector mostly gives different face boxes on different OSes,
    // and its Python and C++ versions also give different results in many cases:
    // retinaface.load_param("/usr/share/face_recognition/models/detection/mnet.25-opt.param");
    // retinaface.load_model("/usr/share/face_recognition/models/detection/mnet.25-opt.bin");

    if (param_ret != 0 || model_ret != 0)
    {
        std::cerr << "Failed to load ncnn face detector model: "
                  << param_path << " / " << model_path << std::endl;
        return;
    }

    std::cout << "Loaded ncnn face detector... " << std::endl;
}

inline float NcnnFaceDetector::intersection_area(const FaceObject& a, const FaceObject& b) {
    cv::Rect_<float> inter = a.rect & b.rect;
    return inter.area();
}

// Recursive quicksort of faceobjects[left..right] (both ends inclusive) by
// descending prob. The two sub-ranges left after partitioning are sorted in
// separate OpenMP sections.
void NcnnFaceDetector::qsort_descent_inplace(std::vector<FaceObject>& faceobjects, int left, int right)
{
    int i = left;
    int j = right;
    // Pivot: score of the element in the middle of the range.
    float p = faceobjects[(left + right) / 2].prob;

    while (i <= j)
    {
        // Skip elements that are already on the correct side of the pivot.
        while (faceobjects[i].prob > p)
            i++;

        while (faceobjects[j].prob < p)
            j--;

        if (i <= j)
        {
            // Exchange the misplaced pair and move both cursors inwards.
            std::swap(faceobjects[i], faceobjects[j]);

            i++;
            j--;
        }
    }

    // After partitioning, [left, j] and [i, right] are sorted independently.
    #pragma omp parallel sections
    {
        #pragma omp section
        {
            if (left < j) qsort_descent_inplace(faceobjects, left, j);
        }
        #pragma omp section
        {
            if (i < right) qsort_descent_inplace(faceobjects, i, right);
        }
    }
}

// Sorts the whole vector by descending prob; an empty vector is left untouched.
void NcnnFaceDetector::qsort_descent_inplace(std::vector<FaceObject>& faceobjects)
{
    // Only a non-empty vector has a valid last index to pass on.
    if (!faceobjects.empty())
    {
        qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
    }
}

// Greedy non-maximum suppression over candidates ordered from best to worst.
//
// faceobjects    candidates, expected to be sorted by descending prob
// picked         output: indices of the kept candidates, in input order
// nms_threshold  a candidate is dropped when its IoU with an already kept
//                candidate is greater than this value
void NcnnFaceDetector::nms_sorted_bboxes(const std::vector<FaceObject>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
    picked.clear();

    const int n = faceobjects.size();

    // Box areas are needed repeatedly for the union, so compute them once.
    std::vector<float> areas(n);
    for (int i = 0; i < n; i++)
    {
        areas[i] = faceobjects[i].rect.area();
    }

    for (int i = 0; i < n; i++)
    {
        const FaceObject& a = faceobjects[i];

        bool keep = true;
        for (int j = 0; j < (int)picked.size(); j++)
        {
            const FaceObject& b = faceobjects[picked[j]];

            // intersection over union
            float inter_area = intersection_area(a, b);
            float union_area = areas[i] + areas[picked[j]] - inter_area;
            if (inter_area / union_area > nms_threshold)
            {
                keep = false;
                break; // one overlapping kept box is enough to suppress this one
            }
        }

        if (keep)
            picked.push_back(i);
    }
}

// copy from src/layer/proposal.cpp
// Builds the base anchors for one feature stride.
//
// base_size  edge length of the reference box; the anchors are centred on its middle
// ratios     aspect ratios (1-D Mat)
// scales     scale factors (1-D Mat)
//
// Returns a Mat with one row (x0, y0, x1, y1) per ratio/scale combination,
// ordered ratio-major.
ncnn::Mat NcnnFaceDetector::generate_anchors(int base_size, const ncnn::Mat& ratios, const ncnn::Mat& scales)
{
    const int ratio_count = ratios.w;
    const int scale_count = scales.w;

    ncnn::Mat anchors;
    anchors.create(4, ratio_count * scale_count);

    const float center_x = base_size * 0.5f;
    const float center_y = base_size * 0.5f;

    for (int ri = 0; ri < ratio_count; ri++)
    {
        const float ar = ratios[ri];

        // Width/height of the reference box for this aspect ratio, rounded to integers.
        const int ratio_w = round(base_size / sqrt(ar));
        const int ratio_h = round(ratio_w * ar); //round(base_size * sqrt(ar));

        for (int si = 0; si < scale_count; si++)
        {
            const float scale = scales[si];

            const float scaled_w = ratio_w * scale;
            const float scaled_h = ratio_h * scale;

            float* out = anchors.row(ri * scale_count + si);

            out[0] = center_x - scaled_w * 0.5f;
            out[1] = center_y - scaled_h * 0.5f;
            out[2] = center_x + scaled_w * 0.5f;
            out[3] = center_y + scaled_h * 0.5f;
        }
    }

    return anchors;
}

// Decodes one feature level's network output into face candidates.
//
// For every anchor shape (one row of `anchors`) and every cell of the score
// map, a cell whose score reaches prob_threshold is turned into a FaceObject
// (box, five landmarks, score) and appended to faceobjects. Entries already
// present in faceobjects are left untouched.
//
// feat_stride is the step, in input pixels, between neighbouring cells; the
// anchor is shifted by it once per column and once per row.
void NcnnFaceDetector::generate_proposals(const ncnn::Mat& anchors, int feat_stride, const ncnn::Mat& score_blob, const ncnn::Mat& bbox_blob, const ncnn::Mat& landmark_blob, float prob_threshold, std::vector<FaceObject>& faceobjects)
{
    const int map_w = score_blob.w;
    const int map_h = score_blob.h;

    // one row of `anchors` per anchor shape
    const int num_anchors = anchors.h;

    for (int a = 0; a < num_anchors; a++)
    {
        const float* base = anchors.row(a);

        // scores are read from channel (a + num_anchors)
        // NOTE(review): presumably the face-class probabilities - confirm against the model
        const ncnn::Mat score = score_blob.channel(a + num_anchors);
        // 4 box-delta channels and 10 landmark channels per anchor shape
        const ncnn::Mat bbox = bbox_blob.channel_range(a * 4, 4);
        const ncnn::Mat landmark = landmark_blob.channel_range(a * 10, 10);

        const float anchor_w = base[2] - base[0];
        const float anchor_h = base[3] - base[1];

        // walk the anchor over the feature map, one stride per cell
        float anchor_y = base[1];
        for (int row = 0; row < map_h; row++, anchor_y += feat_stride)
        {
            float anchor_x = base[0];
            for (int col = 0; col < map_w; col++, anchor_x += feat_stride)
            {
                const int index = row * map_w + col;

                const float prob = score[index];
                if (!(prob >= prob_threshold))
                    continue;

                // box deltas relative to the shifted anchor
                const float dx = bbox.channel(0)[index];
                const float dy = bbox.channel(1)[index];
                const float dw = bbox.channel(2)[index];
                const float dh = bbox.channel(3)[index];

                // centre of the shifted anchor
                const float cx = anchor_x + anchor_w * 0.5f;
                const float cy = anchor_y + anchor_h * 0.5f;

                // predicted box: centre moved by (dx, dy), size scaled by exp(dw), exp(dh)
                const float pb_cx = cx + anchor_w * dx;
                const float pb_cy = cy + anchor_h * dy;

                const float pb_w = anchor_w * exp(dw);
                const float pb_h = anchor_h * exp(dh);

                const float x0 = pb_cx - pb_w * 0.5f;
                const float y0 = pb_cy - pb_h * 0.5f;
                const float x1 = pb_cx + pb_w * 0.5f;
                const float y1 = pb_cy + pb_h * 0.5f;

                FaceObject obj;
                obj.rect.x = x0;
                obj.rect.y = y0;
                obj.rect.width = x1 - x0 + 1;
                obj.rect.height = y1 - y0 + 1;

                // landmark k reads its x offset from channel 2k and its y offset
                // from channel 2k + 1, both relative to the anchor centre
                for (int k = 0; k < 5; k++)
                {
                    obj.landmark[k].x = cx + (anchor_w + 1) * landmark.channel(2 * k)[index];
                    obj.landmark[k].y = cy + (anchor_h + 1) * landmark.channel(2 * k + 1)[index];
                }

                obj.prob = prob;

                faceobjects.push_back(obj);
            }
        }
    }
}


// Runs the RetinaFace network on an image and returns the detected faces.
//
// bgr         - input image; must be a non-empty 8-bit, 3-channel (CV_8UC3)
//               cv::Mat in BGR channel order.
// faceobjects - output; replaced with the detections that survive NMS, in
//               descending score order, with boxes clipped to the image.
//
// Returns 0 on success. Returns -1, leaving faceobjects empty, when the input
// image is unusable or one of the network outputs cannot be extracted.
//
// The `retinaface` member net is used as-is; it must already have its
// param/model loaded before this is called.
int NcnnFaceDetector::detect_retinaface(const cv::Mat& bgr, std::vector<FaceObject>& faceobjects)
{
    faceobjects.clear();

    // from_pixels() below reads raw 8-bit BGR triplets; anything else would be
    // misinterpreted or read out of bounds
    if (bgr.empty() || bgr.type() != CV_8UC3)
        return -1;

    const float prob_threshold = 0.8f;
    const float nms_threshold = 0.4f;

    const int img_w = bgr.cols;
    const int img_h = bgr.rows;

    // from_pixels() is given no row stride, so it needs tightly packed rows.
    // A cv::Mat that is a view into a larger image is copied first; the
    // common continuous case only shares the existing buffer.
    cv::Mat packed = bgr;
    if (!packed.isContinuous())
        packed = bgr.clone();

    ncnn::Mat in = ncnn::Mat::from_pixels(packed.data, ncnn::Mat::PIXEL_BGR2RGB, img_w, img_h);

    ncnn::Extractor ex = retinaface.create_extractor();

    ex.input("data", in);

    // One entry per output level of the network: its stride, the two anchor
    // scales used at that level, and the names of its three output blobs.
    struct StrideLevel
    {
        int feat_stride;
        float scale0;
        float scale1;
        const char* score_name;
        const char* bbox_name;
        const char* landmark_name;
    };

    static const StrideLevel levels[] = {
        {32, 32.f, 16.f, "face_rpn_cls_prob_reshape_stride32", "face_rpn_bbox_pred_stride32", "face_rpn_landmark_pred_stride32"},
        {16, 8.f, 4.f, "face_rpn_cls_prob_reshape_stride16", "face_rpn_bbox_pred_stride16", "face_rpn_landmark_pred_stride16"},
        {8, 2.f, 1.f, "face_rpn_cls_prob_reshape_stride8", "face_rpn_bbox_pred_stride8", "face_rpn_landmark_pred_stride8"},
    };

    const int base_size = 16;

    std::vector<FaceObject> faceproposals;

    for (const StrideLevel& level : levels)
    {
        ncnn::Mat score_blob, bbox_blob, landmark_blob;

        // extract() returns non-zero on failure (e.g. unknown blob name)
        if (ex.extract(level.score_name, score_blob) != 0
            || ex.extract(level.bbox_name, bbox_blob) != 0
            || ex.extract(level.landmark_name, landmark_blob) != 0)
        {
            return -1;
        }

        ncnn::Mat ratios(1);
        ratios[0] = 1.f;
        ncnn::Mat scales(2);
        scales[0] = level.scale0;
        scales[1] = level.scale1;
        ncnn::Mat anchors = generate_anchors(base_size, ratios, scales);

        // generate_proposals() appends, so all levels accumulate in one list
        generate_proposals(anchors, level.feat_stride, score_blob, bbox_blob, landmark_blob, prob_threshold, faceproposals);
    }

    // sort all proposals by score from highest to lowest
    qsort_descent_inplace(faceproposals);

    // apply nms with nms_threshold
    std::vector<int> picked;
    nms_sorted_bboxes(faceproposals, picked, nms_threshold);

    const int face_count = (int)picked.size();

    faceobjects.resize(face_count);
    for (int i = 0; i < face_count; i++)
    {
        faceobjects[i] = faceproposals[picked[i]];

        // clip to image size
        float x0 = faceobjects[i].rect.x;
        float y0 = faceobjects[i].rect.y;
        float x1 = x0 + faceobjects[i].rect.width;
        float y1 = y0 + faceobjects[i].rect.height;

        x0 = std::max(std::min(x0, (float)img_w - 1), 0.f);
        y0 = std::max(std::min(y0, (float)img_h - 1), 0.f);
        x1 = std::max(std::min(x1, (float)img_w - 1), 0.f);
        y1 = std::max(std::min(y1, (float)img_h - 1), 0.f);

        faceobjects[i].rect.x = x0;
        faceobjects[i].rect.y = y0;
        faceobjects[i].rect.width = x1 - x0;
        faceobjects[i].rect.height = y1 - y0;
    }

    return 0;
}

// static void draw_faceobjects(const cv::Mat& bgr, const std::vector<FaceObject>& faceobjects)
// {
//     cv::Mat image = bgr.clone();

//     for (size_t i = 0; i < faceobjects.size(); i++)
//     {
//         const FaceObject& obj = faceobjects[i];

//         fprintf(stderr, "%.5f at %.2f %.2f %.2f x %.2f\n", obj.prob,
//                 obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);

//         cv::rectangle(image, obj.rect, cv::Scalar(0, 255, 0));

//         cv::circle(image, obj.landmark[0], 2, cv::Scalar(0, 255, 255), -1);
//         cv::circle(image, obj.landmark[1], 2, cv::Scalar(0, 255, 255), -1);
//         cv::circle(image, obj.landmark[2], 2, cv::Scalar(0, 255, 255), -1);
//         cv::circle(image, obj.landmark[3], 2, cv::Scalar(0, 255, 255), -1);
//         cv::circle(image, obj.landmark[4], 2, cv::Scalar(0, 255, 255), -1);

//         char text[256];
//         sprintf(text, "%.1f%%", obj.prob * 100);

//         int baseLine = 0;
//         cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);

//         int x = obj.rect.x;
//         int y = obj.rect.y - label_size.height - baseLine;
//         if (y < 0)
//             y = 0;
//         if (x + label_size.width > image.cols)
//             x = image.cols - label_size.width;

//         cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
//                       cv::Scalar(255, 255, 255), -1);

//         cv::putText(image, text, cv::Point(x, y + label_size.height),
//                     cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
//     }

//     cv::imshow("image", image);
//     cv::waitKey(0);
// }

Overwriting /content/FaceDetector.cpp


In [76]:
#@title main_executable.cpp

%%writefile /content/main_executable.cpp

#include <iostream>
#include <cstdio>
#include <string>
#include <stdio.h>
#include <algorithm>
#include <vector>
#include <fstream>
#include <chrono> 

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include "FaceDetector.h"

using namespace cv;
using namespace dnn;
using namespace std;
using namespace std::chrono; 


int main(int argc, char **argv) {
    std::cout << "Hello from main.." << std::endl;

    string image_file;
    if  (argc == 1)
    {
        image_file = "/content/pan-card-500x500.jpg";
    }
    else if (argc == 2)
    {
        image_file = argv[1];
    }
    std::cout << "Processing " << image_file << std::endl;

    //// 2
    /// flag value 1 to imread means -- cv2.IMREAD_COLOR:
    /// It specifies to load a color image. 
    /// Any transparency of image will be neglected. It is the default flag. Alternatively, we can pass integer value 1 for this flag.
    cv::Mat frame = cv::imread(image_file.c_str(), 1);

    if (frame.empty())
    {
        fprintf(stderr, "cv::imread %s failed\n", image_file.c_str());
        return -1;
    }

    // //// 3
    std::vector<FaceObject> faceobjects;
    NcnnFaceDetector face_detector;
    face_detector.detect_retinaface(frame, faceobjects);

    std::cout << "faceobjects[0].rect.x: " << faceobjects[0].rect.x << std::endl;
    std::cout << "faceobjects[0].rect.y: " << faceobjects[0].rect.y << std::endl;
    std::cout << "faceobjects[0].rect.width: " << faceobjects[0].rect.width << std::endl;
    std::cout << "faceobjects[0].rect.height: " << faceobjects[0].rect.height << std::endl;

}

Overwriting /content/main_executable.cpp


In [58]:
#@title CMakeLists.txt

%%writefile /content/CMakeLists.txt

cmake_minimum_required(VERSION 3.9)

# The toolchain file has to be known before project(). Default to the Colab
# vcpkg checkout, but let -DCMAKE_TOOLCHAIN_FILE=... override it.
if(NOT DEFINED CMAKE_TOOLCHAIN_FILE)
  set(CMAKE_TOOLCHAIN_FILE "/content/vcpkg/scripts/buildsystems/vcpkg.cmake")
endif()

project(liveness)

# Build as strict C++11 with warnings on. These are compiler options, not
# preprocessor definitions, so they are not passed through add_definitions().
set(CMAKE_CXX_STANDARD 11)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
add_compile_options(-Wall)


## Statically link -- OS-specific ncnn
## more info: https://github.com/Tencent/ncnn/releases
## cmake link -- https://github.com/Tencent/ncnn/wiki/use-ncnn-with-own-project

# set install path of ncnn on your system
set(NCNN_INSTALL_DIR "/usr/local/c++/ncnn/build/install")

## the install dir is created by running 'make install' in the ncnn build dir
set(ncnn_DIR "${NCNN_INSTALL_DIR}/lib/cmake/ncnn" CACHE PATH "Directory that contains ncnnConfig.cmake")
find_package(ncnn REQUIRED)

# REQUIRED aborts configuration when OpenCV is missing, so no OpenCV_FOUND
# check is needed afterwards.
find_package(OpenCV REQUIRED)


# =============================================================================
# make executable for testing purpose
# -----------------------------------------------------------------------------
add_executable(retinaface
	FaceDetector.cpp
	FaceDetector.h
	main_executable.cpp
)

# OpenCV headers are only needed by this target
target_include_directories(retinaface PRIVATE ${OpenCV_INCLUDE_DIRS})

# link ncnn (imported target from find_package above) and OpenCV
target_link_libraries(retinaface ncnn ${OpenCV_LIBS})

Overwriting /content/CMakeLists.txt


In [32]:
# Use the same absolute path for both commands so the cell does not depend on
# the current directory and re-running it never creates a nested build/build.
!mkdir -p "/content/build"
%cd "/content/build"

/content/build


In [108]:
# Configure from the parent directory of the current build directory, then
# build with one make job per processing unit reported by nproc.
!cmake ..
!make -j$(nproc)

-- Configuring done
-- Generating done
-- Build files have been written to: /content/build
[35m[1mScanning dependencies of target retinaface[0m
[ 33%] [32mBuilding CXX object CMakeFiles/retinaface.dir/main_executable.cpp.o[0m
[ 66%] [32mBuilding CXX object CMakeFiles/retinaface.dir/FaceDetector.cpp.o[0m
[100%] [32m[1mLinking CXX executable retinaface[0m
[100%] Built target retinaface


In [114]:
# Run the built binary; with no argument main() uses /content/pan-card-500x500.jpg.
# The path in the trailing comment is an alternative input image to pass instead.
!./retinaface # "/content/Woman Wearing White Shirt Waving Goodbye.jpg"

Hello from main..
Processing /content/pan-card-500x500.jpg
Loaded ncnn face detector... 
faceobjects[0].rect.x: 381.596
faceobjects[0].rect.y: 208.707
faceobjects[0].rect.width: 56.569
faceobjects[0].rect.height: 71.1753


In [None]:
obj.rect.x: 802.1411844140878
obj.rect.y: 149.21483296874882
obj.rect.w: 342.480463813426
obj.rect.h: 482.2495380496239

In [None]:
##############################################
## 1
### Python result
# obj.rect.x: 381.5889571369823
# obj.rect.y: 208.69838688128922
# obj.rect.w: 56.583804476035425
# obj.rect.h: 71.11885123742152

# ## cpp retinaface result
# faceobjects[0].rect.x 381.7
# faceobjects[0].rect.y 208.248
# faceobjects[0].rect.width 56.3604
# faceobjects[0].rect.height 71.316

##############################################
## 2
### Python result
# obj.rect.x: 808.7731370958414
# obj.rect.y: 151.8726988363982
# obj.rect.w: 339.32872580831724
# obj.rect.h: 487.12960232720366

# ## cpp retinaface result
# faceobjects[0].rect.x 817.64
# faceobjects[0].rect.y 145.44
# faceobjects[0].rect.width 337.845
# faceobjects[0].rect.height 488.139

In [None]:
[[382, 209, 57, 71, 0.9970703125]]

In [79]:
# Round the C++ box width recorded in the notes above (56.3604) up to an integer.
# NOTE(review): presumably to compare with the 57 in the integer box [[382, 209, 57, 71, ...]] - confirm.
import math
math.ceil(56.3604)

57