In [1]:
import random
import re
import math
import os
import tempfile
import ssl
import cv2
import glob
import time
from retinaface import RetinaFace
from cv2 import rectangle
import numpy as np
from PIL import Image, ImageFilter, ImageDraw
from scipy.ndimage import gaussian_filter

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from __future__ import print_function
import os
import argparse
import torch
import torch.backends.cudnn as cudnn
import numpy as np
from data import cfg_mnet, cfg_re50
from layers.functions.prior_box import PriorBox
from utils.nms.py_cpu_nms import py_cpu_nms
import cv2
from retinaface import RetinaFace
from utils.box_utils import decode, decode_landm
import time


def check_keys(model, pretrained_state_dict):
    """Report how well a checkpoint's keys line up with a model's keys.

    Prints three counts: model keys absent from the checkpoint, checkpoint
    keys the model does not have, and keys present in both.

    Args:
        model: object exposing ``state_dict()``.
        pretrained_state_dict: mapping loaded from the checkpoint.

    Returns:
        True when at least one key is shared.

    Raises:
        AssertionError: if the checkpoint and the model share no key at all.
    """
    checkpoint_names = set(pretrained_state_dict.keys())
    model_names = set(model.state_dict().keys())

    shared = model_names.intersection(checkpoint_names)
    only_in_checkpoint = checkpoint_names.difference(model_names)
    only_in_model = model_names.difference(checkpoint_names)

    print('Missing keys:{}'.format(len(only_in_model)))
    print('Unused checkpoint keys:{}'.format(len(only_in_checkpoint)))
    print('Used keys:{}'.format(len(shared)))

    # Loading a checkpoint that matches nothing is almost certainly a mistake.
    assert len(shared) > 0, 'load NONE from pretrained checkpoint'
    return True


def remove_prefix(state_dict, prefix):
    """Return a copy of ``state_dict`` whose keys have a leading ``prefix`` removed.

    Old-style checkpoints store every parameter name with a common prefix
    such as 'module.'. Only one leading occurrence is stripped; keys that do
    not start with ``prefix`` are kept unchanged.
    """
    print('remove prefix \'{}\''.format(prefix))

    def strip(name):
        if name.startswith(prefix):
            # The prefix sits at position 0, so everything after the first
            # occurrence is the name without it.
            return name.partition(prefix)[2]
        return name

    return {strip(name): tensor for name, tensor in state_dict.items()}


def load_model(model, pretrained_path, load_to_cpu):
    """Load pretrained weights from ``pretrained_path`` into ``model``.

    Args:
        model: network whose ``load_state_dict`` is called (non-strict).
        pretrained_path: path of the checkpoint file passed to ``torch.load``.
        load_to_cpu: when truthy, tensors stay on the storage they were
            deserialised to; otherwise they are moved to the current CUDA
            device.

    Returns:
        The same ``model`` object, with the checkpoint weights loaded.
    """
    print('Loading pretrained model from {}'.format(pretrained_path))

    if load_to_cpu:
        def relocate(storage, loc):
            return storage
    else:
        device = torch.cuda.current_device()

        def relocate(storage, loc):
            return storage.cuda(device)

    checkpoint = torch.load(pretrained_path, map_location=relocate)

    # Some checkpoints wrap the weights under a 'state_dict' entry.
    weights = checkpoint['state_dict'] if "state_dict" in checkpoint.keys() else checkpoint
    weights = remove_prefix(weights, 'module.')

    check_keys(model, weights)
    model.load_state_dict(weights, strict=False)
    return model

In [3]:
def larger_face(face, diagonal, frame_width, frame_height):
    """Grow a face box by a tenth of ``diagonal`` on each side, clamped to the frame.

    Args:
        face: four values ``(left_x, top_y, right_x, bottom_y)``.
        diagonal: length used to size the margin (margin = diagonal / 10).
        frame_width: frame width; the right edge is limited to ``frame_width - 1``.
        frame_height: frame height; the bottom edge is limited to ``frame_height - 1``.

    Returns:
        Tuple ``(left_x, top_y, right_x, bottom_y)`` of ints.
    """
    x_min, y_min, x_max, y_max = face

    # Make the box slightly larger so the blur covers the whole face.
    margin = diagonal / 10
    left, top = x_min - margin, y_min - margin
    right, bottom = x_max + margin, y_max + margin

    # Keep the enlarged box inside the frame.
    left = 0 if left < 0 else left
    top = 0 if top < 0 else top
    right = frame_width - 1 if right >= frame_width else right
    bottom = frame_height - 1 if bottom >= frame_height else bottom

    return int(left), int(top), int(right), int(bottom)

### Loading the RetinaFace MobileNet model

In [4]:
# Inference only: switch off autograd globally for the rest of the notebook.
torch.set_grad_enabled(False)

# Build the MobileNet-configured RetinaFace in test phase and load its weights on the CPU.
net = load_model(RetinaFace(cfg=cfg_mnet, phase='test'),
                 './weights/mobilenet0.25_Final.pth',
                 True)
net.eval()
print('Finished loading model!')

cudnn.benchmark = True
# The notebook always runs the detector on the CPU.
device = torch.device("cpu")
net = net.to(device)

Loading pretrained model from ./weights/mobilenet0.25_Final.pth
remove prefix 'module.'
Missing keys:0
Unused checkpoint keys:0
Used keys:300
Finished loading model!


### Video tools

In [5]:
def show_video(Video):
    """Play ``Video`` in a window titled 'Frame' until it ends or 'q' is pressed.

    The capture is released and all OpenCV windows are closed when playback stops.
    """
    while Video.isOpened():
        ret, frame = Video.read()
        if not ret:
            # No more frames could be read.
            break
        cv2.imshow('Frame', frame)
        # Wait up to 40 ms for a key press between frames.
        if cv2.waitKey(40) == ord('q'):
            break
    # drop the video
    Video.release()
    cv2.destroyAllWindows()
def video_info(Video):
    """Print the frame rate and frame count of ``Video``, or an error if it is not open."""
    if not Video.isOpened():
        print("Error opening the video file")
        return
    # Property ids 5 and 7 are OpenCV's CAP_PROP_FPS and CAP_PROP_FRAME_COUNT.
    fps = Video.get(5)
    print('Frames per second : ', fps,'FPS')
    frame_count = Video.get(7)
    print('Frame count : ', frame_count)

### Face detection function (RetinaFace with MobileNet backbone)

In [6]:
def FaceDetect(frame):
    """Detect faces in one frame with the module-level RetinaFace ``net``.

    Relies on the globals ``net``, ``device`` and ``cfg_mnet`` defined in
    earlier cells.

    Args:
        frame: image array with three dimensions (height, width, channels).
            Presumably a BGR frame as returned by OpenCV - TODO confirm.

    Returns:
        A list of 1-D arrays, one per kept detection. Each array is a row of
        ``dets`` (box values followed by the score) concatenated with the
        matching landmark row. Only rows whose element at index 4 exceeds 0.5
        are returned; index 4 is the score assuming ``decode`` yields four
        box columns (the caller slices ``[:4]`` as the box) - TODO confirm.
    """
    # Work on a float32 copy so the in-place mean subtraction below is valid.
    img = np.float32(frame)
    im_height, im_width, _ = img.shape
    # (width, height, width, height): multiplier applied to the decoded boxes.
    scale = torch.Tensor([img.shape[1], img.shape[0], img.shape[1], img.shape[0]])
    # Per-channel offsets; presumably the channel means used when the model was trained - TODO confirm.
    img -= (104, 117, 123)
    # Channels first, then add a leading batch dimension of size 1.
    img = img.transpose(2, 0, 1)
    img = torch.from_numpy(img).unsqueeze(0)
    img = img.to(device)
    scale = scale.to(device)
    loc, conf, landms = net(img) 
    # Priors are generated for this exact image size.
    priorbox = PriorBox(cfg_mnet, image_size=(im_height, im_width))
    priors = priorbox.forward()
    priors = priors.to(device)
    prior_data = priors.data
    boxes = decode(loc.data.squeeze(0), prior_data, cfg_mnet['variance'])
    # NOTE(review): decode presumably returns coordinates relative to the image size,
    # which is why they are multiplied by (w, h, w, h) here - confirm.
    boxes = boxes * scale 
    boxes = boxes.cpu().numpy()
    # Column 1 of the class output is used as the detection score.
    scores = conf.squeeze(0).data.cpu().numpy()[:, 1]
    landms = decode_landm(landms.data.squeeze(0), prior_data, cfg_mnet['variance'])
    # After the transpose/unsqueeze, img.shape[3] is the width and img.shape[2] the height;
    # the ten entries are five (width, height) pairs, one pair per landmark value pair.
    scale1 = torch.Tensor([img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2], img.shape[3], img.shape[2],
                           img.shape[3], img.shape[2]])
    scale1 = scale1.to(device)
    landms = landms * scale1 
    landms = landms.cpu().numpy()
    # ignore low scores
    inds = np.where(scores > 0.02)[0]
    boxes = boxes[inds]
    landms = landms[inds]
    scores = scores[inds]

    # Sort the remaining candidates by descending score.
    order = scores.argsort()[::-1]
    boxes = boxes[order]
    landms = landms[order]
    scores = scores[order]

    # Rows of (box values..., score), passed to NMS with a threshold of 0.4.
    dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
    keep = py_cpu_nms(dets, 0.4)

    dets = dets[keep, :]
    landms = landms[keep]

    # keep top-K faster NMS
    # dets = dets[:args.keep_top_k, :]
    # landms = landms[:args.keep_top_k, :]

    # Append the landmark values to each detection row.
    dets = np.concatenate((dets, landms), axis=1)
    # Final filter: only keep rows whose element at index 4 is above 0.5.
    faces = []
    for face in dets:
        if face[4] > 0.5:
            faces.append(face)
    return faces

# Final Converter Function

In [11]:
def _blur_face_regions(frame, faces, frame_width, frame_height):
    """Return a uint8 copy of ``frame`` in which every detected face is Gaussian-blurred."""
    # 255 = keep the sharp pixel, 0 = take the blurred pixel. A fresh mask per
    # frame keeps its dtype stable instead of drifting from uint8 to float.
    mask = np.full_like(frame, 255)
    diagonal = 0
    for face in faces:
        facial_area = face[:4]
        local_diagonal = np.sqrt((facial_area[2] - facial_area[0]) ** 2
                                 + (facial_area[3] - facial_area[1]) ** 2)
        # The largest face decides the blur strength for the whole frame.
        if local_diagonal > diagonal:
            diagonal = local_diagonal
        # Corners of the (slightly enlarged) box, then black it out in the mask.
        left_x, top_y, right_x, bottom_y = larger_face(facial_area, local_diagonal,
                                                       frame_width, frame_height)
        cv2.rectangle(mask, (left_x, top_y), (right_x, bottom_y), (0, 0, 0), thickness=-1)

    # Blur along height and width only; sigma 0 leaves the channel axis untouched.
    sigma = (diagonal / 10, diagonal / 10, 0)
    # Feathering the mask gives a soft transition between sharp and blurred areas.
    weights = gaussian_filter(mask.astype(np.float32), sigma=sigma, mode='nearest') / 255
    weights = np.clip(weights, 0.0, 1.0)
    frame_blur = gaussian_filter(frame, sigma=sigma, mode='nearest')
    blended = weights * frame + (1 - weights) * frame_blur
    return blended.astype(np.uint8)


def convert_video_gauss_and_overlay(location, destination1, partition):
    """Write a copy of a video in which detected faces are Gaussian-blurred.

    Frames 0, ``partition``, 2 * ``partition``, ... of the input are decoded,
    run through ``FaceDetect`` and written to the output (blurred if faces
    were found, unchanged otherwise). All other frames are skipped without
    being decoded or written.

    Args:
        location: path of the input video.
        destination1: path of the MJPG-encoded output video.
        partition: keep one frame out of every ``partition`` input frames.

    Returns:
        None. If the input cannot be opened, an error is printed and no
        output file is created.

    NOTE(review): the output is written at the source frame rate, so with
    ``partition`` > 1 it plays back faster than the input. Confirm whether
    the rate should be divided by ``partition``.
    """
    video = cv2.VideoCapture(location)
    out = None
    try:
        if not video.isOpened():
            print("Error opening the video file")
            return

        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # CAP_PROP_FPS is property 5; property 7 is the frame count, not the frame rate.
        fps = video.get(cv2.CAP_PROP_FPS)
        out = cv2.VideoWriter(destination1, cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
                              fps, (width, height))

        frame_number = 0
        # grab() advances exactly one frame without decoding it; only the
        # frames that are kept are decoded with retrieve().
        while video.grab():
            if frame_number % partition == 0:
                succes, frame = video.retrieve()
                if not succes:
                    break
                # Detect the faces.
                faces = FaceDetect(frame)
                if len(faces) > 0:
                    frame = _blur_face_regions(frame, faces, width, height)
                out.write(frame)
            frame_number += 1
    finally:
        # Always free the capture and the writer, even if detection fails mid-video.
        video.release()
        if out is not None:
            out.release()
        cv2.destroyAllWindows()

### Example of video blurring (should take a couple of seconds)


In [14]:
# Blur the faces in Test.avi and write the result to Destination.avi.
convert_video_gauss_and_overlay(location="Test.avi",
                                destination1="Destination.avi",
                                partition=5)