In [1]:
# ref detector: https://towardsdatascience.com/face-detection-using-mtcnn-a-guide-for-face-extraction-with-a-focus-on-speed-c6d59f82d49
# ref rotation: https://www.kaggle.com/code/gpiosenka/align-crop-resize-save-images-using-mtcnn

import os
import sys

sys.path.append('../tflow/mtcnn')

import cv2
import json
import numpy as np
from tqdm import tqdm
from mtcnn.mtcnn import MTCNN
import matplotlib.pyplot as plt
from imutils.paths import list_images
from scipy.spatial.distance import euclidean
from utils import load_image, align, crop_image, rotate_bound

In [2]:
# Pipeline overview
# -----------------
# 1. Object detection
#    Run MTCNN to find the regions of interest and store them in a JSON file.
#    This preprocessing step is reused by the later stages.
#
# 2. Make splits
#    Build one split per dataset combination
#    (extra: separate them into json/txt files):
#      - flickr vs flickr
#      - splunk vs splunk
#      - flickr vs splunk
#      - splunk vs flickr

# Dataset key -> root directory holding that dataset's images.
datasets = dict(
    flickr='/media/choppy/WD_BLACK/datasets/FLICKR',
    splunk='/media/choppy/WD_BLACK/datasets/Splunk',
)

# Checkpoint the collected data every N stored detections.
save_steps = 10

# Factor used to shrink images before detection to speed up MTCNN.
# Original author's note: seems not to be working. Use wisely.
downscale_factor = 10

# Diagnostics switches: per-image messages / preview plots of the crops.
verbose = False
debug = False

In [3]:
# Instantiate the MTCNN face detector once; the same instance is reused for
# every image in the inference loop.
# TODO: explore the min and max face size settings of the detector at inference time
# detector = MTCNN(min_face_size=400)
detector = MTCNN()

2022-10-06 16:52:24.970214: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-06 16:52:25.787994: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-06 16:52:25.789690: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:936] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-10-06 16:52:25.813230: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropri

In [4]:
i = 0  # running count of stored detections, shared across all datasets


def _largest_face(detections):
    """Return the detection whose box ([x, y, width, height]) has the largest area.

    Returns None when `detections` is empty. Ties keep the first detection.
    """
    if not detections:
        return None
    return max(detections, key=lambda det: det['box'][2] * det['box'][3])


def _clamp(value, upper):
    """Limit `value` to the closed range [0, upper] and return it as a plain int."""
    return int(min(max(value, 0), upper))


def _dump_json(records, path):
    """Serialize `records` to `path` as JSON, always closing the file handle."""
    with open(path, 'w') as fp:
        json.dump(records, fp)


for dst_key in datasets:
    ds_dir = datasets.get(dst_key)
    images = [*list_images(ds_dir)]
    # normpath drops a trailing separator, which would otherwise make basename ''
    db_name = os.path.basename(os.path.normpath(ds_dir))
    json_path = db_name + '.json'
    json_data = []

    print(f"doing inference over {len(images)} images from {db_name}")

    for imdir in tqdm(images, desc=dst_key):
        full_img = load_image(imdir)

        if full_img is None:
            if verbose: print(f'img {imdir} could not be loaded. Check')
            continue

        height, width = full_img.shape[:2]
        # Detection runs on a shrunken copy for speed. Keep each side >= 1 px so
        # images smaller than downscale_factor do not yield an empty resize target.
        small_size = (max(width // downscale_factor, 1), max(height // downscale_factor, 1))
        img = cv2.resize(full_img, small_size)  # cv2 expects (width, height)
        detections = detector.detect_faces(img)

        # keep only the biggest face when several are found
        if len(detections) > 1 and verbose:
            print(f'more than one face detected in img: {imdir}, but only the biggest is stored')
        best_det = _largest_face(detections)
        if best_det is None:
            if verbose: print(f'no predictions for {imdir}, please check.')
            continue

        # scale the detection back up to full-resolution coordinates
        best_det['box'] = (np.array(best_det['box']) * downscale_factor).tolist()
        for bkey in best_det['keypoints']:
            best_det['keypoints'][bkey] = (np.array(best_det['keypoints'][bkey]) * downscale_factor).tolist()

        bbox = best_det['box']
        keypoints = best_det['keypoints']
        nose = keypoints['nose']
        left_eye, right_eye = keypoints['left_eye'], keypoints['right_eye']

        # Vertical margin: 60% of the mean eye-to-nose distance (truncated to uint16).
        mean_dst = np.mean([euclidean(left_eye, nose), euclidean(right_eye, nose)]).astype(np.uint16)
        margin = int(mean_dst * 0.6)

        # Horizontal extent comes from the face box, vertical extent from the eyes.
        # Clamp to the frame: the box may reach outside the image, and negative
        # values would wrap around when used as slice indices.
        pt1 = (_clamp(bbox[0], width), _clamp(left_eye[1] - margin, height))
        pt2 = (_clamp(bbox[0] + bbox[2], width), _clamp(right_eye[1] + margin, height))

        if debug:
            # Preview on the full-resolution image; copies are made only here so the
            # normal (non-debug) run does no extra resizing or copying.
            periocular = full_img[pt1[1]:pt2[1], pt1[0]:pt2[0], ...].copy()
            preview = cv2.rectangle(full_img.copy(), pt1, pt2, color=[255, 0, 0], thickness=50)

            plt.figure(figsize=(10, 8))
            plt.imshow(preview)
            plt.show()
            if periocular.size:  # an empty crop cannot be displayed
                plt.figure(figsize=(10, 8))
                plt.imshow(periocular)
                plt.show()

            # Stop after a handful of previews per dataset. Counting the records of
            # the current dataset (not the global `i`) keeps this consistent for
            # every dataset.
            if len(json_data) == 5:
                break

        # record for this image: MTCNN output plus the derived periocular crop
        peri_data = {
            'image_dir': os.path.join(db_name, os.path.relpath(imdir, ds_dir)),
            'mtcnn-inference': best_det,
            'handcrafted': {
                # Crop of the full-size image, stored as [y1, y2, x1, x2]
                # (top, bottom, left, right).
                'periocular': [pt1[1], pt2[1], pt1[0], pt2[0]],
            }
        }

        json_data.append(peri_data)

        i += 1

        # checkpoint every `save_steps` stored detections
        if i % save_steps == 0:
            _dump_json(json_data, json_path)

    # final write: <db-name>.json
    _dump_json(json_data, json_path)

doing inference over 14000 images from FLICKR


flickr:   0%|                                                                                                                                                                            | 0/14000 [00:00<?, ?it/s]2022-10-06 16:52:42.972768: I tensorflow/stream_executor/cuda/cuda_dnn.cc:368] Loaded cuDNN version 8401
2022-10-06 16:52:50.858636: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-10-06 16:52:50.859692: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-10-06 16:52:50.859777: W tensorflow/stream_executor/gpu/asm_compiler.cc:80] Couldn't get ptxas version string: INTERNAL: Couldn't invoke ptxas --version
2022-10-06 16:52:50.860972: I tensorflow/core/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2022-10-06 16:52:50.861165: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] INTERNAL: Fai

doing inference over 24998 images from Splunk


splunk: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24998/24998 [5:29:32<00:00,  1.26it/s]


In [5]:
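# Notes on building the periocular region
#
# Width: take the distance between the two eyes and add a ~20% margin on the left and right.
# Height: triangulate the distance from the eyes to the nose and take roughly 20-30% of it.
#
# In practice: compute the Euclidean distance from the nose to each eye, average both
# distances, and apply a 20-30% margin to that average.
# NOTE: the detection cell currently uses 0.6 of the mean eye-to-nose distance as the margin.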

# all done