In [1]:
%matplotlib inline
import cv2
import sys
import numpy as np
import os
import inspect
from skimage import io
from scipy import misc
import matplotlib.pyplot as plt
from skimage.transform import resize
from IPython.display import clear_output
import pandas as pd

## SSD: Single Shot MultiBox Detector
[Ссылка на arxiv "SSD: Single Shot MultiBox Detector"](https://arxiv.org/pdf/1512.02325.pdf)

In [2]:
img_size = 256
faces_in_image_limit = 1

In [3]:
def extract_faces(img):
    """This function extracts a face from a photo.
    
    :param img: the image from which we wanna to derive a face.
    
    :return: np.array of an extracted face and confidence that it is a human face.
    """
    model_file = "utils/opencv_face_detector_uint8.pb"
    config_file = "utils/opencv_face_detector.pbtxt"
    
    # This network has been created for the Caffe and Tensorflow, I used the second one
    net = cv2.dnn.readNetFromTensorflow(model_file, config_file)
    
    # Returning results
    image_data_fin = []
    confidence_res = None
    
    h, w = img.shape[:2]
    
    # https://www.pyimagesearch.com/2017/11/06/deep-learning-opencvs-blobfromimage-works/ blob description
    # First we resize the image to 300x300 according to the pretrained weights
    # Second, the scale factor (standard deviation in the z-scoring), I do not use the scale therefore set it as 1.0
    # Third, mean-tupple of RGB [mu-Red, mu-Green, mu-Blue] 
    # Forth, indicates that swap first and last channels in 3-channel image is necessary.
    # Fifth, indicates whether image will be cropped after resize or not
    blob = cv2.dnn.blobFromImage(cv2.resize(img, (300, 300)), 1.0, (300, 300), [104, 117, 123], False, False)
    
    # pass the blob through the network and obtain the detections and predictions
    net.setInput(blob)
    detections = net.forward()
    
    # loop over the detections
    for i in range(detections.shape[2]):
        # extract the confidence (i.e., probability) associated with the prediction
        # https://docs.opencv.org/trunk/d3/d63/classcv_1_1Mat.html
        confidence = detections[0, 0, i, 2]
        # If confidence is higher than 50% than 
        if confidence > 0.5:
            # compute the (x, y)-coordinates of the bounding box for the object
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (x, y, x1, y1) = box.astype("int")
            # create a new image (augmented image, in the way to cut off everything except a face)
            roi_color = img[y:y1, x:x1]
            im = resize(roi_color, (img_size, img_size))
            image_data_fin.append(im)
            confidence_res = confidence
    
    # If the only one face on a photo then return it (as np.array) and confidence that it is a human face.
    if len(image_data_fin) != faces_in_image_limit:
        return [], None
    else:
        return image_data_fin, confidence_res

In [4]:
def print_progress(total, current, image, like_type, missing_imgs):
    """This function print progress whereas files are handling.
    
    :param total: total number of files
    :param current: current number of handled files
    :param image: an image's name
    :param like_type: the folder from where we are handling files
    :param missing_imgs: number of files which were missed. It's required in purpose to reflect a percentage properly. 
    """
    def progressBar(current, total, missing_imgs, barLength = 20):
        """Represent a progress bar, like that [--->    ] 50%
        
        :param total: total number of files
        :param current: current number of handled files
        :param missing_imgs: number of files which were missed. It's required in purpose to reflect a percentage properly. 
        :param barLength: required in purpose to show the bar of the same length (default 20 symbols)
        """
        percent = float(current) * 100 / (total - missing_imgs)
        arrow   = '-' * int(percent/100 * barLength) + '>'
        spaces  = ' ' * (barLength - len(arrow))
        sys.stdout.write('\rProgress: [%s%s] %d %%\n' % (arrow, spaces, percent + 1))
        
    sys.stdout.write('\r%d of %d %s files have been handling\n' % (current, total, like_type))
    sys.stdout.write('\rImage: %s\n' % image)
    progressBar(current, total, missing_imgs)
    sys.stdout.flush()

def count_files(path):
    """Count number of files in a folder (missin invisible files, like '.filename')
    
    :param path: path to folder.
    :return: Evaluated number of files
    """
    return len([name for name in path if not name[0] =="."])

In [5]:

# For each image, we want to know if each picture is attractive or unattractive

# list of images translated into np-array
images = []
# labels to each image
labels = []

def handle_images(name=''):
    """The function process all photos and prepares them for training.
    
    :param name: the name of an user of a folder (name1_like)
    """
    # The directory where this file is placed
    currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
    # Path to the folder with all samples folder
    data_path = os.path.dirname(currentdir) + '\\samples'
    
    name = name + '_' if name != '' else ''
    
    # List of files in like/dislike directory
    dislikes_images_stack = os.listdir(os.path.join(data_path, name + 'dislike'))
    likes_images_stack = os.listdir(os.path.join(data_path, name + 'like'))

    def process_folder(images_stack, like_type, name=''):
        """The function which processes a folder, by handling images an labeling them.
        
        :param images_stack: a list of images
        :param like_type: the type of folder which is processing.
        :param name: the name beside the like-type in folder name.
        :return: confidence-list (confidence that each passed image is a human face) , number of missed images, 
        files processed, total number of images
        """
        number_of_images = count_files(images_stack)
        files_processed = 0
        confidence_list = []
        number_of_missing_images = 0

        for img in images_stack:
            if not img.startswith('.'):
                # Print progress
                clear_output(wait=True)
                print_progress(number_of_images, files_processed, img, like_type, number_of_missing_images)
                try:
                    # obtain a face 
                    faces, confidence = extract_faces(cv2.imread(os.path.join(data_path, os.path.join(name + like_type, img))))
                except Exception as e:
                    raise e
                
                # Check if the only one face has been retrieved
                if len(faces) > 0 and len(faces) < 2:
                    confidence_list.append(confidence)
                elif len(faces) == 0:
                    number_of_missing_images += 1
                
                # Labeling
                for face in faces:
                    images.append(face)
                    if like_type == 'like':
                        labels.append(1)
                    else:
                        labels.append(0)
                    files_processed += 1
        return confidence_list, number_of_missing_images, files_processed, number_of_images

    # Gather infromation regard the processed files (along with processing)
    conf_list, NoMI, proc_files, NoI = process_folder(dislikes_images_stack, 'dislike', name)
    conf_list2, NoMI2, proc_files2, NoI2 = process_folder(likes_images_stack, 'like', name)
    conf_list.extend(conf_list2)
    conf_list = np.array(conf_list)
    NoMI += NoMI2
    NoI += NoI2
    return {'face_convincing': pd.DataFrame([['{:.2f} %'.format(np.mean(conf_list) * 100)], ['{:.2f} %'.format(np.amax(conf_list) * 100)], ['{:.2f} %'.format(np.amin(conf_list) * 100)], ['{:.2f} %'.format(np.std(conf_list) * 100)]], index=['mean', 'max', 'min', 'std'], columns=['percents']), 'images': pd.DataFrame([[NoI], [NoMI], ['{:.2f} %'.format((NoI - NoMI2)/NoI * 100)], [proc_files2], [proc_files]], index=['toatal amount', 'missed amount', 'handled ratio', 'handled likes', 'handled dislikes'], columns=['data'])}

In [6]:
recap = handle_images('milka')
images = np.array(images)
labels = np.array(labels)

1295 of 1350 like files have been handling
Image: 640x800_ffe1f99f-4dea-47bf-b616-a94668d6b879.jpg
Progress: [------------------->] 100 %


In [7]:
# images -- shows the information about handled photos
# face_convincing -- shows statistics about face retrieving
recap['images']

Unnamed: 0,data
toatal amount,3011
missed amount,196
handled ratio,98.21 %
handled likes,1296
handled dislikes,1519


In [8]:
print(images.shape)
print(labels.shape)

(2815, 256, 256, 3)
(2815,)
