# Marking up [WiderFace](http://shuoyang1213.me/WIDERFACE/) dataset by gender

We have all WiderFace **images** and some .mat files with **metadata** for these images. We need to **mark up these images by gender**. The **steps** are: 
1. **Separate** the easy-scale images into their own folder
2. **Mark up** these easy-scale images by gender
3. **Crop** the faces from these images
4. **Create** a file of `face, gender` records

In [6]:
from scipy import io
import random
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
from shutil import copyfile, rmtree 
from IPython.display import clear_output

In [7]:
# Annotation files (.mat) for the train and validation splits
path_to_train_mat = 'WiderFacesDataset/wider_face_train.mat'
path_to_val_mat = 'WiderFacesDataset/wider_face_val.mat'
# Directory the image file names from the metadata are resolved against
path_to_all_images = 'WiderFacesDataset/All/'
# Destination directories for the easy-scale train / validation images
path_to_train_easy_scale_images = 'WiderFacesDataset/EasyScaleImages/Train/'
path_to_val_easy_scale_images = 'WiderFacesDataset/EasyScaleImages/Val/'

In [8]:
# Load the train and validation metadata with scipy.io.loadmat; the rest of
# the notebook reads the 'file_list' and 'face_bbx_list' entries from them.
train_mat = io.loadmat(path_to_train_mat)
val_mat = io.loadmat(path_to_val_mat)

In [9]:
# Keywords identifying the easy-scale image folders; a folder is selected
# when one of these appears as a substring of its first file name.
easy_names = ['Gymnastics', 'Handshaking', 'Waiter', 'Conference',
                'Worker', 'Parachutist', 'Coach', 'Meeting',
                'Aerobics', 'Boat', 'Dancing', 'Swimming', 
                'Family', 'Balloonist', 'Dresses', 'Couple', 
                'Jockey', 'Tennis', 'Spa', 'Surgeons']

## Separate easy-scale images from the full image set

In [10]:
# Get the file names of all easy-scale images
def get_easy_scale_images(mat):
    """Return the '.jpg' file names of all images in easy-scale folders.

    A folder of ``mat['file_list']`` counts as easy-scale when the name of
    its first file contains one of the module-level ``easy_names`` keywords.
    Folders are visited keyword by keyword (in ``easy_names`` order) and,
    within a keyword, in ``file_list`` order; a folder matching several
    keywords is therefore listed once per matching keyword.

    Args:
        mat: Mapping with a 'file_list' entry, as loaded from the .mat file.

    Returns:
        List of image file names, each with ".jpg" appended.
    """
    file_list = mat['file_list']

    # Indices of the folders whose first file name contains a keyword.
    matching_indices = [
        index
        for keyword in easy_names
        for index in range(len(file_list))
        if keyword in file_list[index][0][0][0][0]
    ]

    # Flatten the selected folders into one list of file names.
    return [
        entry[0][0] + ".jpg"
        for index in matching_indices
        for entry in file_list[index][0]
    ]

In [11]:
# Copy easy-scale images into their own directories
def separate_easy_scale_part():
    """Copy the easy-scale train and validation images into their own folders.

    Every image name returned by ``get_easy_scale_images`` is copied from
    ``path_to_all_images`` into ``path_to_train_easy_scale_images`` (train
    metadata) or ``path_to_val_easy_scale_images`` (validation metadata).
    The source images are left in place.

    Raises:
        OSError: If a source image is missing or a copy fails.
    """

    def copy_images(mat, path):
        # copyfile does not create missing directories, so make sure the
        # destination exists before the first copy.
        os.makedirs(path, exist_ok=True)
        for name in get_easy_scale_images(mat):
            copyfile(path_to_all_images + name, path + name)

    # 1 - Train images
    copy_images(train_mat, path_to_train_easy_scale_images)
    # 2 - Validation images
    copy_images(val_mat, path_to_val_easy_scale_images)

In [12]:
# separate_easy_scale_part()

In [28]:
# Number of easy-scale image names listed in the validation metadata
len(get_easy_scale_images(val_mat))

928

In [14]:
def convert2RGB(img):
    """Return ``img`` converted with OpenCV's ``COLOR_BGR2RGB`` code.

    Used before ``plt.imshow`` on images that were read with ``cv2.imread``.
    """
    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return rgb_image

In [23]:
def get_num_of_image(mat, name):
    """Find the folder and file indices of an image in ``mat['file_list']``.

    The indices of the first match are printed and returned.

    Args:
        mat: Mapping with a 'file_list' entry.
        name: Image file name including the '.jpg' extension.

    Returns:
        ``(folder_num, file_num)`` of the first entry whose name plus
        '.jpg' equals ``name``, or ``None`` when there is no such entry.
    """
    for folder_num, folder in enumerate(mat['file_list']):
        for file_num, file_name in enumerate(folder[0]):
            # Stored names carry no extension; add it before comparing.
            if file_name[0][0] + '.jpg' != name:
                continue
            print(f"Folder: {folder_num} \nFile: {file_num}")
            return folder_num, file_num
    return None

In [25]:
# Look up the folder and file indices of one validation image
get_num_of_image(val_mat, '11_Meeting_Meeting_11_Meeting_Meeting_11_349.jpg')

Folder: 3 
File: 25


(3, 25)

In [17]:
def get_boxes(mat, folder, file):
    """Return the boxes stored in ``mat['face_bbx_list']`` for one image.

    Args:
        mat: Mapping with a 'face_bbx_list' entry.
        folder: Folder index, as returned by ``get_num_of_image``.
        file: File index inside that folder.

    Returns:
        The entry at ``mat['face_bbx_list'][folder][0][file][0]``.
    """
    folder_entries = mat['face_bbx_list'][folder][0]
    file_entry = folder_entries[file]
    return file_entry[0]

In [26]:
# Boxes stored for folder 3, file 25 of the validation metadata
get_boxes(val_mat, 3, 25)

array([[190, 177,  74, 117],
       [144,  36,  21,  26],
       [631,  60,  17,  25],
       [668,  46,  16,  21],
       [696,  63,  21,  21],
       [730,  59,  15,  19],
       [607,  53,  17,  15],
       [586,  52,  14,  21],
       [525,  50,  18,  24],
       [475,  50,  14,  18],
       [428,  39,  15,  21],
       [826,  54,  16,  22],
       [856,  56,  15,  17],
       [894,  53,  12,  12],
       [986,  53,  11,  14]])

In [19]:
def draw_rect(image_name, boxes):
    """Show each box of an image next to its crop and wait for user input.

    For every ``(x, y, w, h)`` box the region is enlarged by 25% of the box
    size on each side (``x``/``y`` are treated as the box origin, ``w``/``h``
    as its extents), clamped to the image, outlined in green on a fresh copy
    of the image and displayed beside the cropped region. The function then
    blocks on ``input()`` and clears the cell output before the next box.

    Args:
        image_name: File name appended to ``path_to_all_images``.
        boxes: Iterable of ``(x, y, w, h)`` boxes.

    Raises:
        FileNotFoundError: If the image cannot be read.
    """
    image_path = path_to_all_images + image_name
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Cannot read image: {image_path}")

    height, width = image.shape[:2]
    for (x, y, w, h) in boxes:
        # Clamp to the image: a negative start index would make the slice
        # wrap around and yield an empty or wrong crop.
        x1 = max(0, int(round(x - w * 0.25)))
        y1 = max(0, int(round(y - h * 0.25)))
        x2 = min(width, int(round(x + w * 1.25)))
        y2 = min(height, int(round(y + h * 1.25)))
        if x2 <= x1 or y2 <= y1:
            # Nothing of this box lies inside the image; an empty crop
            # cannot be colour-converted or displayed.
            continue

        # Draw on a copy so earlier rectangles do not accumulate.
        img_copy = np.copy(image)
        cv2.rectangle(img_copy, (x1, y1), (x2, y2), (0, 255, 0), 2)
        crop_image = image[y1:y2, x1:x2]

        fig = plt.figure(figsize=(15, 15))
        fig.add_subplot(1, 2, 1)
        plt.imshow(convert2RGB(img_copy))
        plt.axis('off')
        plt.tight_layout()

        fig.add_subplot(1, 2, 2)
        plt.imshow(convert2RGB(crop_image))
        plt.axis('off')
        plt.tight_layout()

        plt.show(block=False)

        # Pause until the user answers; the typed value is not stored here.
        input()
        # Release the figure so a long box list does not pile up open figures.
        plt.close(fig)
        clear_output()

In [27]:
# Step through the boxes of the sample validation image one at a time
draw_rect('11_Meeting_Meeting_11_Meeting_Meeting_11_349.jpg', get_boxes(val_mat, 3, 25))