# Wider Face: Process Training Set

## Description

In this notebook, we develop a way to extract face pictures using only the Wider Face training set. To do this, we take some of the pictures in the Wider Face dataset and create what we call a positive training set, containing only crops of faces, and a negative training set, containing crops that don't have a face. Faces with a height or width below 32 pixels are ignored. All of the images will then be converted to grayscale for better results.

In [9]:
import cv2
import numpy as np
from random import randint

In [10]:
def crop_image(image, start_x, start_y, height, width):
    """Return the region of ``image`` whose top-left corner is (start_x, start_y).

    The region spans ``height`` rows and ``width`` columns. ``image`` is
    indexed rows first, then columns.
    """
    rows = slice(start_y, start_y + height)
    cols = slice(start_x, start_x + width)
    return image[rows, cols]

In [17]:
def create_positive_set(file_path, images_path, train_pos_path, info_train, val_pos_path, info_val, limit_train, limit_val):
    """Crop annotated faces into a training and a validation positive set.

    The annotation file is read as repeated records: one line with the image
    path, one line with the number of faces, then one line per face whose
    first four integers are ``x y w h``. A record with zero faces is still
    followed by a single placeholder line.

    The first ``limit_train`` accepted faces are saved under
    ``train_pos_path`` and listed in ``info_train``; the next ``limit_val``
    are saved under ``val_pos_path`` and listed in ``info_val``. Both sets
    number their files from ``Image0.jpg`` and the info line always names the
    file that was actually written.

    Args:
        file_path: path of the annotation file.
        images_path: prefix prepended to every image path in the annotations.
        train_pos_path: output prefix for training crops.
        info_train: path of the training info file (overwritten).
        val_pos_path: output prefix for validation crops.
        info_val: path of the validation info file (overwritten).
        limit_train: number of training crops to produce.
        limit_val: number of validation crops to produce.

    Returns:
        None. Stops when both limits are met or the annotations run out.
    """
    min_side = 32  # faces narrower or shorter than this are skipped
    total = limit_train + limit_val
    count = 0
    with open(file_path, "r") as file_reader, open(info_train, "w") as train_writer, open(info_val, "w") as val_writer:
        while count < total:
            header = file_reader.readline()
            if not header:
                # readline() returns "" only at end of file
                return
            image_path = header.rstrip()
            if not image_path:
                continue
            num_faces = int(file_reader.readline())
            # Always consume the record's face lines (one placeholder line
            # when there are no faces) so the reader stays in sync.
            face_lines = [file_reader.readline() for _ in range(max(num_faces, 1))]
            if num_faces == 0:
                # it's a negative image
                continue

            # Decode the picture only once we know it has faces.
            img = cv2.imread(images_path + image_path)
            if img is None:
                print("Could not read image: " + image_path)
                continue

            for line in face_lines:
                x, y, w, h = [int(elem) for elem in line.split()][:4]
                if h < min_side or w < min_side:
                    continue
                face = crop_image(img, x, y, h, w)
                if face.size == 0:
                    # box lies completely outside the picture
                    continue
                # Slicing clamps at the image border, so describe the crop
                # with its real size rather than the annotated one.
                crop_h, crop_w = face.shape[:2]

                if count < limit_train:
                    index, out_dir, writer = count, train_pos_path, train_writer
                else:
                    index, out_dir, writer = count - limit_train, val_pos_path, val_writer
                name = "Image" + str(index) + ".jpg"
                cv2.imwrite(out_dir + name, face)
                writer.write("./positive/" + name + "\t1\t0 0 " + str(crop_w) + " " + str(crop_h) + "\n")

                count += 1
                if count >= total:
                    return

In [18]:
# Root folder of the dataset; the input and output paths in the cells below are built from it.
path = "./Image Resources/Dataset/"

In [19]:
# Inputs: the annotation list and the folder holding the source pictures.
file_path = path + "wider_face_split/wider_face_train_bbx_gt.txt"
images_path = path + "WIDER_train/images/"
# Outputs: cropped faces plus one info file per split.
positive_train_path = path + "WIDER_train/cropped images/positive/"
train_positive_info_path = path + "WIDER_train/cropped images/positive_info.dat"
positive_val_path = path + "WIDER_val/cropped images/positive/"
val_positive_info_path = path + "WIDER_val/cropped images/positive_info.dat"
train = create_positive_set(
    file_path=file_path,
    images_path=images_path,
    train_pos_path=positive_train_path,
    info_train=train_positive_info_path,
    val_pos_path=positive_val_path,
    info_val=val_positive_info_path,
    limit_train=10000,
    limit_val=2000,
)

In [20]:
def intersect_images(images_to_avoid, start_x, start_y, width, height):
    """Tell whether a candidate box collides with any of the boxes to avoid.

    A collision is reported when the centre of a box to avoid lies strictly
    inside the candidate, or when the centre of the candidate lies strictly
    inside a box to avoid. Boxes that only overlap at their edges are
    therefore not treated as collisions.

    Args:
        images_to_avoid: iterable of boxes given as ``[x, y, w, h]``.
        start_x: left edge of the candidate box.
        start_y: top edge of the candidate box.
        width: width of the candidate box.
        height: height of the candidate box.

    Returns:
        True if the candidate collides with at least one box, else False.
    """
    end_x = start_x + width
    end_y = start_y + height
    center_x = start_x + width // 2
    center_y = start_y + height // 2
    for avoid in images_to_avoid:
        # Boxes are stored as x, y, w, h: the width extends x and the
        # height extends y.
        avoid_x, avoid_y, avoid_w, avoid_h = avoid[:4]
        center_avoid_x = avoid_x + avoid_w // 2
        center_avoid_y = avoid_y + avoid_h // 2
        if start_x < center_avoid_x < end_x and start_y < center_avoid_y < end_y:
            return True
        if avoid_x < center_x < avoid_x + avoid_w \
                and avoid_y < center_y < avoid_y + avoid_h:
            return True
    return False

In [21]:
def create_negative_set(file_path, image_set_path, train_save_path,info_train, val_save_path, info_val, limit_train, limit_val):
    """Cut random face-free crops into a training and a validation negative set.

    The annotation file is read as repeated records: one line with the image
    path, one line with the number of faces, then one line per face whose
    first four integers are ``x y w h``. A record with zero faces is still
    followed by a single placeholder line. Pictures with more than five
    faces are skipped.

    Up to five crops are taken from each remaining picture. Each crop gets a
    random size and a random position that ``intersect_images`` accepts as
    not colliding with any annotated face. The first ``limit_train`` crops go
    to the training set, the next ``limit_val`` to the validation set. Both
    sets number their files from ``Image0.jpg`` and the info line always
    names the file that was actually written.

    Args:
        file_path: path of the annotation file.
        image_set_path: prefix prepended to every image path in the annotations.
        train_save_path: output prefix for training crops.
        info_train: path of the training info file (overwritten).
        val_save_path: output prefix for validation crops.
        info_val: path of the validation info file (overwritten).
        limit_train: number of training crops to produce.
        limit_val: number of validation crops to produce.

    Returns:
        None. Stops when both limits are met or the annotations run out.
    """
    min_side = 32
    crops_per_image = 5   # several crops per photo, to force more diversity
    max_faces = 5         # crowded pictures leave too little free space
    max_attempts = 100    # give up on a crop instead of looping forever
    total = limit_train + limit_val
    count = 0
    with open(file_path, "r") as file_reader, open(info_train, "w") as train_writer, open(info_val, "w") as val_writer:
        while count < total:
            header = file_reader.readline()
            if not header:
                # readline() returns "" only at end of file
                return
            image_path = header.rstrip()
            if not image_path:
                continue
            num_faces = int(file_reader.readline().rstrip())
            # Always consume the record's face lines (one placeholder line
            # when there are no faces) so the reader stays in sync.
            face_lines = [file_reader.readline() for _ in range(max(num_faces, 1))]
            if num_faces > max_faces:
                continue
            # Regions the crops must stay away from, as [x, y, w, h].
            images_to_avoid = []
            if num_faces != 0:
                images_to_avoid = [[int(elem) for elem in line.split()[:4]] for line in face_lines]

            img = cv2.imread(image_set_path + image_path)
            if img is None:
                print("Could not read image: " + image_path)
                continue
            img_height, img_width = img.shape[:2]

            for _ in range(crops_per_image):
                in_train = count < limit_train
                # Size ranges kept from the original: 32..360 for training,
                # 32..420 for validation, capped so the crop fits the picture.
                max_side = 360 if in_train else 420
                max_w = min(max_side, img_width - 1)
                max_h = min(max_side, img_height - 1)
                if max_w < min_side or max_h < min_side:
                    # picture too small for even the smallest crop
                    break
                h = randint(min_side, max_h)
                w = randint(min_side, max_w)
                for _attempt in range(max_attempts):
                    x = randint(0, img_width - w - 1)
                    y = randint(0, img_height - h - 1)
                    if not intersect_images(images_to_avoid, x, y, w, h):
                        break
                else:
                    # no face-free position found for this size
                    continue

                if in_train:
                    index, out_dir, writer = count, train_save_path, train_writer
                else:
                    index, out_dir, writer = count - limit_train, val_save_path, val_writer
                name = "Image" + str(index) + ".jpg"
                cv2.imwrite(out_dir + name, crop_image(img, x, y, h, w))
                writer.write("./negative/" + name + "\n")

                count += 1
                if count >= total:
                    return
            

In [None]:
# Inputs: the annotation list and the folder holding the source pictures.
file_path = path + "wider_face_split/wider_face_train_bbx_gt.txt"
images_path = path + "WIDER_train/images/"
# Outputs: face-free crops plus one info file per split.
train_negative_path = path + "WIDER_train/cropped images/negative/"
train_negative_info_path = path + "WIDER_train/cropped images/negative_info.txt"
val_negative_path = path + "WIDER_val/cropped images/negative/"
val_negative_info_path = path + "WIDER_val/cropped images/negative_info.txt"
create_negative_set(
    file_path=file_path,
    image_set_path=images_path,
    train_save_path=train_negative_path,
    info_train=train_negative_info_path,
    val_save_path=val_negative_path,
    info_val=val_negative_info_path,
    limit_train=10000,
    limit_val=2000,
)