In [6]:
import warnings
warnings.simplefilter('ignore')

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cv2 as cv
import os
from skimage import io
from face_verification.utils import *
from face_verification.api import *

%matplotlib inline

# Image Reading and Cropping

### LFW Dataset

In [2]:
# LFW dataset: detect the face in every image under `from_folder`, crop it,
# and write the crop to the mirrored sub-folder under `to_folder`.
haar_xml = './face_verification/xmls/haarcascade_frontalface_default.xml'
from_folder = './images/lfw/'
to_folder = './images/lfw_face/'

lfw_paths = []        # output path of every crop that was saved
lfw_names = []        # sub-folder name of every saved crop (parallel to lfw_paths)
lfw_error_paths = []  # source images on which face detection raised

folder_list = sorted(os.listdir(from_folder))
for folder in folder_list:
    # List the sub-folder once. Entries that cannot be listed (plain files,
    # unreadable directories) are skipped; only OS-level errors are caught so
    # that Ctrl-C and programming errors are not silently swallowed.
    # The listing is sorted so the resulting CSV row order is reproducible.
    try:
        image_list = sorted(os.listdir(from_folder + folder + '/'))
    except OSError:
        continue

    # Make sure the output folder exists. makedirs also creates `to_folder`
    # itself when it is missing, which a plain os.mkdir would fail on.
    os.makedirs(to_folder + folder, exist_ok=True)

    for image_name in image_list:
        from_path = from_folder + folder + '/' + image_name
        to_path = to_folder + folder + '/' + image_name
        image = read_image(from_path)

        # Face detection. Any exception raised by the project helper is treated
        # as "detection failed for this image" and the image is recorded and
        # skipped. `Exception` (not a bare except) keeps KeyboardInterrupt and
        # SystemExit working during this long-running loop.
        try:
            faces = cascade_detector(image, xml=haar_xml, scale_factor=1.3,
                                     min_neighbors=5)
        except Exception:
            lfw_error_paths.append(from_path)
            continue

        # Crop the face area.
        # NOTE(review): presumably enlarges the detected box by 1.3 and resizes
        # to 256x256 - confirm against crop_face.
        crop = crop_face(image, faces, scale_factor=1.3, target_size=(256, 256))
        # NOTE(review): assumes read_image yields BGR channel order; io.imsave
        # writes the array as RGB, hence the conversion - confirm.
        crop_rgb = BGR2RGB(crop)

        # Save the crop and record where it went.
        io.imsave(to_path, crop_rgb)
        lfw_paths.append(to_path)
        lfw_names.append(folder)

In [15]:
# Persist the LFW results.
# Source paths on which face detection raised.
np.save('./images/lfw_error.npy', lfw_error_paths)

# One (name, path) row per saved crop.
maps = {'name': lfw_names, 'path': lfw_paths}
lfw_df = pd.DataFrame(maps, columns=['name', 'path'])
lfw_df.to_csv('./images/lfw_path.csv', index=False)

# Report the counts only after lfw_df has been built; printing first raised
# NameError on a fresh kernel because lfw_df did not exist yet.
print('Correct images:\t', len(lfw_df))
print('Error images:\t', len(lfw_error_paths))

Correct images:	 11986
Error images:	 1247


### VGG2 Dataset

In [20]:
# VGG2 training set: detect the face in every image under `from_folder`, crop
# it, and write the crop to the mirrored sub-folder under `to_folder`.
haar_xml = './face_verification/xmls/haarcascade_frontalface_default.xml'
from_folder = './images/vgg2/train/'
to_folder = './images/vgg2_face/train/'

vgg2_train_paths = []        # output path of every crop that was saved
vgg2_train_names = []        # sub-folder name of every saved crop (parallel to paths)
vgg2_train_error_paths = []  # source images on which face detection raised

folder_list = sorted(os.listdir(from_folder))
for folder in folder_list:
    # List the sub-folder once. Entries that cannot be listed (plain files,
    # unreadable directories) are skipped; only OS-level errors are caught so
    # that Ctrl-C and programming errors are not silently swallowed.
    # The listing is sorted so the resulting CSV row order is reproducible.
    try:
        image_list = sorted(os.listdir(from_folder + folder + '/'))
    except OSError:
        continue

    # Make sure the output folder exists. makedirs also creates the nested
    # `to_folder` itself when it is missing, which a plain os.mkdir would fail on.
    os.makedirs(to_folder + folder, exist_ok=True)

    for image_name in image_list:
        from_path = from_folder + folder + '/' + image_name
        to_path = to_folder + folder + '/' + image_name
        image = read_image(from_path)

        # Face detection. Any exception raised by the project helper is treated
        # as "detection failed for this image" and the image is recorded and
        # skipped. `Exception` (not a bare except) keeps KeyboardInterrupt and
        # SystemExit working during this long-running loop.
        try:
            faces = cascade_detector(image, xml=haar_xml, scale_factor=1.3,
                                     min_neighbors=5)
        except Exception:
            vgg2_train_error_paths.append(from_path)
            continue

        # Crop the face area.
        # NOTE(review): presumably enlarges the detected box by 1.3 and resizes
        # to 256x256 - confirm against crop_face.
        crop = crop_face(image, faces, scale_factor=1.3, target_size=(256, 256))
        # NOTE(review): assumes read_image yields BGR channel order; io.imsave
        # writes the array as RGB, hence the conversion - confirm.
        crop_rgb = BGR2RGB(crop)

        # Save the crop and record where it went.
        io.imsave(to_path, crop_rgb)
        vgg2_train_paths.append(to_path)
        vgg2_train_names.append(folder)

In [21]:
# Persist the outcome of the VGG2 training-set pass.
# 1) source paths on which face detection raised
np.save('./images/vgg2_train_error.npy', vgg2_train_error_paths)

# 2) one (name, path) row per saved crop
maps = dict(name=vgg2_train_names, path=vgg2_train_paths)
vgg2_train_df = pd.DataFrame(data=maps, columns=['name', 'path'])
vgg2_train_df.to_csv('./images/vgg2_train_path.csv', index=False)

# 3) summary counts
print('Correct images:\t', vgg2_train_df.shape[0])
print('Error images:\t', len(vgg2_train_error_paths))

Correct images:	 2113881
Error images:	 1028009


In [16]:
# VGG2 test set: detect the face in every image under `from_folder`, crop it,
# and write the crop to the mirrored sub-folder under `to_folder`.
haar_xml = './face_verification/xmls/haarcascade_frontalface_default.xml'
from_folder = './images/vgg2/test/'
to_folder = './images/vgg2_face/test/'

vgg2_test_paths = []        # output path of every crop that was saved
vgg2_test_names = []        # sub-folder name of every saved crop (parallel to paths)
vgg2_test_error_paths = []  # source images on which face detection raised

folder_list = sorted(os.listdir(from_folder))
for folder in folder_list:
    # List the sub-folder once. Entries that cannot be listed (plain files,
    # unreadable directories) are skipped; only OS-level errors are caught so
    # that Ctrl-C and programming errors are not silently swallowed.
    # The listing is sorted so the resulting CSV row order is reproducible.
    try:
        image_list = sorted(os.listdir(from_folder + folder + '/'))
    except OSError:
        continue

    # Make sure the output folder exists. makedirs also creates the nested
    # `to_folder` itself when it is missing, which a plain os.mkdir would fail on.
    os.makedirs(to_folder + folder, exist_ok=True)

    for image_name in image_list:
        from_path = from_folder + folder + '/' + image_name
        to_path = to_folder + folder + '/' + image_name
        image = read_image(from_path)

        # Face detection. Any exception raised by the project helper is treated
        # as "detection failed for this image" and the image is recorded and
        # skipped. `Exception` (not a bare except) keeps KeyboardInterrupt and
        # SystemExit working during this long-running loop.
        try:
            faces = cascade_detector(image, xml=haar_xml, scale_factor=1.3,
                                     min_neighbors=5)
        except Exception:
            vgg2_test_error_paths.append(from_path)
            continue

        # Crop the face area.
        # NOTE(review): presumably enlarges the detected box by 1.3 and resizes
        # to 256x256 - confirm against crop_face.
        crop = crop_face(image, faces, scale_factor=1.3, target_size=(256, 256))
        # NOTE(review): assumes read_image yields BGR channel order; io.imsave
        # writes the array as RGB, hence the conversion - confirm.
        crop_rgb = BGR2RGB(crop)

        # Save the crop and record where it went.
        io.imsave(to_path, crop_rgb)
        vgg2_test_paths.append(to_path)
        vgg2_test_names.append(folder)

In [19]:
# Persist the outcome of the VGG2 test-set pass.
# 1) source paths on which face detection raised
np.save('./images/vgg2_test_error.npy', vgg2_test_error_paths)

# 2) one (name, path) row per saved crop
maps = dict(name=vgg2_test_names, path=vgg2_test_paths)
vgg2_test_df = pd.DataFrame(data=maps, columns=['name', 'path'])
vgg2_test_df.to_csv('./images/vgg2_test_path.csv', index=False)

# 3) summary counts
print('Correct images:\t', vgg2_test_df.shape[0])
print('Error images:\t', len(vgg2_test_error_paths))

Correct images:	 116568
Error images:	 52828
