In [1]:
import cv2
from skimage import io
import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import math
import sys
import pickle
import random
from haar_like_features import *
from utils import *
%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
# Helper objects: the Haar-like feature extractor and the utility wrapper.
# Both classes presumably come from the star-imports in the first cell.
haar, utils = HaarLikeFeatures(), Utils()

# Raw image arrays, grouped by split. Each split has a positive (faces) and a
# negative (non_faces) pickle.
# NOTE(review): load_pickle presumably unpickles the file; only point it at
# trusted files.
train_faces, train_non_faces = (
    utils.load_pickle('./dataset/pkls/train/faces.pkl'),
    utils.load_pickle('./dataset/pkls/train/non_faces.pkl'),
)
test_faces, test_non_faces = (
    utils.load_pickle('./dataset/pkls/test/faces.pkl'),
    utils.load_pickle('./dataset/pkls/test/non_faces.pkl'),
)

In [4]:
# Per-class sample counts; axis 0 of each image array is taken as the sample axis.
n_train_faces, n_train_non_faces = train_faces.shape[0], train_non_faces.shape[0]
n_test_faces, n_test_non_faces = test_faces.shape[0], test_non_faces.shape[0]

# Totals per split.
n_train = n_train_faces + n_train_non_faces
n_test = n_test_faces + n_test_non_faces

for _label, _count in (
    ('n_train_faces: ', n_train_faces),
    ('n_train_non_faces: ', n_train_non_faces),
    ('n_test_faces: ', n_test_faces),
    ('n_test_non_faces: ', n_test_non_faces),
    ('total train set: ', n_train),
    ('total test set: ', n_test),
):
    print(_label, _count)

# Image geometry is read from the training faces (axes 1 and 2).
# NOTE(review): image arrays are often laid out (n, rows, cols), which would
# make axis 1 the height rather than the width. The printed values are both 19,
# so it makes no difference for this dataset -- confirm before using
# non-square images.
image_width, image_height = train_faces.shape[1], train_faces.shape[2]

# Hard-coded feature count; it matches the second dimension (6066) of the
# feature arrays whose shapes are printed later in this notebook.
num_features_per_img = 6066

for _label, _value in (
    ('Image width: ', image_width),
    ('Image height: ', image_height),
    ('Number of features per image: ', num_features_per_img),
):
    print(_label, _value)

n_train_faces:  2429
n_train_non_faces:  4548
n_test_faces:  472
n_test_non_faces:  23573
total train set:  6977
total test set:  24045
Image width:  19
Image height:  19
Number of features per image:  6066


In [7]:
# Haar-like features for every test face image, computed via the parallel helper.
# The recorded run of this cell took roughly 12 seconds.
test_faces_features = utils.apply_func_parallel_nump_array(
    haar.extract_features,
    test_faces,
    8,      # presumably the number of parallel workers -- TODO confirm in Utils
    0, 0,   # presumably the window origin passed on to extract_features -- TODO confirm
    image_width,
    image_height,
)

Start parallel processing
Parallel processing finished in 11.816620349884033 seconds


In [8]:
# Haar-like features for both training classes, computed via the parallel helper.
# The recorded runs took roughly 102 s (faces) and 180 s (non-faces).
# The literal 8 is presumably the worker count and the two zeros the window
# origin -- TODO confirm against Utils / HaarLikeFeatures.
train_faces_features = utils.apply_func_parallel_nump_array(
    haar.extract_features,
    train_faces,
    8,
    0, 0,
    image_width,
    image_height,
)
train_non_faces_features = utils.apply_func_parallel_nump_array(
    haar.extract_features,
    train_non_faces,
    8,
    0, 0,
    image_width,
    image_height,
)

Start parallel processing
Parallel processing finished in 101.91657018661499 seconds
Start parallel processing
Parallel processing finished in 180.03533506393433 seconds


In [9]:
# Haar-like features for the test non-face images. This is the slowest
# extraction step: the recorded run took roughly 749 seconds.
test_non_faces_features = utils.apply_func_parallel_nump_array(
    haar.extract_features,
    test_non_faces,
    8,      # presumably the number of parallel workers -- TODO confirm in Utils
    0, 0,   # presumably the window origin passed on to extract_features -- TODO confirm
    image_width,
    image_height,
)

Start parallel processing
Parallel processing finished in 748.8589038848877 seconds


In [16]:
# Sanity check: one feature row per input image. The counts should equal the
# image counts printed earlier. Order: test faces, train faces, train
# non-faces, test non-faces.
print(
    test_faces_features.shape[0],
    train_faces_features.shape[0],
    train_non_faces_features.shape[0],
    test_non_faces_features.shape[0],
    sep='\n',
)

472
2429
4548
23573


In [17]:
# Persist the extracted feature arrays so the slow extraction cells do not
# have to be re-run; a later cell reloads these files.
for _features, _path in (
    (test_faces_features, './dataset/pkls/test/faces_features.pkl'),
    (test_non_faces_features, './dataset/pkls/test/non_faces_features.pkl'),
    (train_faces_features, './dataset/pkls/train/faces_features.pkl'),
    (train_non_faces_features, './dataset/pkls/train/non_faces_features.pkl'),
):
    utils.save_pickle(_features, _path)

In [28]:
# Reload the cached feature arrays from disk (suffix `_pk` = loaded from pickle).
# NOTE(review): load_pickle presumably unpickles the file; only point it at
# trusted files.
train_faces_pk, train_non_faces_pk = (
    utils.load_pickle('./dataset/pkls/train/faces_features.pkl'),
    utils.load_pickle('./dataset/pkls/train/non_faces_features.pkl'),
)
test_faces_pk, test_non_faces_pk = (
    utils.load_pickle('./dataset/pkls/test/faces_features.pkl'),
    utils.load_pickle('./dataset/pkls/test/non_faces_features.pkl'),
)

In [29]:
# Report the full shape of each reloaded feature array.
for _label, _array in (
    ('n_train_faces: ', train_faces_pk),
    ('n_train_non_faces: ', train_non_faces_pk),
    ('n_test_faces: ', test_faces_pk),
    ('n_test_non_faces: ', test_non_faces_pk),
):
    print(_label, _array.shape)


n_train_faces:  (2429, 6066)
n_train_non_faces:  (4548, 6066)
n_test_faces:  (472, 6066)
n_test_non_faces:  (23573, 6066)


In [33]:
# Split the training features of each class (P = faces, N = non-faces) into
# train and validation parts. The 0.1 is presumably the validation fraction:
# the recorded output shows 698 of 6977 samples in the validation set.
# NOTE(review): no random seed is set anywhere in the visible cells; if
# split_data shuffles, the split will differ between runs -- confirm.
P_train, P_val, N_train, N_val = utils.split_data(
    train_faces_pk,
    train_non_faces_pk,
    0.1,
)

# Combine the positive and negative parts into one (X, y) pair per split.
X_train, y_train = utils.merge_P_N(P_train, N_train)
X_val, y_val = utils.merge_P_N(P_val, N_val)
X_test, y_test = utils.merge_P_N(test_faces_pk, test_non_faces_pk)

for _title, _matrix in (
    ("Validation Set", X_val),
    ("Training Set", X_train),
    ("Testing Set", X_test),
):
    print(_title, _matrix.shape)

Validation Set (698, 6066)
Training Set (6279, 6066)
Testing Set (24045, 6066)


In [34]:
# Store each split as a single (X, y) tuple.
for _dataset, _path in (
    ((X_train, y_train), './dataset/train_dataset.pkl'),
    ((X_val, y_val), './dataset/val_dataset.pkl'),
    ((X_test, y_test), './dataset/test_dataset.pkl'),
):
    utils.save_pickle(_dataset, _path)

In [90]:
# Serial feature extraction: one row of `faces_features` per image in `faces`.
# NOTE(review): neither `faces` nor `faces_features` is assigned in any cell
# visible in this notebook, and the execution count (90) is far from the cells
# above. This cell appears to rely on leftover kernel state -- confirm or remove.
for idx, image in enumerate(faces):
    # Flatten the extractor output to a single row before storing it.
    faces_features[idx] = haar.extract_features(
        image, 0, 0, image_width, image_height
    ).reshape(1, -1)

In [91]:
# Serial feature extraction: one row of `non_faces_fetures` per image in
# `non_faces`. The misspelled name `non_faces_fetures` is kept because other
# cells refer to it.
# NOTE(review): neither `non_faces` nor `non_faces_fetures` is assigned in any
# cell visible in this notebook. This cell appears to rely on leftover kernel
# state -- confirm or remove.
for idx, image in enumerate(non_faces):
    # Flatten the extractor output to a single row before storing it.
    non_faces_fetures[idx] = haar.extract_features(
        image, 0, 0, image_width, image_height
    ).reshape(1, -1)

In [100]:
# Cast both feature arrays to 64-bit integers before they are pickled.
#
# Each array is converted from itself. The previous version assigned
# `faces_features.astype(np.int64)` to `non_faces_fetures`, which replaced the
# non-face features with a copy of the face features, so the non-face pickle
# written afterwards would have contained face data.
#
# NOTE(review): `faces_features` and `non_faces_fetures` are not assigned in
# any cell visible in this notebook; this cell appears to rely on leftover
# kernel state -- confirm or remove.
faces_features = faces_features.astype(np.int64)
non_faces_fetures = non_faces_fetures.astype(np.int64)

In [101]:
# Write each feature array to its own pickle file. The `with` block closes the
# file handle even if pickling fails.
for _array, _path in (
    (faces_features, './dataset/faces_features.pkl'),
    (non_faces_fetures, './dataset/non_faces_features.pkl'),
):
    with open(_path, 'wb') as _out:
        pickle.dump(_array, _out)