## Merging Data and Selecting a Percentile of Features


In [1]:
import numpy as np 
from utils import *
from haar_like_features import *
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve
from skimage import io
from skimage.color import rgb2gray
from skimage.transform import resize
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectPercentile, f_classif
import pickle
import os
import time
import cv2

# Shared helper instances used by the cells below.
# NOTE(review): Utils and HaarLikeFeatures are presumably provided by the
# star-imports from `utils` and `haar_like_features` above — confirm.
utils = Utils()
haar = HaarLikeFeatures()

# Render matplotlib figures inline, and (re)load the autoreload extension in
# mode 2 so edited project modules are re-imported before each cell runs.
%matplotlib inline
%reload_ext autoreload
%autoreload 2

## Merging Training Data


In [2]:
# The Haar-like feature matrices were pickled in several batches:
# two batches of face samples and three batches of non-face samples.
train_faces = utils.load_pickle('../dataset/featuresV2/faces_features1.pkl')
train_faces_1 = utils.load_pickle('../dataset/featuresV2/faces_features2.pkl')

train_non_faces = utils.load_pickle('../dataset/featuresV2/non_faces_features1.pkl')
train_non_faces_1 = utils.load_pickle('../dataset/featuresV2/non_faces_features2.pkl')
# Fixed: the file name previously contained stray spaces before the extension
# ("non_faces_features3.  pkl"), which does not match the sibling batch files.
train_non_faces_2 = utils.load_pickle('../dataset/featuresV2/non_faces_features3.pkl')

In [3]:
# Report the (samples, features) shape of every loaded batch.
print("===== Numbers =====")
for _label, _batch in (
    ("Train faces: ", train_faces),
    ("Train faces 1: ", train_faces_1),
    ("Train non faces: ", train_non_faces),
    ("Train non faces 1: ", train_non_faces_1),
    ("Train non faces 2: ", train_non_faces_2),
):
    print(_label, _batch.shape)

===== Numbers =====
Train faces:  (2429, 6066)
Train faces 1:  (13233, 6066)
Train non faces:  (9096, 6066)
Train non faces 1:  (2860, 6066)
Train non faces 2:  (3008, 6066)


In [4]:
# Total number of samples per class across all batches (class-balance check).
n_faces_total = sum(batch.shape[0] for batch in (train_faces, train_faces_1))
n_non_faces_total = sum(
    batch.shape[0]
    for batch in (train_non_faces, train_non_faces_1, train_non_faces_2)
)
print("Faces: ", n_faces_total)
print("Non Faces: ", n_non_faces_total)

Faces:  15662
Non Faces:  14964


In [5]:
# Stack both face batches row-wise into a single matrix (axis 0 is the default).
face_batches = [train_faces, train_faces_1]
Faces = np.concatenate(face_batches)
Faces.shape

(15662, 6066)

In [6]:
# Stack the three non-face batches row-wise into a single matrix (axis 0 is the default).
non_face_batches = [train_non_faces, train_non_faces_1, train_non_faces_2]
NonFaces = np.concatenate(non_face_batches)
NonFaces.shape

(14964, 6066)

In [7]:
# Persist the merged per-class feature matrices.
for _matrix, _destination in (
    (Faces, '../dataset/pkls/train_faces_lg.pkl'),
    (NonFaces, '../dataset/pkls/train_non_faces_lg.pkl'),
):
    utils.save_pickle(_matrix, _destination)

In [8]:
# Build the full training set: faces first (label 1), then non-faces (label 0).
n_pos, n_neg = Faces.shape[0], NonFaces.shape[0]
X = np.concatenate([Faces, NonFaces])
y = np.concatenate([np.ones(n_pos), np.zeros(n_neg)])
print("X: ", X.shape)
print("y: ", y.shape)

X:  (30626, 6066)
y:  (30626,)


In [9]:
# Persist the merged training set as a single (features, labels) tuple.
train_set = (X, y)
utils.save_pickle(train_set, '../dataset/pkls/train_lg.pkl')

## Select the Best 10% of Features to Represent the Dataset


**To speed up training**
Most of the 6066 features carry little meaning, so we keep only the best 10%.


`f_classif` computes the ANOVA F-value of each feature with respect to the class labels.


In [14]:
# Score every feature with the ANOVA F-test and keep the top 10 percent.
selector = SelectPercentile(score_func=f_classif, percentile=10)
selector.fit(X, y)
X_new = selector.transform(X)

In [15]:
# Recover the original column indices of the selected features by passing the
# column positions as "feature names". The count is taken from X itself rather
# than hard-coded (was 6066), so this cell keeps working if the feature set changes.
all_feature_positions = np.arange(X.shape[1])
features_index = selector.get_feature_names_out(all_feature_positions)
features_index

array([99, 100, 101, 107, 108, 109, 110, 346, 347, 348, 349, 352, 353,
       354, 355, 356, 366, 408, 409, 416, 417, 418, 419, 420, 421, 425,
       426, 427, 428, 429, 430, 434, 435, 436, 437, 444, 445, 446, 447,
       634, 635, 636, 675, 676, 677, 682, 683, 684, 685, 686, 687, 691,
       692, 693, 694, 695, 696, 699, 700, 701, 702, 703, 704, 708, 709,
       710, 711, 712, 713, 901, 902, 903, 904, 905, 909, 910, 911, 912,
       913, 914, 915, 917, 918, 919, 920, 921, 926, 927, 928, 929, 930,
       934, 935, 1076, 1077, 1078, 1079, 1085, 1086, 1087, 1088, 1089,
       1091, 1092, 1093, 1094, 1100, 1101, 1102, 1103, 1104, 1107, 1108,
       1225, 1226, 1235, 1236, 1552, 1866, 1869, 1882, 1885, 2136, 2137,
       2147, 2150, 2151, 3025, 3044, 3045, 3057, 3062, 3063, 3064, 3068,
       3069, 3070, 3071, 3075, 3076, 3077, 3078, 3081, 3082, 3083, 3084,
       3085, 3086, 3087, 3088, 3089, 3090, 3094, 3095, 3096, 3097, 3098,
       3101, 3105, 3106, 3107, 3108, 3109, 3113, 3114, 3115, 

In [16]:
# Persist the reduced training set (selected feature columns plus labels).
reduced_train_set = (X_new, y)
utils.save_pickle(reduced_train_set, '../dataset/train_lg_percentile.pkl')

In [13]:
# Persist the original column indices of the selected features.
feat_index_path = '../dataset/feat_index_lg_percentile.pkl'
utils.save_pickle(features_index, feat_index_path)

## Getting Feature Values


In [17]:
# Describe every Haar-like feature for the region given by these four arguments.
# NOTE(review): presumably (x, y, width, height) of a 19x19 window — confirm
# against HaarLikeFeatures.extract_features_as_labels.
window_args = (0, 0, 19, 19)
features_values = haar.extract_features_as_labels(*window_args)

In [18]:
# Persist the descriptor table for all features.
features_values_path = '../dataset/features_values.pkl'
utils.save_pickle(features_values, features_values_path)

In [19]:
# Keep only the descriptor rows of the features chosen by the selector;
# the indices are cast to int so they can be used for fancy indexing.
selected_rows = features_index.astype(int)
features_values_selected = features_values[selected_rows]
features_values_selected

array([[ 0.,  5.,  4.,  2.,  1.],
       [ 0.,  5.,  5.,  2.,  1.],
       [ 0.,  5.,  6.,  2.,  1.],
       ...,
       [ 3., 11.,  1.,  6., 18.],
       [ 4.,  3.,  0.,  6.,  6.],
       [ 4.,  4.,  0.,  6.,  6.]])

In [20]:
# Persist only the descriptors of the selected features.
# Fixed: this cell previously pickled `features_values` (the full table) into
# the "selected" file, so the file did not match the selected feature columns.
utils.save_pickle(features_values_selected, '../dataset/features_values_selected.pkl')


In [21]:
# Sanity check: number of selected features.
np.shape(features_index)

(607,)