In [1]:
import os
import numpy as np
import pandas as pd
import cv2 
import gc 
from tqdm import tqdm 
from glob import glob

## Data Collection and Labelling

In [2]:
# Each sub-folder of ./data is treated as one class; its name is the label
# for every .jpg inside it.
# sorted() makes the sample order reproducible across runs and machines,
# since os.listdir() and glob() return entries in arbitrary order.
dirs=sorted(os.listdir('data'))
images_path=[]
labels=[]
for folder in dirs:
    # Skip stray files in ./data (e.g. the .npz archive this notebook saves
    # there later) so they are not mistaken for class folders.
    if not os.path.isdir(os.path.join('data',folder)):
        continue
    path=sorted(glob('./data/{}/*.jpg'.format(folder)))
    label=[folder]*len(path)
    images_path.extend(path)
    labels.extend(label)

## Face Detection

In [3]:
# Load a single sample image to try the face detector on.
img_path=images_path[1]
img=cv2.imread(img_path)
# cv2.imread does not raise on a missing/corrupt file; it returns None.
# Fail here with a clear message instead of an opaque error in a later cell.
if img is None:
    raise FileNotFoundError('Could not read image: {}'.format(img_path))

In [4]:
# Echo the loaded array to inspect the raw pixel values.
img

array([[[  1,   3,   3],
        [  0,   2,   2],
        [  0,   2,   2],
        ...,
        [  0,   3,   8],
        [  0,   3,   8],
        [  0,   3,   8]],

       [[  1,   3,   3],
        [  0,   2,   2],
        [  0,   2,   2],
        ...,
        [  0,   3,   8],
        [  0,   3,   8],
        [  0,   3,   8]],

       [[  1,   3,   3],
        [  0,   2,   2],
        [  0,   2,   2],
        ...,
        [  0,   3,   8],
        [  0,   3,   8],
        [  0,   3,   8]],

       ...,

       [[  6,  26, 111],
        [  6,  26, 111],
        [  6,  26, 111],
        ...,
        [ 83, 130, 211],
        [ 83, 130, 211],
        [ 80, 127, 208]],

       [[  6,  26, 111],
        [  6,  26, 111],
        [  6,  26, 111],
        ...,
        [ 81, 128, 209],
        [ 82, 129, 210],
        [ 80, 127, 208]],

       [[  6,  26, 111],
        [  6,  26, 111],
        [  6,  26, 111],
        ...,
        [ 80, 127, 208],
        [ 81, 128, 209],
        [ 80, 127, 208]]

In [5]:
# Show the sample image in a resizable 1000x800 window titled 'original'.
# NOTE: this opens a native GUI window (it needs a desktop session), and
# waitKey() without a timeout blocks this cell until a key is pressed.
cv2.namedWindow('original', cv2.WINDOW_NORMAL)
cv2.resizeWindow('original', 1000, 800)
cv2.imshow('original', img)
cv2.waitKey()
# Close the window once a key has been pressed.
cv2.destroyAllWindows()

In [6]:
# Load the Caffe face-detection network: the first argument is the network
# definition (prototxt), the second the trained weights (caffemodel).
face_detection_model=cv2.dnn.readNetFromCaffe(
    './models/deploy.prototxt.txt',
    './models/res10_300x300_ssd_iter_140000_fp16.caffemodel',
)

In [7]:
def face_detection_dnn(img,conf_threshold=0.5):
    """Detect a face in ``img`` and return the cropped face region.

    Runs the module-level ``face_detection_model`` on the image and returns
    the crop for the first detection whose confidence exceeds
    ``conf_threshold`` and whose box covers a non-empty area of the image.

    Parameters
    ----------
    img : numpy.ndarray or None
        Image as returned by ``cv2.imread``. ``None`` (unreadable file) or
        an empty array is tolerated and yields ``None``.
    conf_threshold : float, optional
        Minimum detection confidence; defaults to 0.5.

    Returns
    -------
    numpy.ndarray or None
        The face crop (a slice of a private copy of ``img``), or ``None``
        when no usable face was found.
    """
    # cv2.imread returns None for unreadable files; treat that as "no face".
    if img is None or img.size==0:
        return None

    image=img.copy()
    h,w=image.shape[:2]
    # NOTE(review): OpenCV's reference settings for this res10 SSD model are,
    # as far as I know, mean (104, 177, 123) with no channel swap. Left
    # unchanged here so existing results stay comparable - please confirm.
    blob=cv2.dnn.blobFromImage(image,1,(300,300),(104,117,123),swapRB=True)

    #get detections
    face_detection_model.setInput(blob)
    detections=face_detection_model.forward()
    for i in range(detections.shape[2]):
        confidence=detections[0,0,i,2]
        if confidence>conf_threshold:
            # Box coordinates are scaled by the image width/height to get
            # pixel positions.
            box=detections[0,0,i,3:7]*np.array([w,h,w,h])
            x1,y1,x2,y2=box.astype(int)
            # Clamp to the image: a negative start index would wrap around
            # in numpy slicing and return the wrong (or an empty) region.
            x1,y1=max(x1,0),max(y1,0)
            x2,y2=min(x2,w),min(y2,h)
            if x2<=x1 or y2<=y1:
                # Degenerate box; try the next detection instead of
                # returning an empty crop.
                continue
            return image[y1:y2,x1:x2]
    return None




In [8]:
# Try the detector on the sample image; the result is the face crop, or None
# if no detection passed the confidence threshold.
img_roi=face_detection_dnn(img)

In [10]:
def datapreprocess(img):
    """Turn a raw image into a normalised 100x100 face array.

    Steps: crop the face with ``face_detection_dnn``, resize it to 100x100
    with mean subtraction via ``cv2.dnn.blobFromImage``, reorder the blob to
    (height, width, channels), clip negative values to 0 and scale by the
    maximum so the values lie in [0, 1].

    Parameters
    ----------
    img : numpy.ndarray or None
        Image as returned by ``cv2.imread``.

    Returns
    -------
    numpy.ndarray or None
        Array of shape (100, 100, 3), or ``None`` when the image is
        unreadable or no usable face was detected.
    """
    # cv2.imread returns None for unreadable files; nothing to process.
    if img is None:
        return None

    face=face_detection_dnn(img)
    # An empty crop cannot be resized by blobFromImage, so treat it like
    # "no face".
    if face is None or face.size==0:
        return None

    blob=cv2.dnn.blobFromImage(face,1,(100,100),(104,117,123),swapRB=True)
    # The blob is (1, channels, height, width); move channels last. This is
    # the same result the earlier squeeze/.T/rotate/flip chain produced.
    face_hwc=np.ascontiguousarray(np.transpose(blob[0],(1,2,0)))

    # Mean subtraction produces negative values; clip them to 0.
    face_clipped=np.maximum(face_hwc,0)
    peak=face_clipped.max()
    if peak<=0:
        # Nothing positive left to scale by: return the all-zero image
        # rather than dividing by zero (which would fill it with NaN).
        return face_clipped
    return face_clipped/peak
    

In [11]:
# Preprocess every image; keep only those where a face was found, together
# with the matching label.
data_img=[]
label_img=[]
i=0
# zip() has no length, so pass total= explicitly to give tqdm a real
# progress bar with percentage and ETA.
progress=tqdm(zip(images_path,labels),total=len(images_path),desc='preprocessing')
for i,(path,label) in enumerate(progress,start=1):
    img=cv2.imread(path)
    # cv2.imread returns None for unreadable files; skip those rather than
    # aborting the whole (long) run.
    process_img=datapreprocess(img) if img is not None else None
    if process_img is not None:
        data_img.append(process_img)
        label_img.append(label)

    # Periodically run the garbage collector during the long loop.
    if i%100 == 0:
        gc.collect()

preprocessing: 10000it [18:38,  8.94it/s]


In [12]:
# Stack the per-image arrays into one feature array and turn the label list
# into a parallel array.
X = np.asarray(data_img)
y = np.asarray(label_img)


In [13]:
# Sanity check: one (100, 100, 3) array per image that yielded a face.
X.shape

(9959, 100, 100, 3)

In [14]:
# Sanity check: the number of labels must equal the first dimension of X.
y.shape

(9959,)

In [15]:
# Persist the preprocessed dataset. The arrays are passed positionally, so
# np.savez stores them under the default keys 'arr_0' (X) and 'arr_1' (y).
np.savez("./data/data_preprocess.npz",X,y)