## File structure
>WORKSPACE_DIR(.)
>>PRJ_NAME.ipynb  
>>PRJ_NAME_get_data.ipynb  
>>PRJ_NAME_train_model.ipynb   
>>DATA_PATH(datasets)  
>>>PRJ_NAME  
>>>>TRAIN_DIR(train)  
>>>>>CLASS_1_DIR  
>>>>>CLASS_n_DIR  

>>>>VALIDATION_DIR(validation)  
>>>>>CLASS_1_DIR  
>>>>>CLASS_n_DIR  

>>>>TEST_DIR(test)
>>>>>CLASS_1_DIR  
>>>>>CLASS_n_DIR  

>>MODEL_PATH(models)  
>>>PRJ_NAME  

>>IMAGE_PATH(images)  
>>>PRJ_NAME  

>>LOG_PATH(logs)  
>>>PRJ_NAME  


In [1]:
import os
import numpy as np
# Fixed seed so that numpy-based randomness is repeatable across runs.
rand_seed = 99
np.random.seed(rand_seed)

PRJ_NAME = "face_recognition"
WORKSPACE_DIR = "."

# Every per-project sub-directory (data, models, images, logs) is named after the project.
DATA_DIR = MODEL_DIR = IMAGE_DIR = LOG_DIR = PRJ_NAME

# <workspace>/<kind>/<project> locations for datasets, saved models, figures and logs.
DATA_PATH, MODEL_PATH, IMAGE_PATH, LOG_PATH = (
    os.path.join(WORKSPACE_DIR, parent, PRJ_NAME)
    for parent in ("datasets", "models", "images", "logs")
)

# The dataset directory is split into train, validation and test data.
TRAIN_DIR, VALIDATION_DIR, TEST_DIR = (
    os.path.join(DATA_PATH, split)
    for split in ("train", "validation", "test")
)

In [2]:
#make the directory (and any missing parent directories) if it does not exist
def make_file(path):
    """Ensure that the directory ``path`` exists and print what was done.

    Missing parent directories are created as well, so the function also
    works on a fresh workspace where e.g. ``./datasets`` does not exist yet
    (a plain ``os.mkdir`` raises ``FileNotFoundError`` in that case).

    Args:
        path: Directory path to create.

    Returns:
        Always ``True``.
    """
    if os.path.exists(path):
        print("Proj :-*" + path + "*-  file exist!")
    else:
        # exist_ok avoids a crash if the directory appears between the
        # existence check above and this call.
        os.makedirs(path, exist_ok=True)
        print("Proj :-*" + path + "*-  file making!")
    return True

In [3]:
def make_base_file(prj_name,class_name):
    """Create the project directory for one kind of artifact.

    Args:
        prj_name: Not used by this function; kept so existing calls keep
            working. The target paths come from the module-level constants.
        class_name: One of ``"data"``, ``"model"``, ``"image"`` or ``"log"``;
            selects DATA_PATH, MODEL_PATH, IMAGE_PATH or LOG_PATH.

    Returns:
        ``False`` (after printing an error) when ``class_name`` is not one of
        the four known kinds, otherwise the result of ``make_file`` for the
        selected path. Returning a value on success keeps the two outcomes
        distinguishable by truthiness.
    """
    if class_name == "data":
        path = DATA_PATH
    elif class_name == "model":
        path = MODEL_PATH
    elif class_name == "image":
        path = IMAGE_PATH
    elif class_name == "log":
        path = LOG_PATH
    else:
        print("Error: please check class_name! Is one of (data, model, image, log)?")
        return False
    return make_file(path)

In [4]:
#create the train, validation and test directories
def make_dataset_file():
    """Call ``make_file`` on TRAIN_DIR, VALIDATION_DIR and TEST_DIR, in that order."""
    for split_dir in (TRAIN_DIR, VALIDATION_DIR, TEST_DIR):
        make_file(split_dir)

In [5]:
#create the per-project data, model, image and log directories,
#then the train/validation/test directories inside the data directory
for base_kind in ("data", "model", "image", "log"):
    make_base_file(PRJ_NAME, base_kind)
make_dataset_file()

Proj :-*.\datasets\face_recognition*-  file exist!
Proj :-*.\models\face_recognition*-  file exist!
Proj :-*.\images\face_recognition*-  file exist!
Proj :-*.\logs\face_recognition*-  file exist!
Proj :-*.\datasets\face_recognition\train*-  file exist!
Proj :-*.\datasets\face_recognition\validation*-  file exist!
Proj :-*.\datasets\face_recognition\test*-  file exist!


In [6]:
#this is a binary classification,
#so every dataset directory includes two class directories: jarvis & other
train_jarvis_dir = os.path.join( TRAIN_DIR, 'jarvis' )
train_other_dir = os.path.join( TRAIN_DIR, 'other' )
validation_jarvis_dir = os.path.join( VALIDATION_DIR, 'jarvis' )
validation_other_dir = os.path.join( VALIDATION_DIR, 'other' )
test_jarvis_dir = os.path.join( TEST_DIR, 'jarvis' )
test_other_dir = os.path.join( TEST_DIR, 'other' )

#create the six class directories; using a loop instead of ending the cell on a
#bare make_file(...) call also keeps the cell from echoing a stray `True`
for class_dir in (
    train_jarvis_dir,
    train_other_dir,
    validation_jarvis_dir,
    validation_other_dir,
    test_jarvis_dir,
    test_other_dir,
):
    make_file(class_dir)

Proj :-*.\datasets\face_recognition\train\jarvis*-  file exist!
Proj :-*.\datasets\face_recognition\train\other*-  file exist!
Proj :-*.\datasets\face_recognition\validation\jarvis*-  file exist!
Proj :-*.\datasets\face_recognition\validation\other*-  file exist!
Proj :-*.\datasets\face_recognition\test\jarvis*-  file exist!
Proj :-*.\datasets\face_recognition\test\other*-  file exist!


True

In [7]:
import cv2
#path of the Haar cascade XML file used for face detection
#set the HAAR_CASCADE_PATH environment variable to point at the copy in your
#own environment; the fallback below is the path this notebook was written with
cascade_path = os.environ.get(
    "HAAR_CASCADE_PATH",
    "C:/Users/jarvis/AppData/Local/conda/conda/envs/tensorflow/Library/etc/haarcascades/haarcascade_frontalface_alt2.xml",
)
#capture stops by itself once catch_pic_num faces are saved, or when q is pressed
def CatchFaceFromVideo(window_name, camera_idx, catch_pic_num, path_name):
    """Detect faces in a camera stream and save each detected face as a JPEG.

    Frames are read from the camera, converted to greyscale and passed to the
    Haar cascade loaded from ``cascade_path``. Every detected face that lies at
    least 11 pixels away from the top and left frame borders is cropped with a
    10 pixel margin and written to ``<path_name>/<n>.jpg`` (n = 0, 1, 2, ...).
    The annotated frame is shown in a window while capturing.

    Capturing ends when ``catch_pic_num`` images have been saved, when a frame
    cannot be read, or when the user presses the ``q`` key. The camera and the
    window are released in every case, including when an exception is raised.

    Args:
        window_name: Title of the preview window.
        camera_idx: Value passed to ``cv2.VideoCapture`` to select the camera.
        catch_pic_num: Maximum number of face images to save.
        path_name: Directory prefix the images are written to.

    Raises:
        IOError: If the cascade file at ``cascade_path`` could not be loaded.
    """
    #load detect face classfier first, so a wrong cascade_path fails early
    classfier = cv2.CascadeClassifier(cascade_path)
    if classfier.empty():
        raise IOError("Could not load face cascade from: " + str(cascade_path))

    #new a window,the name is window_name
    cv2.namedWindow(window_name)

    #open camera
    cap = cv2.VideoCapture(camera_idx)

    #set green rectangle to show face
    color = (0, 255, 0)
    font = cv2.FONT_HERSHEY_SIMPLEX

    num = 0
    try:
        while cap.isOpened() and num < catch_pic_num:
            #read frame; stop if the camera did not deliver one
            ok, frame = cap.read()
            if not ok or frame is None:
                break

            #bgr2gray
            grey = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            #draw on a copy so rectangles/text never end up in a saved crop
            annotated = frame.copy()

            #face detect
            faceRects = classfier.detectMultiScale(grey, scaleFactor = 1.2, minNeighbors = 3, minSize = (32, 32))
            for faceRect in faceRects:
                if num >= catch_pic_num:
                    break
                x, y, w, h = faceRect
                #leave distance between edge and face detected
                if (x >= 11) and (y >= 11):
                    #save face
                    img_name = '%s/%d.jpg'%(path_name, num)
                    image = frame[y - 10: y + h + 10, x - 10: x + w + 10]
                    cv2.imwrite(img_name, image)
                    num += 1

                    cv2.rectangle(annotated, (x - 10, y - 10), (x + w + 10, y + h + 10), color, 2)

                    #show the number of images had been saved
                    cv2.putText(annotated,'num:%d' % (num),(x + 30, y + 30), font, 1, (255,255,255),4)

            #display frame & press q-key exit
            cv2.imshow(window_name, annotated)
            c = cv2.waitKey(10)
            if c & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()

In [8]:
#move the captured face images into the train, validation and test directories
import os
import shutil
import random
def move_images(number,source,destination):
    """Move ``number`` randomly chosen ``.jpg`` files from ``source`` to ``destination``.

    Only file names ending in ``.jpg`` are considered. The candidates are
    taken from ``source`` itself (not from a fixed directory), and the size
    check happens before sampling so that asking for too many images prints
    the error message instead of raising from ``random.sample``.

    Args:
        number: How many images to move.
        source: Directory to pick the images from.
        destination: Directory the images are moved into.

    Returns:
        None. When ``source`` holds fewer than ``number`` images an error is
        printed and nothing is moved.
    """
    #sorted so the candidate order does not depend on the OS listing order
    candidates = sorted(
        fichier for fichier in os.listdir(source) if fichier.endswith(".jpg")
    )
    if number > len(candidates):
        print("Error: you should check number of images!")
        return
    for fname in random.sample(candidates, number):
        src = os.path.join(source, fname)
        dst = os.path.join(destination, fname)
        shutil.move(src, dst)

In [9]:
def delete_images(path):
    """Remove every entry in ``path`` whose name ends with ``.jpg``.

    Entries with any other name are left untouched.
    """
    jpg_names = [entry for entry in os.listdir(path) if entry.endswith(".jpg")]
    for jpg_name in jpg_names:
        os.remove(os.path.join(path, jpg_name))

In [14]:
#catch jarvis face data to save DATA_PATH
#note: make sure only your face is shown in the video
#the second argument is the camera index (0 here, as in the "other" capture cell);
#without it the call fails with a TypeError because an argument is missing
CatchFaceFromVideo("get data", 0, 1000, DATA_PATH)
#split jarvis face data to datafiles
move_images(400,DATA_PATH,train_jarvis_dir)
move_images(400,DATA_PATH,validation_jarvis_dir)
move_images(200,DATA_PATH,test_jarvis_dir)
#delete rest data
delete_images(DATA_PATH)

In [20]:
#catch other face data to save DATA_PATH
#note: make sure only other people are shown in the video, not your face
#1000 images are requested because the split below moves 400 + 400 + 200 images
CatchFaceFromVideo("get data", 0, 1000, DATA_PATH)
#split other face data to datafiles
move_images(400,DATA_PATH,train_other_dir)
move_images(400,DATA_PATH,validation_other_dir)
move_images(200,DATA_PATH,test_other_dir)
#delete rest data
delete_images(DATA_PATH)

In [10]:
#For better performance, check the datasets by hand first and
#delete bad samples (e.g. tiny or wrongly detected images).
#Then report how many entries each class directory holds.
for count_label, class_dir in (
    ('total training jarvis images:', train_jarvis_dir),
    ('total training other images:', train_other_dir),
    ('total validation jarvis images:', validation_jarvis_dir),
    ('total validation other images:', validation_other_dir),
    ('total test jarvis images:', test_jarvis_dir),
    ('total test other images:', test_other_dir),
):
    print(count_label, len(os.listdir(class_dir)))

total training jarvis images: 389
total training other images: 387
total validation jarvis images: 396
total validation other images: 390
total test jarvis images: 196
total test other images: 196
