**This file is the detailed version, which includes creating and training the model. If you just want to run the program and see it working, open "facerec_direct.ipynb" and click "Run All".**

In [62]:
import cv2
import numpy as np
import os
import sys

**Step 1: collect photos (no need to run this part)**

**We use the getTrainingData function below to capture training photos for each of the four members of our group and store these pictures in the corresponding folders.**

In [8]:
def getTrainingData(window_name, camera_id, path_name, max_num):
    """Capture face crops from a camera and save them as numbered JPEG files.

    Each frame is searched with the Haar cascade
    'haarcascade_frontalface_alt2.xml'; every detected face is cropped from
    the frame and written to '<path_name><n>.jpg' (n = 0, 1, 2, ...).
    A preview with a rectangle and the running count is shown in a window.
    Capturing stops when max_num images have been saved, when the camera
    stops delivering frames, or when 'q' is pressed.

    Args:
        window_name: title of the preview window.
        camera_id: camera index passed to cv2.VideoCapture.
        path_name: prefix of the saved files, e.g. 'training_data_me/'.
        max_num: number of face images to save.
    """
    # Create the destination folder up front so that saving cannot fail just
    # because the directory is missing.
    target_dir = os.path.dirname(path_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    cv2.namedWindow(window_name) # create window
    cap = cv2.VideoCapture(camera_id) # open camera
    classifier = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml') # load classifier
    color = (0,255,0)
    font = cv2.FONT_HERSHEY_SIMPLEX
    num = 0 # count the pictures

    try:
        # "num < max_num" stops after exactly max_num saved images (the
        # previous "num > max_num" test saved one extra picture).
        while num < max_num and cap.isOpened():
            ok, frame = cap.read()
            if not ok:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faceRects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32,32))

            # Draw on a copy so rectangles/text of one face never end up
            # inside the saved crop of another face in the same frame.
            preview = frame.copy()
            for x, y, w, h in faceRects:
                image_name = '%s%d.jpg' % (path_name, num)
                cv2.imwrite(image_name, frame[y:y+h, x:x+w])
                num += 1

                cv2.rectangle(preview, (x,y), (x+w,y+h), color, 2)
                cv2.putText(preview, ('%d'%num), (x+30, y+30), font, 1, (255,0,255), 4)

                if num >= max_num:
                    break

            cv2.imshow(window_name, preview)
            c = cv2.waitKey(10)
            if c & 0xFF == ord('q'):# press q to quit
                break
    finally:
        # Always free the camera and close the window, even on an error.
        cap.release()
        cv2.destroyAllWindows()
    print('Finished.')

In [9]:
# Record face crops from the default camera into the 'training_data_me/' folder.
print('catching your face and writting into disk...')
getTrainingData(window_name='getTrainData',
                camera_id=0,
                path_name='training_data_me/',
                max_num=600)

catching your face and writting into disk...
Finished.


**Step 2: (no need to run this part; training_data_others has already been prepared by us)**


**Prepare training_data_others. We use the photos of people in the LFW dataset whose names start with the letter A as the training data for the "others" class.**

In [10]:
import os
import cv2


num = 0           # running index of the next face image to write
finished = False  # set to True once more than 3000 faces have been written
def read_lfw(lfw_path, classifier=None):
    """Recursively extract face crops from the .jpg files under lfw_path.

    Every face found by the Haar cascade is written to
    'training_data_others/<n>.jpg', where n is the module-level counter
    ``num``. Once ``num`` exceeds 3000 the module-level flag ``finished`` is
    set, 'Finished.' is printed once, and all recursion levels stop.

    Args:
        lfw_path: directory to scan (sub-directories are scanned too).
        classifier: cascade classifier to reuse; created on the outermost
            call and handed down to recursive calls so the XML file is
            loaded only once instead of once per image.
    """
    global num, finished
    path_name = 'training_data_others/'

    if classifier is None:
        classifier = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml') # load classifier
        # Make sure the output folder exists before anything is written.
        os.makedirs(path_name, exist_ok=True)

    for dir_item in os.listdir(lfw_path):
        full_path = os.path.abspath(os.path.join(lfw_path, dir_item))

        if os.path.isdir(full_path): # if it is folder, recursively read the folder
            read_lfw(full_path, classifier)
        elif dir_item.endswith('.jpg'): # if it is an image file
            image = cv2.imread(full_path)
            if image is None:
                # Unreadable or corrupt file: skip it instead of crashing in cvtColor.
                continue
            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            faceRects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32,32))

            for x, y, w, h in faceRects:
                image_name = '%s%d.jpg' % (path_name, num)
                # Crop from the untouched photo every time; reusing the name
                # `image` for the crop made later faces in the same photo be
                # cut out of the previous crop.
                face = image[y:y+h, x:x+w] # store the face photo
                cv2.imwrite(image_name, face)
                num += 1
                if num > 3000:
                    finished = True
                    print('Finished.')
                    break
        if finished:
            break

In [None]:
# The lfw folder interferes with step 1, so the photos were extracted once into
# training_data_others and the lfw folder is no longer shipped with the project.
print('Processing lfw dataset...')
read_lfw(lfw_path='lfw/')

**Step 3: (you can run from here)**

**We extract the images and labels into NumPy arrays and split them into training and testing datasets. After that, we create and train a CNN model, which reached a validation accuracy higher than 99%, and then save the model in the model folder.**

In [44]:
IMAGE_SIZE = 128 # we assign the image as (128,128,3)

def resize_image(image, height = IMAGE_SIZE, width = IMAGE_SIZE):
    """Pad an image to a square with black borders, then resize it.

    The shorter side is padded equally on both ends (the extra pixel, if any,
    goes to the bottom/right) so the content is not distorted by the
    following resize.

    Args:
        image: image array; the first two axes are rows and columns.
        height: number of rows of the result.
        width: number of columns of the result.

    Returns:
        The padded image resized to ``height`` rows by ``width`` columns.
    """
    # shape[:2] also works for 2-D (grayscale) arrays, unlike "h, w, _ = shape".
    h, w = image.shape[:2]

    # find the longer edge if the image is not square
    longest_edge = max(h, w)

    # At most one of dh/dw is non-zero, so only one axis gets padded.
    dh = longest_edge - h
    dw = longest_edge - w
    top = dh // 2
    bottom = dh - top
    left = dw // 2
    right = dw - left

    BLACK = [0,0,0]
    constant = cv2.copyMakeBorder(image, top, bottom, left, right, cv2.BORDER_CONSTANT, value = BLACK)
    # cv2.resize expects dsize as (width, height); the previous
    # (height, width) order only worked because both were always equal.
    return cv2.resize(constant, (width, height))

In [45]:
# Module-level accumulators, kept for backward compatibility: read_path()
# appends to them only when it is called without explicit lists.
images = []
labels = []

# path_name is the working directory; we can get it by os.getcwd() later when we use this function
def read_path(path_name, image_list=None, label_list=None):
    """Recursively collect every readable .jpg under path_name.

    Each image is resized with resize_image() to IMAGE_SIZE x IMAGE_SIZE and
    appended to image_list; the path of the directory that contains it is
    appended to label_list.

    Args:
        path_name: directory to scan.
        image_list: list receiving the images; defaults to the module-level
            ``images`` list (so repeated calls keep accumulating there).
        label_list: list receiving the directory paths; defaults to the
            module-level ``labels`` list.

    Returns:
        (image_list, label_list)
    """
    if image_list is None:
        image_list = images
    if label_list is None:
        label_list = labels

    for dir_item in os.listdir(path_name):
        full_path = os.path.abspath(os.path.join(path_name, dir_item))

        if os.path.isdir(full_path):
            read_path(full_path, image_list, label_list)
        elif dir_item.endswith('.jpg'): # if it is a file
            image = cv2.imread(full_path)
            if image is not None: # unreadable files are skipped
                image_list.append(resize_image(image, IMAGE_SIZE, IMAGE_SIZE))
                label_list.append(path_name)
    return image_list, label_list

def load_dataset(path_name):
    """Load all images under path_name and map their folders to class ids.

    The class id is looked up from the last two characters of the folder
    that contains the image.

    Args:
        path_name: root directory holding the per-person image folders.

    Returns:
        (images, labels): a float array of the images and an integer array
        of class ids, in matching order.

    Raises:
        KeyError: if an image lives in a folder whose name does not end in
            one of the known two-letter suffixes.
    """
    # Fresh lists on every call: accumulating into the module-level lists
    # duplicated the whole dataset whenever this function was run again.
    loaded_images, loaded_folders = read_path(path_name, [], [])

    class_by_suffix = {
        'me': 0,   #me
        'ei': 1,   #yifei
        'ng': 2,   #liang
        'rs': 3,   #others
        'ow': 4,   #yellow
    }

    class_ids = []
    for folder in loaded_folders:
        # Ignore a trailing path separator so 'training_data_me/' still maps to 'me'.
        suffix = folder.rstrip('/\\')[-2:]
        if suffix not in class_by_suffix:
            raise KeyError("no class is defined for image folder %r (suffix %r)" % (folder, suffix))
        class_ids.append(class_by_suffix[suffix])

    images = np.array(loaded_images, dtype='float')
    labels = np.array(class_ids)

    return images, labels

In [46]:
import random
import keras
import numpy as np
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.models import load_model
from keras import backend as K

class Dataset:
    """Holds the train/test split of the face images found under a directory."""

    def __init__(self, path_name):
        """Remember the data directory; nothing is read until load() is called.

        Args:
            path_name: root directory passed to load_dataset().
        """
        # train
        self.train_images = None
        self.train_labels = None

        # test
        self.test_images = None
        self.test_labels = None

        self.path_name = path_name

        # Shape of one sample as fed to the network; set by load().
        self.input_shape = None

    # load dataset and split it into a training and a test part
    def load(self, img_rows = IMAGE_SIZE, img_cols = IMAGE_SIZE, img_channels = 3, nb_classes = 5): #5 different classes
        """Read the images, split them 70/30 and prepare them for Keras.

        Images are arranged for the backend's image data format, labels are
        one-hot encoded and pixel values are scaled by 1/255. The results
        are stored on the instance (train_images, train_labels, test_images,
        test_labels, input_shape).

        Args:
            img_rows: image height in pixels.
            img_cols: image width in pixels.
            img_channels: number of colour channels.
            nb_classes: number of classes for the one-hot encoding.
        """
        images, labels = load_dataset(self.path_name)

        # NOTE: the split seed is drawn at random, so every run produces a
        # different train/test partition.
        train_images, test_images, train_labels, test_labels = train_test_split(
            images, labels, test_size = 0.3, random_state = random.randint(0, 100))

        # The images are stored as (samples, rows, cols, channels).
        train_images = train_images.reshape(train_images.shape[0], img_rows, img_cols, img_channels)
        test_images = test_images.reshape(test_images.shape[0], img_rows, img_cols, img_channels)

        # image_data_format is a function and the value is spelled
        # 'channels_first'; the previous comparison of the function object
        # with 'channel_first' could never be true.
        if K.image_data_format() == 'channels_first':
            # Move the channel axis forward. A plain reshape would scramble
            # the pixels because the data is laid out channels-last.
            train_images = train_images.transpose(0, 3, 1, 2)
            test_images = test_images.transpose(0, 3, 1, 2)
            self.input_shape = (img_channels, img_rows, img_cols)
        else:
            self.input_shape = (img_rows, img_cols, img_channels)

        print(train_images.shape[0], 'train samples')
        print(test_images.shape[0], 'test samples')

        train_labels = keras.utils.to_categorical(train_labels, nb_classes)
        test_labels = keras.utils.to_categorical(test_labels,nb_classes)

        # set pixels to 0~1
        train_images /= 255
        test_images /= 255

        self.train_images = train_images
        self.test_images  = test_images
        self.train_labels = train_labels
        self.test_labels  = test_labels


In [61]:
#CNN model
class Model:
    """Thin wrapper around a Keras Sequential CNN used for face identification."""

    def __init__(self):
        # The Keras model; created by build_model() or load_model().
        self.model = None

    def build_model(self, dataset, nb_classes = 5):
        """Create the network: two conv blocks followed by a dense classifier.

        Args:
            dataset: object whose ``input_shape`` gives the shape of one sample.
            nb_classes: size of the softmax output layer.
        """
        self.model = Sequential()
        # Block 1: two 3x3 convolutions with 32 filters, then 2x2 max pooling.
        self.model.add(Conv2D(32, (3, 3), padding = 'same', input_shape = dataset.input_shape))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(32, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size = (2,2)))
        # Block 2: two 3x3 convolutions with 64 filters, pooling and dropout.
        self.model.add(Conv2D(64, (3, 3), padding = 'same'))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(64, (3, 3)))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size = (2,2)))
        self.model.add(Dropout(0.25))
        # Classifier head.
        self.model.add(Flatten())
        self.model.add(Dense(512))
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.25))
        self.model.add(Dense(nb_classes))
        self.model.add(Activation('softmax'))

    def train(self, dataset, batch_size = 128, nb_epoch = 6):
        """Compile the model and fit it on the dataset's training split.

        Args:
            dataset: object providing ``train_images`` and ``train_labels``.
            batch_size: number of samples per gradient update.
            nb_epoch: number of passes over the training data.
        """
        self.model.compile(loss = 'categorical_crossentropy',
                           optimizer = 'ADAM',
                           metrics = ['accuracy'])
        self.model.fit(dataset.train_images,
                       dataset.train_labels,
                       batch_size = batch_size,
                       epochs = nb_epoch,
                       shuffle = True)

    def evaluate(self, dataset):
        """Print the second reported metric (accuracy, as compiled in train) on the test split."""
        score = self.model.evaluate(dataset.test_images, dataset.test_labels)
        print("%s: %.3f%%" % (self.model.metrics_names[1], score[1] * 100))

    def save_model(self, file_path):
        """Save the model to file_path, creating the parent folder if needed."""
        parent_dir = os.path.dirname(file_path)
        if parent_dir:
            # Saving into a folder that does not exist yet would fail.
            os.makedirs(parent_dir, exist_ok=True)
        self.model.save(file_path)

    def load_model(self, file_path):
        """Replace the wrapped model with the one stored at file_path."""
        self.model = load_model(file_path)

    def face_predict(self, image):
        """Classify a single face image.

        Args:
            image: face crop as an image array with 3 channels in the last axis.

        Returns:
            Array with one element: the index of the most probable class.
        """
        #use resize function defined before
        image = resize_image(image)
        image = image.astype('float32') / 255
        # Follow the layout the loaded network was built with, so a
        # channels-first model receives (3, rows, cols) samples.
        if tuple(self.model.input_shape[1:]) == (3, IMAGE_SIZE, IMAGE_SIZE):
            image = image.transpose(2, 0, 1)
            batch = image.reshape((1, 3, IMAGE_SIZE, IMAGE_SIZE))
        else:
            batch = image.reshape((1, IMAGE_SIZE, IMAGE_SIZE, 3))
        result = self.model.predict(batch)
        return result.argmax(axis=-1)
        

In [48]:
import os
# The image folders are looked up below the current working directory.
path_name = os.getcwd()
dataset = Dataset(path_name)
dataset.load(img_rows=128, img_cols=128, img_channels=3, nb_classes=5)

# Build the five-class network and display its layer list.
m = Model()
m.build_model(dataset, nb_classes=5)
m.model.layers

2126 train samples
912 test samples


[<keras.layers.convolutional.Conv2D at 0x1a69f6d198>,
 <keras.layers.core.Activation at 0x1a69f6d6d8>,
 <keras.layers.convolutional.Conv2D at 0x1a69f6d240>,
 <keras.layers.core.Activation at 0x11fce0160>,
 <keras.layers.pooling.MaxPooling2D at 0x11fce0748>,
 <keras.layers.convolutional.Conv2D at 0x11fce0630>,
 <keras.layers.core.Activation at 0x1a69f67748>,
 <keras.layers.convolutional.Conv2D at 0x1a80bd27b8>,
 <keras.layers.core.Activation at 0x11fce65c0>,
 <keras.layers.pooling.MaxPooling2D at 0x11fce6c18>,
 <keras.layers.core.Dropout at 0x11fce6e48>,
 <keras.layers.core.Flatten at 0x1a5e07cc88>,
 <keras.layers.core.Dense at 0x1a5e07ccc0>,
 <keras.layers.core.Activation at 0x1a5e0b2da0>,
 <keras.layers.core.Dropout at 0x1a5e0b2d30>,
 <keras.layers.core.Dense at 0x1a5e0b2d68>,
 <keras.layers.core.Activation at 0x1a6a11b668>]

In [49]:
# Fit the CNN on the training split with the defaults of Model.train (batch size 128, 6 epochs).
m.train(dataset)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [51]:
# Print the model's accuracy on the held-out test split.
m.evaluate(dataset)

acc: 99.561%


In [52]:
# Persist the trained network; the live-demo cell loads it again from './model/facemodel.h5'.
m.save_model('./model/facemodel.h5')

step 4:
The get_facecover method is used to add a cover to the detected face. It requires the top-left corner position (x, y) and the width and height (w, h) of the target's face; the captured frame (img); and the RGB image and alpha-channel image (rgb_image, a) of the face mask.

First, get_facecover resizes the face mask based on the target's face size. Second, it uses the alpha channel to get the face mask without the white space. Third, it places the face mask on top of the target face, which means that it modifies the captured frame, and then returns that frame.



In [53]:
# Final Method
def get_facecover(x, y, w, h, img, rgb_image, a):
    """Blend a face-mask picture over a detected face, modifying img in place.

    Args:
        x, y: top-left corner of the detected face in img.
        w, h: width and height of the detected face.
        img: the frame to draw on (3-channel image array).
        rgb_image: colour channels of the mask picture.
        a: alpha channel of the mask picture (non-zero where the mask is opaque).

    Returns:
        img. The frame is returned unchanged when the overlay would have no
        area, e.g. for a face touching the top edge or a target region that
        lies completely outside the frame.
    """
    x, y, w, h = int(x), int(y), int(w), int(h)

    # Size of the overlay. The height is limited by the space above the face.
    resized_image_w = w
    resized_image_h = h
    if resized_image_h > y:
        resized_image_h = y - 1
    if resized_image_w < 1 or resized_image_h < 1:
        # Nothing to draw; cv2.resize would fail on a non-positive size.
        return img

    # Shifting offsets of the overlay relative to the face rectangle
    dh = int(1.015 * h)
    dw = int(-0.2 * w)

    # Horizontal centre of the face rectangle
    center_x = (2 * x + w) // 2

    # Target region in the frame, clamped to the frame borders. Without the
    # clamp a negative left index wraps around in NumPy slicing and yields an
    # empty region, which made the later cv2.resize calls fail.
    frame_h, frame_w = img.shape[:2]
    top = max(0, y + dh - resized_image_h)
    bottom = min(frame_h, y + dh)
    left = max(0, center_x - resized_image_w // 3 + dw)
    right = min(frame_w, center_x + resized_image_w // 3 * 2 + dw)
    if bottom <= top or right <= left:
        return img

    # Resize the mask picture and its alpha channel to the overlay size
    resized_image = cv2.resize(rgb_image, (resized_image_w, resized_image_h))
    mask = cv2.resize(a, (resized_image_w, resized_image_h))
    mask_inv = cv2.bitwise_not(mask)

    # Background pixels that will show through the transparent mask areas
    bg_roi = img[top:bottom, left:right].astype(float)
    roi_size = (bg_roi.shape[1], bg_roi.shape[0])  # (width, height) for cv2.resize

    # Keep the background only where the mask picture is transparent
    alpha = cv2.merge((mask_inv, mask_inv, mask_inv)).astype(float) / 255
    alpha = cv2.resize(alpha, roi_size)
    bg = cv2.multiply(alpha, bg_roi).astype('uint8')

    # Keep the mask picture only where it is opaque, fitted to the region
    image = cv2.bitwise_and(resized_image, resized_image, mask = mask)
    image = cv2.resize(image, roi_size)

    # Combine both parts and write them back into the frame
    img[top:bottom, left:right] = cv2.add(bg, image)

    # Return the edited frame
    return img



## Images Pre-processing 



This step loads the face masks' data. Our model detects 7 different emotions (angry, hate, fear, happy, sad, surprise and neutral), which means that we need 7 face masks. We use the OpenCV library to read each .png image and split it into its r, g, b and a channels. Next, we merge the r, g and b channels into rgb_face. The RGB channel data and the alpha channel data are then saved in facial_dict:

                facial_dict[emotion] = [rgb_channels, alpha_channel]

In [54]:
from PIL import Image
facial_file = ['angry','hate','fear','happy','sad','surprise','neutral']


def _load_face_mask(mask_name):
    """Read '<mask_name>.png' and return [colour channels, alpha channel].

    The alpha channel is additionally written to '<mask_name>_alpha.png'.

    Raises:
        FileNotFoundError: if the picture cannot be read.
        ValueError: if the picture does not have four channels.
    """
    path = mask_name + '.png'
    face_img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    if face_img is None:
        # cv2.imread signals a missing or unreadable file by returning None.
        raise FileNotFoundError("face mask image %r could not be read" % path)
    if face_img.ndim != 3 or face_img.shape[2] != 4:
        raise ValueError("face mask image %r needs an alpha channel (4 channels expected)" % path)

    # OpenCV delivers the channels in B, G, R, A order; merging the first
    # three again gives the colour image in the order OpenCV works with.
    blue, green, red, alpha = cv2.split(face_img)
    colour = cv2.merge((blue, green, red))
    cv2.imwrite(mask_name + '_alpha.png', alpha)
    return [colour, alpha]


# facial_dict[emotion] = [colour channels, alpha channel]
facial_dict = {}
for i, mask_name in enumerate(facial_file):
    print(i)
    facial_dict[mask_name] = _load_face_mask(mask_name)

0
1
2
3
4
5
6


In [64]:
import cv2
import sys
from keras.models import load_model
# Emotion class index -> label (also the key into facial_dict)
emotion_labels = {
    0: 'angry',
    1: 'hate',
    2: 'fear',
    3: 'happy',
    4: 'sad',
    5: 'surprise',
    6: 'neutral'
}

# Face class index -> displayed name; every other index is shown as 'Unknown'
face_names = {
    0: 'Yan',
    1: 'Yujie',
    2: 'Liang',
    4: 'Zuxian'
}

# Extra pixels taken around the detected face rectangle
face_margin = 10

# Load emotion classifier
emotion_classifier = load_model('model/simple_CNN.530-0.65.hdf5')
# Load face identification model
model = Model()
model.load_model(file_path = './model/facemodel.h5')

# Create OpenCv window
cv2.namedWindow('Detecting your face.')
color = (0, 255, 0)

# Load haarcascade classifier
classifier = cv2.CascadeClassifier('haarcascade_frontalface_alt2.xml')
# Capture the real time image frame
cap = cv2.VideoCapture(0)
try:
    while cap.isOpened():
        ok, frame = cap.read() # type(frame) <class 'numpy.ndarray'>
        if not ok:
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) # gray scale the frame
        frame2 = frame.copy() # the copy that gets drawn on and displayed
        faceRects = classifier.detectMultiScale(gray, scaleFactor=1.2, minNeighbors=3, minSize=(32,32))

        for faceRect in faceRects:
            x, y, w, h = faceRect

            # Clamp the enlarged rectangle at the top/left border: a negative
            # start index wraps around in NumPy slicing and yields an empty
            # crop, which caused the error
            # "(-215) ssize.width > 0 && ssize.height > 0 in function cv::resize".
            image = frame[max(y - face_margin, 0): y + h + face_margin,
                          max(x - face_margin, 0): x + w + face_margin].copy()
            if image.size == 0:
                # Skip only this face; the other faces in the frame are still handled.
                continue

            # Prepare a (1, 48, 48, 1) gray scale input scaled to 0~1
            gray_face = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
            gray_face = cv2.resize(gray_face, (48, 48))
            gray_face = gray_face / 255.0
            gray_face = np.expand_dims(gray_face, 0)
            gray_face = np.expand_dims(gray_face, -1)

            # Predict emotion from the gray scale image
            emotion_label_arg = int(np.argmax(emotion_classifier.predict(gray_face)))
            emotion = emotion_labels[emotion_label_arg]
            # Predict the identity from the colour crop
            faceID = model.face_predict(image)

            # Put the face mask that matches the emotion on the displayed frame
            frame2 = get_facecover(x, y, w, h, frame2, facial_dict[emotion][0], facial_dict[emotion][1])

            # Show the target's name and emotion
            name = face_names.get(int(faceID[0]), 'Unknown')
            cv2.rectangle(frame2,
                          (x - face_margin, y - face_margin),
                          (x + w + face_margin, y + h + face_margin),
                          color, thickness = 2)
            cv2.putText(frame2, name + ' is ' + emotion,
                        (x + 30, y + 30),                      # Coordinate
                        cv2.FONT_HERSHEY_SIMPLEX,              # Font
                        1,                                     # Word size
                        (255,0,255),                           # Word color
                        2)                                     # Word's line width

        # Show the edited frame on the screen
        cv2.imshow("Detecting your face.", frame2)

        # Press q to exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and close the window, also when an error occurred
    cap.release()
    cv2.destroyAllWindows()

