# Распознавание эмоций

Классификатор - предобученная MobileNet на весах imagenet. В качестве детектора работает алгоритм Хаара 

In [41]:
import tensorflow as tf
# from tensorflow.keras.applications import MobileNet
from tensorflow.keras.models import Sequential,Model, load_model
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten,GlobalAveragePooling2D
from tensorflow.keras.layers import Conv2D,MaxPooling2D,ZeroPadding2D
# from tensorflow.keras.layers.normalization import BatchNormalization
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array
from tensorflow.keras.optimizers import RMSprop,Adam
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau
import cv2
import os
import glob
from datetime import datetime
from time import sleep
import numpy as np

In [42]:
# Пути

ROOT_PATH  = os.path.abspath('.')
TRAIN_DATA_PATH = os.path.join(ROOT_PATH, 'train')
VALIDATION_DATA_PATH = os.path.join(ROOT_PATH, 'validation')
STORED_FRAMES_DIR = 'data' # папка для сохранения snapshot'ов
OUTPUT_DIR = 'output'

# Сеттинги

IMG_SIZE = (224, 224, 3)
batch_size = 8
nb_train_samples = 0
nb_validation_samples = 300 # 6
epochs = 10
class_labels = os.listdir(VALIDATION_DATA_PATH) # ['happy', 'sad'] # class names берем из структуры файлов

CLASS_DICT = {"1": "smile",
              "2": "sad",
              "3": "surprised",
              "4": "facepalm",
              "5": "greeting",
              "6": "boared",
              "7": "trick",
              "8": "thumb"
             }

num_classes = len(CLASS_DICT)

In [43]:
# Returns files list
def get_filenames_list(classes, path):
    dataset = []
    for cls in classes:
        print('Processing class: {}'.format(cls))
        for fpath in glob.glob(os.path.join(path, cls, '*.jpg')):
            cls_idx = classes.index(cls)
            dataset.append((fpath, cls_idx))
    print('Done')
    return dataset

In [44]:
n_train_samples = len(get_filenames_list(list(CLASS_DICT.values()), TRAIN_DATA_PATH))
n_val_samples = len(get_filenames_list(list(CLASS_DICT.values()), VALIDATION_DATA_PATH))

Processing class: smile
Processing class: sad
Processing class: surprised
Processing class: facepalm
Processing class: greeting
Processing class: boared
Processing class: trick
Processing class: thumb
Done
Processing class: smile
Processing class: sad
Processing class: surprised
Processing class: facepalm
Processing class: greeting
Processing class: boared
Processing class: trick
Processing class: thumb
Done


## Модуль обучения

#### Сбор и подготовка датасета 

In [5]:
def make_train_filestructure():
    
    # Создает директории с именами классов

    for cls in CLASS_DICT.values():
        d = os.path.join(ROOT_PATH, STORED_FRAMES_DIR, cls)
        if not os.path.exists(d):
            os.mkdir(d)
            print(f"Created {cls} dir")

make_train_filestructure()

Created smile dir
Created sad dir
Created surprised dir
Created facepalm dir
Created greeting dir
Created boared dir
Created trick dir
Created thumb dir


In [6]:
# Запись семплов с камеры

def run_cam_in_dataset_creation_mode():
    
    """
    Usage:
    Выбираем класс из controls, корчим соответствующую рожицу, сохраняем по 's'
    
    Controls: 
    q - exit
    s - take snapshot
    1 - choose "smile" class
    2 - choose "sad" class
    3 - choose "surprized" class
    4 - choose "facepalm" class
    5 - choose "greeting" class
    6 - choose "boared" class
    7 - choose "trick" class
    8 - choose "thumb" class
    """
    
    cap = cv2.VideoCapture(0)
    cur_class = CLASS_DICT.get('1')
    img_counter = 0
    while True:
        # Покадровое чтение захвата
        success, frame = cap.read()
        cv2.imshow('Emotion Detector',frame)
        
        # Options
        
        k = cv2.waitKey(1) & 0xFF
        if k == ord('q'): # wait for ESC key to exit
            break
        elif k == ord('1'):
            print(f"Class changed to {CLASS_DICT.get('1')}")
            cur_class = CLASS_DICT.get('1')
        elif k == ord('2'):
            print(f"Class changed to {CLASS_DICT.get('2')}")
            cur_class = CLASS_DICT.get('2')
        elif k == ord('3'):
            print(f"Class changed to {CLASS_DICT.get('3')}")
            cur_class = CLASS_DICT.get('3')
        elif k == ord('4'):
            print(f"Class changed to {CLASS_DICT.get('4')}")
            cur_class = CLASS_DICT.get('4')        
        elif k == ord('5'):
            print(f"Class changed to {CLASS_DICT.get('5')}")
            cur_class = CLASS_DICT.get('5')            
        elif k == ord('6'):
            print(f"Class changed to {CLASS_DICT.get('6')}")
            cur_class = CLASS_DICT.get('6')
        elif k == ord('7'):
            print(f"Class changed to {CLASS_DICT.get('7')}")
            cur_class = CLASS_DICT.get('7')            
        elif k == ord('8'):
            print(f"Class changed to {CLASS_DICT.get('8')}")
            cur_class = CLASS_DICT.get('8')               
        elif k == ord('s'):
            fn_gen = f"frame{str(datetime.now()).replace(':','-')}{cur_class}.jpg"
            cv2.imwrite(f"./data/{cur_class}/{fn_gen}", frame)
            img_counter += 1
            print(f"Saved img # {img_counter} to {cur_class}")

    cap.release()
    cv2.destroyAllWindows()
    return

In [37]:
run_cam_in_dataset_creation_mode()

Class changed to smile
Saved img # 1 to smile
Saved img # 2 to smile
Saved img # 3 to smile
Class changed to sad
Saved img # 4 to sad
Saved img # 5 to sad
Saved img # 6 to sad
Class changed to surprised
Saved img # 7 to surprised
Saved img # 8 to surprised
Saved img # 9 to surprised
Class changed to facepalm
Saved img # 10 to facepalm
Saved img # 11 to facepalm
Saved img # 12 to facepalm
Class changed to greeting
Saved img # 13 to greeting
Saved img # 14 to greeting
Saved img # 15 to greeting
Class changed to boared
Saved img # 16 to boared
Saved img # 17 to boared
Saved img # 18 to boared
Class changed to trick
Saved img # 19 to trick
Saved img # 20 to trick
Saved img # 21 to trick
Class changed to thumb
Saved img # 22 to thumb
Saved img # 23 to thumb
Saved img # 24 to thumb


### Подготовка и обучение модели

In [7]:
# MobileNet работает с изображениями размерности 224, 224
MobileNet = tf.keras.applications.MobileNet(weights='imagenet',include_top=False,input_shape=IMG_SIZE)

In [8]:
# Set layers trainable
for layer in MobileNet.layers:
    layer.trainable = True

# print layers
# for (i,layer) in enumerate(MobileNet.layers):
#     print(str(i),layer.__class__.__name__,layer.trainable)

In [9]:
def addTopModelMobileNet(bottom_model, num_classes):
    
    """ Добавление  финальных слоев на выход модели"""

    top_model = bottom_model.output
    top_model = GlobalAveragePooling2D()(top_model)
    top_model = Dense(1024,activation='relu')(top_model)
    
    top_model = Dense(1024,activation='relu')(top_model)
    
    top_model = Dense(512,activation='relu')(top_model)
    
    top_model = Dense(num_classes,activation='softmax')(top_model)

    return top_model

FC_Head = addTopModelMobileNet(MobileNet, num_classes)

In [10]:
model = Model(inputs = MobileNet.input, outputs = FC_Head)
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 225, 225, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128   

In [16]:
train_datagen = ImageDataGenerator(
                    rescale=1./255,
                    rotation_range=30,
                    width_shift_range=0.3,
                    height_shift_range=0.3,
                    horizontal_flip=True,
                    fill_mode='nearest'
                                   )

validation_datagen = ImageDataGenerator(rescale=1./255)


train_generator = train_datagen.flow_from_directory(
                        TRAIN_DATA_PATH,
                        target_size = IMG_SIZE[:2],
                        batch_size = batch_size,
                        class_mode = 'categorical'
                        )

validation_generator = validation_datagen.flow_from_directory(
                            VALIDATION_DATA_PATH,
                            target_size=IMG_SIZE[:2],
                            batch_size=batch_size,
                            class_mode='categorical')

Found 101 images belonging to 8 classes.
Found 24 images belonging to 8 classes.


In [21]:
checkpoint = ModelCheckpoint(
                             'my_face_mobilNet.h5',
                             monitor='val_loss',
                             mode='min',
                             save_best_only=True,
                             verbose=1)

earlystop = EarlyStopping(
                          monitor='loss',
                          min_delta=0,
                          patience=10,
                          verbose=1,restore_best_weights=True)

learning_rate_reduction = ReduceLROnPlateau(monitor='accuracy', 
                                            patience=5, 
                                            verbose=1, 
                                            factor=0.2, 
                                            min_lr=0.0001)

callbacks = [earlystop,checkpoint,learning_rate_reduction]

model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=0.001),
              metrics=['accuracy']
              )


history = model.fit(
            train_generator,
            steps_per_epoch=n_train_samples//batch_size,
            epochs=epochs,
            callbacks=callbacks,
            validation_data=validation_generator,
            validation_steps=n_val_samples//batch_size)


  ...
    to  
  ['...']
  ...
    to  
  ['...']
Train for 3 steps, validate for 1 steps
Epoch 1/10
Epoch 00001: val_loss improved from inf to 5.60658, saving model to my_face_mobilNet.h5
Epoch 2/10
Epoch 00002: val_loss improved from 5.60658 to 1.46083, saving model to my_face_mobilNet.h5
Epoch 3/10
Epoch 00003: val_loss did not improve from 1.46083
Epoch 4/10
Epoch 00004: val_loss did not improve from 1.46083
Epoch 5/10
Epoch 00005: val_loss did not improve from 1.46083
Epoch 6/10
Epoch 00006: val_loss did not improve from 1.46083
Epoch 7/10
Epoch 00007: val_loss did not improve from 1.46083
Epoch 8/10
Epoch 00008: val_loss did not improve from 1.46083
Epoch 9/10
Epoch 00009: val_loss did not improve from 1.46083
Epoch 10/10
Epoch 00010: val_loss did not improve from 1.46083


## Модуль Inference

cv2.CascadeClassifier parms:


**image** – исходное изображение

**scaleFactor** – определяет то, на сколько будет увеличиваться скользящее окно поиска на каждой итерации. 1.1 означает на 10%, 1.05 на 5% и т.д. Чем больше это значение, тем быстрее работает алгоритм.

**minNeighbors** — Чем больше это значение, тем более параноидальным будет поиск и тем чаще он будет пропускать реальные лица, считая, что это ложное срабатывание. Оптимальное значение 3-6.

**minSize** – минимальный размер лица на фото. 30 на 30 обычно вполне достаточно.

In [22]:
# Классификатора Хаара
face_classifier = cv2.CascadeClassifier('./haarcascade_frontalface_default.xml')
# Загрузка весов
classifiers = ['my_face_mobilNet.h5','emotion_face_mobilNet.h5', 'Emotion_Detection.h5']
classifier = load_model(classifiers[0])
class_labels = os.listdir(VALIDATION_DATA_PATH) # ['happy', 'sad']

In [23]:
classifier.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
conv1_pad (ZeroPadding2D)    (None, 225, 225, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (ReLU)            (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128   

In [5]:
# class_labels = ['Angry','Happy','Neutral','Sad','Surprise']


In [24]:
def run_cam_in_detection_mode():
    
    """Runs model and predicts label
    Stores snapshots by 's' key """
    
    cap = cv2.VideoCapture(0)

    while True:
        # Покадровое чтение захвата
        success, frame = cap.read()
        labels = []
        faces = face_classifier.detectMultiScale(frame,1.3,5)

        for (x,y,w,h) in faces:
            cv2.rectangle(frame,(x,y),(x+w,y+h),(255,0,0),2)
            roi_gray = frame[y:y+h,x:x+w]
            roi_gray = cv2.resize(roi_gray,(224,224),interpolation=cv2.INTER_AREA)
            
            if np.sum([roi_gray])!=0:
                roi = roi_gray.astype('float')/255.0
                roi = img_to_array(roi)
                roi = np.expand_dims(roi,axis=0)
                
                # make a prediction on the ROI, then lookup the class
                
                preds = classifier.predict(roi)[0]
                label=class_labels[preds.argmax()]
                label_position = (x,y)
                cv2.putText(frame,label,label_position,cv2.FONT_HERSHEY_SIMPLEX,2,(0,255,0),3)
                
            else:
                cv2.putText(frame,'No Face Found',(20,60),cv2.FONT_HERSHEY_SIMPLEX,2,(0,255,0),3)
        cv2.imshow('Emotion Detector',frame)
        
        k = cv2.waitKey(1) & 0xFF
        if k == ord('q'): # wait for ESC key to exit
            break
        elif k == ord('s'):
            fn_gen = f"frame{str(datetime.now()).replace(':','-')}.jpg"
            print(fn_gen)
            cv2.imwrite(f"./{OUTPUT_DIR}/{fn_gen}", frame)
            img_counter += 1
            
            print('Test OK')

    cap.release()
    cv2.destroyAllWindows()
    return


In [25]:
run_cam_in_detection_mode()